[arvados] updated: 2.6.0-137-gb4f85f65a

git repository hosting git at public.arvados.org
Fri May 5 17:59:24 UTC 2023


Summary of changes:
 lib/dispatchcloud/worker/pool.go | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

       via  b4f85f65a07e02d710ad32edeb86a1c5ea76d475 (commit)
       via  3677f4004aef0c64c316b31955502308a72797f1 (commit)
      from  64639ed6313f01016da4e0ffd81752dedf9b052b (commit)

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.


commit b4f85f65a07e02d710ad32edeb86a1c5ea76d475
Merge: 64639ed63 3677f4004
Author: Tom Clegg <tom at curii.com>
Date:   Fri May 5 13:57:11 2023 -0400

    Merge branch '20457-max-supervisors-overquota'
    
    refs #20457
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>


commit 3677f4004aef0c64c316b31955502308a72797f1
Author: Tom Clegg <tom at curii.com>
Date:   Wed May 3 09:51:57 2023 -0400

    20457: Add dispatchcloud_probe_age_seconds_max and _median metrics.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index c270eef49..4bf969358 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -197,6 +197,8 @@ type Pool struct {
 	mTimeFromShutdownToGone   prometheus.Summary
 	mTimeFromQueueToCrunchRun prometheus.Summary
 	mRunProbeDuration         *prometheus.SummaryVec
+	mProbeAgeMax              prometheus.Gauge
+	mProbeAgeMedian           prometheus.Gauge
 }
 
 type createCall struct {
@@ -626,6 +628,20 @@ func (wp *Pool) registerMetrics(reg *prometheus.Registry) {
 		Help:      "Number of containers reported running by cloud VMs.",
 	})
 	reg.MustRegister(wp.mContainersRunning)
+	wp.mProbeAgeMax = prometheus.NewGauge(prometheus.GaugeOpts{
+		Namespace: "arvados",
+		Subsystem: "dispatchcloud",
+		Name:      "probe_age_seconds_max",
+		Help:      "Maximum number of seconds since an instance's most recent successful probe.",
+	})
+	reg.MustRegister(wp.mProbeAgeMax)
+	wp.mProbeAgeMedian = prometheus.NewGauge(prometheus.GaugeOpts{
+		Namespace: "arvados",
+		Subsystem: "dispatchcloud",
+		Name:      "probe_age_seconds_median",
+		Help:      "Median number of seconds since an instance's most recent successful probe.",
+	})
+	reg.MustRegister(wp.mProbeAgeMedian)
 	wp.mInstances = prometheus.NewGaugeVec(prometheus.GaugeOpts{
 		Namespace: "arvados",
 		Subsystem: "dispatchcloud",
@@ -738,6 +754,8 @@ func (wp *Pool) updateMetrics() {
 	cpu := map[string]int64{}
 	mem := map[string]int64{}
 	var running int64
+	now := time.Now()
+	var probed []time.Time
 	for _, wkr := range wp.workers {
 		var cat string
 		switch {
@@ -757,6 +775,7 @@ func (wp *Pool) updateMetrics() {
 		cpu[cat] += int64(wkr.instType.VCPUs)
 		mem[cat] += int64(wkr.instType.RAM)
 		running += int64(len(wkr.running) + len(wkr.starting))
+		probed = append(probed, wkr.probed)
 	}
 	for _, cat := range []string{"inuse", "hold", "booting", "unknown", "idle"} {
 		wp.mInstancesPrice.WithLabelValues(cat).Set(price[cat])
@@ -773,6 +792,15 @@ func (wp *Pool) updateMetrics() {
 		wp.mInstances.WithLabelValues(k.cat, k.instType).Set(float64(v))
 	}
 	wp.mContainersRunning.Set(float64(running))
+
+	if len(probed) == 0 {
+		wp.mProbeAgeMax.Set(0)
+		wp.mProbeAgeMedian.Set(0)
+	} else {
+		sort.Slice(probed, func(i, j int) bool { return probed[i].Before(probed[j]) })
+		wp.mProbeAgeMax.Set(now.Sub(probed[0]).Seconds())
+		wp.mProbeAgeMedian.Set(now.Sub(probed[len(probed)/2]).Seconds())
+	}
 }
 
 func (wp *Pool) runProbes() {

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list