[arvados] updated: 2.6.0-137-gb4f85f65a
git repository hosting
git at public.arvados.org
Fri May 5 17:59:24 UTC 2023
Summary of changes:
lib/dispatchcloud/worker/pool.go | 28 ++++++++++++++++++++++++++++
1 file changed, 28 insertions(+)
via b4f85f65a07e02d710ad32edeb86a1c5ea76d475 (commit)
via 3677f4004aef0c64c316b31955502308a72797f1 (commit)
from 64639ed6313f01016da4e0ffd81752dedf9b052b (commit)
The revisions listed above that are new to this repository have
not appeared in any other notification email, so they are listed
in full below.
commit b4f85f65a07e02d710ad32edeb86a1c5ea76d475
Merge: 64639ed63 3677f4004
Author: Tom Clegg <tom at curii.com>
Date: Fri May 5 13:57:11 2023 -0400
Merge branch '20457-max-supervisors-overquota'
refs #20457
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
commit 3677f4004aef0c64c316b31955502308a72797f1
Author: Tom Clegg <tom at curii.com>
Date: Wed May 3 09:51:57 2023 -0400
20457: Add dispatchcloud_probe_age_seconds_max and _median metrics.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index c270eef49..4bf969358 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -197,6 +197,8 @@ type Pool struct {
mTimeFromShutdownToGone prometheus.Summary
mTimeFromQueueToCrunchRun prometheus.Summary
mRunProbeDuration *prometheus.SummaryVec
+ mProbeAgeMax prometheus.Gauge
+ mProbeAgeMedian prometheus.Gauge
}
type createCall struct {
@@ -626,6 +628,20 @@ func (wp *Pool) registerMetrics(reg *prometheus.Registry) {
Help: "Number of containers reported running by cloud VMs.",
})
reg.MustRegister(wp.mContainersRunning)
+ wp.mProbeAgeMax = prometheus.NewGauge(prometheus.GaugeOpts{
+ Namespace: "arvados",
+ Subsystem: "dispatchcloud",
+ Name: "probe_age_seconds_max",
+ Help: "Maximum number of seconds since an instance's most recent successful probe.",
+ })
+ reg.MustRegister(wp.mProbeAgeMax)
+ wp.mProbeAgeMedian = prometheus.NewGauge(prometheus.GaugeOpts{
+ Namespace: "arvados",
+ Subsystem: "dispatchcloud",
+ Name: "probe_age_seconds_median",
+ Help: "Median number of seconds since an instance's most recent successful probe.",
+ })
+ reg.MustRegister(wp.mProbeAgeMedian)
wp.mInstances = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: "arvados",
Subsystem: "dispatchcloud",
@@ -738,6 +754,8 @@ func (wp *Pool) updateMetrics() {
cpu := map[string]int64{}
mem := map[string]int64{}
var running int64
+ now := time.Now()
+ var probed []time.Time
for _, wkr := range wp.workers {
var cat string
switch {
@@ -757,6 +775,7 @@ func (wp *Pool) updateMetrics() {
cpu[cat] += int64(wkr.instType.VCPUs)
mem[cat] += int64(wkr.instType.RAM)
running += int64(len(wkr.running) + len(wkr.starting))
+ probed = append(probed, wkr.probed)
}
for _, cat := range []string{"inuse", "hold", "booting", "unknown", "idle"} {
wp.mInstancesPrice.WithLabelValues(cat).Set(price[cat])
@@ -773,6 +792,15 @@ func (wp *Pool) updateMetrics() {
wp.mInstances.WithLabelValues(k.cat, k.instType).Set(float64(v))
}
wp.mContainersRunning.Set(float64(running))
+
+ if len(probed) == 0 {
+ wp.mProbeAgeMax.Set(0)
+ wp.mProbeAgeMedian.Set(0)
+ } else {
+ sort.Slice(probed, func(i, j int) bool { return probed[i].Before(probed[j]) })
+ wp.mProbeAgeMax.Set(now.Sub(probed[0]).Seconds())
+ wp.mProbeAgeMedian.Set(now.Sub(probed[len(probed)/2]).Seconds())
+ }
}
func (wp *Pool) runProbes() {
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list