[arvados] updated: 2.6.1-14-g115097426

git repository hosting git at public.arvados.org
Thu May 18 14:30:27 UTC 2023


Summary of changes:
 lib/dispatchcloud/scheduler/run_queue.go | 14 +++++++++++++-
 lib/dispatchcloud/worker/pool.go         | 28 ++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+), 1 deletion(-)

       via  11509742679c983bc9eb68c5875593cce25764d5 (commit)
       via  c3b8668deb1b9739fdb30c1194d3bac8a72fb98c (commit)
      from  546ec93955c0c77107360f841de31fd7b7983c6c (commit)

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.


commit 11509742679c983bc9eb68c5875593cce25764d5
Author: Tom Clegg <tom at curii.com>
Date:   Wed May 17 11:19:35 2023 -0400

    Merge branch '20457-queue-churn'
    
    refs #20457
    refs #20511
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/lib/dispatchcloud/scheduler/run_queue.go b/lib/dispatchcloud/scheduler/run_queue.go
index e6b1b06a2..8f4c2e083 100644
--- a/lib/dispatchcloud/scheduler/run_queue.go
+++ b/lib/dispatchcloud/scheduler/run_queue.go
@@ -22,7 +22,19 @@ func (sch *Scheduler) runQueue() {
 		sorted = append(sorted, ent)
 	}
 	sort.Slice(sorted, func(i, j int) bool {
-		if pi, pj := sorted[i].Container.Priority, sorted[j].Container.Priority; pi != pj {
+		ilocked := sorted[i].Container.State == arvados.ContainerStateLocked
+		jlocked := sorted[j].Container.State == arvados.ContainerStateLocked
+		if ilocked != jlocked {
+			// Give precedence to containers that we have
+			// already locked, even if higher-priority
+			// containers have since arrived in the
+			// queue. This avoids undesirable queue churn
+			// effects including extra lock/unlock cycles
+			// and bringing up new instances and quickly
+			// shutting them down to make room for
+			// different instance sizes.
+			return ilocked
+		} else if pi, pj := sorted[i].Container.Priority, sorted[j].Container.Priority; pi != pj {
 			return pi > pj
 		} else {
 			// When containers have identical priority,

commit c3b8668deb1b9739fdb30c1194d3bac8a72fb98c
Author: Tom Clegg <tom at curii.com>
Date:   Fri May 5 13:57:11 2023 -0400

    Merge branch '20457-max-supervisors-overquota'
    
    refs #20457
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index c270eef49..4bf969358 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -197,6 +197,8 @@ type Pool struct {
 	mTimeFromShutdownToGone   prometheus.Summary
 	mTimeFromQueueToCrunchRun prometheus.Summary
 	mRunProbeDuration         *prometheus.SummaryVec
+	mProbeAgeMax              prometheus.Gauge
+	mProbeAgeMedian           prometheus.Gauge
 }
 
 type createCall struct {
@@ -626,6 +628,20 @@ func (wp *Pool) registerMetrics(reg *prometheus.Registry) {
 		Help:      "Number of containers reported running by cloud VMs.",
 	})
 	reg.MustRegister(wp.mContainersRunning)
+	wp.mProbeAgeMax = prometheus.NewGauge(prometheus.GaugeOpts{
+		Namespace: "arvados",
+		Subsystem: "dispatchcloud",
+		Name:      "probe_age_seconds_max",
+		Help:      "Maximum number of seconds since an instance's most recent successful probe.",
+	})
+	reg.MustRegister(wp.mProbeAgeMax)
+	wp.mProbeAgeMedian = prometheus.NewGauge(prometheus.GaugeOpts{
+		Namespace: "arvados",
+		Subsystem: "dispatchcloud",
+		Name:      "probe_age_seconds_median",
+		Help:      "Median number of seconds since an instance's most recent successful probe.",
+	})
+	reg.MustRegister(wp.mProbeAgeMedian)
 	wp.mInstances = prometheus.NewGaugeVec(prometheus.GaugeOpts{
 		Namespace: "arvados",
 		Subsystem: "dispatchcloud",
@@ -738,6 +754,8 @@ func (wp *Pool) updateMetrics() {
 	cpu := map[string]int64{}
 	mem := map[string]int64{}
 	var running int64
+	now := time.Now()
+	var probed []time.Time
 	for _, wkr := range wp.workers {
 		var cat string
 		switch {
@@ -757,6 +775,7 @@ func (wp *Pool) updateMetrics() {
 		cpu[cat] += int64(wkr.instType.VCPUs)
 		mem[cat] += int64(wkr.instType.RAM)
 		running += int64(len(wkr.running) + len(wkr.starting))
+		probed = append(probed, wkr.probed)
 	}
 	for _, cat := range []string{"inuse", "hold", "booting", "unknown", "idle"} {
 		wp.mInstancesPrice.WithLabelValues(cat).Set(price[cat])
@@ -773,6 +792,15 @@ func (wp *Pool) updateMetrics() {
 		wp.mInstances.WithLabelValues(k.cat, k.instType).Set(float64(v))
 	}
 	wp.mContainersRunning.Set(float64(running))
+
+	if len(probed) == 0 {
+		wp.mProbeAgeMax.Set(0)
+		wp.mProbeAgeMedian.Set(0)
+	} else {
+		sort.Slice(probed, func(i, j int) bool { return probed[i].Before(probed[j]) })
+		wp.mProbeAgeMax.Set(now.Sub(probed[0]).Seconds())
+		wp.mProbeAgeMedian.Set(now.Sub(probed[len(probed)/2]).Seconds())
+	}
 }
 
 func (wp *Pool) runProbes() {

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list