[arvados] updated: 2.5.0-111-g663bc7640

git repository hosting git at public.arvados.org
Fri Feb 10 15:55:22 UTC 2023


Summary of changes:
 lib/dispatchcloud/scheduler/run_queue.go |  2 ++
 lib/dispatchcloud/scheduler/scheduler.go | 16 ++++++++++++++++
 2 files changed, 18 insertions(+)

       via  663bc76408fc7b1a7a02c0f82aa2bf003c30b78a (commit)
      from  35e9b21d80569fec8860596213c72d199f79a593 (commit)

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.


commit 663bc76408fc7b1a7a02c0f82aa2bf003c30b78a
Author: Tom Clegg <tom at curii.com>
Date:   Fri Feb 10 10:54:49 2023 -0500

    19973: Add metrics for automatic container concurrency limit.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/lib/dispatchcloud/scheduler/run_queue.go b/lib/dispatchcloud/scheduler/run_queue.go
index cfd95e945..057ff8d6e 100644
--- a/lib/dispatchcloud/scheduler/run_queue.go
+++ b/lib/dispatchcloud/scheduler/run_queue.go
@@ -62,6 +62,8 @@ func (sch *Scheduler) runQueue() {
 			sch.maxConcurrency = max
 		}
 	}
+	sch.mLast503Time.Set(float64(sch.last503time.Unix()))
+	sch.mMaxContainerConcurrency.Set(float64(sch.maxConcurrency))
 
 	sch.logger.WithFields(logrus.Fields{
 		"Containers":     len(sorted),
diff --git a/lib/dispatchcloud/scheduler/scheduler.go b/lib/dispatchcloud/scheduler/scheduler.go
index 589aa3ec1..4644dc4ea 100644
--- a/lib/dispatchcloud/scheduler/scheduler.go
+++ b/lib/dispatchcloud/scheduler/scheduler.go
@@ -52,6 +52,8 @@ type Scheduler struct {
 	mContainersAllocatedNotStarted   prometheus.Gauge
 	mContainersNotAllocatedOverQuota prometheus.Gauge
 	mLongestWaitTimeSinceQueue       prometheus.Gauge
+	mLast503Time                     prometheus.Gauge
+	mMaxContainerConcurrency         prometheus.Gauge
 }
 
 // New returns a new unstarted Scheduler.
@@ -101,6 +103,20 @@ func (sch *Scheduler) registerMetrics(reg *prometheus.Registry) {
 		Help:      "Current longest wait time of any container since queuing, and before the start of crunch-run.",
 	})
 	reg.MustRegister(sch.mLongestWaitTimeSinceQueue)
+	sch.mLast503Time = prometheus.NewGauge(prometheus.GaugeOpts{
+		Namespace: "arvados",
+		Subsystem: "dispatchcloud",
+		Name:      "last_503_time",
+		Help:      "Time of most recent 503 error received from API.",
+	})
+	reg.MustRegister(sch.mLast503Time)
+	sch.mMaxContainerConcurrency = prometheus.NewGauge(prometheus.GaugeOpts{
+		Namespace: "arvados",
+		Subsystem: "dispatchcloud",
+		Name:      "max_concurrent_containers",
+		Help:      "Dynamically assigned limit on number of containers scheduled concurrency, set after receiving 503 errors from API.",
+	})
+	reg.MustRegister(sch.mMaxContainerConcurrency)
 }
 
 func (sch *Scheduler) updateMetrics() {

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list