[arvados] updated: 2.5.0-111-g663bc7640
git repository hosting
git at public.arvados.org
Fri Feb 10 15:55:22 UTC 2023
Summary of changes:
lib/dispatchcloud/scheduler/run_queue.go | 2 ++
lib/dispatchcloud/scheduler/scheduler.go | 16 ++++++++++++++++
2 files changed, 18 insertions(+)
via 663bc76408fc7b1a7a02c0f82aa2bf003c30b78a (commit)
from 35e9b21d80569fec8860596213c72d199f79a593 (commit)
Those revisions listed above that are new to this repository have
not appeared in any other notification email, so they are listed
in full below.
commit 663bc76408fc7b1a7a02c0f82aa2bf003c30b78a
Author: Tom Clegg <tom at curii.com>
Date: Fri Feb 10 10:54:49 2023 -0500
19973: Add metrics for automatic container concurrency limit.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/lib/dispatchcloud/scheduler/run_queue.go b/lib/dispatchcloud/scheduler/run_queue.go
index cfd95e945..057ff8d6e 100644
--- a/lib/dispatchcloud/scheduler/run_queue.go
+++ b/lib/dispatchcloud/scheduler/run_queue.go
@@ -62,6 +62,8 @@ func (sch *Scheduler) runQueue() {
sch.maxConcurrency = max
}
}
+ sch.mLast503Time.Set(float64(sch.last503time.Unix()))
+ sch.mMaxContainerConcurrency.Set(float64(sch.maxConcurrency))
sch.logger.WithFields(logrus.Fields{
"Containers": len(sorted),
diff --git a/lib/dispatchcloud/scheduler/scheduler.go b/lib/dispatchcloud/scheduler/scheduler.go
index 589aa3ec1..4644dc4ea 100644
--- a/lib/dispatchcloud/scheduler/scheduler.go
+++ b/lib/dispatchcloud/scheduler/scheduler.go
@@ -52,6 +52,8 @@ type Scheduler struct {
mContainersAllocatedNotStarted prometheus.Gauge
mContainersNotAllocatedOverQuota prometheus.Gauge
mLongestWaitTimeSinceQueue prometheus.Gauge
+ mLast503Time prometheus.Gauge
+ mMaxContainerConcurrency prometheus.Gauge
}
// New returns a new unstarted Scheduler.
@@ -101,6 +103,20 @@ func (sch *Scheduler) registerMetrics(reg *prometheus.Registry) {
Help: "Current longest wait time of any container since queuing, and before the start of crunch-run.",
})
reg.MustRegister(sch.mLongestWaitTimeSinceQueue)
+ sch.mLast503Time = prometheus.NewGauge(prometheus.GaugeOpts{
+ Namespace: "arvados",
+ Subsystem: "dispatchcloud",
+ Name: "last_503_time",
+ Help: "Time of most recent 503 error received from API.",
+ })
+ reg.MustRegister(sch.mLast503Time)
+ sch.mMaxContainerConcurrency = prometheus.NewGauge(prometheus.GaugeOpts{
+ Namespace: "arvados",
+ Subsystem: "dispatchcloud",
+ Name: "max_concurrent_containers",
+ Help: "Dynamically assigned limit on number of containers scheduled concurrently, set after receiving 503 errors from API.",
+ })
+ reg.MustRegister(sch.mMaxContainerConcurrency)
}
func (sch *Scheduler) updateMetrics() {
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list