[arvados] updated: 2.6.1-14-g115097426
git repository hosting
git at public.arvados.org
Thu May 18 14:30:27 UTC 2023
Summary of changes:
lib/dispatchcloud/scheduler/run_queue.go | 14 +++++++++++++-
lib/dispatchcloud/worker/pool.go | 28 ++++++++++++++++++++++++++++
2 files changed, 41 insertions(+), 1 deletion(-)
via 11509742679c983bc9eb68c5875593cce25764d5 (commit)
via c3b8668deb1b9739fdb30c1194d3bac8a72fb98c (commit)
from 546ec93955c0c77107360f841de31fd7b7983c6c (commit)
Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.
commit 11509742679c983bc9eb68c5875593cce25764d5
Author: Tom Clegg <tom at curii.com>
Date: Wed May 17 11:19:35 2023 -0400
Merge branch '20457-queue-churn'
refs #20457
refs #20511
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/lib/dispatchcloud/scheduler/run_queue.go b/lib/dispatchcloud/scheduler/run_queue.go
index e6b1b06a2..8f4c2e083 100644
--- a/lib/dispatchcloud/scheduler/run_queue.go
+++ b/lib/dispatchcloud/scheduler/run_queue.go
@@ -22,7 +22,19 @@ func (sch *Scheduler) runQueue() {
sorted = append(sorted, ent)
}
sort.Slice(sorted, func(i, j int) bool {
- if pi, pj := sorted[i].Container.Priority, sorted[j].Container.Priority; pi != pj {
+ ilocked := sorted[i].Container.State == arvados.ContainerStateLocked
+ jlocked := sorted[j].Container.State == arvados.ContainerStateLocked
+ if ilocked != jlocked {
+ // Give precedence to containers that we have
+ // already locked, even if higher-priority
+ // containers have since arrived in the
+ // queue. This avoids undesirable queue churn
+ // effects including extra lock/unlock cycles
+ // and bringing up new instances and quickly
+ // shutting them down to make room for
+ // different instance sizes.
+ return ilocked
+ } else if pi, pj := sorted[i].Container.Priority, sorted[j].Container.Priority; pi != pj {
return pi > pj
} else {
// When containers have identical priority,
commit c3b8668deb1b9739fdb30c1194d3bac8a72fb98c
Author: Tom Clegg <tom at curii.com>
Date: Fri May 5 13:57:11 2023 -0400
Merge branch '20457-max-supervisors-overquota'
refs #20457
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index c270eef49..4bf969358 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -197,6 +197,8 @@ type Pool struct {
mTimeFromShutdownToGone prometheus.Summary
mTimeFromQueueToCrunchRun prometheus.Summary
mRunProbeDuration *prometheus.SummaryVec
+ mProbeAgeMax prometheus.Gauge
+ mProbeAgeMedian prometheus.Gauge
}
type createCall struct {
@@ -626,6 +628,20 @@ func (wp *Pool) registerMetrics(reg *prometheus.Registry) {
Help: "Number of containers reported running by cloud VMs.",
})
reg.MustRegister(wp.mContainersRunning)
+ wp.mProbeAgeMax = prometheus.NewGauge(prometheus.GaugeOpts{
+ Namespace: "arvados",
+ Subsystem: "dispatchcloud",
+ Name: "probe_age_seconds_max",
+ Help: "Maximum number of seconds since an instance's most recent successful probe.",
+ })
+ reg.MustRegister(wp.mProbeAgeMax)
+ wp.mProbeAgeMedian = prometheus.NewGauge(prometheus.GaugeOpts{
+ Namespace: "arvados",
+ Subsystem: "dispatchcloud",
+ Name: "probe_age_seconds_median",
+ Help: "Median number of seconds since an instance's most recent successful probe.",
+ })
+ reg.MustRegister(wp.mProbeAgeMedian)
wp.mInstances = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: "arvados",
Subsystem: "dispatchcloud",
@@ -738,6 +754,8 @@ func (wp *Pool) updateMetrics() {
cpu := map[string]int64{}
mem := map[string]int64{}
var running int64
+ now := time.Now()
+ var probed []time.Time
for _, wkr := range wp.workers {
var cat string
switch {
@@ -757,6 +775,7 @@ func (wp *Pool) updateMetrics() {
cpu[cat] += int64(wkr.instType.VCPUs)
mem[cat] += int64(wkr.instType.RAM)
running += int64(len(wkr.running) + len(wkr.starting))
+ probed = append(probed, wkr.probed)
}
for _, cat := range []string{"inuse", "hold", "booting", "unknown", "idle"} {
wp.mInstancesPrice.WithLabelValues(cat).Set(price[cat])
@@ -773,6 +792,15 @@ func (wp *Pool) updateMetrics() {
wp.mInstances.WithLabelValues(k.cat, k.instType).Set(float64(v))
}
wp.mContainersRunning.Set(float64(running))
+
+ if len(probed) == 0 {
+ wp.mProbeAgeMax.Set(0)
+ wp.mProbeAgeMedian.Set(0)
+ } else {
+ sort.Slice(probed, func(i, j int) bool { return probed[i].Before(probed[j]) })
+ wp.mProbeAgeMax.Set(now.Sub(probed[0]).Seconds())
+ wp.mProbeAgeMedian.Set(now.Sub(probed[len(probed)/2]).Seconds())
+ }
}
func (wp *Pool) runProbes() {
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list