[arvados] updated: 2.6.0-198-g11fb3d91a
git repository hosting
git at public.arvados.org
Thu May 25 21:16:16 UTC 2023
Summary of changes:
lib/dispatchcloud/scheduler/run_queue.go | 2 +
lib/dispatchcloud/scheduler/run_queue_test.go | 56 +++++++++++++++++++++++++++
2 files changed, 58 insertions(+)
via 11fb3d91a28bf51803f170a53709d709941de788 (commit)
from 40dcd44173ea6d6cdc53206766875151eac7b0fd (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
commit 11fb3d91a28bf51803f170a53709d709941de788
Author: Tom Clegg <tom at curii.com>
Date: Thu May 25 17:10:48 2023 -0400
20511: Don't shutdown excess instances just because MaxSupervisors.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/lib/dispatchcloud/scheduler/run_queue.go b/lib/dispatchcloud/scheduler/run_queue.go
index db6e97b59..adf897789 100644
--- a/lib/dispatchcloud/scheduler/run_queue.go
+++ b/lib/dispatchcloud/scheduler/run_queue.go
@@ -204,6 +204,8 @@ tryrun:
}
}
}
+ }
+ if len(overquota) > 0 {
// Shut down idle workers that didn't get any
// containers mapped onto them before we hit quota.
for it, n := range unalloc {
diff --git a/lib/dispatchcloud/scheduler/run_queue_test.go b/lib/dispatchcloud/scheduler/run_queue_test.go
index 2deff69e9..8192d4721 100644
--- a/lib/dispatchcloud/scheduler/run_queue_test.go
+++ b/lib/dispatchcloud/scheduler/run_queue_test.go
@@ -374,6 +374,62 @@ func (*SchedulerSuite) TestIdleIn503QuietPeriod(c *check.C) {
c.Check(queue.StateChanges(), check.HasLen, 0)
}
+// If we somehow have more supervisor containers in Locked state than
+// we should (e.g., config changed since they started), and some
+// appropriate-sized instances booting up, unlock the excess
+// supervisor containers, but let the instances keep booting.
+func (*SchedulerSuite) TestUnlockExcessSupervisors(c *check.C) {
+ ctx := ctxlog.Context(context.Background(), ctxlog.TestLogger(c))
+ queue := test.Queue{
+ ChooseType: chooseType,
+ }
+ for i := 1; i <= 6; i++ {
+ queue.Containers = append(queue.Containers, arvados.Container{
+ UUID: test.ContainerUUID(i),
+ Priority: int64(1000 - i),
+ State: arvados.ContainerStateLocked,
+ RuntimeConstraints: arvados.RuntimeConstraints{
+ VCPUs: 2,
+ RAM: 2 << 30,
+ },
+ SchedulingParameters: arvados.SchedulingParameters{
+ Supervisor: true,
+ },
+ })
+ }
+ queue.Update()
+ pool := stubPool{
+ quota: 16,
+ unalloc: map[arvados.InstanceType]int{
+ test.InstanceType(2): 2,
+ },
+ idle: map[arvados.InstanceType]int{
+ test.InstanceType(2): 1,
+ },
+ running: map[string]time.Time{
+ test.ContainerUUID(1): {},
+ test.ContainerUUID(2): {},
+ test.ContainerUUID(3): {},
+ test.ContainerUUID(4): {},
+ },
+ creates: []arvados.InstanceType{},
+ starts: []string{},
+ canCreate: 0,
+ }
+ sch := New(ctx, arvados.NewClientFromEnv(), &queue, &pool, nil, time.Millisecond, time.Millisecond, 4)
+ sch.sync()
+ sch.runQueue()
+ sch.sync()
+
+ c.Check(pool.starts, check.DeepEquals, []string{})
+ c.Check(pool.shutdowns, check.Equals, 0)
+ c.Check(pool.creates, check.HasLen, 0)
+ c.Check(queue.StateChanges(), check.DeepEquals, []test.QueueStateChange{
+ {UUID: test.ContainerUUID(5), From: "Locked", To: "Queued"},
+ {UUID: test.ContainerUUID(6), From: "Locked", To: "Queued"},
+ })
+}
+
// Don't flap lock/unlock when equal-priority containers compete for
// limited workers.
//
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list