[ARVADOS] updated: 1.3.0-2845-gb2a1842d6
Git user
git at public.arvados.org
Tue Aug 4 21:21:32 UTC 2020
Summary of changes:
lib/dispatchcloud/scheduler/run_queue_test.go | 4 ++-
lib/dispatchcloud/scheduler/sync_test.go | 50 +++++++++++++++++++++++++++
2 files changed, 53 insertions(+), 1 deletion(-)
via b2a1842d611a2739066a819badafaa2d2f14015d (commit)
from 3aa3fb78afa46e98c9be345045f4fea9fea0f08c (commit)
Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.
commit b2a1842d611a2739066a819badafaa2d2f14015d
Author: Tom Clegg <tom at tomclegg.ca>
Date: Tue Aug 4 17:19:45 2020 -0400
16663: Test for errant cancel while VMs have state==unknown.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at tomclegg.ca>
diff --git a/lib/dispatchcloud/scheduler/run_queue_test.go b/lib/dispatchcloud/scheduler/run_queue_test.go
index 8ab1cd9ba..32c6b3b24 100644
--- a/lib/dispatchcloud/scheduler/run_queue_test.go
+++ b/lib/dispatchcloud/scheduler/run_queue_test.go
@@ -36,6 +36,7 @@ type stubPool struct {
notify <-chan struct{}
unalloc map[arvados.InstanceType]int // idle+booting+unknown
idle map[arvados.InstanceType]int
+ unknown map[arvados.InstanceType]int
running map[string]time.Time
atQuota bool
canCreate int
@@ -62,7 +63,7 @@ func (p *stubPool) Unallocated() map[arvados.InstanceType]int {
defer p.Unlock()
r := map[arvados.InstanceType]int{}
for it, n := range p.unalloc {
- r[it] = n
+ r[it] = n - p.unknown[it]
}
return r
}
@@ -96,6 +97,7 @@ func (p *stubPool) CountWorkers() map[worker.State]int {
worker.StateBooting: len(p.unalloc) - len(p.idle),
worker.StateIdle: len(p.idle),
worker.StateRunning: len(p.running),
+ worker.StateUnknown: len(p.unknown),
}
}
func (p *stubPool) StartContainer(it arvados.InstanceType, ctr arvados.Container) bool {
diff --git a/lib/dispatchcloud/scheduler/sync_test.go b/lib/dispatchcloud/scheduler/sync_test.go
index 305ab9e04..ae264bb29 100644
--- a/lib/dispatchcloud/scheduler/sync_test.go
+++ b/lib/dispatchcloud/scheduler/sync_test.go
@@ -54,3 +54,53 @@ func (*SchedulerSuite) TestForgetIrrelevantContainers(c *check.C) {
ents, _ = queue.Entries()
c.Check(ents, check.HasLen, 0)
}
+
+func (*SchedulerSuite) TestCancelOrphanedContainers(c *check.C) {
+ ctx := ctxlog.Context(context.Background(), ctxlog.TestLogger(c))
+ pool := stubPool{
+ unalloc: map[arvados.InstanceType]int{test.InstanceType(1): 1},
+ unknown: map[arvados.InstanceType]int{test.InstanceType(1): 1},
+ }
+ queue := test.Queue{
+ ChooseType: chooseType,
+ Containers: []arvados.Container{
+ {
+ UUID: test.ContainerUUID(1),
+ Priority: 0,
+ State: arvados.ContainerStateRunning,
+ RuntimeConstraints: arvados.RuntimeConstraints{
+ VCPUs: 1,
+ RAM: 1 << 30,
+ },
+ },
+ },
+ }
+ queue.Update()
+
+ ents, _ := queue.Entries()
+ c.Check(ents, check.HasLen, 1)
+
+ sch := New(ctx, &queue, &pool, time.Millisecond, time.Millisecond)
+
+ // Sync shouldn't cancel the container because it might be
+ // running on the VM with state=="unknown".
+ for i := 0; i < 10; i++ {
+ sch.sync()
+ time.Sleep(time.Millisecond)
+ }
+ ents, _ = queue.Entries()
+ c.Check(ents, check.HasLen, 1)
+ c.Check(ents[test.ContainerUUID(1)].Container.State, check.Equals, arvados.ContainerStateRunning)
+
+ // Sync should cancel & forget the container when the
+ // "unknown" node goes away.
+ pool.unknown = nil
+ for deadline := time.Now().Add(time.Second); ; time.Sleep(time.Millisecond) {
+ sch.sync()
+ ents, _ = queue.Entries()
+ if len(ents) == 0 || time.Now().After(deadline) {
+ break
+ }
+ }
+ c.Check(ents, check.HasLen, 0)
+}
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list