[ARVADOS] updated: 1.3.0-2845-gb2a1842d6

Git user git at public.arvados.org
Tue Aug 4 21:21:32 UTC 2020


Summary of changes:
 lib/dispatchcloud/scheduler/run_queue_test.go |  4 ++-
 lib/dispatchcloud/scheduler/sync_test.go      | 50 +++++++++++++++++++++++++++
 2 files changed, 53 insertions(+), 1 deletion(-)

       via  b2a1842d611a2739066a819badafaa2d2f14015d (commit)
      from  3aa3fb78afa46e98c9be345045f4fea9fea0f08c (commit)

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.


commit b2a1842d611a2739066a819badafaa2d2f14015d
Author: Tom Clegg <tom at tomclegg.ca>
Date:   Tue Aug 4 17:19:45 2020 -0400

    16663: Test for errant cancel while VMs have state==unknown.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at tomclegg.ca>

diff --git a/lib/dispatchcloud/scheduler/run_queue_test.go b/lib/dispatchcloud/scheduler/run_queue_test.go
index 8ab1cd9ba..32c6b3b24 100644
--- a/lib/dispatchcloud/scheduler/run_queue_test.go
+++ b/lib/dispatchcloud/scheduler/run_queue_test.go
@@ -36,6 +36,7 @@ type stubPool struct {
 	notify    <-chan struct{}
 	unalloc   map[arvados.InstanceType]int // idle+booting+unknown
 	idle      map[arvados.InstanceType]int
+	unknown   map[arvados.InstanceType]int
 	running   map[string]time.Time
 	atQuota   bool
 	canCreate int
@@ -62,7 +63,7 @@ func (p *stubPool) Unallocated() map[arvados.InstanceType]int {
 	defer p.Unlock()
 	r := map[arvados.InstanceType]int{}
 	for it, n := range p.unalloc {
-		r[it] = n
+		r[it] = n - p.unknown[it]
 	}
 	return r
 }
@@ -96,6 +97,7 @@ func (p *stubPool) CountWorkers() map[worker.State]int {
 		worker.StateBooting: len(p.unalloc) - len(p.idle),
 		worker.StateIdle:    len(p.idle),
 		worker.StateRunning: len(p.running),
+		worker.StateUnknown: len(p.unknown),
 	}
 }
 func (p *stubPool) StartContainer(it arvados.InstanceType, ctr arvados.Container) bool {
diff --git a/lib/dispatchcloud/scheduler/sync_test.go b/lib/dispatchcloud/scheduler/sync_test.go
index 305ab9e04..ae264bb29 100644
--- a/lib/dispatchcloud/scheduler/sync_test.go
+++ b/lib/dispatchcloud/scheduler/sync_test.go
@@ -54,3 +54,53 @@ func (*SchedulerSuite) TestForgetIrrelevantContainers(c *check.C) {
 	ents, _ = queue.Entries()
 	c.Check(ents, check.HasLen, 0)
 }
+
+func (*SchedulerSuite) TestCancelOrphanedContainers(c *check.C) {
+	ctx := ctxlog.Context(context.Background(), ctxlog.TestLogger(c))
+	pool := stubPool{
+		unalloc: map[arvados.InstanceType]int{test.InstanceType(1): 1},
+		unknown: map[arvados.InstanceType]int{test.InstanceType(1): 1},
+	}
+	queue := test.Queue{
+		ChooseType: chooseType,
+		Containers: []arvados.Container{
+			{
+				UUID:     test.ContainerUUID(1),
+				Priority: 0,
+				State:    arvados.ContainerStateRunning,
+				RuntimeConstraints: arvados.RuntimeConstraints{
+					VCPUs: 1,
+					RAM:   1 << 30,
+				},
+			},
+		},
+	}
+	queue.Update()
+
+	ents, _ := queue.Entries()
+	c.Check(ents, check.HasLen, 1)
+
+	sch := New(ctx, &queue, &pool, time.Millisecond, time.Millisecond)
+
+	// Sync shouldn't cancel the container because it might be
+	// running on the VM with state=="unknown".
+	for i := 0; i < 10; i++ {
+		sch.sync()
+		time.Sleep(time.Millisecond)
+	}
+	ents, _ = queue.Entries()
+	c.Check(ents, check.HasLen, 1)
+	c.Check(ents[test.ContainerUUID(1)].Container.State, check.Equals, arvados.ContainerStateRunning)
+
+	// Sync should cancel & forget the container when the
+	// "unknown" node goes away.
+	pool.unknown = nil
+	for deadline := time.Now().Add(time.Second); ; time.Sleep(time.Millisecond) {
+		sch.sync()
+		ents, _ = queue.Entries()
+		if len(ents) == 0 || time.Now().After(deadline) {
+			break
+		}
+	}
+	c.Check(ents, check.HasLen, 0)
+}

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list