[ARVADOS] updated: 1.3.0-321-ga4396e183

Git user git at public.curoverse.com
Tue Feb 12 14:20:02 EST 2019


Summary of changes:
 lib/dispatchcloud/dispatcher_test.go           |  2 +-
 lib/dispatchcloud/scheduler/fix_stale_locks.go | 47 +++++++++++++++-----------
 2 files changed, 28 insertions(+), 21 deletions(-)

       via  a4396e183ec11a7241cf5089c5ccfbca1ad8627f (commit)
      from  b56e2857bfa4f7d2094546ffa3407cede877fed1 (commit)

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.


commit a4396e183ec11a7241cf5089c5ccfbca1ad8627f
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Tue Feb 12 14:17:10 2019 -0500

    14325: Start up immediately if there are no stale locks.
    
    ...instead of waiting for the pool to send a notification to trigger
    the first loop iteration.
    
    refs #14325
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/lib/dispatchcloud/dispatcher_test.go b/lib/dispatchcloud/dispatcher_test.go
index 22e3425b6..0558d79f1 100644
--- a/lib/dispatchcloud/dispatcher_test.go
+++ b/lib/dispatchcloud/dispatcher_test.go
@@ -156,7 +156,7 @@ func (s *DispatcherSuite) TestDispatchToStubDriver(c *check.C) {
 		c.Fatalf("timed out; still waiting for %d containers: %q", len(waiting), waiting)
 	}
 
-	deadline := time.Now().Add(time.Second)
+	deadline := time.Now().Add(5 * time.Second)
 	for range time.NewTicker(10 * time.Millisecond).C {
 		insts, err := s.stubDriver.InstanceSets()[0].Instances(nil)
 		c.Check(err, check.IsNil)
diff --git a/lib/dispatchcloud/scheduler/fix_stale_locks.go b/lib/dispatchcloud/scheduler/fix_stale_locks.go
index 264f9e4ec..4bd27021c 100644
--- a/lib/dispatchcloud/scheduler/fix_stale_locks.go
+++ b/lib/dispatchcloud/scheduler/fix_stale_locks.go
@@ -19,24 +19,15 @@ import (
 func (sch *Scheduler) fixStaleLocks() {
 	wp := sch.pool.Subscribe()
 	defer sch.pool.Unsubscribe(wp)
+
+	var stale []string
 	timeout := time.NewTimer(sch.staleLockTimeout)
 waiting:
 	for {
-		unlock := false
-		select {
-		case <-wp:
-			// If all workers have been contacted, unlock
-			// containers that aren't claimed by any
-			// worker.
-			unlock = sch.pool.CountWorkers()[worker.StateUnknown] == 0
-		case <-timeout.C:
-			// Give up and unlock the containers, even
-			// though they might be working.
-			unlock = true
-		}
-
 		running := sch.pool.Running()
 		qEntries, _ := sch.queue.Entries()
+
+		stale = nil
 		for uuid, ent := range qEntries {
 			if ent.Container.State != arvados.ContainerStateLocked {
 				continue
@@ -44,14 +35,30 @@ waiting:
 			if _, running := running[uuid]; running {
 				continue
 			}
-			if !unlock {
-				continue waiting
-			}
-			err := sch.queue.Unlock(uuid)
-			if err != nil {
-				sch.logger.Warnf("Unlock %s: %s", uuid, err)
+			stale = append(stale, uuid)
+		}
+		if len(stale) == 0 {
+			return
+		}
+
+		select {
+		case <-wp:
+			// Stop waiting if all workers have been
+			// contacted.
+			if sch.pool.CountWorkers()[worker.StateUnknown] == 0 {
+				break waiting
 			}
+		case <-timeout.C:
+			// Give up.
+			break waiting
+		}
+
+	}
+
+	for _, uuid := range stale {
+		err := sch.queue.Unlock(uuid)
+		if err != nil {
+			sch.logger.Warnf("Unlock %s: %s", uuid, err)
 		}
-		return
 	}
 }

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list