[ARVADOS] updated: 1.3.0-321-ga4396e183
Git user
git at public.curoverse.com
Tue Feb 12 14:20:02 EST 2019
Summary of changes:
lib/dispatchcloud/dispatcher_test.go | 2 +-
lib/dispatchcloud/scheduler/fix_stale_locks.go | 47 +++++++++++++++-----------
2 files changed, 28 insertions(+), 21 deletions(-)
via a4396e183ec11a7241cf5089c5ccfbca1ad8627f (commit)
from b56e2857bfa4f7d2094546ffa3407cede877fed1 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
commit a4396e183ec11a7241cf5089c5ccfbca1ad8627f
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Tue Feb 12 14:17:10 2019 -0500
14325: Start up immediately if there are no stale locks.
...instead of waiting for the pool to send a notification to trigger
the first loop iteration.
refs #14325
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/lib/dispatchcloud/dispatcher_test.go b/lib/dispatchcloud/dispatcher_test.go
index 22e3425b6..0558d79f1 100644
--- a/lib/dispatchcloud/dispatcher_test.go
+++ b/lib/dispatchcloud/dispatcher_test.go
@@ -156,7 +156,7 @@ func (s *DispatcherSuite) TestDispatchToStubDriver(c *check.C) {
c.Fatalf("timed out; still waiting for %d containers: %q", len(waiting), waiting)
}
- deadline := time.Now().Add(time.Second)
+ deadline := time.Now().Add(5 * time.Second)
for range time.NewTicker(10 * time.Millisecond).C {
insts, err := s.stubDriver.InstanceSets()[0].Instances(nil)
c.Check(err, check.IsNil)
diff --git a/lib/dispatchcloud/scheduler/fix_stale_locks.go b/lib/dispatchcloud/scheduler/fix_stale_locks.go
index 264f9e4ec..4bd27021c 100644
--- a/lib/dispatchcloud/scheduler/fix_stale_locks.go
+++ b/lib/dispatchcloud/scheduler/fix_stale_locks.go
@@ -19,24 +19,15 @@ import (
func (sch *Scheduler) fixStaleLocks() {
wp := sch.pool.Subscribe()
defer sch.pool.Unsubscribe(wp)
+
+ var stale []string
timeout := time.NewTimer(sch.staleLockTimeout)
waiting:
for {
- unlock := false
- select {
- case <-wp:
- // If all workers have been contacted, unlock
- // containers that aren't claimed by any
- // worker.
- unlock = sch.pool.CountWorkers()[worker.StateUnknown] == 0
- case <-timeout.C:
- // Give up and unlock the containers, even
- // though they might be working.
- unlock = true
- }
-
running := sch.pool.Running()
qEntries, _ := sch.queue.Entries()
+
+ stale = nil
for uuid, ent := range qEntries {
if ent.Container.State != arvados.ContainerStateLocked {
continue
@@ -44,14 +35,30 @@ waiting:
if _, running := running[uuid]; running {
continue
}
- if !unlock {
- continue waiting
- }
- err := sch.queue.Unlock(uuid)
- if err != nil {
- sch.logger.Warnf("Unlock %s: %s", uuid, err)
+ stale = append(stale, uuid)
+ }
+ if len(stale) == 0 {
+ return
+ }
+
+ select {
+ case <-wp:
+ // Stop waiting if all workers have been
+ // contacted.
+ if sch.pool.CountWorkers()[worker.StateUnknown] == 0 {
+ break waiting
}
+ case <-timeout.C:
+ // Give up.
+ break waiting
+ }
+
+ }
+
+ for _, uuid := range stale {
+ err := sch.queue.Unlock(uuid)
+ if err != nil {
+ sch.logger.Warnf("Unlock %s: %s", uuid, err)
}
- return
}
}
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list