[ARVADOS] created: 1.3.0-2844-g3aa3fb78a
Git user
git at public.arvados.org
Tue Aug 4 20:09:09 UTC 2020
at 3aa3fb78afa46e98c9be345045f4fea9fea0f08c (commit)
commit 3aa3fb78afa46e98c9be345045f4fea9fea0f08c
Author: Tom Clegg <tom at tomclegg.ca>
Date: Tue Aug 4 16:06:29 2020 -0400
16663: Don't kill orphaned containers when unprobed nodes exist.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at tomclegg.ca>
diff --git a/lib/dispatchcloud/scheduler/sync.go b/lib/dispatchcloud/scheduler/sync.go
index de69df982..116ca7643 100644
--- a/lib/dispatchcloud/scheduler/sync.go
+++ b/lib/dispatchcloud/scheduler/sync.go
@@ -8,6 +8,7 @@ import (
"fmt"
"git.arvados.org/arvados.git/lib/dispatchcloud/container"
+ "git.arvados.org/arvados.git/lib/dispatchcloud/worker"
"git.arvados.org/arvados.git/sdk/go/arvados"
"github.com/sirupsen/logrus"
)
@@ -23,6 +24,7 @@ import (
// Running containers whose crunch-run processes have exited are
// cancelled.
func (sch *Scheduler) sync() {
+ anyUnknownWorkers := sch.pool.CountWorkers()[worker.StateUnknown] > 0
running := sch.pool.Running()
qEntries, qUpdated := sch.queue.Entries()
for uuid, ent := range qEntries {
@@ -30,7 +32,9 @@ func (sch *Scheduler) sync() {
switch ent.Container.State {
case arvados.ContainerStateRunning:
if !running {
- go sch.cancel(uuid, "not running on any worker")
+ if !anyUnknownWorkers {
+ go sch.cancel(uuid, "not running on any worker")
+ }
} else if !exited.IsZero() && qUpdated.After(exited) {
go sch.cancel(uuid, "state=Running after crunch-run exited")
} else if ent.Container.Priority == 0 {
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits mailing list