[ARVADOS] created: 1.3.0-2953-g26519f015

Git user git at public.arvados.org
Mon Aug 24 14:25:40 UTC 2020


        at  26519f015ebbd7e7b4ef288d4e89d877ea05c0ec (commit)


commit 26519f015ebbd7e7b4ef288d4e89d877ea05c0ec
Author: Tom Clegg <tom at tomclegg.ca>
Date:   Mon Aug 24 10:20:32 2020 -0400

    16723: Don't lock after requeue until old crunch-run exits.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at tomclegg.ca>

diff --git a/lib/dispatchcloud/scheduler/run_queue.go b/lib/dispatchcloud/scheduler/run_queue.go
index dddb974b3..d77dcee94 100644
--- a/lib/dispatchcloud/scheduler/run_queue.go
+++ b/lib/dispatchcloud/scheduler/run_queue.go
@@ -51,6 +51,10 @@ tryrun:
 				overquota = sorted[i:]
 				break tryrun
 			}
+			if sch.pool.KillContainer(ctr.UUID, "about to lock") {
+				logger.Info("not locking: crunch-run process from previous attempt has not exited")
+				continue
+			}
 			go sch.lockContainer(logger, ctr.UUID)
 			unalloc[it]--
 		case arvados.ContainerStateLocked:
@@ -88,7 +92,7 @@ tryrun:
 				// a higher-priority container on the
 				// same instance type. Don't let this
 				// one sneak in ahead of it.
-			} else if sch.pool.KillContainer(ctr.UUID, "about to lock") {
+			} else if sch.pool.KillContainer(ctr.UUID, "about to start") {
 				logger.Info("not restarting yet: crunch-run process from previous attempt has not exited")
 			} else if sch.pool.StartContainer(it, ctr) {
 				// Success.

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list