[arvados] updated: 2.4.2-22-g8ad66154d
git repository hosting
git at public.arvados.org
Fri Sep 16 15:12:31 UTC 2022
Summary of changes:
lib/crunchrun/docker.go | 26 ++++++++++++++++++++------
1 file changed, 20 insertions(+), 6 deletions(-)
via 8ad66154df528ad2020e80bc255896537f1c712a (commit)
from 6d04694d27d32591404310be790a212f9804142a (commit)
Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.
commit 8ad66154df528ad2020e80bc255896537f1c712a
Author: Tom Clegg <tom at curii.com>
Date: Fri Sep 2 10:06:03 2022 -0400
19437: Don't cancel until 3 consecutive docker-inspect failures.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/lib/crunchrun/docker.go b/lib/crunchrun/docker.go
index eee8f1d76..7d8f312a8 100644
--- a/lib/crunchrun/docker.go
+++ b/lib/crunchrun/docker.go
@@ -23,6 +23,11 @@ import (
// Docker daemon won't let you set a limit less than ~10 MiB
const minDockerRAM = int64(16 * 1024 * 1024)
+// Number of consecutive "inspect container" failures before
+// concluding Docker is unresponsive, giving up, and cancelling the
+// container.
+const dockerWatchdogThreshold = 3
+
type dockerExecutor struct {
containerUUID string
logf func(string, ...interface{})
@@ -217,17 +222,17 @@ func (e *dockerExecutor) Wait(ctx context.Context) (int, error) {
// kill it.
return
} else if err != nil {
- e.logf("Error inspecting container: %s", err)
- watchdogErr <- err
- return
+ watchdogErr <- fmt.Errorf("error inspecting container: %s", err)
} else if ctr.State == nil || !(ctr.State.Running || ctr.State.Status == "created") {
- watchdogErr <- fmt.Errorf("Container is not running: State=%v", ctr.State)
- return
+ watchdogErr <- fmt.Errorf("container is not running: State=%v", ctr.State)
+ } else {
+ watchdogErr <- nil
}
}
}()
waitOk, waitErr := e.dockerclient.ContainerWait(ctx, e.containerID, dockercontainer.WaitConditionNotRunning)
+ errors := 0
for {
select {
case waitBody := <-waitOk:
@@ -242,7 +247,16 @@ func (e *dockerExecutor) Wait(ctx context.Context) (int, error) {
return -1, ctx.Err()
case err := <-watchdogErr:
- return -1, err
+ if err == nil {
+ errors = 0
+ } else {
+ e.logf("docker watchdog: %s", err)
+ errors++
+ if errors >= dockerWatchdogThreshold {
+ e.logf("docker watchdog: giving up")
+ return -1, err
+ }
+ }
}
}
}
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list