[arvados] created: 2.1.0-2865-g9c254acbd
git repository hosting
git at public.arvados.org
Fri Sep 2 14:08:03 UTC 2022
at 9c254acbd78ed50e1e9fec508fb9ec4164867dda (commit)
commit 9c254acbd78ed50e1e9fec508fb9ec4164867dda
Author: Tom Clegg <tom at curii.com>
Date: Fri Sep 2 10:06:03 2022 -0400
19437: Don't cancel until 3 consecutive docker-inspect failures.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/lib/crunchrun/docker.go b/lib/crunchrun/docker.go
index 54d0e680f..bb6352658 100644
--- a/lib/crunchrun/docker.go
+++ b/lib/crunchrun/docker.go
@@ -31,6 +31,11 @@ const minDockerRAM = int64(16 * 1024 * 1024)
// https://docs.docker.com/engine/api/.
const DockerAPIVersion = "1.35"
+// Number of consecutive "inspect container" failures before
+// concluding Docker is unresponsive, giving up, and cancelling the
+// container.
+const dockerWatchdogThreshold = 3
+
type dockerExecutor struct {
containerUUID string
logf func(string, ...interface{})
@@ -225,17 +230,17 @@ func (e *dockerExecutor) Wait(ctx context.Context) (int, error) {
// kill it.
return
} else if err != nil {
- e.logf("Error inspecting container: %s", err)
- watchdogErr <- err
- return
+ watchdogErr <- fmt.Errorf("error inspecting container: %s", err)
} else if ctr.State == nil || !(ctr.State.Running || ctr.State.Status == "created") {
- watchdogErr <- fmt.Errorf("Container is not running: State=%v", ctr.State)
- return
+ watchdogErr <- fmt.Errorf("container is not running: State=%v", ctr.State)
+ } else {
+ watchdogErr <- nil
}
}
}()
waitOk, waitErr := e.dockerclient.ContainerWait(ctx, e.containerID, dockercontainer.WaitConditionNotRunning)
+ errors := 0
for {
select {
case waitBody := <-waitOk:
@@ -250,7 +255,16 @@ func (e *dockerExecutor) Wait(ctx context.Context) (int, error) {
return -1, ctx.Err()
case err := <-watchdogErr:
- return -1, err
+ if err == nil {
+ errors = 0
+ } else {
+ e.logf("docker watchdog: %s", err)
+ errors++
+ if errors >= dockerWatchdogThreshold {
+ e.logf("docker watchdog: giving up")
+ return -1, err
+ }
+ }
}
}
}
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list