[ARVADOS] updated: 1.2.0-221-gafe579c7b
Git user
git at public.curoverse.com
Thu Oct 25 17:45:43 EDT 2018
Summary of changes:
services/crunch-run/crunchrun.go | 42 ++++++++++++-----------------------
services/crunch-run/crunchrun_test.go | 14 +++++++-----
2 files changed, 22 insertions(+), 34 deletions(-)
via afe579c7b3c03b56fa8cc2c97f44134642a9458f (commit)
from 986a0a7dcb98af92de23fecd745bb23768fe4647 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
commit afe579c7b3c03b56fa8cc2c97f44134642a9458f
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Thu Oct 25 17:44:14 2018 -0400
14328: Check ContainerInspect instead of List, give up on error.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/services/crunch-run/crunchrun.go b/services/crunch-run/crunchrun.go
index 39cf44082..d38808f46 100644
--- a/services/crunch-run/crunchrun.go
+++ b/services/crunch-run/crunchrun.go
@@ -79,7 +79,7 @@ type ThinDockerClient interface {
ContainerStart(ctx context.Context, container string, options dockertypes.ContainerStartOptions) error
ContainerRemove(ctx context.Context, container string, options dockertypes.ContainerRemoveOptions) error
ContainerWait(ctx context.Context, container string, condition dockercontainer.WaitCondition) (<-chan dockercontainer.ContainerWaitOKBody, <-chan error)
- ContainerList(ctx context.Context, opts dockertypes.ContainerListOptions) ([]dockertypes.Container, error)
+ ContainerInspect(ctx context.Context, id string) (dockertypes.ContainerJSON, error)
ImageInspectWithRaw(ctx context.Context, image string) (dockertypes.ImageInspect, []byte, error)
ImageLoad(ctx context.Context, input io.Reader, quiet bool) (dockertypes.ImageLoadResponse, error)
ImageRemove(ctx context.Context, image string, options dockertypes.ImageRemoveOptions) ([]dockertypes.ImageDeleteResponseItem, error)
@@ -157,7 +157,7 @@ type ContainerRunner struct {
arvMountLog *ThrottledLogger
checkContainerd time.Duration
- containerWaitGracePeriod time.Duration
+ containerWatchdogInterval time.Duration
}
// setupSignals sets up signal handling to gracefully terminate the underlying
@@ -1134,38 +1134,24 @@ func (runner *ContainerRunner) WaitFinish() error {
containerGone := make(chan struct{})
go func() {
defer close(containerGone)
- if runner.containerWaitGracePeriod < 1 {
- runner.containerWaitGracePeriod = 30 * time.Second
+ if runner.containerWatchdogInterval < 1 {
+ runner.containerWatchdogInterval = time.Minute
}
- found := time.Now()
- polling:
- for range time.NewTicker(runner.containerWaitGracePeriod / 30).C {
- ctrs, err := runner.Docker.ContainerList(context.Background(), dockertypes.ContainerListOptions{})
- if err != nil {
- runner.CrunchLog.Printf("error checking container list: %s", err)
- if runner.checkBrokenNode(err) {
- return
- }
- continue polling
- }
- for _, ctr := range ctrs {
- if ctr.ID == runner.ContainerID {
- found = time.Now()
- continue polling
- }
- }
+ for range time.NewTicker(runner.containerWatchdogInterval).C {
+ ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(runner.containerWatchdogInterval))
+ ctr, err := runner.Docker.ContainerInspect(ctx, runner.ContainerID)
+ cancel()
runner.cStateLock.Lock()
done := runner.cRemoved || runner.ExitCode != nil
runner.cStateLock.Unlock()
if done {
- // Skip the grace period and warning
- // log if the container disappeared
- // because it finished, or we removed
- // it ourselves.
return
- }
- if time.Since(found) > runner.containerWaitGracePeriod {
- runner.CrunchLog.Printf("container %s no longer exists", runner.ContainerID)
+ } else if err != nil {
+ runner.CrunchLog.Printf("Error inspecting container: %s", err)
+ runner.checkBrokenNode(err)
+ return
+ } else if ctr.State == nil || !(ctr.State.Running || ctr.State.Status == "created") {
+ runner.CrunchLog.Printf("Container is not running: State=%v", ctr.State)
return
}
}
diff --git a/services/crunch-run/crunchrun_test.go b/services/crunch-run/crunchrun_test.go
index 0e48a50aa..eb4f220e2 100644
--- a/services/crunch-run/crunchrun_test.go
+++ b/services/crunch-run/crunchrun_test.go
@@ -178,12 +178,15 @@ func (t *TestDockerClient) ContainerWait(ctx context.Context, container string,
return body, err
}
-func (t *TestDockerClient) ContainerList(ctx context.Context, options dockertypes.ContainerListOptions) ([]dockertypes.Container, error) {
+func (t *TestDockerClient) ContainerInspect(ctx context.Context, id string) (c dockertypes.ContainerJSON, err error) {
+ c.ContainerJSONBase = &dockertypes.ContainerJSONBase{}
+ c.ID = "abcde"
if t.ctrExited {
- return nil, nil
+ c.State = &dockertypes.ContainerState{Status: "exited", Dead: true}
} else {
- return []dockertypes.Container{{ID: "abcde"}}, nil
+ c.State = &dockertypes.ContainerState{Status: "running", Pid: 1234, Running: true}
}
+ return
}
func (t *TestDockerClient) ImageInspectWithRaw(ctx context.Context, image string) (dockertypes.ImageInspect, []byte, error) {
@@ -748,6 +751,7 @@ func (s *TestSuite) fullRunHelper(c *C, record string, extraMounts []string, exi
c.Assert(err, IsNil)
s.runner = cr
cr.statInterval = 100 * time.Millisecond
+ cr.containerWatchdogInterval = time.Second
am := &ArvMountCmdLine{}
cr.RunArvMount = am.ArvMountTest
@@ -850,15 +854,13 @@ func (s *TestSuite) TestContainerWaitFails(c *C) {
"output_path": "/tmp",
"priority": 1
}`, nil, 0, func(t *TestDockerClient) {
- s.runner.containerWaitGracePeriod = time.Second
t.ctrExited = true
time.Sleep(10 * time.Second)
t.logWriter.Close()
})
c.Check(api.CalledWith("container.state", "Cancelled"), NotNil)
- c.Check(api.Logs["crunch-run"].String(), Matches, "(?ms).*container.*no longer exists.*")
- c.Check(api.Logs["crunch-run"].String(), Matches, "(?ms).*docker client never returned status.*")
+ c.Check(api.Logs["crunch-run"].String(), Matches, "(?ms).*Container is not running.*")
}
func (s *TestSuite) TestCrunchstat(c *C) {
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list