[ARVADOS] updated: 1.2.0-221-gafe579c7b

Git user git at public.curoverse.com
Thu Oct 25 17:45:43 EDT 2018


Summary of changes:
 services/crunch-run/crunchrun.go      | 42 ++++++++++++-----------------------
 services/crunch-run/crunchrun_test.go | 14 +++++++-----
 2 files changed, 22 insertions(+), 34 deletions(-)

       via  afe579c7b3c03b56fa8cc2c97f44134642a9458f (commit)
      from  986a0a7dcb98af92de23fecd745bb23768fe4647 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit afe579c7b3c03b56fa8cc2c97f44134642a9458f
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Thu Oct 25 17:44:14 2018 -0400

    14328: Check ContainerInspect instead of List, give up on error.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/services/crunch-run/crunchrun.go b/services/crunch-run/crunchrun.go
index 39cf44082..d38808f46 100644
--- a/services/crunch-run/crunchrun.go
+++ b/services/crunch-run/crunchrun.go
@@ -79,7 +79,7 @@ type ThinDockerClient interface {
 	ContainerStart(ctx context.Context, container string, options dockertypes.ContainerStartOptions) error
 	ContainerRemove(ctx context.Context, container string, options dockertypes.ContainerRemoveOptions) error
 	ContainerWait(ctx context.Context, container string, condition dockercontainer.WaitCondition) (<-chan dockercontainer.ContainerWaitOKBody, <-chan error)
-	ContainerList(ctx context.Context, opts dockertypes.ContainerListOptions) ([]dockertypes.Container, error)
+	ContainerInspect(ctx context.Context, id string) (dockertypes.ContainerJSON, error)
 	ImageInspectWithRaw(ctx context.Context, image string) (dockertypes.ImageInspect, []byte, error)
 	ImageLoad(ctx context.Context, input io.Reader, quiet bool) (dockertypes.ImageLoadResponse, error)
 	ImageRemove(ctx context.Context, image string, options dockertypes.ImageRemoveOptions) ([]dockertypes.ImageDeleteResponseItem, error)
@@ -157,7 +157,7 @@ type ContainerRunner struct {
 	arvMountLog     *ThrottledLogger
 	checkContainerd time.Duration
 
-	containerWaitGracePeriod time.Duration
+	containerWatchdogInterval time.Duration
 }
 
 // setupSignals sets up signal handling to gracefully terminate the underlying
@@ -1134,38 +1134,24 @@ func (runner *ContainerRunner) WaitFinish() error {
 	containerGone := make(chan struct{})
 	go func() {
 		defer close(containerGone)
-		if runner.containerWaitGracePeriod < 1 {
-			runner.containerWaitGracePeriod = 30 * time.Second
+		if runner.containerWatchdogInterval < 1 {
+			runner.containerWatchdogInterval = time.Minute
 		}
-		found := time.Now()
-	polling:
-		for range time.NewTicker(runner.containerWaitGracePeriod / 30).C {
-			ctrs, err := runner.Docker.ContainerList(context.Background(), dockertypes.ContainerListOptions{})
-			if err != nil {
-				runner.CrunchLog.Printf("error checking container list: %s", err)
-				if runner.checkBrokenNode(err) {
-					return
-				}
-				continue polling
-			}
-			for _, ctr := range ctrs {
-				if ctr.ID == runner.ContainerID {
-					found = time.Now()
-					continue polling
-				}
-			}
+		for range time.NewTicker(runner.containerWatchdogInterval).C {
+			ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(runner.containerWatchdogInterval))
+			ctr, err := runner.Docker.ContainerInspect(ctx, runner.ContainerID)
+			cancel()
 			runner.cStateLock.Lock()
 			done := runner.cRemoved || runner.ExitCode != nil
 			runner.cStateLock.Unlock()
 			if done {
-				// Skip the grace period and warning
-				// log if the container disappeared
-				// because it finished, or we removed
-				// it ourselves.
 				return
-			}
-			if time.Since(found) > runner.containerWaitGracePeriod {
-				runner.CrunchLog.Printf("container %s no longer exists", runner.ContainerID)
+			} else if err != nil {
+				runner.CrunchLog.Printf("Error inspecting container: %s", err)
+				runner.checkBrokenNode(err)
+				return
+			} else if ctr.State == nil || !(ctr.State.Running || ctr.State.Status == "created") {
+				runner.CrunchLog.Printf("Container is not running: State=%v", ctr.State)
 				return
 			}
 		}
diff --git a/services/crunch-run/crunchrun_test.go b/services/crunch-run/crunchrun_test.go
index 0e48a50aa..eb4f220e2 100644
--- a/services/crunch-run/crunchrun_test.go
+++ b/services/crunch-run/crunchrun_test.go
@@ -178,12 +178,15 @@ func (t *TestDockerClient) ContainerWait(ctx context.Context, container string,
 	return body, err
 }
 
-func (t *TestDockerClient) ContainerList(ctx context.Context, options dockertypes.ContainerListOptions) ([]dockertypes.Container, error) {
+func (t *TestDockerClient) ContainerInspect(ctx context.Context, id string) (c dockertypes.ContainerJSON, err error) {
+	c.ContainerJSONBase = &dockertypes.ContainerJSONBase{}
+	c.ID = "abcde"
 	if t.ctrExited {
-		return nil, nil
+		c.State = &dockertypes.ContainerState{Status: "exited", Dead: true}
 	} else {
-		return []dockertypes.Container{{ID: "abcde"}}, nil
+		c.State = &dockertypes.ContainerState{Status: "running", Pid: 1234, Running: true}
 	}
+	return
 }
 
 func (t *TestDockerClient) ImageInspectWithRaw(ctx context.Context, image string) (dockertypes.ImageInspect, []byte, error) {
@@ -748,6 +751,7 @@ func (s *TestSuite) fullRunHelper(c *C, record string, extraMounts []string, exi
 	c.Assert(err, IsNil)
 	s.runner = cr
 	cr.statInterval = 100 * time.Millisecond
+	cr.containerWatchdogInterval = time.Second
 	am := &ArvMountCmdLine{}
 	cr.RunArvMount = am.ArvMountTest
 
@@ -850,15 +854,13 @@ func (s *TestSuite) TestContainerWaitFails(c *C) {
     "output_path": "/tmp",
     "priority": 1
 }`, nil, 0, func(t *TestDockerClient) {
-		s.runner.containerWaitGracePeriod = time.Second
 		t.ctrExited = true
 		time.Sleep(10 * time.Second)
 		t.logWriter.Close()
 	})
 
 	c.Check(api.CalledWith("container.state", "Cancelled"), NotNil)
-	c.Check(api.Logs["crunch-run"].String(), Matches, "(?ms).*container.*no longer exists.*")
-	c.Check(api.Logs["crunch-run"].String(), Matches, "(?ms).*docker client never returned status.*")
+	c.Check(api.Logs["crunch-run"].String(), Matches, "(?ms).*Container is not running.*")
 }
 
 func (s *TestSuite) TestCrunchstat(c *C) {

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list