[ARVADOS] created: 1.1.1-109-gca71780

Git user git at public.curoverse.com
Fri Dec 1 11:26:06 EST 2017


        at  ca71780328ebc4c2e6d524f0b7a82d72c6036cc0 (commit)


commit ca71780328ebc4c2e6d524f0b7a82d72c6036cc0
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date:   Fri Dec 1 11:23:23 2017 -0500

    12614: Additional error message triggers "broken node" behavior (refs #12503)
    
    Also make "executable not found" error message check more specific to avoid
    false positives, which produce confusing messages.  Add tests.
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/services/crunch-run/crunchrun.go b/services/crunch-run/crunchrun.go
index fc0dda7..d455dd2 100644
--- a/services/crunch-run/crunchrun.go
+++ b/services/crunch-run/crunchrun.go
@@ -19,6 +19,7 @@ import (
 	"os/signal"
 	"path"
 	"path/filepath"
+	"regexp"
 	"runtime"
 	"runtime/pprof"
 	"sort"
@@ -228,12 +229,15 @@ func (runner *ContainerRunner) stopSignals() {
 	}
 }
 
-var errorBlacklist = []string{"Cannot connect to the Docker daemon"}
+var errorBlacklist = []string{
+	"(?ms).*[Cc]annot connect to the Docker daemon.*",
+	"(?ms).*oci runtime error.*starting container process.*container init.*mounting.*to rootfs.*no such file or directory.*",
+}
 var brokenNodeHook *string = flag.String("broken-node-hook", "", "Script to run if node is detected to be broken (for example, Docker daemon is not running)")
 
 func (runner *ContainerRunner) checkBrokenNode(goterr error) bool {
 	for _, d := range errorBlacklist {
-		if strings.Index(goterr.Error(), d) != -1 {
+		if m, e := regexp.MatchString(d, goterr.Error()); m && e == nil {
 			runner.CrunchLog.Printf("Error suggests node is unable to run containers: %v", goterr)
 			if *brokenNodeHook == "" {
 				runner.CrunchLog.Printf("No broken node hook provided, cannot mark node as broken.")
@@ -915,7 +919,7 @@ func (runner *ContainerRunner) StartContainer() error {
 		dockertypes.ContainerStartOptions{})
 	if err != nil {
 		var advice string
-		if strings.Contains(err.Error(), "no such file or directory") {
+		if m, e := regexp.MatchString("(?ms).*(exec|System error).*(no such file or directory|file not found).*", err.Error()); m && e == nil {
 			advice = fmt.Sprintf("\nPossible causes: command %q is missing, the interpreter given in #! is missing, or script has Windows line endings.", runner.Container.Command[0])
 		}
 		return fmt.Errorf("could not start container: %v%s", err, advice)
diff --git a/services/crunch-run/crunchrun_test.go b/services/crunch-run/crunchrun_test.go
index 97faa89..e1d9fed 100644
--- a/services/crunch-run/crunchrun_test.go
+++ b/services/crunch-run/crunchrun_test.go
@@ -130,6 +130,19 @@ func (t *TestDockerClient) ContainerCreate(ctx context.Context, config *dockerco
 }
 
 func (t *TestDockerClient) ContainerStart(ctx context.Context, container string, options dockertypes.ContainerStartOptions) error {
+	if t.finish == 3 {
+		return errors.New(`Error response from daemon: oci runtime error: container_linux.go:247: starting container process caused "process_linux.go:359: container init caused \"rootfs_linux.go:54: mounting \\\"/tmp/keep453790790/by_id/99999999999999999999999999999999+99999/myGenome\\\" to rootfs \\\"/tmp/docker/overlay2/9999999999999999999999999999999999999999999999999999999999999999/merged\\\" at \\\"/tmp/docker/overlay2/9999999999999999999999999999999999999999999999999999999999999999/merged/keep/99999999999999999999999999999999+99999/myGenome\\\" caused \\\"no such file or directory\\\"\""`)
+	}
+	if t.finish == 4 {
+		return errors.New(`panic: standard_init_linux.go:175: exec user process caused "no such file or directory"`)
+	}
+	if t.finish == 5 {
+		return errors.New(`Error response from daemon: Cannot start container 41f26cbc43bcc1280f4323efb1830a394ba8660c9d1c2b564ba42bf7f7694845: [8] System error: no such file or directory`)
+	}
+	if t.finish == 6 {
+		return errors.New(`Error response from daemon: Cannot start container 58099cd76c834f3dc2a4fb76c8028f049ae6d4fdf0ec373e1f2cfea030670c2d: [8] System error: exec: "foobar": executable file not found in $PATH`)
+	}
+
 	if container == "abcde" {
 		// t.fn gets executed in ContainerWait
 		return nil
@@ -1835,3 +1848,91 @@ func (s *TestSuite) TestFullBrokenDocker2(c *C) {
 	c.Check(api.Logs["crunch-run"].String(), Matches, "(?ms).*unable to run containers.*")
 	c.Check(api.Logs["crunch-run"].String(), Matches, "(?ms).*No broken node hook.*")
 }
+
+func (s *TestSuite) TestFullBrokenDocker3(c *C) {
+	ech := ""
+	brokenNodeHook = &ech
+
+	api, _, _ := FullRunHelper(c, `{
+    "command": ["echo", "hello world"],
+    "container_image": "d4ab34d3d4f8a72f5c4973051ae69fab+122",
+    "cwd": ".",
+    "environment": {},
+    "mounts": {"/tmp": {"kind": "tmp"} },
+    "output_path": "/tmp",
+    "priority": 1,
+    "runtime_constraints": {}
+}`, nil, 3, func(t *TestDockerClient) {
+		t.logWriter.Write(dockerLog(1, "hello world\n"))
+		t.logWriter.Close()
+	})
+
+	c.Check(api.CalledWith("container.state", "Cancelled"), NotNil)
+	c.Check(api.Logs["crunch-run"].String(), Matches, "(?ms).*unable to run containers.*")
+}
+
+func (s *TestSuite) TestBadCommand1(c *C) {
+	ech := ""
+	brokenNodeHook = &ech
+
+	api, _, _ := FullRunHelper(c, `{
+    "command": ["echo", "hello world"],
+    "container_image": "d4ab34d3d4f8a72f5c4973051ae69fab+122",
+    "cwd": ".",
+    "environment": {},
+    "mounts": {"/tmp": {"kind": "tmp"} },
+    "output_path": "/tmp",
+    "priority": 1,
+    "runtime_constraints": {}
+}`, nil, 4, func(t *TestDockerClient) {
+		t.logWriter.Write(dockerLog(1, "hello world\n"))
+		t.logWriter.Close()
+	})
+
+	c.Check(api.CalledWith("container.state", "Cancelled"), NotNil)
+	c.Check(api.Logs["crunch-run"].String(), Matches, "(?ms).*Possible causes:.*is missing.*")
+}
+
+func (s *TestSuite) TestBadCommand2(c *C) {
+	ech := ""
+	brokenNodeHook = &ech
+
+	api, _, _ := FullRunHelper(c, `{
+    "command": ["echo", "hello world"],
+    "container_image": "d4ab34d3d4f8a72f5c4973051ae69fab+122",
+    "cwd": ".",
+    "environment": {},
+    "mounts": {"/tmp": {"kind": "tmp"} },
+    "output_path": "/tmp",
+    "priority": 1,
+    "runtime_constraints": {}
+}`, nil, 5, func(t *TestDockerClient) {
+		t.logWriter.Write(dockerLog(1, "hello world\n"))
+		t.logWriter.Close()
+	})
+
+	c.Check(api.CalledWith("container.state", "Cancelled"), NotNil)
+	c.Check(api.Logs["crunch-run"].String(), Matches, "(?ms).*Possible causes:.*is missing.*")
+}
+
+func (s *TestSuite) TestBadCommand3(c *C) {
+	ech := ""
+	brokenNodeHook = &ech
+
+	api, _, _ := FullRunHelper(c, `{
+    "command": ["echo", "hello world"],
+    "container_image": "d4ab34d3d4f8a72f5c4973051ae69fab+122",
+    "cwd": ".",
+    "environment": {},
+    "mounts": {"/tmp": {"kind": "tmp"} },
+    "output_path": "/tmp",
+    "priority": 1,
+    "runtime_constraints": {}
+}`, nil, 6, func(t *TestDockerClient) {
+		t.logWriter.Write(dockerLog(1, "hello world\n"))
+		t.logWriter.Close()
+	})
+
+	c.Check(api.CalledWith("container.state", "Cancelled"), NotNil)
+	c.Check(api.Logs["crunch-run"].String(), Matches, "(?ms).*Possible causes:.*is missing.*")
+}

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list