[ARVADOS] created: 1.1.4-562-g08540a58a

Git user git at public.curoverse.com
Tue Jul 10 14:14:41 EDT 2018


        at  08540a58a67f2d575b1e15338c6d49b8b74e2c38 (commit)


commit 08540a58a67f2d575b1e15338c6d49b8b74e2c38
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date:   Tue Jul 10 14:12:32 2018 -0400

    12983: Command line option takes duration.  Add tests.
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/services/crunch-run/crunchrun.go b/services/crunch-run/crunchrun.go
index 490d2cf48..fc206a91c 100644
--- a/services/crunch-run/crunchrun.go
+++ b/services/crunch-run/crunchrun.go
@@ -84,6 +84,10 @@ type ThinDockerClient interface {
 	ImageRemove(ctx context.Context, image string, options dockertypes.ImageRemoveOptions) ([]dockertypes.ImageDeleteResponseItem, error)
 }
 
+type PsProcess interface {
+	CmdlineSlice() ([]string, error)
+}
+
 // ContainerRunner is the main stateful struct used for a single execution of a
 // container.
 type ContainerRunner struct {
@@ -119,6 +123,8 @@ type ContainerRunner struct {
 	finalState    string
 	parentTemp    string
 
+	ListProcesses func() ([]PsProcess, error)
+
 	statLogger       io.WriteCloser
 	statReporter     *crunchstat.Reporter
 	hoststatLogger   io.WriteCloser
@@ -145,7 +151,7 @@ type ContainerRunner struct {
 	enableNetwork   string // one of "default" or "always"
 	networkMode     string // passed through to HostConfig.NetworkMode
 	arvMountLog     *ThrottledLogger
-	checkContainerd bool
+	checkContainerd time.Duration
 }
 
 // setupSignals sets up signal handling to gracefully terminate the underlying
@@ -1007,6 +1013,10 @@ func (runner *ContainerRunner) CreateContainer() error {
 	runner.ContainerConfig.Volumes = runner.Volumes
 
 	maxRAM := int64(runner.Container.RuntimeConstraints.RAM)
+	if maxRAM < 4*1024*1024 {
+		// Docker daemon won't let you set a limit less than 4 MiB
+		maxRAM = 4 * 1024 * 1024
+	}
 	runner.HostConfig = dockercontainer.HostConfig{
 		Binds: runner.Binds,
 		LogConfig: dockercontainer.LogConfig{
@@ -1079,10 +1089,10 @@ func (runner *ContainerRunner) StartContainer() error {
 
 // checkContainerd checks if "containerd" is present in the process list.
 func (runner *ContainerRunner) CheckContainerd() error {
-	if !runner.checkContainerd {
+	if runner.checkContainerd == 0 {
 		return nil
 	}
-	p, _ := process.Processes()
+	p, _ := runner.ListProcesses()
 	for _, i := range p {
 		e, _ := i.CmdlineSlice()
 		if len(e) > 0 {
@@ -1114,21 +1124,23 @@ func (runner *ContainerRunner) WaitFinish() error {
 	defer func() {
 		close(containerdGone)
 	}()
-	go func() {
-		ticker := time.NewTicker(time.Duration(60 * time.Second))
-		defer ticker.Stop()
-		for {
-			select {
-			case <-ticker.C:
-				if ck := runner.CheckContainerd(); ck != nil {
-					containerdGone <- ck
-					return
+	if runner.checkContainerd > 0 {
+		go func() {
+			ticker := time.NewTicker(time.Duration(runner.checkContainerd))
+			defer ticker.Stop()
+			for {
+				select {
+				case <-ticker.C:
+					if ck := runner.CheckContainerd(); ck != nil {
+						containerdGone <- ck
+						return
+					}
+				case <-containerdGone:
+					break
 				}
-			case <-containerdGone:
-				break
 			}
-		}
-	}()
+		}()
+	}
 
 	for {
 		select {
@@ -1584,6 +1596,17 @@ func NewContainerRunner(client *arvados.Client, api IArvadosClient, kc IKeepClie
 	cr.NewLogWriter = cr.NewArvLogWriter
 	cr.RunArvMount = cr.ArvMountCmd
 	cr.MkTempDir = ioutil.TempDir
+	cr.ListProcesses = func() ([]PsProcess, error) {
+		pr, err := process.Processes()
+		if err != nil {
+			return nil, err
+		}
+		ps := make([]PsProcess, len(pr))
+		for i, j := range pr {
+			ps[i] = j
+		}
+		return ps, nil
+	}
 	cr.MkArvClient = func(token string) (IArvadosClient, error) {
 		cl, err := arvadosclient.MakeArvadosClient()
 		if err != nil {
@@ -1626,7 +1649,7 @@ func main() {
     	`)
 	memprofile := flag.String("memprofile", "", "write memory profile to `file` after running container")
 	getVersion := flag.Bool("version", false, "Print version information and exit.")
-	checkContainerd := flag.Bool("check-containerd", false, "Periodically check if (docker-)containerd is running, cancel if missing.")
+	checkContainerd := flag.Duration("check-containerd", 60*time.Second, "Periodic check if (docker-)containerd is running, period in seconds.")
 	flag.Parse()
 
 	// Print version information if requested
diff --git a/services/crunch-run/crunchrun_test.go b/services/crunch-run/crunchrun_test.go
index 8ad487d77..8d8e04000 100644
--- a/services/crunch-run/crunchrun_test.go
+++ b/services/crunch-run/crunchrun_test.go
@@ -2067,3 +2067,49 @@ func (s *TestSuite) TestSecretTextMountPoint(c *C) {
 	c.Check(api.CalledWith("collection.manifest_text", ". 34819d7beeabb9260a5c854bc85b3e44+10 0:10:secret.conf\n"), IsNil)
 	c.Check(api.CalledWith("collection.manifest_text", ""), NotNil)
 }
+
+type FakeProcess struct {
+	cmdLine []string
+}
+
+func (fp FakeProcess) CmdlineSlice() ([]string, error) {
+	return fp.cmdLine, nil
+}
+
+func (s *TestSuite) helpCheckContainerd(c *C, lp func() ([]PsProcess, error)) error {
+	kc := &KeepTestClient{}
+	defer kc.Close()
+	cr, err := NewContainerRunner(s.client, &ArvTestClient{callraw: true}, kc, s.docker, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
+	cr.checkContainerd = time.Duration(100 * time.Millisecond)
+	c.Assert(err, IsNil)
+	cr.ListProcesses = lp
+
+	s.docker.fn = func(t *TestDockerClient) {
+		time.Sleep(1 * time.Second)
+		t.logWriter.Close()
+	}
+
+	err = cr.CreateContainer()
+	c.Check(err, IsNil)
+
+	err = cr.StartContainer()
+	c.Check(err, IsNil)
+
+	err = cr.WaitFinish()
+	return err
+
+}
+
+func (s *TestSuite) TestCheckContainerdPresent(c *C) {
+	err := s.helpCheckContainerd(c, func() ([]PsProcess, error) {
+		return []PsProcess{FakeProcess{[]string{"docker-containerd"}}}, nil
+	})
+	c.Check(err, IsNil)
+}
+
+func (s *TestSuite) TestCheckContainerdMissing(c *C) {
+	err := s.helpCheckContainerd(c, func() ([]PsProcess, error) {
+		return []PsProcess{FakeProcess{[]string{"abc"}}}, nil
+	})
+	c.Check(err, ErrorMatches, `'containerd' not found in process list.`)
+}

commit 6b495b26e49bb32eccaddffea36d91f34d3ba6f8
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date:   Tue Jul 10 09:49:17 2018 -0400

    12983: Periodically check for containerd
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/services/crunch-run/crunchrun.go b/services/crunch-run/crunchrun.go
index adce853a5..490d2cf48 100644
--- a/services/crunch-run/crunchrun.go
+++ b/services/crunch-run/crunchrun.go
@@ -32,6 +32,7 @@ import (
 	"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
 	"git.curoverse.com/arvados.git/sdk/go/keepclient"
 	"git.curoverse.com/arvados.git/sdk/go/manifest"
+	"github.com/shirou/gopsutil/process"
 	"golang.org/x/net/context"
 
 	dockertypes "github.com/docker/docker/api/types"
@@ -141,9 +142,10 @@ type ContainerRunner struct {
 	cStateLock sync.Mutex
 	cCancelled bool // StopContainer() invoked
 
-	enableNetwork string // one of "default" or "always"
-	networkMode   string // passed through to HostConfig.NetworkMode
-	arvMountLog   *ThrottledLogger
+	enableNetwork   string // one of "default" or "always"
+	networkMode     string // passed through to HostConfig.NetworkMode
+	arvMountLog     *ThrottledLogger
+	checkContainerd bool
 }
 
 // setupSignals sets up signal handling to gracefully terminate the underlying
@@ -185,23 +187,27 @@ var errorBlacklist = []string{
 }
 var brokenNodeHook *string = flag.String("broken-node-hook", "", "Script to run if node is detected to be broken (for example, Docker daemon is not running)")
 
+func (runner *ContainerRunner) runBrokenNodeHook() {
+	if *brokenNodeHook == "" {
+		runner.CrunchLog.Printf("No broken node hook provided, cannot mark node as broken.")
+	} else {
+		runner.CrunchLog.Printf("Running broken node hook %q", *brokenNodeHook)
+		// run killme script
+		c := exec.Command(*brokenNodeHook)
+		c.Stdout = runner.CrunchLog
+		c.Stderr = runner.CrunchLog
+		err := c.Run()
+		if err != nil {
+			runner.CrunchLog.Printf("Error running broken node hook: %v", err)
+		}
+	}
+}
+
 func (runner *ContainerRunner) checkBrokenNode(goterr error) bool {
 	for _, d := range errorBlacklist {
 		if m, e := regexp.MatchString(d, goterr.Error()); m && e == nil {
 			runner.CrunchLog.Printf("Error suggests node is unable to run containers: %v", goterr)
-			if *brokenNodeHook == "" {
-				runner.CrunchLog.Printf("No broken node hook provided, cannot mark node as broken.")
-			} else {
-				runner.CrunchLog.Printf("Running broken node hook %q", *brokenNodeHook)
-				// run killme script
-				c := exec.Command(*brokenNodeHook)
-				c.Stdout = runner.CrunchLog
-				c.Stderr = runner.CrunchLog
-				err := c.Run()
-				if err != nil {
-					runner.CrunchLog.Printf("Error running broken node hook: %v", err)
-				}
-			}
+			runner.runBrokenNodeHook()
 			return true
 		}
 	}
@@ -1071,6 +1077,27 @@ func (runner *ContainerRunner) StartContainer() error {
 	return nil
 }
 
+// checkContainerd checks if "containerd" is present in the process list.
+func (runner *ContainerRunner) CheckContainerd() error {
+	if !runner.checkContainerd {
+		return nil
+	}
+	p, _ := process.Processes()
+	for _, i := range p {
+		e, _ := i.CmdlineSlice()
+		if len(e) > 0 {
+			if strings.Index(e[0], "containerd") > -1 {
+				return nil
+			}
+		}
+	}
+
+	// Not found
+	runner.runBrokenNodeHook()
+	runner.stop(nil)
+	return fmt.Errorf("'containerd' not found in process list.")
+}
+
 // WaitFinish waits for the container to terminate, capture the exit code, and
 // close the stdout/stderr logging.
 func (runner *ContainerRunner) WaitFinish() error {
@@ -1082,6 +1109,27 @@ func (runner *ContainerRunner) WaitFinish() error {
 	if timeout := runner.Container.SchedulingParameters.MaxRunTime; timeout > 0 {
 		runTimeExceeded = time.After(time.Duration(timeout) * time.Second)
 	}
+
+	containerdGone := make(chan error)
+	defer func() {
+		close(containerdGone)
+	}()
+	go func() {
+		ticker := time.NewTicker(time.Duration(60 * time.Second))
+		defer ticker.Stop()
+		for {
+			select {
+			case <-ticker.C:
+				if ck := runner.CheckContainerd(); ck != nil {
+					containerdGone <- ck
+					return
+				}
+			case <-containerdGone:
+				break
+			}
+		}
+	}()
+
 	for {
 		select {
 		case waitBody := <-waitOk:
@@ -1107,6 +1155,9 @@ func (runner *ContainerRunner) WaitFinish() error {
 			runner.CrunchLog.Printf("maximum run time exceeded. Stopping container.")
 			runner.stop(nil)
 			runTimeExceeded = nil
+
+		case err := <-containerdGone:
+			return err
 		}
 	}
 }
@@ -1408,6 +1459,12 @@ func (runner *ContainerRunner) Run() (err error) {
 		return
 	}
 
+	// Sanity check that containerd is running.
+	err = runner.CheckContainerd()
+	if err != nil {
+		return
+	}
+
 	// check for and/or load image
 	err = runner.LoadImage()
 	if err != nil {
@@ -1569,6 +1626,7 @@ func main() {
     	`)
 	memprofile := flag.String("memprofile", "", "write memory profile to `file` after running container")
 	getVersion := flag.Bool("version", false, "Print version information and exit.")
+	checkContainerd := flag.Bool("check-containerd", false, "Periodically check if (docker-)containerd is running, cancel if missing.")
 	flag.Parse()
 
 	// Print version information if requested
@@ -1624,6 +1682,7 @@ func main() {
 	cr.expectCgroupParent = *cgroupParent
 	cr.enableNetwork = *enableNetwork
 	cr.networkMode = *networkMode
+	cr.checkContainerd = *checkContainerd
 	if *cgroupParentSubsystem != "" {
 		p := findCgroup(*cgroupParentSubsystem)
 		cr.setCgroupParent = p
diff --git a/services/crunch-run/logging_test.go b/services/crunch-run/logging_test.go
index 86f8cec04..13a171ae8 100644
--- a/services/crunch-run/logging_test.go
+++ b/services/crunch-run/logging_test.go
@@ -83,7 +83,7 @@ func (s *LoggingTestSuite) TestWriteLogsLarge(c *C) {
 	cr.CrunchLog.Print("Goodbye")
 	cr.CrunchLog.Close()
 
-	c.Check(api.Calls > 1, Equals, true)
+	c.Check(api.Calls > 0, Equals, true)
 	c.Check(api.Calls < 2000000, Equals, true)
 
 	mt, err := cr.LogCollection.MarshalManifest(".")
diff --git a/vendor/vendor.json b/vendor/vendor.json
index a4f750b4c..f18d4e464 100644
--- a/vendor/vendor.json
+++ b/vendor/vendor.json
@@ -72,6 +72,12 @@
 			"revisionTime": "2017-12-05T20:32:29Z"
 		},
 		{
+			"checksumSHA1": "st4vb0GmDeoKbsfxdpNZ2MPl76M=",
+			"path": "github.com/StackExchange/wmi",
+			"revision": "cdffdb33acae0e14efff2628f9bae377b597840e",
+			"revisionTime": "2018-04-12T20:51:11Z"
+		},
+		{
 			"checksumSHA1": "spyv5/YFBjYyZLZa1U2LBfDR8PM=",
 			"path": "github.com/beorn7/perks/quantile",
 			"revision": "4c0e84591b9aa9e6dcfdf3e020114cd81f89d5f9",
@@ -234,6 +240,18 @@
 			"revisionTime": "2017-03-27T23:54:44Z"
 		},
 		{
+			"checksumSHA1": "Kqv7bA4oJG0nPwQvGWDwGGaKONo=",
+			"path": "github.com/go-ole/go-ole",
+			"revision": "7a0fa49edf48165190530c675167e2f319a05268",
+			"revisionTime": "2018-06-25T08:58:08Z"
+		},
+		{
+			"checksumSHA1": "PArleDBtadu2qO4hJwHR8a3IOTA=",
+			"path": "github.com/go-ole/go-ole/oleutil",
+			"revision": "7a0fa49edf48165190530c675167e2f319a05268",
+			"revisionTime": "2018-06-25T08:58:08Z"
+		},
+		{
 			"checksumSHA1": "wn2shNJMwRZpvuvkf1s7h0wvqHI=",
 			"path": "github.com/gogo/protobuf/proto",
 			"revision": "160de10b2537169b5ae3e7e221d28269ef40d311",
@@ -427,6 +445,48 @@
 			"revisionTime": "2017-11-10T11:01:46Z"
 		},
 		{
+			"checksumSHA1": "q14d3C3xvWevU3dSv4P5K0+OSD0=",
+			"path": "github.com/shirou/gopsutil/cpu",
+			"revision": "63728fcf6b24475ecfea044e22242447666c2f52",
+			"revisionTime": "2018-07-05T13:28:12Z"
+		},
+		{
+			"checksumSHA1": "LZ9GloiGLTISmQ4dalK2XspH6Wo=",
+			"path": "github.com/shirou/gopsutil/host",
+			"revision": "63728fcf6b24475ecfea044e22242447666c2f52",
+			"revisionTime": "2018-07-05T13:28:12Z"
+		},
+		{
+			"checksumSHA1": "cyoqI0gryzjxGTkaAfyUqMiuUR0=",
+			"path": "github.com/shirou/gopsutil/internal/common",
+			"revision": "63728fcf6b24475ecfea044e22242447666c2f52",
+			"revisionTime": "2018-07-05T13:28:12Z"
+		},
+		{
+			"checksumSHA1": "vEQLjAO5T5K9zXblEMYdoaBZzj0=",
+			"path": "github.com/shirou/gopsutil/mem",
+			"revision": "63728fcf6b24475ecfea044e22242447666c2f52",
+			"revisionTime": "2018-07-05T13:28:12Z"
+		},
+		{
+			"checksumSHA1": "KMWFRa0DVpabo9d8euB4RYjUBQE=",
+			"path": "github.com/shirou/gopsutil/net",
+			"revision": "63728fcf6b24475ecfea044e22242447666c2f52",
+			"revisionTime": "2018-07-05T13:28:12Z"
+		},
+		{
+			"checksumSHA1": "fbO7c1gv1kSvWKOb/+5HUWFkBaA=",
+			"path": "github.com/shirou/gopsutil/process",
+			"revision": "63728fcf6b24475ecfea044e22242447666c2f52",
+			"revisionTime": "2018-07-05T13:28:12Z"
+		},
+		{
+			"checksumSHA1": "Nve7SpDmjsv6+rhkXAkfg/UQx94=",
+			"path": "github.com/shirou/w32",
+			"revision": "bb4de0191aa41b5507caa14b0650cdbddcd9280b",
+			"revisionTime": "2016-09-30T03:27:40Z"
+		},
+		{
 			"checksumSHA1": "8QeSG127zQqbA+YfkO1WkKx/iUI=",
 			"path": "github.com/src-d/gcfg",
 			"revision": "f187355171c936ac84a82793659ebb4936bc1c23",

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list