[ARVADOS] created: ac783a2ebf1bc0bcbdb294648a8f929df9d4ae11

Git user git at public.curoverse.com
Mon Aug 1 22:42:54 EDT 2016


        at  ac783a2ebf1bc0bcbdb294648a8f929df9d4ae11 (commit)


commit ac783a2ebf1bc0bcbdb294648a8f929df9d4ae11
Author: Tom Clegg <tom at curoverse.com>
Date:   Mon Aug 1 22:18:51 2016 -0400

    9406: Add -cgroup-parent-subsystem argument.
    
    This accommodates setups where the scheduling system (e.g., SLURM)
    uses cgroups to impose resource limits on crunch-run itself, and those
    limits are meant to include resources used by the container itself.
    
    Normally, when docker creates a container, it creates new cgroups
    inside the "docker" cgroup, which circumvents any resource limits set
    on crunch-run's own cgroup.
    
    If a subsystem is specified (e.g., -cgroup-parent-subsystem=memory),
    crunch-run will look in /proc/self/cgroup to find the current cgroup
    for the given subsystem, and instruct docker to use that cgroup as the
    parent of the new container's cgroup instead.

diff --git a/services/crunch-run/cgroup.go b/services/crunch-run/cgroup.go
new file mode 100644
index 0000000..b304519
--- /dev/null
+++ b/services/crunch-run/cgroup.go
@@ -0,0 +1,24 @@
+package main
+
+import (
+	"bytes"
+	"io/ioutil"
+	"log"
+)
+
+// findCgroup returns the current process's cgroup path for the given
+// subsystem, as listed in /proc/self/cgroup.
+//
+// Each line of that file has the form "ID:SUBSYSTEMS:PATH", where
+// SUBSYSTEMS is a comma-separated list, because several subsystems
+// can be attached to the same hierarchy (e.g.,
+// "4:cpu,cpuacct:/slurm/job1"). A line matches if subsystem equals
+// either the whole SUBSYSTEMS field or any single entry of the list.
+// The PATH of the first matching line is returned.
+//
+// If the file cannot be read, or no line matches, the problem is
+// logged and the process exits (log.Fatal / log.Fatalf).
+func findCgroup(subsystem string) string {
+	subsys := []byte(subsystem)
+	cgroups, err := ioutil.ReadFile("/proc/self/cgroup")
+	if err != nil {
+		log.Fatal(err)
+	}
+	for _, line := range bytes.Split(cgroups, []byte("\n")) {
+		toks := bytes.SplitN(line, []byte(":"), 3)
+		if len(toks) != 3 {
+			continue
+		}
+		// Whole-field match keeps working for callers that pass
+		// the complete list, e.g. "cpu,cpuacct".
+		if bytes.Equal(toks[1], subsys) {
+			return string(toks[2])
+		}
+		// Otherwise accept any one of the co-mounted subsystems.
+		for _, name := range bytes.Split(toks[1], []byte(",")) {
+			if bytes.Equal(name, subsys) {
+				return string(toks[2])
+			}
+		}
+	}
+	log.Fatalf("subsystem %q not found in /proc/self/cgroup", subsystem)
+	// Not reached (log.Fatalf exits), but the compiler requires a
+	// terminating return statement.
+	return ""
+}
diff --git a/services/crunch-run/cgroup_test.go b/services/crunch-run/cgroup_test.go
new file mode 100644
index 0000000..0b44f09
--- /dev/null
+++ b/services/crunch-run/cgroup_test.go
@@ -0,0 +1,17 @@
+package main
+
+import (
+	. "gopkg.in/check.v1"
+)
+
+// CgroupSuite is the gocheck suite holding the findCgroup tests.
+type CgroupSuite struct{}
+
+// Hand an instance of the suite to gocheck's Suite().
+var _ = Suite(&CgroupSuite{})
+
+// TestFindCgroup checks that findCgroup reports a non-empty cgroup
+// for each of the "memory" and "cpuset" subsystems, and logs what
+// it found.
+func (s *CgroupSuite) TestFindCgroup(c *C) {
+	subsystems := []string{"memory", "cpuset"}
+	for _, subsys := range subsystems {
+		cgroup := findCgroup(subsys)
+		c.Check(cgroup, Not(Equals), "")
+		c.Logf("cgroup(%q) == %q", subsys, cgroup)
+	}
+}
diff --git a/services/crunch-run/crunchrun.go b/services/crunch-run/crunchrun.go
index 32d524a..40e9fc1 100644
--- a/services/crunch-run/crunchrun.go
+++ b/services/crunch-run/crunchrun.go
@@ -93,11 +93,12 @@ type ContainerRunner struct {
 	ArvMountExit   chan error
 	finalState     string
 
-	statLogger   io.WriteCloser
-	statReporter *crunchstat.Reporter
-	statInterval time.Duration
-	cgroupRoot   string
-	cgroupParent string
+	statLogger         io.WriteCloser
+	statReporter       *crunchstat.Reporter
+	statInterval       time.Duration
+	cgroupRoot         string
+	expectCgroupParent string
+	setCgroupParent    string
 }
 
 // SetupSignals sets up signal handling to gracefully terminate the underlying
@@ -393,7 +394,7 @@ func (runner *ContainerRunner) StartCrunchstat() {
 	runner.statReporter = &crunchstat.Reporter{
 		CID:          runner.ContainerID,
 		Logger:       log.New(runner.statLogger, "", 0),
-		CgroupParent: runner.cgroupParent,
+		CgroupParent: runner.expectCgroupParent,
 		CgroupRoot:   runner.cgroupRoot,
 		PollPeriod:   runner.statInterval,
 	}
@@ -480,8 +481,13 @@ func (runner *ContainerRunner) CreateContainer() error {
 		return fmt.Errorf("While creating container: %v", err)
 	}
 
-	runner.HostConfig = dockerclient.HostConfig{Binds: runner.Binds,
-		LogConfig: dockerclient.LogConfig{Type: "none"}}
+	runner.HostConfig = dockerclient.HostConfig{
+		Binds:        runner.Binds,
+		CgroupParent: runner.setCgroupParent,
+		LogConfig: dockerclient.LogConfig{
+			Type: "none",
+		},
+	}
 
 	return runner.AttachStreams()
 }
@@ -823,7 +829,8 @@ func NewContainerRunner(api IArvadosClient,
 func main() {
 	statInterval := flag.Duration("crunchstat-interval", 10*time.Second, "sampling period for periodic resource usage reporting")
 	cgroupRoot := flag.String("cgroup-root", "/sys/fs/cgroup", "path to sysfs cgroup tree")
-	cgroupParent := flag.String("cgroup-parent", "docker", "name of container's parent cgroup")
+	cgroupParent := flag.String("cgroup-parent", "docker", "name of container's parent cgroup (ignored if -cgroup-parent-subsystem is used)")
+	cgroupParentSubsystem := flag.String("cgroup-parent-subsystem", "", "use current cgroup for given subsystem as parent cgroup for container")
 	flag.Parse()
 
 	containerId := flag.Arg(0)
@@ -850,7 +857,12 @@ func main() {
 	cr := NewContainerRunner(api, kc, docker, containerId)
 	cr.statInterval = *statInterval
 	cr.cgroupRoot = *cgroupRoot
-	cr.cgroupParent = *cgroupParent
+	cr.expectCgroupParent = *cgroupParent
+	if *cgroupParentSubsystem != "" {
+		p := findCgroup(*cgroupParentSubsystem)
+		cr.setCgroupParent = p
+		cr.expectCgroupParent = p
+	}
 
 	err = cr.Run()
 	if err != nil {

commit 483ca35ac4348924cfbc187dab4f1b88a272eea7
Author: Tom Clegg <tom at curoverse.com>
Date:   Mon Aug 1 21:42:37 2016 -0400

    9406: Support passing additional arguments from crunch-dispatch-slurm to crunch-run.

diff --git a/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go b/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go
index 740df55..af2c42e 100644
--- a/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go
+++ b/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go
@@ -9,6 +9,7 @@ import (
 	"git.curoverse.com/arvados.git/sdk/go/arvados"
 	"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
 	"git.curoverse.com/arvados.git/sdk/go/dispatch"
+	"io"
 	"io/ioutil"
 	"log"
 	"math"
@@ -20,9 +21,14 @@ import (
 
 // Config used by crunch-dispatch-slurm
 type Config struct {
-	SbatchArguments  []string
-	PollPeriod       *time.Duration
-	CrunchRunCommand *string
+	SbatchArguments []string
+	PollPeriod      *time.Duration
+
+	// crunch-run command to invoke. The container UUID will be
+	// appended. If nil, []string{"crunch-run"} will be used.
+	//
+	// Example: []string{"crunch-run", "--cgroup-parent-subsystem=memory"}
+	CrunchRunCommand []string
 }
 
 func main() {
@@ -52,11 +58,6 @@ func doMain() error {
 		10*time.Second,
 		"Time duration to poll for queued containers")
 
-	config.CrunchRunCommand = flags.String(
-		"crunch-run-command",
-		"/usr/bin/crunch-run",
-		"Crunch command to run container")
-
 	// Parse args; omit the first arg which is the command name
 	flags.Parse(os.Args[1:])
 
@@ -66,6 +67,10 @@ func doMain() error {
 		return err
 	}
 
+	if config.CrunchRunCommand == nil {
+		config.CrunchRunCommand = []string{"crunch-run"}
+	}
+
 	arv, err := arvadosclient.MakeArvadosClient()
 	if err != nil {
 		log.Printf("Error making Arvados client: %v", err)
@@ -115,7 +120,7 @@ var scancelCmd = scancelFunc
 
 // Submit job to slurm using sbatch.
 func submit(dispatcher *dispatch.Dispatcher,
-	container arvados.Container, crunchRunCommand string) (submitErr error) {
+	container arvados.Container, crunchRunCommand []string) (submitErr error) {
 	defer func() {
 		// If we didn't get as far as submitting a slurm job,
 		// unlock the container and return it to the queue.
@@ -178,7 +183,7 @@ func submit(dispatcher *dispatch.Dispatcher,
 
 	// Send a tiny script on stdin to execute the crunch-run command
 	// slurm actually enforces that this must be a #! script
-	fmt.Fprintf(stdinWriter, "#!/bin/sh\nexec '%s' '%s'\n", crunchRunCommand, container.UUID)
+	io.WriteString(stdinWriter, execScript(append(crunchRunCommand, container.UUID)))
 	stdinWriter.Close()
 
 	err = cmd.Wait()
@@ -215,7 +220,7 @@ func monitorSubmitOrCancel(dispatcher *dispatch.Dispatcher, container arvados.Co
 
 			log.Printf("About to submit queued container %v", container.UUID)
 
-			if err := submit(dispatcher, container, *config.CrunchRunCommand); err != nil {
+			if err := submit(dispatcher, container, config.CrunchRunCommand); err != nil {
 				log.Printf("Error submitting container %s to slurm: %v",
 					container.UUID, err)
 				// maybe sbatch is broken, put it back to queued
diff --git a/services/crunch-dispatch-slurm/crunch-dispatch-slurm_test.go b/services/crunch-dispatch-slurm/crunch-dispatch-slurm_test.go
index a559298..6692f7f 100644
--- a/services/crunch-dispatch-slurm/crunch-dispatch-slurm_test.go
+++ b/services/crunch-dispatch-slurm/crunch-dispatch-slurm_test.go
@@ -143,8 +143,7 @@ func (s *TestSuite) integrationTest(c *C,
 	c.Check(err, IsNil)
 	c.Check(len(containers.Items), Equals, 1)
 
-	echo := "echo"
-	config.CrunchRunCommand = &echo
+	config.CrunchRunCommand = []string{"echo"}
 
 	doneProcessing := make(chan struct{})
 	dispatcher := dispatch.Dispatcher{
@@ -206,7 +205,7 @@ func testWithServerStub(c *C, apiStubResponses map[string]arvadostest.StubRespon
 	log.SetOutput(io.MultiWriter(buf, os.Stderr))
 	defer log.SetOutput(os.Stderr)
 
-	config.CrunchRunCommand = &crunchCmd
+	config.CrunchRunCommand = []string{crunchCmd}
 
 	doneProcessing := make(chan struct{})
 	dispatcher := dispatch.Dispatcher{
diff --git a/services/crunch-dispatch-slurm/script.go b/services/crunch-dispatch-slurm/script.go
new file mode 100644
index 0000000..93ae6b5
--- /dev/null
+++ b/services/crunch-dispatch-slurm/script.go
@@ -0,0 +1,15 @@
+package main
+
+import (
+	"strings"
+)
+
+// execScript returns a "#!/bin/sh" script consisting of a single
+// exec command whose words are the given args. Each word is wrapped
+// in single quotes, with any embedded single quote rewritten as
+// '\'' so the shell receives the word unchanged.
+func execScript(args []string) string {
+	quoted := make([]string, len(args))
+	for i, word := range args {
+		quoted[i] = ` '` + strings.Replace(word, `'`, `'\''`, -1) + `'`
+	}
+	return "#!/bin/sh\nexec" + strings.Join(quoted, "") + "\n"
+}
diff --git a/services/crunch-dispatch-slurm/script_test.go b/services/crunch-dispatch-slurm/script_test.go
new file mode 100644
index 0000000..3cb407d
--- /dev/null
+++ b/services/crunch-dispatch-slurm/script_test.go
@@ -0,0 +1,24 @@
+package main
+
+import (
+	. "gopkg.in/check.v1"
+)
+
+// Hand an instance of the suite to gocheck's Suite().
+var _ = Suite(&ScriptSuite{})
+
+// ScriptSuite is the gocheck suite holding the execScript tests.
+type ScriptSuite struct{}
+
+// TestExecScript verifies that execScript emits a "#!/bin/sh" line
+// followed by an exec command in which every argument is
+// single-quoted, covering no arguments, plain words, embedded
+// spaces, double quotes, single quotes, and a newline.
+func (s *ScriptSuite) TestExecScript(c *C) {
+	type testCase struct {
+		args   []string
+		script string
+	}
+	cases := []testCase{
+		{nil, `exec`},
+		{[]string{`foo`}, `exec 'foo'`},
+		{[]string{`foo`, `bar baz`}, `exec 'foo' 'bar baz'`},
+		{[]string{`foo"`, "'waz 'qux\n"}, `exec 'foo"' ''\''waz '\''qux` + "\n" + `'`},
+	}
+	for _, tc := range cases {
+		c.Logf("%+v -> %+v", tc.args, tc.script)
+		c.Check(execScript(tc.args), Equals, "#!/bin/sh\n"+tc.script+"\n")
+	}
+}

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list