[ARVADOS] created: ac783a2ebf1bc0bcbdb294648a8f929df9d4ae11
Git user
git at public.curoverse.com
Mon Aug 1 22:42:54 EDT 2016
at ac783a2ebf1bc0bcbdb294648a8f929df9d4ae11 (commit)
commit ac783a2ebf1bc0bcbdb294648a8f929df9d4ae11
Author: Tom Clegg <tom at curoverse.com>
Date: Mon Aug 1 22:18:51 2016 -0400
9406: Add -cgroup-parent-subsystem argument.
This accommodates setups where the scheduling system (e.g., SLURM)
uses cgroups to impose resource limits on crunch-run itself, and those
limits are meant to include resources used by the container itself.
Normally, when docker creates a container, it creates new cgroups
inside the "docker" cgroup, which circumvents any resource limits set
on crunch-run's own cgroup.
If a subsystem is specified (e.g., -cgroup-parent-subsystem=memory),
crunch-run will look in /proc/self/cgroup to find the current cgroup
for the given subsystem, and instruct docker to use that cgroup as the
parent of the new container's cgroup instead.
diff --git a/services/crunch-run/cgroup.go b/services/crunch-run/cgroup.go
new file mode 100644
index 0000000..b304519
--- /dev/null
+++ b/services/crunch-run/cgroup.go
@@ -0,0 +1,24 @@
+package main
+
+import (
+ "bytes"
+ "io/ioutil"
+ "log"
+)
+
+// findCgroup returns the current process's cgroup path for the given
+// subsystem (controller), as listed in /proc/self/cgroup.
+//
+// Each line of that file is split into three colon-separated fields,
+// "hierarchy-ID:controller-list:path". The controller list may name
+// several comma-separated controllers mounted on one hierarchy (e.g.,
+// "cpu,cpuacct"); the subsystem matches if it equals any entry in the
+// list. The path of the first matching line is returned.
+//
+// If the file cannot be read, or no line lists the subsystem, the
+// problem is reported with log.Fatal/log.Fatalf, which exits the
+// process.
+func findCgroup(subsystem string) string {
+	subsys := []byte(subsystem)
+	cgroups, err := ioutil.ReadFile("/proc/self/cgroup")
+	if err != nil {
+		log.Fatal(err)
+	}
+	for _, line := range bytes.Split(cgroups, []byte("\n")) {
+		// Limit the split to 3 fields so any ":" inside the
+		// cgroup path itself is left intact.
+		toks := bytes.SplitN(line, []byte(":"), 3)
+		if len(toks) < 3 {
+			// Blank or malformed line (e.g., after the final
+			// newline).
+			continue
+		}
+		for _, controller := range bytes.Split(toks[1], []byte(",")) {
+			if bytes.Equal(controller, subsys) {
+				return string(toks[2])
+			}
+		}
+	}
+	log.Fatalf("subsystem %q not found in /proc/self/cgroup", subsystem)
+	// Not reached: log.Fatalf exits, but the compiler does not treat
+	// it as a terminating statement.
+	return ""
+}
diff --git a/services/crunch-run/cgroup_test.go b/services/crunch-run/cgroup_test.go
new file mode 100644
index 0000000..0b44f09
--- /dev/null
+++ b/services/crunch-run/cgroup_test.go
@@ -0,0 +1,17 @@
+package main
+
+import (
+ . "gopkg.in/check.v1"
+)
+
+type CgroupSuite struct{}
+
+var _ = Suite(&CgroupSuite{})
+
+// TestFindCgroup checks that findCgroup reports a non-empty cgroup
+// for the "memory" and "cpuset" subsystems, logging each result.
+func (s *CgroupSuite) TestFindCgroup(c *C) {
+	subsystems := []string{"memory", "cpuset"}
+	for i := range subsystems {
+		name := subsystems[i]
+		cgroup := findCgroup(name)
+		c.Check(cgroup, Not(Equals), "")
+		c.Logf("cgroup(%q) == %q", name, cgroup)
+	}
+}
diff --git a/services/crunch-run/crunchrun.go b/services/crunch-run/crunchrun.go
index 32d524a..40e9fc1 100644
--- a/services/crunch-run/crunchrun.go
+++ b/services/crunch-run/crunchrun.go
@@ -93,11 +93,12 @@ type ContainerRunner struct {
ArvMountExit chan error
finalState string
- statLogger io.WriteCloser
- statReporter *crunchstat.Reporter
- statInterval time.Duration
- cgroupRoot string
- cgroupParent string
+ statLogger io.WriteCloser
+ statReporter *crunchstat.Reporter
+ statInterval time.Duration
+ cgroupRoot string
+ expectCgroupParent string
+ setCgroupParent string
}
// SetupSignals sets up signal handling to gracefully terminate the underlying
@@ -393,7 +394,7 @@ func (runner *ContainerRunner) StartCrunchstat() {
runner.statReporter = &crunchstat.Reporter{
CID: runner.ContainerID,
Logger: log.New(runner.statLogger, "", 0),
- CgroupParent: runner.cgroupParent,
+ CgroupParent: runner.expectCgroupParent,
CgroupRoot: runner.cgroupRoot,
PollPeriod: runner.statInterval,
}
@@ -480,8 +481,13 @@ func (runner *ContainerRunner) CreateContainer() error {
return fmt.Errorf("While creating container: %v", err)
}
- runner.HostConfig = dockerclient.HostConfig{Binds: runner.Binds,
- LogConfig: dockerclient.LogConfig{Type: "none"}}
+ runner.HostConfig = dockerclient.HostConfig{
+ Binds: runner.Binds,
+ CgroupParent: runner.setCgroupParent,
+ LogConfig: dockerclient.LogConfig{
+ Type: "none",
+ },
+ }
return runner.AttachStreams()
}
@@ -823,7 +829,8 @@ func NewContainerRunner(api IArvadosClient,
func main() {
statInterval := flag.Duration("crunchstat-interval", 10*time.Second, "sampling period for periodic resource usage reporting")
cgroupRoot := flag.String("cgroup-root", "/sys/fs/cgroup", "path to sysfs cgroup tree")
- cgroupParent := flag.String("cgroup-parent", "docker", "name of container's parent cgroup")
+ cgroupParent := flag.String("cgroup-parent", "docker", "name of container's parent cgroup (ignored if -cgroup-parent-subsystem is used)")
+ cgroupParentSubsystem := flag.String("cgroup-parent-subsystem", "", "use current cgroup for given subsystem as parent cgroup for container")
flag.Parse()
containerId := flag.Arg(0)
@@ -850,7 +857,12 @@ func main() {
cr := NewContainerRunner(api, kc, docker, containerId)
cr.statInterval = *statInterval
cr.cgroupRoot = *cgroupRoot
- cr.cgroupParent = *cgroupParent
+ cr.expectCgroupParent = *cgroupParent
+ if *cgroupParentSubsystem != "" {
+ p := findCgroup(*cgroupParentSubsystem)
+ cr.setCgroupParent = p
+ cr.expectCgroupParent = p
+ }
err = cr.Run()
if err != nil {
commit 483ca35ac4348924cfbc187dab4f1b88a272eea7
Author: Tom Clegg <tom at curoverse.com>
Date: Mon Aug 1 21:42:37 2016 -0400
9406: Support passing additional arguments from crunch-dispatch-slurm to crunch-run.
diff --git a/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go b/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go
index 740df55..af2c42e 100644
--- a/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go
+++ b/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go
@@ -9,6 +9,7 @@ import (
"git.curoverse.com/arvados.git/sdk/go/arvados"
"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
"git.curoverse.com/arvados.git/sdk/go/dispatch"
+ "io"
"io/ioutil"
"log"
"math"
@@ -20,9 +21,14 @@ import (
// Config used by crunch-dispatch-slurm
type Config struct {
- SbatchArguments []string
- PollPeriod *time.Duration
- CrunchRunCommand *string
+ SbatchArguments []string
+ PollPeriod *time.Duration
+
+ // crunch-run command to invoke. The container UUID will be
+ // appended. If nil, []string{"crunch-run"} will be used.
+ //
+ // Example: []string{"crunch-run", "--cgroup-parent-subsystem=memory"}
+ CrunchRunCommand []string
}
func main() {
@@ -52,11 +58,6 @@ func doMain() error {
10*time.Second,
"Time duration to poll for queued containers")
- config.CrunchRunCommand = flags.String(
- "crunch-run-command",
- "/usr/bin/crunch-run",
- "Crunch command to run container")
-
// Parse args; omit the first arg which is the command name
flags.Parse(os.Args[1:])
@@ -66,6 +67,10 @@ func doMain() error {
return err
}
+ if config.CrunchRunCommand == nil {
+ config.CrunchRunCommand = []string{"crunch-run"}
+ }
+
arv, err := arvadosclient.MakeArvadosClient()
if err != nil {
log.Printf("Error making Arvados client: %v", err)
@@ -115,7 +120,7 @@ var scancelCmd = scancelFunc
// Submit job to slurm using sbatch.
func submit(dispatcher *dispatch.Dispatcher,
- container arvados.Container, crunchRunCommand string) (submitErr error) {
+ container arvados.Container, crunchRunCommand []string) (submitErr error) {
defer func() {
// If we didn't get as far as submitting a slurm job,
// unlock the container and return it to the queue.
@@ -178,7 +183,7 @@ func submit(dispatcher *dispatch.Dispatcher,
// Send a tiny script on stdin to execute the crunch-run command
// slurm actually enforces that this must be a #! script
- fmt.Fprintf(stdinWriter, "#!/bin/sh\nexec '%s' '%s'\n", crunchRunCommand, container.UUID)
+ io.WriteString(stdinWriter, execScript(append(crunchRunCommand, container.UUID)))
stdinWriter.Close()
err = cmd.Wait()
@@ -215,7 +220,7 @@ func monitorSubmitOrCancel(dispatcher *dispatch.Dispatcher, container arvados.Co
log.Printf("About to submit queued container %v", container.UUID)
- if err := submit(dispatcher, container, *config.CrunchRunCommand); err != nil {
+ if err := submit(dispatcher, container, config.CrunchRunCommand); err != nil {
log.Printf("Error submitting container %s to slurm: %v",
container.UUID, err)
// maybe sbatch is broken, put it back to queued
diff --git a/services/crunch-dispatch-slurm/crunch-dispatch-slurm_test.go b/services/crunch-dispatch-slurm/crunch-dispatch-slurm_test.go
index a559298..6692f7f 100644
--- a/services/crunch-dispatch-slurm/crunch-dispatch-slurm_test.go
+++ b/services/crunch-dispatch-slurm/crunch-dispatch-slurm_test.go
@@ -143,8 +143,7 @@ func (s *TestSuite) integrationTest(c *C,
c.Check(err, IsNil)
c.Check(len(containers.Items), Equals, 1)
- echo := "echo"
- config.CrunchRunCommand = &echo
+ config.CrunchRunCommand = []string{"echo"}
doneProcessing := make(chan struct{})
dispatcher := dispatch.Dispatcher{
@@ -206,7 +205,7 @@ func testWithServerStub(c *C, apiStubResponses map[string]arvadostest.StubRespon
log.SetOutput(io.MultiWriter(buf, os.Stderr))
defer log.SetOutput(os.Stderr)
- config.CrunchRunCommand = &crunchCmd
+ config.CrunchRunCommand = []string{crunchCmd}
doneProcessing := make(chan struct{})
dispatcher := dispatch.Dispatcher{
diff --git a/services/crunch-dispatch-slurm/script.go b/services/crunch-dispatch-slurm/script.go
new file mode 100644
index 0000000..93ae6b5
--- /dev/null
+++ b/services/crunch-dispatch-slurm/script.go
@@ -0,0 +1,15 @@
+package main
+
+import (
+ "strings"
+)
+
+// execScript returns the text of a /bin/sh script that execs the
+// command given by args. Each argument is wrapped in single quotes,
+// with every embedded single quote rewritten as '\'' so the shell
+// receives the argument verbatim. With no args the script is a bare
+// "exec" line.
+func execScript(args []string) string {
+	words := make([]string, 0, len(args)+1)
+	words = append(words, "#!/bin/sh\nexec")
+	for _, arg := range args {
+		escaped := strings.Replace(arg, `'`, `'\''`, -1)
+		words = append(words, `'`+escaped+`'`)
+	}
+	return strings.Join(words, " ") + "\n"
+}
diff --git a/services/crunch-dispatch-slurm/script_test.go b/services/crunch-dispatch-slurm/script_test.go
new file mode 100644
index 0000000..3cb407d
--- /dev/null
+++ b/services/crunch-dispatch-slurm/script_test.go
@@ -0,0 +1,24 @@
+package main
+
+import (
+ . "gopkg.in/check.v1"
+)
+
+var _ = Suite(&ScriptSuite{})
+
+type ScriptSuite struct{}
+
+func (s *ScriptSuite) TestExecScript(c *C) {
+ for _, test := range []struct {
+ args []string
+ script string
+ }{
+ {nil, `exec`},
+ {[]string{`foo`}, `exec 'foo'`},
+ {[]string{`foo`, `bar baz`}, `exec 'foo' 'bar baz'`},
+ {[]string{`foo"`, "'waz 'qux\n"}, `exec 'foo"' ''\''waz '\''qux` + "\n" + `'`},
+ } {
+ c.Logf("%+v -> %+v", test.args, test.script)
+ c.Check(execScript(test.args), Equals, "#!/bin/sh\n"+test.script+"\n")
+ }
+}
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list