[ARVADOS] created: 2.1.0-447-g8685bdc41
Git user
git at public.arvados.org
Mon Feb 15 16:08:11 UTC 2021
at 8685bdc41012f1623cc02b573e27439fdf314799 (commit)
commit 8685bdc41012f1623cc02b573e27439fdf314799
Author: Tom Clegg <tom at curii.com>
Date: Mon Feb 15 11:06:35 2021 -0500
17384: Respect CrunchRunCommand and CrunchRunArgumentsList in a-d-c.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml
index 68e518732..c644de374 100644
--- a/lib/config/config.default.yml
+++ b/lib/config/config.default.yml
@@ -831,7 +831,11 @@ Clusters:
# stale locks from a previous dispatch process.
StaleLockTimeout: 1m
- # The crunch-run command to manage the container on a node
+ # The crunch-run command used to start a container on a worker node.
+ #
+ # When dispatching to cloud VMs, this is used only if
+ # DeployRunnerBinary in the CloudVMs section is set to the empty
+ # string.
CrunchRunCommand: "crunch-run"
# Extra arguments to add to crunch-run invocation
@@ -1052,7 +1056,7 @@ Clusters:
#
# Use the empty string to disable this step: nothing will be
# copied, and cloud instances are assumed to have a suitable
- # version of crunch-run installed.
+ # version of crunch-run installed; see CrunchRunCommand above.
DeployRunnerBinary: "/proc/self/exe"
# Tags to add on all resources (VMs, NICs, disks) created by
diff --git a/lib/config/generated_config.go b/lib/config/generated_config.go
index 8ef787771..8354102c2 100644
--- a/lib/config/generated_config.go
+++ b/lib/config/generated_config.go
@@ -837,7 +837,11 @@ Clusters:
# stale locks from a previous dispatch process.
StaleLockTimeout: 1m
- # The crunch-run command to manage the container on a node
+ # The crunch-run command used to start a container on a worker node.
+ #
+ # When dispatching to cloud VMs, this is used only if
+ # DeployRunnerBinary in the CloudVMs section is set to the empty
+ # string.
CrunchRunCommand: "crunch-run"
# Extra arguments to add to crunch-run invocation
@@ -1058,7 +1062,7 @@ Clusters:
#
# Use the empty string to disable this step: nothing will be
# copied, and cloud instances are assumed to have a suitable
- # version of crunch-run installed.
+ # version of crunch-run installed; see CrunchRunCommand above.
DeployRunnerBinary: "/proc/self/exe"
# Tags to add on all resources (VMs, NICs, disks) created by
diff --git a/lib/dispatchcloud/dispatcher_test.go b/lib/dispatchcloud/dispatcher_test.go
index d5d90bf35..8752ee054 100644
--- a/lib/dispatchcloud/dispatcher_test.go
+++ b/lib/dispatchcloud/dispatcher_test.go
@@ -52,8 +52,10 @@ func (s *DispatcherSuite) SetUpTest(c *check.C) {
s.cluster = &arvados.Cluster{
ManagementToken: "test-management-token",
Containers: arvados.ContainersConfig{
- DispatchPrivateKey: string(dispatchprivraw),
- StaleLockTimeout: arvados.Duration(5 * time.Millisecond),
+ CrunchRunCommand: "crunch-run",
+ CrunchRunArgumentsList: []string{"--foo", "--extra='args'"},
+ DispatchPrivateKey: string(dispatchprivraw),
+ StaleLockTimeout: arvados.Duration(5 * time.Millisecond),
CloudVMs: arvados.CloudVMsConfig{
Driver: "test",
SyncInterval: arvados.Duration(10 * time.Millisecond),
@@ -161,6 +163,7 @@ func (s *DispatcherSuite) TestDispatchToStubDriver(c *check.C) {
stubvm.CrunchRunDetachDelay = time.Duration(rand.Int63n(int64(10 * time.Millisecond)))
stubvm.ExecuteContainer = executeContainer
stubvm.CrashRunningContainer = finishContainer
+ stubvm.ExtraCrunchRunArgs = "'--foo' '--extra='\\''args'\\'''"
switch n % 7 {
case 0:
stubvm.Broken = time.Now().Add(time.Duration(rand.Int63n(90)) * time.Millisecond)
diff --git a/lib/dispatchcloud/test/stub_driver.go b/lib/dispatchcloud/test/stub_driver.go
index 4d32cf221..1b31a71a2 100644
--- a/lib/dispatchcloud/test/stub_driver.go
+++ b/lib/dispatchcloud/test/stub_driver.go
@@ -193,6 +193,7 @@ type StubVM struct {
ArvMountDeadlockRate float64
ExecuteContainer func(arvados.Container) int
CrashRunningContainer func(arvados.Container)
+ ExtraCrunchRunArgs string // extra args expected after "crunch-run --detach --stdin-env "
sis *StubInstanceSet
id cloud.InstanceID
@@ -251,7 +252,7 @@ func (svm *StubVM) Exec(env map[string]string, command string, stdin io.Reader,
fmt.Fprint(stderr, "crunch-run: command not found\n")
return 1
}
- if strings.HasPrefix(command, "crunch-run --detach --stdin-env ") {
+ if strings.HasPrefix(command, "crunch-run --detach --stdin-env "+svm.ExtraCrunchRunArgs) {
var stdinKV map[string]string
err := json.Unmarshal(stdinData, &stdinKV)
if err != nil {
diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index 6a74280ca..7289179fd 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -121,6 +121,8 @@ func NewPool(logger logrus.FieldLogger, arvClient *arvados.Client, reg *promethe
systemRootToken: cluster.SystemRootToken,
installPublicKey: installPublicKey,
tagKeyPrefix: cluster.Containers.CloudVMs.TagKeyPrefix,
+ runnerCmdDefault: cluster.Containers.CrunchRunCommand,
+ runnerArgs: cluster.Containers.CrunchRunArgumentsList,
stop: make(chan bool),
}
wp.registerMetrics(reg)
@@ -160,6 +162,8 @@ type Pool struct {
systemRootToken string
installPublicKey ssh.PublicKey
tagKeyPrefix string
+ runnerCmdDefault string // crunch-run command to use if not deploying a binary
+ runnerArgs []string // extra args passed to crunch-run
// private state
subscribers map[<-chan struct{}]chan<- struct{}
@@ -881,7 +885,7 @@ func (wp *Pool) loadRunnerData() error {
if wp.runnerData != nil {
return nil
} else if wp.runnerSource == "" {
- wp.runnerCmd = "crunch-run"
+ wp.runnerCmd = wp.runnerCmdDefault
wp.runnerData = []byte{}
return nil
}
diff --git a/lib/dispatchcloud/worker/pool_test.go b/lib/dispatchcloud/worker/pool_test.go
index a85f7383a..0f5c5ee19 100644
--- a/lib/dispatchcloud/worker/pool_test.go
+++ b/lib/dispatchcloud/worker/pool_test.go
@@ -72,8 +72,8 @@ func (suite *PoolSuite) TestResumeAfterRestart(c *check.C) {
newExecutor := func(cloud.Instance) Executor {
return &stubExecutor{
response: map[string]stubResp{
- "crunch-run --list": {},
- "true": {},
+ "crunch-run-custom --list": {},
+ "true": {},
},
}
}
@@ -87,6 +87,7 @@ func (suite *PoolSuite) TestResumeAfterRestart(c *check.C) {
SyncInterval: arvados.Duration(time.Millisecond * 10),
TagKeyPrefix: "testprefix:",
},
+ CrunchRunCommand: "crunch-run-custom",
},
InstanceTypes: arvados.InstanceTypeMap{
type1.Name: type1,
diff --git a/lib/dispatchcloud/worker/runner.go b/lib/dispatchcloud/worker/runner.go
index 0fd99aeee..63561874c 100644
--- a/lib/dispatchcloud/worker/runner.go
+++ b/lib/dispatchcloud/worker/runner.go
@@ -9,6 +9,7 @@ import (
"encoding/json"
"fmt"
"net"
+ "strings"
"syscall"
"time"
@@ -22,6 +23,7 @@ type remoteRunner struct {
executor Executor
envJSON json.RawMessage
runnerCmd string
+ runnerArgs []string
remoteUser string
timeoutTERM time.Duration
timeoutSignal time.Duration
@@ -64,6 +66,7 @@ func newRemoteRunner(uuid string, wkr *worker) *remoteRunner {
executor: wkr.executor,
envJSON: envJSON,
runnerCmd: wkr.wp.runnerCmd,
+ runnerArgs: wkr.wp.runnerArgs,
remoteUser: wkr.instance.RemoteUser(),
timeoutTERM: wkr.wp.timeoutTERM,
timeoutSignal: wkr.wp.timeoutSignal,
@@ -81,7 +84,11 @@ func newRemoteRunner(uuid string, wkr *worker) *remoteRunner {
// assume the remote process _might_ have started, at least until it
// probes the worker and finds otherwise.
func (rr *remoteRunner) Start() {
- cmd := rr.runnerCmd + " --detach --stdin-env '" + rr.uuid + "'"
+ cmd := rr.runnerCmd + " --detach --stdin-env"
+ for _, arg := range rr.runnerArgs {
+ cmd += " '" + strings.Replace(arg, "'", "'\\''", -1) + "'"
+ }
+ cmd += " '" + rr.uuid + "'"
if rr.remoteUser != "root" {
cmd = "sudo " + cmd
}
diff --git a/lib/dispatchcloud/worker/worker_test.go b/lib/dispatchcloud/worker/worker_test.go
index cfb7a1bfb..4134788b2 100644
--- a/lib/dispatchcloud/worker/worker_test.go
+++ b/lib/dispatchcloud/worker/worker_test.go
@@ -236,6 +236,8 @@ func (suite *WorkerSuite) TestProbeAndUpdate(c *check.C) {
timeoutBooting: bootTimeout,
timeoutProbe: probeTimeout,
exited: map[string]time.Time{},
+ runnerCmdDefault: "crunch-run",
+ runnerArgs: []string{"--args=not used with --list"},
runnerCmd: "crunch-run",
runnerData: trial.deployRunner,
runnerMD5: md5.Sum(trial.deployRunner),
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list