[ARVADOS] created: 2.1.0-447-g8685bdc41

Git user git at public.arvados.org
Mon Feb 15 16:08:11 UTC 2021


        at  8685bdc41012f1623cc02b573e27439fdf314799 (commit)


commit 8685bdc41012f1623cc02b573e27439fdf314799
Author: Tom Clegg <tom at curii.com>
Date:   Mon Feb 15 11:06:35 2021 -0500

    17384: Respect CrunchRunCommand and CrunchRunArgumentsList in a-d-c.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml
index 68e518732..c644de374 100644
--- a/lib/config/config.default.yml
+++ b/lib/config/config.default.yml
@@ -831,7 +831,11 @@ Clusters:
       # stale locks from a previous dispatch process.
       StaleLockTimeout: 1m
 
-      # The crunch-run command to manage the container on a node
+      # The crunch-run command used to start a container on a worker node.
+      #
+      # When dispatching to cloud VMs, this is used only if
+      # DeployRunnerBinary in the CloudVMs section is set to the empty
+      # string.
       CrunchRunCommand: "crunch-run"
 
       # Extra arguments to add to crunch-run invocation
@@ -1052,7 +1056,7 @@ Clusters:
         #
         # Use the empty string to disable this step: nothing will be
         # copied, and cloud instances are assumed to have a suitable
-        # version of crunch-run installed.
+        # version of crunch-run installed; see CrunchRunCommand above.
         DeployRunnerBinary: "/proc/self/exe"
 
         # Tags to add on all resources (VMs, NICs, disks) created by
diff --git a/lib/config/generated_config.go b/lib/config/generated_config.go
index 8ef787771..8354102c2 100644
--- a/lib/config/generated_config.go
+++ b/lib/config/generated_config.go
@@ -837,7 +837,11 @@ Clusters:
       # stale locks from a previous dispatch process.
       StaleLockTimeout: 1m
 
-      # The crunch-run command to manage the container on a node
+      # The crunch-run command used to start a container on a worker node.
+      #
+      # When dispatching to cloud VMs, this is used only if
+      # DeployRunnerBinary in the CloudVMs section is set to the empty
+      # string.
       CrunchRunCommand: "crunch-run"
 
       # Extra arguments to add to crunch-run invocation
@@ -1058,7 +1062,7 @@ Clusters:
         #
         # Use the empty string to disable this step: nothing will be
         # copied, and cloud instances are assumed to have a suitable
-        # version of crunch-run installed.
+        # version of crunch-run installed; see CrunchRunCommand above.
         DeployRunnerBinary: "/proc/self/exe"
 
         # Tags to add on all resources (VMs, NICs, disks) created by
diff --git a/lib/dispatchcloud/dispatcher_test.go b/lib/dispatchcloud/dispatcher_test.go
index d5d90bf35..8752ee054 100644
--- a/lib/dispatchcloud/dispatcher_test.go
+++ b/lib/dispatchcloud/dispatcher_test.go
@@ -52,8 +52,10 @@ func (s *DispatcherSuite) SetUpTest(c *check.C) {
 	s.cluster = &arvados.Cluster{
 		ManagementToken: "test-management-token",
 		Containers: arvados.ContainersConfig{
-			DispatchPrivateKey: string(dispatchprivraw),
-			StaleLockTimeout:   arvados.Duration(5 * time.Millisecond),
+			CrunchRunCommand:       "crunch-run",
+			CrunchRunArgumentsList: []string{"--foo", "--extra='args'"},
+			DispatchPrivateKey:     string(dispatchprivraw),
+			StaleLockTimeout:       arvados.Duration(5 * time.Millisecond),
 			CloudVMs: arvados.CloudVMsConfig{
 				Driver:               "test",
 				SyncInterval:         arvados.Duration(10 * time.Millisecond),
@@ -161,6 +163,7 @@ func (s *DispatcherSuite) TestDispatchToStubDriver(c *check.C) {
 		stubvm.CrunchRunDetachDelay = time.Duration(rand.Int63n(int64(10 * time.Millisecond)))
 		stubvm.ExecuteContainer = executeContainer
 		stubvm.CrashRunningContainer = finishContainer
+		stubvm.ExtraCrunchRunArgs = "'--foo' '--extra='\\''args'\\'''"
 		switch n % 7 {
 		case 0:
 			stubvm.Broken = time.Now().Add(time.Duration(rand.Int63n(90)) * time.Millisecond)
diff --git a/lib/dispatchcloud/test/stub_driver.go b/lib/dispatchcloud/test/stub_driver.go
index 4d32cf221..1b31a71a2 100644
--- a/lib/dispatchcloud/test/stub_driver.go
+++ b/lib/dispatchcloud/test/stub_driver.go
@@ -193,6 +193,7 @@ type StubVM struct {
 	ArvMountDeadlockRate  float64
 	ExecuteContainer      func(arvados.Container) int
 	CrashRunningContainer func(arvados.Container)
+	ExtraCrunchRunArgs    string // extra args expected after "crunch-run --detach --stdin-env "
 
 	sis          *StubInstanceSet
 	id           cloud.InstanceID
@@ -251,7 +252,7 @@ func (svm *StubVM) Exec(env map[string]string, command string, stdin io.Reader,
 		fmt.Fprint(stderr, "crunch-run: command not found\n")
 		return 1
 	}
-	if strings.HasPrefix(command, "crunch-run --detach --stdin-env ") {
+	if strings.HasPrefix(command, "crunch-run --detach --stdin-env "+svm.ExtraCrunchRunArgs) {
 		var stdinKV map[string]string
 		err := json.Unmarshal(stdinData, &stdinKV)
 		if err != nil {
diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index 6a74280ca..7289179fd 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -121,6 +121,8 @@ func NewPool(logger logrus.FieldLogger, arvClient *arvados.Client, reg *promethe
 		systemRootToken:                cluster.SystemRootToken,
 		installPublicKey:               installPublicKey,
 		tagKeyPrefix:                   cluster.Containers.CloudVMs.TagKeyPrefix,
+		runnerCmdDefault:               cluster.Containers.CrunchRunCommand,
+		runnerArgs:                     cluster.Containers.CrunchRunArgumentsList,
 		stop:                           make(chan bool),
 	}
 	wp.registerMetrics(reg)
@@ -160,6 +162,8 @@ type Pool struct {
 	systemRootToken                string
 	installPublicKey               ssh.PublicKey
 	tagKeyPrefix                   string
+	runnerCmdDefault               string   // crunch-run command to use if not deploying a binary
+	runnerArgs                     []string // extra args passed to crunch-run
 
 	// private state
 	subscribers  map[<-chan struct{}]chan<- struct{}
@@ -881,7 +885,7 @@ func (wp *Pool) loadRunnerData() error {
 	if wp.runnerData != nil {
 		return nil
 	} else if wp.runnerSource == "" {
-		wp.runnerCmd = "crunch-run"
+		wp.runnerCmd = wp.runnerCmdDefault
 		wp.runnerData = []byte{}
 		return nil
 	}
diff --git a/lib/dispatchcloud/worker/pool_test.go b/lib/dispatchcloud/worker/pool_test.go
index a85f7383a..0f5c5ee19 100644
--- a/lib/dispatchcloud/worker/pool_test.go
+++ b/lib/dispatchcloud/worker/pool_test.go
@@ -72,8 +72,8 @@ func (suite *PoolSuite) TestResumeAfterRestart(c *check.C) {
 	newExecutor := func(cloud.Instance) Executor {
 		return &stubExecutor{
 			response: map[string]stubResp{
-				"crunch-run --list": {},
-				"true":              {},
+				"crunch-run-custom --list": {},
+				"true":                     {},
 			},
 		}
 	}
@@ -87,6 +87,7 @@ func (suite *PoolSuite) TestResumeAfterRestart(c *check.C) {
 				SyncInterval:       arvados.Duration(time.Millisecond * 10),
 				TagKeyPrefix:       "testprefix:",
 			},
+			CrunchRunCommand: "crunch-run-custom",
 		},
 		InstanceTypes: arvados.InstanceTypeMap{
 			type1.Name: type1,
diff --git a/lib/dispatchcloud/worker/runner.go b/lib/dispatchcloud/worker/runner.go
index 0fd99aeee..63561874c 100644
--- a/lib/dispatchcloud/worker/runner.go
+++ b/lib/dispatchcloud/worker/runner.go
@@ -9,6 +9,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"net"
+	"strings"
 	"syscall"
 	"time"
 
@@ -22,6 +23,7 @@ type remoteRunner struct {
 	executor      Executor
 	envJSON       json.RawMessage
 	runnerCmd     string
+	runnerArgs    []string
 	remoteUser    string
 	timeoutTERM   time.Duration
 	timeoutSignal time.Duration
@@ -64,6 +66,7 @@ func newRemoteRunner(uuid string, wkr *worker) *remoteRunner {
 		executor:      wkr.executor,
 		envJSON:       envJSON,
 		runnerCmd:     wkr.wp.runnerCmd,
+		runnerArgs:    wkr.wp.runnerArgs,
 		remoteUser:    wkr.instance.RemoteUser(),
 		timeoutTERM:   wkr.wp.timeoutTERM,
 		timeoutSignal: wkr.wp.timeoutSignal,
@@ -81,7 +84,11 @@ func newRemoteRunner(uuid string, wkr *worker) *remoteRunner {
 // assume the remote process _might_ have started, at least until it
 // probes the worker and finds otherwise.
 func (rr *remoteRunner) Start() {
-	cmd := rr.runnerCmd + " --detach --stdin-env '" + rr.uuid + "'"
+	cmd := rr.runnerCmd + " --detach --stdin-env"
+	for _, arg := range rr.runnerArgs {
+		cmd += " '" + strings.Replace(arg, "'", "'\\''", -1) + "'"
+	}
+	cmd += " '" + rr.uuid + "'"
 	if rr.remoteUser != "root" {
 		cmd = "sudo " + cmd
 	}
diff --git a/lib/dispatchcloud/worker/worker_test.go b/lib/dispatchcloud/worker/worker_test.go
index cfb7a1bfb..4134788b2 100644
--- a/lib/dispatchcloud/worker/worker_test.go
+++ b/lib/dispatchcloud/worker/worker_test.go
@@ -236,6 +236,8 @@ func (suite *WorkerSuite) TestProbeAndUpdate(c *check.C) {
 			timeoutBooting:   bootTimeout,
 			timeoutProbe:     probeTimeout,
 			exited:           map[string]time.Time{},
+			runnerCmdDefault: "crunch-run",
+			runnerArgs:       []string{"--args=not used with --list"},
 			runnerCmd:        "crunch-run",
 			runnerData:       trial.deployRunner,
 			runnerMD5:        md5.Sum(trial.deployRunner),

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list