[arvados] created: 2.7.0-6181-g6c2315672c

git repository hosting git at public.arvados.org
Wed Mar 13 21:37:40 UTC 2024


        at  6c2315672cb6c951190d69403a17acf2ab88bd8f (commit)


commit 6c2315672cb6c951190d69403a17acf2ab88bd8f
Author: Tom Clegg <tom at curii.com>
Date:   Wed Mar 13 17:22:39 2024 -0400

    21449: Propagate max_run_time to LSF scheduler.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/doc/install/crunch2-lsf/install-dispatch.html.textile.liquid b/doc/install/crunch2-lsf/install-dispatch.html.textile.liquid
index fc4393d0b6..6aeb11040c 100644
--- a/doc/install/crunch2-lsf/install-dispatch.html.textile.liquid
+++ b/doc/install/crunch2-lsf/install-dispatch.html.textile.liquid
@@ -75,6 +75,7 @@ Template variables starting with % will be substituted as follows:
 %M memory in MB
 %T tmp in MB
 %G number of GPU devices (@runtime_constraints.cuda.device_count@)
+%W maximum job run time in minutes, suitable for use with @-W@ or @-We@ flags (see MaxRunTimeOverhead and MaxRunTimeDefault below)
 
 Use %% to express a literal %. The %%J in the default will be changed to %J, which is interpreted by @bsub@ itself.
 
@@ -83,7 +84,7 @@ For example:
 <notextile>
 <pre>    Containers:
       LSF:
-        <code class="userinput">BsubArgumentsList: <b>["-o", "/tmp/crunch-run.%%J.out", "-e", "/tmp/crunch-run.%%J.err", "-J", "%U", "-n", "%C", "-D", "%MMB", "-R", "rusage[mem=%MMB:tmp=%TMB] span[hosts=1]", "-R", "select[mem>=%MMB]", "-R", "select[tmp>=%TMB]", "-R", "select[ncpus>=%C]"]</b></code>
+        <code class="userinput">BsubArgumentsList: <b>["-o", "/tmp/crunch-run.%%J.out", "-e", "/tmp/crunch-run.%%J.err", "-J", "%U", "-n", "%C", "-D", "%MMB", "-R", "rusage[mem=%MMB:tmp=%TMB] span[hosts=1]", "-R", "select[mem>=%MMB]", "-R", "select[tmp>=%TMB]", "-R", "select[ncpus>=%C]", "-We", "%W"]</b></code>
 </pre>
 </notextile>
 
@@ -100,6 +101,14 @@ If the container requests access to GPUs (@runtime_constraints.cuda.device_count
 </pre>
 </notextile>
 
+h3(#MaxRunTimeOverhead). Containers.LSF.MaxRunTimeOverhead
+
+Extra time to add to each container's @scheduling_parameters.max_run_time@ value when substituting for @%W@ in @BsubArgumentsList@, to account for time spent setting up the container image, copying output files, etc.
+
+h3(#MaxRunTimeDefault). Containers.LSF.MaxRunTimeDefault
+
+Default @max_run_time@ value to use for containers that do not specify one in @scheduling_parameters.max_run_time@. If this is zero, and @BsubArgumentsList@ contains @"-W", "%W"@ or @"-We", "%W"@, those arguments will be dropped when submitting containers that do not specify @scheduling_parameters.max_run_time@.
+
 h3(#PollInterval). Containers.PollInterval
 
 arvados-dispatch-lsf polls the API server periodically for new containers to run.  The @PollInterval@ option controls how often this poll happens.  Set this to a string of numbers suffixed with one of the time units @s@, @m@, or @h@.  For example:
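
As a worked example of the new %W value (hypothetical numbers): a container with scheduling_parameters.max_run_time of 3600 seconds (one hour), plus the default MaxRunTimeOverhead of 5m, gives 3900 seconds; ceil(3900/60) = 65, so the trailing "-We", "%W" in the example above is submitted as "-We", "65". A container with no max_run_time (and MaxRunTimeDefault left at zero) has the "-We", "%W" pair dropped instead; a standalone sketch of the fallback and rounding follows the config.default.yml changes below.
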
diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml
index e3b67f7259..70ecd10681 100644
--- a/lib/config/config.default.yml
+++ b/lib/config/config.default.yml
@@ -1379,15 +1379,23 @@ Clusters:
         # %M memory in MB
         # %T tmp in MB
         # %G number of GPU devices (runtime_constraints.cuda.device_count)
+        # %W maximum run time in minutes (see MaxRunTimeOverhead and
+        #    MaxRunTimeDefault below)
         #
-        # Use %% to express a literal %. The %%J in the default will be changed
-        # to %J, which is interpreted by bsub itself.
+        # Use %% to express a literal %. For example, the %%J in the
+        # default argument list will be changed to %J, which is
+        # interpreted by bsub itself.
         #
         # Note that the default arguments cause LSF to write two files
         # in /tmp on the compute node each time an Arvados container
         # runs. Ensure you have something in place to delete old files
         # from /tmp, or adjust the "-o" and "-e" arguments accordingly.
-        BsubArgumentsList: ["-o", "/tmp/crunch-run.%%J.out", "-e", "/tmp/crunch-run.%%J.err", "-J", "%U", "-n", "%C", "-D", "%MMB", "-R", "rusage[mem=%MMB:tmp=%TMB] span[hosts=1]", "-R", "select[mem>=%MMB]", "-R", "select[tmp>=%TMB]", "-R", "select[ncpus>=%C]"]
+        #
+        # If ["-We", "%W"] or ["-W", "%W"] appear in this argument
+        # list, and MaxRunTimeDefault is not set (see below), both of
+        # those arguments will be dropped from the argument list when
+        # running a container that has no max_run_time value.
+        BsubArgumentsList: ["-o", "/tmp/crunch-run.%%J.out", "-e", "/tmp/crunch-run.%%J.err", "-J", "%U", "-n", "%C", "-D", "%MMB", "-R", "rusage[mem=%MMB:tmp=%TMB] span[hosts=1]", "-R", "select[mem>=%MMB]", "-R", "select[tmp>=%TMB]", "-R", "select[ncpus>=%C]", "-We", "%W"]
 
         # Arguments that will be appended to the bsub command line
         # when submitting Arvados containers as LSF jobs with
@@ -1402,6 +1410,19 @@ Clusters:
         # Arvados LSF dispatcher runs ("submission host").
         BsubSudoUser: "crunch"
 
+        # When passing the scheduling_parameters.max_run_time value
+        # to LSF via "%W", add this much time to account for
+        # crunch-run startup/shutdown overhead.
+        MaxRunTimeOverhead: 5m
+
+        # If non-zero, MaxRunTimeDefault is used as the default value
+        # for max_run_time for containers that do not specify a time
+        # limit.  MaxRunTimeOverhead will be added to this.
+        #
+        # Example:
+        # MaxRunTimeDefault: 2h
+        MaxRunTimeDefault: 0
+
       JobsAPI:
         # Enable the legacy 'jobs' API (crunch v1).  This value must be a string.
         #
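
To make the fallback order in these comments concrete, here is a minimal standalone sketch (the function name and input values are illustrative, not part of the patch) of how a %W value is derived from a container's max_run_time, MaxRunTimeDefault, and MaxRunTimeOverhead. The same computation appears in lib/lsf/dispatch.go below.

    package main

    import (
        "fmt"
        "math"
        "time"
    )

    // wMinutes sketches the fallback described above: use the container's own
    // max_run_time if set, otherwise MaxRunTimeDefault; if the result is
    // non-zero, add MaxRunTimeOverhead; round up to whole minutes for %W.
    func wMinutes(containerMaxRunTime, maxRunTimeDefault, overhead time.Duration) int64 {
        t := containerMaxRunTime
        if t == 0 {
            t = maxRunTimeDefault
        }
        if t > 0 {
            t += overhead
        }
        return int64(math.Ceil(t.Seconds() / 60))
    }

    func main() {
        // Container without max_run_time, MaxRunTimeDefault: 2h, MaxRunTimeOverhead: 5m.
        fmt.Println(wMinutes(0, 2*time.Hour, 5*time.Minute)) // 125
        // Neither max_run_time nor MaxRunTimeDefault set: %W is 0, and a
        // "-W"/"-We" pair in BsubArgumentsList is dropped at submission time.
        fmt.Println(wMinutes(0, 0, 5*time.Minute)) // 0
    }
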
diff --git a/lib/lsf/dispatch.go b/lib/lsf/dispatch.go
index d1408d23cb..897e5803f2 100644
--- a/lib/lsf/dispatch.go
+++ b/lib/lsf/dispatch.go
@@ -306,6 +306,15 @@ func (disp *dispatcher) bsubArgs(container arvados.Container) ([]string, error)
 		container.RuntimeConstraints.KeepCacheRAM+
 		int64(disp.Cluster.Containers.ReserveExtraRAM)) / 1048576))
 
+	maxruntime := time.Duration(container.SchedulingParameters.MaxRunTime) * time.Second
+	if maxruntime == 0 {
+		maxruntime = disp.Cluster.Containers.LSF.MaxRunTimeDefault.Duration()
+	}
+	if maxruntime > 0 {
+		maxruntime += disp.Cluster.Containers.LSF.MaxRunTimeOverhead.Duration()
+	}
+	maxrunminutes := int64(math.Ceil(float64(maxruntime.Seconds()) / 60))
+
 	repl := map[string]string{
 		"%%": "%",
 		"%C": fmt.Sprintf("%d", vcpus),
@@ -313,6 +322,7 @@ func (disp *dispatcher) bsubArgs(container arvados.Container) ([]string, error)
 		"%T": fmt.Sprintf("%d", tmp),
 		"%U": container.UUID,
 		"%G": fmt.Sprintf("%d", container.RuntimeConstraints.CUDA.DeviceCount),
+		"%W": fmt.Sprintf("%d", maxrunminutes),
 	}
 
 	re := regexp.MustCompile(`%.`)
@@ -321,7 +331,16 @@ func (disp *dispatcher) bsubArgs(container arvados.Container) ([]string, error)
 	if container.RuntimeConstraints.CUDA.DeviceCount > 0 {
 		argumentTemplate = append(argumentTemplate, disp.Cluster.Containers.LSF.BsubCUDAArguments...)
 	}
-	for _, a := range argumentTemplate {
+	for idx, a := range argumentTemplate {
+		if idx > 0 && (argumentTemplate[idx-1] == "-W" || argumentTemplate[idx-1] == "-We") && a == "%W" && maxrunminutes == 0 {
+			// LSF docs don't specify an argument to "-W"
+			// or "-We" that indicates "unknown", so
+			// instead we drop the "-W %W" part of the
+			// command line entirely when max runtime is
+			// unknown.
+			args = args[:len(args)-1]
+			continue
+		}
 		args = append(args, re.ReplaceAllStringFunc(a, func(s string) string {
 			subst := repl[s]
 			if len(subst) == 0 {
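
For readers who want to see the effect of the new drop behavior on the final argument list, here is a simplified standalone sketch of the substitution loop above (illustrative names and template; not the dispatcher's actual bsubArgs function):

    package main

    import (
        "fmt"
        "regexp"
    )

    // substituteArgs is a simplified stand-in for the argument expansion in
    // lib/lsf/dispatch.go above: it fills in %W, and when the run time is
    // unknown (0 minutes) it drops a preceding "-W" or "-We" flag together
    // with its "%W" placeholder.
    func substituteArgs(template []string, maxRunMinutes int64) []string {
        re := regexp.MustCompile(`%.`)
        repl := map[string]string{"%%": "%", "%W": fmt.Sprintf("%d", maxRunMinutes)}
        var args []string
        for idx, a := range template {
            if idx > 0 && (template[idx-1] == "-W" || template[idx-1] == "-We") && a == "%W" && maxRunMinutes == 0 {
                args = args[:len(args)-1] // also remove the "-W"/"-We" already appended
                continue
            }
            args = append(args, re.ReplaceAllStringFunc(a, func(s string) string { return repl[s] }))
        }
        return args
    }

    func main() {
        template := []string{"-n", "1", "-We", "%W"}
        fmt.Println(substituteArgs(template, 65)) // [-n 1 -We 65]
        fmt.Println(substituteArgs(template, 0))  // [-n 1]
    }
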
diff --git a/lib/lsf/dispatch_test.go b/lib/lsf/dispatch_test.go
index cd41071d2c..e1e0bcae31 100644
--- a/lib/lsf/dispatch_test.go
+++ b/lib/lsf/dispatch_test.go
@@ -34,6 +34,7 @@ type suite struct {
 	crTooBig      arvados.ContainerRequest
 	crPending     arvados.ContainerRequest
 	crCUDARequest arvados.ContainerRequest
+	crMaxRunTime  arvados.ContainerRequest
 }
 
 func (s *suite) TearDownTest(c *check.C) {
@@ -116,6 +117,25 @@ func (s *suite) SetUpTest(c *check.C) {
 	})
 	c.Assert(err, check.IsNil)
 
+	err = arvados.NewClientFromEnv().RequestAndDecode(&s.crMaxRunTime, "POST", "arvados/v1/container_requests", nil, map[string]interface{}{
+		"container_request": map[string]interface{}{
+			"runtime_constraints": arvados.RuntimeConstraints{
+				RAM:   1000000,
+				VCPUs: 1,
+			},
+			"scheduling_parameters": arvados.SchedulingParameters{
+				MaxRunTime: 124,
+			},
+			"container_image":     arvadostest.DockerImage112PDH,
+			"command":             []string{"sleep", "123"},
+			"mounts":              map[string]arvados.Mount{"/mnt/out": {Kind: "tmp", Capacity: 1000}},
+			"output_path":         "/mnt/out",
+			"state":               arvados.ContainerRequestStateCommitted,
+			"priority":            1,
+			"container_count_max": 1,
+		},
+	})
+	c.Assert(err, check.IsNil)
 }
 
 type lsfstub struct {
@@ -141,12 +161,7 @@ func (stub lsfstub) stubCommand(s *suite, c *check.C) func(prog string, args ...
 		}
 		switch prog {
 		case "bsub":
-			defaultArgs := s.disp.Cluster.Containers.LSF.BsubArgumentsList
-			if args[5] == s.crCUDARequest.ContainerUUID {
-				c.Assert(len(args), check.Equals, len(defaultArgs)+len(s.disp.Cluster.Containers.LSF.BsubCUDAArguments))
-			} else {
-				c.Assert(len(args), check.Equals, len(defaultArgs))
-			}
+			c.Assert(len(args) > 5, check.Equals, true)
 			// %%J must have been rewritten to %J
 			c.Check(args[1], check.Equals, "/tmp/crunch-run.%J.out")
 			args = args[4:]
@@ -204,6 +219,21 @@ func (stub lsfstub) stubCommand(s *suite, c *check.C) func(prog string, args ...
 				fakejobq[nextjobid] = args[1]
 				nextjobid++
 				mtx.Unlock()
+			case s.crMaxRunTime.ContainerUUID:
+				c.Check(args, check.DeepEquals, []string{
+					"-J", s.crMaxRunTime.ContainerUUID,
+					"-n", "1",
+					"-D", "257MB",
+					"-R", "rusage[mem=257MB:tmp=2304MB] span[hosts=1]",
+					"-R", "select[mem>=257MB]",
+					"-R", "select[tmp>=2304MB]",
+					"-R", "select[ncpus>=1]",
+					"-We", "8", // 124s + 5m overhead + roundup = 8m
+				})
+				mtx.Lock()
+				fakejobq[nextjobid] = args[1]
+				nextjobid++
+				mtx.Unlock()
 			default:
 				c.Errorf("unexpected uuid passed to bsub: args %q", args)
 				return exec.Command("false")
diff --git a/sdk/go/arvados/config.go b/sdk/go/arvados/config.go
index 9b2cf155b0..698ee20d8c 100644
--- a/sdk/go/arvados/config.go
+++ b/sdk/go/arvados/config.go
@@ -545,9 +545,11 @@ type ContainersConfig struct {
 		}
 	}
 	LSF struct {
-		BsubSudoUser      string
-		BsubArgumentsList []string
-		BsubCUDAArguments []string
+		BsubSudoUser       string
+		BsubArgumentsList  []string
+		BsubCUDAArguments  []string
+		MaxRunTimeOverhead Duration
+		MaxRunTimeDefault  Duration
 	}
 }
 
diff --git a/services/api/app/models/container_request.rb b/services/api/app/models/container_request.rb
index 0a8e33d05f..f5789f31f6 100644
--- a/services/api/app/models/container_request.rb
+++ b/services/api/app/models/container_request.rb
@@ -470,8 +470,9 @@ class ContainerRequest < ArvadosModel
 
   def validate_scheduling_parameters
     if self.state == Committed
-      if scheduling_parameters.include? 'partitions' and
-         (!scheduling_parameters['partitions'].is_a?(Array) ||
+      if scheduling_parameters.include?('partitions') and
+        !scheduling_parameters['partitions'].nil? and
+        (!scheduling_parameters['partitions'].is_a?(Array) ||
           scheduling_parameters['partitions'].reject{|x| !x.is_a?(String)}.size !=
             scheduling_parameters['partitions'].size)
             errors.add :scheduling_parameters, "partitions must be an array of strings"

-----------------------------------------------------------------------


hooks/post-receive
-- 



