[ARVADOS] created: 2.1.0-1522-ga72955713

Git user git at public.arvados.org
Sat Oct 23 15:42:48 UTC 2021


        at  a729557139f4d2e618b56a505e37c8649f1d1c21 (commit)


commit a729557139f4d2e618b56a505e37c8649f1d1c21
Author: Ward Vandewege <ward at curii.com>
Date:   Sat Oct 23 11:42:14 2021 -0400

    18290: LSF: make the bsub arguments completely configurable.
    
    Arvados-DCO-1.1-Signed-off-by: Ward Vandewege <ward at curii.com>

diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml
index 4e2a0e26d..8b51a85d9 100644
--- a/lib/config/config.default.yml
+++ b/lib/config/config.default.yml
@@ -1021,14 +1021,23 @@ Clusters:
           AssignNodeHostname: "compute%<slot_number>d"
 
       LSF:
-        # Additional arguments to bsub when submitting Arvados
-        # containers as LSF jobs.
+        # Arguments to bsub when submitting Arvados containers as LSF jobs.
+        #
+        # Template variables starting with % will be substituted as follows:
+        #
+        # %U uuid
+        # %C number of cpus
+        # %M memory in MB
+        # %T tmp in MB
+        #
+        # Use %% to express a literal %. The %%J in the default will be changed
+        # to %J, which is interpreted by bsub itself.
         #
         # Note that the default arguments cause LSF to write two files
         # in /tmp on the compute node each time an Arvados container
         # runs. Ensure you have something in place to delete old files
-        # from /tmp, or adjust these arguments accordingly.
-        BsubArgumentsList: ["-o", "/tmp/crunch-run.%J.out", "-e", "/tmp/crunch-run.%J.err"]
+        # from /tmp, or adjust the "-o" and "-e" arguments accordingly.
+        BsubArgumentsList: ["-o", "/tmp/crunch-run.%%J.out", "-e", "/tmp/crunch-run.%%J.err", "-J", "%U", "-n", "%C", "-D", "%MMB", "-R", "rusage[mem=%MMB:tmp=%TMB] span[hosts=1]"]
 
         # Use sudo to switch to this user account when submitting LSF
         # jobs.
diff --git a/lib/config/generated_config.go b/lib/config/generated_config.go
index 875939a3e..519d1a8e5 100644
--- a/lib/config/generated_config.go
+++ b/lib/config/generated_config.go
@@ -1027,14 +1027,23 @@ Clusters:
           AssignNodeHostname: "compute%<slot_number>d"
 
       LSF:
-        # Additional arguments to bsub when submitting Arvados
-        # containers as LSF jobs.
+        # Arguments to bsub when submitting Arvados containers as LSF jobs.
+        #
+        # Template variables starting with % will be substituted as follows:
+        #
+        # %U uuid
+        # %C number of cpus
+        # %M memory in MB
+        # %T tmp in MB
+        #
+        # Use %% to express a literal %. The %%J in the default will be changed
+        # to %J, which is interpreted by bsub itself.
         #
         # Note that the default arguments cause LSF to write two files
         # in /tmp on the compute node each time an Arvados container
         # runs. Ensure you have something in place to delete old files
-        # from /tmp, or adjust these arguments accordingly.
-        BsubArgumentsList: ["-o", "/tmp/crunch-run.%J.out", "-e", "/tmp/crunch-run.%J.err"]
+        # from /tmp, or adjust the "-o" and "-e" arguments accordingly.
+        BsubArgumentsList: ["-o", "/tmp/crunch-run.%%J.out", "-e", "/tmp/crunch-run.%%J.err", "-J", "%U", "-n", "%C", "-D", "%MMB", "-R", "rusage[mem=%MMB:tmp=%TMB] span[hosts=1]"]
 
         # Use sudo to switch to this user account when submitting LSF
         # jobs.
diff --git a/lib/lsf/dispatch.go b/lib/lsf/dispatch.go
index d3ba605ab..c83e04d63 100644
--- a/lib/lsf/dispatch.go
+++ b/lib/lsf/dispatch.go
@@ -270,28 +270,47 @@ func (disp *dispatcher) bkill(ctr arvados.Container) {
 }
 
 func (disp *dispatcher) bsubArgs(container arvados.Container) ([]string, error) {
+	// BsubArgumentsList is a template: its %-variables are expanded below.
 	args := []string{"bsub"}
-	args = append(args, disp.Cluster.Containers.LSF.BsubArgumentsList...)
-	args = append(args, "-J", container.UUID)
-	args = append(args, disp.bsubConstraintArgs(container)...)
-	if u := disp.Cluster.Containers.LSF.BsubSudoUser; u != "" {
-		args = append([]string{"sudo", "-E", "-u", u}, args...)
-	}
-	return args, nil
-}
+	tmpArgs := disp.Cluster.Containers.LSF.BsubArgumentsList
 
-func (disp *dispatcher) bsubConstraintArgs(container arvados.Container) []string {
-	// TODO: propagate container.SchedulingParameters.Partitions
 	tmp := int64(math.Ceil(float64(dispatchcloud.EstimateScratchSpace(&container)) / 1048576))
 	vcpus := container.RuntimeConstraints.VCPUs
 	mem := int64(math.Ceil(float64(container.RuntimeConstraints.RAM+
 		container.RuntimeConstraints.KeepCacheRAM+
 		int64(disp.Cluster.Containers.ReserveExtraRAM)) / 1048576))
-	return []string{
-		"-n", fmt.Sprintf("%d", vcpus),
-		"-D", fmt.Sprintf("%dMB", mem), // ulimit -d (note this doesn't limit the total container memory usage)
-		"-R", fmt.Sprintf("rusage[mem=%dMB:tmp=%dMB] span[hosts=1]", mem, tmp),
+	// Match "%" plus any one character so that "%%" is consumed as a unit and adjacent variables ("%U%C") all match.
+	r := regexp.MustCompile(`%(.)`)
+	for _, a := range tmpArgs {
+		args = append(args, r.ReplaceAllStringFunc(a, func(m string) string {
+			parts := r.FindStringSubmatch(m)
+			return disp.substitute(parts[1], container.UUID, vcpus, mem, tmp)
+		}))
+	}
+
+	if u := disp.Cluster.Containers.LSF.BsubSudoUser; u != "" {
+		args = append([]string{"sudo", "-E", "-u", u}, args...)
+	}
+	return args, nil
+}
+
+// substitute expands one template letter: C=vcpus, M=mem, T=tmp, U=uuid.
+// Any other letter l yields "%"+l; a "%%" in the result collapses to "%".
+func (disp *dispatcher) substitute(l string, uuid string, vcpus int, mem, tmp int64) string {
+	var arg string
+	switch l {
+	case "C":
+		arg = fmt.Sprintf("%d", vcpus)
+	case "T":
+		arg = fmt.Sprintf("%d", tmp)
+	case "M":
+		arg = fmt.Sprintf("%d", mem)
+	case "U":
+		arg = uuid
+	default:
+		arg = "%" + l
 	}
+	return regexp.MustCompile(`%%`).ReplaceAllString(arg, "%")
 }
 
 // Check the next bjobs report, and invoke TrackContainer for all the

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list