[ARVADOS] updated: 2.1.0-1764-g565612fd4

Git user git at public.arvados.org
Wed Jan 5 20:53:51 UTC 2022


Summary of changes:
 lib/config/config.default.yml  |  6 ++++++
 lib/config/generated_config.go |  6 ++++++
 lib/lsf/dispatch.go            |  6 +++++-
 lib/lsf/dispatch_test.go       | 48 +++++++++++++++++++++++++++++++++++++++---
 sdk/go/arvados/config.go       |  1 +
 5 files changed, 63 insertions(+), 4 deletions(-)

       via  565612fd40474044e2afaa4fcb993c8c0197ca8e (commit)
       via  3c034a63249b94b07449407ad5c4f4115a1ef974 (commit)
      from  2c39f766745e853ae216d5489236a98a766f46b9 (commit)

The revisions listed above that are new to this repository have
not appeared in any other notification email, so they are listed
in full below.


commit 565612fd40474044e2afaa4fcb993c8c0197ca8e
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Wed Jan 5 15:53:17 2022 -0500

    18324: Incorporate BsubCUDAArguments.  update test
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/lib/lsf/dispatch.go b/lib/lsf/dispatch.go
index f34f840e5..c9ed5582b 100644
--- a/lib/lsf/dispatch.go
+++ b/lib/lsf/dispatch.go
@@ -311,7 +311,11 @@ func (disp *dispatcher) bsubArgs(container arvados.Container) ([]string, error)
 
 	re := regexp.MustCompile(`%.`)
 	var substitutionErrors string
-	for _, a := range disp.Cluster.Containers.LSF.BsubArgumentsList {
+	argumentTemplate := disp.Cluster.Containers.LSF.BsubArgumentsList
+	if container.RuntimeConstraints.CUDA.DeviceCount > 0 {
+		argumentTemplate = append(argumentTemplate, disp.Cluster.Containers.LSF.BsubCUDAArguments...)
+	}
+	for _, a := range argumentTemplate {
 		args = append(args, re.ReplaceAllStringFunc(a, func(s string) string {
 			subst := repl[s]
 			if len(subst) == 0 {
diff --git a/lib/lsf/dispatch_test.go b/lib/lsf/dispatch_test.go
index c044df09f..c678a9a48 100644
--- a/lib/lsf/dispatch_test.go
+++ b/lib/lsf/dispatch_test.go
@@ -30,8 +30,9 @@ func Test(t *testing.T) {
 var _ = check.Suite(&suite{})
 
 type suite struct {
-	disp     *dispatcher
-	crTooBig arvados.ContainerRequest
+	disp          *dispatcher
+	crTooBig      arvados.ContainerRequest
+	crCUDARequest arvados.ContainerRequest
 }
 
 func (s *suite) TearDownTest(c *check.C) {
@@ -64,6 +65,29 @@ func (s *suite) SetUpTest(c *check.C) {
 		},
 	})
 	c.Assert(err, check.IsNil)
+
+	err = arvados.NewClientFromEnv().RequestAndDecode(&s.crCUDARequest, "POST", "arvados/v1/container_requests", nil, map[string]interface{}{
+		"container_request": map[string]interface{}{
+			"runtime_constraints": arvados.RuntimeConstraints{
+				RAM:   16000000,
+				VCPUs: 1,
+				CUDA: arvados.CUDARuntimeConstraints{
+					DeviceCount:        1,
+					DriverVersion:      "11.0",
+					HardwareCapability: "8.0",
+				},
+			},
+			"container_image":     arvadostest.DockerImage112PDH,
+			"command":             []string{"sleep", "1"},
+			"mounts":              map[string]arvados.Mount{"/mnt/out": {Kind: "tmp", Capacity: 1000}},
+			"output_path":         "/mnt/out",
+			"state":               arvados.ContainerRequestStateCommitted,
+			"priority":            1,
+			"container_count_max": 1,
+		},
+	})
+	c.Assert(err, check.IsNil)
+
 }
 
 type lsfstub struct {
@@ -90,7 +114,11 @@ func (stub lsfstub) stubCommand(s *suite, c *check.C) func(prog string, args ...
 		switch prog {
 		case "bsub":
 			defaultArgs := s.disp.Cluster.Containers.LSF.BsubArgumentsList
-			c.Assert(len(args), check.Equals, len(defaultArgs))
+			if args[5] == s.crCUDARequest.ContainerUUID {
+				c.Assert(len(args), check.Equals, len(defaultArgs)+len(s.disp.Cluster.Containers.LSF.BsubCUDAArguments))
+			} else {
+				c.Assert(len(args), check.Equals, len(defaultArgs))
+			}
 			// %%J must have been rewritten to %J
 			c.Check(args[1], check.Equals, "/tmp/crunch-run.%J.out")
 			args = args[4:]
@@ -134,6 +162,20 @@ func (stub lsfstub) stubCommand(s *suite, c *check.C) func(prog string, args ...
 				fakejobq[nextjobid] = args[1]
 				nextjobid++
 				mtx.Unlock()
+			case s.crCUDARequest.ContainerUUID:
+				c.Check(args, check.DeepEquals, []string{
+					"-J", s.crCUDARequest.ContainerUUID,
+					"-n", "1",
+					"-D", "528MB",
+					"-R", "rusage[mem=528MB:tmp=256MB] span[hosts=1]",
+					"-R", "select[mem>=528MB]",
+					"-R", "select[tmp>=256MB]",
+					"-R", "select[ncpus>=1]",
+					"-gpu", "num=1"})
+				mtx.Lock()
+				fakejobq[nextjobid] = args[1]
+				nextjobid++
+				mtx.Unlock()
 			default:
 				c.Errorf("unexpected uuid passed to bsub: args %q", args)
 				return exec.Command("false")

commit 3c034a63249b94b07449407ad5c4f4115a1ef974
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Mon Jan 3 17:05:10 2022 -0500

    18324: Adding BsubCUDAArguments WIP
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml
index fe9645982..002acd03a 100644
--- a/lib/config/config.default.yml
+++ b/lib/config/config.default.yml
@@ -1089,6 +1089,7 @@ Clusters:
         # %C number of VCPUs
         # %M memory in MB
         # %T tmp in MB
+        # %G number of GPU devices (runtime_constraints.cuda.device_count)
         #
         # Use %% to express a literal %. The %%J in the default will be changed
         # to %J, which is interpreted by bsub itself.
@@ -1099,6 +1100,11 @@ Clusters:
         # from /tmp, or adjust the "-o" and "-e" arguments accordingly.
         BsubArgumentsList: ["-o", "/tmp/crunch-run.%%J.out", "-e", "/tmp/crunch-run.%%J.err", "-J", "%U", "-n", "%C", "-D", "%MMB", "-R", "rusage[mem=%MMB:tmp=%TMB] span[hosts=1]", "-R", "select[mem>=%MMB]", "-R", "select[tmp>=%TMB]", "-R", "select[ncpus>=%C]"]
 
+        # Arguments that will be appended to the bsub command line
+        # when submitting Arvados containers as LSF jobs with
+        # runtime_constraints.cuda.device_count > 0
+        BsubCUDAArguments: ["-gpu", "num=%G"]
+
         # Use sudo to switch to this user account when submitting LSF
         # jobs.
         #
diff --git a/lib/config/generated_config.go b/lib/config/generated_config.go
index f19699347..9983a33b5 100644
--- a/lib/config/generated_config.go
+++ b/lib/config/generated_config.go
@@ -1095,6 +1095,7 @@ Clusters:
         # %C number of VCPUs
         # %M memory in MB
         # %T tmp in MB
+        # %G number of GPU devices (runtime_constraints.cuda.device_count)
         #
         # Use %% to express a literal %. The %%J in the default will be changed
         # to %J, which is interpreted by bsub itself.
@@ -1105,6 +1106,11 @@ Clusters:
         # from /tmp, or adjust the "-o" and "-e" arguments accordingly.
         BsubArgumentsList: ["-o", "/tmp/crunch-run.%%J.out", "-e", "/tmp/crunch-run.%%J.err", "-J", "%U", "-n", "%C", "-D", "%MMB", "-R", "rusage[mem=%MMB:tmp=%TMB] span[hosts=1]", "-R", "select[mem>=%MMB]", "-R", "select[tmp>=%TMB]", "-R", "select[ncpus>=%C]"]
 
+        # Arguments that will be appended to the bsub command line
+        # when submitting Arvados containers as LSF jobs with
+        # runtime_constraints.cuda.device_count > 0
+        BsubCUDAArguments: ["-gpu", "num=%G"]
+
         # Use sudo to switch to this user account when submitting LSF
         # jobs.
         #
diff --git a/sdk/go/arvados/config.go b/sdk/go/arvados/config.go
index b11189306..e367c9202 100644
--- a/sdk/go/arvados/config.go
+++ b/sdk/go/arvados/config.go
@@ -486,6 +486,7 @@ type ContainersConfig struct {
 	LSF struct {
 		BsubSudoUser      string
 		BsubArgumentsList []string
+		BsubCUDAArguments []string
 	}
 }
 

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list