[ARVADOS] updated: 2.1.0-1764-g565612fd4
Git user
git at public.arvados.org
Wed Jan 5 20:53:51 UTC 2022
Summary of changes:
lib/config/config.default.yml | 6 ++++++
lib/config/generated_config.go | 6 ++++++
lib/lsf/dispatch.go | 6 +++++-
lib/lsf/dispatch_test.go | 48 +++++++++++++++++++++++++++++++++++++++---
sdk/go/arvados/config.go | 1 +
5 files changed, 63 insertions(+), 4 deletions(-)
via 565612fd40474044e2afaa4fcb993c8c0197ca8e (commit)
via 3c034a63249b94b07449407ad5c4f4115a1ef974 (commit)
from 2c39f766745e853ae216d5489236a98a766f46b9 (commit)
The revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.
commit 565612fd40474044e2afaa4fcb993c8c0197ca8e
Author: Peter Amstutz <peter.amstutz at curii.com>
Date: Wed Jan 5 15:53:17 2022 -0500
18324: Incorporate BsubCUDAArguments. update test
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>
diff --git a/lib/lsf/dispatch.go b/lib/lsf/dispatch.go
index f34f840e5..c9ed5582b 100644
--- a/lib/lsf/dispatch.go
+++ b/lib/lsf/dispatch.go
@@ -311,7 +311,11 @@ func (disp *dispatcher) bsubArgs(container arvados.Container) ([]string, error)
re := regexp.MustCompile(`%.`)
var substitutionErrors string
- for _, a := range disp.Cluster.Containers.LSF.BsubArgumentsList {
+ argumentTemplate := disp.Cluster.Containers.LSF.BsubArgumentsList
+ if container.RuntimeConstraints.CUDA.DeviceCount > 0 {
+ argumentTemplate = append(argumentTemplate, disp.Cluster.Containers.LSF.BsubCUDAArguments...)
+ }
+ for _, a := range argumentTemplate {
args = append(args, re.ReplaceAllStringFunc(a, func(s string) string {
subst := repl[s]
if len(subst) == 0 {
diff --git a/lib/lsf/dispatch_test.go b/lib/lsf/dispatch_test.go
index c044df09f..c678a9a48 100644
--- a/lib/lsf/dispatch_test.go
+++ b/lib/lsf/dispatch_test.go
@@ -30,8 +30,9 @@ func Test(t *testing.T) {
var _ = check.Suite(&suite{})
type suite struct {
- disp *dispatcher
- crTooBig arvados.ContainerRequest
+ disp *dispatcher
+ crTooBig arvados.ContainerRequest
+ crCUDARequest arvados.ContainerRequest
}
func (s *suite) TearDownTest(c *check.C) {
@@ -64,6 +65,29 @@ func (s *suite) SetUpTest(c *check.C) {
},
})
c.Assert(err, check.IsNil)
+
+ err = arvados.NewClientFromEnv().RequestAndDecode(&s.crCUDARequest, "POST", "arvados/v1/container_requests", nil, map[string]interface{}{
+ "container_request": map[string]interface{}{
+ "runtime_constraints": arvados.RuntimeConstraints{
+ RAM: 16000000,
+ VCPUs: 1,
+ CUDA: arvados.CUDARuntimeConstraints{
+ DeviceCount: 1,
+ DriverVersion: "11.0",
+ HardwareCapability: "8.0",
+ },
+ },
+ "container_image": arvadostest.DockerImage112PDH,
+ "command": []string{"sleep", "1"},
+ "mounts": map[string]arvados.Mount{"/mnt/out": {Kind: "tmp", Capacity: 1000}},
+ "output_path": "/mnt/out",
+ "state": arvados.ContainerRequestStateCommitted,
+ "priority": 1,
+ "container_count_max": 1,
+ },
+ })
+ c.Assert(err, check.IsNil)
+
}
type lsfstub struct {
@@ -90,7 +114,11 @@ func (stub lsfstub) stubCommand(s *suite, c *check.C) func(prog string, args ...
switch prog {
case "bsub":
defaultArgs := s.disp.Cluster.Containers.LSF.BsubArgumentsList
- c.Assert(len(args), check.Equals, len(defaultArgs))
+ if args[5] == s.crCUDARequest.ContainerUUID {
+ c.Assert(len(args), check.Equals, len(defaultArgs)+len(s.disp.Cluster.Containers.LSF.BsubCUDAArguments))
+ } else {
+ c.Assert(len(args), check.Equals, len(defaultArgs))
+ }
// %%J must have been rewritten to %J
c.Check(args[1], check.Equals, "/tmp/crunch-run.%J.out")
args = args[4:]
@@ -134,6 +162,20 @@ func (stub lsfstub) stubCommand(s *suite, c *check.C) func(prog string, args ...
fakejobq[nextjobid] = args[1]
nextjobid++
mtx.Unlock()
+ case s.crCUDARequest.ContainerUUID:
+ c.Check(args, check.DeepEquals, []string{
+ "-J", s.crCUDARequest.ContainerUUID,
+ "-n", "1",
+ "-D", "528MB",
+ "-R", "rusage[mem=528MB:tmp=256MB] span[hosts=1]",
+ "-R", "select[mem>=528MB]",
+ "-R", "select[tmp>=256MB]",
+ "-R", "select[ncpus>=1]",
+ "-gpu", "num=1"})
+ mtx.Lock()
+ fakejobq[nextjobid] = args[1]
+ nextjobid++
+ mtx.Unlock()
default:
c.Errorf("unexpected uuid passed to bsub: args %q", args)
return exec.Command("false")
commit 3c034a63249b94b07449407ad5c4f4115a1ef974
Author: Peter Amstutz <peter.amstutz at curii.com>
Date: Mon Jan 3 17:05:10 2022 -0500
18324: Adding BsubCUDAArguments WIP
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>
diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml
index fe9645982..002acd03a 100644
--- a/lib/config/config.default.yml
+++ b/lib/config/config.default.yml
@@ -1089,6 +1089,7 @@ Clusters:
# %C number of VCPUs
# %M memory in MB
# %T tmp in MB
+ # %G number of GPU devices (runtime_constraints.cuda.device_count)
#
# Use %% to express a literal %. The %%J in the default will be changed
# to %J, which is interpreted by bsub itself.
@@ -1099,6 +1100,11 @@ Clusters:
# from /tmp, or adjust the "-o" and "-e" arguments accordingly.
BsubArgumentsList: ["-o", "/tmp/crunch-run.%%J.out", "-e", "/tmp/crunch-run.%%J.err", "-J", "%U", "-n", "%C", "-D", "%MMB", "-R", "rusage[mem=%MMB:tmp=%TMB] span[hosts=1]", "-R", "select[mem>=%MMB]", "-R", "select[tmp>=%TMB]", "-R", "select[ncpus>=%C]"]
+ # Arguments that will be appended to the bsub command line
+ # when submitting Arvados containers as LSF jobs with
+ # runtime_constraints.cuda.device_count > 0
+ BsubCUDAArguments: ["-gpu", "num=%G"]
+
# Use sudo to switch to this user account when submitting LSF
# jobs.
#
diff --git a/lib/config/generated_config.go b/lib/config/generated_config.go
index f19699347..9983a33b5 100644
--- a/lib/config/generated_config.go
+++ b/lib/config/generated_config.go
@@ -1095,6 +1095,7 @@ Clusters:
# %C number of VCPUs
# %M memory in MB
# %T tmp in MB
+ # %G number of GPU devices (runtime_constraints.cuda.device_count)
#
# Use %% to express a literal %. The %%J in the default will be changed
# to %J, which is interpreted by bsub itself.
@@ -1105,6 +1106,11 @@ Clusters:
# from /tmp, or adjust the "-o" and "-e" arguments accordingly.
BsubArgumentsList: ["-o", "/tmp/crunch-run.%%J.out", "-e", "/tmp/crunch-run.%%J.err", "-J", "%U", "-n", "%C", "-D", "%MMB", "-R", "rusage[mem=%MMB:tmp=%TMB] span[hosts=1]", "-R", "select[mem>=%MMB]", "-R", "select[tmp>=%TMB]", "-R", "select[ncpus>=%C]"]
+ # Arguments that will be appended to the bsub command line
+ # when submitting Arvados containers as LSF jobs with
+ # runtime_constraints.cuda.device_count > 0
+ BsubCUDAArguments: ["-gpu", "num=%G"]
+
# Use sudo to switch to this user account when submitting LSF
# jobs.
#
diff --git a/sdk/go/arvados/config.go b/sdk/go/arvados/config.go
index b11189306..e367c9202 100644
--- a/sdk/go/arvados/config.go
+++ b/sdk/go/arvados/config.go
@@ -486,6 +486,7 @@ type ContainersConfig struct {
LSF struct {
BsubSudoUser string
BsubArgumentsList []string
+ BsubCUDAArguments []string
}
}
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list