[ARVADOS] created: 2.1.0-1959-g4834245e7
Git user
git at public.arvados.org
Mon Feb 21 22:27:03 UTC 2022
at 4834245e77cb316e488467c76984bdaff89569ae (commit)
commit 4834245e77cb316e488467c76984bdaff89569ae
Author: Peter Amstutz <peter.amstutz at curii.com>
Date: Mon Feb 21 17:26:24 2022 -0500
18656: Support dynamically requesting GPUs with an expression
Requires cwltool update, pending.
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>
diff --git a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.0.yml b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.0.yml
index d5efa31a0..5e5d5aa3e 100644
--- a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.0.yml
+++ b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.0.yml
@@ -359,13 +359,24 @@ $graph:
See https://docs.nvidia.com/deploy/cuda-compatibility/ for
details.
- cudaComputeCapabilityMin:
- type: string
- doc: Minimum CUDA hardware capability required to run the software, in X.Y format.
- deviceCountMin:
- type: int?
+ cudaComputeCapability:
+ type:
+ - 'string'
+ - 'string[]'
+ doc: |
+ CUDA hardware capability required to run the software, in X.Y
+ format.
+
+ * If this is a single value, it defines only the minimum
+ compute capability. GPUs with higher capability are also
+ accepted.
+
+ * If it is an array value, then only select GPUs with compute
+ capabilities that explicitly appear in the array.
+ cudaDeviceCountMin:
+ type: ['null', int, cwl:Expression]
default: 1
doc: Minimum number of GPU devices to request, default 1.
- deviceCountMax:
- type: int?
+ cudaDeviceCountMax:
+ type: ['null', int, cwl:Expression]
doc: Maximum number of GPU devices to request. If not specified, same as `cudaDeviceCountMin`.
diff --git a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.1.yml b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.1.yml
index 4a6b6947f..628a79dd7 100644
--- a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.1.yml
+++ b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.1.yml
@@ -302,13 +302,24 @@ $graph:
See https://docs.nvidia.com/deploy/cuda-compatibility/ for
details.
- cudaComputeCapabilityMin:
- type: string
- doc: Minimum CUDA hardware capability required to run the software, in X.Y format.
- deviceCountMin:
- type: int?
+ cudaComputeCapability:
+ type:
+ - 'string'
+ - 'string[]'
+ doc: |
+ CUDA hardware capability required to run the software, in X.Y
+ format.
+
+ * If this is a single value, it defines only the minimum
+ compute capability. GPUs with higher capability are also
+ accepted.
+
+ * If it is an array value, then only select GPUs with compute
+ capabilities that explicitly appear in the array.
+ cudaDeviceCountMin:
+ type: ['null', int, cwl:Expression]
default: 1
doc: Minimum number of GPU devices to request, default 1.
- deviceCountMax:
- type: int?
+ cudaDeviceCountMax:
+ type: ['null', int, cwl:Expression]
doc: Maximum number of GPU devices to request. If not specified, same as `cudaDeviceCountMin`.
diff --git a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml
index e95b6543f..9e6de063f 100644
--- a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml
+++ b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml
@@ -304,13 +304,24 @@ $graph:
See https://docs.nvidia.com/deploy/cuda-compatibility/ for
details.
- cudaComputeCapabilityMin:
- type: string
- doc: Minimum CUDA hardware capability required to run the software, in X.Y format.
- deviceCountMin:
- type: int?
+ cudaComputeCapability:
+ type:
+ - 'string'
+ - 'string[]'
+ doc: |
+ CUDA hardware capability required to run the software, in X.Y
+ format.
+
+ * If this is a single value, it defines only the minimum
+ compute capability. GPUs with higher capability are also
+ accepted.
+
+ * If it is an array value, then only select GPUs with compute
+ capabilities that explicitly appear in the array.
+ cudaDeviceCountMin:
+ type: ['null', int, cwl:Expression]
default: 1
doc: Minimum number of GPU devices to request, default 1.
- deviceCountMax:
- type: int?
+ cudaDeviceCountMax:
+ type: ['null', int, cwl:Expression]
doc: Maximum number of GPU devices to request. If not specified, same as `cudaDeviceCountMin`.
diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py
index 753c2c250..4d0d65c4d 100644
--- a/sdk/cwl/arvados_cwl/arvcontainer.py
+++ b/sdk/cwl/arvados_cwl/arvcontainer.py
@@ -295,9 +295,9 @@ class ArvadosContainer(JobBase):
cuda_req, _ = self.get_requirement("http://commonwl.org/cwltool#CUDARequirement")
if cuda_req:
runtime_constraints["cuda"] = {
- "device_count": cuda_req.get("deviceCountMin", 1),
+ "device_count": resources["cudaDeviceCount"],
"driver_version": cuda_req["cudaVersionMin"],
- "hardware_capability": cuda_req["cudaComputeCapabilityMin"]
+ "hardware_capability": cuda_req["cudaComputeCapability"]
}
if self.timelimit is not None and self.timelimit > 0:
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list