[ARVADOS] created: 2.1.0-1959-g4834245e7

Mon Feb 21 22:27:03 UTC 2022

at  4834245e77cb316e488467c76984bdaff89569ae (commit)


commit 4834245e77cb316e488467c76984bdaff89569ae
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Mon Feb 21 17:26:24 2022 -0500

    18656: Support dynamically requesting GPUs with an expression
    
    Requires cwltool update, pending.
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.0.yml b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.0.yml
index d5efa31a0..5e5d5aa3e 100644
--- a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.0.yml
+++ b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.0.yml
@@ -359,13 +359,24 @@ $graph:
 
         See https://docs.nvidia.com/deploy/cuda-compatibility/ for
         details.
-    cudaComputeCapabilityMin:
-      type: string
-      doc: Minimum CUDA hardware capability required to run the software, in X.Y format.
-    deviceCountMin:
-      type: int?
+    cudaComputeCapability:
+      type:
+        - 'string'
+        - 'string[]'
+      doc: |
+        CUDA hardware capability required to run the software, in X.Y
+        format.
+
+        * If this is a single value, it defines only the minimum
+          compute capability.  GPUs with higher capability are also
+          accepted.
+
+        * If it is an array value, then only select GPUs with compute
+          capabilities that explicitly appear in the array.
+    cudaDeviceCountMin:
+      type: ['null', int, cwl:Expression]
       default: 1
       doc: Minimum number of GPU devices to request, default 1.
-    deviceCountMax:
-      type: int?
-      doc: Maximum number of GPU devices to request.  If not specified, same as `deviceCountMin`.
+    cudaDeviceCountMax:
+      type: ['null', int, cwl:Expression]
+      doc: Maximum number of GPU devices to request.  If not specified, same as `cudaDeviceCountMin`.
diff --git a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.1.yml b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.1.yml
index 4a6b6947f..628a79dd7 100644
--- a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.1.yml
+++ b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.1.yml
@@ -302,13 +302,24 @@ $graph:
 
         See https://docs.nvidia.com/deploy/cuda-compatibility/ for
         details.
-    cudaComputeCapabilityMin:
-      type: string
-      doc: Minimum CUDA hardware capability required to run the software, in X.Y format.
-    deviceCountMin:
-      type: int?
+    cudaComputeCapability:
+      type:
+        - 'string'
+        - 'string[]'
+      doc: |
+        CUDA hardware capability required to run the software, in X.Y
+        format.
+
+        * If this is a single value, it defines only the minimum
+          compute capability.  GPUs with higher capability are also
+          accepted.
+
+        * If it is an array value, then only select GPUs with compute
+          capabilities that explicitly appear in the array.
+    cudaDeviceCountMin:
+      type: ['null', int, cwl:Expression]
       default: 1
       doc: Minimum number of GPU devices to request, default 1.
-    deviceCountMax:
-      type: int?
-      doc: Maximum number of GPU devices to request.  If not specified, same as `deviceCountMin`.
+    cudaDeviceCountMax:
+      type: ['null', int, cwl:Expression]
+      doc: Maximum number of GPU devices to request.  If not specified, same as `cudaDeviceCountMin`.
diff --git a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml
index e95b6543f..9e6de063f 100644
--- a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml
+++ b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml
@@ -304,13 +304,24 @@ $graph:
 
         See https://docs.nvidia.com/deploy/cuda-compatibility/ for
         details.
-    cudaComputeCapabilityMin:
-      type: string
-      doc: Minimum CUDA hardware capability required to run the software, in X.Y format.
-    deviceCountMin:
-      type: int?
+    cudaComputeCapability:
+      type:
+        - 'string'
+        - 'string[]'
+      doc: |
+        CUDA hardware capability required to run the software, in X.Y
+        format.
+
+        * If this is a single value, it defines only the minimum
+          compute capability.  GPUs with higher capability are also
+          accepted.
+
+        * If it is an array value, then only select GPUs with compute
+          capabilities that explicitly appear in the array.
+    cudaDeviceCountMin:
+      type: ['null', int, cwl:Expression]
       default: 1
       doc: Minimum number of GPU devices to request, default 1.
-    deviceCountMax:
-      type: int?
-      doc: Maximum number of GPU devices to request.  If not specified, same as `deviceCountMin`.
+    cudaDeviceCountMax:
+      type: ['null', int, cwl:Expression]
+      doc: Maximum number of GPU devices to request.  If not specified, same as `cudaDeviceCountMin`.
diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py
index 753c2c250..4d0d65c4d 100644
--- a/sdk/cwl/arvados_cwl/arvcontainer.py
+++ b/sdk/cwl/arvados_cwl/arvcontainer.py
@@ -295,9 +295,9 @@ class ArvadosContainer(JobBase):
         cuda_req, _ = self.get_requirement("http://commonwl.org/cwltool#CUDARequirement")
         if cuda_req:
             runtime_constraints["cuda"] = {
-                "device_count": cuda_req.get("deviceCountMin", 1),
+                "device_count": resources["cudaDeviceCount"],
                 "driver_version": cuda_req["cudaVersionMin"],
-                "hardware_capability": cuda_req["cudaComputeCapabilityMin"]
+                "hardware_capability": cuda_req["cudaComputeCapability"]
             }
 
         if self.timelimit is not None and self.timelimit > 0:

-----------------------------------------------------------------------


hooks/post-receive
--