[arvados] created: 2.7.0-5814-g5c2f3f465a

git repository hosting git at public.arvados.org
Thu Jan 11 14:37:06 UTC 2024


        at  5c2f3f465ac10b674e9033083063489a2d9b73c2 (commit)


commit 5c2f3f465ac10b674e9033083063489a2d9b73c2
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Tue Jan 9 17:22:32 2024 -0500

    21216: Make memoryRetryMultiplier optional and set a default value
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/doc/user/cwl/cwl-extensions.html.textile.liquid b/doc/user/cwl/cwl-extensions.html.textile.liquid
index e05072ddf6..3c8366721d 100644
--- a/doc/user/cwl/cwl-extensions.html.textile.liquid
+++ b/doc/user/cwl/cwl-extensions.html.textile.liquid
@@ -73,7 +73,7 @@ hints:
     usePreemptible: true
 
   arv:OutOfMemoryRetry:
-    memoryRetryMultipler: 2
+    memoryRetryMultiplier: 2
     memoryErrorRegex: "custom memory error"
 {% endcodeblock %}
 
@@ -195,7 +195,7 @@ table(table table-bordered table-condensed).
 
 h2(#OutOfMemoryRetry). arv:OutOfMemoryRetry
 
-Specify that when a workflow step appears to have failed because it did not request enough RAM, it should be re-submitted with more RAM.  Out of memory conditions are detected either by the container being unexpectedly killed (exit code 137) or by matching a pattern in the container's output (see @memoryErrorRegex@).  Retrying will increase the base RAM request by the value of @memoryRetryMultipler@.  For example, if the original RAM request was 10 GiB and the multiplier is 1.5, then it will re-submit with 15 GiB.
+Specify that when a workflow step appears to have failed because it did not request enough RAM, it should be re-submitted with more RAM.  Out of memory conditions are detected either by the container being unexpectedly killed (exit code 137) or by matching a pattern in the container's output (see @memoryErrorRegex@).  Retrying will increase the base RAM request by the value of @memoryRetryMultiplier@.  For example, if the original RAM request was 10 GiB and the multiplier is 1.5, then it will re-submit with 15 GiB.
 
 Containers are only re-submitted once.  If it fails a second time after increasing RAM, then the worklow step will still fail.
 
@@ -203,7 +203,7 @@ Also note that expressions that use @$(runtime.ram)@ (such as dynamic command li
 
 table(table table-bordered table-condensed).
 |_. Field |_. Type |_. Description |
-|memoryRetryMultipler|float|Required, the retry will multiply the base memory request by this factor to get the retry memory request.|
+|memoryRetryMultiplier|float|Optional, default value is 2.  The retry will multiply the base memory request by this factor to get the retry memory request.|
 |memoryErrorRegex|string|Optional, a custom regex that, if found in the stdout, stderr or crunch-run logging of a program, will trigger a retry with greater RAM.  If not provided, the default pattern matches "out of memory" (with or without spaces), "memory error" (with or without spaces), "bad_alloc" and "container using over 90% of memory".|
 
 h2. arv:dockerCollectionPDH
diff --git a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.0.yml b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.0.yml
index 450864df30..aeb41db568 100644
--- a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.0.yml
+++ b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.0.yml
@@ -479,7 +479,7 @@ $graph:
         should be retried with more RAM.  By default, searches for the
         substrings 'bad_alloc' and 'OutOfMemory'.
     - name: memoryRetryMultiplier
-      type: float
+      type: float?
       doc: |
         If the container failed on its first run, re-submit the
         container with the RAM request multiplied by this factor.
diff --git a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.1.yml b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.1.yml
index f33b94e69d..0e51d50080 100644
--- a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.1.yml
+++ b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.1.yml
@@ -422,7 +422,7 @@ $graph:
         should be retried with more RAM.  By default, searches for the
         substrings 'bad_alloc' and 'OutOfMemory'.
     - name: memoryRetryMultiplier
-      type: float
+      type: float?
       doc: |
         If the container failed on its first run, re-submit the
         container with the RAM request multiplied by this factor.
diff --git a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml
index 0c6035c56f..a753579c9a 100644
--- a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml
+++ b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml
@@ -425,7 +425,7 @@ $graph:
         should be retried with more RAM.  By default, searches for the
         substrings 'bad_alloc' and 'OutOfMemory'.
     - name: memoryRetryMultiplier
-      type: float
+      type: float?
       doc: |
         If the container failed on its first run, re-submit the
         container with the RAM request multiplied by this factor.
diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py
index 1f379ccec6..584ca1713a 100644
--- a/sdk/cwl/arvados_cwl/arvcontainer.py
+++ b/sdk/cwl/arvados_cwl/arvcontainer.py
@@ -375,6 +375,8 @@ class ArvadosContainer(JobBase):
                 ram_multiplier.append(oom_retry_req.get('memoryRetryMultiplier'))
             elif oom_retry_req.get('memoryRetryMultipler'):
                 ram_multiplier.append(oom_retry_req.get('memoryRetryMultipler'))
+            else:
+                ram_multiplier.append(2)
 
         if runtimeContext.runnerjob.startswith("arvwf:"):
             wfuuid = runtimeContext.runnerjob[6:runtimeContext.runnerjob.index("#")]

commit c6c5d70c2929ede0ed79edf511025c0a93f5adac
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Tue Jan 9 17:12:55 2024 -0500

    21216: Make the misspelled version optional
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.0.yml b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.0.yml
index b91564bdd1..450864df30 100644
--- a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.0.yml
+++ b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.0.yml
@@ -484,7 +484,7 @@ $graph:
         If the container failed on its first run, re-submit the
         container with the RAM request multiplied by this factor.
     - name: memoryRetryMultipler
-      type: float
+      type: float?
       doc: |
         Deprecated misspelling of "memoryRetryMultiplier".  Kept only
         for backwards compatability, don't use this.
diff --git a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.1.yml b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.1.yml
index ebec3f334e..f33b94e69d 100644
--- a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.1.yml
+++ b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.1.yml
@@ -427,7 +427,7 @@ $graph:
         If the container failed on its first run, re-submit the
         container with the RAM request multiplied by this factor.
     - name: memoryRetryMultipler
-      type: float
+      type: float?
       doc: |
         Deprecated misspelling of "memoryRetryMultiplier".  Kept only
         for backwards compatability, don't use this.
diff --git a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml
index 7b70a00dab..0c6035c56f 100644
--- a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml
+++ b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml
@@ -430,7 +430,7 @@ $graph:
         If the container failed on its first run, re-submit the
         container with the RAM request multiplied by this factor.
     - name: memoryRetryMultipler
-      type: float
+      type: float?
       doc: |
         Deprecated misspelling of "memoryRetryMultiplier".  Kept only
         for backwards compatability, don't use this.
diff --git a/sdk/cwl/tests/arvados-tests.yml b/sdk/cwl/tests/arvados-tests.yml
index 7ca8ca0950..cb4a151f0e 100644
--- a/sdk/cwl/tests/arvados-tests.yml
+++ b/sdk/cwl/tests/arvados-tests.yml
@@ -487,7 +487,7 @@
 
 - job: oom/fakeoom.yml
   output: {}
-  tool: oom/19975-oom-misspelled.cwl
+  tool: oom/19975-oom-mispelled.cwl
   doc: "Test feature 19975 - retry on exit 137, old misspelled version"
 
 - job: oom/fakeoom2.yml

commit c35f7316d44a2f72ed4cb2999adc6d8403e3ed01
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Tue Jan 9 13:15:54 2024 -0500

    Correct typo in field name to 'memoryRetryMultiplier'
    
    Include fallback to previous misspelling 'memoryRetryMultipler'
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.0.yml b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.0.yml
index 91a05e1254..b91564bdd1 100644
--- a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.0.yml
+++ b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.0.yml
@@ -478,8 +478,13 @@ $graph:
         and stderr produced by the tool to determine if a failed job
         should be retried with more RAM.  By default, searches for the
         substrings 'bad_alloc' and 'OutOfMemory'.
-    - name: memoryRetryMultipler
+    - name: memoryRetryMultiplier
       type: float
       doc: |
         If the container failed on its first run, re-submit the
         container with the RAM request multiplied by this factor.
+    - name: memoryRetryMultipler
+      type: float
+      doc: |
+        Deprecated misspelling of "memoryRetryMultiplier".  Kept only
+        for backwards compatability, don't use this.
diff --git a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.1.yml b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.1.yml
index 458d5a37a7..ebec3f334e 100644
--- a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.1.yml
+++ b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.1.yml
@@ -421,8 +421,13 @@ $graph:
         and stderr produced by the tool to determine if a failed job
         should be retried with more RAM.  By default, searches for the
         substrings 'bad_alloc' and 'OutOfMemory'.
-    - name: memoryRetryMultipler
+    - name: memoryRetryMultiplier
       type: float
       doc: |
         If the container failed on its first run, re-submit the
         container with the RAM request multiplied by this factor.
+    - name: memoryRetryMultipler
+      type: float
+      doc: |
+        Deprecated misspelling of "memoryRetryMultiplier".  Kept only
+        for backwards compatability, don't use this.
diff --git a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml
index 389add4104..7b70a00dab 100644
--- a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml
+++ b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml
@@ -424,11 +424,17 @@ $graph:
         and stderr produced by the tool to determine if a failed job
         should be retried with more RAM.  By default, searches for the
         substrings 'bad_alloc' and 'OutOfMemory'.
-    - name: memoryRetryMultipler
+    - name: memoryRetryMultiplier
       type: float
       doc: |
         If the container failed on its first run, re-submit the
         container with the RAM request multiplied by this factor.
+    - name: memoryRetryMultipler
+      type: float
+      doc: |
+        Deprecated misspelling of "memoryRetryMultiplier".  Kept only
+        for backwards compatability, don't use this.
+
 
 - name: SeparateRunner
   type: record
diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py
index 84b98378f4..1f379ccec6 100644
--- a/sdk/cwl/arvados_cwl/arvcontainer.py
+++ b/sdk/cwl/arvados_cwl/arvcontainer.py
@@ -370,8 +370,11 @@ class ArvadosContainer(JobBase):
         ram_multiplier = [1]
 
         oom_retry_req, _ = self.get_requirement("http://arvados.org/cwl#OutOfMemoryRetry")
-        if oom_retry_req and oom_retry_req.get('memoryRetryMultipler'):
-            ram_multiplier.append(oom_retry_req.get('memoryRetryMultipler'))
+        if oom_retry_req:
+            if oom_retry_req.get('memoryRetryMultiplier'):
+                ram_multiplier.append(oom_retry_req.get('memoryRetryMultiplier'))
+            elif oom_retry_req.get('memoryRetryMultipler'):
+                ram_multiplier.append(oom_retry_req.get('memoryRetryMultipler'))
 
         if runtimeContext.runnerjob.startswith("arvwf:"):
             wfuuid = runtimeContext.runnerjob[6:runtimeContext.runnerjob.index("#")]
diff --git a/sdk/cwl/tests/arvados-tests.yml b/sdk/cwl/tests/arvados-tests.yml
index e0bdd8a5a3..7ca8ca0950 100644
--- a/sdk/cwl/tests/arvados-tests.yml
+++ b/sdk/cwl/tests/arvados-tests.yml
@@ -485,6 +485,11 @@
   tool: oom/19975-oom.cwl
   doc: "Test feature 19975 - retry on exit 137"
 
+- job: oom/fakeoom.yml
+  output: {}
+  tool: oom/19975-oom-misspelled.cwl
+  doc: "Test feature 19975 - retry on exit 137, old misspelled version"
+
 - job: oom/fakeoom2.yml
   output: {}
   tool: oom/19975-oom.cwl
diff --git a/sdk/cwl/tests/oom/19975-oom.cwl b/sdk/cwl/tests/oom/19975-oom-mispelled.cwl
similarity index 86%
copy from sdk/cwl/tests/oom/19975-oom.cwl
copy to sdk/cwl/tests/oom/19975-oom-mispelled.cwl
index ec80648716..bbd26b9c9a 100644
--- a/sdk/cwl/tests/oom/19975-oom.cwl
+++ b/sdk/cwl/tests/oom/19975-oom-mispelled.cwl
@@ -8,6 +8,7 @@ $namespaces:
   arv: "http://arvados.org/cwl#"
 hints:
   arv:OutOfMemoryRetry:
+    # legacy misspelled name, should behave exactly the same
     memoryRetryMultipler: 2
   ResourceRequirement:
     ramMin: 256
diff --git a/sdk/cwl/tests/oom/19975-oom.cwl b/sdk/cwl/tests/oom/19975-oom.cwl
index ec80648716..bf3e5cc389 100644
--- a/sdk/cwl/tests/oom/19975-oom.cwl
+++ b/sdk/cwl/tests/oom/19975-oom.cwl
@@ -8,7 +8,7 @@ $namespaces:
   arv: "http://arvados.org/cwl#"
 hints:
   arv:OutOfMemoryRetry:
-    memoryRetryMultipler: 2
+    memoryRetryMultiplier: 2
   ResourceRequirement:
     ramMin: 256
   arv:APIRequirement: {}
diff --git a/sdk/cwl/tests/oom/19975-oom3.cwl b/sdk/cwl/tests/oom/19975-oom3.cwl
index af3271b847..bbca110b6f 100644
--- a/sdk/cwl/tests/oom/19975-oom3.cwl
+++ b/sdk/cwl/tests/oom/19975-oom3.cwl
@@ -8,7 +8,7 @@ $namespaces:
   arv: "http://arvados.org/cwl#"
 hints:
   arv:OutOfMemoryRetry:
-    memoryRetryMultipler: 2
+    memoryRetryMultiplier: 2
     memoryErrorRegex: Whoops
   ResourceRequirement:
     ramMin: 256

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list