[arvados] created: 2.6.0-505-gc0c3828dcb

git repository hosting git at public.arvados.org
Tue Jan 2 18:39:24 UTC 2024


        at  c0c3828dcb8d8a396d8183bb2dcca60eda56bcdf (commit)


commit c0c3828dcb8d8a396d8183bb2dcca60eda56bcdf
Author: Alex Coleman <alex.coleman at curii.com>
Date:   Tue Jan 2 13:36:23 2024 -0500

    19982: Adding initial code
    
    Adding initial code in arvcontainer.py, adding first draft of test, and adding new hint.
    
    Arvados-DCO-1.1-Signed-off-by: Alex Coleman <alex.coleman at curii.com>

diff --git a/sdk/cwl/arvados_cwl/__init__.py b/sdk/cwl/arvados_cwl/__init__.py
index 8108934aae..e90de431ff 100644
--- a/sdk/cwl/arvados_cwl/__init__.py
+++ b/sdk/cwl/arvados_cwl/__init__.py
@@ -290,6 +290,7 @@ def add_arv_hints():
         "http://arvados.org/cwl#OutputCollectionProperties",
         "http://arvados.org/cwl#KeepCacheTypeRequirement",
         "http://arvados.org/cwl#OutOfMemoryRetry",
+        "http://arvados.org/cwl#SpotInstanceRetry",
     ])
 
 def exit_signal_handler(sigcode, frame):
diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py
index a94fdac522..4df20f10e3 100644
--- a/sdk/cwl/arvados_cwl/arvcontainer.py
+++ b/sdk/cwl/arvados_cwl/arvcontainer.py
@@ -429,6 +429,17 @@ class ArvadosContainer(JobBase):
             logger.debug("Container request was %s", container_request)
             self.output_callback({}, "permanentFail")
 
+
+    def spot_instance_retry(self, record, container):
+        spot_instance_retry_req, _ = self.get_requirement("http://arvados.org/cwl#SpotInstanceRetry")
+        if spot_instance_retry_req is None:
+            return False
+        if container["preemptionNotice"]:
+            return True
+        return False
+    
+        
+
     def out_of_memory_retry(self, record, container):
         oom_retry_req, _ = self.get_requirement("http://arvados.org/cwl#OutOfMemoryRetry")
         if oom_retry_req is None:
@@ -485,7 +496,12 @@ class ArvadosContainer(JobBase):
                     self.run(None)
                     retried = True
                     return
-
+                if processStatus == "permanentFail" and self.attempt_count == 1 and self.spot_instance_retry(record, container):
+                    logger.warning("%s Container failed with preemptible instance reclaimed, trying again nonpreemptible")
+                    self.job_runtime.enable_preemptible = False
+                    self.run(None)
+                    retried = True
+                    return
                 if rcode == 137:
                     logger.warning("%s Container may have been killed for using too much RAM.  Try resubmitting with a higher 'ramMin' or use the arv:OutOfMemoryRetry feature.",
                                  self.arvrunner.label(self))
diff --git a/sdk/cwl/tests/test_container.py b/sdk/cwl/tests/test_container.py
index a2f404d7eb..fbd3ef54d8 100644
--- a/sdk/cwl/tests/test_container.py
+++ b/sdk/cwl/tests/test_container.py
@@ -579,7 +579,7 @@ class TestContainer(unittest.TestCase):
             self.fail("RuntimeStatusLoggingHandler should not be called recursively")
 
 
-    # Test to make sure that an exception raised from
+    # Test to make sure that an exception raised from
     # get_current_container doesn't cause the logger to raise an
     # exception
     @mock.patch("arvados_cwl.util.get_current_container")
@@ -1708,3 +1708,26 @@ class TestWorkflow(unittest.TestCase):
         api._rootDesc = copy.deepcopy(get_rootDesc())
         runner = arvados_cwl.executor.ArvCwlExecutor(api)
         self.assertEqual(runner.work_api, 'containers')
+    
+    @mock.patch("arvados.collection.Collection")
+    def test_spot_instance_retry(self):
+        arvados_cwl.add_arv_hints()
+
+        # Add hint
+
+        api = mock.MagicMock()
+
+        runner = mock.MagicMock()
+        runner.api = api
+        runner.num_retries = 0
+        runner.ignore_docker_for_reuse = False
+        runner.intermediate_output_ttl = 0
+        runner.secret_store = cwltool.secrets.SecretStore()
+
+        runner.api.containers().get().execute.return_value = {
+            "state": "Complete",
+            "output": "abc+123",
+            "exit_code": 137
+        }
+        # Add assertions to make sure it reran as nonpreemptible
+        assert False
\ No newline at end of file

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list