[arvados] created: 2.7.0-4961-g5dc68f920a

git repository hosting git at public.arvados.org
Thu Oct 5 19:18:58 UTC 2023


        at  5dc68f920a97034ad92e07caacf92165279c0a63 (commit)


commit 5dc68f920a97034ad92e07caacf92165279c0a63
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Tue Sep 12 09:32:30 2023 -0400

    20825: Allow arvwf: in 'run'
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/sdk/cwl/arvados_cwl/arvworkflow.py b/sdk/cwl/arvados_cwl/arvworkflow.py
index 424bb27492..6b6634bcc9 100644
--- a/sdk/cwl/arvados_cwl/arvworkflow.py
+++ b/sdk/cwl/arvados_cwl/arvworkflow.py
@@ -147,7 +147,7 @@ def make_wrapper_workflow(arvRunner, main, packed, project_uuid, name, git_info,
 
 
 def rel_ref(s, baseuri, urlexpander, merged_map, jobmapper):
-    if s.startswith("keep:"):
+    if s.startswith("keep:") or s.startswith("arvwf:"):
         return s
 
     uri = urlexpander(s, baseuri)

commit 9ff9b33208ebffbb5be7fbe0a684b981f16e0533
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Mon Sep 11 21:49:55 2023 -0400

    20825: Make SeparateRunner reusable
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py
index 487ab70909..aa9fa1e903 100644
--- a/sdk/cwl/arvados_cwl/arvcontainer.py
+++ b/sdk/cwl/arvados_cwl/arvcontainer.py
@@ -587,7 +587,7 @@ class RunnerContainer(Runner):
                 "ram": 1024*1024 * (math.ceil(self.submit_runner_ram) + math.ceil(self.collection_cache_size)),
                 "API": True
             },
-            "use_existing": False, # Never reuse the runner container - see #15497.
+            "use_existing": self.reuse_runner,
             "properties": {}
         }
 
diff --git a/sdk/cwl/arvados_cwl/arvworkflow.py b/sdk/cwl/arvados_cwl/arvworkflow.py
index 6144f6f2f1..424bb27492 100644
--- a/sdk/cwl/arvados_cwl/arvworkflow.py
+++ b/sdk/cwl/arvados_cwl/arvworkflow.py
@@ -787,7 +787,8 @@ class ArvadosWorkflow(Workflow):
                                secret_store=self.arvrunner.secret_store,
                                collection_cache_size=runtimeContext.collection_cache_size,
                                collection_cache_is_default=self.arvrunner.should_estimate_cache_size,
-                               git_info=runtimeContext.git_info).job(joborder, output_callback, runtimeContext)
+                               git_info=runtimeContext.git_info,
+                               reuse_runner=True).job(joborder, output_callback, runtimeContext)
 
 
     def job(self, joborder, output_callback, runtimeContext):
diff --git a/sdk/cwl/arvados_cwl/runner.py b/sdk/cwl/arvados_cwl/runner.py
index 4432813f6a..763d9d7e12 100644
--- a/sdk/cwl/arvados_cwl/runner.py
+++ b/sdk/cwl/arvados_cwl/runner.py
@@ -828,7 +828,8 @@ class Runner(Process):
                  priority=None, secret_store=None,
                  collection_cache_size=256,
                  collection_cache_is_default=True,
-                 git_info=None):
+                 git_info=None,
+                 reuse_runner=False):
 
         self.loadingContext = loadingContext.copy()
 
@@ -861,6 +862,7 @@ class Runner(Process):
         self.enable_dev = self.loadingContext.enable_dev
         self.git_info = git_info
         self.fast_parser = self.loadingContext.fast_parser
+        self.reuse_runner = reuse_runner
 
         self.submit_runner_cores = 1
         self.submit_runner_ram = 1024  # defaut 1 GiB

commit f9eebd01bb13511dd9644a061a709ac115c47d47
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Mon Sep 11 21:44:48 2023 -0400

    20825: Remove print statement
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/sdk/cwl/arvados_cwl/arvworkflow.py b/sdk/cwl/arvados_cwl/arvworkflow.py
index 440018a366..6144f6f2f1 100644
--- a/sdk/cwl/arvados_cwl/arvworkflow.py
+++ b/sdk/cwl/arvados_cwl/arvworkflow.py
@@ -771,8 +771,6 @@ class ArvadosWorkflow(Workflow):
         if rpn:
             name = builder.do_eval(rpn)
 
-        print("BBB name", name)
-
         return RunnerContainer(self.arvrunner,
                                self,
                                self.loadingContext,

commit 86ab45de6b3924d50172411c24242dd265b5e9fc
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Mon Sep 11 21:38:13 2023 -0400

    20825: Add runnerProcessName
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml
index 751c71bdf8..389add4104 100644
--- a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml
+++ b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml
@@ -444,3 +444,7 @@ $graph:
       jsonldPredicate:
         _id: "@type"
         _type: "@vocab"
+    - name: runnerProcessName
+      type: ['null', string, cwl:Expression]
+      doc: |
+        Custom name to use for the runner process
diff --git a/sdk/cwl/arvados_cwl/arvworkflow.py b/sdk/cwl/arvados_cwl/arvworkflow.py
index 181a53b5f7..440018a366 100644
--- a/sdk/cwl/arvados_cwl/arvworkflow.py
+++ b/sdk/cwl/arvados_cwl/arvworkflow.py
@@ -763,7 +763,16 @@ class ArvadosWorkflow(Workflow):
         return ArvadosCommandTool(self.arvrunner, wf_runner, self.loadingContext).job(joborder_resolved, output_callback, runtimeContext)
 
 
-    def separateRunner(self, joborder, output_callback, runtimeContext):
+    def separateRunner(self, joborder, output_callback, runtimeContext, req, builder):
+
+        name = runtimeContext.name
+
+        rpn = req.get("runnerProcessName")
+        if rpn:
+            name = builder.do_eval(rpn)
+
+        print("BBB name", name)
+
         return RunnerContainer(self.arvrunner,
                                self,
                                self.loadingContext,
@@ -771,7 +780,7 @@ class ArvadosWorkflow(Workflow):
                                None,
                                None,
                                submit_runner_ram=runtimeContext.submit_runner_ram,
-                               name=runtimeContext.name,
+                               name=name,
                                on_error=runtimeContext.on_error,
                                submit_runner_image=runtimeContext.submit_runner_image,
                                intermediate_output_ttl=runtimeContext.intermediate_output_ttl,
@@ -794,7 +803,7 @@ class ArvadosWorkflow(Workflow):
 
         req, _ = self.get_requirement("http://arvados.org/cwl#SeparateRunner")
         if req:
-            return self.separateRunner(joborder, output_callback, runtimeContext)
+            return self.separateRunner(joborder, output_callback, runtimeContext, req, builder)
 
         return super(ArvadosWorkflow, self).job(joborder, output_callback, runtimeContext)
 

commit b89baf6ccf8ef6de4b3ab139ebf48bc53d4140ed
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Mon Sep 11 21:25:31 2023 -0400

    20825: Fix edge case producing incorrect error
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/sdk/cwl/arvados_cwl/executor.py b/sdk/cwl/arvados_cwl/executor.py
index 55417becdd..43d7b60006 100644
--- a/sdk/cwl/arvados_cwl/executor.py
+++ b/sdk/cwl/arvados_cwl/executor.py
@@ -869,7 +869,8 @@ The 'jobs' API is no longer supported.
                     if (self.task_queue.in_flight + len(self.processes)) > 0:
                         self.workflow_eval_lock.wait(3)
                     else:
-                        logger.error("Workflow is deadlocked, no runnable processes and not waiting on any pending processes.")
+                        if self.final_status is None:
+                            logger.error("Workflow is deadlocked, no runnable processes and not waiting on any pending processes.")
                         break
 
                 if self.stop_polling.is_set():

commit 927e6781a1bb21e5ef1d887b89916685601b8fd4
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Mon Sep 11 21:09:14 2023 -0400

    20825: Fix git_info
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py
index a94fdac522..487ab70909 100644
--- a/sdk/cwl/arvados_cwl/arvcontainer.py
+++ b/sdk/cwl/arvados_cwl/arvcontainer.py
@@ -611,6 +611,8 @@ class RunnerContainer(Runner):
                 "content": packed
             }
             container_req["properties"]["template_uuid"] = self.embedded_tool.tool["id"][6:33]
+        elif self.embedded_tool.tool.get("id", "").startswith("file:"):
+            raise Exception("Tool id '%s' is a local file but expected keep: or arvwf:" % self.embedded_tool.tool.get("id"))
         else:
             main = self.loadingContext.loader.idx["_:main"]
             if main.get("id") == "_:main":
diff --git a/sdk/cwl/arvados_cwl/arvworkflow.py b/sdk/cwl/arvados_cwl/arvworkflow.py
index a42860aaad..181a53b5f7 100644
--- a/sdk/cwl/arvados_cwl/arvworkflow.py
+++ b/sdk/cwl/arvados_cwl/arvworkflow.py
@@ -780,7 +780,7 @@ class ArvadosWorkflow(Workflow):
                                secret_store=self.arvrunner.secret_store,
                                collection_cache_size=runtimeContext.collection_cache_size,
                                collection_cache_is_default=self.arvrunner.should_estimate_cache_size,
-                               git_info=self.arvrunner.git_info).job(joborder, output_callback, runtimeContext)
+                               git_info=runtimeContext.git_info).job(joborder, output_callback, runtimeContext)
 
 
     def job(self, joborder, output_callback, runtimeContext):
diff --git a/sdk/cwl/arvados_cwl/context.py b/sdk/cwl/arvados_cwl/context.py
index 125527f783..86812a419a 100644
--- a/sdk/cwl/arvados_cwl/context.py
+++ b/sdk/cwl/arvados_cwl/context.py
@@ -43,6 +43,7 @@ class ArvRuntimeContext(RuntimeContext):
         self.varying_url_params = ""
         self.prefer_cached_downloads = False
         self.cached_docker_lookups = {}
+        self.git_info = {}
 
         super(ArvRuntimeContext, self).__init__(kwargs)
 
diff --git a/sdk/cwl/arvados_cwl/executor.py b/sdk/cwl/arvados_cwl/executor.py
index ce8aa42095..55417becdd 100644
--- a/sdk/cwl/arvados_cwl/executor.py
+++ b/sdk/cwl/arvados_cwl/executor.py
@@ -603,6 +603,8 @@ The 'jobs' API is no longer supported.
                 if git_info[g]:
                     logger.info("  %s: %s", g.split("#", 1)[1], git_info[g])
 
+        runtimeContext.git_info = git_info
+
         workbench1 = self.api.config()["Services"]["Workbench1"]["ExternalURL"]
         workbench2 = self.api.config()["Services"]["Workbench2"]["ExternalURL"]
         controller = self.api.config()["Services"]["Controller"]["ExternalURL"]

commit 5e4f9bc4bba1ec5032f853266bd3f747e3d52588
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Mon Sep 11 20:58:18 2023 -0400

    20825: Fixing invocation
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/sdk/cwl/arvados_cwl/arvworkflow.py b/sdk/cwl/arvados_cwl/arvworkflow.py
index 583ef64946..a42860aaad 100644
--- a/sdk/cwl/arvados_cwl/arvworkflow.py
+++ b/sdk/cwl/arvados_cwl/arvworkflow.py
@@ -38,6 +38,7 @@ import ruamel.yaml as yaml
 from .runner import (upload_dependencies, packed_workflow, upload_workflow_collection,
                      trim_anonymous_location, remove_redundant_fields, discover_secondary_files,
                      make_builder, arvados_jobs_image, FileUpdates)
+from .arvcontainer import RunnerContainer
 from .pathmapper import ArvPathMapper, trim_listing
 from .arvtool import ArvadosCommandTool, set_cluster_target
 from ._version import __version__
@@ -762,11 +763,13 @@ class ArvadosWorkflow(Workflow):
         return ArvadosCommandTool(self.arvrunner, wf_runner, self.loadingContext).job(joborder_resolved, output_callback, runtimeContext)
 
 
-    def separateRunner(joborder, output_callback, runtimeContext, builder):
-        return RunnerContainer(self, self.tool, self.loadingContext,
+    def separateRunner(self, joborder, output_callback, runtimeContext):
+        return RunnerContainer(self.arvrunner,
+                               self,
+                               self.loadingContext,
                                runtimeContext.enable_reuse,
-                               self.output_name,
-                               self.output_tags,
+                               None,
+                               None,
                                submit_runner_ram=runtimeContext.submit_runner_ram,
                                name=runtimeContext.name,
                                on_error=runtimeContext.on_error,
@@ -776,8 +779,8 @@ class ArvadosWorkflow(Workflow):
                                priority=runtimeContext.priority,
                                secret_store=self.arvrunner.secret_store,
                                collection_cache_size=runtimeContext.collection_cache_size,
-                               collection_cache_is_default=self.should_estimate_cache_size,
-                               git_info=self.arvrunner.git_info)
+                               collection_cache_is_default=self.arvrunner.should_estimate_cache_size,
+                               git_info=self.arvrunner.git_info).job(joborder, output_callback, runtimeContext)
 
 
     def job(self, joborder, output_callback, runtimeContext):
@@ -791,7 +794,7 @@ class ArvadosWorkflow(Workflow):
 
         req, _ = self.get_requirement("http://arvados.org/cwl#SeparateRunner")
         if req:
-            return self.separateRunner(joborder, output_callback, runtimeContext, builder)
+            return self.separateRunner(joborder, output_callback, runtimeContext)
 
         return super(ArvadosWorkflow, self).job(joborder, output_callback, runtimeContext)
 

commit 950735385f7d8871839e7690e7310218cb6dd9a0
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Mon Sep 11 20:42:55 2023 -0400

    20825: arv:SeparateRunner first try
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml
index f4246ed70a..751c71bdf8 100644
--- a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml
+++ b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml
@@ -429,3 +429,18 @@ $graph:
       doc: |
         If the container failed on its first run, re-submit the
         container with the RAM request multiplied by this factor.
+
+- name: SeparateRunner
+  type: record
+  extends: cwl:ProcessRequirement
+  inVocab: false
+  doc: |
+    Indicates that a subworkflow should run in a separate
+    arvados-cwl-runner process.
+  fields:
+    - name: class
+      type: string
+      doc: "Always 'arv:SeparateRunner'"
+      jsonldPredicate:
+        _id: "@type"
+        _type: "@vocab"
diff --git a/sdk/cwl/arvados_cwl/arvworkflow.py b/sdk/cwl/arvados_cwl/arvworkflow.py
index cddcd15c54..583ef64946 100644
--- a/sdk/cwl/arvados_cwl/arvworkflow.py
+++ b/sdk/cwl/arvados_cwl/arvworkflow.py
@@ -597,17 +597,8 @@ class ArvadosWorkflow(Workflow):
         super(ArvadosWorkflow, self).__init__(toolpath_object, loadingContext)
         self.cluster_target_req, _ = self.get_requirement("http://arvados.org/cwl#ClusterTarget")
 
-    def job(self, joborder, output_callback, runtimeContext):
-
-        builder = make_builder(joborder, self.hints, self.requirements, runtimeContext, self.metadata)
-        runtimeContext = set_cluster_target(self.tool, self.arvrunner, builder, runtimeContext)
-
-        req, _ = self.get_requirement("http://arvados.org/cwl#RunInSingleContainer")
-        if not req:
-            return super(ArvadosWorkflow, self).job(joborder, output_callback, runtimeContext)
-
-        # RunInSingleContainer is true
 
+    def runInSingleContainer(self, joborder, output_callback, runtimeContext, builder):
         with SourceLine(self.tool, None, WorkflowException, logger.isEnabledFor(logging.DEBUG)):
             if "id" not in self.tool:
                 raise WorkflowException("%s object must have 'id'" % (self.tool["class"]))
@@ -770,6 +761,41 @@ class ArvadosWorkflow(Workflow):
         })
         return ArvadosCommandTool(self.arvrunner, wf_runner, self.loadingContext).job(joborder_resolved, output_callback, runtimeContext)
 
+
+    def separateRunner(joborder, output_callback, runtimeContext, builder):
+        return RunnerContainer(self, self.tool, self.loadingContext,
+                               runtimeContext.enable_reuse,
+                               self.output_name,
+                               self.output_tags,
+                               submit_runner_ram=runtimeContext.submit_runner_ram,
+                               name=runtimeContext.name,
+                               on_error=runtimeContext.on_error,
+                               submit_runner_image=runtimeContext.submit_runner_image,
+                               intermediate_output_ttl=runtimeContext.intermediate_output_ttl,
+                               merged_map=None,
+                               priority=runtimeContext.priority,
+                               secret_store=self.arvrunner.secret_store,
+                               collection_cache_size=runtimeContext.collection_cache_size,
+                               collection_cache_is_default=self.should_estimate_cache_size,
+                               git_info=self.arvrunner.git_info)
+
+
+    def job(self, joborder, output_callback, runtimeContext):
+
+        builder = make_builder(joborder, self.hints, self.requirements, runtimeContext, self.metadata)
+        runtimeContext = set_cluster_target(self.tool, self.arvrunner, builder, runtimeContext)
+
+        req, _ = self.get_requirement("http://arvados.org/cwl#RunInSingleContainer")
+        if req:
+            return self.runInSingleContainer(joborder, output_callback, runtimeContext, builder)
+
+        req, _ = self.get_requirement("http://arvados.org/cwl#SeparateRunner")
+        if req:
+            return self.separateRunner(joborder, output_callback, runtimeContext, builder)
+
+        return super(ArvadosWorkflow, self).job(joborder, output_callback, runtimeContext)
+
+
     def make_workflow_step(self,
                            toolpath_object,      # type: Dict[Text, Any]
                            pos,                  # type: int
diff --git a/sdk/cwl/tests/arvados-tests.yml b/sdk/cwl/tests/arvados-tests.yml
index a93c64a224..e0bdd8a5a3 100644
--- a/sdk/cwl/tests/arvados-tests.yml
+++ b/sdk/cwl/tests/arvados-tests.yml
@@ -494,3 +494,9 @@
   output: {}
   tool: oom/19975-oom3.cwl
   doc: "Test feature 19975 - retry on custom error"
+
+- job: null
+  output:
+    out: out
+  tool: wf/runseparate-wf.cwl
+  doc: "test arv:SeparateRunner"

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list