[arvados] updated: 2.1.0-3009-gd6a05be90

git repository hosting git at public.arvados.org
Wed Nov 2 03:18:56 UTC 2022


Summary of changes:
 sdk/cwl/arvados_cwl/__init__.py    |  2 +-
 sdk/cwl/arvados_cwl/arvworkflow.py | 57 +++++++++++++++++++++++++++++++++++++-
 sdk/cwl/arvados_cwl/fsaccess.py    |  9 +++---
 3 files changed, 62 insertions(+), 6 deletions(-)

       via  d6a05be901e501bd10c00702fc8540dc1efba68a (commit)
      from  d825b0330a1b51d8ccbb25e7dc7d9aac26e781e0 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit d6a05be901e501bd10c00702fc8540dc1efba68a
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Tue Nov 1 23:17:51 2022 -0400

    19688: Make registered workflows lightweight wrappers
    
    Puts the actual workflow in a collection
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/sdk/cwl/arvados_cwl/__init__.py b/sdk/cwl/arvados_cwl/__init__.py
index 9200c5caa..79bcee58e 100644
--- a/sdk/cwl/arvados_cwl/__init__.py
+++ b/sdk/cwl/arvados_cwl/__init__.py
@@ -359,7 +359,7 @@ def main(args=sys.argv[1:],
         # unit tests.
         stdout = None
 
-    if arvargs.submit and arvargs.wait is False and arvargs.workflow.startswith("arvwf:"):
+    if arvargs.submit and arvargs.workflow.startswith("arvwf:"):
         executor.loadingContext.do_validate = False
         executor.fast_submit = True
 
diff --git a/sdk/cwl/arvados_cwl/arvworkflow.py b/sdk/cwl/arvados_cwl/arvworkflow.py
index 02c9c7a97..c0b479c37 100644
--- a/sdk/cwl/arvados_cwl/arvworkflow.py
+++ b/sdk/cwl/arvados_cwl/arvworkflow.py
@@ -13,6 +13,8 @@ import logging
 from schema_salad.sourceline import SourceLine, cmap
 import schema_salad.ref_resolver
 
+import arvados.collection
+
 from cwltool.pack import pack
 from cwltool.load_tool import fetch_document, resolve_and_validate_document
 from cwltool.process import shortname
@@ -37,6 +39,57 @@ metrics = logging.getLogger('arvados.cwl-runner.metrics')
 max_res_pars = ("coresMin", "coresMax", "ramMin", "ramMax", "tmpdirMin", "tmpdirMax")
 sum_res_pars = ("outdirMin", "outdirMax")
 
+def make_wrapper_workflow(arvRunner, main, packed, project_uuid, name):
+    col = arvados.collection.Collection(api_client=arvRunner.api,
+                                        keep_client=arvRunner.keep_client)
+
+    with col.open("workflow.json", "wt") as f:
+        json.dump(packed, f, sort_keys=True, indent=4, separators=(',',': '))
+
+    pdh = col.portable_data_hash()
+
+    existing = arvRunner.api.collections().list(filters=[["portable_data_hash", "=", pdh], ["owner_uuid", "=", project_uuid]]).execute(num_retries=arvRunner.num_retries)
+    if len(existing["items"]) == 0:
+        col.save_new(name=name, owner_uuid=project_uuid, ensure_unique_name=True)
+
+    # now construct the wrapper
+
+    step = {
+        "id": "#main/step",
+        "in": [],
+        "out": [],
+        "run": "keep:%s/workflow.json#main" % pdh
+    }
+
+    wrapper = {
+        "class": "Workflow",
+        "id": "#main",
+        "inputs": main["inputs"],
+        "outputs": [],
+        "steps": [step]
+    }
+
+    for i in main["inputs"]:
+        step["in"].append({
+            "id": "#main/step/%s" % shortname(i["id"]),
+            "source": i["id"]
+        })
+
+    for i in main["outputs"]:
+        step["out"].append({"id": "#main/step/%s" % shortname(i["id"])})
+        wrapper["outputs"].append({"outputSource": "#main/step/%s" % shortname(i["id"]),
+                                   "type": i["type"],
+                                   "id": i["id"]})
+
+    wrapper["requirements"] = [{"class": "SubworkflowFeatureRequirement"}]
+
+    if main.get("requirements"):
+        wrapper["requirements"].extend(main["requirements"])
+    if main.get("hints"):
+        wrapper["hints"] = main["hints"]
+
+    return json.dumps({"cwlVersion": "v1.2", "$graph": [wrapper]}, sort_keys=True, indent=4, separators=(',',': '))
+
 def upload_workflow(arvRunner, tool, job_order, project_uuid,
                     runtimeContext, uuid=None,
                     submit_runner_ram=0, name=None, merged_map=None,
@@ -84,11 +137,13 @@ def upload_workflow(arvRunner, tool, job_order, project_uuid,
 
     main["hints"] = hints
 
+    wrapper = make_wrapper_workflow(arvRunner, main, packed, project_uuid, name)
+
     body = {
         "workflow": {
             "name": name,
             "description": tool.tool.get("doc", ""),
-            "definition":json.dumps(packed, sort_keys=True, indent=4, separators=(',',': '))
+            "definition": wrapper
         }}
     if project_uuid:
         body["workflow"]["owner_uuid"] = project_uuid
diff --git a/sdk/cwl/arvados_cwl/fsaccess.py b/sdk/cwl/arvados_cwl/fsaccess.py
index 4da8f8556..5c09e671f 100644
--- a/sdk/cwl/arvados_cwl/fsaccess.py
+++ b/sdk/cwl/arvados_cwl/fsaccess.py
@@ -244,10 +244,11 @@ class CollectionFetcher(DefaultFetcher):
         try:
             if url.startswith("http://arvados.org/cwl"):
                 return True
-            if url.startswith("keep:"):
-                return self.fsaccess.exists(url)
-            if url.startswith("arvwf:"):
-                if self.fetch_text(url):
+            urld, _ = urllib.parse.urldefrag(url)
+            if urld.startswith("keep:"):
+                return self.fsaccess.exists(urld)
+            if urld.startswith("arvwf:"):
+                if self.fetch_text(urld):
                     return True
         except arvados.errors.NotFoundError:
             return False

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list