[arvados] created: 2.7.0-4988-g94c05a1d46

git repository hosting git at public.arvados.org
Wed Oct 11 21:14:09 UTC 2023


        at  94c05a1d46dfda5766c3a3a6a220e3471fd4b5ec (commit)


commit 94c05a1d46dfda5766c3a3a6a220e3471fd4b5ec
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Wed Oct 11 16:57:59 2023 -0400

    20933: Code cleanups
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/sdk/cwl/arvados_cwl/runner.py b/sdk/cwl/arvados_cwl/runner.py
index 554f137449..f6aab4b93f 100644
--- a/sdk/cwl/arvados_cwl/runner.py
+++ b/sdk/cwl/arvados_cwl/runner.py
@@ -983,4 +983,5 @@ def print_keep_deps(arvRunner, runtimeContext, merged_map, tool):
             if g:
                 references.add(g[1])
 
-    arvRunner.stdout.write(json.dumps(sorted(list(references)))+"\n")
+    json.dump(sorted(references), arvRunner.stdout)
+    print(file=arvRunner.stdout)
diff --git a/sdk/cwl/setup.py b/sdk/cwl/setup.py
index 1be2c119c2..f8aace0683 100644
--- a/sdk/cwl/setup.py
+++ b/sdk/cwl/setup.py
@@ -44,6 +44,9 @@ setup(name='arvados-cwl-runner',
           'msgpack==1.0.3',
           'importlib-metadata<5',
           'setuptools>=40.3.0',
+
+          # zipp 3.16 dropped support for Python 3.7, so this can be
+          # removed when we drop support for 3.7 as well.
           'zipp<3.16.0'
       ],
       data_files=[
diff --git a/sdk/python/arvados/commands/arv_copy.py b/sdk/python/arvados/commands/arv_copy.py
index 6c7d873af4..7f5245db86 100755
--- a/sdk/python/arvados/commands/arv_copy.py
+++ b/sdk/python/arvados/commands/arv_copy.py
@@ -333,9 +333,15 @@ def copy_workflow(wf_uuid, src, dst, args):
         try:
             result = subprocess.run(["arvados-cwl-runner", "--quiet", "--print-keep-deps", "arvwf:"+wf_uuid],
                                     capture_output=True, env=env)
-        except (FileNotFoundError, subprocess.CalledProcessError):
-            logger.error('Copying workflows requires arvados-cwl-runner 2.7.1 or later to be installed in PATH.')
-            return
+        except FileNotFoundError:
+            no_arv_copy = True
+        else:
+            no_arv_copy = result.returncode == 2
+
+        if no_arv_copy:
+            raise Exception('Copying workflows requires arvados-cwl-runner 2.7.1 or later to be installed in PATH.')
+        elif result.returncode != 0:
+            raise Exception('There was an error getting Keep dependencies from workflow using arvados-cwl-runner --print-keep-deps')
 
         locations = json.loads(result.stdout)
 

commit 97e40209600c094eac15085627e49da52ab0f517
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Wed Oct 11 16:32:50 2023 -0400

    20933: Use [0-9] instead of \d in regex
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/sdk/python/arvados/util.py b/sdk/python/arvados/util.py
index c3d3def723..88adc8879b 100644
--- a/sdk/python/arvados/util.py
+++ b/sdk/python/arvados/util.py
@@ -24,9 +24,9 @@ CR_UNCOMMITTED = 'Uncommitted'
 CR_COMMITTED = 'Committed'
 CR_FINAL = 'Final'
 
-keep_locator_pattern = re.compile(r'[0-9a-f]{32}\+\d+(\+\S+)*')
-signed_locator_pattern = re.compile(r'[0-9a-f]{32}\+\d+(\+\S+)*\+A\S+(\+\S+)*')
-portable_data_hash_pattern = re.compile(r'[0-9a-f]{32}\+\d+')
+keep_locator_pattern = re.compile(r'[0-9a-f]{32}\+[0-9]+(\+\S+)*')
+signed_locator_pattern = re.compile(r'[0-9a-f]{32}\+[0-9]+(\+\S+)*\+A\S+(\+\S+)*')
+portable_data_hash_pattern = re.compile(r'[0-9a-f]{32}\+[0-9]+')
 uuid_pattern = re.compile(r'[a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15}')
 collection_uuid_pattern = re.compile(r'[a-z0-9]{5}-4zz18-[a-z0-9]{15}')
 group_uuid_pattern = re.compile(r'[a-z0-9]{5}-j7d0g-[a-z0-9]{15}')
@@ -34,9 +34,9 @@ user_uuid_pattern = re.compile(r'[a-z0-9]{5}-tpzed-[a-z0-9]{15}')
 link_uuid_pattern = re.compile(r'[a-z0-9]{5}-o0j2j-[a-z0-9]{15}')
 job_uuid_pattern = re.compile(r'[a-z0-9]{5}-8i9sb-[a-z0-9]{15}')
 container_uuid_pattern = re.compile(r'[a-z0-9]{5}-dz642-[a-z0-9]{15}')
-manifest_pattern = re.compile(r'((\S+)( +[a-f0-9]{32}(\+\d+)(\+\S+)*)+( +\d+:\d+:\S+)+$)+', flags=re.MULTILINE)
-keep_file_locator_pattern = re.compile(r'([0-9a-f]{32}\+\d+)/(.*)')
-keepuri_pattern = re.compile(r'keep:([0-9a-f]{32}\+\d+)/(.*)')
+manifest_pattern = re.compile(r'((\S+)( +[a-f0-9]{32}(\+[0-9]+)(\+\S+)*)+( +[0-9]+:[0-9]+:\S+)+$)+', flags=re.MULTILINE)
+keep_file_locator_pattern = re.compile(r'([0-9a-f]{32}\+[0-9]+)/(.*)')
+keepuri_pattern = re.compile(r'keep:([0-9a-f]{32}\+[0-9]+)/(.*)')
 
 def _deprecated(version=None, preferred=None):
     """Mark a callable as deprecated in the SDK

commit 6a16dad57c5d082d00199a13440357b8ff204a77
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Thu Oct 5 16:21:39 2023 -0400

    20933: Update arv-copy doc to mention it requires arvados-cwl-runner.
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/doc/user/topics/arv-copy.html.textile.liquid b/doc/user/topics/arv-copy.html.textile.liquid
index 8174860478..a05620d62d 100644
--- a/doc/user/topics/arv-copy.html.textile.liquid
+++ b/doc/user/topics/arv-copy.html.textile.liquid
@@ -71,10 +71,14 @@ Additionally, if you need to specify the storage classes where to save the copie
 
 h3. How to copy a workflow
 
+Copying workflows requires @arvados-cwl-runner@ to be available in your @$PATH@.
+
 We will use the uuid @jutro-7fd4e-mkmmq53m1ze6apx@ as an example workflow.
 
+Arv-copy will infer the source cluster is @jutro@ from the object uuid, and destination cluster is @pirca@ from @--project-uuid@.
+
 <notextile>
-<pre><code>~$ <span class="userinput">arv-copy --src jutro --dst pirca --project-uuid pirca-j7d0g-ecak8knpefz8ere jutro-7fd4e-mkmmq53m1ze6apx</span>
+<pre><code>~$ <span class="userinput">arv-copy --project-uuid pirca-j7d0g-ecak8knpefz8ere jutro-7fd4e-mkmmq53m1ze6apx</span>
 ae480c5099b81e17267b7445e35b4bc7+180: 23M / 23M 100.0%
 2463fa9efeb75e099685528b3b9071e0+438: 156M / 156M 100.0%
 jutro-4zz18-vvvqlops0a0kpdl: 94M / 94M 100.0%
@@ -91,6 +95,8 @@ h3. How to copy a project
 
 We will use the uuid @jutro-j7d0g-xj19djofle3aryq@ as an example project.
 
+Arv-copy will infer the source cluster is @jutro@ from the source project uuid, and destination cluster is @pirca@ from @--project-uuid@.
+
 <notextile>
 <pre><code>~$ <span class="userinput">arv-copy --project-uuid pirca-j7d0g-lr8sq3tx3ovn68k jutro-j7d0g-xj19djofle3aryq</span>
 2021-09-08 21:29:32 arvados.arv-copy[6377] INFO:

commit 2c0334b2c3209b1fbfe52373ce9108a45149c80c
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Thu Oct 5 15:52:10 2023 -0400

    20933: Handle errors when calling arvados-cwl-runner
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/sdk/python/arvados/commands/arv_copy.py b/sdk/python/arvados/commands/arv_copy.py
index ef0e031e60..6c7d873af4 100755
--- a/sdk/python/arvados/commands/arv_copy.py
+++ b/sdk/python/arvados/commands/arv_copy.py
@@ -171,6 +171,9 @@ def main():
     for d in listvalues(local_repo_dir):
         shutil.rmtree(d, ignore_errors=True)
 
+    if not result:
+        exit(1)
+
     # If no exception was thrown and the response does not have an
     # error_token field, presume success
     if result is None or 'error_token' in result or 'uuid' not in result:
@@ -327,8 +330,13 @@ def copy_workflow(wf_uuid, src, dst, args):
         env = {"ARVADOS_API_HOST": urllib.parse.urlparse(src._rootDesc["rootUrl"]).netloc,
                "ARVADOS_API_TOKEN": src.api_token,
                "PATH": os.environ["PATH"]}
-        result = subprocess.run(["arvados-cwl-runner", "--quiet", "--print-keep-deps", "arvwf:"+wf_uuid],
-                                capture_output=True, env=env)
+        try:
+            result = subprocess.run(["arvados-cwl-runner", "--quiet", "--print-keep-deps", "arvwf:"+wf_uuid],
+                                    capture_output=True, env=env)
+        except (FileNotFoundError, subprocess.CalledProcessError):
+            logger.error('Copying workflows requires arvados-cwl-runner 2.7.1 or later to be installed in PATH.')
+            return
+
         locations = json.loads(result.stdout)
 
         if locations:

commit 4eb78e44b6f046607d0b22cf2dbe3133e458463d
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Thu Oct 5 09:39:51 2023 -0400

    20933: Add --print-keep-deps test
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/sdk/cwl/arvados_cwl/executor.py b/sdk/cwl/arvados_cwl/executor.py
index 330dba3dbe..677e10d265 100644
--- a/sdk/cwl/arvados_cwl/executor.py
+++ b/sdk/cwl/arvados_cwl/executor.py
@@ -729,7 +729,7 @@ The 'jobs' API is no longer supported.
 
             if runtimeContext.print_keep_deps:
                 # Just find and print out all the collection dependencies and exit
-                print_keep_deps(tool)
+                print_keep_deps(self, runtimeContext, merged_map, tool)
                 return (None, "success")
 
             # Did not register a workflow, we're going to submit
diff --git a/sdk/cwl/arvados_cwl/runner.py b/sdk/cwl/arvados_cwl/runner.py
index 860f8a1b9a..554f137449 100644
--- a/sdk/cwl/arvados_cwl/runner.py
+++ b/sdk/cwl/arvados_cwl/runner.py
@@ -948,18 +948,19 @@ class Runner(Process):
             self.arvrunner.output_callback(outputs, processStatus)
 
 
-def print_keep_deps_visitor(references, doc_loader, tool):
+def print_keep_deps_visitor(api, runtimeContext, references, doc_loader, tool):
     def collect_locators(obj):
         loc = obj.get("location", "")
 
         g = arvados.util.keepuri_pattern.match(loc)
-        if g and g[1] not in references:
-            references.append(g[1])
-            return
+        if g:
+            references.add(g[1])
+
+        if obj.get("class") == "http://arvados.org/cwl#WorkflowRunnerResources" and "acrContainerImage" in obj:
+            references.add(obj["acrContainerImage"])
 
-        loc = obj.get("http://arvados.org/cwl#dockerCollectionPDH", "") or obj.get("acrContainerImage")
-        if loc:
-            references.append(loc)
+        if obj.get("class") == "DockerRequirement":
+            references.add(arvados_cwl.arvdocker.arv_docker_get_image(api, obj, False, runtimeContext))
 
     sc_result = scandeps(tool["id"], tool,
                          set(),
@@ -971,8 +972,15 @@ def print_keep_deps_visitor(references, doc_loader, tool):
     visit_class(tool, ("DockerRequirement", "http://arvados.org/cwl#WorkflowRunnerResources"), collect_locators)
 
 
-def print_keep_deps(tool):
-    references = []
+def print_keep_deps(arvRunner, runtimeContext, merged_map, tool):
+    references = set()
+
+    tool.visit(partial(print_keep_deps_visitor, arvRunner.api, runtimeContext, references, tool.doc_loader))
+
+    for mm in merged_map:
+        for k, v in merged_map[mm].resolved.items():
+            g = arvados.util.keepuri_pattern.match(v)
+            if g:
+                references.add(g[1])
 
-    tool.visit(partial(print_keep_deps_visitor, references, tool.doc_loader))
-    print(json.dumps(references))
+    arvRunner.stdout.write(json.dumps(sorted(list(references)))+"\n")
diff --git a/sdk/cwl/tests/test_submit.py b/sdk/cwl/tests/test_submit.py
index b10d1159f9..c8bf127951 100644
--- a/sdk/cwl/tests/test_submit.py
+++ b/sdk/cwl/tests/test_submit.py
@@ -1774,3 +1774,18 @@ class TestCreateWorkflow(unittest.TestCase):
         self.assertEqual(stubs.capture_stdout.getvalue(),
                          stubs.expect_workflow_uuid + '\n')
         self.assertEqual(exited, 0)
+
+
+class TestPrintKeepDeps(unittest.TestCase):
+    @stubs()
+    def test_print_keep_deps(self, stubs):
+        # test --print-keep-deps which is used by arv-copy
+
+        exited = arvados_cwl.main(
+            ["--print-keep-deps", "--debug",
+             "tests/wf/submit_wf_map.cwl"],
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api)
+
+        self.assertEqual(stubs.capture_stdout.getvalue(),
+                         '["5d373e7629203ce39e7c22af98a0f881+52", "999999999999999999999999999999d4+99"]' + '\n')
+        self.assertEqual(exited, 0)

commit bd8c170d90e2fbb56c5e72717df61e971f518b84
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Wed Oct 4 17:50:05 2023 -0400

    20933: Fix tests
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py
index 11ae66b139..ea7c9f7a33 100644
--- a/sdk/cwl/arvados_cwl/arvcontainer.py
+++ b/sdk/cwl/arvados_cwl/arvcontainer.py
@@ -560,7 +560,7 @@ class RunnerContainer(Runner):
                 }
                 self.job_order[param] = {"$include": mnt}
 
-        container_image = arvados_jobs_image(self.arvrunner, self.jobs_image, runtimeContext),
+        container_image = arvados_jobs_image(self.arvrunner, self.jobs_image, runtimeContext)
 
         workflow_runner_req, _ = self.embedded_tool.get_requirement("http://arvados.org/cwl#WorkflowRunnerResources")
         if workflow_runner_req and workflow_runner_req.get("acrContainerImage"):
diff --git a/sdk/cwl/arvados_cwl/arvworkflow.py b/sdk/cwl/arvados_cwl/arvworkflow.py
index cdce3d643a..3ad2c6419a 100644
--- a/sdk/cwl/arvados_cwl/arvworkflow.py
+++ b/sdk/cwl/arvados_cwl/arvworkflow.py
@@ -42,6 +42,7 @@ from .pathmapper import ArvPathMapper, trim_listing
 from .arvtool import ArvadosCommandTool, set_cluster_target
 from ._version import __version__
 from .util import common_prefix
+from .arvdocker import arv_docker_get_image
 
 from .perf import Perf
 
@@ -178,14 +179,14 @@ def rel_ref(s, baseuri, urlexpander, merged_map, jobmapper):
 def is_basetype(tp):
     return _basetype_re.match(tp) is not None
 
-def update_refs(d, baseuri, urlexpander, merged_map, jobmapper, runtimeContext, prefix, replacePrefix):
+def update_refs(api, d, baseuri, urlexpander, merged_map, jobmapper, runtimeContext, prefix, replacePrefix):
     if isinstance(d, MutableSequence):
         for i, s in enumerate(d):
             if prefix and isinstance(s, str):
                 if s.startswith(prefix):
                     d[i] = replacePrefix+s[len(prefix):]
             else:
-                update_refs(s, baseuri, urlexpander, merged_map, jobmapper, runtimeContext, prefix, replacePrefix)
+                update_refs(api, s, baseuri, urlexpander, merged_map, jobmapper, runtimeContext, prefix, replacePrefix)
     elif isinstance(d, MutableMapping):
         for field in ("id", "name"):
             if isinstance(d.get(field), str) and d[field].startswith("_:"):
@@ -198,8 +199,8 @@ def update_refs(d, baseuri, urlexpander, merged_map, jobmapper, runtimeContext,
             baseuri = urlexpander(d["name"], baseuri, scoped_id=True)
 
         if d.get("class") == "DockerRequirement":
-            dockerImageId = d.get("dockerImageId") or d.get("dockerPull")
-            d["http://arvados.org/cwl#dockerCollectionPDH"] = runtimeContext.cached_docker_lookups.get(dockerImageId)
+            d["http://arvados.org/cwl#dockerCollectionPDH"] = arv_docker_get_image(api, d, False,
+                                                                                   runtimeContext)
 
         for field in d:
             if field in ("location", "run", "name") and isinstance(d[field], str):
@@ -222,15 +223,21 @@ def update_refs(d, baseuri, urlexpander, merged_map, jobmapper, runtimeContext,
                     if isinstance(d["inputs"][inp], str) and not is_basetype(d["inputs"][inp]):
                         d["inputs"][inp] = rel_ref(d["inputs"][inp], baseuri, urlexpander, merged_map, jobmapper)
                     if isinstance(d["inputs"][inp], MutableMapping):
-                        update_refs(d["inputs"][inp], baseuri, urlexpander, merged_map, jobmapper, runtimeContext, prefix, replacePrefix)
+                        update_refs(api, d["inputs"][inp], baseuri, urlexpander, merged_map, jobmapper, runtimeContext, prefix, replacePrefix)
                 continue
 
+            if field in ("requirements", "hints") and isinstance(d[field], MutableMapping):
+                dr = d[field].get("DockerRequirement")
+                if dr:
+                    dr["http://arvados.org/cwl#dockerCollectionPDH"] = arv_docker_get_image(api, dr, False,
+                                                                                            runtimeContext)
+
             if field == "$schemas":
                 for n, s in enumerate(d["$schemas"]):
                     d["$schemas"][n] = rel_ref(d["$schemas"][n], baseuri, urlexpander, merged_map, jobmapper)
                 continue
 
-            update_refs(d[field], baseuri, urlexpander, merged_map, jobmapper, runtimeContext, prefix, replacePrefix)
+            update_refs(api, d[field], baseuri, urlexpander, merged_map, jobmapper, runtimeContext, prefix, replacePrefix)
 
 
 def fix_schemadef(req, baseuri, urlexpander, merged_map, jobmapper, pdh):
@@ -327,7 +334,7 @@ def upload_workflow(arvRunner, tool, job_order, project_uuid,
 
         # 2. find $import, $include, $schema, run, location
         # 3. update field value
-        update_refs(result, w, tool.doc_loader.expand_url, merged_map, jobmapper, runtimeContext, "", "")
+        update_refs(arvRunner.api, result, w, tool.doc_loader.expand_url, merged_map, jobmapper, runtimeContext, "", "")
 
         # Write the updated file to the collection.
         with col.open(w[len(prefix):], "wt") as f:
@@ -485,7 +492,7 @@ def upload_workflow(arvRunner, tool, job_order, project_uuid,
         if r["class"] == "SchemaDefRequirement":
             wrapper["requirements"][i] = fix_schemadef(r, main["id"], tool.doc_loader.expand_url, merged_map, jobmapper, col.portable_data_hash())
 
-    update_refs(wrapper, main["id"], tool.doc_loader.expand_url, merged_map, jobmapper, runtimeContext, main["id"]+"#", "#main/")
+    update_refs(arvRunner.api, wrapper, main["id"], tool.doc_loader.expand_url, merged_map, jobmapper, runtimeContext, main["id"]+"#", "#main/")
 
     doc = {"cwlVersion": "v1.2", "$graph": [wrapper]}
 
diff --git a/sdk/cwl/setup.py b/sdk/cwl/setup.py
index 92f0952af2..1be2c119c2 100644
--- a/sdk/cwl/setup.py
+++ b/sdk/cwl/setup.py
@@ -43,7 +43,8 @@ setup(name='arvados-cwl-runner',
           'networkx < 2.6',
           'msgpack==1.0.3',
           'importlib-metadata<5',
-          'setuptools>=40.3.0'
+          'setuptools>=40.3.0',
+          'zipp<3.16.0'
       ],
       data_files=[
           ('share/doc/arvados-cwl-runner', ['LICENSE-2.0.txt', 'README.rst']),
diff --git a/sdk/cwl/tests/test_container.py b/sdk/cwl/tests/test_container.py
index a2f404d7eb..8e3a8ab85e 100644
--- a/sdk/cwl/tests/test_container.py
+++ b/sdk/cwl/tests/test_container.py
@@ -85,7 +85,8 @@ class TestContainer(unittest.TestCase):
              "construct_tool_object": runner.arv_make_tool,
              "fetcher_constructor": functools.partial(arvados_cwl.CollectionFetcher, api_client=runner.api, fs_access=fs_access),
              "loader": Loader({}),
-             "metadata": cmap({"cwlVersion": INTERNAL_VERSION, "http://commonwl.org/cwltool#original_cwlVersion": "v1.0"})
+             "metadata": cmap({"cwlVersion": INTERNAL_VERSION, "http://commonwl.org/cwltool#original_cwlVersion": "v1.0"}),
+             "default_docker_image": "arvados/jobs:"+arvados_cwl.__version__
              })
         runtimeContext = arvados_cwl.context.ArvRuntimeContext(
             {"work_api": "containers",
@@ -1463,7 +1464,8 @@ class TestWorkflow(unittest.TestCase):
              "make_fs_access": make_fs_access,
              "loader": document_loader,
              "metadata": {"cwlVersion": INTERNAL_VERSION, "http://commonwl.org/cwltool#original_cwlVersion": "v1.0"},
-             "construct_tool_object": runner.arv_make_tool})
+             "construct_tool_object": runner.arv_make_tool,
+             "default_docker_image": "arvados/jobs:"+arvados_cwl.__version__})
         runtimeContext = arvados_cwl.context.ArvRuntimeContext(
             {"work_api": "containers",
              "basedir": "",
diff --git a/sdk/cwl/tests/test_submit.py b/sdk/cwl/tests/test_submit.py
index 9dad245254..b10d1159f9 100644
--- a/sdk/cwl/tests/test_submit.py
+++ b/sdk/cwl/tests/test_submit.py
@@ -1180,7 +1180,7 @@ class TestSubmit(unittest.TestCase):
                                         "out": [
                                             {"id": "#main/step/out"}
                                         ],
-                                        "run": "keep:7628e49da34b93de9f4baf08a6212817+247/secret_wf.cwl"
+                                        "run": "keep:991302581d01db470345a131480e623b+247/secret_wf.cwl"
                                     }
                                 ]
                             }
@@ -1737,3 +1737,40 @@ class TestCreateWorkflow(unittest.TestCase):
         self.assertEqual(stubs.capture_stdout.getvalue(),
                          stubs.expect_workflow_uuid + '\n')
         self.assertEqual(exited, 0)
+
+    @stubs()
+    def test_create_map(self, stubs):
+        # test uploading a document that uses objects instead of arrays
+        # for certain fields like inputs and requirements.
+
+        project_uuid = 'zzzzz-j7d0g-zzzzzzzzzzzzzzz'
+        stubs.api.groups().get().execute.return_value = {"group_class": "project"}
+
+        exited = arvados_cwl.main(
+            ["--create-workflow", "--debug",
+             "--api=containers",
+             "--project-uuid", project_uuid,
+             "--disable-git",
+             "tests/wf/submit_wf_map.cwl", "tests/submit_test_job.json"],
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api)
+
+        stubs.api.pipeline_templates().create.refute_called()
+        stubs.api.container_requests().create.refute_called()
+
+        expect_workflow = StripYAMLComments(
+            open("tests/wf/expect_upload_wrapper_map.cwl").read().rstrip())
+
+        body = {
+            "workflow": {
+                "owner_uuid": project_uuid,
+                "name": "submit_wf_map.cwl",
+                "description": "",
+                "definition": expect_workflow,
+            }
+        }
+        stubs.api.workflows().create.assert_called_with(
+            body=JsonDiffMatcher(body))
+
+        self.assertEqual(stubs.capture_stdout.getvalue(),
+                         stubs.expect_workflow_uuid + '\n')
+        self.assertEqual(exited, 0)
diff --git a/sdk/cwl/tests/tool/submit_tool_map.cwl b/sdk/cwl/tests/tool/submit_tool_map.cwl
new file mode 100644
index 0000000000..7a833d471b
--- /dev/null
+++ b/sdk/cwl/tests/tool/submit_tool_map.cwl
@@ -0,0 +1,24 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+# Test case for arvados-cwl-runner
+#
+# Used to test whether scanning a tool file for dependencies (e.g. default
+# value blub.txt) and uploading to Keep works as intended.
+
+class: CommandLineTool
+cwlVersion: v1.0
+requirements:
+  DockerRequirement:
+    dockerPull: debian:buster-slim
+inputs:
+  x:
+    type: File
+    default:
+      class: File
+      location: blub.txt
+    inputBinding:
+      position: 1
+outputs: []
+baseCommand: cat
diff --git a/sdk/cwl/tests/wf/expect_upload_wrapper_map.cwl b/sdk/cwl/tests/wf/expect_upload_wrapper_map.cwl
new file mode 100644
index 0000000000..8f98f4718c
--- /dev/null
+++ b/sdk/cwl/tests/wf/expect_upload_wrapper_map.cwl
@@ -0,0 +1,88 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+{
+    "$graph": [
+        {
+            "class": "Workflow",
+            "hints": [
+                {
+                    "acrContainerImage": "999999999999999999999999999999d3+99",
+                    "class": "http://arvados.org/cwl#WorkflowRunnerResources"
+                }
+            ],
+            "id": "#main",
+            "inputs": [
+                {
+                    "default": {
+                        "basename": "blorp.txt",
+                        "class": "File",
+                        "location": "keep:169f39d466a5438ac4a90e779bf750c7+53/blorp.txt",
+                        "nameext": ".txt",
+                        "nameroot": "blorp",
+                        "size": 16
+                    },
+                    "id": "#main/x",
+                    "type": "File"
+                },
+                {
+                    "default": {
+                        "basename": "99999999999999999999999999999998+99",
+                        "class": "Directory",
+                        "location": "keep:99999999999999999999999999999998+99"
+                    },
+                    "id": "#main/y",
+                    "type": "Directory"
+                },
+                {
+                    "default": {
+                        "basename": "anonymous",
+                        "class": "Directory",
+                        "listing": [
+                            {
+                                "basename": "renamed.txt",
+                                "class": "File",
+                                "location": "keep:99999999999999999999999999999998+99/file1.txt",
+                                "nameext": ".txt",
+                                "nameroot": "renamed",
+                                "size": 0
+                            }
+                        ]
+                    },
+                    "id": "#main/z",
+                    "type": "Directory"
+                }
+            ],
+            "outputs": [],
+            "requirements": [
+                {
+                    "class": "SubworkflowFeatureRequirement"
+                }
+            ],
+            "steps": [
+                {
+                    "id": "#main/submit_wf_map.cwl",
+                    "in": [
+                        {
+                            "id": "#main/step/x",
+                            "source": "#main/x"
+                        },
+                        {
+                            "id": "#main/step/y",
+                            "source": "#main/y"
+                        },
+                        {
+                            "id": "#main/step/z",
+                            "source": "#main/z"
+                        }
+                    ],
+                    "label": "submit_wf_map.cwl",
+                    "out": [],
+                    "run": "keep:2b94b65162db72023301a582e085646f+290/wf/submit_wf_map.cwl"
+                }
+            ]
+        }
+    ],
+    "cwlVersion": "v1.2"
+}
diff --git a/sdk/cwl/tests/wf/submit_wf_map.cwl b/sdk/cwl/tests/wf/submit_wf_map.cwl
new file mode 100644
index 0000000000..e8bb9cf77c
--- /dev/null
+++ b/sdk/cwl/tests/wf/submit_wf_map.cwl
@@ -0,0 +1,25 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+# Test case for arvados-cwl-runner
+#
+# Used to test whether scanning a workflow file for dependencies
+# (e.g. submit_tool.cwl) and uploading to Keep works as intended.
+
+class: Workflow
+cwlVersion: v1.2
+inputs:
+  x:
+    type: File
+  y:
+    type: Directory
+  z:
+    type: Directory
+outputs: []
+steps:
+  step1:
+    in:
+      x: x
+    out: []
+    run: ../tool/submit_tool_map.cwl

commit a3e925e4c779c93c08805191f3973d7d7ebbec23
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Thu Sep 7 16:43:45 2023 -0400

    20933: Copies collections reported by a-c-r
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/sdk/cwl/arvados_cwl/runner.py b/sdk/cwl/arvados_cwl/runner.py
index 4d211d3ed6..860f8a1b9a 100644
--- a/sdk/cwl/arvados_cwl/runner.py
+++ b/sdk/cwl/arvados_cwl/runner.py
@@ -950,11 +950,16 @@ class Runner(Process):
 
 def print_keep_deps_visitor(references, doc_loader, tool):
     def collect_locators(obj):
-        loc = obj.get("location", "") or obj.get("http://arvados.org/cwl#dockerCollectionPDH", "")
+        loc = obj.get("location", "")
 
         g = arvados.util.keepuri_pattern.match(loc)
         if g and g[1] not in references:
             references.append(g[1])
+            return
+
+        loc = obj.get("http://arvados.org/cwl#dockerCollectionPDH", "") or obj.get("acrContainerImage")
+        if loc:
+            references.append(loc)
 
     sc_result = scandeps(tool["id"], tool,
                          set(),
@@ -963,6 +968,7 @@ def print_keep_deps_visitor(references, doc_loader, tool):
                          nestdirs=False)
 
     visit_class(sc_result, ("File", "Directory"), collect_locators)
+    visit_class(tool, ("DockerRequirement", "http://arvados.org/cwl#WorkflowRunnerResources"), collect_locators)
 
 
 def print_keep_deps(tool):
diff --git a/sdk/python/arvados/commands/arv_copy.py b/sdk/python/arvados/commands/arv_copy.py
index d6af681ffb..ef0e031e60 100755
--- a/sdk/python/arvados/commands/arv_copy.py
+++ b/sdk/python/arvados/commands/arv_copy.py
@@ -328,12 +328,11 @@ def copy_workflow(wf_uuid, src, dst, args):
                "ARVADOS_API_TOKEN": src.api_token,
                "PATH": os.environ["PATH"]}
         result = subprocess.run(["arvados-cwl-runner", "--quiet", "--print-keep-deps", "arvwf:"+wf_uuid],
-                                env=env)
-        print(result)
-        exit()
+                                capture_output=True, env=env)
+        locations = json.loads(result.stdout)
 
-        #if locations:
-        #        copy_collections(locations, src, dst, args)
+        if locations:
+            copy_collections(locations, src, dst, args)
 
     # copy the workflow itself
     del wf['uuid']

commit 7fec33bab2fb68405a1c641d3cd956d21487e14b
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Thu Sep 7 16:30:14 2023 -0400

    20933: Use acrContainerImage where available
    
    also refs #20592
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/sdk/cwl/arvados_cwl/__init__.py b/sdk/cwl/arvados_cwl/__init__.py
index 1e5acad2b2..fd3b7a5d16 100644
--- a/sdk/cwl/arvados_cwl/__init__.py
+++ b/sdk/cwl/arvados_cwl/__init__.py
@@ -420,6 +420,8 @@ def main(args=sys.argv[1:],
         # unit tests.
         stdout = None
 
+    executor.loadingContext.default_docker_image = arvargs.submit_runner_image or "arvados/jobs:"+__version__
+
     if arvargs.workflow.startswith("arvwf:") or workflow_uuid_pattern.match(arvargs.workflow) or arvargs.workflow.startswith("keep:"):
         executor.loadingContext.do_validate = False
         if arvargs.submit and not workflow_op:
diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py
index a94fdac522..11ae66b139 100644
--- a/sdk/cwl/arvados_cwl/arvcontainer.py
+++ b/sdk/cwl/arvados_cwl/arvcontainer.py
@@ -560,13 +560,19 @@ class RunnerContainer(Runner):
                 }
                 self.job_order[param] = {"$include": mnt}
 
+        container_image = arvados_jobs_image(self.arvrunner, self.jobs_image, runtimeContext),
+
+        workflow_runner_req, _ = self.embedded_tool.get_requirement("http://arvados.org/cwl#WorkflowRunnerResources")
+        if workflow_runner_req and workflow_runner_req.get("acrContainerImage"):
+            container_image = workflow_runner_req.get("acrContainerImage")
+
         container_req = {
             "name": self.name,
             "output_path": "/var/spool/cwl",
             "cwd": "/var/spool/cwl",
             "priority": self.priority,
             "state": "Committed",
-            "container_image": arvados_jobs_image(self.arvrunner, self.jobs_image, runtimeContext),
+            "container_image": container_image,
             "mounts": {
                 "/var/lib/cwl/cwl.input.json": {
                     "kind": "json",
diff --git a/sdk/cwl/arvados_cwl/arvtool.py b/sdk/cwl/arvados_cwl/arvtool.py
index b66e8ad3aa..86fecc0a1d 100644
--- a/sdk/cwl/arvados_cwl/arvtool.py
+++ b/sdk/cwl/arvados_cwl/arvtool.py
@@ -10,6 +10,7 @@ from ._version import __version__
 from functools import partial
 from schema_salad.sourceline import SourceLine
 from cwltool.errors import WorkflowException
+from arvados.util import portable_data_hash_pattern
 
 def validate_cluster_target(arvrunner, runtimeContext):
     if (runtimeContext.submit_runner_cluster and
@@ -61,8 +62,12 @@ class ArvadosCommandTool(CommandLineTool):
 
         (docker_req, docker_is_req) = self.get_requirement("DockerRequirement")
         if not docker_req:
-            self.hints.append({"class": "DockerRequirement",
-                               "dockerPull": "arvados/jobs:"+__version__})
+            if portable_data_hash_pattern.match(loadingContext.default_docker_image):
+                self.hints.append({"class": "DockerRequirement",
+                                   "http://arvados.org/cwl#dockerCollectionPDH": loadingContext.default_docker_image})
+            else:
+                self.hints.append({"class": "DockerRequirement",
+                                   "dockerPull": loadingContext.default_docker_image})
 
         self.arvrunner = arvrunner
 
diff --git a/sdk/cwl/arvados_cwl/arvworkflow.py b/sdk/cwl/arvados_cwl/arvworkflow.py
index c5d56fb656..cdce3d643a 100644
--- a/sdk/cwl/arvados_cwl/arvworkflow.py
+++ b/sdk/cwl/arvados_cwl/arvworkflow.py
@@ -29,7 +29,7 @@ from cwltool.load_tool import fetch_document, resolve_and_validate_document
 from cwltool.process import shortname, uniquename
 from cwltool.workflow import Workflow, WorkflowException, WorkflowStep
 from cwltool.utils import adjustFileObjs, adjustDirObjs, visit_class, normalizeFilesDirs
-from cwltool.context import LoadingContext
+from cwltool.context import LoadingContext, getdefault
 
 from schema_salad.ref_resolver import file_uri, uri_file_path
 
@@ -412,9 +412,10 @@ def upload_workflow(arvRunner, tool, job_order, project_uuid,
         wf_runner_resources = {"class": "http://arvados.org/cwl#WorkflowRunnerResources"}
         hints.append(wf_runner_resources)
 
-    wf_runner_resources["acrContainerImage"] = arvados_jobs_image(arvRunner,
-                                                                  submit_runner_image or "arvados/jobs:"+__version__,
-                                                                  runtimeContext)
+    if "acrContainerImage" not in wf_runner_resources:
+        wf_runner_resources["acrContainerImage"] = arvados_jobs_image(arvRunner,
+                                                                      submit_runner_image or "arvados/jobs:"+__version__,
+                                                                      runtimeContext)
 
     if submit_runner_ram:
         wf_runner_resources["ramMin"] = submit_runner_ram
@@ -594,8 +595,18 @@ class ArvadosWorkflow(Workflow):
         self.dynamic_resource_req = []
         self.static_resource_req = []
         self.wf_reffiles = []
-        self.loadingContext = loadingContext
-        super(ArvadosWorkflow, self).__init__(toolpath_object, loadingContext)
+        self.loadingContext = loadingContext.copy()
+
+        self.requirements = copy.deepcopy(getdefault(loadingContext.requirements, []))
+        tool_requirements = toolpath_object.get("requirements", [])
+        self.hints = copy.deepcopy(getdefault(loadingContext.hints, []))
+        tool_hints = toolpath_object.get("hints", [])
+
+        workflow_runner_req, _ = self.get_requirement("http://arvados.org/cwl#WorkflowRunnerResources")
+        if workflow_runner_req and workflow_runner_req.get("acrContainerImage"):
+            self.loadingContext.default_docker_image = workflow_runner_req.get("acrContainerImage")
+
+        super(ArvadosWorkflow, self).__init__(toolpath_object, self.loadingContext)
         self.cluster_target_req, _ = self.get_requirement("http://arvados.org/cwl#ClusterTarget")
 
     def job(self, joborder, output_callback, runtimeContext):
diff --git a/sdk/cwl/arvados_cwl/context.py b/sdk/cwl/arvados_cwl/context.py
index c7b9f5284d..dd64879b9f 100644
--- a/sdk/cwl/arvados_cwl/context.py
+++ b/sdk/cwl/arvados_cwl/context.py
@@ -7,6 +7,7 @@ from collections import namedtuple
 
 class ArvLoadingContext(LoadingContext):
     def __init__(self, kwargs=None):
+        self.default_docker_image = None
         super(ArvLoadingContext, self).__init__(kwargs)
 
 class ArvRuntimeContext(RuntimeContext):
diff --git a/sdk/cwl/arvados_cwl/executor.py b/sdk/cwl/arvados_cwl/executor.py
index 0d177137bf..330dba3dbe 100644
--- a/sdk/cwl/arvados_cwl/executor.py
+++ b/sdk/cwl/arvados_cwl/executor.py
@@ -649,6 +649,10 @@ The 'jobs' API is no longer supported.
             runtimeContext.copy_deps = True
             runtimeContext.match_local_docker = True
 
+        if runtimeContext.print_keep_deps:
+            runtimeContext.copy_deps = False
+            runtimeContext.match_local_docker = False
+
         if runtimeContext.update_workflow and self.project_uuid is None:
             # If we are updating a workflow, make sure anything that
             # gets uploaded goes into the same parent project, unless
diff --git a/sdk/python/arvados/commands/arv_copy.py b/sdk/python/arvados/commands/arv_copy.py
index 7326840896..d6af681ffb 100755
--- a/sdk/python/arvados/commands/arv_copy.py
+++ b/sdk/python/arvados/commands/arv_copy.py
@@ -324,21 +324,16 @@ def copy_workflow(wf_uuid, src, dst, args):
 
     # copy collections and docker images
     if args.recursive and wf["definition"]:
-        wf_def = yaml.safe_load(wf["definition"])
-        if wf_def is not None:
-            locations = []
-            docker_images = {}
-            graph = wf_def.get('$graph', None)
-            if graph is not None:
-                workflow_collections(graph, locations, docker_images)
-            else:
-                workflow_collections(wf_def, locations, docker_images)
-
-            if locations:
-                copy_collections(locations, src, dst, args)
-
-            for image in docker_images:
-                copy_docker_image(image, docker_images[image], src, dst, args)
+        env = {"ARVADOS_API_HOST": urllib.parse.urlparse(src._rootDesc["rootUrl"]).netloc,
+               "ARVADOS_API_TOKEN": src.api_token,
+               "PATH": os.environ["PATH"]}
+        result = subprocess.run(["arvados-cwl-runner", "--quiet", "--print-keep-deps", "arvwf:"+wf_uuid],
+                                env=env)
+        print(result)
+        exit()
+
+        #if locations:
+        #        copy_collections(locations, src, dst, args)
 
     # copy the workflow itself
     del wf['uuid']

commit f155ad2074ac6afea41f7cfe04be3eb489e7e259
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Thu Sep 7 14:37:05 2023 -0400

    20933: Adding --print-keep-deps to assist arv-copy
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/sdk/cwl/arvados_cwl/__init__.py b/sdk/cwl/arvados_cwl/__init__.py
index 7968fb1e2b..1e5acad2b2 100644
--- a/sdk/cwl/arvados_cwl/__init__.py
+++ b/sdk/cwl/arvados_cwl/__init__.py
@@ -123,6 +123,8 @@ def arg_parser():  # type: () -> argparse.ArgumentParser
     exgroup.add_argument("--create-workflow", action="store_true", help="Register an Arvados workflow that can be run from Workbench")
     exgroup.add_argument("--update-workflow", metavar="UUID", help="Update an existing Arvados workflow with the given UUID.")
 
+    exgroup.add_argument("--print-keep-deps", action="store_true", help="To assist copying, print a list of Keep collections that this workflow depends on.")
+
     exgroup = parser.add_mutually_exclusive_group()
     exgroup.add_argument("--wait", action="store_true", help="After submitting workflow runner, wait for completion.",
                         default=True, dest="wait")
@@ -324,7 +326,9 @@ def main(args=sys.argv[1:],
             return 1
         arvargs.work_api = want_api
 
-    if (arvargs.create_workflow or arvargs.update_workflow) and not arvargs.job_order:
+    workflow_op = arvargs.create_workflow or arvargs.update_workflow or arvargs.print_keep_deps
+
+    if workflow_op and not arvargs.job_order:
         job_order_object = ({}, "")
 
     add_arv_hints()
@@ -418,7 +422,7 @@ def main(args=sys.argv[1:],
 
     if arvargs.workflow.startswith("arvwf:") or workflow_uuid_pattern.match(arvargs.workflow) or arvargs.workflow.startswith("keep:"):
         executor.loadingContext.do_validate = False
-        if arvargs.submit:
+        if arvargs.submit and not workflow_op:
             executor.fast_submit = True
 
     return cwltool.main.main(args=arvargs,
@@ -431,4 +435,4 @@ def main(args=sys.argv[1:],
                              custom_schema_callback=add_arv_hints,
                              loadingContext=executor.loadingContext,
                              runtimeContext=executor.toplevel_runtimeContext,
-                             input_required=not (arvargs.create_workflow or arvargs.update_workflow))
+                             input_required=not workflow_op)
diff --git a/sdk/cwl/arvados_cwl/arvworkflow.py b/sdk/cwl/arvados_cwl/arvworkflow.py
index cddcd15c54..c5d56fb656 100644
--- a/sdk/cwl/arvados_cwl/arvworkflow.py
+++ b/sdk/cwl/arvados_cwl/arvworkflow.py
@@ -292,7 +292,8 @@ def upload_workflow(arvRunner, tool, job_order, project_uuid,
     # Find the longest common prefix among all the file names.  We'll
     # use this to recreate the directory structure in a keep
     # collection with correct relative references.
-    prefix = common_prefix(firstfile, all_files)
+    prefix = common_prefix(firstfile, all_files) if firstfile else ""
+
 
     col = arvados.collection.Collection(api_client=arvRunner.api)
 
diff --git a/sdk/cwl/arvados_cwl/context.py b/sdk/cwl/arvados_cwl/context.py
index 125527f783..c7b9f5284d 100644
--- a/sdk/cwl/arvados_cwl/context.py
+++ b/sdk/cwl/arvados_cwl/context.py
@@ -43,6 +43,7 @@ class ArvRuntimeContext(RuntimeContext):
         self.varying_url_params = ""
         self.prefer_cached_downloads = False
         self.cached_docker_lookups = {}
+        self.print_keep_deps = False
 
         super(ArvRuntimeContext, self).__init__(kwargs)
 
diff --git a/sdk/cwl/arvados_cwl/executor.py b/sdk/cwl/arvados_cwl/executor.py
index ce8aa42095..0d177137bf 100644
--- a/sdk/cwl/arvados_cwl/executor.py
+++ b/sdk/cwl/arvados_cwl/executor.py
@@ -34,7 +34,7 @@ from arvados.errors import ApiError
 
 import arvados_cwl.util
 from .arvcontainer import RunnerContainer, cleanup_name_for_collection
-from .runner import Runner, upload_docker, upload_job_order, upload_workflow_deps, make_builder, update_from_merged_map
+from .runner import Runner, upload_docker, upload_job_order, upload_workflow_deps, make_builder, update_from_merged_map, print_keep_deps
 from .arvtool import ArvadosCommandTool, validate_cluster_target, ArvadosExpressionTool
 from .arvworkflow import ArvadosWorkflow, upload_workflow, make_workflow_record
 from .fsaccess import CollectionFsAccess, CollectionFetcher, collectionResolver, CollectionCache, pdh_size
@@ -671,12 +671,10 @@ The 'jobs' API is no longer supported.
         # are going to wait for the result, and always_submit_runner
         # is false, then we don't submit a runner process.
 
-        submitting = (runtimeContext.update_workflow or
-                      runtimeContext.create_workflow or
-                      (runtimeContext.submit and not
+        submitting = (runtimeContext.submit and not
                        (updated_tool.tool["class"] == "CommandLineTool" and
                         runtimeContext.wait and
-                        not runtimeContext.always_submit_runner)))
+                        not runtimeContext.always_submit_runner))
 
         loadingContext = self.loadingContext.copy()
         loadingContext.do_validate = False
@@ -702,7 +700,7 @@ The 'jobs' API is no longer supported.
         loadingContext.skip_resolve_all = True
 
         workflow_wrapper = None
-        if submitting and not self.fast_submit:
+        if (submitting and not self.fast_submit) or runtimeContext.update_workflow or runtimeContext.create_workflow or runtimeContext.print_keep_deps:
             # upload workflow and get back the workflow wrapper
 
             workflow_wrapper = upload_workflow(self, tool, job_order,
@@ -725,6 +723,11 @@ The 'jobs' API is no longer supported.
                 self.stdout.write(uuid + "\n")
                 return (None, "success")
 
+            if runtimeContext.print_keep_deps:
+                # Just find and print out all the collection dependencies and exit
+                print_keep_deps(tool)
+                return (None, "success")
+
             # Did not register a workflow, we're going to submit
             # it instead.
             loadingContext.loader.idx.clear()
diff --git a/sdk/cwl/arvados_cwl/runner.py b/sdk/cwl/arvados_cwl/runner.py
index 4432813f6a..4d211d3ed6 100644
--- a/sdk/cwl/arvados_cwl/runner.py
+++ b/sdk/cwl/arvados_cwl/runner.py
@@ -946,3 +946,27 @@ class Runner(Process):
             self.arvrunner.output_callback({}, "permanentFail")
         else:
             self.arvrunner.output_callback(outputs, processStatus)
+
+
+def print_keep_deps_visitor(references, doc_loader, tool):
+    def collect_locators(obj):
+        loc = obj.get("location", "") or obj.get("http://arvados.org/cwl#dockerCollectionPDH", "")
+
+        g = arvados.util.keepuri_pattern.match(loc)
+        if g and g[1] not in references:
+            references.append(g[1])
+
+    sc_result = scandeps(tool["id"], tool,
+                         set(),
+                         set(("location", "id")),
+                         None, urljoin=doc_loader.fetcher.urljoin,
+                         nestdirs=False)
+
+    visit_class(sc_result, ("File", "Directory"), collect_locators)
+
+
+def print_keep_deps(tool):
+    references = []
+
+    tool.visit(partial(print_keep_deps_visitor, references, tool.doc_loader))
+    print(json.dumps(references))
diff --git a/sdk/python/arvados/util.py b/sdk/python/arvados/util.py
index 1ee5f6355a..c3d3def723 100644
--- a/sdk/python/arvados/util.py
+++ b/sdk/python/arvados/util.py
@@ -35,6 +35,8 @@ link_uuid_pattern = re.compile(r'[a-z0-9]{5}-o0j2j-[a-z0-9]{15}')
 job_uuid_pattern = re.compile(r'[a-z0-9]{5}-8i9sb-[a-z0-9]{15}')
 container_uuid_pattern = re.compile(r'[a-z0-9]{5}-dz642-[a-z0-9]{15}')
 manifest_pattern = re.compile(r'((\S+)( +[a-f0-9]{32}(\+\d+)(\+\S+)*)+( +\d+:\d+:\S+)+$)+', flags=re.MULTILINE)
+keep_file_locator_pattern = re.compile(r'([0-9a-f]{32}\+\d+)/(.*)')
+keepuri_pattern = re.compile(r'keep:([0-9a-f]{32}\+\d+)/(.*)')
 
 def _deprecated(version=None, preferred=None):
     """Mark a callable as deprecated in the SDK

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list