[ARVADOS] created: 1.3.0-486-gcea06ef31

Git user git at public.curoverse.com
Mon Mar 11 17:43:17 EDT 2019


        at  cea06ef31f890c5026d2ee6db4a70e8f4f723a55 (commit)


commit cea06ef31f890c5026d2ee6db4a70e8f4f723a55
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date:   Mon Mar 11 17:42:09 2019 -0400

    14322: Propagate collection UUID to container mount
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py
index 03b4e07c7..301e5317d 100644
--- a/sdk/cwl/arvados_cwl/arvcontainer.py
+++ b/sdk/cwl/arvados_cwl/arvcontainer.py
@@ -123,6 +123,8 @@ class ArvadosContainer(JobBase):
                 "kind": "collection",
                 "portable_data_hash": pdh
             }
+            if pdh in self.pathmapper.pdh_to_uuid:
+                mounts[targetdir]["uuid"] = self.pathmapper.pdh_to_uuid[pdh]
             if len(sp) == 2:
                 if tp == "Directory":
                     path = sp[1]
@@ -353,8 +355,8 @@ class ArvadosContainer(JobBase):
             if container["output"]:
                 outputs = done.done_outputs(self, container, "/tmp", self.outdir, "/keep")
         except WorkflowException as e:
-            # Only include a stack trace if in debug mode. 
-            # A stack trace may obfuscate more useful output about the workflow. 
+            # Only include a stack trace if in debug mode.
+            # A stack trace may obfuscate more useful output about the workflow.
             logger.error("%s unable to collect output from %s:\n%s",
                          self.arvrunner.label(self), container["output"], e, exc_info=(e if self.arvrunner.debug else False))
             processStatus = "permanentFail"
diff --git a/sdk/cwl/arvados_cwl/pathmapper.py b/sdk/cwl/arvados_cwl/pathmapper.py
index e0445febd..38135899d 100644
--- a/sdk/cwl/arvados_cwl/pathmapper.py
+++ b/sdk/cwl/arvados_cwl/pathmapper.py
@@ -58,6 +58,7 @@ class ArvPathMapper(PathMapper):
         self.name = name
         self.referenced_files = [r["location"] for r in referenced_files]
         self.single_collection = single_collection
+        self.pdh_to_uuid = {}
         super(ArvPathMapper, self).__init__(referenced_files, input_basedir, None)
 
     def visit(self, srcobj, uploadfiles):
@@ -67,6 +68,8 @@ class ArvPathMapper(PathMapper):
 
         if isinstance(src, basestring) and ArvPathMapper.pdh_dirpath.match(src):
             self._pathmap[src] = MapperEnt(src, self.collection_pattern % urllib.parse.unquote(src[5:]), srcobj["class"], True)
+            if arvados_cwl.util.collectionUUID in srcobj:
+                self.pdh_to_uuid[src.split("/", 1)[0][5:]] = srcobj[arvados_cwl.util.collectionUUID]
 
         debug = logger.isEnabledFor(logging.DEBUG)
 
diff --git a/sdk/cwl/arvados_cwl/runner.py b/sdk/cwl/arvados_cwl/runner.py
index 794078bfe..39620a55f 100644
--- a/sdk/cwl/arvados_cwl/runner.py
+++ b/sdk/cwl/arvados_cwl/runner.py
@@ -33,7 +33,7 @@ from cwltool.builder import substitute
 from cwltool.pack import pack
 
 import arvados.collection
-import arvados.util
+from .util import collectionUUID
 import ruamel.yaml as yaml
 
 import arvados_cwl.arvdocker
@@ -236,8 +236,7 @@ def upload_dependencies(arvrunner, name, document_loader,
         if uuid not in uuid_map:
             raise Exception("Cannot resolve uuid %s" % uuid)
         p["location"] = "keep:%s%s" % (uuid_map[uuid], gp.groups()[1] if gp.groups()[1] else "")
-        p["http://arvados.org/cwl#collectionUUID"] = uuid
-
+        p[collectionUUID] = uuid
 
     visit_class(workflowobj, ("File", "Directory"), setloc)
     visit_class(discovered, ("File", "Directory"), setloc)
diff --git a/sdk/cwl/arvados_cwl/util.py b/sdk/cwl/arvados_cwl/util.py
index 776fc6bc2..85ae65ecf 100644
--- a/sdk/cwl/arvados_cwl/util.py
+++ b/sdk/cwl/arvados_cwl/util.py
@@ -5,6 +5,8 @@
 import datetime
 from arvados.errors import ApiError
 
+collectionUUID =  "http://arvados.org/cwl#collectionUUID"
+
 def get_intermediate_collection_info(workflow_step_name, current_container, intermediate_output_ttl):
         if workflow_step_name:
             name = "Intermediate collection for step %s" % (workflow_step_name)
@@ -30,5 +32,5 @@ def get_current_container(api, num_retries=0, logger=None):
             if logger:
                 logger.info("Getting current container: %s", e)
             raise e
-            
+
     return current_container
diff --git a/sdk/cwl/tests/test_container.py b/sdk/cwl/tests/test_container.py
index 1a57da392..07d962bf9 100644
--- a/sdk/cwl/tests/test_container.py
+++ b/sdk/cwl/tests/test_container.py
@@ -80,7 +80,7 @@ class TestContainer(unittest.TestCase):
 
         return loadingContext, runtimeContext
 
-    # Helper function to set up the ArvCwlExecutor to use the containers api 
+    # Helper function to set up the ArvCwlExecutor to use the containers api
     # and test that the RuntimeStatusLoggingHandler is set up correctly
     def setup_and_test_container_executor_and_logging(self, gcc_mock) :
         api = mock.MagicMock()
@@ -96,7 +96,7 @@ class TestContainer(unittest.TestCase):
         handlerClasses = [h.__class__ for h in root_logger.handlers]
         self.assertTrue(arvados_cwl.RuntimeStatusLoggingHandler in handlerClasses)
         return runner
-        
+
     # The test passes no builder.resources
     # Hence the default resources will apply: {'cores': 1, 'ram': 1024, 'outdirSize': 1024, 'tmpdirSize': 1024}
     @mock.patch("arvados.commands.keepdocker.list_images_in_arv")
@@ -527,9 +527,9 @@ class TestContainer(unittest.TestCase):
         # get_current_container is invoked when we call runtime_status_update
         # so try and log again!
         gcc_mock.side_effect = lambda *args: root_logger.error("Second Error")
-        try: 
+        try:
             root_logger.error("First Error")
-        except RuntimeError: 
+        except RuntimeError:
             self.fail("RuntimeStatusLoggingHandler should not be called recursively")
 
     @mock.patch("arvados_cwl.ArvCwlExecutor.runtime_status_update")
@@ -538,7 +538,7 @@ class TestContainer(unittest.TestCase):
     @mock.patch("arvados.collection.Collection")
     def test_child_failure(self, col, reader, gcc_mock, rts_mock):
         runner = self.setup_and_test_container_executor_and_logging(gcc_mock)
-        
+
         gcc_mock.return_value = {"uuid" : "zzzzz-dz642-zzzzzzzzzzzzzzz"}
         self.assertTrue(gcc_mock.called)
 
@@ -630,6 +630,7 @@ class TestContainer(unittest.TestCase):
             "p1": {
                 "class": "Directory",
                 "location": "keep:99999999999999999999999999999994+44",
+                "http://arvados.org/cwl#collectionUUID": "zzzzz-4zz18-zzzzzzzzzzzzzzz",
                 "listing": [
                     {
                         "class": "File",
@@ -660,7 +661,8 @@ class TestContainer(unittest.TestCase):
                     'mounts': {
                         "/keep/99999999999999999999999999999994+44": {
                             "kind": "collection",
-                            "portable_data_hash": "99999999999999999999999999999994+44"
+                            "portable_data_hash": "99999999999999999999999999999994+44",
+                            "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzzz"
                         },
                         '/tmp': {'kind': 'tmp',
                                  "capacity": 1073741824 },

commit 34aa2c02eacaeff7b039c42edc16b73a5d439c70
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date:   Mon Mar 11 17:02:14 2019 -0400

    14322: Convert uuids on input to portable data hashes
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/sdk/cwl/arvados_cwl/runner.py b/sdk/cwl/arvados_cwl/runner.py
index e515ac2ce..794078bfe 100644
--- a/sdk/cwl/arvados_cwl/runner.py
+++ b/sdk/cwl/arvados_cwl/runner.py
@@ -8,6 +8,7 @@ from future.utils import  viewvalues, viewitems
 
 import os
 import sys
+import re
 import urllib.parse
 from functools import partial
 import logging
@@ -32,6 +33,7 @@ from cwltool.builder import substitute
 from cwltool.pack import pack
 
 import arvados.collection
+import arvados.util
 import ruamel.yaml as yaml
 
 import arvados_cwl.arvdocker
@@ -87,6 +89,7 @@ def discover_secondary_files(inputs, job_order, discovered=None):
         if shortname(t["id"]) in job_order and t.get("secondaryFiles"):
             setSecondary(t, job_order[shortname(t["id"])], discovered)
 
+collection_uuid_pattern = re.compile(r'^keep:([a-z0-9]{5}-4zz18-[a-z0-9]{15})(/.*)?$')
 
 def upload_dependencies(arvrunner, name, document_loader,
                         workflowobj, uri, loadref_run,
@@ -136,14 +139,40 @@ def upload_dependencies(arvrunner, name, document_loader,
                   loadref, urljoin=document_loader.fetcher.urljoin)
 
     sc = []
-    def only_real(obj):
-        # Only interested in local files than need to be uploaded,
-        # don't include file literals, keep references, etc.
-        sp = obj.get("location", "").split(":")
-        if len(sp) > 1 and sp[0] in ("file", "http", "https"):
+    uuids = []
+    def dependencies_needing_transformation(obj):
+        loc = obj.get("location", "")
+        sp = loc.split(":")
+        if len(sp) < 1:
+            return
+        if sp[0] in ("file", "http", "https"):
+            # Record local files than need to be uploaded,
+            # don't include file literals, keep references, etc.
             sc.append(obj)
+        elif sp[0] == "keep":
+            # Collect collection uuids that need to be resolved to
+            # portable data hashes
+            gp = collection_uuid_pattern.match(loc)
+            if gp:
+                uuids.append(gp.groups()[0])
 
-    visit_class(sc_result, ("File", "Directory"), only_real)
+    visit_class(sc_result, ("File", "Directory"), dependencies_needing_transformation)
+
+    uuid_map = {}
+    while uuids:
+        lookups = arvrunner.api.collections().list(
+            filters=[["uuid", "in", uuids]],
+            count="none",
+            select=["uuid", "portable_data_hash"]).execute(
+                num_retries=arvrunner.num_retries)
+
+        if not lookups["items"]:
+            break
+
+        for l in lookups["items"]:
+            uuid_map[l["uuid"]] = l["portable_data_hash"]
+
+        uuids = [u for u in uuids if u not in uuid_map]
 
     normalizeFilesDirs(sc)
 
@@ -194,8 +223,21 @@ def upload_dependencies(arvrunner, name, document_loader,
                            single_collection=True)
 
     def setloc(p):
-        if "location" in p and (not p["location"].startswith("_:")) and (not p["location"].startswith("keep:")):
+        loc = p.get("location")
+        if loc and (not loc.startswith("_:")) and (not loc.startswith("keep:")):
             p["location"] = mapper.mapper(p["location"]).resolved
+            return
+        if not uuid_map:
+            return
+        gp = collection_uuid_pattern.match(loc)
+        if not gp:
+            return
+        uuid = gp.groups()[0]
+        if uuid not in uuid_map:
+            raise Exception("Cannot resolve uuid %s" % uuid)
+        p["location"] = "keep:%s%s" % (uuid_map[uuid], gp.groups()[1] if gp.groups()[1] else "")
+        p["http://arvados.org/cwl#collectionUUID"] = uuid
+
 
     visit_class(workflowobj, ("File", "Directory"), setloc)
     visit_class(discovered, ("File", "Directory"), setloc)
diff --git a/sdk/cwl/tests/test_submit.py b/sdk/cwl/tests/test_submit.py
index 39117d86e..4218ec137 100644
--- a/sdk/cwl/tests/test_submit.py
+++ b/sdk/cwl/tests/test_submit.py
@@ -1425,6 +1425,40 @@ class TestSubmit(unittest.TestCase):
             stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
         self.assertEqual(exited, 1)
 
+    @stubs
+    def test_submit_uuid_inputs(self, stubs):
+        def list_side_effect(**kwargs):
+            m = mock.MagicMock()
+            if "count" in kwargs:
+                m.execute.return_value = {"items": [
+                    {"uuid": "zzzzz-4zz18-zzzzzzzzzzzzzzz", "portable_data_hash": "99999999999999999999999999999998+99"}
+                ]}
+            else:
+                m.execute.return_value = {"items": []}
+            return m
+        stubs.api.collections().list.side_effect = list_side_effect
+
+        exited = arvados_cwl.main(
+            ["--submit", "--no-wait", "--api=containers", "--debug",
+                "tests/wf/submit_wf.cwl", "tests/submit_test_job_with_uuids.json"],
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
+
+        expect_container = copy.deepcopy(stubs.expect_container_spec)
+        expect_container['mounts']['/var/lib/cwl/cwl.input.json']['content']['y']['basename'] = 'zzzzz-4zz18-zzzzzzzzzzzzzzz'
+        expect_container['mounts']['/var/lib/cwl/cwl.input.json']['content']['y']['http://arvados.org/cwl#collectionUUID'] = 'zzzzz-4zz18-zzzzzzzzzzzzzzz'
+        expect_container['mounts']['/var/lib/cwl/cwl.input.json']['content']['z']['listing'][0]['http://arvados.org/cwl#collectionUUID'] = 'zzzzz-4zz18-zzzzzzzzzzzzzzz'
+        del expect_container['mounts']['/var/lib/cwl/cwl.input.json']['content']['z']['listing'][0]['size']
+
+        stubs.api.collections().list.assert_has_calls([
+            mock.call(count='none',
+                      filters=[['uuid', 'in', ['zzzzz-4zz18-zzzzzzzzzzzzzzz', 'zzzzz-4zz18-zzzzzzzzzzzzzzz', 'zzzzz-4zz18-zzzzzzzzzzzzzzz']]],
+                      select=['uuid', 'portable_data_hash'])])
+        stubs.api.container_requests().create.assert_called_with(
+            body=JsonDiffMatcher(expect_container))
+        self.assertEqual(stubs.capture_stdout.getvalue(),
+                         stubs.expect_container_request_uuid + '\n')
+        self.assertEqual(exited, 0)
+
 
 class TestCreateTemplate(unittest.TestCase):
     existing_template_uuid = "zzzzz-d1hrv-validworkfloyml"

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list