[ARVADOS] created: 1.3.0-63-g06bcc58df

Git user git at public.curoverse.com
Tue Dec 18 09:53:57 EST 2018


        at  06bcc58df3e531f4af2bbc3ccf13aacdac068623 (commit)


commit 06bcc58df3e531f4af2bbc3ccf13aacdac068623
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date:   Mon Dec 17 16:23:07 2018 -0500

    14327: Don't create new collection if source has expected secondaryFiles
    
    If all secondary files/directories are located in the same collection
    as the primary file, and their paths and names are consistent with
    staging, don't create a new collection.
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/sdk/cwl/arvados_cwl/pathmapper.py b/sdk/cwl/arvados_cwl/pathmapper.py
index 26c85d300..c844311d5 100644
--- a/sdk/cwl/arvados_cwl/pathmapper.py
+++ b/sdk/cwl/arvados_cwl/pathmapper.py
@@ -119,6 +119,32 @@ class ArvPathMapper(PathMapper):
         else:
             raise SourceLine(obj, "location", WorkflowException).makeError("Don't know what to do with '%s'" % obj["location"])
 
+    def needs_new_collection(self, srcobj, prefix=""):
+        loc = srcobj["location"]
+        if loc.startswith("_:"):
+            return True
+        if prefix:
+            if loc != prefix+srcobj["basename"]:
+                return True
+        else:
+            i = loc.rfind("/")
+            if i > -1:
+                prefix = loc[:i+1]
+            else:
+                prefix = loc+"/"
+        if srcobj["class"] == "File" and loc not in self._pathmap:
+            return True
+        if srcobj.get("secondaryFiles"):
+            for s in srcobj["secondaryFiles"]:
+                if self.needs_new_collection(s, prefix):
+                    return True
+        if srcobj.get("listing"):
+            prefix = "%s%s/" % (prefix, srcobj["basename"])
+            for l in srcobj["listing"]:
+                if self.needs_new_collection(l, prefix):
+                    return True
+        return False
+
     def setup(self, referenced_files, basedir):
         # type: (List[Any], unicode) -> None
         uploadfiles = set()
@@ -169,6 +195,9 @@ class ArvPathMapper(PathMapper):
             elif srcobj["class"] == "File" and (srcobj.get("secondaryFiles") or
                 (srcobj["location"].startswith("_:") and "contents" in srcobj)):
 
+                if not self.needs_new_collection(srcobj):
+                    continue
+
                 c = arvados.collection.Collection(api_client=self.arvrunner.api,
                                                   keep_client=self.arvrunner.keep_client,
                                                   num_retries=self.arvrunner.num_retries                                                  )
diff --git a/sdk/cwl/tests/test_pathmapper.py b/sdk/cwl/tests/test_pathmapper.py
index fb3c257d9..fe8c2536c 100644
--- a/sdk/cwl/tests/test_pathmapper.py
+++ b/sdk/cwl/tests/test_pathmapper.py
@@ -102,3 +102,120 @@ class TestPathmap(unittest.TestCase):
                 "class": "File",
                 "location": "file:tests/hw.py"
             }], "", "/test/%s", "/test/%s/%s")
+
+    def test_needs_new_collection(self):
+        arvrunner = arvados_cwl.executor.ArvCwlExecutor(self.api)
+        a = {
+            "class": "File",
+            "location": "keep:99999999999999999999999999999991+99/hw.py",
+            "basename": "hw.py"
+        }
+        p = ArvPathMapper(arvrunner, [], "", "%s", "%s/%s")
+        p._pathmap["keep:99999999999999999999999999999991+99/hw.py"] = True
+        self.assertFalse(p.needs_new_collection(a))
+
+        p = ArvPathMapper(arvrunner, [], "", "%s", "%s/%s")
+        self.assertTrue(p.needs_new_collection(a))
+
+        a = {
+            "class": "File",
+            "location": "keep:99999999999999999999999999999991+99/hw.py",
+            "basename": "hw.py",
+            "secondaryFiles": [{
+                "class": "File",
+                "location": "keep:99999999999999999999999999999991+99/hw.pyc",
+                "basename": "hw.pyc"
+            }]
+        }
+        p = ArvPathMapper(arvrunner, [], "", "%s", "%s/%s")
+        p._pathmap["keep:99999999999999999999999999999991+99/hw.py"] = True
+        p._pathmap["keep:99999999999999999999999999999991+99/hw.pyc"] = True
+        self.assertFalse(p.needs_new_collection(a))
+
+        a = {
+            "class": "File",
+            "location": "keep:99999999999999999999999999999991+99/hw.py",
+            "basename": "hw.py",
+            "secondaryFiles": [{
+                "class": "File",
+                "location": "keep:99999999999999999999999999999992+99/hw.pyc",
+                "basename": "hw.pyc"
+            }]
+        }
+        p = ArvPathMapper(arvrunner, [], "", "%s", "%s/%s")
+        p._pathmap["keep:99999999999999999999999999999991+99/hw.py"] = True
+        p._pathmap["keep:99999999999999999999999999999992+99/hw.pyc"] = True
+        self.assertTrue(p.needs_new_collection(a))
+
+        a = {
+            "class": "File",
+            "location": "keep:99999999999999999999999999999991+99/hw.py",
+            "basename": "hw.py",
+            "secondaryFiles": [{
+                "class": "File",
+                "location": "keep:99999999999999999999999999999991+99/hw.pyc",
+                "basename": "hw.other"
+            }]
+        }
+        p = ArvPathMapper(arvrunner, [], "", "%s", "%s/%s")
+        p._pathmap["keep:99999999999999999999999999999991+99/hw.py"] = True
+        p._pathmap["keep:99999999999999999999999999999991+99/hw.pyc"] = True
+        self.assertTrue(p.needs_new_collection(a))
+
+        a = {
+            "class": "File",
+            "location": "keep:99999999999999999999999999999991+99/hw.py",
+            "basename": "hw.py",
+            "secondaryFiles": [{
+                "class": "Directory",
+                "location": "keep:99999999999999999999999999999991+99/hw",
+                "basename": "hw",
+                "listing": [{
+                    "class": "File",
+                    "location": "keep:99999999999999999999999999999991+99/hw/h2",
+                    "basename": "h2"
+                }]
+            }]
+        }
+        p = ArvPathMapper(arvrunner, [], "", "%s", "%s/%s")
+        p._pathmap["keep:99999999999999999999999999999991+99/hw.py"] = True
+        p._pathmap["keep:99999999999999999999999999999991+99/hw"] = True
+        p._pathmap["keep:99999999999999999999999999999991+99/hw/h2"] = True
+        self.assertFalse(p.needs_new_collection(a))
+
+        a = {
+            "class": "File",
+            "location": "keep:99999999999999999999999999999991+99/hw.py",
+            "basename": "hw.py",
+            "secondaryFiles": [{
+                "class": "Directory",
+                "location": "keep:99999999999999999999999999999991+99/hw",
+                "basename": "wh",
+                "listing": [{
+                    "class": "File",
+                    "location": "keep:99999999999999999999999999999991+99/hw/h2",
+                    "basename": "h2"
+                }]
+            }]
+        }
+        p = ArvPathMapper(arvrunner, [], "", "%s", "%s/%s")
+        p._pathmap["keep:99999999999999999999999999999991+99/hw.py"] = True
+        p._pathmap["keep:99999999999999999999999999999991+99/hw"] = True
+        p._pathmap["keep:99999999999999999999999999999991+99/hw/h2"] = True
+        self.assertTrue(p.needs_new_collection(a))
+
+        a = {
+            "class": "File",
+            "location": "keep:99999999999999999999999999999991+99/hw.py",
+            "basename": "hw.py",
+            "secondaryFiles": [{
+                "class": "File",
+                "location": "_:123",
+                "basename": "hw.pyc",
+                "contents": "123"
+            }]
+        }
+        p = ArvPathMapper(arvrunner, [], "", "%s", "%s/%s")
+        p._pathmap["keep:99999999999999999999999999999991+99/hw.py"] = True
+        p._pathmap["_:123"] = True
+        self.assertTrue(p.needs_new_collection(a))

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list