[ARVADOS] updated: 1.3.0-70-geabe449b9

Git user git at public.curoverse.com
Tue Dec 18 14:15:10 EST 2018


Summary of changes:
 sdk/cwl/arvados_cwl/pathmapper.py |  40 ++++++++++++
 sdk/cwl/tests/test_pathmapper.py  | 129 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 169 insertions(+)

       via  eabe449b9a17e14eb2ba67f6e5cb8ce1dafc8005 (commit)
       via  d353ff6f67923d472c1623493c7d11b35aebea90 (commit)
       via  06bcc58df3e531f4af2bbc3ccf13aacdac068623 (commit)
      from  d5508647a2dfd23684c08383fc18c77de3d1c825 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit eabe449b9a17e14eb2ba67f6e5cb8ce1dafc8005
Merge: d5508647a d353ff6f6
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date:   Tue Dec 18 14:14:58 2018 -0500

    Merge branch '14327-cwl-unnecessary-collections' refs #14327
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>


commit d353ff6f67923d472c1623493c7d11b35aebea90
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date:   Tue Dec 18 14:08:58 2018 -0500

    14327: Add comments
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/sdk/cwl/arvados_cwl/pathmapper.py b/sdk/cwl/arvados_cwl/pathmapper.py
index c844311d5..0b2a22788 100644
--- a/sdk/cwl/arvados_cwl/pathmapper.py
+++ b/sdk/cwl/arvados_cwl/pathmapper.py
@@ -120,6 +120,14 @@ class ArvPathMapper(PathMapper):
             raise SourceLine(obj, "location", WorkflowException).makeError("Don't know what to do with '%s'" % obj["location"])
 
     def needs_new_collection(self, srcobj, prefix=""):
+        """Check if files need to be staged into a new collection.
+
+        If all the files are in the same collection and in the same
+        paths they would be staged to, return False.  Otherwise, a new
+        collection is needed with files copied/created in the
+        appropriate places.
+        """
+
         loc = srcobj["location"]
         if loc.startswith("_:"):
             return True
@@ -134,10 +142,9 @@ class ArvPathMapper(PathMapper):
                 prefix = loc+"/"
         if srcobj["class"] == "File" and loc not in self._pathmap:
             return True
-        if srcobj.get("secondaryFiles"):
-            for s in srcobj["secondaryFiles"]:
-                if self.needs_new_collection(s, prefix):
-                    return True
+        for s in srcobj.get("secondaryFiles", []):
+            if self.needs_new_collection(s, prefix):
+                return True
         if srcobj.get("listing"):
             prefix = "%s%s/" % (prefix, srcobj["basename"])
             for l in srcobj["listing"]:
@@ -195,6 +202,10 @@ class ArvPathMapper(PathMapper):
             elif srcobj["class"] == "File" and (srcobj.get("secondaryFiles") or
                 (srcobj["location"].startswith("_:") and "contents" in srcobj)):
 
+                # If all secondary files/directories are located in
+                # the same collection as the primary file and their
+                # paths and names are consistent with staging,
+                # don't create a new collection.
                 if not self.needs_new_collection(srcobj):
                     continue
 
diff --git a/sdk/cwl/tests/test_pathmapper.py b/sdk/cwl/tests/test_pathmapper.py
index fe8c2536c..b78e89012 100644
--- a/sdk/cwl/tests/test_pathmapper.py
+++ b/sdk/cwl/tests/test_pathmapper.py
@@ -105,6 +105,8 @@ class TestPathmap(unittest.TestCase):
 
     def test_needs_new_collection(self):
         arvrunner = arvados_cwl.executor.ArvCwlExecutor(self.api)
+
+        # Plain file.  Don't need a new collection.
         a = {
             "class": "File",
             "location": "keep:99999999999999999999999999999991+99/hw.py",
@@ -114,9 +116,12 @@ class TestPathmap(unittest.TestCase):
         p._pathmap["keep:99999999999999999999999999999991+99/hw.py"] = True
         self.assertFalse(p.needs_new_collection(a))
 
+        # A file that isn't in the pathmap (for some reason).  Need a new collection.
         p = ArvPathMapper(arvrunner, [], "", "%s", "%s/%s")
         self.assertTrue(p.needs_new_collection(a))
 
+        # A file with a secondary file in the same collection.  Don't need
+        # a new collection.
         a = {
             "class": "File",
             "location": "keep:99999999999999999999999999999991+99/hw.py",
@@ -132,6 +137,8 @@ class TestPathmap(unittest.TestCase):
         p._pathmap["keep:99999999999999999999999999999991+99/hw.pyc"] = True
         self.assertFalse(p.needs_new_collection(a))
 
+        # Secondary file is in a different collection from the
+        # primary.  Need a new collection.
         a = {
             "class": "File",
             "location": "keep:99999999999999999999999999999991+99/hw.py",
@@ -147,6 +154,8 @@ class TestPathmap(unittest.TestCase):
         p._pathmap["keep:99999999999999999999999999999992+99/hw.pyc"] = True
         self.assertTrue(p.needs_new_collection(a))
 
+        # Secondary file should be staged to a different name than
+        # the path in its location.  Need a new collection.
         a = {
             "class": "File",
             "location": "keep:99999999999999999999999999999991+99/hw.py",
@@ -162,6 +171,7 @@ class TestPathmap(unittest.TestCase):
         p._pathmap["keep:99999999999999999999999999999991+99/hw.pyc"] = True
         self.assertTrue(p.needs_new_collection(a))
 
+        # Secondary file is a directory.  Do not need a new collection.
         a = {
             "class": "File",
             "location": "keep:99999999999999999999999999999991+99/hw.py",
@@ -183,6 +193,7 @@ class TestPathmap(unittest.TestCase):
         p._pathmap["keep:99999999999999999999999999999991+99/hw/h2"] = True
         self.assertFalse(p.needs_new_collection(a))
 
+        # Secondary file is a renamed directory.  Need a new collection.
         a = {
             "class": "File",
             "location": "keep:99999999999999999999999999999991+99/hw.py",
@@ -204,6 +215,7 @@ class TestPathmap(unittest.TestCase):
         p._pathmap["keep:99999999999999999999999999999991+99/hw/h2"] = True
         self.assertTrue(p.needs_new_collection(a))
 
+        # Secondary file is a file literal.  Need a new collection.
         a = {
             "class": "File",
             "location": "keep:99999999999999999999999999999991+99/hw.py",

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list