[ARVADOS] updated: 1.3.0-70-geabe449b9
Git user
git at public.curoverse.com
Tue Dec 18 14:15:10 EST 2018
Summary of changes:
sdk/cwl/arvados_cwl/pathmapper.py | 40 ++++++++++++
sdk/cwl/tests/test_pathmapper.py | 129 ++++++++++++++++++++++++++++++++++++++
2 files changed, 169 insertions(+)
via eabe449b9a17e14eb2ba67f6e5cb8ce1dafc8005 (commit)
via d353ff6f67923d472c1623493c7d11b35aebea90 (commit)
via 06bcc58df3e531f4af2bbc3ccf13aacdac068623 (commit)
from d5508647a2dfd23684c08383fc18c77de3d1c825 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
commit eabe449b9a17e14eb2ba67f6e5cb8ce1dafc8005
Merge: d5508647a d353ff6f6
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date: Tue Dec 18 14:14:58 2018 -0500
Merge branch '14327-cwl-unnecessary-collections' refs #14327
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>
commit d353ff6f67923d472c1623493c7d11b35aebea90
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date: Tue Dec 18 14:08:58 2018 -0500
14327: Add comments
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>
diff --git a/sdk/cwl/arvados_cwl/pathmapper.py b/sdk/cwl/arvados_cwl/pathmapper.py
index c844311d5..0b2a22788 100644
--- a/sdk/cwl/arvados_cwl/pathmapper.py
+++ b/sdk/cwl/arvados_cwl/pathmapper.py
@@ -120,6 +120,14 @@ class ArvPathMapper(PathMapper):
raise SourceLine(obj, "location", WorkflowException).makeError("Don't know what to do with '%s'" % obj["location"])
def needs_new_collection(self, srcobj, prefix=""):
+ """Check if files need to be staged into a new collection.
+
+ If all the files are in the same collection and in the same
+ paths they would be staged to, return False. Otherwise, a new
+ collection is needed with files copied/created in the
+ appropriate places.
+ """
+
loc = srcobj["location"]
if loc.startswith("_:"):
return True
@@ -134,10 +142,9 @@ class ArvPathMapper(PathMapper):
prefix = loc+"/"
if srcobj["class"] == "File" and loc not in self._pathmap:
return True
- if srcobj.get("secondaryFiles"):
- for s in srcobj["secondaryFiles"]:
- if self.needs_new_collection(s, prefix):
- return True
+ for s in srcobj.get("secondaryFiles", []):
+ if self.needs_new_collection(s, prefix):
+ return True
if srcobj.get("listing"):
prefix = "%s%s/" % (prefix, srcobj["basename"])
for l in srcobj["listing"]:
@@ -195,6 +202,10 @@ class ArvPathMapper(PathMapper):
elif srcobj["class"] == "File" and (srcobj.get("secondaryFiles") or
(srcobj["location"].startswith("_:") and "contents" in srcobj)):
+ # If all secondary files/directories are located in
+ # the same collection as the primary file, and their
+ # paths and names are consistent with staging,
+ # don't create a new collection.
if not self.needs_new_collection(srcobj):
continue
diff --git a/sdk/cwl/tests/test_pathmapper.py b/sdk/cwl/tests/test_pathmapper.py
index fe8c2536c..b78e89012 100644
--- a/sdk/cwl/tests/test_pathmapper.py
+++ b/sdk/cwl/tests/test_pathmapper.py
@@ -105,6 +105,8 @@ class TestPathmap(unittest.TestCase):
def test_needs_new_collection(self):
arvrunner = arvados_cwl.executor.ArvCwlExecutor(self.api)
+
+ # Plain file. Don't need a new collection.
a = {
"class": "File",
"location": "keep:99999999999999999999999999999991+99/hw.py",
@@ -114,9 +116,12 @@ class TestPathmap(unittest.TestCase):
p._pathmap["keep:99999999999999999999999999999991+99/hw.py"] = True
self.assertFalse(p.needs_new_collection(a))
+ # A file that isn't in the pathmap (for some reason). Need a new collection.
p = ArvPathMapper(arvrunner, [], "", "%s", "%s/%s")
self.assertTrue(p.needs_new_collection(a))
+ # A file with a secondary file in the same collection. Don't need
+ # a new collection.
a = {
"class": "File",
"location": "keep:99999999999999999999999999999991+99/hw.py",
@@ -132,6 +137,8 @@ class TestPathmap(unittest.TestCase):
p._pathmap["keep:99999999999999999999999999999991+99/hw.pyc"] = True
self.assertFalse(p.needs_new_collection(a))
+ # Secondary file is in a different collection from the
+ # primary. Need a new collection.
a = {
"class": "File",
"location": "keep:99999999999999999999999999999991+99/hw.py",
@@ -147,6 +154,8 @@ class TestPathmap(unittest.TestCase):
p._pathmap["keep:99999999999999999999999999999992+99/hw.pyc"] = True
self.assertTrue(p.needs_new_collection(a))
+ # Secondary file should be staged to a different name than
+ # the path in its location. Need a new collection.
a = {
"class": "File",
"location": "keep:99999999999999999999999999999991+99/hw.py",
@@ -162,6 +171,7 @@ class TestPathmap(unittest.TestCase):
p._pathmap["keep:99999999999999999999999999999991+99/hw.pyc"] = True
self.assertTrue(p.needs_new_collection(a))
+ # Secondary file is a directory. Do not need a new collection.
a = {
"class": "File",
"location": "keep:99999999999999999999999999999991+99/hw.py",
@@ -183,6 +193,7 @@ class TestPathmap(unittest.TestCase):
p._pathmap["keep:99999999999999999999999999999991+99/hw/h2"] = True
self.assertFalse(p.needs_new_collection(a))
+ # Secondary file is a renamed directory. Need a new collection.
a = {
"class": "File",
"location": "keep:99999999999999999999999999999991+99/hw.py",
@@ -204,6 +215,7 @@ class TestPathmap(unittest.TestCase):
p._pathmap["keep:99999999999999999999999999999991+99/hw/h2"] = True
self.assertTrue(p.needs_new_collection(a))
+ # Secondary file is a file literal. Need a new collection.
a = {
"class": "File",
"location": "keep:99999999999999999999999999999991+99/hw.py",
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list