[ARVADOS] created: 1.3.0-63-g06bcc58df
Git user
git at public.curoverse.com
Tue Dec 18 09:53:57 EST 2018
at 06bcc58df3e531f4af2bbc3ccf13aacdac068623 (commit)
commit 06bcc58df3e531f4af2bbc3ccf13aacdac068623
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date: Mon Dec 17 16:23:07 2018 -0500
14327: Don't create new collection if source has expected secondaryFiles
If all secondary files/directories are located in the same collection
as the primary file, and their paths and names are consistent with
staging, don't create a new collection.
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>
diff --git a/sdk/cwl/arvados_cwl/pathmapper.py b/sdk/cwl/arvados_cwl/pathmapper.py
index 26c85d300..c844311d5 100644
--- a/sdk/cwl/arvados_cwl/pathmapper.py
+++ b/sdk/cwl/arvados_cwl/pathmapper.py
@@ -119,6 +119,32 @@ class ArvPathMapper(PathMapper):
else:
raise SourceLine(obj, "location", WorkflowException).makeError("Don't know what to do with '%s'" % obj["location"])
+ def needs_new_collection(self, srcobj, prefix=""):
+ loc = srcobj["location"]
+ if loc.startswith("_:"):
+ return True
+ if prefix:
+ if loc != prefix+srcobj["basename"]:
+ return True
+ else:
+ i = loc.rfind("/")
+ if i > -1:
+ prefix = loc[:i+1]
+ else:
+ prefix = loc+"/"
+ if srcobj["class"] == "File" and loc not in self._pathmap:
+ return True
+ if srcobj.get("secondaryFiles"):
+ for s in srcobj["secondaryFiles"]:
+ if self.needs_new_collection(s, prefix):
+ return True
+ if srcobj.get("listing"):
+ prefix = "%s%s/" % (prefix, srcobj["basename"])
+ for l in srcobj["listing"]:
+ if self.needs_new_collection(l, prefix):
+ return True
+ return False
+
def setup(self, referenced_files, basedir):
# type: (List[Any], unicode) -> None
uploadfiles = set()
@@ -169,6 +195,9 @@ class ArvPathMapper(PathMapper):
elif srcobj["class"] == "File" and (srcobj.get("secondaryFiles") or
(srcobj["location"].startswith("_:") and "contents" in srcobj)):
+ if not self.needs_new_collection(srcobj):
+ continue
+
c = arvados.collection.Collection(api_client=self.arvrunner.api,
keep_client=self.arvrunner.keep_client,
num_retries=self.arvrunner.num_retries )
diff --git a/sdk/cwl/tests/test_pathmapper.py b/sdk/cwl/tests/test_pathmapper.py
index fb3c257d9..fe8c2536c 100644
--- a/sdk/cwl/tests/test_pathmapper.py
+++ b/sdk/cwl/tests/test_pathmapper.py
@@ -102,3 +102,120 @@ class TestPathmap(unittest.TestCase):
"class": "File",
"location": "file:tests/hw.py"
}], "", "/test/%s", "/test/%s/%s")
+
+ def test_needs_new_collection(self):
+ arvrunner = arvados_cwl.executor.ArvCwlExecutor(self.api)
+ a = {
+ "class": "File",
+ "location": "keep:99999999999999999999999999999991+99/hw.py",
+ "basename": "hw.py"
+ }
+ p = ArvPathMapper(arvrunner, [], "", "%s", "%s/%s")
+ p._pathmap["keep:99999999999999999999999999999991+99/hw.py"] = True
+ self.assertFalse(p.needs_new_collection(a))
+
+ p = ArvPathMapper(arvrunner, [], "", "%s", "%s/%s")
+ self.assertTrue(p.needs_new_collection(a))
+
+ a = {
+ "class": "File",
+ "location": "keep:99999999999999999999999999999991+99/hw.py",
+ "basename": "hw.py",
+ "secondaryFiles": [{
+ "class": "File",
+ "location": "keep:99999999999999999999999999999991+99/hw.pyc",
+ "basename": "hw.pyc"
+ }]
+ }
+ p = ArvPathMapper(arvrunner, [], "", "%s", "%s/%s")
+ p._pathmap["keep:99999999999999999999999999999991+99/hw.py"] = True
+ p._pathmap["keep:99999999999999999999999999999991+99/hw.pyc"] = True
+ self.assertFalse(p.needs_new_collection(a))
+
+ a = {
+ "class": "File",
+ "location": "keep:99999999999999999999999999999991+99/hw.py",
+ "basename": "hw.py",
+ "secondaryFiles": [{
+ "class": "File",
+ "location": "keep:99999999999999999999999999999992+99/hw.pyc",
+ "basename": "hw.pyc"
+ }]
+ }
+ p = ArvPathMapper(arvrunner, [], "", "%s", "%s/%s")
+ p._pathmap["keep:99999999999999999999999999999991+99/hw.py"] = True
+ p._pathmap["keep:99999999999999999999999999999992+99/hw.pyc"] = True
+ self.assertTrue(p.needs_new_collection(a))
+
+ a = {
+ "class": "File",
+ "location": "keep:99999999999999999999999999999991+99/hw.py",
+ "basename": "hw.py",
+ "secondaryFiles": [{
+ "class": "File",
+ "location": "keep:99999999999999999999999999999991+99/hw.pyc",
+ "basename": "hw.other"
+ }]
+ }
+ p = ArvPathMapper(arvrunner, [], "", "%s", "%s/%s")
+ p._pathmap["keep:99999999999999999999999999999991+99/hw.py"] = True
+ p._pathmap["keep:99999999999999999999999999999991+99/hw.pyc"] = True
+ self.assertTrue(p.needs_new_collection(a))
+
+ a = {
+ "class": "File",
+ "location": "keep:99999999999999999999999999999991+99/hw.py",
+ "basename": "hw.py",
+ "secondaryFiles": [{
+ "class": "Directory",
+ "location": "keep:99999999999999999999999999999991+99/hw",
+ "basename": "hw",
+ "listing": [{
+ "class": "File",
+ "location": "keep:99999999999999999999999999999991+99/hw/h2",
+ "basename": "h2"
+ }]
+ }]
+ }
+ p = ArvPathMapper(arvrunner, [], "", "%s", "%s/%s")
+ p._pathmap["keep:99999999999999999999999999999991+99/hw.py"] = True
+ p._pathmap["keep:99999999999999999999999999999991+99/hw"] = True
+ p._pathmap["keep:99999999999999999999999999999991+99/hw/h2"] = True
+ self.assertFalse(p.needs_new_collection(a))
+
+ a = {
+ "class": "File",
+ "location": "keep:99999999999999999999999999999991+99/hw.py",
+ "basename": "hw.py",
+ "secondaryFiles": [{
+ "class": "Directory",
+ "location": "keep:99999999999999999999999999999991+99/hw",
+ "basename": "wh",
+ "listing": [{
+ "class": "File",
+ "location": "keep:99999999999999999999999999999991+99/hw/h2",
+ "basename": "h2"
+ }]
+ }]
+ }
+ p = ArvPathMapper(arvrunner, [], "", "%s", "%s/%s")
+ p._pathmap["keep:99999999999999999999999999999991+99/hw.py"] = True
+ p._pathmap["keep:99999999999999999999999999999991+99/hw"] = True
+ p._pathmap["keep:99999999999999999999999999999991+99/hw/h2"] = True
+ self.assertTrue(p.needs_new_collection(a))
+
+ a = {
+ "class": "File",
+ "location": "keep:99999999999999999999999999999991+99/hw.py",
+ "basename": "hw.py",
+ "secondaryFiles": [{
+ "class": "File",
+ "location": "_:123",
+ "basename": "hw.pyc",
+ "contents": "123"
+ }]
+ }
+ p = ArvPathMapper(arvrunner, [], "", "%s", "%s/%s")
+ p._pathmap["keep:99999999999999999999999999999991+99/hw.py"] = True
+ p._pathmap["_:123"] = True
+ self.assertTrue(p.needs_new_collection(a))
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list