[ARVADOS] updated: 2.1.0-2314-gfaf47b573
Git user
git at public.arvados.org
Fri Apr 15 17:57:48 UTC 2022
Summary of changes:
sdk/cwl/arvados_cwl/executor.py | 3 ++-
sdk/cwl/arvados_cwl/pathmapper.py | 18 ++++++++++++++----
sdk/cwl/arvados_cwl/runner.py | 36 +++++++++++++++++++++++-------------
3 files changed, 39 insertions(+), 18 deletions(-)
via faf47b57391a6e704b82623e997d9213ffc9442c (commit)
via 4d212400b51fc73c1b21db30f90c6714d244b777 (commit)
via adc0f36eeab40f4b8e0603247392b3c804d7272a (commit)
via 29e3fe60d22218d122a1a3448a144bfcfd64785a (commit)
from bf95fa077c79c6bd4ac548ec534b618e97e3f00b (commit)
Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.
commit faf47b57391a6e704b82623e997d9213ffc9442c
Author: Peter Amstutz <peter.amstutz at curii.com>
Date: Fri Apr 15 13:44:57 2022 -0400
18994: Fix for colon characters in filenames
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>
diff --git a/sdk/cwl/arvados_cwl/executor.py b/sdk/cwl/arvados_cwl/executor.py
index 6e23d80a8..5f24d2407 100644
--- a/sdk/cwl/arvados_cwl/executor.py
+++ b/sdk/cwl/arvados_cwl/executor.py
@@ -18,6 +18,7 @@ import json
import re
from functools import partial
import time
+import urllib
from cwltool.errors import WorkflowException
import cwltool.workflow
@@ -450,7 +451,7 @@ The 'jobs' API is no longer supported.
srccollection = sp[0][5:]
try:
reader = self.collection_cache.get(srccollection)
- srcpath = "/".join(sp[1:]) if len(sp) > 1 else "."
+ srcpath = urllib.parse.unquote("/".join(sp[1:]) if len(sp) > 1 else ".")
final.copy(srcpath, v.target, source_collection=reader, overwrite=False)
except arvados.errors.ArgumentError as e:
logger.error("Creating CollectionReader for '%s' '%s': %s", k, v, e)
diff --git a/sdk/cwl/arvados_cwl/pathmapper.py b/sdk/cwl/arvados_cwl/pathmapper.py
index e0216aae5..d7b535eeb 100644
--- a/sdk/cwl/arvados_cwl/pathmapper.py
+++ b/sdk/cwl/arvados_cwl/pathmapper.py
@@ -166,7 +166,7 @@ class ArvPathMapper(PathMapper):
else:
suffix = loc[len(prefix):]
- if prefix+suffix != prefix+urllib.parse.quote(srcobj["basename"], "/+@"):
+ if "basename" in srcobj and prefix+suffix != prefix+urllib.parse.quote(srcobj["basename"], "/+@"):
return True
if srcobj["class"] == "File" and loc not in self._pathmap:
commit 4d212400b51fc73c1b21db30f90c6714d244b777
Author: Peter Amstutz <peter.amstutz at curii.com>
Date: Fri Apr 15 12:00:16 2022 -0400
18994: Fix suffix
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>
diff --git a/sdk/cwl/arvados_cwl/pathmapper.py b/sdk/cwl/arvados_cwl/pathmapper.py
index a5dd5ccf3..e0216aae5 100644
--- a/sdk/cwl/arvados_cwl/pathmapper.py
+++ b/sdk/cwl/arvados_cwl/pathmapper.py
@@ -163,7 +163,8 @@ class ArvPathMapper(PathMapper):
suffix = urllib.parse.quote(urllib.parse.unquote(loc[i+1:]), "/+@")
else:
prefix = loc+"/"
- suffix = ""
+ else:
+ suffix = loc[len(prefix):]
if prefix+suffix != prefix+urllib.parse.quote(srcobj["basename"], "/+@"):
return True
commit adc0f36eeab40f4b8e0603247392b3c804d7272a
Author: Peter Amstutz <peter.amstutz at curii.com>
Date: Fri Apr 15 11:51:08 2022 -0400
18994: Remove debug prints
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>
diff --git a/sdk/cwl/arvados_cwl/pathmapper.py b/sdk/cwl/arvados_cwl/pathmapper.py
index 0cbf9eb56..a5dd5ccf3 100644
--- a/sdk/cwl/arvados_cwl/pathmapper.py
+++ b/sdk/cwl/arvados_cwl/pathmapper.py
@@ -74,7 +74,6 @@ class ArvPathMapper(PathMapper):
if isinstance(src, basestring) and src.startswith("keep:"):
if collection_pdh_pattern.match(src):
- #self._pathmap[src] = MapperEnt(src, self.collection_pattern % src[5:], srcobj["class"], True)
self._pathmap[src] = MapperEnt(src, self.collection_pattern % urllib.parse.unquote(src[5:]), srcobj["class"], True)
if arvados_cwl.util.collectionUUID in srcobj:
@@ -94,12 +93,9 @@ class ArvPathMapper(PathMapper):
raiseOSError=True)
with SourceLine(srcobj, "location", WorkflowException, debug):
if isinstance(st, arvados.commands.run.UploadFile):
- print("VV", (src, ab, st))
uploadfiles.add((src, ab, st))
elif isinstance(st, arvados.commands.run.ArvFile):
self._pathmap[src] = MapperEnt(st.fn, self.collection_pattern % urllib.parse.unquote(st.fn[5:]), "File", True)
-
- #self._pathmap[src] = MapperEnt(st.fn, self.collection_pattern % st.fn[5:], "File", True)
else:
raise WorkflowException("Input file path '%s' is invalid" % st)
elif src.startswith("_:"):
@@ -125,7 +121,6 @@ class ArvPathMapper(PathMapper):
self.visit(l, uploadfiles)
def addentry(self, obj, c, path, remap):
- print(obj["location"], self._pathmap)
if obj["location"] in self._pathmap:
src, srcpath = self.arvrunner.fs_access.get_collection(self._pathmap[obj["location"]].resolved)
if srcpath == "":
@@ -170,9 +165,7 @@ class ArvPathMapper(PathMapper):
prefix = loc+"/"
suffix = ""
- print("LLL", prefix+suffix, prefix+urllib.parse.quote(srcobj["basename"], "/+@"))
if prefix+suffix != prefix+urllib.parse.quote(srcobj["basename"], "/+@"):
- print("LLL -> needs new collection")
return True
if srcobj["class"] == "File" and loc not in self._pathmap:
@@ -211,12 +204,9 @@ class ArvPathMapper(PathMapper):
packed=False)
for src, ab, st in uploadfiles:
- print("BBBBB", src, ab, st.fn, urllib.parse.quote(st.fn, "/:+@"))
self._pathmap[src] = MapperEnt(urllib.parse.quote(st.fn, "/:+@"), urllib.parse.quote(self.collection_pattern % st.fn[5:], "/:+@"),
"Directory" if os.path.isdir(ab) else "File", True)
- print("CCCCC", self._pathmap)
-
for srcobj in referenced_files:
remap = []
if srcobj["class"] == "Directory" and srcobj["location"] not in self._pathmap:
diff --git a/sdk/cwl/arvados_cwl/runner.py b/sdk/cwl/arvados_cwl/runner.py
index eae7a7789..ddd1d2bac 100644
--- a/sdk/cwl/arvados_cwl/runner.py
+++ b/sdk/cwl/arvados_cwl/runner.py
@@ -403,8 +403,6 @@ def upload_dependencies(arvrunner, name, document_loader,
else:
del discovered[d]
- print("NN", sc)
-
mapper = ArvPathMapper(arvrunner, sc, "",
"keep:%s",
"keep:%s/%s",
@@ -412,8 +410,6 @@ def upload_dependencies(arvrunner, name, document_loader,
single_collection=True,
optional_deps=optional_deps)
- print("whargh", mapper._pathmap)
-
def setloc(p):
loc = p.get("location")
if loc and (not loc.startswith("_:")) and (not loc.startswith("keep:")):
commit 29e3fe60d22218d122a1a3448a144bfcfd64785a
Author: Peter Amstutz <peter.amstutz at curii.com>
Date: Fri Apr 15 11:48:59 2022 -0400
18994: Fixing quoting WIP
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>
diff --git a/sdk/cwl/arvados_cwl/pathmapper.py b/sdk/cwl/arvados_cwl/pathmapper.py
index dbfa9902c..0cbf9eb56 100644
--- a/sdk/cwl/arvados_cwl/pathmapper.py
+++ b/sdk/cwl/arvados_cwl/pathmapper.py
@@ -52,7 +52,8 @@ class ArvPathMapper(PathMapper):
"""Convert container-local paths to and from Keep collection ids."""
def __init__(self, arvrunner, referenced_files, input_basedir,
- collection_pattern, file_pattern, name=None, single_collection=False):
+ collection_pattern, file_pattern, name=None, single_collection=False,
+ optional_deps=None):
self.arvrunner = arvrunner
self.input_basedir = input_basedir
self.collection_pattern = collection_pattern
@@ -61,6 +62,7 @@ class ArvPathMapper(PathMapper):
self.referenced_files = [r["location"] for r in referenced_files]
self.single_collection = single_collection
self.pdh_to_uuid = {}
+ self.optional_deps = optional_deps or []
super(ArvPathMapper, self).__init__(referenced_files, input_basedir, None)
def visit(self, srcobj, uploadfiles):
@@ -72,7 +74,9 @@ class ArvPathMapper(PathMapper):
if isinstance(src, basestring) and src.startswith("keep:"):
if collection_pdh_pattern.match(src):
+ #self._pathmap[src] = MapperEnt(src, self.collection_pattern % src[5:], srcobj["class"], True)
self._pathmap[src] = MapperEnt(src, self.collection_pattern % urllib.parse.unquote(src[5:]), srcobj["class"], True)
+
if arvados_cwl.util.collectionUUID in srcobj:
self.pdh_to_uuid[src.split("/", 1)[0][5:]] = srcobj[arvados_cwl.util.collectionUUID]
elif not collection_uuid_pattern.match(src):
@@ -90,9 +94,12 @@ class ArvPathMapper(PathMapper):
raiseOSError=True)
with SourceLine(srcobj, "location", WorkflowException, debug):
if isinstance(st, arvados.commands.run.UploadFile):
+ print("VV", (src, ab, st))
uploadfiles.add((src, ab, st))
elif isinstance(st, arvados.commands.run.ArvFile):
self._pathmap[src] = MapperEnt(st.fn, self.collection_pattern % urllib.parse.unquote(st.fn[5:]), "File", True)
+
+ #self._pathmap[src] = MapperEnt(st.fn, self.collection_pattern % st.fn[5:], "File", True)
else:
raise WorkflowException("Input file path '%s' is invalid" % st)
elif src.startswith("_:"):
@@ -118,6 +125,7 @@ class ArvPathMapper(PathMapper):
self.visit(l, uploadfiles)
def addentry(self, obj, c, path, remap):
+ print(obj["location"], self._pathmap)
if obj["location"] in self._pathmap:
src, srcpath = self.arvrunner.fs_access.get_collection(self._pathmap[obj["location"]].resolved)
if srcpath == "":
@@ -135,6 +143,9 @@ class ArvPathMapper(PathMapper):
f.write(obj["contents"])
remap.append((obj["location"], path + "/" + obj["basename"]))
else:
+ for opt in self.optional_deps:
+ if obj["location"] == opt["location"]:
+ return
raise SourceLine(obj, "location", WorkflowException).makeError("Don't know what to do with '%s'" % obj["location"])
def needs_new_collection(self, srcobj, prefix=""):
@@ -149,14 +160,19 @@ class ArvPathMapper(PathMapper):
loc = srcobj["location"]
if loc.startswith("_:"):
return True
+
if not prefix:
i = loc.rfind("/")
if i > -1:
prefix = loc[:i+1]
+ suffix = urllib.parse.quote(urllib.parse.unquote(loc[i+1:]), "/+@")
else:
prefix = loc+"/"
+ suffix = ""
- if loc != prefix+srcobj["basename"]:
+ print("LLL", prefix+suffix, prefix+urllib.parse.quote(srcobj["basename"], "/+@"))
+ if prefix+suffix != prefix+urllib.parse.quote(srcobj["basename"], "/+@"):
+ print("LLL -> needs new collection")
return True
if srcobj["class"] == "File" and loc not in self._pathmap:
@@ -195,9 +211,12 @@ class ArvPathMapper(PathMapper):
packed=False)
for src, ab, st in uploadfiles:
- self._pathmap[src] = MapperEnt(urllib.parse.quote(st.fn, "/:+@"), self.collection_pattern % st.fn[5:],
+ print("BBBBB", src, ab, st.fn, urllib.parse.quote(st.fn, "/:+@"))
+ self._pathmap[src] = MapperEnt(urllib.parse.quote(st.fn, "/:+@"), urllib.parse.quote(self.collection_pattern % st.fn[5:], "/:+@"),
"Directory" if os.path.isdir(ab) else "File", True)
+ print("CCCCC", self._pathmap)
+
for srcobj in referenced_files:
remap = []
if srcobj["class"] == "Directory" and srcobj["location"] not in self._pathmap:
@@ -221,7 +240,7 @@ class ArvPathMapper(PathMapper):
elif srcobj["class"] == "File" and self.needs_new_collection(srcobj):
c = arvados.collection.Collection(api_client=self.arvrunner.api,
keep_client=self.arvrunner.keep_client,
- num_retries=self.arvrunner.num_retries )
+ num_retries=self.arvrunner.num_retries)
self.addentry(srcobj, c, ".", remap)
container = arvados_cwl.util.get_current_container(self.arvrunner.api, self.arvrunner.num_retries, logger)
diff --git a/sdk/cwl/arvados_cwl/runner.py b/sdk/cwl/arvados_cwl/runner.py
index 38e2c4d80..eae7a7789 100644
--- a/sdk/cwl/arvados_cwl/runner.py
+++ b/sdk/cwl/arvados_cwl/runner.py
@@ -285,10 +285,18 @@ def upload_dependencies(arvrunner, name, document_loader,
sc_result = scandeps(uri, scanobj,
loadref_fields,
- set(("$include", "$schemas", "location")),
+ set(("$include", "location")),
loadref, urljoin=document_loader.fetcher.urljoin,
nestdirs=False)
+ optional_deps = scandeps(uri, scanobj,
+ loadref_fields,
+ set(("$schemas",)),
+ loadref, urljoin=document_loader.fetcher.urljoin,
+ nestdirs=False)
+
+ sc_result.extend(optional_deps)
+
sc = []
uuids = {}
@@ -345,24 +353,25 @@ def upload_dependencies(arvrunner, name, document_loader,
if include_primary and "id" in workflowobj:
sc.append({"class": "File", "location": workflowobj["id"]})
- if "$schemas" in workflowobj:
- for s in workflowobj["$schemas"]:
- sc.append({"class": "File", "location": s})
+ #if "$schemas" in workflowobj:
+ # for s in workflowobj["$schemas"]:
+ # sc.append({"class": "File", "location": s})
def visit_default(obj):
- remove = [False]
+ #remove = [False]
def ensure_default_location(f):
if "location" not in f and "path" in f:
f["location"] = f["path"]
del f["path"]
- if "location" in f and not arvrunner.fs_access.exists(f["location"]):
- # Doesn't exist, remove from list of dependencies to upload
- sc[:] = [x for x in sc if x["location"] != f["location"]]
- # Delete "default" from workflowobj
- remove[0] = True
+ optional_deps.append(f)
+ #if "location" in f and not arvrunner.fs_access.exists(f["location"]):
+ # # Doesn't exist, remove from list of dependencies to upload
+ # sc[:] = [x for x in sc if x["location"] != f["location"]]
+ # # Delete "default" from workflowobj
+ # remove[0] = True
visit_class(obj["default"], ("File", "Directory"), ensure_default_location)
- if remove[0]:
- del obj["default"]
+ #if remove[0]:
+ # del obj["default"]
find_defaults(workflowobj, visit_default)
@@ -394,11 +403,16 @@ def upload_dependencies(arvrunner, name, document_loader,
else:
del discovered[d]
+ print("NN", sc)
+
mapper = ArvPathMapper(arvrunner, sc, "",
"keep:%s",
"keep:%s/%s",
name=name,
- single_collection=True)
+ single_collection=True,
+ optional_deps=optional_deps)
+
+ print("whargh", mapper._pathmap)
def setloc(p):
loc = p.get("location")
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list