[ARVADOS] updated: 2.1.0-2314-gfaf47b573

Git user git at public.arvados.org
Fri Apr 15 17:57:48 UTC 2022


Summary of changes:
 sdk/cwl/arvados_cwl/executor.py   |  3 ++-
 sdk/cwl/arvados_cwl/pathmapper.py | 18 ++++++++++++++----
 sdk/cwl/arvados_cwl/runner.py     | 36 +++++++++++++++++++++++-------------
 3 files changed, 39 insertions(+), 18 deletions(-)

       via  faf47b57391a6e704b82623e997d9213ffc9442c (commit)
       via  4d212400b51fc73c1b21db30f90c6714d244b777 (commit)
       via  adc0f36eeab40f4b8e0603247392b3c804d7272a (commit)
       via  29e3fe60d22218d122a1a3448a144bfcfd64785a (commit)
      from  bf95fa077c79c6bd4ac548ec534b618e97e3f00b (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit faf47b57391a6e704b82623e997d9213ffc9442c
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Fri Apr 15 13:44:57 2022 -0400

    18994: Fix for colon characters in filenames
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/sdk/cwl/arvados_cwl/executor.py b/sdk/cwl/arvados_cwl/executor.py
index 6e23d80a8..5f24d2407 100644
--- a/sdk/cwl/arvados_cwl/executor.py
+++ b/sdk/cwl/arvados_cwl/executor.py
@@ -18,6 +18,7 @@ import json
 import re
 from functools import partial
 import time
+import urllib
 
 from cwltool.errors import WorkflowException
 import cwltool.workflow
@@ -450,7 +451,7 @@ The 'jobs' API is no longer supported.
             srccollection = sp[0][5:]
             try:
                 reader = self.collection_cache.get(srccollection)
-                srcpath = "/".join(sp[1:]) if len(sp) > 1 else "."
+                srcpath = urllib.parse.unquote("/".join(sp[1:]) if len(sp) > 1 else ".")
                 final.copy(srcpath, v.target, source_collection=reader, overwrite=False)
             except arvados.errors.ArgumentError as e:
                 logger.error("Creating CollectionReader for '%s' '%s': %s", k, v, e)
diff --git a/sdk/cwl/arvados_cwl/pathmapper.py b/sdk/cwl/arvados_cwl/pathmapper.py
index e0216aae5..d7b535eeb 100644
--- a/sdk/cwl/arvados_cwl/pathmapper.py
+++ b/sdk/cwl/arvados_cwl/pathmapper.py
@@ -166,7 +166,7 @@ class ArvPathMapper(PathMapper):
         else:
             suffix = loc[len(prefix):]
 
-        if prefix+suffix != prefix+urllib.parse.quote(srcobj["basename"], "/+@"):
+        if "basename" in srcobj and prefix+suffix != prefix+urllib.parse.quote(srcobj["basename"], "/+@"):
             return True
 
         if srcobj["class"] == "File" and loc not in self._pathmap:

commit 4d212400b51fc73c1b21db30f90c6714d244b777
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Fri Apr 15 12:00:16 2022 -0400

    18994: Fix suffix
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/sdk/cwl/arvados_cwl/pathmapper.py b/sdk/cwl/arvados_cwl/pathmapper.py
index a5dd5ccf3..e0216aae5 100644
--- a/sdk/cwl/arvados_cwl/pathmapper.py
+++ b/sdk/cwl/arvados_cwl/pathmapper.py
@@ -163,7 +163,8 @@ class ArvPathMapper(PathMapper):
                 suffix = urllib.parse.quote(urllib.parse.unquote(loc[i+1:]), "/+@")
             else:
                 prefix = loc+"/"
-                suffix = ""
+        else:
+            suffix = loc[len(prefix):]
 
         if prefix+suffix != prefix+urllib.parse.quote(srcobj["basename"], "/+@"):
             return True

commit adc0f36eeab40f4b8e0603247392b3c804d7272a
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Fri Apr 15 11:51:08 2022 -0400

    18994: Remove debug prints
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/sdk/cwl/arvados_cwl/pathmapper.py b/sdk/cwl/arvados_cwl/pathmapper.py
index 0cbf9eb56..a5dd5ccf3 100644
--- a/sdk/cwl/arvados_cwl/pathmapper.py
+++ b/sdk/cwl/arvados_cwl/pathmapper.py
@@ -74,7 +74,6 @@ class ArvPathMapper(PathMapper):
 
         if isinstance(src, basestring) and src.startswith("keep:"):
             if collection_pdh_pattern.match(src):
-                #self._pathmap[src] = MapperEnt(src, self.collection_pattern % src[5:], srcobj["class"], True)
                 self._pathmap[src] = MapperEnt(src, self.collection_pattern % urllib.parse.unquote(src[5:]), srcobj["class"], True)
 
                 if arvados_cwl.util.collectionUUID in srcobj:
@@ -94,12 +93,9 @@ class ArvPathMapper(PathMapper):
                                                    raiseOSError=True)
                 with SourceLine(srcobj, "location", WorkflowException, debug):
                     if isinstance(st, arvados.commands.run.UploadFile):
-                        print("VV", (src, ab, st))
                         uploadfiles.add((src, ab, st))
                     elif isinstance(st, arvados.commands.run.ArvFile):
                         self._pathmap[src] = MapperEnt(st.fn, self.collection_pattern % urllib.parse.unquote(st.fn[5:]), "File", True)
-
-                        #self._pathmap[src] = MapperEnt(st.fn, self.collection_pattern % st.fn[5:], "File", True)
                     else:
                         raise WorkflowException("Input file path '%s' is invalid" % st)
             elif src.startswith("_:"):
@@ -125,7 +121,6 @@ class ArvPathMapper(PathMapper):
                 self.visit(l, uploadfiles)
 
     def addentry(self, obj, c, path, remap):
-        print(obj["location"], self._pathmap)
         if obj["location"] in self._pathmap:
             src, srcpath = self.arvrunner.fs_access.get_collection(self._pathmap[obj["location"]].resolved)
             if srcpath == "":
@@ -170,9 +165,7 @@ class ArvPathMapper(PathMapper):
                 prefix = loc+"/"
                 suffix = ""
 
-        print("LLL", prefix+suffix, prefix+urllib.parse.quote(srcobj["basename"], "/+@"))
         if prefix+suffix != prefix+urllib.parse.quote(srcobj["basename"], "/+@"):
-            print("LLL -> needs new collection")
             return True
 
         if srcobj["class"] == "File" and loc not in self._pathmap:
@@ -211,12 +204,9 @@ class ArvPathMapper(PathMapper):
                                          packed=False)
 
         for src, ab, st in uploadfiles:
-            print("BBBBB", src, ab, st.fn, urllib.parse.quote(st.fn, "/:+@"))
             self._pathmap[src] = MapperEnt(urllib.parse.quote(st.fn, "/:+@"), urllib.parse.quote(self.collection_pattern % st.fn[5:], "/:+@"),
                                            "Directory" if os.path.isdir(ab) else "File", True)
 
-        print("CCCCC", self._pathmap)
-
         for srcobj in referenced_files:
             remap = []
             if srcobj["class"] == "Directory" and srcobj["location"] not in self._pathmap:
diff --git a/sdk/cwl/arvados_cwl/runner.py b/sdk/cwl/arvados_cwl/runner.py
index eae7a7789..ddd1d2bac 100644
--- a/sdk/cwl/arvados_cwl/runner.py
+++ b/sdk/cwl/arvados_cwl/runner.py
@@ -403,8 +403,6 @@ def upload_dependencies(arvrunner, name, document_loader,
         else:
             del discovered[d]
 
-    print("NN", sc)
-
     mapper = ArvPathMapper(arvrunner, sc, "",
                            "keep:%s",
                            "keep:%s/%s",
@@ -412,8 +410,6 @@ def upload_dependencies(arvrunner, name, document_loader,
                            single_collection=True,
                            optional_deps=optional_deps)
 
-    print("whargh", mapper._pathmap)
-
     def setloc(p):
         loc = p.get("location")
         if loc and (not loc.startswith("_:")) and (not loc.startswith("keep:")):

commit 29e3fe60d22218d122a1a3448a144bfcfd64785a
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Fri Apr 15 11:48:59 2022 -0400

    18994: Fixing quoting WIP
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/sdk/cwl/arvados_cwl/pathmapper.py b/sdk/cwl/arvados_cwl/pathmapper.py
index dbfa9902c..0cbf9eb56 100644
--- a/sdk/cwl/arvados_cwl/pathmapper.py
+++ b/sdk/cwl/arvados_cwl/pathmapper.py
@@ -52,7 +52,8 @@ class ArvPathMapper(PathMapper):
     """Convert container-local paths to and from Keep collection ids."""
 
     def __init__(self, arvrunner, referenced_files, input_basedir,
-                 collection_pattern, file_pattern, name=None, single_collection=False):
+                 collection_pattern, file_pattern, name=None, single_collection=False,
+                 optional_deps=None):
         self.arvrunner = arvrunner
         self.input_basedir = input_basedir
         self.collection_pattern = collection_pattern
@@ -61,6 +62,7 @@ class ArvPathMapper(PathMapper):
         self.referenced_files = [r["location"] for r in referenced_files]
         self.single_collection = single_collection
         self.pdh_to_uuid = {}
+        self.optional_deps = optional_deps or []
         super(ArvPathMapper, self).__init__(referenced_files, input_basedir, None)
 
     def visit(self, srcobj, uploadfiles):
@@ -72,7 +74,9 @@ class ArvPathMapper(PathMapper):
 
         if isinstance(src, basestring) and src.startswith("keep:"):
             if collection_pdh_pattern.match(src):
+                #self._pathmap[src] = MapperEnt(src, self.collection_pattern % src[5:], srcobj["class"], True)
                 self._pathmap[src] = MapperEnt(src, self.collection_pattern % urllib.parse.unquote(src[5:]), srcobj["class"], True)
+
                 if arvados_cwl.util.collectionUUID in srcobj:
                     self.pdh_to_uuid[src.split("/", 1)[0][5:]] = srcobj[arvados_cwl.util.collectionUUID]
             elif not collection_uuid_pattern.match(src):
@@ -90,9 +94,12 @@ class ArvPathMapper(PathMapper):
                                                    raiseOSError=True)
                 with SourceLine(srcobj, "location", WorkflowException, debug):
                     if isinstance(st, arvados.commands.run.UploadFile):
+                        print("VV", (src, ab, st))
                         uploadfiles.add((src, ab, st))
                     elif isinstance(st, arvados.commands.run.ArvFile):
                         self._pathmap[src] = MapperEnt(st.fn, self.collection_pattern % urllib.parse.unquote(st.fn[5:]), "File", True)
+
+                        #self._pathmap[src] = MapperEnt(st.fn, self.collection_pattern % st.fn[5:], "File", True)
                     else:
                         raise WorkflowException("Input file path '%s' is invalid" % st)
             elif src.startswith("_:"):
@@ -118,6 +125,7 @@ class ArvPathMapper(PathMapper):
                 self.visit(l, uploadfiles)
 
     def addentry(self, obj, c, path, remap):
+        print(obj["location"], self._pathmap)
         if obj["location"] in self._pathmap:
             src, srcpath = self.arvrunner.fs_access.get_collection(self._pathmap[obj["location"]].resolved)
             if srcpath == "":
@@ -135,6 +143,9 @@ class ArvPathMapper(PathMapper):
                 f.write(obj["contents"])
             remap.append((obj["location"], path + "/" + obj["basename"]))
         else:
+            for opt in self.optional_deps:
+                if obj["location"] == opt["location"]:
+                    return
             raise SourceLine(obj, "location", WorkflowException).makeError("Don't know what to do with '%s'" % obj["location"])
 
     def needs_new_collection(self, srcobj, prefix=""):
@@ -149,14 +160,19 @@ class ArvPathMapper(PathMapper):
         loc = srcobj["location"]
         if loc.startswith("_:"):
             return True
+
         if not prefix:
             i = loc.rfind("/")
             if i > -1:
                 prefix = loc[:i+1]
+                suffix = urllib.parse.quote(urllib.parse.unquote(loc[i+1:]), "/+@")
             else:
                 prefix = loc+"/"
+                suffix = ""
 
-        if loc != prefix+srcobj["basename"]:
+        print("LLL", prefix+suffix, prefix+urllib.parse.quote(srcobj["basename"], "/+@"))
+        if prefix+suffix != prefix+urllib.parse.quote(srcobj["basename"], "/+@"):
+            print("LLL -> needs new collection")
             return True
 
         if srcobj["class"] == "File" and loc not in self._pathmap:
@@ -195,9 +211,12 @@ class ArvPathMapper(PathMapper):
                                          packed=False)
 
         for src, ab, st in uploadfiles:
-            self._pathmap[src] = MapperEnt(urllib.parse.quote(st.fn, "/:+@"), self.collection_pattern % st.fn[5:],
+            print("BBBBB", src, ab, st.fn, urllib.parse.quote(st.fn, "/:+@"))
+            self._pathmap[src] = MapperEnt(urllib.parse.quote(st.fn, "/:+@"), urllib.parse.quote(self.collection_pattern % st.fn[5:], "/:+@"),
                                            "Directory" if os.path.isdir(ab) else "File", True)
 
+        print("CCCCC", self._pathmap)
+
         for srcobj in referenced_files:
             remap = []
             if srcobj["class"] == "Directory" and srcobj["location"] not in self._pathmap:
@@ -221,7 +240,7 @@ class ArvPathMapper(PathMapper):
             elif srcobj["class"] == "File" and self.needs_new_collection(srcobj):
                 c = arvados.collection.Collection(api_client=self.arvrunner.api,
                                                   keep_client=self.arvrunner.keep_client,
-                                                  num_retries=self.arvrunner.num_retries                                                  )
+                                                  num_retries=self.arvrunner.num_retries)
                 self.addentry(srcobj, c, ".", remap)
 
                 container = arvados_cwl.util.get_current_container(self.arvrunner.api, self.arvrunner.num_retries, logger)
diff --git a/sdk/cwl/arvados_cwl/runner.py b/sdk/cwl/arvados_cwl/runner.py
index 38e2c4d80..eae7a7789 100644
--- a/sdk/cwl/arvados_cwl/runner.py
+++ b/sdk/cwl/arvados_cwl/runner.py
@@ -285,10 +285,18 @@ def upload_dependencies(arvrunner, name, document_loader,
 
     sc_result = scandeps(uri, scanobj,
                          loadref_fields,
-                         set(("$include", "$schemas", "location")),
+                         set(("$include", "location")),
                          loadref, urljoin=document_loader.fetcher.urljoin,
                          nestdirs=False)
 
+    optional_deps = scandeps(uri, scanobj,
+                                  loadref_fields,
+                                  set(("$schemas",)),
+                                  loadref, urljoin=document_loader.fetcher.urljoin,
+                                  nestdirs=False)
+
+    sc_result.extend(optional_deps)
+
     sc = []
     uuids = {}
 
@@ -345,24 +353,25 @@ def upload_dependencies(arvrunner, name, document_loader,
     if include_primary and "id" in workflowobj:
         sc.append({"class": "File", "location": workflowobj["id"]})
 
-    if "$schemas" in workflowobj:
-        for s in workflowobj["$schemas"]:
-            sc.append({"class": "File", "location": s})
+    #if "$schemas" in workflowobj:
+    #    for s in workflowobj["$schemas"]:
+    #        sc.append({"class": "File", "location": s})
 
     def visit_default(obj):
-        remove = [False]
+        #remove = [False]
         def ensure_default_location(f):
             if "location" not in f and "path" in f:
                 f["location"] = f["path"]
                 del f["path"]
-            if "location" in f and not arvrunner.fs_access.exists(f["location"]):
-                # Doesn't exist, remove from list of dependencies to upload
-                sc[:] = [x for x in sc if x["location"] != f["location"]]
-                # Delete "default" from workflowobj
-                remove[0] = True
+            optional_deps.append(f)
+            #if "location" in f and not arvrunner.fs_access.exists(f["location"]):
+            #    # Doesn't exist, remove from list of dependencies to upload
+            #    sc[:] = [x for x in sc if x["location"] != f["location"]]
+            #    # Delete "default" from workflowobj
+            #    remove[0] = True
         visit_class(obj["default"], ("File", "Directory"), ensure_default_location)
-        if remove[0]:
-            del obj["default"]
+        #if remove[0]:
+        #    del obj["default"]
 
     find_defaults(workflowobj, visit_default)
 
@@ -394,11 +403,16 @@ def upload_dependencies(arvrunner, name, document_loader,
         else:
             del discovered[d]
 
+    print("NN", sc)
+
     mapper = ArvPathMapper(arvrunner, sc, "",
                            "keep:%s",
                            "keep:%s/%s",
                            name=name,
-                           single_collection=True)
+                           single_collection=True,
+                           optional_deps=optional_deps)
+
+    print("whargh", mapper._pathmap)
 
     def setloc(p):
         loc = p.get("location")

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list