[ARVADOS] created: 1.1.4-364-g57fcaf4

Git user git at public.curoverse.com
Thu Jun 7 10:50:43 EDT 2018


        at  57fcaf45fe40806dca5f001aabed9b413243183e (commit)


commit 57fcaf45fe40806dca5f001aabed9b413243183e
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date:   Thu Jun 7 09:54:47 2018 -0400

    11907: Make collections containing dependencies have predictable PDHs
    
    Removes optimization that tries to avoid redundant file
    uploads (decided complexity tradeoff isn't worth it.)  Collections
    created from local file inputs longer use block packing.
    
    Likely to invalidate job reuse for jobs submitted by past versions of
    a-c-r, but will have more stable job reuse going forward.
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/sdk/cwl/arvados_cwl/__init__.py b/sdk/cwl/arvados_cwl/__init__.py
index 5c60f7d..5b29ae5 100644
--- a/sdk/cwl/arvados_cwl/__init__.py
+++ b/sdk/cwl/arvados_cwl/__init__.py
@@ -76,7 +76,6 @@ class ArvCwlRunner(object):
         self.workflow_eval_lock = threading.Condition(threading.RLock())
         self.final_output = None
         self.final_status = None
-        self.uploaded = {}
         self.num_retries = num_retries
         self.uuid = None
         self.stop_polling = threading.Event()
@@ -238,12 +237,6 @@ class ArvCwlRunner(object):
         finally:
             self.stop_polling.set()
 
-    def get_uploaded(self):
-        return self.uploaded.copy()
-
-    def add_uploaded(self, src, pair):
-        self.uploaded[src] = pair
-
     def add_intermediate_output(self, uuid):
         if uuid:
             self.intermediate_output_collections.append(uuid)
diff --git a/sdk/cwl/arvados_cwl/pathmapper.py b/sdk/cwl/arvados_cwl/pathmapper.py
index bd4b528..27e48f1 100644
--- a/sdk/cwl/arvados_cwl/pathmapper.py
+++ b/sdk/cwl/arvados_cwl/pathmapper.py
@@ -127,19 +127,6 @@ class ArvPathMapper(PathMapper):
                                                        keep_client=self.arvrunner.keep_client,
                                                        num_retries=self.arvrunner.num_retries)
 
-        already_uploaded = self.arvrunner.get_uploaded()
-        copied_files = set()
-        for k in referenced_files:
-            loc = k["location"]
-            if loc in already_uploaded:
-                v = already_uploaded[loc]
-                self._pathmap[loc] = MapperEnt(v.resolved, self.collection_pattern % urllib.unquote(v.resolved[5:]), v.type, True)
-                if self.single_collection:
-                    basename = k["basename"]
-                    if basename not in collection:
-                        self.addentry({"location": loc, "class": v.type, "basename": basename}, collection, ".", [])
-                        copied_files.add((loc, basename, v.type))
-
         for srcobj in referenced_files:
             self.visit(srcobj, uploadfiles)
 
@@ -150,16 +137,12 @@ class ArvPathMapper(PathMapper):
                                          fnPattern="keep:%s/%s",
                                          name=self.name,
                                          project=self.arvrunner.project_uuid,
-                                         collection=collection)
+                                         collection=collection,
+                                         packed=False)
 
         for src, ab, st in uploadfiles:
             self._pathmap[src] = MapperEnt(urllib.quote(st.fn, "/:+@"), self.collection_pattern % st.fn[5:],
                                            "Directory" if os.path.isdir(ab) else "File", True)
-            self.arvrunner.add_uploaded(src, self._pathmap[src])
-
-        for loc, basename, cls in copied_files:
-            fn = "keep:%s/%s" % (collection.portable_data_hash(), basename)
-            self._pathmap[loc] = MapperEnt(urllib.quote(fn, "/:+@"), self.collection_pattern % fn[5:], cls, True)
 
         for srcobj in referenced_files:
             remap = []
diff --git a/sdk/cwl/arvados_cwl/runner.py b/sdk/cwl/arvados_cwl/runner.py
index 3ce08f6..cf91f69 100644
--- a/sdk/cwl/arvados_cwl/runner.py
+++ b/sdk/cwl/arvados_cwl/runner.py
@@ -122,11 +122,18 @@ def upload_dependencies(arvrunner, name, document_loader,
         # that external references in $include and $mixin are captured.
         scanobj = loadref("", workflowobj["id"])
 
-    sc = scandeps(uri, scanobj,
+    sc_result = scandeps(uri, scanobj,
                   loadref_fields,
                   set(("$include", "$schemas", "location")),
                   loadref, urljoin=document_loader.fetcher.urljoin)
 
+    sc = []
+    def only_real(obj):
+        if obj.get("location", "").startswith("file:"):
+            sc.append(obj)
+
+    visit_class(sc_result, ("File", "Directory"), only_real)
+
     normalizeFilesDirs(sc)
 
     if include_primary and "id" in workflowobj:
diff --git a/sdk/cwl/setup.py b/sdk/cwl/setup.py
index 8efcb6e..4c31d3b 100644
--- a/sdk/cwl/setup.py
+++ b/sdk/cwl/setup.py
@@ -37,7 +37,7 @@ setup(name='arvados-cwl-runner',
           'schema-salad==2.7.20180501211602',
           'typing >= 3.5.3',
           'ruamel.yaml >=0.13.11, <0.15',
-          'arvados-python-client>=1.1.4.20180507184611',
+          'arvados-python-client>=1.1.4.20180607143841',
           'setuptools',
           'ciso8601 >=1.0.6, <2.0.0'
       ],

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list