[ARVADOS] created: 1.1.4-364-g57fcaf4
Git user
git at public.curoverse.com
Thu Jun 7 10:50:43 EDT 2018
at 57fcaf45fe40806dca5f001aabed9b413243183e (commit)
commit 57fcaf45fe40806dca5f001aabed9b413243183e
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date: Thu Jun 7 09:54:47 2018 -0400
11907: Make collections containing dependencies have predictable PDHs
Removes the optimization that tries to avoid redundant file
uploads (decided the complexity tradeoff isn't worth it). Collections
created from local file inputs no longer use block packing.
Likely to invalidate job reuse for jobs submitted by past versions of
a-c-r, but will have more stable job reuse going forward.
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>
diff --git a/sdk/cwl/arvados_cwl/__init__.py b/sdk/cwl/arvados_cwl/__init__.py
index 5c60f7d..5b29ae5 100644
--- a/sdk/cwl/arvados_cwl/__init__.py
+++ b/sdk/cwl/arvados_cwl/__init__.py
@@ -76,7 +76,6 @@ class ArvCwlRunner(object):
self.workflow_eval_lock = threading.Condition(threading.RLock())
self.final_output = None
self.final_status = None
- self.uploaded = {}
self.num_retries = num_retries
self.uuid = None
self.stop_polling = threading.Event()
@@ -238,12 +237,6 @@ class ArvCwlRunner(object):
finally:
self.stop_polling.set()
- def get_uploaded(self):
- return self.uploaded.copy()
-
- def add_uploaded(self, src, pair):
- self.uploaded[src] = pair
-
def add_intermediate_output(self, uuid):
if uuid:
self.intermediate_output_collections.append(uuid)
diff --git a/sdk/cwl/arvados_cwl/pathmapper.py b/sdk/cwl/arvados_cwl/pathmapper.py
index bd4b528..27e48f1 100644
--- a/sdk/cwl/arvados_cwl/pathmapper.py
+++ b/sdk/cwl/arvados_cwl/pathmapper.py
@@ -127,19 +127,6 @@ class ArvPathMapper(PathMapper):
keep_client=self.arvrunner.keep_client,
num_retries=self.arvrunner.num_retries)
- already_uploaded = self.arvrunner.get_uploaded()
- copied_files = set()
- for k in referenced_files:
- loc = k["location"]
- if loc in already_uploaded:
- v = already_uploaded[loc]
- self._pathmap[loc] = MapperEnt(v.resolved, self.collection_pattern % urllib.unquote(v.resolved[5:]), v.type, True)
- if self.single_collection:
- basename = k["basename"]
- if basename not in collection:
- self.addentry({"location": loc, "class": v.type, "basename": basename}, collection, ".", [])
- copied_files.add((loc, basename, v.type))
-
for srcobj in referenced_files:
self.visit(srcobj, uploadfiles)
@@ -150,16 +137,12 @@ class ArvPathMapper(PathMapper):
fnPattern="keep:%s/%s",
name=self.name,
project=self.arvrunner.project_uuid,
- collection=collection)
+ collection=collection,
+ packed=False)
for src, ab, st in uploadfiles:
self._pathmap[src] = MapperEnt(urllib.quote(st.fn, "/:+@"), self.collection_pattern % st.fn[5:],
"Directory" if os.path.isdir(ab) else "File", True)
- self.arvrunner.add_uploaded(src, self._pathmap[src])
-
- for loc, basename, cls in copied_files:
- fn = "keep:%s/%s" % (collection.portable_data_hash(), basename)
- self._pathmap[loc] = MapperEnt(urllib.quote(fn, "/:+@"), self.collection_pattern % fn[5:], cls, True)
for srcobj in referenced_files:
remap = []
diff --git a/sdk/cwl/arvados_cwl/runner.py b/sdk/cwl/arvados_cwl/runner.py
index 3ce08f6..cf91f69 100644
--- a/sdk/cwl/arvados_cwl/runner.py
+++ b/sdk/cwl/arvados_cwl/runner.py
@@ -122,11 +122,18 @@ def upload_dependencies(arvrunner, name, document_loader,
# that external references in $include and $mixin are captured.
scanobj = loadref("", workflowobj["id"])
- sc = scandeps(uri, scanobj,
+ sc_result = scandeps(uri, scanobj,
loadref_fields,
set(("$include", "$schemas", "location")),
loadref, urljoin=document_loader.fetcher.urljoin)
+ sc = []
+ def only_real(obj):
+ if obj.get("location", "").startswith("file:"):
+ sc.append(obj)
+
+ visit_class(sc_result, ("File", "Directory"), only_real)
+
normalizeFilesDirs(sc)
if include_primary and "id" in workflowobj:
diff --git a/sdk/cwl/setup.py b/sdk/cwl/setup.py
index 8efcb6e..4c31d3b 100644
--- a/sdk/cwl/setup.py
+++ b/sdk/cwl/setup.py
@@ -37,7 +37,7 @@ setup(name='arvados-cwl-runner',
'schema-salad==2.7.20180501211602',
'typing >= 3.5.3',
'ruamel.yaml >=0.13.11, <0.15',
- 'arvados-python-client>=1.1.4.20180507184611',
+ 'arvados-python-client>=1.1.4.20180607143841',
'setuptools',
'ciso8601 >=1.0.6, <2.0.0'
],
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list