[ARVADOS] created: 8316594ba5a3bfd9869b60178a889721da585da9
Git user
git at public.curoverse.com
Tue May 23 15:27:30 EDT 2017
at 8316594ba5a3bfd9869b60178a889721da585da9 (commit)
commit 8316594ba5a3bfd9869b60178a889721da585da9
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Wed May 17 09:03:18 2017 -0400
11543: Upload tool dependencies into single collection. Add test for collection per tool. Fix other tests.
diff --git a/sdk/cwl/arvados_cwl/__init__.py b/sdk/cwl/arvados_cwl/__init__.py
index 46436b5..25a240c 100644
--- a/sdk/cwl/arvados_cwl/__init__.py
+++ b/sdk/cwl/arvados_cwl/__init__.py
@@ -105,7 +105,8 @@ class ArvCwlRunner(object):
kwargs["fetcher_constructor"] = partial(CollectionFetcher,
api_client=self.api,
fs_access=CollectionFsAccess("", collection_cache=self.collection_cache),
- num_retries=self.num_retries)
+ num_retries=self.num_retries,
+ overrides=kwargs.get("override_tools"))
if "class" in toolpath_object and toolpath_object["class"] == "CommandLineTool":
return ArvadosCommandTool(self, toolpath_object, **kwargs)
elif "class" in toolpath_object and toolpath_object["class"] == "Workflow":
@@ -334,7 +335,8 @@ class ArvCwlRunner(object):
# Upload direct dependencies of workflow steps, get back mapping of files to keep references.
# Also uploads docker images.
- upload_workflow_deps(self, tool)
+ override_tools = {}
+ upload_workflow_deps(self, tool, override_tools)
# Reload tool object which may have been updated by
# upload_workflow_deps
@@ -342,7 +344,8 @@ class ArvCwlRunner(object):
makeTool=self.arv_make_tool,
loader=tool.doc_loader,
avsc_names=tool.doc_schema,
- metadata=tool.metadata)
+ metadata=tool.metadata,
+ override_tools=override_tools)
# Upload local file references in the job order.
job_order = upload_job_order(self, "%s input" % kwargs["name"],
diff --git a/sdk/cwl/arvados_cwl/arv-cwl-schema.yml b/sdk/cwl/arvados_cwl/arv-cwl-schema.yml
index 4e48216..eb26e9e 100644
--- a/sdk/cwl/arvados_cwl/arv-cwl-schema.yml
+++ b/sdk/cwl/arvados_cwl/arv-cwl-schema.yml
@@ -94,6 +94,12 @@ $graph:
doc: |
Select preferred compute partitions on which to run jobs.
fields:
+ - name: class
+ type: string
+ doc: "Always 'arv:PartitionRequirement'"
+ jsonldPredicate:
+ _id: "@type"
+ _type: "@vocab"
- name: partition
type:
- string
diff --git a/sdk/cwl/arvados_cwl/fsaccess.py b/sdk/cwl/arvados_cwl/fsaccess.py
index 534a675..7736cab 100644
--- a/sdk/cwl/arvados_cwl/fsaccess.py
+++ b/sdk/cwl/arvados_cwl/fsaccess.py
@@ -149,13 +149,16 @@ class CollectionFsAccess(cwltool.stdfsaccess.StdFsAccess):
return os.path.realpath(path)
class CollectionFetcher(DefaultFetcher):
- def __init__(self, cache, session, api_client=None, fs_access=None, num_retries=4):
+ def __init__(self, cache, session, api_client=None, fs_access=None, num_retries=4, overrides=None):
super(CollectionFetcher, self).__init__(cache, session)
self.api_client = api_client
self.fsaccess = fs_access
self.num_retries = num_retries
+ self.overrides = overrides if overrides else {}
def fetch_text(self, url):
+ if url in self.overrides:
+ return self.overrides[url]
if url.startswith("keep:"):
with self.fsaccess.open(url, "r") as f:
return f.read()
@@ -166,6 +169,8 @@ class CollectionFetcher(DefaultFetcher):
return super(CollectionFetcher, self).fetch_text(url)
def check_exists(self, url):
+ if url in self.overrides:
+ return True
try:
if url.startswith("http://arvados.org/cwl"):
return True
diff --git a/sdk/cwl/arvados_cwl/pathmapper.py b/sdk/cwl/arvados_cwl/pathmapper.py
index 5e2ee46..52a65ff 100644
--- a/sdk/cwl/arvados_cwl/pathmapper.py
+++ b/sdk/cwl/arvados_cwl/pathmapper.py
@@ -36,13 +36,14 @@ class ArvPathMapper(PathMapper):
pdh_dirpath = re.compile(r'^keep:[0-9a-f]{32}\+\d+(/.*)?$')
def __init__(self, arvrunner, referenced_files, input_basedir,
- collection_pattern, file_pattern, name=None, **kwargs):
+ collection_pattern, file_pattern, name=None, single_collection=False, **kwargs):
self.arvrunner = arvrunner
self.input_basedir = input_basedir
self.collection_pattern = collection_pattern
self.file_pattern = file_pattern
self.name = name
self.referenced_files = [r["location"] for r in referenced_files]
+ self.single_collection = single_collection
super(ArvPathMapper, self).__init__(referenced_files, input_basedir, None)
def visit(self, srcobj, uploadfiles):
@@ -105,46 +106,61 @@ class ArvPathMapper(PathMapper):
# type: (List[Any], unicode) -> None
uploadfiles = set()
+ collection = None
+ if self.single_collection:
+ collection = arvados.collection.Collection(api_client=self.arvrunner.api,
+ keep_client=self.arvrunner.keep_client,
+ num_retries=self.arvrunner.num_retries)
+
already_uploaded = self.arvrunner.get_uploaded()
+ copied_files = set()
for k in referenced_files:
loc = k["location"]
if loc in already_uploaded:
v = already_uploaded[loc]
- self._pathmap[loc] = MapperEnt(v.resolved, self.collection_pattern % urllib.unquote(v.resolved[5:]), "File", True)
+ self._pathmap[loc] = MapperEnt(v.resolved, self.collection_pattern % urllib.unquote(v.resolved[5:]), v.type, True)
+ if self.single_collection:
+ basename = k["basename"]
+ if basename not in collection:
+ self.addentry({"location": loc, "class": v.type, "basename": basename}, collection, ".", [])
+ copied_files.add((loc, basename, v.type))
for srcobj in referenced_files:
self.visit(srcobj, uploadfiles)
- if uploadfiles:
- arvados.commands.run.uploadfiles([u[2] for u in uploadfiles],
- self.arvrunner.api,
- dry_run=False,
- num_retries=self.arvrunner.num_retries,
- fnPattern="keep:%s/%s",
- name=self.name,
- project=self.arvrunner.project_uuid)
+ arvados.commands.run.uploadfiles([u[2] for u in uploadfiles],
+ self.arvrunner.api,
+ dry_run=False,
+ num_retries=self.arvrunner.num_retries,
+ fnPattern="keep:%s/%s",
+ name=self.name,
+ project=self.arvrunner.project_uuid,
+ collection=collection)
for src, ab, st in uploadfiles:
self._pathmap[src] = MapperEnt(urllib.quote(st.fn, "/:+@"), self.collection_pattern % st.fn[5:],
"Directory" if os.path.isdir(ab) else "File", True)
self.arvrunner.add_uploaded(src, self._pathmap[src])
+ for loc, basename, cls in copied_files:
+ fn = "keep:%s/%s" % (collection.portable_data_hash(), basename)
+ self._pathmap[loc] = MapperEnt(urllib.quote(fn, "/:+@"), self.collection_pattern % fn[5:], cls, True)
+
for srcobj in referenced_files:
subdirs = []
- if srcobj["class"] == "Directory":
- if srcobj["location"] not in self._pathmap:
- c = arvados.collection.Collection(api_client=self.arvrunner.api,
- keep_client=self.arvrunner.keep_client,
- num_retries=self.arvrunner.num_retries)
- for l in srcobj.get("listing", []):
- self.addentry(l, c, ".", subdirs)
-
- check = self.arvrunner.api.collections().list(filters=[["portable_data_hash", "=", c.portable_data_hash()]], limit=1).execute(num_retries=self.arvrunner.num_retries)
- if not check["items"]:
- c.save_new(owner_uuid=self.arvrunner.project_uuid)
+ if srcobj["class"] == "Directory" and srcobj["location"] not in self._pathmap:
+ c = arvados.collection.Collection(api_client=self.arvrunner.api,
+ keep_client=self.arvrunner.keep_client,
+ num_retries=self.arvrunner.num_retries)
+ for l in srcobj.get("listing", []):
+ self.addentry(l, c, ".", subdirs)
- ab = self.collection_pattern % c.portable_data_hash()
- self._pathmap[srcobj["location"]] = MapperEnt("keep:"+c.portable_data_hash(), ab, "Directory", True)
+ check = self.arvrunner.api.collections().list(filters=[["portable_data_hash", "=", c.portable_data_hash()]], limit=1).execute(num_retries=self.arvrunner.num_retries)
+ if not check["items"]:
+ c.save_new(owner_uuid=self.arvrunner.project_uuid)
+
+ ab = self.collection_pattern % c.portable_data_hash()
+ self._pathmap[srcobj["location"]] = MapperEnt("keep:"+c.portable_data_hash(), ab, "Directory", True)
elif srcobj["class"] == "File" and (srcobj.get("secondaryFiles") or
(srcobj["location"].startswith("_:") and "contents" in srcobj)):
diff --git a/sdk/cwl/arvados_cwl/runner.py b/sdk/cwl/arvados_cwl/runner.py
index 69e4f5b..ad44910 100644
--- a/sdk/cwl/arvados_cwl/runner.py
+++ b/sdk/cwl/arvados_cwl/runner.py
@@ -100,7 +100,8 @@ def upload_dependencies(arvrunner, name, document_loader,
mapper = ArvPathMapper(arvrunner, sc, "",
"keep:%s",
"keep:%s/%s",
- name=name)
+ name=name,
+ single_collection=True)
def setloc(p):
if "location" in p and (not p["location"].startswith("_:")) and (not p["location"].startswith("keep:")):
@@ -186,7 +187,7 @@ def upload_job_order(arvrunner, name, tool, job_order):
return job_order
-def upload_workflow_deps(arvrunner, tool):
+def upload_workflow_deps(arvrunner, tool, override_tools):
# Ensure that Docker images needed by this workflow are available
upload_docker(arvrunner, tool)
@@ -203,6 +204,7 @@ def upload_workflow_deps(arvrunner, tool):
False,
include_primary=False)
document_loader.idx[deptool["id"]] = deptool
+ override_tools[deptool["id"]] = json.dumps(deptool)
tool.visit(upload_tool_deps)
diff --git a/sdk/cwl/tests/collection_per_tool/a.txt b/sdk/cwl/tests/collection_per_tool/a.txt
new file mode 100644
index 0000000..e69de29
diff --git a/sdk/cwl/tests/collection_per_tool/b.txt b/sdk/cwl/tests/collection_per_tool/b.txt
new file mode 100644
index 0000000..e69de29
diff --git a/sdk/cwl/tests/collection_per_tool/c.txt b/sdk/cwl/tests/collection_per_tool/c.txt
new file mode 100644
index 0000000..e69de29
diff --git a/sdk/cwl/tests/collection_per_tool/collection_per_tool.cwl b/sdk/cwl/tests/collection_per_tool/collection_per_tool.cwl
new file mode 100644
index 0000000..debf0e9
--- /dev/null
+++ b/sdk/cwl/tests/collection_per_tool/collection_per_tool.cwl
@@ -0,0 +1,13 @@
+cwlVersion: v1.0
+class: Workflow
+inputs: []
+outputs: []
+steps:
+ step1:
+ in: []
+ out: []
+ run: step1.cwl
+ step2:
+ in: []
+ out: []
+ run: step2.cwl
\ No newline at end of file
diff --git a/sdk/cwl/tests/collection_per_tool/collection_per_tool_packed.cwl b/sdk/cwl/tests/collection_per_tool/collection_per_tool_packed.cwl
new file mode 100644
index 0000000..26d6d14
--- /dev/null
+++ b/sdk/cwl/tests/collection_per_tool/collection_per_tool_packed.cwl
@@ -0,0 +1,45 @@
+cwlVersion: v1.0
+$graph:
+- class: Workflow
+ inputs: []
+ outputs: []
+ steps:
+ - in: []
+ out: []
+ run: '#step1.cwl'
+ id: '#main/step1'
+ - in: []
+ out: []
+ run: '#step2.cwl'
+ id: '#main/step2'
+ id: '#main'
+- class: CommandLineTool
+ inputs:
+ - type: File
+ default:
+ class: File
+ location: keep:b9fca8bf06b170b8507b80b2564ee72b+57/a.txt
+ id: '#step1.cwl/a'
+ - type: File
+ default:
+ class: File
+ location: keep:b9fca8bf06b170b8507b80b2564ee72b+57/b.txt
+ id: '#step1.cwl/b'
+ outputs: []
+ arguments: [echo, $(inputs.a), $(inputs.b)]
+ id: '#step1.cwl'
+- class: CommandLineTool
+ inputs:
+ - type: File
+ default:
+ class: File
+ location: keep:8e2d09a066d96cdffdd2be41579e4e2e+57/b.txt
+ id: '#step2.cwl/b'
+ - type: File
+ default:
+ class: File
+ location: keep:8e2d09a066d96cdffdd2be41579e4e2e+57/c.txt
+ id: '#step2.cwl/c'
+ outputs: []
+ arguments: [echo, $(inputs.c), $(inputs.b)]
+ id: '#step2.cwl'
diff --git a/sdk/cwl/tests/collection_per_tool/step1.cwl b/sdk/cwl/tests/collection_per_tool/step1.cwl
new file mode 100644
index 0000000..495abec
--- /dev/null
+++ b/sdk/cwl/tests/collection_per_tool/step1.cwl
@@ -0,0 +1,15 @@
+cwlVersion: v1.0
+class: CommandLineTool
+inputs:
+ a:
+ type: File
+ default:
+ class: File
+ location: a.txt
+ b:
+ type: File
+ default:
+ class: File
+ location: b.txt
+outputs: []
+arguments: [echo, $(inputs.a), $(inputs.b)]
\ No newline at end of file
diff --git a/sdk/cwl/tests/collection_per_tool/step2.cwl b/sdk/cwl/tests/collection_per_tool/step2.cwl
new file mode 100644
index 0000000..e3c53bd
--- /dev/null
+++ b/sdk/cwl/tests/collection_per_tool/step2.cwl
@@ -0,0 +1,15 @@
+cwlVersion: v1.0
+class: CommandLineTool
+inputs:
+ c:
+ type: File
+ default:
+ class: File
+ location: c.txt
+ b:
+ type: File
+ default:
+ class: File
+ location: b.txt
+outputs: []
+arguments: [echo, $(inputs.c), $(inputs.b)]
\ No newline at end of file
diff --git a/sdk/cwl/tests/test_pathmapper.py b/sdk/cwl/tests/test_pathmapper.py
index b39a984..3e7918a 100644
--- a/sdk/cwl/tests/test_pathmapper.py
+++ b/sdk/cwl/tests/test_pathmapper.py
@@ -16,7 +16,7 @@ from .mock_discovery import get_rootDesc
from arvados_cwl.pathmapper import ArvPathMapper
-def upload_mock(files, api, dry_run=False, num_retries=0, project=None, fnPattern="$(file %s/%s)", name=None):
+def upload_mock(files, api, dry_run=False, num_retries=0, project=None, fnPattern="$(file %s/%s)", name=None, collection=None):
pdh = "99999999999999999999999999999991+99"
for c in files:
c.keepref = "%s/%s" % (pdh, os.path.basename(c.fn))
@@ -67,7 +67,7 @@ class TestPathmap(unittest.TestCase):
"""Test pathmapper handling previously uploaded files."""
arvrunner = arvados_cwl.ArvCwlRunner(self.api)
- arvrunner.add_uploaded('file:tests/hw.py', MapperEnt(resolved='keep:99999999999999999999999999999991+99/hw.py', target='', type='File', staged=True))
+ arvrunner.add_uploaded('file:tests/hw.py', MapperEnt(resolved='keep:99999999999999999999999999999992+99/hw.py', target='', type='File', staged=True))
upl.side_effect = upload_mock
@@ -76,7 +76,7 @@ class TestPathmap(unittest.TestCase):
"location": "file:tests/hw.py"
}], "", "/test/%s", "/test/%s/%s")
- self.assertEqual({'file:tests/hw.py': MapperEnt(resolved='keep:99999999999999999999999999999991+99/hw.py', target='/test/99999999999999999999999999999991+99/hw.py', type='File', staged=True)},
+ self.assertEqual({'file:tests/hw.py': MapperEnt(resolved='keep:99999999999999999999999999999992+99/hw.py', target='/test/99999999999999999999999999999992+99/hw.py', type='File', staged=True)},
p._pathmap)
@mock.patch("arvados.commands.run.uploadfiles")
diff --git a/sdk/cwl/tests/test_submit.py b/sdk/cwl/tests/test_submit.py
index 85c49c9..47844c1 100644
--- a/sdk/cwl/tests/test_submit.py
+++ b/sdk/cwl/tests/test_submit.py
@@ -34,7 +34,6 @@ def stubs(func):
stubs.events = events
stubs.keepdocker = keepdocker
-
def putstub(p, **kwargs):
return "%s+%i" % (hashlib.md5(p).hexdigest(), len(p))
keep_client1().put.side_effect = putstub
@@ -53,33 +52,32 @@ def stubs(func):
"uuid": stubs.fake_user_uuid,
}
stubs.api.collections().list().execute.return_value = {"items": []}
- stubs.api.collections().create().execute.side_effect = ({
- "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz1",
- "portable_data_hash": "99999999999999999999999999999991+99",
- "manifest_text": ""
- }, {
- "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz2",
- "portable_data_hash": "99999999999999999999999999999992+99",
- "manifest_text": "./tool 00000000000000000000000000000000+0 0:0:submit_tool.cwl 0:0:blub.txt"
- },
- {
- "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz4",
- "portable_data_hash": "99999999999999999999999999999994+99",
- "manifest_text": ""
- },
- {
- "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz5",
- "portable_data_hash": "99999999999999999999999999999995+99",
- "manifest_text": ""
- },
- {
- "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz6",
- "portable_data_hash": "99999999999999999999999999999996+99",
- "manifest_text": ""
- }
- )
- stubs.api.collections().get().execute.return_value = {
- "portable_data_hash": "99999999999999999999999999999993+99", "manifest_text": "./tool 00000000000000000000000000000000+0 0:0:submit_tool.cwl 0:0:blub.txt"}
+
+ class CollectionExecute(object):
+ def __init__(self, exe):
+ self.exe = exe
+ def execute(self, num_retries=None):
+ return self.exe
+
+ def collection_createstub(created_collections, body, ensure_unique_name=None):
+ mt = body["manifest_text"]
+ uuid = "zzzzz-4zz18-zzzzzzzzzzzzzz%d" % len(created_collections)
+ pdh = "%s+%i" % (hashlib.md5(mt).hexdigest(), len(mt))
+ created_collections[uuid] = {
+ "uuid": uuid,
+ "portable_data_hash": pdh,
+ "manifest_text": mt
+ }
+ return CollectionExecute(created_collections[uuid])
+
+ def collection_getstub(created_collections, uuid):
+ for v in created_collections.itervalues():
+ if uuid in (v["uuid"], v["portable_data_hash"]):
+ return CollectionExecute(v)
+
+ created_collections = {}
+ stubs.api.collections().create.side_effect = functools.partial(collection_createstub, created_collections)
+ stubs.api.collections().get.side_effect = functools.partial(collection_getstub, created_collections)
stubs.expect_job_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
stubs.api.jobs().create().execute.return_value = {
@@ -106,7 +104,7 @@ def stubs(func):
'script_parameters': {
'x': {
'basename': 'blorp.txt',
- 'location': 'keep:99999999999999999999999999999992+99/blorp.txt',
+ 'location': 'keep:169f39d466a5438ac4a90e779bf750c7+53/blorp.txt',
'class': 'File'
},
'y': {
@@ -123,8 +121,7 @@ def stubs(func):
}],
'class': 'Directory'
},
- 'cwl:tool':
- '99999999999999999999999999999994+99/workflow.cwl#main'
+ 'cwl:tool': '3fffdeaa75e018172e1b583425f4ebff+60/workflow.cwl#main'
},
'repository': 'arvados',
'script_version': 'master',
@@ -141,12 +138,12 @@ def stubs(func):
'runtime_constraints': {'docker_image': 'arvados/jobs:'+arvados_cwl.__version__, 'min_ram_mb_per_node': 1024},
'script_parameters': {
'y': {"value": {'basename': '99999999999999999999999999999998+99', 'location': 'keep:99999999999999999999999999999998+99', 'class': 'Directory'}},
- 'x': {"value": {'basename': 'blorp.txt', 'class': 'File', 'location': 'keep:99999999999999999999999999999992+99/blorp.txt'}},
+ 'x': {"value": {'basename': 'blorp.txt', 'class': 'File', 'location': 'keep:169f39d466a5438ac4a90e779bf750c7+53/blorp.txt'}},
'z': {"value": {'basename': 'anonymous', 'class': 'Directory',
'listing': [
{'basename': 'renamed.txt', 'class': 'File', 'location': 'keep:99999999999999999999999999999998+99/file1.txt'}
]}},
- 'cwl:tool': '99999999999999999999999999999994+99/workflow.cwl#main',
+ 'cwl:tool': '3fffdeaa75e018172e1b583425f4ebff+60/workflow.cwl#main',
'arv:enable_reuse': True,
'arv:on_error': 'continue'
},
@@ -191,7 +188,7 @@ def stubs(func):
'kind': 'json',
'content': {
'y': {'basename': '99999999999999999999999999999998+99', 'location': 'keep:99999999999999999999999999999998+99', 'class': 'Directory'},
- 'x': {'basename': u'blorp.txt', 'class': 'File', 'location': u'keep:99999999999999999999999999999992+99/blorp.txt'},
+ 'x': {'basename': u'blorp.txt', 'class': 'File', 'location': u'keep:169f39d466a5438ac4a90e779bf750c7+53/blorp.txt'},
'z': {'basename': 'anonymous', 'class': 'Directory', 'listing': [
{'basename': 'renamed.txt', 'class': 'File', 'location': 'keep:99999999999999999999999999999998+99/file1.txt'}
]}
@@ -245,21 +242,24 @@ class TestSubmit(unittest.TestCase):
self.assertEqual(exited, 0)
stubs.api.collections().create.assert_has_calls([
- mock.call(),
mock.call(body=JsonDiffMatcher({
'manifest_text':
'. 5bcc9fe8f8d5992e6cf418dc7ce4dbb3+16 0:16:blub.txt\n',
'replication_desired': None,
'name': 'submit_tool.cwl dependencies',
}), ensure_unique_name=True),
- mock.call().execute(num_retries=4),
mock.call(body=JsonDiffMatcher({
'manifest_text':
'. 979af1245a12a1fed634d4222473bfdc+16 0:16:blorp.txt\n',
'replication_desired': None,
'name': 'submit_wf.cwl input',
}), ensure_unique_name=True),
- mock.call().execute(num_retries=4)])
+ mock.call(body=JsonDiffMatcher({
+ 'manifest_text':
+ '. 61df2ed9ee3eb7dd9b799e5ca35305fa+1217 0:1217:workflow.cwl\n',
+ 'replication_desired': None,
+ 'name': 'submit_wf.cwl',
+ }), ensure_unique_name=True) ])
arvdock.assert_has_calls([
mock.call(stubs.api, {"class": "DockerRequirement", "dockerPull": "debian:8"}, True, None),
@@ -428,21 +428,18 @@ class TestSubmit(unittest.TestCase):
logging.exception("")
stubs.api.collections().create.assert_has_calls([
- mock.call(),
mock.call(body=JsonDiffMatcher({
'manifest_text':
'. 5bcc9fe8f8d5992e6cf418dc7ce4dbb3+16 0:16:blub.txt\n',
'replication_desired': None,
'name': 'submit_tool.cwl dependencies',
}), ensure_unique_name=True),
- mock.call().execute(num_retries=4),
mock.call(body=JsonDiffMatcher({
'manifest_text':
'. 979af1245a12a1fed634d4222473bfdc+16 0:16:blorp.txt\n',
'replication_desired': None,
'name': 'submit_wf.cwl input',
- }), ensure_unique_name=True),
- mock.call().execute(num_retries=4)])
+ }), ensure_unique_name=True)])
expect_container = copy.deepcopy(stubs.expect_container_spec)
stubs.api.container_requests().create.assert_called_with(
@@ -854,7 +851,7 @@ class TestCreateTemplate(unittest.TestCase):
'dataclass': 'File',
'required': True,
'type': 'File',
- 'value': '99999999999999999999999999999992+99/blorp.txt',
+ 'value': '169f39d466a5438ac4a90e779bf750c7+53/blorp.txt',
}
expect_component['script_parameters']['y'] = {
'dataclass': 'Collection',
@@ -1106,6 +1103,36 @@ class TestCreateWorkflow(unittest.TestCase):
self.existing_workflow_uuid + '\n')
+ @stubs
+ def test_create_collection_per_tool(self, stubs):
+ project_uuid = 'zzzzz-j7d0g-zzzzzzzzzzzzzzz'
+
+ capture_stdout = cStringIO.StringIO()
+
+ exited = arvados_cwl.main(
+ ["--create-workflow", "--debug",
+ "--api=containers",
+ "--project-uuid", project_uuid,
+ "tests/collection_per_tool/collection_per_tool.cwl"],
+ capture_stdout, sys.stderr, api_client=stubs.api)
+ self.assertEqual(exited, 0)
+
+ expect_workflow = open("tests/collection_per_tool/collection_per_tool_packed.cwl").read()
+
+ body = {
+ "workflow": {
+ "owner_uuid": project_uuid,
+ "name": "collection_per_tool.cwl",
+ "description": "",
+ "definition": expect_workflow,
+ }
+ }
+ stubs.api.workflows().create.assert_called_with(
+ body=JsonDiffMatcher(body))
+
+ self.assertEqual(capture_stdout.getvalue(),
+ stubs.expect_workflow_uuid + '\n')
+
class TestTemplateInputs(unittest.TestCase):
expect_template = {
"components": {
@@ -1116,7 +1143,7 @@ class TestTemplateInputs(unittest.TestCase):
},
'script_parameters': {
'cwl:tool':
- '99999999999999999999999999999992+99/workflow.cwl#main',
+ '6c5ee1cd606088106d9f28367cde1e41+60/workflow.cwl#main',
'optionalFloatInput': None,
'fileInput': {
'type': 'File',
@@ -1176,8 +1203,8 @@ class TestTemplateInputs(unittest.TestCase):
expect_template = copy.deepcopy(self.expect_template)
params = expect_template[
"components"]["inputs_test.cwl"]["script_parameters"]
- params["fileInput"]["value"] = '99999999999999999999999999999992+99/blorp.txt'
- params["cwl:tool"] = '99999999999999999999999999999994+99/workflow.cwl#main'
+ params["fileInput"]["value"] = '169f39d466a5438ac4a90e779bf750c7+53/blorp.txt'
+ params["cwl:tool"] = '6c5ee1cd606088106d9f28367cde1e41+60/workflow.cwl#main'
params["floatInput"]["value"] = 1.234
params["boolInput"]["value"] = True
diff --git a/sdk/cwl/tests/wf/expect_packed.cwl b/sdk/cwl/tests/wf/expect_packed.cwl
index d7b9d61..c17612c 100644
--- a/sdk/cwl/tests/wf/expect_packed.cwl
+++ b/sdk/cwl/tests/wf/expect_packed.cwl
@@ -9,7 +9,7 @@ $graph:
type: File
default:
class: File
- location: keep:99999999999999999999999999999991+99/blub.txt
+ location: keep:5d373e7629203ce39e7c22af98a0f881+52/blub.txt
inputBinding:
position: 1
outputs: []
@@ -19,7 +19,7 @@ $graph:
inputs:
- id: '#main/x'
type: File
- default: {class: File, location: 'keep:99999999999999999999999999999992+99/blorp.txt',
+ default: {class: File, location: 'keep:169f39d466a5438ac4a90e779bf750c7+53/blorp.txt',
basename: blorp.txt}
- id: '#main/y'
type: Directory
diff --git a/sdk/cwl/tests/wf/submit_wf_packed.cwl b/sdk/cwl/tests/wf/submit_wf_packed.cwl
index 1f7fee0..2a3edf7 100644
--- a/sdk/cwl/tests/wf/submit_wf_packed.cwl
+++ b/sdk/cwl/tests/wf/submit_wf_packed.cwl
@@ -9,7 +9,7 @@ $graph:
type: File
default:
class: File
- location: keep:99999999999999999999999999999991+99/blub.txt
+ location: keep:5d373e7629203ce39e7c22af98a0f881+52/blub.txt
inputBinding:
position: 1
outputs: []
commit 4a1df4f3aed07a31da8570363b5eaceccf4dffa8
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Wed May 17 09:03:18 2017 -0400
11543: arvados.commands.run.uploadfiles takes optional Collection to upload to.
diff --git a/sdk/python/arvados/commands/run.py b/sdk/python/arvados/commands/run.py
index d8bb26e..216b512 100644
--- a/sdk/python/arvados/commands/run.py
+++ b/sdk/python/arvados/commands/run.py
@@ -144,43 +144,46 @@ def write_file(collection, pathprefix, fn):
def uploadfiles(files, api, dry_run=False, num_retries=0,
project=None,
fnPattern="$(file %s/%s)",
- name=None):
+ name=None,
+ collection=None):
# Find the smallest path prefix that includes all the files that need to be uploaded.
# This starts at the root and iteratively removes common parent directory prefixes
# until all file paths no longer have a common parent.
- n = True
- pathprefix = "/"
- while n:
- pathstep = None
- for c in files:
- if pathstep is None:
- sp = c.fn.split('/')
- if len(sp) < 2:
- # no parent directories left
- n = False
- break
- # path step takes next directory
- pathstep = sp[0] + "/"
- else:
- # check if pathstep is common prefix for all files
- if not c.fn.startswith(pathstep):
- n = False
- break
- if n:
- # pathstep is common parent directory for all files, so remove the prefix
- # from each path
- pathprefix += pathstep
+ if files:
+ n = True
+ pathprefix = "/"
+ while n:
+ pathstep = None
for c in files:
- c.fn = c.fn[len(pathstep):]
-
- logger.info("Upload local files: \"%s\"", '" "'.join([c.fn for c in files]))
+ if pathstep is None:
+ sp = c.fn.split('/')
+ if len(sp) < 2:
+ # no parent directories left
+ n = False
+ break
+ # path step takes next directory
+ pathstep = sp[0] + "/"
+ else:
+ # check if pathstep is common prefix for all files
+ if not c.fn.startswith(pathstep):
+ n = False
+ break
+ if n:
+ # pathstep is common parent directory for all files, so remove the prefix
+ # from each path
+ pathprefix += pathstep
+ for c in files:
+ c.fn = c.fn[len(pathstep):]
+
+ logger.info("Upload local files: \"%s\"", '" "'.join([c.fn for c in files]))
if dry_run:
logger.info("$(input) is %s", pathprefix.rstrip('/'))
pdh = "$(input)"
else:
files = sorted(files, key=lambda x: x.fn)
- collection = arvados.collection.Collection(api_client=api, num_retries=num_retries)
+ if collection is None:
+ collection = arvados.collection.Collection(api_client=api, num_retries=num_retries)
prev = ""
for f in files:
localpath = os.path.join(pathprefix, f.fn)
@@ -199,8 +202,9 @@ def uploadfiles(files, api, dry_run=False, num_retries=0,
for src in iterfiles:
write_file(collection, pathprefix, os.path.join(root, src))
- filters=[["portable_data_hash", "=", collection.portable_data_hash()],
- ["name", "like", name+"%"]]
+ filters=[["portable_data_hash", "=", collection.portable_data_hash()]]
+ if name:
+ filters.append(["name", "like", name+"%"])
if project:
filters.append(["owner_uuid", "=", project])
@@ -210,7 +214,7 @@ def uploadfiles(files, api, dry_run=False, num_retries=0,
item = exists["items"][0]
pdh = item["portable_data_hash"]
logger.info("Using collection %s (%s)", pdh, item["uuid"])
- else:
+ elif len(collection) > 0:
collection.save_new(name=name, owner_uuid=project, ensure_unique_name=True)
pdh = collection.portable_data_hash()
logger.info("Uploaded to %s (%s)", pdh, collection.manifest_locator())
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list