[ARVADOS] created: 8316594ba5a3bfd9869b60178a889721da585da9

Git user git at public.curoverse.com
Tue May 23 15:25:54 EDT 2017


        at  8316594ba5a3bfd9869b60178a889721da585da9 (commit)


commit 8316594ba5a3bfd9869b60178a889721da585da9
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Wed May 17 09:03:18 2017 -0400

    11543: Upload tool dependencies into single collection.  Add test for collection per tool.  Fix other tests.

diff --git a/sdk/cwl/arvados_cwl/__init__.py b/sdk/cwl/arvados_cwl/__init__.py
index 46436b5..25a240c 100644
--- a/sdk/cwl/arvados_cwl/__init__.py
+++ b/sdk/cwl/arvados_cwl/__init__.py
@@ -105,7 +105,8 @@ class ArvCwlRunner(object):
         kwargs["fetcher_constructor"] = partial(CollectionFetcher,
                                                 api_client=self.api,
                                                 fs_access=CollectionFsAccess("", collection_cache=self.collection_cache),
-                                                num_retries=self.num_retries)
+                                                num_retries=self.num_retries,
+                                                overrides=kwargs.get("override_tools"))
         if "class" in toolpath_object and toolpath_object["class"] == "CommandLineTool":
             return ArvadosCommandTool(self, toolpath_object, **kwargs)
         elif "class" in toolpath_object and toolpath_object["class"] == "Workflow":
@@ -334,7 +335,8 @@ class ArvCwlRunner(object):
 
         # Upload direct dependencies of workflow steps, get back mapping of files to keep references.
         # Also uploads docker images.
-        upload_workflow_deps(self, tool)
+        override_tools = {}
+        upload_workflow_deps(self, tool, override_tools)
 
         # Reload tool object which may have been updated by
         # upload_workflow_deps
@@ -342,7 +344,8 @@ class ArvCwlRunner(object):
                                   makeTool=self.arv_make_tool,
                                   loader=tool.doc_loader,
                                   avsc_names=tool.doc_schema,
-                                  metadata=tool.metadata)
+                                  metadata=tool.metadata,
+                                  override_tools=override_tools)
 
         # Upload local file references in the job order.
         job_order = upload_job_order(self, "%s input" % kwargs["name"],
diff --git a/sdk/cwl/arvados_cwl/arv-cwl-schema.yml b/sdk/cwl/arvados_cwl/arv-cwl-schema.yml
index 4e48216..eb26e9e 100644
--- a/sdk/cwl/arvados_cwl/arv-cwl-schema.yml
+++ b/sdk/cwl/arvados_cwl/arv-cwl-schema.yml
@@ -94,6 +94,12 @@ $graph:
   doc: |
     Select preferred compute partitions on which to run jobs.
   fields:
+    - name: class
+      type: string
+      doc: "Always 'arv:PartitionRequirement'"
+      jsonldPredicate:
+        _id: "@type"
+        _type: "@vocab"
     - name: partition
       type:
         - string
diff --git a/sdk/cwl/arvados_cwl/fsaccess.py b/sdk/cwl/arvados_cwl/fsaccess.py
index 534a675..7736cab 100644
--- a/sdk/cwl/arvados_cwl/fsaccess.py
+++ b/sdk/cwl/arvados_cwl/fsaccess.py
@@ -149,13 +149,16 @@ class CollectionFsAccess(cwltool.stdfsaccess.StdFsAccess):
             return os.path.realpath(path)
 
 class CollectionFetcher(DefaultFetcher):
-    def __init__(self, cache, session, api_client=None, fs_access=None, num_retries=4):
+    def __init__(self, cache, session, api_client=None, fs_access=None, num_retries=4, overrides=None):
         super(CollectionFetcher, self).__init__(cache, session)
         self.api_client = api_client
         self.fsaccess = fs_access
         self.num_retries = num_retries
+        self.overrides = overrides if overrides else {}
 
     def fetch_text(self, url):
+        if url in self.overrides:
+            return self.overrides[url]
         if url.startswith("keep:"):
             with self.fsaccess.open(url, "r") as f:
                 return f.read()
@@ -166,6 +169,8 @@ class CollectionFetcher(DefaultFetcher):
         return super(CollectionFetcher, self).fetch_text(url)
 
     def check_exists(self, url):
+        if url in self.overrides:
+            return True
         try:
             if url.startswith("http://arvados.org/cwl"):
                 return True
diff --git a/sdk/cwl/arvados_cwl/pathmapper.py b/sdk/cwl/arvados_cwl/pathmapper.py
index 5e2ee46..52a65ff 100644
--- a/sdk/cwl/arvados_cwl/pathmapper.py
+++ b/sdk/cwl/arvados_cwl/pathmapper.py
@@ -36,13 +36,14 @@ class ArvPathMapper(PathMapper):
     pdh_dirpath = re.compile(r'^keep:[0-9a-f]{32}\+\d+(/.*)?$')
 
     def __init__(self, arvrunner, referenced_files, input_basedir,
-                 collection_pattern, file_pattern, name=None, **kwargs):
+                 collection_pattern, file_pattern, name=None, single_collection=False, **kwargs):
         self.arvrunner = arvrunner
         self.input_basedir = input_basedir
         self.collection_pattern = collection_pattern
         self.file_pattern = file_pattern
         self.name = name
         self.referenced_files = [r["location"] for r in referenced_files]
+        self.single_collection = single_collection
         super(ArvPathMapper, self).__init__(referenced_files, input_basedir, None)
 
     def visit(self, srcobj, uploadfiles):
@@ -105,46 +106,61 @@ class ArvPathMapper(PathMapper):
         # type: (List[Any], unicode) -> None
         uploadfiles = set()
 
+        collection = None
+        if self.single_collection:
+            collection = arvados.collection.Collection(api_client=self.arvrunner.api,
+                                                       keep_client=self.arvrunner.keep_client,
+                                                       num_retries=self.arvrunner.num_retries)
+
         already_uploaded = self.arvrunner.get_uploaded()
+        copied_files = set()
         for k in referenced_files:
             loc = k["location"]
             if loc in already_uploaded:
                 v = already_uploaded[loc]
-                self._pathmap[loc] = MapperEnt(v.resolved, self.collection_pattern % urllib.unquote(v.resolved[5:]), "File", True)
+                self._pathmap[loc] = MapperEnt(v.resolved, self.collection_pattern % urllib.unquote(v.resolved[5:]), v.type, True)
+                if self.single_collection:
+                    basename = k["basename"]
+                    if basename not in collection:
+                        self.addentry({"location": loc, "class": v.type, "basename": basename}, collection, ".", [])
+                        copied_files.add((loc, basename, v.type))
 
         for srcobj in referenced_files:
             self.visit(srcobj, uploadfiles)
 
-        if uploadfiles:
-            arvados.commands.run.uploadfiles([u[2] for u in uploadfiles],
-                                             self.arvrunner.api,
-                                             dry_run=False,
-                                             num_retries=self.arvrunner.num_retries,
-                                             fnPattern="keep:%s/%s",
-                                             name=self.name,
-                                             project=self.arvrunner.project_uuid)
+        arvados.commands.run.uploadfiles([u[2] for u in uploadfiles],
+                                         self.arvrunner.api,
+                                         dry_run=False,
+                                         num_retries=self.arvrunner.num_retries,
+                                         fnPattern="keep:%s/%s",
+                                         name=self.name,
+                                         project=self.arvrunner.project_uuid,
+                                         collection=collection)
 
         for src, ab, st in uploadfiles:
             self._pathmap[src] = MapperEnt(urllib.quote(st.fn, "/:+@"), self.collection_pattern % st.fn[5:],
                                            "Directory" if os.path.isdir(ab) else "File", True)
             self.arvrunner.add_uploaded(src, self._pathmap[src])
 
+        for loc, basename, cls in copied_files:
+            fn = "keep:%s/%s" % (collection.portable_data_hash(), basename)
+            self._pathmap[loc] = MapperEnt(urllib.quote(fn, "/:+@"), self.collection_pattern % fn[5:], cls, True)
+
         for srcobj in referenced_files:
             subdirs = []
-            if srcobj["class"] == "Directory":
-                if srcobj["location"] not in self._pathmap:
-                    c = arvados.collection.Collection(api_client=self.arvrunner.api,
-                                                      keep_client=self.arvrunner.keep_client,
-                                                      num_retries=self.arvrunner.num_retries)
-                    for l in srcobj.get("listing", []):
-                        self.addentry(l, c, ".", subdirs)
-
-                    check = self.arvrunner.api.collections().list(filters=[["portable_data_hash", "=", c.portable_data_hash()]], limit=1).execute(num_retries=self.arvrunner.num_retries)
-                    if not check["items"]:
-                        c.save_new(owner_uuid=self.arvrunner.project_uuid)
+            if srcobj["class"] == "Directory" and srcobj["location"] not in self._pathmap:
+                c = arvados.collection.Collection(api_client=self.arvrunner.api,
+                                                  keep_client=self.arvrunner.keep_client,
+                                                  num_retries=self.arvrunner.num_retries)
+                for l in srcobj.get("listing", []):
+                    self.addentry(l, c, ".", subdirs)
 
-                    ab = self.collection_pattern % c.portable_data_hash()
-                    self._pathmap[srcobj["location"]] = MapperEnt("keep:"+c.portable_data_hash(), ab, "Directory", True)
+                check = self.arvrunner.api.collections().list(filters=[["portable_data_hash", "=", c.portable_data_hash()]], limit=1).execute(num_retries=self.arvrunner.num_retries)
+                if not check["items"]:
+                    c.save_new(owner_uuid=self.arvrunner.project_uuid)
+
+                ab = self.collection_pattern % c.portable_data_hash()
+                self._pathmap[srcobj["location"]] = MapperEnt("keep:"+c.portable_data_hash(), ab, "Directory", True)
             elif srcobj["class"] == "File" and (srcobj.get("secondaryFiles") or
                 (srcobj["location"].startswith("_:") and "contents" in srcobj)):
 
diff --git a/sdk/cwl/arvados_cwl/runner.py b/sdk/cwl/arvados_cwl/runner.py
index 69e4f5b..ad44910 100644
--- a/sdk/cwl/arvados_cwl/runner.py
+++ b/sdk/cwl/arvados_cwl/runner.py
@@ -100,7 +100,8 @@ def upload_dependencies(arvrunner, name, document_loader,
     mapper = ArvPathMapper(arvrunner, sc, "",
                            "keep:%s",
                            "keep:%s/%s",
-                           name=name)
+                           name=name,
+                           single_collection=True)
 
     def setloc(p):
         if "location" in p and (not p["location"].startswith("_:")) and (not p["location"].startswith("keep:")):
@@ -186,7 +187,7 @@ def upload_job_order(arvrunner, name, tool, job_order):
 
     return job_order
 
-def upload_workflow_deps(arvrunner, tool):
+def upload_workflow_deps(arvrunner, tool, override_tools):
     # Ensure that Docker images needed by this workflow are available
 
     upload_docker(arvrunner, tool)
@@ -203,6 +204,7 @@ def upload_workflow_deps(arvrunner, tool):
                                 False,
                                 include_primary=False)
             document_loader.idx[deptool["id"]] = deptool
+            override_tools[deptool["id"]] = json.dumps(deptool)
 
     tool.visit(upload_tool_deps)
 
diff --git a/sdk/cwl/tests/collection_per_tool/a.txt b/sdk/cwl/tests/collection_per_tool/a.txt
new file mode 100644
index 0000000..e69de29
diff --git a/sdk/cwl/tests/collection_per_tool/b.txt b/sdk/cwl/tests/collection_per_tool/b.txt
new file mode 100644
index 0000000..e69de29
diff --git a/sdk/cwl/tests/collection_per_tool/c.txt b/sdk/cwl/tests/collection_per_tool/c.txt
new file mode 100644
index 0000000..e69de29
diff --git a/sdk/cwl/tests/collection_per_tool/collection_per_tool.cwl b/sdk/cwl/tests/collection_per_tool/collection_per_tool.cwl
new file mode 100644
index 0000000..debf0e9
--- /dev/null
+++ b/sdk/cwl/tests/collection_per_tool/collection_per_tool.cwl
@@ -0,0 +1,13 @@
+cwlVersion: v1.0
+class: Workflow
+inputs: []
+outputs: []
+steps:
+  step1:
+    in: []
+    out: []
+    run: step1.cwl
+  step2:
+    in: []
+    out: []
+    run: step2.cwl
\ No newline at end of file
diff --git a/sdk/cwl/tests/collection_per_tool/collection_per_tool_packed.cwl b/sdk/cwl/tests/collection_per_tool/collection_per_tool_packed.cwl
new file mode 100644
index 0000000..26d6d14
--- /dev/null
+++ b/sdk/cwl/tests/collection_per_tool/collection_per_tool_packed.cwl
@@ -0,0 +1,45 @@
+cwlVersion: v1.0
+$graph:
+- class: Workflow
+  inputs: []
+  outputs: []
+  steps:
+  - in: []
+    out: []
+    run: '#step1.cwl'
+    id: '#main/step1'
+  - in: []
+    out: []
+    run: '#step2.cwl'
+    id: '#main/step2'
+  id: '#main'
+- class: CommandLineTool
+  inputs:
+  - type: File
+    default:
+      class: File
+      location: keep:b9fca8bf06b170b8507b80b2564ee72b+57/a.txt
+    id: '#step1.cwl/a'
+  - type: File
+    default:
+      class: File
+      location: keep:b9fca8bf06b170b8507b80b2564ee72b+57/b.txt
+    id: '#step1.cwl/b'
+  outputs: []
+  arguments: [echo, $(inputs.a), $(inputs.b)]
+  id: '#step1.cwl'
+- class: CommandLineTool
+  inputs:
+  - type: File
+    default:
+      class: File
+      location: keep:8e2d09a066d96cdffdd2be41579e4e2e+57/b.txt
+    id: '#step2.cwl/b'
+  - type: File
+    default:
+      class: File
+      location: keep:8e2d09a066d96cdffdd2be41579e4e2e+57/c.txt
+    id: '#step2.cwl/c'
+  outputs: []
+  arguments: [echo, $(inputs.c), $(inputs.b)]
+  id: '#step2.cwl'
diff --git a/sdk/cwl/tests/collection_per_tool/step1.cwl b/sdk/cwl/tests/collection_per_tool/step1.cwl
new file mode 100644
index 0000000..495abec
--- /dev/null
+++ b/sdk/cwl/tests/collection_per_tool/step1.cwl
@@ -0,0 +1,15 @@
+cwlVersion: v1.0
+class: CommandLineTool
+inputs:
+  a:
+    type: File
+    default:
+      class: File
+      location: a.txt
+  b:
+    type: File
+    default:
+      class: File
+      location: b.txt
+outputs: []
+arguments: [echo, $(inputs.a), $(inputs.b)]
\ No newline at end of file
diff --git a/sdk/cwl/tests/collection_per_tool/step2.cwl b/sdk/cwl/tests/collection_per_tool/step2.cwl
new file mode 100644
index 0000000..e3c53bd
--- /dev/null
+++ b/sdk/cwl/tests/collection_per_tool/step2.cwl
@@ -0,0 +1,15 @@
+cwlVersion: v1.0
+class: CommandLineTool
+inputs:
+  c:
+    type: File
+    default:
+      class: File
+      location: c.txt
+  b:
+    type: File
+    default:
+      class: File
+      location: b.txt
+outputs: []
+arguments: [echo, $(inputs.c), $(inputs.b)]
\ No newline at end of file
diff --git a/sdk/cwl/tests/test_pathmapper.py b/sdk/cwl/tests/test_pathmapper.py
index b39a984..3e7918a 100644
--- a/sdk/cwl/tests/test_pathmapper.py
+++ b/sdk/cwl/tests/test_pathmapper.py
@@ -16,7 +16,7 @@ from .mock_discovery import get_rootDesc
 
 from arvados_cwl.pathmapper import ArvPathMapper
 
-def upload_mock(files, api, dry_run=False, num_retries=0, project=None, fnPattern="$(file %s/%s)", name=None):
+def upload_mock(files, api, dry_run=False, num_retries=0, project=None, fnPattern="$(file %s/%s)", name=None, collection=None):
     pdh = "99999999999999999999999999999991+99"
     for c in files:
         c.keepref = "%s/%s" % (pdh, os.path.basename(c.fn))
@@ -67,7 +67,7 @@ class TestPathmap(unittest.TestCase):
         """Test pathmapper handling previously uploaded files."""
 
         arvrunner = arvados_cwl.ArvCwlRunner(self.api)
-        arvrunner.add_uploaded('file:tests/hw.py', MapperEnt(resolved='keep:99999999999999999999999999999991+99/hw.py', target='', type='File', staged=True))
+        arvrunner.add_uploaded('file:tests/hw.py', MapperEnt(resolved='keep:99999999999999999999999999999992+99/hw.py', target='', type='File', staged=True))
 
         upl.side_effect = upload_mock
 
@@ -76,7 +76,7 @@ class TestPathmap(unittest.TestCase):
             "location": "file:tests/hw.py"
         }], "", "/test/%s", "/test/%s/%s")
 
-        self.assertEqual({'file:tests/hw.py': MapperEnt(resolved='keep:99999999999999999999999999999991+99/hw.py', target='/test/99999999999999999999999999999991+99/hw.py', type='File', staged=True)},
+        self.assertEqual({'file:tests/hw.py': MapperEnt(resolved='keep:99999999999999999999999999999992+99/hw.py', target='/test/99999999999999999999999999999992+99/hw.py', type='File', staged=True)},
                          p._pathmap)
 
     @mock.patch("arvados.commands.run.uploadfiles")
diff --git a/sdk/cwl/tests/test_submit.py b/sdk/cwl/tests/test_submit.py
index 85c49c9..47844c1 100644
--- a/sdk/cwl/tests/test_submit.py
+++ b/sdk/cwl/tests/test_submit.py
@@ -34,7 +34,6 @@ def stubs(func):
         stubs.events = events
         stubs.keepdocker = keepdocker
 
-
         def putstub(p, **kwargs):
             return "%s+%i" % (hashlib.md5(p).hexdigest(), len(p))
         keep_client1().put.side_effect = putstub
@@ -53,33 +52,32 @@ def stubs(func):
             "uuid": stubs.fake_user_uuid,
         }
         stubs.api.collections().list().execute.return_value = {"items": []}
-        stubs.api.collections().create().execute.side_effect = ({
-            "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz1",
-            "portable_data_hash": "99999999999999999999999999999991+99",
-            "manifest_text": ""
-        }, {
-            "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz2",
-            "portable_data_hash": "99999999999999999999999999999992+99",
-            "manifest_text": "./tool 00000000000000000000000000000000+0 0:0:submit_tool.cwl 0:0:blub.txt"
-        },
-        {
-            "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz4",
-            "portable_data_hash": "99999999999999999999999999999994+99",
-            "manifest_text": ""
-        },
-        {
-            "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz5",
-            "portable_data_hash": "99999999999999999999999999999995+99",
-            "manifest_text": ""
-        },
-        {
-            "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz6",
-            "portable_data_hash": "99999999999999999999999999999996+99",
-            "manifest_text": ""
-        }
-        )
-        stubs.api.collections().get().execute.return_value = {
-            "portable_data_hash": "99999999999999999999999999999993+99", "manifest_text": "./tool 00000000000000000000000000000000+0 0:0:submit_tool.cwl 0:0:blub.txt"}
+
+        class CollectionExecute(object):
+            def __init__(self, exe):
+                self.exe = exe
+            def execute(self, num_retries=None):
+                return self.exe
+
+        def collection_createstub(created_collections, body, ensure_unique_name=None):
+            mt = body["manifest_text"]
+            uuid = "zzzzz-4zz18-zzzzzzzzzzzzzz%d" % len(created_collections)
+            pdh = "%s+%i" % (hashlib.md5(mt).hexdigest(), len(mt))
+            created_collections[uuid] = {
+                "uuid": uuid,
+                "portable_data_hash": pdh,
+                "manifest_text": mt
+            }
+            return CollectionExecute(created_collections[uuid])
+
+        def collection_getstub(created_collections, uuid):
+            for v in created_collections.itervalues():
+                if uuid in (v["uuid"], v["portable_data_hash"]):
+                    return CollectionExecute(v)
+
+        created_collections = {}
+        stubs.api.collections().create.side_effect = functools.partial(collection_createstub, created_collections)
+        stubs.api.collections().get.side_effect = functools.partial(collection_getstub, created_collections)
 
         stubs.expect_job_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
         stubs.api.jobs().create().execute.return_value = {
@@ -106,7 +104,7 @@ def stubs(func):
             'script_parameters': {
                 'x': {
                     'basename': 'blorp.txt',
-                    'location': 'keep:99999999999999999999999999999992+99/blorp.txt',
+                    'location': 'keep:169f39d466a5438ac4a90e779bf750c7+53/blorp.txt',
                     'class': 'File'
                 },
                 'y': {
@@ -123,8 +121,7 @@ def stubs(func):
                     }],
                     'class': 'Directory'
                 },
-                'cwl:tool':
-                '99999999999999999999999999999994+99/workflow.cwl#main'
+                'cwl:tool': '3fffdeaa75e018172e1b583425f4ebff+60/workflow.cwl#main'
             },
             'repository': 'arvados',
             'script_version': 'master',
@@ -141,12 +138,12 @@ def stubs(func):
                     'runtime_constraints': {'docker_image': 'arvados/jobs:'+arvados_cwl.__version__, 'min_ram_mb_per_node': 1024},
                     'script_parameters': {
                         'y': {"value": {'basename': '99999999999999999999999999999998+99', 'location': 'keep:99999999999999999999999999999998+99', 'class': 'Directory'}},
-                        'x': {"value": {'basename': 'blorp.txt', 'class': 'File', 'location': 'keep:99999999999999999999999999999992+99/blorp.txt'}},
+                        'x': {"value": {'basename': 'blorp.txt', 'class': 'File', 'location': 'keep:169f39d466a5438ac4a90e779bf750c7+53/blorp.txt'}},
                         'z': {"value": {'basename': 'anonymous', 'class': 'Directory',
                               'listing': [
                                   {'basename': 'renamed.txt', 'class': 'File', 'location': 'keep:99999999999999999999999999999998+99/file1.txt'}
                               ]}},
-                        'cwl:tool': '99999999999999999999999999999994+99/workflow.cwl#main',
+                        'cwl:tool': '3fffdeaa75e018172e1b583425f4ebff+60/workflow.cwl#main',
                         'arv:enable_reuse': True,
                         'arv:on_error': 'continue'
                     },
@@ -191,7 +188,7 @@ def stubs(func):
                     'kind': 'json',
                     'content': {
                         'y': {'basename': '99999999999999999999999999999998+99', 'location': 'keep:99999999999999999999999999999998+99', 'class': 'Directory'},
-                        'x': {'basename': u'blorp.txt', 'class': 'File', 'location': u'keep:99999999999999999999999999999992+99/blorp.txt'},
+                        'x': {'basename': u'blorp.txt', 'class': 'File', 'location': u'keep:169f39d466a5438ac4a90e779bf750c7+53/blorp.txt'},
                         'z': {'basename': 'anonymous', 'class': 'Directory', 'listing': [
                             {'basename': 'renamed.txt', 'class': 'File', 'location': 'keep:99999999999999999999999999999998+99/file1.txt'}
                         ]}
@@ -245,21 +242,24 @@ class TestSubmit(unittest.TestCase):
         self.assertEqual(exited, 0)
 
         stubs.api.collections().create.assert_has_calls([
-            mock.call(),
             mock.call(body=JsonDiffMatcher({
                 'manifest_text':
                 '. 5bcc9fe8f8d5992e6cf418dc7ce4dbb3+16 0:16:blub.txt\n',
                 'replication_desired': None,
                 'name': 'submit_tool.cwl dependencies',
             }), ensure_unique_name=True),
-            mock.call().execute(num_retries=4),
             mock.call(body=JsonDiffMatcher({
                 'manifest_text':
                 '. 979af1245a12a1fed634d4222473bfdc+16 0:16:blorp.txt\n',
                 'replication_desired': None,
                 'name': 'submit_wf.cwl input',
             }), ensure_unique_name=True),
-            mock.call().execute(num_retries=4)])
+            mock.call(body=JsonDiffMatcher({
+                'manifest_text':
+                '. 61df2ed9ee3eb7dd9b799e5ca35305fa+1217 0:1217:workflow.cwl\n',
+                'replication_desired': None,
+                'name': 'submit_wf.cwl',
+            }), ensure_unique_name=True)        ])
 
         arvdock.assert_has_calls([
             mock.call(stubs.api, {"class": "DockerRequirement", "dockerPull": "debian:8"}, True, None),
@@ -428,21 +428,18 @@ class TestSubmit(unittest.TestCase):
             logging.exception("")
 
         stubs.api.collections().create.assert_has_calls([
-            mock.call(),
             mock.call(body=JsonDiffMatcher({
                 'manifest_text':
                 '. 5bcc9fe8f8d5992e6cf418dc7ce4dbb3+16 0:16:blub.txt\n',
                 'replication_desired': None,
                 'name': 'submit_tool.cwl dependencies',
             }), ensure_unique_name=True),
-            mock.call().execute(num_retries=4),
             mock.call(body=JsonDiffMatcher({
                 'manifest_text':
                 '. 979af1245a12a1fed634d4222473bfdc+16 0:16:blorp.txt\n',
                 'replication_desired': None,
                 'name': 'submit_wf.cwl input',
-            }), ensure_unique_name=True),
-            mock.call().execute(num_retries=4)])
+            }), ensure_unique_name=True)])
 
         expect_container = copy.deepcopy(stubs.expect_container_spec)
         stubs.api.container_requests().create.assert_called_with(
@@ -854,7 +851,7 @@ class TestCreateTemplate(unittest.TestCase):
             'dataclass': 'File',
             'required': True,
             'type': 'File',
-            'value': '99999999999999999999999999999992+99/blorp.txt',
+            'value': '169f39d466a5438ac4a90e779bf750c7+53/blorp.txt',
         }
         expect_component['script_parameters']['y'] = {
             'dataclass': 'Collection',
@@ -1106,6 +1103,36 @@ class TestCreateWorkflow(unittest.TestCase):
                          self.existing_workflow_uuid + '\n')
 
 
+    @stubs
+    def test_create_collection_per_tool(self, stubs):
+        project_uuid = 'zzzzz-j7d0g-zzzzzzzzzzzzzzz'
+
+        capture_stdout = cStringIO.StringIO()
+
+        exited = arvados_cwl.main(
+            ["--create-workflow", "--debug",
+             "--api=containers",
+             "--project-uuid", project_uuid,
+             "tests/collection_per_tool/collection_per_tool.cwl"],
+            capture_stdout, sys.stderr, api_client=stubs.api)
+        self.assertEqual(exited, 0)
+
+        expect_workflow = open("tests/collection_per_tool/collection_per_tool_packed.cwl").read()
+
+        body = {
+            "workflow": {
+                "owner_uuid": project_uuid,
+                "name": "collection_per_tool.cwl",
+                "description": "",
+                "definition": expect_workflow,
+            }
+        }
+        stubs.api.workflows().create.assert_called_with(
+            body=JsonDiffMatcher(body))
+
+        self.assertEqual(capture_stdout.getvalue(),
+                         stubs.expect_workflow_uuid + '\n')
+
 class TestTemplateInputs(unittest.TestCase):
     expect_template = {
         "components": {
@@ -1116,7 +1143,7 @@ class TestTemplateInputs(unittest.TestCase):
                 },
                 'script_parameters': {
                     'cwl:tool':
-                    '99999999999999999999999999999992+99/workflow.cwl#main',
+                    '6c5ee1cd606088106d9f28367cde1e41+60/workflow.cwl#main',
                     'optionalFloatInput': None,
                     'fileInput': {
                         'type': 'File',
@@ -1176,8 +1203,8 @@ class TestTemplateInputs(unittest.TestCase):
         expect_template = copy.deepcopy(self.expect_template)
         params = expect_template[
             "components"]["inputs_test.cwl"]["script_parameters"]
-        params["fileInput"]["value"] = '99999999999999999999999999999992+99/blorp.txt'
-        params["cwl:tool"] = '99999999999999999999999999999994+99/workflow.cwl#main'
+        params["fileInput"]["value"] = '169f39d466a5438ac4a90e779bf750c7+53/blorp.txt'
+        params["cwl:tool"] = '6c5ee1cd606088106d9f28367cde1e41+60/workflow.cwl#main'
         params["floatInput"]["value"] = 1.234
         params["boolInput"]["value"] = True
 
diff --git a/sdk/cwl/tests/wf/expect_packed.cwl b/sdk/cwl/tests/wf/expect_packed.cwl
index d7b9d61..c17612c 100644
--- a/sdk/cwl/tests/wf/expect_packed.cwl
+++ b/sdk/cwl/tests/wf/expect_packed.cwl
@@ -9,7 +9,7 @@ $graph:
     type: File
     default:
       class: File
-      location: keep:99999999999999999999999999999991+99/blub.txt
+      location: keep:5d373e7629203ce39e7c22af98a0f881+52/blub.txt
     inputBinding:
       position: 1
   outputs: []
@@ -19,7 +19,7 @@ $graph:
   inputs:
   - id: '#main/x'
     type: File
-    default: {class: File, location: 'keep:99999999999999999999999999999992+99/blorp.txt',
+    default: {class: File, location: 'keep:169f39d466a5438ac4a90e779bf750c7+53/blorp.txt',
       basename: blorp.txt}
   - id: '#main/y'
     type: Directory
diff --git a/sdk/cwl/tests/wf/submit_wf_packed.cwl b/sdk/cwl/tests/wf/submit_wf_packed.cwl
index 1f7fee0..2a3edf7 100644
--- a/sdk/cwl/tests/wf/submit_wf_packed.cwl
+++ b/sdk/cwl/tests/wf/submit_wf_packed.cwl
@@ -9,7 +9,7 @@ $graph:
     type: File
     default:
       class: File
-      location: keep:99999999999999999999999999999991+99/blub.txt
+      location: keep:5d373e7629203ce39e7c22af98a0f881+52/blub.txt
     inputBinding:
       position: 1
   outputs: []

commit 4a1df4f3aed07a31da8570363b5eaceccf4dffa8
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Wed May 17 09:03:18 2017 -0400

    11543: arvados.commands.run.uploadfiles takes optional Collection to upload to.

diff --git a/sdk/python/arvados/commands/run.py b/sdk/python/arvados/commands/run.py
index d8bb26e..216b512 100644
--- a/sdk/python/arvados/commands/run.py
+++ b/sdk/python/arvados/commands/run.py
@@ -144,43 +144,46 @@ def write_file(collection, pathprefix, fn):
 def uploadfiles(files, api, dry_run=False, num_retries=0,
                 project=None,
                 fnPattern="$(file %s/%s)",
-                name=None):
+                name=None,
+                collection=None):
     # Find the smallest path prefix that includes all the files that need to be uploaded.
     # This starts at the root and iteratively removes common parent directory prefixes
     # until all file paths no longer have a common parent.
-    n = True
-    pathprefix = "/"
-    while n:
-        pathstep = None
-        for c in files:
-            if pathstep is None:
-                sp = c.fn.split('/')
-                if len(sp) < 2:
-                    # no parent directories left
-                    n = False
-                    break
-                # path step takes next directory
-                pathstep = sp[0] + "/"
-            else:
-                # check if pathstep is common prefix for all files
-                if not c.fn.startswith(pathstep):
-                    n = False
-                    break
-        if n:
-            # pathstep is common parent directory for all files, so remove the prefix
-            # from each path
-            pathprefix += pathstep
+    if files:
+        n = True
+        pathprefix = "/"
+        while n:
+            pathstep = None
             for c in files:
-                c.fn = c.fn[len(pathstep):]
-
-    logger.info("Upload local files: \"%s\"", '" "'.join([c.fn for c in files]))
+                if pathstep is None:
+                    sp = c.fn.split('/')
+                    if len(sp) < 2:
+                        # no parent directories left
+                        n = False
+                        break
+                    # path step takes next directory
+                    pathstep = sp[0] + "/"
+                else:
+                    # check if pathstep is common prefix for all files
+                    if not c.fn.startswith(pathstep):
+                        n = False
+                        break
+            if n:
+                # pathstep is common parent directory for all files, so remove the prefix
+                # from each path
+                pathprefix += pathstep
+                for c in files:
+                    c.fn = c.fn[len(pathstep):]
+
+        logger.info("Upload local files: \"%s\"", '" "'.join([c.fn for c in files]))
 
     if dry_run:
         logger.info("$(input) is %s", pathprefix.rstrip('/'))
         pdh = "$(input)"
     else:
         files = sorted(files, key=lambda x: x.fn)
-        collection = arvados.collection.Collection(api_client=api, num_retries=num_retries)
+        if collection is None:
+            collection = arvados.collection.Collection(api_client=api, num_retries=num_retries)
         prev = ""
         for f in files:
             localpath = os.path.join(pathprefix, f.fn)
@@ -199,8 +202,9 @@ def uploadfiles(files, api, dry_run=False, num_retries=0,
                     for src in iterfiles:
                         write_file(collection, pathprefix, os.path.join(root, src))
 
-        filters=[["portable_data_hash", "=", collection.portable_data_hash()],
-                 ["name", "like", name+"%"]]
+        filters=[["portable_data_hash", "=", collection.portable_data_hash()]]
+        if name:
+            filters.append(["name", "like", name+"%"])
         if project:
             filters.append(["owner_uuid", "=", project])
 
@@ -210,7 +214,7 @@ def uploadfiles(files, api, dry_run=False, num_retries=0,
             item = exists["items"][0]
             pdh = item["portable_data_hash"]
             logger.info("Using collection %s (%s)", pdh, item["uuid"])
-        else:
+        elif len(collection) > 0:
             collection.save_new(name=name, owner_uuid=project, ensure_unique_name=True)
             pdh = collection.portable_data_hash()
             logger.info("Uploaded to %s (%s)", pdh, collection.manifest_locator())

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list