[ARVADOS] created: 807125b8423058d336165f069bf5d618f77845b7

Git user git at public.curoverse.com
Thu Oct 13 08:36:10 EDT 2016


        at  807125b8423058d336165f069bf5d618f77845b7 (commit)


commit 807125b8423058d336165f069bf5d618f77845b7
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Wed Oct 12 11:51:03 2016 -0400

    10221: Don't depend on st.keepref from arvados.commands.run.statfile and uploadfile, use file_pattern to construct predictable keep reference.

diff --git a/sdk/cwl/arvados_cwl/pathmapper.py b/sdk/cwl/arvados_cwl/pathmapper.py
index 288397a..73c81ce 100644
--- a/sdk/cwl/arvados_cwl/pathmapper.py
+++ b/sdk/cwl/arvados_cwl/pathmapper.py
@@ -37,11 +37,11 @@ class ArvPathMapper(PathMapper):
                 # Local FS ref, may need to be uploaded or may be on keep
                 # mount.
                 ab = abspath(src, self.input_basedir)
-                st = arvados.commands.run.statfile("", ab, fnPattern=self.file_pattern)
+                st = arvados.commands.run.statfile("", ab, fnPattern="keep:%s/%s")
                 if isinstance(st, arvados.commands.run.UploadFile):
                     uploadfiles.add((src, ab, st))
                 elif isinstance(st, arvados.commands.run.ArvFile):
-                    self._pathmap[src] = MapperEnt("keep:"+st.keepref, st.fn, "File")
+                    self._pathmap[src] = MapperEnt(st.fn, self.collection_pattern % st.fn[5:], "File")
                 elif src.startswith("_:"):
                     if "contents" in srcobj:
                         pass
@@ -91,12 +91,12 @@ class ArvPathMapper(PathMapper):
                                              self.arvrunner.api,
                                              dry_run=False,
                                              num_retries=self.arvrunner.num_retries,
-                                             fnPattern=self.file_pattern,
+                                             fnPattern="keep:%s/%s",
                                              name=self.name,
                                              project=self.arvrunner.project_uuid)
 
         for src, ab, st in uploadfiles:
-            self._pathmap[src] = MapperEnt("keep:" + st.keepref, st.fn, "File")
+            self._pathmap[src] = MapperEnt(st.fn, self.collection_pattern % st.fn[5:], "File")
             self.arvrunner.add_uploaded(src, self._pathmap[src])
 
         for srcobj in referenced_files:
diff --git a/sdk/cwl/tests/test_pathmapper.py b/sdk/cwl/tests/test_pathmapper.py
index 065904d..7e13066 100644
--- a/sdk/cwl/tests/test_pathmapper.py
+++ b/sdk/cwl/tests/test_pathmapper.py
@@ -18,9 +18,7 @@ from arvados_cwl.pathmapper import ArvPathMapper
 def upload_mock(files, api, dry_run=False, num_retries=0, project=None, fnPattern="$(file %s/%s)", name=None):
     pdh = "99999999999999999999999999999991+99"
     for c in files:
-        c.fn = os.path.basename(c.fn)
-        c.keepref = "%s/%s" % (pdh, c.fn)
-        c.fn = fnPattern % (pdh, c.fn)
+        c.fn = fnPattern % (pdh, os.path.basename(c.fn))
 
 class TestPathmap(unittest.TestCase):
     def test_keepref(self):
@@ -79,7 +77,6 @@ class TestPathmap(unittest.TestCase):
         # keep mount, so we can construct a direct reference directly without upload.
         def statfile_mock(prefix, fn, fnPattern="$(file %s/%s)", dirPattern="$(dir %s/%s/)"):
             st = arvados.commands.run.ArvFile("", fnPattern % ("99999999999999999999999999999991+99", "hw.py"))
-            st.keepref = "99999999999999999999999999999991+99/hw.py"
             return st
 
         upl.side_effect = upload_mock

commit 84f161dfc3229ad3a849fd5e152f254359a56252
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Wed Oct 12 11:27:37 2016 -0400

    10221: Add path mapper tests direct keep references, uploaded files, and keep mounted files.

diff --git a/sdk/cwl/arvados_cwl/pathmapper.py b/sdk/cwl/arvados_cwl/pathmapper.py
index 5051d97..288397a 100644
--- a/sdk/cwl/arvados_cwl/pathmapper.py
+++ b/sdk/cwl/arvados_cwl/pathmapper.py
@@ -41,7 +41,7 @@ class ArvPathMapper(PathMapper):
                 if isinstance(st, arvados.commands.run.UploadFile):
                     uploadfiles.add((src, ab, st))
                 elif isinstance(st, arvados.commands.run.ArvFile):
-                    self._pathmap[src] = MapperEnt("keep:"+ab, self.collection_pattern % ab, "File")
+                    self._pathmap[src] = MapperEnt("keep:"+st.keepref, st.fn, "File")
                 elif src.startswith("_:"):
                     if "contents" in srcobj:
                         pass
@@ -96,7 +96,7 @@ class ArvPathMapper(PathMapper):
                                              project=self.arvrunner.project_uuid)
 
         for src, ab, st in uploadfiles:
-            self._pathmap[src] = MapperEnt("keep:" + st.keepref, self.collection_pattern % st.keepref, "File")
+            self._pathmap[src] = MapperEnt("keep:" + st.keepref, st.fn, "File")
             self.arvrunner.add_uploaded(src, self._pathmap[src])
 
         for srcobj in referenced_files:
diff --git a/sdk/cwl/tests/hw.py b/sdk/cwl/tests/hw.py
new file mode 100644
index 0000000..62c813a
--- /dev/null
+++ b/sdk/cwl/tests/hw.py
@@ -0,0 +1 @@
+print "Hello world"
diff --git a/sdk/cwl/tests/test_pathmapper.py b/sdk/cwl/tests/test_pathmapper.py
new file mode 100644
index 0000000..065904d
--- /dev/null
+++ b/sdk/cwl/tests/test_pathmapper.py
@@ -0,0 +1,94 @@
+import functools
+import mock
+import sys
+import unittest
+import json
+import logging
+import os
+
+import arvados
+import arvados.keep
+import arvados.collection
+import arvados_cwl
+
+from cwltool.pathmapper import MapperEnt
+
+from arvados_cwl.pathmapper import ArvPathMapper
+
+def upload_mock(files, api, dry_run=False, num_retries=0, project=None, fnPattern="$(file %s/%s)", name=None):
+    pdh = "99999999999999999999999999999991+99"
+    for c in files:
+        c.fn = os.path.basename(c.fn)
+        c.keepref = "%s/%s" % (pdh, c.fn)
+        c.fn = fnPattern % (pdh, c.fn)
+
+class TestPathmap(unittest.TestCase):
+    def test_keepref(self):
+        """Test direct keep references."""
+
+        arvrunner = arvados_cwl.ArvCwlRunner(mock.MagicMock())
+
+        p = ArvPathMapper(arvrunner, [{
+            "class": "File",
+            "location": "keep:99999999999999999999999999999991+99/hw.py"
+        }], "", "/test/%s", "/test/%s/%s")
+
+        self.assertEqual({'keep:99999999999999999999999999999991+99/hw.py': MapperEnt(resolved='keep:99999999999999999999999999999991+99/hw.py', target='/test/99999999999999999999999999999991+99/hw.py', type='File')},
+                         p._pathmap)
+
+    @mock.patch("arvados.commands.run.uploadfiles")
+    def test_upload(self, upl):
+        """Test pathmapper uploading files."""
+
+        arvrunner = arvados_cwl.ArvCwlRunner(mock.MagicMock())
+
+        upl.side_effect = upload_mock
+
+        p = ArvPathMapper(arvrunner, [{
+            "class": "File",
+            "location": "tests/hw.py"
+        }], "", "/test/%s", "/test/%s/%s")
+
+        self.assertEqual({'tests/hw.py': MapperEnt(resolved='keep:99999999999999999999999999999991+99/hw.py', target='/test/99999999999999999999999999999991+99/hw.py', type='File')},
+                         p._pathmap)
+
+    @mock.patch("arvados.commands.run.uploadfiles")
+    def test_prev_uploaded(self, upl):
+        """Test pathmapper handling previously uploaded files."""
+
+        arvrunner = arvados_cwl.ArvCwlRunner(mock.MagicMock())
+        arvrunner.add_uploaded('tests/hw.py', MapperEnt(resolved='keep:99999999999999999999999999999991+99/hw.py', target='', type='File'))
+
+        upl.side_effect = upload_mock
+
+        p = ArvPathMapper(arvrunner, [{
+            "class": "File",
+            "location": "tests/hw.py"
+        }], "", "/test/%s", "/test/%s/%s")
+
+        self.assertEqual({'tests/hw.py': MapperEnt(resolved='keep:99999999999999999999999999999991+99/hw.py', target='/test/99999999999999999999999999999991+99/hw.py', type='File')},
+                         p._pathmap)
+
+    @mock.patch("arvados.commands.run.uploadfiles")
+    @mock.patch("arvados.commands.run.statfile")
+    def test_statfile(self, statfile, upl):
+        """Test pathmapper handling ArvFile references."""
+        arvrunner = arvados_cwl.ArvCwlRunner(mock.MagicMock())
+
+        # An ArvFile object returned from arvados.commands.run.statfile means the file is located on a
+        # keep mount, so we can construct a direct reference directly without upload.
+        def statfile_mock(prefix, fn, fnPattern="$(file %s/%s)", dirPattern="$(dir %s/%s/)"):
+            st = arvados.commands.run.ArvFile("", fnPattern % ("99999999999999999999999999999991+99", "hw.py"))
+            st.keepref = "99999999999999999999999999999991+99/hw.py"
+            return st
+
+        upl.side_effect = upload_mock
+        statfile.side_effect = statfile_mock
+
+        p = ArvPathMapper(arvrunner, [{
+            "class": "File",
+            "location": "tests/hw.py"
+        }], "", "/test/%s", "/test/%s/%s")
+
+        self.assertEqual({'tests/hw.py': MapperEnt(resolved='keep:99999999999999999999999999999991+99/hw.py', target='/test/99999999999999999999999999999991+99/hw.py', type='File')},
+                         p._pathmap)
diff --git a/sdk/cwl/tests/test_submit.py b/sdk/cwl/tests/test_submit.py
index 6674efb..007f809 100644
--- a/sdk/cwl/tests/test_submit.py
+++ b/sdk/cwl/tests/test_submit.py
@@ -60,7 +60,13 @@ def stubs(func):
             "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz5",
             "portable_data_hash": "99999999999999999999999999999995+99",
             "manifest_text": ""
-        }        )
+        },
+        {
+            "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz6",
+            "portable_data_hash": "99999999999999999999999999999996+99",
+            "manifest_text": ""
+        }
+        )
         stubs.api.collections().get().execute.return_value = {
             "portable_data_hash": "99999999999999999999999999999993+99", "manifest_text": "./tool 00000000000000000000000000000000+0 0:0:submit_tool.cwl 0:0:blub.txt"}
 

commit 022095999a584b12789259b577965122bb676194
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Wed Oct 12 10:59:05 2016 -0400

    10221: Always upload all dependencies up front.  Ensures consistent reuse
    behavior between --local and --submit.  Fix pathmapping bugs where
    previously-uploaded files were added to path map but not actually mapped to
    target paths.

diff --git a/sdk/cwl/arvados_cwl/__init__.py b/sdk/cwl/arvados_cwl/__init__.py
index c90f890..c8ba5a4 100644
--- a/sdk/cwl/arvados_cwl/__init__.py
+++ b/sdk/cwl/arvados_cwl/__init__.py
@@ -24,7 +24,7 @@ import arvados.config
 
 from .arvcontainer import ArvadosContainer, RunnerContainer
 from .arvjob import ArvadosJob, RunnerJob, RunnerTemplate
-from. runner import Runner
+from. runner import Runner, upload_instance
 from .arvtool import ArvadosCommandTool
 from .arvworkflow import ArvadosWorkflow, upload_workflow
 from .fsaccess import CollectionFsAccess
@@ -64,6 +64,8 @@ class ArvCwlRunner(object):
         self.pipeline = None
         self.final_output_collection = None
         self.output_name = output_name
+        self.project_uuid = None
+
         if keep_client is not None:
             self.keep_client = keep_client
         else:
@@ -266,6 +268,8 @@ class ArvCwlRunner(object):
             kwargs["docker_outdir"] = "$(task.outdir)"
             kwargs["tmpdir"] = "$(task.tmpdir)"
 
+        upload_instance(self, shortname(tool.tool["id"]), tool, job_order)
+
         runnerjob = None
         if kwargs.get("submit"):
             if self.work_api == "containers":
diff --git a/sdk/cwl/arvados_cwl/pathmapper.py b/sdk/cwl/arvados_cwl/pathmapper.py
index 228d433..5051d97 100644
--- a/sdk/cwl/arvados_cwl/pathmapper.py
+++ b/sdk/cwl/arvados_cwl/pathmapper.py
@@ -41,7 +41,7 @@ class ArvPathMapper(PathMapper):
                 if isinstance(st, arvados.commands.run.UploadFile):
                     uploadfiles.add((src, ab, st))
                 elif isinstance(st, arvados.commands.run.ArvFile):
-                    self._pathmap[src] = MapperEnt(ab, st.fn, "File")
+                    self._pathmap[src] = MapperEnt("keep:"+ab, self.collection_pattern % ab, "File")
                 elif src.startswith("_:"):
                     if "contents" in srcobj:
                         pass
@@ -78,9 +78,11 @@ class ArvPathMapper(PathMapper):
 
     def setup(self, referenced_files, basedir):
         # type: (List[Any], unicode) -> None
-        self._pathmap = self.arvrunner.get_uploaded()
         uploadfiles = set()
 
+        for k,v in self.arvrunner.get_uploaded().iteritems():
+            self._pathmap[k] = MapperEnt(v.resolved, self.collection_pattern % v.resolved[5:], "File")
+
         for srcobj in referenced_files:
             self.visit(srcobj, uploadfiles)
 
@@ -94,7 +96,7 @@ class ArvPathMapper(PathMapper):
                                              project=self.arvrunner.project_uuid)
 
         for src, ab, st in uploadfiles:
-            self._pathmap[src] = MapperEnt("keep:" + st.keepref, st.fn, "File")
+            self._pathmap[src] = MapperEnt("keep:" + st.keepref, self.collection_pattern % st.keepref, "File")
             self.arvrunner.add_uploaded(src, self._pathmap[src])
 
         for srcobj in referenced_files:
diff --git a/sdk/cwl/arvados_cwl/runner.py b/sdk/cwl/arvados_cwl/runner.py
index e5b4e00..054d353 100644
--- a/sdk/cwl/arvados_cwl/runner.py
+++ b/sdk/cwl/arvados_cwl/runner.py
@@ -112,6 +112,30 @@ def upload_docker(arvrunner, tool):
         for s in tool.steps:
             upload_docker(arvrunner, s.embedded_tool)
 
+def upload_instance(arvrunner, name, tool, job_order):
+        upload_docker(arvrunner, tool)
+
+        workflowmapper = upload_dependencies(arvrunner,
+                                             name,
+                                             tool.doc_loader,
+                                             tool.tool,
+                                             tool.tool["id"],
+                                             True)
+
+        jobmapper = upload_dependencies(arvrunner,
+                                        os.path.basename(job_order.get("id", "#")),
+                                        tool.doc_loader,
+                                        job_order,
+                                        job_order.get("id", "#"),
+                                        False)
+
+        adjustDirObjs(job_order, trim_listing)
+
+        if "id" in job_order:
+            del job_order["id"]
+
+        return workflowmapper
+
 
 class Runner(object):
     def __init__(self, runner, tool, job_order, enable_reuse, output_name):
@@ -128,31 +152,8 @@ class Runner(object):
         pass
 
     def arvados_job_spec(self, *args, **kwargs):
-        upload_docker(self.arvrunner, self.tool)
-
         self.name = os.path.basename(self.tool.tool["id"])
-
-        workflowmapper = upload_dependencies(self.arvrunner,
-                                             self.name,
-                                             self.tool.doc_loader,
-                                             self.tool.tool,
-                                             self.tool.tool["id"],
-                                             True)
-
-        jobmapper = upload_dependencies(self.arvrunner,
-                                        os.path.basename(self.job_order.get("id", "#")),
-                                        self.tool.doc_loader,
-                                        self.job_order,
-                                        self.job_order.get("id", "#"),
-                                        False)
-
-        adjustDirObjs(self.job_order, trim_listing)
-
-        if "id" in self.job_order:
-            del self.job_order["id"]
-
-        return workflowmapper
-
+        return upload_instance(self.arvrunner, self.name, self.tool, self.job_order)
 
     def done(self, record):
         if record["state"] == "Complete":

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list