[ARVADOS] created: 8c65090500a5b3ff6f73f20a6af9b1debf442674

Git user git at public.curoverse.com
Thu Jul 14 17:25:01 EDT 2016


        at  8c65090500a5b3ff6f73f20a6af9b1debf442674 (commit)


commit 8c65090500a5b3ff6f73f20a6af9b1debf442674
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Thu Jul 14 16:57:27 2016 -0400

    9570: Use special path mapper for staging files to output dir.

diff --git a/sdk/cwl/arvados_cwl/arvjob.py b/sdk/cwl/arvados_cwl/arvjob.py
index 15cafad..2b1947e 100644
--- a/sdk/cwl/arvados_cwl/arvjob.py
+++ b/sdk/cwl/arvados_cwl/arvjob.py
@@ -7,12 +7,12 @@ from cwltool.errors import WorkflowException
 from cwltool.draft2tool import revmap_file, CommandLineTool
 from cwltool.load_tool import fetch_document
 from cwltool.builder import Builder
-from cwltool.pathmapper import PathMapper
 
 import arvados.collection
 
 from .arvdocker import arv_docker_get_image
 from .runner import Runner
+from .pathmapper import InitialWorkDirPathMapper
 from . import done
 
 logger = logging.getLogger('arvados.cwl-runner')
@@ -38,8 +38,8 @@ class ArvadosJob(object):
         if self.generatefiles["listing"]:
             vwd = arvados.collection.Collection()
             script_parameters["task.vwd"] = {}
-            generatemapper = PathMapper([self.generatefiles], self.outdir,
-                                        ".", separateDirs=False)
+            generatemapper = InitialWorkDirPathMapper([self.generatefiles], "", "",
+                                        separateDirs=False)
             for f, p in generatemapper.items():
                 if p.type == "CreateFile":
                     with vwd.open(p.target, "w") as n:
@@ -47,9 +47,9 @@ class ArvadosJob(object):
             vwd.save_new()
             for f, p in generatemapper.items():
                 if p.type == "File":
-                    script_parameters["task.vwd"][p.target] = self.pathmapper.mapper(f).target
+                    script_parameters["task.vwd"][p.target] = p.resolved
                 if p.type == "CreateFile":
-                    script_parameters["task.vwd"][p.target] = "$(task.keep)/%s/%s" % (vwd.portable_data_hash(), p.target)
+                    script_parameters["task.vwd"][p.target] = "%s/%s" % (vwd.portable_data_hash(), p.target)
 
         script_parameters["task.env"] = {"TMPDIR": "$(task.tmpdir)"}
         if self.environment:
diff --git a/sdk/cwl/arvados_cwl/pathmapper.py b/sdk/cwl/arvados_cwl/pathmapper.py
index 0dc80fc..fb4ae5a 100644
--- a/sdk/cwl/arvados_cwl/pathmapper.py
+++ b/sdk/cwl/arvados_cwl/pathmapper.py
@@ -140,3 +140,17 @@ class ArvPathMapper(PathMapper):
             return (target, "keep:" + target[len(self.keepdir)+1:])
         else:
             return super(ArvPathMapper, self).reversemap(target)
+
+class InitialWorkDirPathMapper(PathMapper):
+    def setup(self, referenced_files, basedir):
+        # type: (List[Any], unicode) -> None
+
+        # Go through each file and set the target to its own directory along
+        # with any secondary files.
+        stagedir = self.stagedir
+        for fob in referenced_files:
+            self.visit(fob, stagedir, basedir)
+
+        for path, (ab, tgt, type) in self._pathmap.items():
+            if type in ("File", "Directory") and ab.startswith("keep:"):
+                self._pathmap[path] = MapperEnt("$(task.keep)/%s" % ab[5:], tgt, type)

commit 4c24fb3457bcf1a95248945368431d1e7eb5993a
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Thu Jul 14 16:40:24 2016 -0400

    9570: Call normalizeFilesDirs to make sure basename is there.

diff --git a/sdk/cwl/arvados_cwl/runner.py b/sdk/cwl/arvados_cwl/runner.py
index fbe5879..d7d5d2b 100644
--- a/sdk/cwl/arvados_cwl/runner.py
+++ b/sdk/cwl/arvados_cwl/runner.py
@@ -8,7 +8,7 @@ import re
 import cwltool.draft2tool
 from cwltool.draft2tool import CommandLineTool
 import cwltool.workflow
-from cwltool.process import get_feature, scandeps, UnsupportedRequirement
+from cwltool.process import get_feature, scandeps, UnsupportedRequirement, normalizeFilesDirs
 from cwltool.load_tool import fetch_document
 from cwltool.pathmapper import adjustFileObjs, adjustDirObjs
 
@@ -74,6 +74,9 @@ class Runner(object):
         adjustDirObjs(sc, partial(visitFiles, workflowfiles))
         adjustDirObjs(self.job_order, partial(visitFiles, jobfiles))
 
+        normalizeFilesDirs(jobfiles)
+        normalizeFilesDirs(workflowfiles)
+
         keepprefix = kwargs.get("keepprefix", "")
         workflowmapper = ArvPathMapper(self.arvrunner, workflowfiles, "",
                                        keepprefix+"%s",

commit 70639c8511682d7d1e228b5a1fb8854b6d7c7a11
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Thu Jul 14 16:34:19 2016 -0400

    9570: Just add keep: to input keep locators in cwl-runner.

diff --git a/crunch_scripts/cwl-runner b/crunch_scripts/cwl-runner
index 4b6ff44..2a1873a 100755
--- a/crunch_scripts/cwl-runner
+++ b/crunch_scripts/cwl-runner
@@ -32,20 +32,20 @@ try:
 
     def keeppath(v):
         if arvados.util.keep_locator_pattern.match(v):
-            return "file://%s/%s" % (os.environ['TASK_KEEPMOUNT'], v)
+            return "keep:%s" % v
         else:
             return v
 
     def keeppathObj(v):
         v["location"] = keeppath(v["location"])
 
-    job_order_object["cwl:tool"] = keeppath(job_order_object["cwl:tool"])
+    job_order_object["cwl:tool"] = "file://%s/%s" % (os.environ['TASK_KEEPMOUNT'], job_order_object["cwl:tool"])
 
     for k,v in job_order_object.items():
         if isinstance(v, basestring) and arvados.util.keep_locator_pattern.match(v):
             job_order_object[k] = {
                 "class": "File",
-                "path": keeppath(v)
+                "location": "keep:%s" % v
             }
 
     adjustFileObjs(job_order_object, keeppathObj)

commit f6f8b8236cbfd0e10eadf6e8b63aa5ac8f813ab6
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Thu Jul 14 15:40:05 2016 -0400

    9570: Adjust Directory objects, too.

diff --git a/sdk/cwl/arvados_cwl/runner.py b/sdk/cwl/arvados_cwl/runner.py
index 1b5d8a4..fbe5879 100644
--- a/sdk/cwl/arvados_cwl/runner.py
+++ b/sdk/cwl/arvados_cwl/runner.py
@@ -10,7 +10,7 @@ from cwltool.draft2tool import CommandLineTool
 import cwltool.workflow
 from cwltool.process import get_feature, scandeps, UnsupportedRequirement
 from cwltool.load_tool import fetch_document
-from cwltool.pathmapper import adjustFileObjs
+from cwltool.pathmapper import adjustFileObjs, adjustDirObjs
 
 import arvados.collection
 
@@ -71,6 +71,8 @@ class Runner(object):
                       loadref)
         adjustFileObjs(sc, partial(visitFiles, workflowfiles))
         adjustFileObjs(self.job_order, partial(visitFiles, jobfiles))
+        adjustDirObjs(sc, partial(visitFiles, workflowfiles))
+        adjustDirObjs(self.job_order, partial(visitFiles, jobfiles))
 
         keepprefix = kwargs.get("keepprefix", "")
         workflowmapper = ArvPathMapper(self.arvrunner, workflowfiles, "",
@@ -88,6 +90,7 @@ class Runner(object):
         def setloc(p):
             p["location"] = jobmapper.mapper(p["location"])[1]
         adjustFileObjs(self.job_order, setloc)
+        adjustDirObjs(self.job_order, setloc)
 
         if "id" in self.job_order:
             del self.job_order["id"]
@@ -120,6 +123,7 @@ class Runner(object):
                     if not path.startswith("keep:"):
                         fileobj["location"] = "keep:%s/%s" % (record["output"], path)
                 adjustFileObjs(outputs, keepify)
+                adjustDirObjs(outputs, keepify)
             except Exception as e:
                 logger.error("While getting final output object: %s", e)
             self.arvrunner.output_callback(outputs, processStatus)

commit 90b404919bc02a4698d1fd592a0912584824269d
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Thu Jul 14 15:01:39 2016 -0400

    9570: Remove spurious print statement.

diff --git a/sdk/cwl/arvados_cwl/fsaccess.py b/sdk/cwl/arvados_cwl/fsaccess.py
index 661a9e0..d2d38b0 100644
--- a/sdk/cwl/arvados_cwl/fsaccess.py
+++ b/sdk/cwl/arvados_cwl/fsaccess.py
@@ -17,7 +17,6 @@ class CollectionFsAccess(cwltool.process.StdFsAccess):
         self.collections = {}
 
     def get_collection(self, path):
-        print "PPP", path
         p = path.split("/")
         if p[0].startswith("keep:") and arvados.util.keep_locator_pattern.match(p[0][5:]):
             pdh = p[0][5:]

commit 686055d4eed6cd52f56e68db8755eec0926d845d
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Thu Jul 14 14:54:18 2016 -0400

    9570: Bump cwltool dependency

diff --git a/build/run-build-packages.sh b/build/run-build-packages.sh
index b015dc7..1763ff3 100755
--- a/build/run-build-packages.sh
+++ b/build/run-build-packages.sh
@@ -467,7 +467,7 @@ fpm_build schema_salad "" "" python 1.14.20160708181155
 fpm_build ruamel.yaml "" "" python 0.11.11 --python-setup-py-arguments "--single-version-externally-managed"
 
 # And for cwltool we have the same problem as for schema_salad. Ward, 2016-03-17
-fpm_build cwltool "" "" python 1.0.20160712154127
+fpm_build cwltool "" "" python 1.0.20160714182449
 
 # FPM eats the trailing .0 in the python-rdflib-jsonld package when built with 'rdflib-jsonld>=0.3.0'. Force the version. Ward, 2016-03-25
 fpm_build rdflib-jsonld "" "" python 0.3.0
diff --git a/sdk/cwl/setup.py b/sdk/cwl/setup.py
index 667d7ac..36ad22b 100644
--- a/sdk/cwl/setup.py
+++ b/sdk/cwl/setup.py
@@ -32,7 +32,7 @@ setup(name='arvados-cwl-runner',
       # Make sure to update arvados/build/run-build-packages.sh as well
       # when updating the cwltool version pin.
       install_requires=[
-          'cwltool==1.0.20160712154127',
+          'cwltool==1.0.20160714182449',
           'arvados-python-client>=0.1.20160322001610'
       ],
       data_files=[

commit cf80d66e354093fbc5d64aabad66ceda957c4fe7
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Thu Jul 14 14:49:21 2016 -0400

    9570: Ensure on_error is set.

diff --git a/sdk/cwl/arvados_cwl/__init__.py b/sdk/cwl/arvados_cwl/__init__.py
index 9256814..61736cc 100644
--- a/sdk/cwl/arvados_cwl/__init__.py
+++ b/sdk/cwl/arvados_cwl/__init__.py
@@ -125,6 +125,7 @@ class ArvCwlRunner(object):
         kwargs["enable_reuse"] = kwargs.get("enable_reuse")
         kwargs["use_container"] = True
         kwargs["tmpdir_prefix"] = "tmp"
+        kwargs["on_error"] = "continue"
 
         if self.work_api == "containers":
             kwargs["outdir"] = "/var/spool/cwl"

commit b2be6f72c86379aeb62b96680a905389fddba796
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Thu Jul 14 14:35:37 2016 -0400

    9570: Ensure tmpdir_prefix is set.

diff --git a/sdk/cwl/arvados_cwl/__init__.py b/sdk/cwl/arvados_cwl/__init__.py
index 88da772..9256814 100644
--- a/sdk/cwl/arvados_cwl/__init__.py
+++ b/sdk/cwl/arvados_cwl/__init__.py
@@ -124,6 +124,7 @@ class ArvCwlRunner(object):
         kwargs["fs_access"] = self.fs_access
         kwargs["enable_reuse"] = kwargs.get("enable_reuse")
         kwargs["use_container"] = True
+        kwargs["tmpdir_prefix"] = "tmp"
 
         if self.work_api == "containers":
             kwargs["outdir"] = "/var/spool/cwl"

commit d913e0b3a387332f5f8000be7dd8ab3d15a3ce37
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Thu Jul 14 14:18:59 2016 -0400

    9570: Ensure use_container is set to True

diff --git a/sdk/cwl/arvados_cwl/__init__.py b/sdk/cwl/arvados_cwl/__init__.py
index 847da90..88da772 100644
--- a/sdk/cwl/arvados_cwl/__init__.py
+++ b/sdk/cwl/arvados_cwl/__init__.py
@@ -123,6 +123,7 @@ class ArvCwlRunner(object):
 
         kwargs["fs_access"] = self.fs_access
         kwargs["enable_reuse"] = kwargs.get("enable_reuse")
+        kwargs["use_container"] = True
 
         if self.work_api == "containers":
             kwargs["outdir"] = "/var/spool/cwl"
diff --git a/sdk/cwl/arvados_cwl/runner.py b/sdk/cwl/arvados_cwl/runner.py
index 19cb7ea..1b5d8a4 100644
--- a/sdk/cwl/arvados_cwl/runner.py
+++ b/sdk/cwl/arvados_cwl/runner.py
@@ -115,12 +115,11 @@ class Runner(object):
                 outc = arvados.collection.Collection(record["output"])
                 with outc.open("cwl.output.json") as f:
                     outputs = json.load(f)
-                def keepify(path):
+                def keepify(fileobj):
+                    path = fileobj["location"]
                     if not path.startswith("keep:"):
-                        return "keep:%s/%s" % (record["output"], path)
-                    else:
-                        return path
-                adjustFiles(outputs, keepify)
+                        fileobj["location"] = "keep:%s/%s" % (record["output"], path)
+                adjustFileObjs(outputs, keepify)
             except Exception as e:
                 logger.error("While getting final output object: %s", e)
             self.arvrunner.output_callback(outputs, processStatus)

commit 7ee484b49e3dd4367fb7299dbdb2dcb8eae3275b
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Thu Jul 14 13:57:09 2016 -0400

    9570: Fix cwl-runner for updated cwltool.

diff --git a/crunch_scripts/cwl-runner b/crunch_scripts/cwl-runner
index c786fc1..4b6ff44 100755
--- a/crunch_scripts/cwl-runner
+++ b/crunch_scripts/cwl-runner
@@ -19,7 +19,7 @@ import os
 import json
 import argparse
 from arvados.api import OrderedJsonModel
-from cwltool.process import adjustFiles
+from cwltool.process import adjustFileObjs
 from cwltool.load_tool import load_tool
 
 # Print package versions
@@ -36,6 +36,9 @@ try:
         else:
             return v
 
+    def keeppathObj(v):
+        v["location"] = keeppath(v["location"])
+
     job_order_object["cwl:tool"] = keeppath(job_order_object["cwl:tool"])
 
     for k,v in job_order_object.items():
@@ -45,7 +48,7 @@ try:
                 "path": keeppath(v)
             }
 
-    adjustFiles(job_order_object, keeppath)
+    adjustFileObjs(job_order_object, keeppathObj)
 
     runner = arvados_cwl.ArvCwlRunner(api_client=arvados.api('v1', model=OrderedJsonModel()))
 
@@ -63,15 +66,16 @@ try:
     outputObj = runner.arvExecutor(t, job_order_object, **vars(args))
 
     files = {}
-    def capture(path):
+    def capture(fileobj):
+        path = fileobj["location"]
         sp = path.split("/")
         col = sp[0][5:]
         if col not in files:
             files[col] = set()
         files[col].add("/".join(sp[1:]))
-        return path
+        fileobj["location"] = path
 
-    adjustFiles(outputObj, capture)
+    adjustFileObjs(outputObj, capture)
 
     final = arvados.collection.Collection()
 
@@ -80,10 +84,10 @@ try:
             for f in c:
                 final.copy(f, f, c, True)
 
-    def makeRelative(path):
-        return "/".join(path.split("/")[1:])
+    def makeRelative(fileobj):
+        fileobj["location"] = "/".join(fileobj["location"].split("/")[1:])
 
-    adjustFiles(outputObj, makeRelative)
+    adjustFileObjs(outputObj, makeRelative)
 
     with final.open("cwl.output.json", "w") as f:
         json.dump(outputObj, f, indent=4)

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list