[ARVADOS] created: 72f92ad04b77bf781f27ff355b4cc91d7f36ec0f
Git user
git at public.curoverse.com
Fri Jul 15 09:17:48 EDT 2016
at 72f92ad04b77bf781f27ff355b4cc91d7f36ec0f (commit)
commit 72f92ad04b77bf781f27ff355b4cc91d7f36ec0f
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Thu Jul 14 16:57:27 2016 -0400
9570: Use special path mapper for staging files to output dir.
diff --git a/sdk/cwl/arvados_cwl/arvjob.py b/sdk/cwl/arvados_cwl/arvjob.py
index 15cafad..0035597 100644
--- a/sdk/cwl/arvados_cwl/arvjob.py
+++ b/sdk/cwl/arvados_cwl/arvjob.py
@@ -7,12 +7,12 @@ from cwltool.errors import WorkflowException
from cwltool.draft2tool import revmap_file, CommandLineTool
from cwltool.load_tool import fetch_document
from cwltool.builder import Builder
-from cwltool.pathmapper import PathMapper
import arvados.collection
from .arvdocker import arv_docker_get_image
from .runner import Runner
+from .pathmapper import InitialWorkDirPathMapper
from . import done
logger = logging.getLogger('arvados.cwl-runner')
@@ -38,8 +38,8 @@ class ArvadosJob(object):
if self.generatefiles["listing"]:
vwd = arvados.collection.Collection()
script_parameters["task.vwd"] = {}
- generatemapper = PathMapper([self.generatefiles], self.outdir,
- ".", separateDirs=False)
+ generatemapper = InitialWorkDirPathMapper([self.generatefiles], "", "",
+ separateDirs=False)
for f, p in generatemapper.items():
if p.type == "CreateFile":
with vwd.open(p.target, "w") as n:
@@ -47,7 +47,7 @@ class ArvadosJob(object):
vwd.save_new()
for f, p in generatemapper.items():
if p.type == "File":
- script_parameters["task.vwd"][p.target] = self.pathmapper.mapper(f).target
+ script_parameters["task.vwd"][p.target] = p.resolved
if p.type == "CreateFile":
script_parameters["task.vwd"][p.target] = "$(task.keep)/%s/%s" % (vwd.portable_data_hash(), p.target)
diff --git a/sdk/cwl/arvados_cwl/pathmapper.py b/sdk/cwl/arvados_cwl/pathmapper.py
index 0dc80fc..fb4ae5a 100644
--- a/sdk/cwl/arvados_cwl/pathmapper.py
+++ b/sdk/cwl/arvados_cwl/pathmapper.py
@@ -140,3 +140,17 @@ class ArvPathMapper(PathMapper):
return (target, "keep:" + target[len(self.keepdir)+1:])
else:
return super(ArvPathMapper, self).reversemap(target)
+
+class InitialWorkDirPathMapper(PathMapper):
+ def setup(self, referenced_files, basedir):
+ # type: (List[Any], unicode) -> None
+
+ # Go through each file and set the target to its own directory along
+ # with any secondary files.
+ stagedir = self.stagedir
+ for fob in referenced_files:
+ self.visit(fob, stagedir, basedir)
+
+ for path, (ab, tgt, type) in self._pathmap.items():
+ if type in ("File", "Directory") and ab.startswith("keep:"):
+ self._pathmap[path] = MapperEnt("$(task.keep)/%s" % ab[5:], tgt, type)
commit 7a041a2b0068e02bbb9743407d915522a1e2d1dc
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Thu Jul 14 16:40:24 2016 -0400
9570: Call normalizeFilesDirs to make sure basename is there.
diff --git a/sdk/cwl/arvados_cwl/runner.py b/sdk/cwl/arvados_cwl/runner.py
index fbe5879..d7d5d2b 100644
--- a/sdk/cwl/arvados_cwl/runner.py
+++ b/sdk/cwl/arvados_cwl/runner.py
@@ -8,7 +8,7 @@ import re
import cwltool.draft2tool
from cwltool.draft2tool import CommandLineTool
import cwltool.workflow
-from cwltool.process import get_feature, scandeps, UnsupportedRequirement
+from cwltool.process import get_feature, scandeps, UnsupportedRequirement, normalizeFilesDirs
from cwltool.load_tool import fetch_document
from cwltool.pathmapper import adjustFileObjs, adjustDirObjs
@@ -74,6 +74,9 @@ class Runner(object):
adjustDirObjs(sc, partial(visitFiles, workflowfiles))
adjustDirObjs(self.job_order, partial(visitFiles, jobfiles))
+ normalizeFilesDirs(jobfiles)
+ normalizeFilesDirs(workflowfiles)
+
keepprefix = kwargs.get("keepprefix", "")
workflowmapper = ArvPathMapper(self.arvrunner, workflowfiles, "",
keepprefix+"%s",
commit 133c1473a92a3f4d5d49c0b4e94a4656f913a3e8
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Thu Jul 14 16:34:19 2016 -0400
9570: Just add keep: to input keep locators in cwl-runner.
diff --git a/crunch_scripts/cwl-runner b/crunch_scripts/cwl-runner
index 4b6ff44..2a1873a 100755
--- a/crunch_scripts/cwl-runner
+++ b/crunch_scripts/cwl-runner
@@ -32,20 +32,20 @@ try:
def keeppath(v):
if arvados.util.keep_locator_pattern.match(v):
- return "file://%s/%s" % (os.environ['TASK_KEEPMOUNT'], v)
+ return "keep:%s" % v
else:
return v
def keeppathObj(v):
v["location"] = keeppath(v["location"])
- job_order_object["cwl:tool"] = keeppath(job_order_object["cwl:tool"])
+ job_order_object["cwl:tool"] = "file://%s/%s" % (os.environ['TASK_KEEPMOUNT'], job_order_object["cwl:tool"])
for k,v in job_order_object.items():
if isinstance(v, basestring) and arvados.util.keep_locator_pattern.match(v):
job_order_object[k] = {
"class": "File",
- "path": keeppath(v)
+ "location": "keep:%s" % v
}
adjustFileObjs(job_order_object, keeppathObj)
commit 4e78269414737d1e693dad494e796ef46de1ea8a
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Thu Jul 14 15:40:05 2016 -0400
9570: Adjust Directory objects, too.
diff --git a/sdk/cwl/arvados_cwl/runner.py b/sdk/cwl/arvados_cwl/runner.py
index 1b5d8a4..fbe5879 100644
--- a/sdk/cwl/arvados_cwl/runner.py
+++ b/sdk/cwl/arvados_cwl/runner.py
@@ -10,7 +10,7 @@ from cwltool.draft2tool import CommandLineTool
import cwltool.workflow
from cwltool.process import get_feature, scandeps, UnsupportedRequirement
from cwltool.load_tool import fetch_document
-from cwltool.pathmapper import adjustFileObjs
+from cwltool.pathmapper import adjustFileObjs, adjustDirObjs
import arvados.collection
@@ -71,6 +71,8 @@ class Runner(object):
loadref)
adjustFileObjs(sc, partial(visitFiles, workflowfiles))
adjustFileObjs(self.job_order, partial(visitFiles, jobfiles))
+ adjustDirObjs(sc, partial(visitFiles, workflowfiles))
+ adjustDirObjs(self.job_order, partial(visitFiles, jobfiles))
keepprefix = kwargs.get("keepprefix", "")
workflowmapper = ArvPathMapper(self.arvrunner, workflowfiles, "",
@@ -88,6 +90,7 @@ class Runner(object):
def setloc(p):
p["location"] = jobmapper.mapper(p["location"])[1]
adjustFileObjs(self.job_order, setloc)
+ adjustDirObjs(self.job_order, setloc)
if "id" in self.job_order:
del self.job_order["id"]
@@ -120,6 +123,7 @@ class Runner(object):
if not path.startswith("keep:"):
fileobj["location"] = "keep:%s/%s" % (record["output"], path)
adjustFileObjs(outputs, keepify)
+ adjustDirObjs(outputs, keepify)
except Exception as e:
logger.error("While getting final output object: %s", e)
self.arvrunner.output_callback(outputs, processStatus)
commit c211862a0f01b5073f59621ea3fe395d7f53390c
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Thu Jul 14 15:01:39 2016 -0400
9570: Remove spurious print statement.
diff --git a/sdk/cwl/arvados_cwl/fsaccess.py b/sdk/cwl/arvados_cwl/fsaccess.py
index 661a9e0..d2d38b0 100644
--- a/sdk/cwl/arvados_cwl/fsaccess.py
+++ b/sdk/cwl/arvados_cwl/fsaccess.py
@@ -17,7 +17,6 @@ class CollectionFsAccess(cwltool.process.StdFsAccess):
self.collections = {}
def get_collection(self, path):
- print "PPP", path
p = path.split("/")
if p[0].startswith("keep:") and arvados.util.keep_locator_pattern.match(p[0][5:]):
pdh = p[0][5:]
commit 5605f8a67514037f5e7ca0e7780b23d6772317e4
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Thu Jul 14 14:54:18 2016 -0400
9570: Bump cwltool dependency
diff --git a/build/run-build-packages.sh b/build/run-build-packages.sh
index b015dc7..1763ff3 100755
--- a/build/run-build-packages.sh
+++ b/build/run-build-packages.sh
@@ -467,7 +467,7 @@ fpm_build schema_salad "" "" python 1.14.20160708181155
fpm_build ruamel.yaml "" "" python 0.11.11 --python-setup-py-arguments "--single-version-externally-managed"
# And for cwltool we have the same problem as for schema_salad. Ward, 2016-03-17
-fpm_build cwltool "" "" python 1.0.20160712154127
+fpm_build cwltool "" "" python 1.0.20160714182449
# FPM eats the trailing .0 in the python-rdflib-jsonld package when built with 'rdflib-jsonld>=0.3.0'. Force the version. Ward, 2016-03-25
fpm_build rdflib-jsonld "" "" python 0.3.0
diff --git a/sdk/cwl/setup.py b/sdk/cwl/setup.py
index 667d7ac..36ad22b 100644
--- a/sdk/cwl/setup.py
+++ b/sdk/cwl/setup.py
@@ -32,7 +32,7 @@ setup(name='arvados-cwl-runner',
# Make sure to update arvados/build/run-build-packages.sh as well
# when updating the cwltool version pin.
install_requires=[
- 'cwltool==1.0.20160712154127',
+ 'cwltool==1.0.20160714182449',
'arvados-python-client>=0.1.20160322001610'
],
data_files=[
commit ec2af1a4d13bbbdeedb83295d6177f7317cd8302
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Thu Jul 14 13:57:09 2016 -0400
9570: Update cwl-runner for latest cwltool. Ensure use_container,
tmpdir_prefix and on_error are set.
diff --git a/crunch_scripts/cwl-runner b/crunch_scripts/cwl-runner
index c786fc1..4b6ff44 100755
--- a/crunch_scripts/cwl-runner
+++ b/crunch_scripts/cwl-runner
@@ -19,7 +19,7 @@ import os
import json
import argparse
from arvados.api import OrderedJsonModel
-from cwltool.process import adjustFiles
+from cwltool.process import adjustFileObjs
from cwltool.load_tool import load_tool
# Print package versions
@@ -36,6 +36,9 @@ try:
else:
return v
+ def keeppathObj(v):
+ v["location"] = keeppath(v["location"])
+
job_order_object["cwl:tool"] = keeppath(job_order_object["cwl:tool"])
for k,v in job_order_object.items():
@@ -45,7 +48,7 @@ try:
"path": keeppath(v)
}
- adjustFiles(job_order_object, keeppath)
+ adjustFileObjs(job_order_object, keeppathObj)
runner = arvados_cwl.ArvCwlRunner(api_client=arvados.api('v1', model=OrderedJsonModel()))
@@ -63,15 +66,16 @@ try:
outputObj = runner.arvExecutor(t, job_order_object, **vars(args))
files = {}
- def capture(path):
+ def capture(fileobj):
+ path = fileobj["location"]
sp = path.split("/")
col = sp[0][5:]
if col not in files:
files[col] = set()
files[col].add("/".join(sp[1:]))
- return path
+ fileobj["location"] = path
- adjustFiles(outputObj, capture)
+ adjustFileObjs(outputObj, capture)
final = arvados.collection.Collection()
@@ -80,10 +84,10 @@ try:
for f in c:
final.copy(f, f, c, True)
- def makeRelative(path):
- return "/".join(path.split("/")[1:])
+ def makeRelative(fileobj):
+ fileobj["location"] = "/".join(fileobj["location"].split("/")[1:])
- adjustFiles(outputObj, makeRelative)
+ adjustFileObjs(outputObj, makeRelative)
with final.open("cwl.output.json", "w") as f:
json.dump(outputObj, f, indent=4)
diff --git a/sdk/cwl/arvados_cwl/__init__.py b/sdk/cwl/arvados_cwl/__init__.py
index 847da90..61736cc 100644
--- a/sdk/cwl/arvados_cwl/__init__.py
+++ b/sdk/cwl/arvados_cwl/__init__.py
@@ -123,6 +123,9 @@ class ArvCwlRunner(object):
kwargs["fs_access"] = self.fs_access
kwargs["enable_reuse"] = kwargs.get("enable_reuse")
+ kwargs["use_container"] = True
+ kwargs["tmpdir_prefix"] = "tmp"
+ kwargs["on_error"] = "continue"
if self.work_api == "containers":
kwargs["outdir"] = "/var/spool/cwl"
diff --git a/sdk/cwl/arvados_cwl/runner.py b/sdk/cwl/arvados_cwl/runner.py
index 19cb7ea..1b5d8a4 100644
--- a/sdk/cwl/arvados_cwl/runner.py
+++ b/sdk/cwl/arvados_cwl/runner.py
@@ -115,12 +115,11 @@ class Runner(object):
outc = arvados.collection.Collection(record["output"])
with outc.open("cwl.output.json") as f:
outputs = json.load(f)
- def keepify(path):
+ def keepify(fileobj):
+ path = fileobj["location"]
if not path.startswith("keep:"):
- return "keep:%s/%s" % (record["output"], path)
- else:
- return path
- adjustFiles(outputs, keepify)
+ fileobj["location"] = "keep:%s/%s" % (record["output"], path)
+ adjustFileObjs(outputs, keepify)
except Exception as e:
logger.error("While getting final output object: %s", e)
self.arvrunner.output_callback(outputs, processStatus)
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits mailing list