[ARVADOS] created: 807125b8423058d336165f069bf5d618f77845b7
Git user
git at public.curoverse.com
Thu Oct 13 08:36:10 EDT 2016
at 807125b8423058d336165f069bf5d618f77845b7 (commit)
commit 807125b8423058d336165f069bf5d618f77845b7
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Wed Oct 12 11:51:03 2016 -0400
10221: Don't depend on st.keepref from arvados.commands.run.statfile and uploadfile, use file_pattern to construct predictable keep reference.
diff --git a/sdk/cwl/arvados_cwl/pathmapper.py b/sdk/cwl/arvados_cwl/pathmapper.py
index 288397a..73c81ce 100644
--- a/sdk/cwl/arvados_cwl/pathmapper.py
+++ b/sdk/cwl/arvados_cwl/pathmapper.py
@@ -37,11 +37,11 @@ class ArvPathMapper(PathMapper):
# Local FS ref, may need to be uploaded or may be on keep
# mount.
ab = abspath(src, self.input_basedir)
- st = arvados.commands.run.statfile("", ab, fnPattern=self.file_pattern)
+ st = arvados.commands.run.statfile("", ab, fnPattern="keep:%s/%s")
if isinstance(st, arvados.commands.run.UploadFile):
uploadfiles.add((src, ab, st))
elif isinstance(st, arvados.commands.run.ArvFile):
- self._pathmap[src] = MapperEnt("keep:"+st.keepref, st.fn, "File")
+ self._pathmap[src] = MapperEnt(st.fn, self.collection_pattern % st.fn[5:], "File")
elif src.startswith("_:"):
if "contents" in srcobj:
pass
@@ -91,12 +91,12 @@ class ArvPathMapper(PathMapper):
self.arvrunner.api,
dry_run=False,
num_retries=self.arvrunner.num_retries,
- fnPattern=self.file_pattern,
+ fnPattern="keep:%s/%s",
name=self.name,
project=self.arvrunner.project_uuid)
for src, ab, st in uploadfiles:
- self._pathmap[src] = MapperEnt("keep:" + st.keepref, st.fn, "File")
+ self._pathmap[src] = MapperEnt(st.fn, self.collection_pattern % st.fn[5:], "File")
self.arvrunner.add_uploaded(src, self._pathmap[src])
for srcobj in referenced_files:
diff --git a/sdk/cwl/tests/test_pathmapper.py b/sdk/cwl/tests/test_pathmapper.py
index 065904d..7e13066 100644
--- a/sdk/cwl/tests/test_pathmapper.py
+++ b/sdk/cwl/tests/test_pathmapper.py
@@ -18,9 +18,7 @@ from arvados_cwl.pathmapper import ArvPathMapper
def upload_mock(files, api, dry_run=False, num_retries=0, project=None, fnPattern="$(file %s/%s)", name=None):
pdh = "99999999999999999999999999999991+99"
for c in files:
- c.fn = os.path.basename(c.fn)
- c.keepref = "%s/%s" % (pdh, c.fn)
- c.fn = fnPattern % (pdh, c.fn)
+ c.fn = fnPattern % (pdh, os.path.basename(c.fn))
class TestPathmap(unittest.TestCase):
def test_keepref(self):
@@ -79,7 +77,6 @@ class TestPathmap(unittest.TestCase):
# keep mount, so we can construct a direct reference directly without upload.
def statfile_mock(prefix, fn, fnPattern="$(file %s/%s)", dirPattern="$(dir %s/%s/)"):
st = arvados.commands.run.ArvFile("", fnPattern % ("99999999999999999999999999999991+99", "hw.py"))
- st.keepref = "99999999999999999999999999999991+99/hw.py"
return st
upl.side_effect = upload_mock
commit 84f161dfc3229ad3a849fd5e152f254359a56252
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Wed Oct 12 11:27:37 2016 -0400
10221: Add path mapper tests direct keep references, uploaded files, and keep mounted files.
diff --git a/sdk/cwl/arvados_cwl/pathmapper.py b/sdk/cwl/arvados_cwl/pathmapper.py
index 5051d97..288397a 100644
--- a/sdk/cwl/arvados_cwl/pathmapper.py
+++ b/sdk/cwl/arvados_cwl/pathmapper.py
@@ -41,7 +41,7 @@ class ArvPathMapper(PathMapper):
if isinstance(st, arvados.commands.run.UploadFile):
uploadfiles.add((src, ab, st))
elif isinstance(st, arvados.commands.run.ArvFile):
- self._pathmap[src] = MapperEnt("keep:"+ab, self.collection_pattern % ab, "File")
+ self._pathmap[src] = MapperEnt("keep:"+st.keepref, st.fn, "File")
elif src.startswith("_:"):
if "contents" in srcobj:
pass
@@ -96,7 +96,7 @@ class ArvPathMapper(PathMapper):
project=self.arvrunner.project_uuid)
for src, ab, st in uploadfiles:
- self._pathmap[src] = MapperEnt("keep:" + st.keepref, self.collection_pattern % st.keepref, "File")
+ self._pathmap[src] = MapperEnt("keep:" + st.keepref, st.fn, "File")
self.arvrunner.add_uploaded(src, self._pathmap[src])
for srcobj in referenced_files:
diff --git a/sdk/cwl/tests/hw.py b/sdk/cwl/tests/hw.py
new file mode 100644
index 0000000..62c813a
--- /dev/null
+++ b/sdk/cwl/tests/hw.py
@@ -0,0 +1 @@
+print "Hello world"
diff --git a/sdk/cwl/tests/test_pathmapper.py b/sdk/cwl/tests/test_pathmapper.py
new file mode 100644
index 0000000..065904d
--- /dev/null
+++ b/sdk/cwl/tests/test_pathmapper.py
@@ -0,0 +1,94 @@
+import functools
+import mock
+import sys
+import unittest
+import json
+import logging
+import os
+
+import arvados
+import arvados.keep
+import arvados.collection
+import arvados_cwl
+
+from cwltool.pathmapper import MapperEnt
+
+from arvados_cwl.pathmapper import ArvPathMapper
+
+def upload_mock(files, api, dry_run=False, num_retries=0, project=None, fnPattern="$(file %s/%s)", name=None):
+ pdh = "99999999999999999999999999999991+99"
+ for c in files:
+ c.fn = os.path.basename(c.fn)
+ c.keepref = "%s/%s" % (pdh, c.fn)
+ c.fn = fnPattern % (pdh, c.fn)
+
+class TestPathmap(unittest.TestCase):
+ def test_keepref(self):
+ """Test direct keep references."""
+
+ arvrunner = arvados_cwl.ArvCwlRunner(mock.MagicMock())
+
+ p = ArvPathMapper(arvrunner, [{
+ "class": "File",
+ "location": "keep:99999999999999999999999999999991+99/hw.py"
+ }], "", "/test/%s", "/test/%s/%s")
+
+ self.assertEqual({'keep:99999999999999999999999999999991+99/hw.py': MapperEnt(resolved='keep:99999999999999999999999999999991+99/hw.py', target='/test/99999999999999999999999999999991+99/hw.py', type='File')},
+ p._pathmap)
+
+ @mock.patch("arvados.commands.run.uploadfiles")
+ def test_upload(self, upl):
+ """Test pathmapper uploading files."""
+
+ arvrunner = arvados_cwl.ArvCwlRunner(mock.MagicMock())
+
+ upl.side_effect = upload_mock
+
+ p = ArvPathMapper(arvrunner, [{
+ "class": "File",
+ "location": "tests/hw.py"
+ }], "", "/test/%s", "/test/%s/%s")
+
+ self.assertEqual({'tests/hw.py': MapperEnt(resolved='keep:99999999999999999999999999999991+99/hw.py', target='/test/99999999999999999999999999999991+99/hw.py', type='File')},
+ p._pathmap)
+
+ @mock.patch("arvados.commands.run.uploadfiles")
+ def test_prev_uploaded(self, upl):
+ """Test pathmapper handling previously uploaded files."""
+
+ arvrunner = arvados_cwl.ArvCwlRunner(mock.MagicMock())
+ arvrunner.add_uploaded('tests/hw.py', MapperEnt(resolved='keep:99999999999999999999999999999991+99/hw.py', target='', type='File'))
+
+ upl.side_effect = upload_mock
+
+ p = ArvPathMapper(arvrunner, [{
+ "class": "File",
+ "location": "tests/hw.py"
+ }], "", "/test/%s", "/test/%s/%s")
+
+ self.assertEqual({'tests/hw.py': MapperEnt(resolved='keep:99999999999999999999999999999991+99/hw.py', target='/test/99999999999999999999999999999991+99/hw.py', type='File')},
+ p._pathmap)
+
+ @mock.patch("arvados.commands.run.uploadfiles")
+ @mock.patch("arvados.commands.run.statfile")
+ def test_statfile(self, statfile, upl):
+ """Test pathmapper handling ArvFile references."""
+ arvrunner = arvados_cwl.ArvCwlRunner(mock.MagicMock())
+
+ # An ArvFile object returned from arvados.commands.run.statfile means the file is located on a
+ # keep mount, so we can construct a direct reference directly without upload.
+ def statfile_mock(prefix, fn, fnPattern="$(file %s/%s)", dirPattern="$(dir %s/%s/)"):
+ st = arvados.commands.run.ArvFile("", fnPattern % ("99999999999999999999999999999991+99", "hw.py"))
+ st.keepref = "99999999999999999999999999999991+99/hw.py"
+ return st
+
+ upl.side_effect = upload_mock
+ statfile.side_effect = statfile_mock
+
+ p = ArvPathMapper(arvrunner, [{
+ "class": "File",
+ "location": "tests/hw.py"
+ }], "", "/test/%s", "/test/%s/%s")
+
+ self.assertEqual({'tests/hw.py': MapperEnt(resolved='keep:99999999999999999999999999999991+99/hw.py', target='/test/99999999999999999999999999999991+99/hw.py', type='File')},
+ p._pathmap)
diff --git a/sdk/cwl/tests/test_submit.py b/sdk/cwl/tests/test_submit.py
index 6674efb..007f809 100644
--- a/sdk/cwl/tests/test_submit.py
+++ b/sdk/cwl/tests/test_submit.py
@@ -60,7 +60,13 @@ def stubs(func):
"uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz5",
"portable_data_hash": "99999999999999999999999999999995+99",
"manifest_text": ""
- } )
+ },
+ {
+ "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz6",
+ "portable_data_hash": "99999999999999999999999999999996+99",
+ "manifest_text": ""
+ }
+ )
stubs.api.collections().get().execute.return_value = {
"portable_data_hash": "99999999999999999999999999999993+99", "manifest_text": "./tool 00000000000000000000000000000000+0 0:0:submit_tool.cwl 0:0:blub.txt"}
commit 022095999a584b12789259b577965122bb676194
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Wed Oct 12 10:59:05 2016 -0400
10221: Always upload all dependencies up front. Ensures consistent reuse
behavior between --local and --submit. Fix pathmapping bugs where
previously-uploaded files were added to path map but not actually mapped to
target paths.
diff --git a/sdk/cwl/arvados_cwl/__init__.py b/sdk/cwl/arvados_cwl/__init__.py
index c90f890..c8ba5a4 100644
--- a/sdk/cwl/arvados_cwl/__init__.py
+++ b/sdk/cwl/arvados_cwl/__init__.py
@@ -24,7 +24,7 @@ import arvados.config
from .arvcontainer import ArvadosContainer, RunnerContainer
from .arvjob import ArvadosJob, RunnerJob, RunnerTemplate
-from. runner import Runner
+from. runner import Runner, upload_instance
from .arvtool import ArvadosCommandTool
from .arvworkflow import ArvadosWorkflow, upload_workflow
from .fsaccess import CollectionFsAccess
@@ -64,6 +64,8 @@ class ArvCwlRunner(object):
self.pipeline = None
self.final_output_collection = None
self.output_name = output_name
+ self.project_uuid = None
+
if keep_client is not None:
self.keep_client = keep_client
else:
@@ -266,6 +268,8 @@ class ArvCwlRunner(object):
kwargs["docker_outdir"] = "$(task.outdir)"
kwargs["tmpdir"] = "$(task.tmpdir)"
+ upload_instance(self, shortname(tool.tool["id"]), tool, job_order)
+
runnerjob = None
if kwargs.get("submit"):
if self.work_api == "containers":
diff --git a/sdk/cwl/arvados_cwl/pathmapper.py b/sdk/cwl/arvados_cwl/pathmapper.py
index 228d433..5051d97 100644
--- a/sdk/cwl/arvados_cwl/pathmapper.py
+++ b/sdk/cwl/arvados_cwl/pathmapper.py
@@ -41,7 +41,7 @@ class ArvPathMapper(PathMapper):
if isinstance(st, arvados.commands.run.UploadFile):
uploadfiles.add((src, ab, st))
elif isinstance(st, arvados.commands.run.ArvFile):
- self._pathmap[src] = MapperEnt(ab, st.fn, "File")
+ self._pathmap[src] = MapperEnt("keep:"+ab, self.collection_pattern % ab, "File")
elif src.startswith("_:"):
if "contents" in srcobj:
pass
@@ -78,9 +78,11 @@ class ArvPathMapper(PathMapper):
def setup(self, referenced_files, basedir):
# type: (List[Any], unicode) -> None
- self._pathmap = self.arvrunner.get_uploaded()
uploadfiles = set()
+ for k,v in self.arvrunner.get_uploaded().iteritems():
+ self._pathmap[k] = MapperEnt(v.resolved, self.collection_pattern % v.resolved[5:], "File")
+
for srcobj in referenced_files:
self.visit(srcobj, uploadfiles)
@@ -94,7 +96,7 @@ class ArvPathMapper(PathMapper):
project=self.arvrunner.project_uuid)
for src, ab, st in uploadfiles:
- self._pathmap[src] = MapperEnt("keep:" + st.keepref, st.fn, "File")
+ self._pathmap[src] = MapperEnt("keep:" + st.keepref, self.collection_pattern % st.keepref, "File")
self.arvrunner.add_uploaded(src, self._pathmap[src])
for srcobj in referenced_files:
diff --git a/sdk/cwl/arvados_cwl/runner.py b/sdk/cwl/arvados_cwl/runner.py
index e5b4e00..054d353 100644
--- a/sdk/cwl/arvados_cwl/runner.py
+++ b/sdk/cwl/arvados_cwl/runner.py
@@ -112,6 +112,30 @@ def upload_docker(arvrunner, tool):
for s in tool.steps:
upload_docker(arvrunner, s.embedded_tool)
+def upload_instance(arvrunner, name, tool, job_order):
+ upload_docker(arvrunner, tool)
+
+ workflowmapper = upload_dependencies(arvrunner,
+ name,
+ tool.doc_loader,
+ tool.tool,
+ tool.tool["id"],
+ True)
+
+ jobmapper = upload_dependencies(arvrunner,
+ os.path.basename(job_order.get("id", "#")),
+ tool.doc_loader,
+ job_order,
+ job_order.get("id", "#"),
+ False)
+
+ adjustDirObjs(job_order, trim_listing)
+
+ if "id" in job_order:
+ del job_order["id"]
+
+ return workflowmapper
+
class Runner(object):
def __init__(self, runner, tool, job_order, enable_reuse, output_name):
@@ -128,31 +152,8 @@ class Runner(object):
pass
def arvados_job_spec(self, *args, **kwargs):
- upload_docker(self.arvrunner, self.tool)
-
self.name = os.path.basename(self.tool.tool["id"])
-
- workflowmapper = upload_dependencies(self.arvrunner,
- self.name,
- self.tool.doc_loader,
- self.tool.tool,
- self.tool.tool["id"],
- True)
-
- jobmapper = upload_dependencies(self.arvrunner,
- os.path.basename(self.job_order.get("id", "#")),
- self.tool.doc_loader,
- self.job_order,
- self.job_order.get("id", "#"),
- False)
-
- adjustDirObjs(self.job_order, trim_listing)
-
- if "id" in self.job_order:
- del self.job_order["id"]
-
- return workflowmapper
-
+ return upload_instance(self.arvrunner, self.name, self.tool, self.job_order)
def done(self, record):
if record["state"] == "Complete":
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list