[ARVADOS] created: 1.1.4-326-g57e511e
Git user
git at public.curoverse.com
Fri May 25 10:30:20 EDT 2018
at 57e511e900aa1eb175fa7f308b09516ad83492b4 (commit)
commit 57e511e900aa1eb175fa7f308b09516ad83492b4
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date: Fri May 25 10:16:07 2018 -0400
11162: Add test for --submit-request-uuid
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>
diff --git a/sdk/cwl/tests/test_submit.py b/sdk/cwl/tests/test_submit.py
index ce20aba..f8b557f 100644
--- a/sdk/cwl/tests/test_submit.py
+++ b/sdk/cwl/tests/test_submit.py
@@ -1255,6 +1255,31 @@ class TestSubmit(unittest.TestCase):
self.assertEqual(capture_stdout.getvalue(),
stubs.expect_container_request_uuid + '\n')
+ @stubs
+ def test_submit_request_uuid(self, stubs):
+ stubs.expect_container_request_uuid = "zzzzz-xvhdp-yyyyyyyyyyyyyyy"
+
+ stubs.api.container_requests().update().execute.return_value = {
+ "uuid": stubs.expect_container_request_uuid,
+ "container_uuid": "zzzzz-dz642-zzzzzzzzzzzzzzz",
+ "state": "Queued"
+ }
+
+ capture_stdout = cStringIO.StringIO()
+ try:
+ exited = arvados_cwl.main(
+ ["--submit", "--no-wait", "--api=containers", "--debug", "--submit-request-uuid=zzzzz-xvhdp-yyyyyyyyyyyyyyy",
+ "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+ capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
+ self.assertEqual(exited, 0)
+ except:
+ logging.exception("")
+
+ stubs.api.container_requests().update.assert_called_with(
+ uuid="zzzzz-xvhdp-yyyyyyyyyyyyyyy", body=JsonDiffMatcher(stubs.expect_container_spec))
+ self.assertEqual(capture_stdout.getvalue(),
+ stubs.expect_container_request_uuid + '\n')
+
class TestCreateTemplate(unittest.TestCase):
existing_template_uuid = "zzzzz-d1hrv-validworkfloyml"
commit e4ea2e1dd1cb597b02d15bd7b9323705d6342f99
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date: Fri May 25 09:55:13 2018 -0400
11162: Fix tests.
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>
diff --git a/sdk/cwl/arvados_cwl/arvtool.py b/sdk/cwl/arvados_cwl/arvtool.py
index fea6adf..2d15610 100644
--- a/sdk/cwl/arvados_cwl/arvtool.py
+++ b/sdk/cwl/arvados_cwl/arvtool.py
@@ -12,6 +12,8 @@ class ArvadosCommandTool(CommandLineTool):
def __init__(self, arvrunner, toolpath_object, **kwargs):
super(ArvadosCommandTool, self).__init__(toolpath_object, **kwargs)
+ if "cwlVersion" not in self.metadata:
+ raise Exception("missing")
self.arvrunner = arvrunner
self.work_api = kwargs["work_api"]
diff --git a/sdk/cwl/arvados_cwl/arvworkflow.py b/sdk/cwl/arvados_cwl/arvworkflow.py
index 6f731fd..f675fb1 100644
--- a/sdk/cwl/arvados_cwl/arvworkflow.py
+++ b/sdk/cwl/arvados_cwl/arvworkflow.py
@@ -277,6 +277,7 @@ class ArvadosWorkflow(Workflow):
})
kwargs["loader"] = self.doc_loader
kwargs["avsc_names"] = self.doc_schema
+ kwargs["metadata"] = self.metadata
return ArvadosCommandTool(self.arvrunner, wf_runner, **kwargs).job(joborder_resolved, output_callback, **kwargs)
else:
return super(ArvadosWorkflow, self).job(joborder, output_callback, **kwargs)
diff --git a/sdk/cwl/arvados_cwl/http.py b/sdk/cwl/arvados_cwl/http.py
index ea77786..1ee1607 100644
--- a/sdk/cwl/arvados_cwl/http.py
+++ b/sdk/cwl/arvados_cwl/http.py
@@ -18,7 +18,7 @@ def my_parsedate(text):
if parsed:
return datetime.datetime(*parsed[:6])
else:
- datetime.datetime(1970, 1, 1)
+ return datetime.datetime(1970, 1, 1)
def fresh_cache(url, properties):
pr = properties[url]
@@ -89,7 +89,7 @@ def http_to_keep(api, project_uuid, url):
req = requests.get(url, stream=True, allow_redirects=True)
if req.status_code != 200:
- raise Exception("Failed to download '%s' got status %s " % (req.status_code, url))
+ raise Exception("Failed to download '%s' got status %s " % (url, req.status_code))
remember_headers(url, properties, req.headers)
diff --git a/sdk/cwl/setup.py b/sdk/cwl/setup.py
index 35610e2..625f275 100644
--- a/sdk/cwl/setup.py
+++ b/sdk/cwl/setup.py
@@ -33,7 +33,7 @@ setup(name='arvados-cwl-runner',
# Note that arvados/build/run-build-packages.sh looks at this
# file to determine what version of cwltool and schema-salad to build.
install_requires=[
- 'cwltool==1.0.20180523203033',
+ 'cwltool==1.0.20180524215209',
'schema-salad==2.7.20180501211602',
'typing >= 3.5.3',
'ruamel.yaml >=0.13.11, <0.15',
diff --git a/sdk/cwl/tests/test_container.py b/sdk/cwl/tests/test_container.py
index 522946a..2295e93 100644
--- a/sdk/cwl/tests/test_container.py
+++ b/sdk/cwl/tests/test_container.py
@@ -53,7 +53,8 @@ class TestContainer(unittest.TestCase):
make_fs_access=functools.partial(arvados_cwl.CollectionFsAccess,
collection_cache=arvados_cwl.CollectionCache(runner.api, None, 0))
arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, work_api="containers", avsc_names=avsc_names,
- basedir="", make_fs_access=make_fs_access, loader=Loader({}))
+ basedir="", make_fs_access=make_fs_access, loader=Loader({}),
+ metadata={"cwlVersion": "v1.0"})
arvtool.formatgraph = None
for j in arvtool.job({}, mock.MagicMock(), basedir="", name="test_run_"+str(enable_reuse),
make_fs_access=make_fs_access, tmpdir="/tmp"):
@@ -139,7 +140,7 @@ class TestContainer(unittest.TestCase):
collection_cache=arvados_cwl.CollectionCache(runner.api, None, 0))
arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, work_api="containers",
avsc_names=avsc_names, make_fs_access=make_fs_access,
- loader=Loader({}))
+ loader=Loader({}), metadata={"cwlVersion": "v1.0"})
arvtool.formatgraph = None
for j in arvtool.job({}, mock.MagicMock(), basedir="", name="test_resource_requirements",
make_fs_access=make_fs_access, tmpdir="/tmp"):
@@ -251,7 +252,7 @@ class TestContainer(unittest.TestCase):
collection_cache=arvados_cwl.CollectionCache(runner.api, None, 0))
arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, work_api="containers",
avsc_names=avsc_names, make_fs_access=make_fs_access,
- loader=Loader({}))
+ loader=Loader({}), metadata={"cwlVersion": "v1.0"})
arvtool.formatgraph = None
for j in arvtool.job({}, mock.MagicMock(), basedir="", name="test_initial_work_dir",
make_fs_access=make_fs_access, tmpdir="/tmp"):
@@ -352,7 +353,8 @@ class TestContainer(unittest.TestCase):
make_fs_access=functools.partial(arvados_cwl.CollectionFsAccess,
collection_cache=arvados_cwl.CollectionCache(runner.api, None, 0))
arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, work_api="containers", avsc_names=avsc_names,
- basedir="", make_fs_access=make_fs_access, loader=Loader({}))
+ basedir="", make_fs_access=make_fs_access, loader=Loader({}),
+ metadata={"cwlVersion": "v1.0"})
arvtool.formatgraph = None
for j in arvtool.job({}, mock.MagicMock(), basedir="", name="test_run_redirect",
make_fs_access=make_fs_access, tmpdir="/tmp"):
@@ -477,7 +479,8 @@ class TestContainer(unittest.TestCase):
make_fs_access=functools.partial(arvados_cwl.CollectionFsAccess,
collection_cache=arvados_cwl.CollectionCache(runner.api, None, 0))
arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, work_api="containers", avsc_names=avsc_names,
- basedir="", make_fs_access=make_fs_access, loader=Loader({}))
+ basedir="", make_fs_access=make_fs_access, loader=Loader({}),
+ metadata={"cwlVersion": "v1.0"})
arvtool.formatgraph = None
job_order = {
"p1": {
@@ -584,7 +587,8 @@ class TestContainer(unittest.TestCase):
make_fs_access=functools.partial(arvados_cwl.CollectionFsAccess,
collection_cache=arvados_cwl.CollectionCache(runner.api, None, 0))
arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, work_api="containers", avsc_names=avsc_names,
- basedir="", make_fs_access=make_fs_access, loader=Loader({}))
+ basedir="", make_fs_access=make_fs_access, loader=Loader({}),
+ metadata={"cwlVersion": "v1.0"})
arvtool.formatgraph = None
job_order = {"pw": "blorp"}
diff --git a/sdk/cwl/tests/test_job.py b/sdk/cwl/tests/test_job.py
index abf9694..30930dd 100644
--- a/sdk/cwl/tests/test_job.py
+++ b/sdk/cwl/tests/test_job.py
@@ -59,7 +59,8 @@ class TestJob(unittest.TestCase):
make_fs_access=functools.partial(arvados_cwl.CollectionFsAccess,
collection_cache=arvados_cwl.CollectionCache(runner.api, None, 0))
arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, work_api="jobs", avsc_names=avsc_names,
- basedir="", make_fs_access=make_fs_access, loader=Loader({}))
+ basedir="", make_fs_access=make_fs_access, loader=Loader({}),
+ metadata={"cwlVersion": "v1.0"})
arvtool.formatgraph = None
for j in arvtool.job({}, mock.MagicMock(), basedir="", make_fs_access=make_fs_access):
j.run(enable_reuse=enable_reuse)
@@ -150,7 +151,8 @@ class TestJob(unittest.TestCase):
make_fs_access=functools.partial(arvados_cwl.CollectionFsAccess,
collection_cache=arvados_cwl.CollectionCache(runner.api, None, 0))
arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, work_api="jobs", avsc_names=avsc_names,
- make_fs_access=make_fs_access, loader=Loader({}))
+ make_fs_access=make_fs_access, loader=Loader({}),
+ metadata={"cwlVersion": "v1.0"})
arvtool.formatgraph = None
for j in arvtool.job({}, mock.MagicMock(), basedir="", make_fs_access=make_fs_access):
j.run(enable_reuse=True)
diff --git a/sdk/cwl/tests/test_submit.py b/sdk/cwl/tests/test_submit.py
index 77bef07..ce20aba 100644
--- a/sdk/cwl/tests/test_submit.py
+++ b/sdk/cwl/tests/test_submit.py
@@ -234,7 +234,6 @@ def stubs(func):
},
'secret_mounts': {},
'state': 'Committed',
- 'owner_uuid': None,
'command': ['arvados-cwl-runner', '--local', '--api=containers',
'--no-log-timestamps', '--disable-validate',
'--eval-timeout=20', '--thread-count=4',
@@ -751,7 +750,6 @@ class TestSubmit(unittest.TestCase):
'kind': 'json'
}
}, 'state': 'Committed',
- 'owner_uuid': None,
'output_path': '/var/spool/cwl',
'name': 'expect_arvworkflow.cwl#main',
'container_image': 'arvados/jobs:'+arvados_cwl.__version__,
@@ -870,7 +868,6 @@ class TestSubmit(unittest.TestCase):
'kind': 'json'
}
}, 'state': 'Committed',
- 'owner_uuid': None,
'output_path': '/var/spool/cwl',
'name': 'a test workflow',
'container_image': 'arvados/jobs:'+arvados_cwl.__version__,
@@ -1236,7 +1233,6 @@ class TestSubmit(unittest.TestCase):
},
"name": "secret_wf.cwl",
"output_path": "/var/spool/cwl",
- "owner_uuid": None,
"priority": 500,
"properties": {},
"runtime_constraints": {
commit ddac86f6bae74398f58db75e383f5945c323a099
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date: Wed May 23 16:56:40 2018 -0400
11162: Bump cwltool for fix in generating basename from url
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>
diff --git a/sdk/cwl/setup.py b/sdk/cwl/setup.py
index 05ff8a2..35610e2 100644
--- a/sdk/cwl/setup.py
+++ b/sdk/cwl/setup.py
@@ -33,7 +33,7 @@ setup(name='arvados-cwl-runner',
# Note that arvados/build/run-build-packages.sh looks at this
# file to determine what version of cwltool and schema-salad to build.
install_requires=[
- 'cwltool==1.0.20180522135731',
+ 'cwltool==1.0.20180523203033',
'schema-salad==2.7.20180501211602',
'typing >= 3.5.3',
'ruamel.yaml >=0.13.11, <0.15',
commit b3f04be24d0d2c75808dccd9e5923ee25371cb17
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date: Wed May 23 15:23:44 2018 -0400
11162: Smarter http downloads.
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>
diff --git a/sdk/cwl/arvados_cwl/http.py b/sdk/cwl/arvados_cwl/http.py
index ab59ad3..ea77786 100644
--- a/sdk/cwl/arvados_cwl/http.py
+++ b/sdk/cwl/arvados_cwl/http.py
@@ -14,7 +14,11 @@ def my_formatdate(dt):
return email.utils.formatdate(timeval=time.mktime(now.timetuple()), localtime=False, usegmt=True)
def my_parsedate(text):
- return datetime.datetime(*email.utils.parsedate(text)[:6])
+ parsed = email.utils.parsedate(text)
+ if parsed:
+ return datetime.datetime(*parsed[:6])
+ else:
+ datetime.datetime(1970, 1, 1)
def fresh_cache(url, properties):
pr = properties[url]
@@ -53,7 +57,7 @@ def remember_headers(url, properties, headers):
def changed(url, properties):
- req = requests.head(url)
+ req = requests.head(url, allow_redirects=True)
remember_headers(url, properties, req.headers)
if req.status_code != 200:
@@ -67,21 +71,22 @@ def changed(url, properties):
def http_to_keep(api, project_uuid, url):
r = api.collections().list(filters=[["properties", "exists", url]]).execute()
- name = urlparse.urlparse(url).path.split("/")[-1]
for item in r["items"]:
properties = item["properties"]
if fresh_cache(url, properties):
# Do nothing
- return "keep:%s/%s" % (item["portable_data_hash"], name)
+ cr = arvados.collection.CollectionReader(item["portable_data_hash"], api_client=api)
+ return "keep:%s/%s" % (item["portable_data_hash"], cr.keys()[0])
if not changed(url, properties):
# ETag didn't change, same content, just update headers
api.collections().update(uuid=item["uuid"], body={"collection":{"properties": properties}}).execute()
- return "keep:%s/%s" % (item["portable_data_hash"], name)
+ cr = arvados.collection.CollectionReader(item["portable_data_hash"], api_client=api)
+ return "keep:%s/%s" % (item["portable_data_hash"], cr.keys()[0])
properties = {}
- req = requests.get(url, stream=True)
+ req = requests.get(url, stream=True, allow_redirects=True)
if req.status_code != 200:
raise Exception("Failed to download '%s' got status %s " % (req.status_code, url))
@@ -92,6 +97,15 @@ def http_to_keep(api, project_uuid, url):
c = arvados.collection.Collection()
+ if req.headers.get("Content-Disposition"):
+ grp = re.search(r'filename=("((\"|[^"])+)"|([^][()<>@,;:\"/?={} ]+))', req.headers["Content-Disposition"])
+ if grp.groups(2):
+ name = grp.groups(2)
+ else:
+ name = grp.groups(3)
+ else:
+ name = urlparse.urlparse(url).path.split("/")[-1]
+
count = 0
start = time.time()
checkpoint = start
commit 6e17218f2fc39a828af4d3a19bdb29243739dafd
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date: Wed May 23 13:17:40 2018 -0400
11162: Add logging about checking cache
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>
diff --git a/sdk/cwl/arvados_cwl/http.py b/sdk/cwl/arvados_cwl/http.py
index af9bf04..ab59ad3 100644
--- a/sdk/cwl/arvados_cwl/http.py
+++ b/sdk/cwl/arvados_cwl/http.py
@@ -20,6 +20,8 @@ def fresh_cache(url, properties):
pr = properties[url]
expires = None
+ logger.debug("Checking cache freshness for %s using %s", url, pr)
+
if "Cache-Control" in pr:
if re.match(r"immutable", pr["Cache-Control"]):
return True
commit 708a9f8e858f0791702083e3f429fcb3eea5af15
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date: Wed May 23 12:52:22 2018 -0400
11162: Set a default cache time of 24 hours for files fetched over http.
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>
diff --git a/sdk/cwl/arvados_cwl/http.py b/sdk/cwl/arvados_cwl/http.py
index 064aa8d..af9bf04 100644
--- a/sdk/cwl/arvados_cwl/http.py
+++ b/sdk/cwl/arvados_cwl/http.py
@@ -31,6 +31,11 @@ def fresh_cache(url, properties):
if expires is None and "Expires" in pr:
expires = my_parsedate(pr["Expires"])
+ if expires is None:
+ # Use a default cache time of 24 hours if upstream didn't set
+ # any cache headers, to reduce redundant downloads.
+ expires = my_parsedate(pr["Date"]) + datetime.timedelta(hours=24)
+
if not expires:
return False
commit a7af6b95953992a7e9554b56604cf8248d5a2bf4
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date: Tue May 22 20:45:58 2018 -0400
11162: Add progress for http data download.
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>
diff --git a/sdk/cwl/arvados_cwl/http.py b/sdk/cwl/arvados_cwl/http.py
index de7bfe6..064aa8d 100644
--- a/sdk/cwl/arvados_cwl/http.py
+++ b/sdk/cwl/arvados_cwl/http.py
@@ -6,6 +6,9 @@ import re
import arvados
import arvados.collection
import urlparse
+import logging
+
+logger = logging.getLogger('arvados.cwl-runner')
def my_formatdate(dt):
return email.utils.formatdate(timeval=time.mktime(now.timetuple()), localtime=False, usegmt=True)
@@ -35,7 +38,7 @@ def fresh_cache(url, properties):
def remember_headers(url, properties, headers):
properties.setdefault(url, {})
- for h in ("Cache-Control", "ETag", "Expires", "Date"):
+ for h in ("Cache-Control", "ETag", "Expires", "Date", "Content-Length"):
if h in headers:
properties[url][h] = headers[h]
if "Date" not in headers:
@@ -74,17 +77,31 @@ def http_to_keep(api, project_uuid, url):
req = requests.get(url, stream=True)
if req.status_code != 200:
- raise Exception("Got status %s" % req.status_code)
+ raise Exception("Failed to download '%s' got status %s " % (req.status_code, url))
remember_headers(url, properties, req.headers)
+ logger.info("Downloading %s (%s bytes)", url, properties[url]["Content-Length"])
+
c = arvados.collection.Collection()
+ count = 0
+ start = time.time()
+ checkpoint = start
with c.open(name, "w") as f:
- for chunk in req.iter_content(chunk_size=128):
+ for chunk in req.iter_content(chunk_size=1024):
+ count += len(chunk)
f.write(chunk)
-
- c.save_new(name="Downloaded from %s" % url, owner_uuid=project_uuid)
+ now = time.time()
+ if (now - checkpoint) > 20:
+ bps = (float(count)/float(now - start))
+ logger.info("%2.1f%% complete, %3.2f MiB/s, %1.0f seconds left",
+ float(count * 100) / float(properties[url]["Content-Length"]),
+ bps/(1024*1024),
+ (int(properties[url]["Content-Length"])-count)/bps)
+ checkpoint = now
+
+ c.save_new(name="Downloaded from %s" % url, owner_uuid=project_uuid, ensure_unique_name=True)
api.collections().update(uuid=c.manifest_locator(), body={"collection":{"properties": properties}}).execute()
commit 9a52f3d07288f8a0073736a2a273ddaeb97ddc18
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date: Tue May 22 16:53:16 2018 -0400
11162: Add --submit-request-uuid
Allows the container request to be created separately from the initialization
that occurs before the container is actually committed to run.
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>
diff --git a/sdk/cwl/arvados_cwl/__init__.py b/sdk/cwl/arvados_cwl/__init__.py
index 4eda886..5c60f7d 100644
--- a/sdk/cwl/arvados_cwl/__init__.py
+++ b/sdk/cwl/arvados_cwl/__init__.py
@@ -401,6 +401,9 @@ class ArvCwlRunner(object):
if self.intermediate_output_ttl < 0:
raise Exception("Invalid value %d for --intermediate-output-ttl, cannot be less than zero" % self.intermediate_output_ttl)
+ if kwargs.get("submit_request_uuid") and self.work_api != "containers":
+ raise Exception("--submit-request-uuid requires containers API, but using '{}' api".format(self.work_api))
+
if not kwargs.get("name"):
kwargs["name"] = self.name = tool.tool.get("label") or tool.metadata.get("label") or os.path.basename(tool.tool["id"])
@@ -706,6 +709,10 @@ def arg_parser(): # type: () -> argparse.ArgumentParser
help="Docker image for workflow runner job, default arvados/jobs:%s" % __version__,
default=None)
+ parser.add_argument("--submit-request-uuid", type=str,
+ default=None,
+ help="Update and commit supplied container request instead of creating a new one (containers API only).")
+
parser.add_argument("--name", type=str,
help="Name to use for workflow execution instance.",
default=None)
diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py
index 4e7811d..0bec692 100644
--- a/sdk/cwl/arvados_cwl/arvcontainer.py
+++ b/sdk/cwl/arvados_cwl/arvcontainer.py
@@ -51,7 +51,6 @@ class ArvadosContainer(object):
container_request = {
"command": self.command_line,
- "owner_uuid": self.arvrunner.project_uuid,
"name": self.name,
"output_path": self.outdir,
"cwd": self.outdir,
@@ -61,6 +60,9 @@ class ArvadosContainer(object):
}
runtime_constraints = {}
+ if self.arvrunner.project_uuid:
+ container_request["owner_uuid"] = self.arvrunner.project_uuid
+
if self.arvrunner.secret_store.has_secret(self.command_line):
raise WorkflowException("Secret material leaked on command line, only file literals may contain secrets")
@@ -251,9 +253,15 @@ class ArvadosContainer(object):
self.output_callback = self.arvrunner.get_wrapped_callback(self.output_callback)
try:
- response = self.arvrunner.api.container_requests().create(
- body=container_request
- ).execute(num_retries=self.arvrunner.num_retries)
+ if kwargs.get("submit_request_uuid"):
+ response = self.arvrunner.api.container_requests().update(
+ uuid=kwargs["submit_request_uuid"],
+ body=container_request
+ ).execute(num_retries=self.arvrunner.num_retries)
+ else:
+ response = self.arvrunner.api.container_requests().create(
+ body=container_request
+ ).execute(num_retries=self.arvrunner.num_retries)
self.uuid = response["uuid"]
self.arvrunner.process_submitted(self)
@@ -343,7 +351,6 @@ class RunnerContainer(Runner):
self.job_order[param] = {"$include": mnt}
container_req = {
- "owner_uuid": self.arvrunner.project_uuid,
"name": self.name,
"output_path": "/var/spool/cwl",
"cwd": "/var/spool/cwl",
@@ -442,11 +449,18 @@ class RunnerContainer(Runner):
def run(self, **kwargs):
kwargs["keepprefix"] = "keep:"
job_spec = self.arvados_job_spec(**kwargs)
- job_spec.setdefault("owner_uuid", self.arvrunner.project_uuid)
+ if self.arvrunner.project_uuid:
+ job_spec["owner_uuid"] = self.arvrunner.project_uuid
- response = self.arvrunner.api.container_requests().create(
- body=job_spec
- ).execute(num_retries=self.arvrunner.num_retries)
+ if kwargs.get("submit_request_uuid"):
+ response = self.arvrunner.api.container_requests().update(
+ uuid=kwargs["submit_request_uuid"],
+ body=job_spec
+ ).execute(num_retries=self.arvrunner.num_retries)
+ else:
+ response = self.arvrunner.api.container_requests().create(
+ body=job_spec
+ ).execute(num_retries=self.arvrunner.num_retries)
self.uuid = response["uuid"]
self.arvrunner.process_submitted(self)
commit 6853ac7e518c5c6a888c5f5bd4edb27bdb564a2c
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date: Mon May 14 12:02:51 2018 -0400
11162: Handle missing git
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>
diff --git a/sdk/cwl/arvados_version.py b/sdk/cwl/arvados_version.py
index a24d53d..13e6d36 100644
--- a/sdk/cwl/arvados_version.py
+++ b/sdk/cwl/arvados_version.py
@@ -34,7 +34,7 @@ def get_version(setup_dir, module):
else:
try:
save_version(setup_dir, module, git_latest_tag() + git_timestamp_tag())
- except subprocess.CalledProcessError:
+ except (subprocess.CalledProcessError, OSError):
pass
return read_version(setup_dir, module)
commit 2600c9f3e0cbe1072c1bc4887aa5febe8906ddec
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date: Fri May 11 16:36:54 2018 -0400
11162: Support public http and https file references
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>
diff --git a/sdk/cwl/arvados_cwl/__init__.py b/sdk/cwl/arvados_cwl/__init__.py
index d509f40..4eda886 100644
--- a/sdk/cwl/arvados_cwl/__init__.py
+++ b/sdk/cwl/arvados_cwl/__init__.py
@@ -37,7 +37,7 @@ import arvados.commands._util as arv_cmd
from .arvcontainer import ArvadosContainer, RunnerContainer
from .arvjob import ArvadosJob, RunnerJob, RunnerTemplate
-from. runner import Runner, upload_docker, upload_job_order, upload_workflow_deps, upload_dependencies
+from. runner import Runner, upload_docker, upload_job_order, upload_workflow_deps
from .arvtool import ArvadosCommandTool
from .arvworkflow import ArvadosWorkflow, upload_workflow
from .fsaccess import CollectionFsAccess, CollectionFetcher, collectionResolver, CollectionCache
diff --git a/sdk/cwl/arvados_cwl/http.py b/sdk/cwl/arvados_cwl/http.py
new file mode 100644
index 0000000..de7bfe6
--- /dev/null
+++ b/sdk/cwl/arvados_cwl/http.py
@@ -0,0 +1,91 @@
+import requests
+import email.utils
+import time
+import datetime
+import re
+import arvados
+import arvados.collection
+import urlparse
+
+def my_formatdate(dt):
+ return email.utils.formatdate(timeval=time.mktime(now.timetuple()), localtime=False, usegmt=True)
+
+def my_parsedate(text):
+ return datetime.datetime(*email.utils.parsedate(text)[:6])
+
+def fresh_cache(url, properties):
+ pr = properties[url]
+ expires = None
+
+ if "Cache-Control" in pr:
+ if re.match(r"immutable", pr["Cache-Control"]):
+ return True
+
+ g = re.match(r"(s-maxage|max-age)=(\d+)", pr["Cache-Control"])
+ if g:
+ expires = my_parsedate(pr["Date"]) + datetime.timedelta(seconds=int(g.group(2)))
+
+ if expires is None and "Expires" in pr:
+ expires = my_parsedate(pr["Expires"])
+
+ if not expires:
+ return False
+
+ return (datetime.datetime.utcnow() < expires)
+
+def remember_headers(url, properties, headers):
+ properties.setdefault(url, {})
+ for h in ("Cache-Control", "ETag", "Expires", "Date"):
+ if h in headers:
+ properties[url][h] = headers[h]
+ if "Date" not in headers:
+ properties[url]["Date"] = my_formatdate(datetime.datetime.utcnow())
+
+
+def changed(url, properties):
+ req = requests.head(url)
+ remember_headers(url, properties, req.headers)
+
+ if req.status_code != 200:
+ raise Exception("Got status %s" % req.status_code)
+
+ pr = properties[url]
+ if "ETag" in pr and "ETag" in req.headers:
+ if pr["ETag"] == req.headers["ETag"]:
+ return False
+ return True
+
+def http_to_keep(api, project_uuid, url):
+ r = api.collections().list(filters=[["properties", "exists", url]]).execute()
+ name = urlparse.urlparse(url).path.split("/")[-1]
+
+ for item in r["items"]:
+ properties = item["properties"]
+ if fresh_cache(url, properties):
+ # Do nothing
+ return "keep:%s/%s" % (item["portable_data_hash"], name)
+
+ if not changed(url, properties):
+ # ETag didn't change, same content, just update headers
+ api.collections().update(uuid=item["uuid"], body={"collection":{"properties": properties}}).execute()
+ return "keep:%s/%s" % (item["portable_data_hash"], name)
+
+ properties = {}
+ req = requests.get(url, stream=True)
+
+ if req.status_code != 200:
+ raise Exception("Got status %s" % req.status_code)
+
+ remember_headers(url, properties, req.headers)
+
+ c = arvados.collection.Collection()
+
+ with c.open(name, "w") as f:
+ for chunk in req.iter_content(chunk_size=128):
+ f.write(chunk)
+
+ c.save_new(name="Downloaded from %s" % url, owner_uuid=project_uuid)
+
+ api.collections().update(uuid=c.manifest_locator(), body={"collection":{"properties": properties}}).execute()
+
+ return "keep:%s/%s" % (c.portable_data_hash(), name)
diff --git a/sdk/cwl/arvados_cwl/pathmapper.py b/sdk/cwl/arvados_cwl/pathmapper.py
index 6fedb12..bd4b528 100644
--- a/sdk/cwl/arvados_cwl/pathmapper.py
+++ b/sdk/cwl/arvados_cwl/pathmapper.py
@@ -16,6 +16,8 @@ from schema_salad.sourceline import SourceLine
from cwltool.pathmapper import PathMapper, MapperEnt, abspath, adjustFileObjs, adjustDirObjs
from cwltool.workflow import WorkflowException
+from .http import http_to_keep
+
logger = logging.getLogger('arvados.cwl-runner')
def trim_listing(obj):
@@ -81,6 +83,10 @@ class ArvPathMapper(PathMapper):
raise WorkflowException("File literal '%s' is missing `contents`" % src)
if srcobj["class"] == "Directory" and "listing" not in srcobj:
raise WorkflowException("Directory literal '%s' is missing `listing`" % src)
+ elif src.startswith("http:") or src.startswith("https:"):
+ keepref = http_to_keep(self.arvrunner.api, self.arvrunner.project_uuid, src)
+ logger.info("%s is %s", src, keepref)
+ self._pathmap[src] = MapperEnt(keepref, keepref, srcobj["class"], True)
else:
self._pathmap[src] = MapperEnt(src, src, srcobj["class"], True)
diff --git a/sdk/cwl/setup.py b/sdk/cwl/setup.py
index 4df89ee..05ff8a2 100644
--- a/sdk/cwl/setup.py
+++ b/sdk/cwl/setup.py
@@ -35,7 +35,7 @@ setup(name='arvados-cwl-runner',
install_requires=[
'cwltool==1.0.20180522135731',
'schema-salad==2.7.20180501211602',
- 'typing==3.5.3.0',
+ 'typing >= 3.5.3',
'ruamel.yaml >=0.13.11, <0.15',
'arvados-python-client>=1.1.4.20180507184611',
'setuptools',
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list