[ARVADOS] updated: 8645888f12c25edaaac8e03fb5691cfcfbcdb9b2
Git user
git at public.curoverse.com
Mon May 22 16:14:33 EDT 2017
Summary of changes:
doc/user/cwl/cwl-extensions.html.textile.liquid | 17 ++-
sdk/cwl/arvados_cwl/__init__.py | 7 +-
sdk/cwl/arvados_cwl/arv-cwl-schema.yml | 30 +++++
sdk/cwl/arvados_cwl/arvcontainer.py | 16 ++-
sdk/cwl/tests/test_container.py | 159 +++++++++++++-----------
sdk/cwl/tests/test_submit.py | 24 ++++
6 files changed, 173 insertions(+), 80 deletions(-)
via 8645888f12c25edaaac8e03fb5691cfcfbcdb9b2 (commit)
via 43dbdec9f0f6361e4705671bbdade34cd18adefa (commit)
from dd58470216d6a416e4d831a3b9c25bfdaa255fff (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
commit 8645888f12c25edaaac8e03fb5691cfcfbcdb9b2
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Mon May 22 16:08:49 2017 -0400
11100: Update/add tests for --intermediate-output-ttl
diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py
index 8e32d2a..374f368 100644
--- a/sdk/cwl/arvados_cwl/arvcontainer.py
+++ b/sdk/cwl/arvados_cwl/arvcontainer.py
@@ -209,6 +209,7 @@ class ArvadosContainer(object):
self.output_callback({}, "permanentFail")
def done(self, record):
+ outputs = {}
try:
if self.output_ttl:
self.arvrunner.add_intermediate_output(record["output_uuid"])
@@ -238,7 +239,6 @@ class ArvadosContainer(object):
num_retries=self.arvrunner.num_retries)
done.logtail(logc, logger, "%s error log:" % self.arvrunner.label(self))
- outputs = {}
if container["output"]:
outputs = done.done_outputs(self, container, "/tmp", self.outdir, "/keep")
except WorkflowException as e:
diff --git a/sdk/cwl/tests/test_container.py b/sdk/cwl/tests/test_container.py
index af05773..2492516 100644
--- a/sdk/cwl/tests/test_container.py
+++ b/sdk/cwl/tests/test_container.py
@@ -28,6 +28,7 @@ class TestContainer(unittest.TestCase):
runner = mock.MagicMock()
runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
runner.ignore_docker_for_reuse = False
+ runner.intermediate_output_ttl = 0
keepdocker.return_value = [("zzzzz-4zz18-zzzzzzzzzzzzzz3", "")]
runner.api.collections().get().execute.return_value = {
@@ -72,6 +73,7 @@ class TestContainer(unittest.TestCase):
'state': 'Committed',
'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
'output_path': '/var/spool/cwl',
+ 'output_ttl': 0,
'container_image': 'arvados/jobs',
'command': ['ls', '/var/spool/cwl'],
'cwd': '/var/spool/cwl',
@@ -87,6 +89,7 @@ class TestContainer(unittest.TestCase):
runner = mock.MagicMock()
runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
runner.ignore_docker_for_reuse = False
+ runner.intermediate_output_ttl = 3600
document_loader, avsc_names, schema_metadata, metaschema_loader = cwltool.process.get_schema("v1.0")
keepdocker.return_value = [("zzzzz-4zz18-zzzzzzzzzzzzzz3", "")]
@@ -110,6 +113,9 @@ class TestContainer(unittest.TestCase):
}, {
"class": "http://arvados.org/cwl#PartitionRequirement",
"partition": "blurb"
+ }, {
+ "class": "http://arvados.org/cwl#IntermediateOutput",
+ "outputTTL": 7200
}],
"baseCommand": "ls"
})
@@ -126,35 +132,36 @@ class TestContainer(unittest.TestCase):
call_args, call_kwargs = runner.api.container_requests().create.call_args
call_body_expected = {
- 'environment': {
- 'HOME': '/var/spool/cwl',
- 'TMPDIR': '/tmp'
- },
- 'name': 'test_resource_requirements',
- 'runtime_constraints': {
- 'vcpus': 3,
- 'ram': 3145728000,
- 'keep_cache_ram': 536870912,
- 'API': True
- },
- 'use_existing': True,
- 'priority': 1,
- 'mounts': {
- '/tmp': {'kind': 'tmp',
- "capacity": 4194304000 },
- '/var/spool/cwl': {'kind': 'tmp',
- "capacity": 5242880000 }
- },
- 'state': 'Committed',
- 'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
- 'output_path': '/var/spool/cwl',
- 'container_image': 'arvados/jobs',
- 'command': ['ls'],
- 'cwd': '/var/spool/cwl',
- 'scheduling_parameters': {
- 'partitions': ['blurb']
- },
- 'properties': {}
+ 'environment': {
+ 'HOME': '/var/spool/cwl',
+ 'TMPDIR': '/tmp'
+ },
+ 'name': 'test_resource_requirements',
+ 'runtime_constraints': {
+ 'vcpus': 3,
+ 'ram': 3145728000,
+ 'keep_cache_ram': 536870912,
+ 'API': True
+ },
+ 'use_existing': True,
+ 'priority': 1,
+ 'mounts': {
+ '/tmp': {'kind': 'tmp',
+ "capacity": 4194304000 },
+ '/var/spool/cwl': {'kind': 'tmp',
+ "capacity": 5242880000 }
+ },
+ 'state': 'Committed',
+ 'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
+ 'output_path': '/var/spool/cwl',
+ 'output_ttl': 7200,
+ 'container_image': 'arvados/jobs',
+ 'command': ['ls'],
+ 'cwd': '/var/spool/cwl',
+ 'scheduling_parameters': {
+ 'partitions': ['blurb']
+ },
+ 'properties': {}
}
call_body = call_kwargs.get('body', None)
@@ -172,6 +179,7 @@ class TestContainer(unittest.TestCase):
runner = mock.MagicMock()
runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
runner.ignore_docker_for_reuse = False
+ runner.intermediate_output_ttl = 0
document_loader, avsc_names, schema_metadata, metaschema_loader = cwltool.process.get_schema("v1.0")
keepdocker.return_value = [("zzzzz-4zz18-zzzzzzzzzzzzzz3", "")]
@@ -236,52 +244,53 @@ class TestContainer(unittest.TestCase):
vwdmock.copy.assert_has_calls([mock.call('subdir', 'subdir', source_collection=sourcemock)])
call_body_expected = {
- 'environment': {
- 'HOME': '/var/spool/cwl',
- 'TMPDIR': '/tmp'
+ 'environment': {
+ 'HOME': '/var/spool/cwl',
+ 'TMPDIR': '/tmp'
+ },
+ 'name': 'test_initial_work_dir',
+ 'runtime_constraints': {
+ 'vcpus': 1,
+ 'ram': 1073741824
+ },
+ 'use_existing': True,
+ 'priority': 1,
+ 'mounts': {
+ '/tmp': {'kind': 'tmp',
+ "capacity": 1073741824 },
+ '/var/spool/cwl': {'kind': 'tmp',
+ "capacity": 1073741824 },
+ '/var/spool/cwl/foo': {
+ 'kind': 'collection',
+ 'path': 'foo',
+ 'portable_data_hash': '99999999999999999999999999999996+99'
},
- 'name': 'test_initial_work_dir',
- 'runtime_constraints': {
- 'vcpus': 1,
- 'ram': 1073741824
- },
- 'use_existing': True,
- 'priority': 1,
- 'mounts': {
- '/tmp': {'kind': 'tmp',
- "capacity": 1073741824 },
- '/var/spool/cwl': {'kind': 'tmp',
- "capacity": 1073741824 },
- '/var/spool/cwl/foo': {
- 'kind': 'collection',
- 'path': 'foo',
- 'portable_data_hash': '99999999999999999999999999999996+99'
- },
- '/var/spool/cwl/foo2': {
- 'kind': 'collection',
- 'path': 'foo2',
- 'portable_data_hash': '99999999999999999999999999999996+99'
- },
- '/var/spool/cwl/filename': {
- 'kind': 'collection',
- 'path': 'filename',
- 'portable_data_hash': '99999999999999999999999999999996+99'
- },
- '/var/spool/cwl/subdir': {
- 'kind': 'collection',
- 'path': 'subdir',
- 'portable_data_hash': '99999999999999999999999999999996+99'
- }
+ '/var/spool/cwl/foo2': {
+ 'kind': 'collection',
+ 'path': 'foo2',
+ 'portable_data_hash': '99999999999999999999999999999996+99'
},
- 'state': 'Committed',
- 'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
- 'output_path': '/var/spool/cwl',
- 'container_image': 'arvados/jobs',
- 'command': ['ls'],
- 'cwd': '/var/spool/cwl',
- 'scheduling_parameters': {
+ '/var/spool/cwl/filename': {
+ 'kind': 'collection',
+ 'path': 'filename',
+ 'portable_data_hash': '99999999999999999999999999999996+99'
},
- 'properties': {}
+ '/var/spool/cwl/subdir': {
+ 'kind': 'collection',
+ 'path': 'subdir',
+ 'portable_data_hash': '99999999999999999999999999999996+99'
+ }
+ },
+ 'state': 'Committed',
+ 'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
+ 'output_path': '/var/spool/cwl',
+ 'output_ttl': 0,
+ 'container_image': 'arvados/jobs',
+ 'command': ['ls'],
+ 'cwd': '/var/spool/cwl',
+ 'scheduling_parameters': {
+ },
+ 'properties': {}
}
call_body = call_kwargs.get('body', None)
@@ -298,6 +307,7 @@ class TestContainer(unittest.TestCase):
runner = mock.MagicMock()
runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
runner.ignore_docker_for_reuse = False
+ runner.intermediate_output_ttl = 0
keepdocker.return_value = [("zzzzz-4zz18-zzzzzzzzzzzzzz3", "")]
runner.api.collections().get().execute.return_value = {
@@ -357,6 +367,7 @@ class TestContainer(unittest.TestCase):
'state': 'Committed',
'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
'output_path': '/var/spool/cwl',
+ 'output_ttl': 0,
'container_image': 'arvados/jobs',
'command': ['ls', '/var/spool/cwl'],
'cwd': '/var/spool/cwl',
@@ -387,6 +398,7 @@ class TestContainer(unittest.TestCase):
arvjob.collect_outputs = mock.MagicMock()
arvjob.successCodes = [0]
arvjob.outdir = "/var/spool/cwl"
+ arvjob.output_ttl = 3600
arvjob.collect_outputs.return_value = {"out": "stuff"}
@@ -402,6 +414,7 @@ class TestContainer(unittest.TestCase):
arvjob.collect_outputs.assert_called_with("keep:abc+123")
arvjob.output_callback.assert_called_with({"out": "stuff"}, "success")
+ runner.add_intermediate_output.assert_called_with("zzzzz-4zz18-zzzzzzzzzzzzzz2")
# The test passes no builder.resources
# Hence the default resources will apply: {'cores': 1, 'ram': 1024, 'outdirSize': 1024, 'tmpdirSize': 1024}
@@ -412,6 +425,7 @@ class TestContainer(unittest.TestCase):
runner = mock.MagicMock()
runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
runner.ignore_docker_for_reuse = False
+ runner.intermediate_output_ttl = 0
keepdocker.return_value = [("zzzzz-4zz18-zzzzzzzzzzzzzz3", "")]
runner.api.collections().get().execute.return_value = {
@@ -478,6 +492,7 @@ class TestContainer(unittest.TestCase):
'state': 'Committed',
'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
'output_path': '/var/spool/cwl',
+ 'output_ttl': 0,
'container_image': 'arvados/jobs',
'command': ['ls', '/var/spool/cwl'],
'cwd': '/var/spool/cwl',
diff --git a/sdk/cwl/tests/test_submit.py b/sdk/cwl/tests/test_submit.py
index 85c49c9..731b1d5 100644
--- a/sdk/cwl/tests/test_submit.py
+++ b/sdk/cwl/tests/test_submit.py
@@ -520,6 +520,30 @@ class TestSubmit(unittest.TestCase):
self.assertEqual(capture_stdout.getvalue(),
stubs.expect_container_request_uuid + '\n')
+
+ @stubs
+ def test_submit_container_output_ttl(self, stubs):
+ capture_stdout = cStringIO.StringIO()
+ try:
+ exited = arvados_cwl.main(
+ ["--submit", "--no-wait", "--api=containers", "--debug", "--intermediate-output-ttl", "3600",
+ "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+ capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
+ self.assertEqual(exited, 0)
+ except:
+ logging.exception("")
+
+ expect_container = copy.deepcopy(stubs.expect_container_spec)
+ expect_container["command"] = ['arvados-cwl-runner', '--local', '--api=containers', '--no-log-timestamps',
+ '--enable-reuse', '--on-error=continue',
+ "--intermediate-output-ttl=3600",
+ '/var/lib/cwl/workflow.json#main', '/var/lib/cwl/cwl.input.json']
+
+ stubs.api.container_requests().create.assert_called_with(
+ body=JsonDiffMatcher(expect_container))
+ self.assertEqual(capture_stdout.getvalue(),
+ stubs.expect_container_request_uuid + '\n')
+
@stubs
def test_submit_container_output_tags(self, stubs):
output_tags = "tag0,tag1,tag2"
commit 43dbdec9f0f6361e4705671bbdade34cd18adefa
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Mon May 22 14:32:08 2017 -0400
11100: Implement & document arv:IntermediateOutput hint.
diff --git a/doc/user/cwl/cwl-extensions.html.textile.liquid b/doc/user/cwl/cwl-extensions.html.textile.liquid
index 8aef8ac..5e5e497 100644
--- a/doc/user/cwl/cwl-extensions.html.textile.liquid
+++ b/doc/user/cwl/cwl-extensions.html.textile.liquid
@@ -14,7 +14,7 @@ $namespaces:
cwltool: "http://commonwl.org/cwltool#"
</pre>
-Arvados extensions must go into the @hints@ section, for example:
+Arvados extensions should go into the @hints@ section, for example:
<pre>
hints:
@@ -27,6 +27,8 @@ hints:
arv:APIRequirement: {}
cwltool:LoadListingRequirement:
loadListing: shallow_listing
+ arv:IntermediateOutput:
+ outputTTL: 3600
</pre>
h2. arv:RunInSingleContainer
@@ -56,7 +58,9 @@ table(table table-bordered table-condensed).
h2. arv:APIRequirement
-Indicates that process wants to access to the Arvados API. Will be granted limited network access and have @ARVADOS_API_HOST@ and @ARVADOS_API_TOKEN@ set in the environment.
+Indicates that process wants to access to the Arvados API. Will be granted network access and have @ARVADOS_API_HOST@ and @ARVADOS_API_TOKEN@ set in the environment. Tools which rely on the Arvados API being present should put @arv:APIRequirement@ in the @requirements@ section of the tool (rather than @hints@) to indicate that that it is not portable to non-Arvados CWL runners.
+
+Use @arv:APIRequirement@ in @hints@ to enable general (non-Arvados-specific) network access for a tool.
h2. cwltool:LoadListingRequirement
@@ -71,3 +75,12 @@ table(table table-bordered table-condensed).
*shallow_listing*: Only expand the first level of directory listing. The @listing@ field on the toplevel Directory object will contain the directory contents, however @listing@ will not be defined on subdirectories.
*deep_listing*: Recursively expand all levels of directory listing. The @listing@ field will be provided on the toplevel object and all subdirectories.
+
+h2. arv:IntermediateOutput
+
+Specify desired handling of intermediate output collections.
+
+table(table table-bordered table-condensed).
+|_. Field |_. Type |_. Description |
+|outputTTL|int|If the value is greater than zero, consider intermediate output collections to be temporary and should be automatically trashed. Temporary collections will be trashed `outputTTL` seconds after creation, or on successful completion of workflow (whichever comes first). A value of zero means intermediate output should be retained indefinitely (this is the default behavior).
+Note: arvados-cwl-runner currently does not take workflow dependencies into account when setting the TTL on an intermediate output collection. If the TTL is too short, it is possible for a collection to be trashed before before downstream steps that consume it are started. The recommend minimum value for TTL is the time required to complete entire the workflow.|
diff --git a/sdk/cwl/arvados_cwl/__init__.py b/sdk/cwl/arvados_cwl/__init__.py
index 94d602d..a4e2f8a 100644
--- a/sdk/cwl/arvados_cwl/__init__.py
+++ b/sdk/cwl/arvados_cwl/__init__.py
@@ -212,7 +212,7 @@ class ArvCwlRunner(object):
logger.info("Cleaning up intermediate output collections")
for i in self.intermediate_output_collections:
try:
- self.api_client.collections().delete(uuid=i).execute(num_retries=self.num_retries)
+ self.api.collections().delete(uuid=i).execute(num_retries=self.num_retries)
except:
logger.warn("Failed to delete intermediate output: %s", sys.exc_info()[1], exc_info=(sys.exc_info()[1] if self.debug else False))
if sys.exc_info()[0] is KeyboardInterrupt:
@@ -533,7 +533,7 @@ class ArvCwlRunner(object):
adjustDirObjs(self.final_output, partial(get_listing, self.fs_access))
adjustFileObjs(self.final_output, partial(compute_checksums, self.fs_access))
- if self.intermediate_output_ttl and self.final_status == "success":
+ if self.final_status == "success":
self.trash_intermediate_output()
return (self.final_output, self.final_status)
@@ -664,7 +664,8 @@ def add_arv_hints():
"http://arvados.org/cwl#RuntimeConstraints",
"http://arvados.org/cwl#PartitionRequirement",
"http://arvados.org/cwl#APIRequirement",
- "http://commonwl.org/cwltool#LoadListingRequirement"
+ "http://commonwl.org/cwltool#LoadListingRequirement",
+ "http://arvados.org/cwl#IntermediateOutput"
])
def main(args, stdout, stderr, api_client=None, keep_client=None):
diff --git a/sdk/cwl/arvados_cwl/arv-cwl-schema.yml b/sdk/cwl/arvados_cwl/arv-cwl-schema.yml
index 4e48216..1af6e38 100644
--- a/sdk/cwl/arvados_cwl/arv-cwl-schema.yml
+++ b/sdk/cwl/arvados_cwl/arv-cwl-schema.yml
@@ -114,3 +114,33 @@ $graph:
jsonldPredicate:
_id: "@type"
_type: "@vocab"
+
+- name: IntermediateOutput
+ type: record
+ extends: cwl:ProcessRequirement
+ inVocab: false
+ doc: |
+ Specify desired handling of intermediate output collections.
+ fields:
+ class:
+ type: string
+ doc: "Always 'arv:IntermediateOutput'"
+ jsonldPredicate:
+ _id: "@type"
+ _type: "@vocab"
+ outputTTL:
+ type: int
+ doc: |
+ If the value is greater than zero, consider intermediate output
+ collections to be temporary and should be automatically trashed.
+ Temporary collections will be trashed `outputTTL` seconds after
+ creation, or on successful completion of workflow (whichever comes
+ first). A value of zero means intermediate output should be retained
+ indefinitely (this is the default behavior).
+
+ Note: arvados-cwl-runner currently does not take workflow dependencies
+ into account when setting the TTL on an intermediate output collection.
+ If the TTL is too short, it is possible for a collection to be trashed
+ before before downstream steps that consume it are started. The
+ recommend minimum value for TTL is the time required to complete
+ entire the workflow.
diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py
index b195ce2..8e32d2a 100644
--- a/sdk/cwl/arvados_cwl/arvcontainer.py
+++ b/sdk/cwl/arvados_cwl/arvcontainer.py
@@ -43,7 +43,6 @@ class ArvadosContainer(object):
"priority": 1,
"state": "Committed",
"properties": {},
- "output_ttl", self.arvrunner.intermediate_output_ttl
}
runtime_constraints = {}
@@ -170,6 +169,16 @@ class ArvadosContainer(object):
if partition_req:
scheduling_parameters["partitions"] = aslist(partition_req["partition"])
+ intermediate_output_req, _ = get_feature(self, "http://arvados.org/cwl#IntermediateOutput")
+ if intermediate_output_req:
+ self.output_ttl = intermediate_output_req["outputTTL"]
+ else:
+ self.output_ttl = self.arvrunner.intermediate_output_ttl
+
+ if self.output_ttl < 0:
+ raise WorkflowError("Invalid value %d for output_ttl, cannot be less than zero" % container_request["output_ttl"])
+
+ container_request["output_ttl"] = self.output_ttl
container_request["mounts"] = mounts
container_request["runtime_constraints"] = runtime_constraints
container_request["use_existing"] = kwargs.get("enable_reuse", True)
@@ -201,7 +210,8 @@ class ArvadosContainer(object):
def done(self, record):
try:
- self.arvrunner.add_intermediate_output(record["output_uuid"])
+ if self.output_ttl:
+ self.arvrunner.add_intermediate_output(record["output_uuid"])
container = self.arvrunner.api.containers().get(
uuid=record["container_uuid"]
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list