[ARVADOS] created: 80459d52161120ae8e33da140984d596271d5195
Git user
git at public.curoverse.com
Mon Jun 26 10:45:21 EDT 2017
at 80459d52161120ae8e33da140984d596271d5195 (commit)
commit 80459d52161120ae8e33da140984d596271d5195
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Mon Jun 26 10:45:16 2017 -0400
11095: Add arv:ReuseRequirement hint. Update tests & documentation.
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curoverse.com>
diff --git a/doc/user/cwl/cwl-extensions.html.textile.liquid b/doc/user/cwl/cwl-extensions.html.textile.liquid
index 8a62034..0ba1045 100644
--- a/doc/user/cwl/cwl-extensions.html.textile.liquid
+++ b/doc/user/cwl/cwl-extensions.html.textile.liquid
@@ -14,7 +14,7 @@ $namespaces:
cwltool: "http://commonwl.org/cwltool#"
</pre>
-Arvados extensions should go into the @hints@ section, for example:
+For portability, Arvados extensions should go into the @hints@ section of your CWL file, for example:
<pre>
hints:
@@ -29,8 +29,12 @@ hints:
loadListing: shallow_listing
arv:IntermediateOutput:
outputTTL: 3600
+ arv:ReuseRequirement:
+ enableReuse: false
</pre>
+The one exception to this is @arv:APIRequirement@, see note below.
+
h2. arv:RunInSingleContainer
Indicates that a subworkflow should run in a single container and not be scheduled as separate steps.
@@ -84,3 +88,11 @@ table(table table-bordered table-condensed).
|_. Field |_. Type |_. Description |
|outputTTL|int|If the value is greater than zero, consider intermediate output collections to be temporary and should be automatically trashed. Temporary collections will be trashed @outputTTL@ seconds after creation. A value of zero means intermediate output should be retained indefinitely (this is the default behavior).
Note: arvados-cwl-runner currently does not take workflow dependencies into account when setting the TTL on an intermediate output collection. If the TTL is too short, it is possible for a collection to be trashed before downstream steps that consume it are started. The recommended minimum value for TTL is the expected duration of the entire workflow.|
+
+h2. arv:ReuseRequirement
+
+Enable/disable work reuse for current process. Default true (work reuse enabled).
+
+table(table table-bordered table-condensed).
+|_. Field |_. Type |_. Description |
+|enableReuse|boolean|Enable/disable work reuse for current process. Default true (work reuse enabled).|
diff --git a/sdk/cwl/arvados_cwl/__init__.py b/sdk/cwl/arvados_cwl/__init__.py
index f7da563..64ec4e2 100644
--- a/sdk/cwl/arvados_cwl/__init__.py
+++ b/sdk/cwl/arvados_cwl/__init__.py
@@ -685,7 +685,8 @@ def add_arv_hints():
"http://arvados.org/cwl#PartitionRequirement",
"http://arvados.org/cwl#APIRequirement",
"http://commonwl.org/cwltool#LoadListingRequirement",
- "http://arvados.org/cwl#IntermediateOutput"
+ "http://arvados.org/cwl#IntermediateOutput",
+ "http://arvados.org/cwl#ReuseRequirement"
])
def main(args, stdout, stderr, api_client=None, keep_client=None):
diff --git a/sdk/cwl/arvados_cwl/arv-cwl-schema.yml b/sdk/cwl/arvados_cwl/arv-cwl-schema.yml
index 6838c0f..3107628 100644
--- a/sdk/cwl/arvados_cwl/arv-cwl-schema.yml
+++ b/sdk/cwl/arvados_cwl/arv-cwl-schema.yml
@@ -149,3 +149,19 @@ $graph:
be trashed before downstream steps that consume it are started. The
recommended minimum value for TTL is the expected duration of the
entire workflow.
+
+- name: ReuseRequirement
+ type: record
+ extends: cwl:ProcessRequirement
+ inVocab: false
+ doc: |
+ Enable/disable work reuse for current process. Default true (work reuse enabled).
+ fields:
+ - name: class
+ type: string
+ doc: "Always 'arv:ReuseRequirement'"
+ jsonldPredicate:
+ _id: "@type"
+ _type: "@vocab"
+ - name: enableReuse
+ type: boolean
diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py
index b58a858..bf4aab5 100644
--- a/sdk/cwl/arvados_cwl/arvcontainer.py
+++ b/sdk/cwl/arvados_cwl/arvcontainer.py
@@ -184,9 +184,15 @@ class ArvadosContainer(object):
container_request["output_ttl"] = self.output_ttl
container_request["mounts"] = mounts
container_request["runtime_constraints"] = runtime_constraints
- container_request["use_existing"] = kwargs.get("enable_reuse", True)
container_request["scheduling_parameters"] = scheduling_parameters
+ enable_reuse = kwargs.get("enable_reuse", True)
+ if enable_reuse:
+ reuse_req, _ = get_feature(self, "http://arvados.org/cwl#ReuseRequirement")
+ if reuse_req:
+ enable_reuse = reuse_req["enableReuse"]
+ container_request["use_existing"] = enable_reuse
+
if kwargs.get("runnerjob", "").startswith("arvwf:"):
wfuuid = kwargs["runnerjob"][6:kwargs["runnerjob"].index("#")]
wfrecord = self.arvrunner.api.workflows().get(uuid=wfuuid).execute(num_retries=self.arvrunner.num_retries)
diff --git a/sdk/cwl/arvados_cwl/arvjob.py b/sdk/cwl/arvados_cwl/arvjob.py
index ab06386..877f423 100644
--- a/sdk/cwl/arvados_cwl/arvjob.py
+++ b/sdk/cwl/arvados_cwl/arvjob.py
@@ -124,6 +124,12 @@ class ArvadosJob(object):
if not self.arvrunner.ignore_docker_for_reuse:
filters.append(["docker_image_locator", "in docker", runtime_constraints["docker_image"]])
+ enable_reuse = kwargs.get("enable_reuse", True)
+ if enable_reuse:
+ reuse_req, _ = get_feature(self, "http://arvados.org/cwl#ReuseRequirement")
+ if reuse_req:
+ enable_reuse = reuse_req["enableReuse"]
+
try:
with Perf(metrics, "create %s" % self.name):
response = self.arvrunner.api.jobs().create(
@@ -137,7 +143,7 @@ class ArvadosJob(object):
"runtime_constraints": runtime_constraints
},
filters=filters,
- find_or_create=kwargs.get("enable_reuse", True)
+ find_or_create=enable_reuse
).execute(num_retries=self.arvrunner.num_retries)
self.arvrunner.processes[response["uuid"]] = self
diff --git a/sdk/cwl/tests/arvados-tests.yml b/sdk/cwl/tests/arvados-tests.yml
index 87528b2..c38fb0b 100644
--- a/sdk/cwl/tests/arvados-tests.yml
+++ b/sdk/cwl/tests/arvados-tests.yml
@@ -82,3 +82,8 @@
}
tool: wf/listing_deep.cwl
doc: test deep directory listing
+
+- job: null
+ output: {}
+ tool: noreuse.cwl
+ doc: "Test arv:ReuseRequirement"
diff --git a/sdk/cwl/tests/noreuse.cwl b/sdk/cwl/tests/noreuse.cwl
new file mode 100644
index 0000000..46771d1
--- /dev/null
+++ b/sdk/cwl/tests/noreuse.cwl
@@ -0,0 +1,16 @@
+cwlVersion: v1.0
+class: Workflow
+$namespaces:
+ arv: "http://arvados.org/cwl#"
+inputs: []
+outputs: []
+steps:
+ step1:
+ in:
+ message:
+ default: "hello world"
+ out: [output]
+ hints:
+ arv:ReuseRequirement:
+ enableReuse: false
+ run: stdout.cwl
\ No newline at end of file
diff --git a/sdk/cwl/tests/test_container.py b/sdk/cwl/tests/test_container.py
index aab963d..b1f5bfe 100644
--- a/sdk/cwl/tests/test_container.py
+++ b/sdk/cwl/tests/test_container.py
@@ -116,6 +116,9 @@ class TestContainer(unittest.TestCase):
}, {
"class": "http://arvados.org/cwl#IntermediateOutput",
"outputTTL": 7200
+ }, {
+ "class": "http://arvados.org/cwl#ReuseRequirement",
+ "enableReuse": False
}],
"baseCommand": "ls"
})
@@ -127,7 +130,7 @@ class TestContainer(unittest.TestCase):
arvtool.formatgraph = None
for j in arvtool.job({}, mock.MagicMock(), basedir="", name="test_resource_requirements",
make_fs_access=make_fs_access, tmpdir="/tmp"):
- j.run()
+ j.run(enable_reuse=True)
call_args, call_kwargs = runner.api.container_requests().create.call_args
@@ -143,7 +146,7 @@ class TestContainer(unittest.TestCase):
'keep_cache_ram': 536870912,
'API': True
},
- 'use_existing': True,
+ 'use_existing': False,
'priority': 1,
'mounts': {
'/tmp': {'kind': 'tmp',
diff --git a/sdk/cwl/tests/test_job.py b/sdk/cwl/tests/test_job.py
index a71d1d8..d64381b 100644
--- a/sdk/cwl/tests/test_job.py
+++ b/sdk/cwl/tests/test_job.py
@@ -133,6 +133,10 @@ class TestJob(unittest.TestCase):
"outputDirType": "keep_output_dir"
}, {
"class": "http://arvados.org/cwl#APIRequirement",
+ },
+ {
+ "class": "http://arvados.org/cwl#ReuseRequirement",
+ "enableReuse": False
}],
"baseCommand": "ls"
}
@@ -142,7 +146,7 @@ class TestJob(unittest.TestCase):
make_fs_access=make_fs_access, loader=Loader({}))
arvtool.formatgraph = None
for j in arvtool.job({}, mock.MagicMock(), basedir="", make_fs_access=make_fs_access):
- j.run()
+ j.run(enable_reuse=True)
runner.api.jobs().create.assert_called_with(
body=JsonDiffMatcher({
'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
@@ -166,7 +170,7 @@ class TestJob(unittest.TestCase):
'keep_cache_mb_per_task': 512
}
}),
- find_or_create=True,
+ find_or_create=False,
filters=[['repository', '=', 'arvados'],
['script', '=', 'crunchrunner'],
['script_version', 'in git', 'a3f2cb186e437bfce0031b024b2157b73ed2717d'],
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list