[ARVADOS] created: 9486b15595a667742ef541d3f31f78507afea8e7
Git user
git at public.curoverse.com
Wed Oct 5 14:00:13 EDT 2016
at 9486b15595a667742ef541d3f31f78507afea8e7 (commit)
commit 9486b15595a667742ef541d3f31f78507afea8e7
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Wed Oct 5 11:08:40 2016 -0400
9308: Set task.keepTmpOutput: true when outputDirType is keep_output_dir.
diff --git a/sdk/cwl/arvados_cwl/arv-cwl-schema.yml b/sdk/cwl/arvados_cwl/arv-cwl-schema.yml
index 44b1b06..fe3eadd 100644
--- a/sdk/cwl/arvados_cwl/arv-cwl-schema.yml
+++ b/sdk/cwl/arvados_cwl/arv-cwl-schema.yml
@@ -13,6 +13,29 @@ $graph:
_id: "@type"
_type: "@vocab"
+- name: OutputDirType
+ type: enum
+ symbols:
+ - local_output_dir
+ - keep_output_dir
+ doc:
+ - |
+ local_output_dir: Use regular file system local to the compute node.
+ There must be sufficient local scratch space to store entire output;
+ specify this with `outdirMin` of `ResourceRequirement`. Files are
+ batch uploaded to Keep when the process completes. Most compatible, but
+ upload step can be time consuming for very large files.
+ - |
+ keep_output_dir: Use writable Keep mount. Files are streamed to Keep as
+ they are written. Does not consume local scratch space, but does consume
+ RAM for output buffers (up to 192 MiB per file simultaneously open for
+ writing). Best suited to processes which produce sequential output of
+ large files (non-sequential writes may produce fragmented file
+ manifests). Supports regular files and directories, does not support
+ special files such as symlinks, hard links, named pipes, named sockets,
+ or device nodes.
+
+
- name: RuntimeConstraints
type: record
doc: |
@@ -31,6 +54,11 @@ $graph:
MiB. Increase this to reduce cache thrashing in situations such as
accessing multiple large (64+ MiB) files at the same time, or
performing random access on a large file.
+ - name: outputDirType
+ type: OutputDirType?
+ doc: |
+ Preferred backing store for output staging. If not specified, the
+ system may choose which one to use.
- name: APIRequirement
type: record
diff --git a/sdk/cwl/arvados_cwl/arvjob.py b/sdk/cwl/arvados_cwl/arvjob.py
index 0818d5d..4ede88d 100644
--- a/sdk/cwl/arvados_cwl/arvjob.py
+++ b/sdk/cwl/arvados_cwl/arvjob.py
@@ -95,7 +95,13 @@ class ArvadosJob(object):
runtime_req, _ = get_feature(self, "http://arvados.org/cwl#RuntimeConstraints")
if runtime_req:
- runtime_constraints["keep_cache_mb_per_task"] = runtime_req["keep_cache"]
+ if "keep_cache" in runtime_req:
+ runtime_constraints["keep_cache_mb_per_task"] = runtime_req["keep_cache"]
+ if "outputDirType" in runtime_req:
+ if runtime_req["outputDirType"] == "local_output_dir":
+ script_parameters["task.keepTmpOutput"] = False
+ elif runtime_req["outputDirType"] == "keep_output_dir":
+ script_parameters["task.keepTmpOutput"] = True
filters = [["repository", "=", "arvados"],
["script", "=", "crunchrunner"],
diff --git a/sdk/cwl/tests/test_job.py b/sdk/cwl/tests/test_job.py
index 7f31520..99e34d3 100644
--- a/sdk/cwl/tests/test_job.py
+++ b/sdk/cwl/tests/test_job.py
@@ -82,7 +82,8 @@ class TestJob(unittest.TestCase):
"tmpdirMin": 4000
}, {
"class": "http://arvados.org/cwl#RuntimeConstraints",
- "keep_cache": 512
+ "keep_cache": 512,
+ "outputDirType": "keep_output_dir"
}, {
"class": "http://arvados.org/cwl#APIRequirement",
}],
@@ -101,6 +102,7 @@ class TestJob(unittest.TestCase):
'script_parameters': {
'tasks': [{
'task.env': {'HOME': '$(task.outdir)', 'TMPDIR': '$(task.tmpdir)'},
+ 'task.keepTmpOutput': True,
'command': ['ls']
}]
},
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list