[ARVADOS] updated: 8645888f12c25edaaac8e03fb5691cfcfbcdb9b2

Git user git at public.curoverse.com
Mon May 22 16:14:33 EDT 2017


Summary of changes:
 doc/user/cwl/cwl-extensions.html.textile.liquid |  17 ++-
 sdk/cwl/arvados_cwl/__init__.py                 |   7 +-
 sdk/cwl/arvados_cwl/arv-cwl-schema.yml          |  30 +++++
 sdk/cwl/arvados_cwl/arvcontainer.py             |  16 ++-
 sdk/cwl/tests/test_container.py                 | 159 +++++++++++++-----------
 sdk/cwl/tests/test_submit.py                    |  24 ++++
 6 files changed, 173 insertions(+), 80 deletions(-)

       via  8645888f12c25edaaac8e03fb5691cfcfbcdb9b2 (commit)
       via  43dbdec9f0f6361e4705671bbdade34cd18adefa (commit)
      from  dd58470216d6a416e4d831a3b9c25bfdaa255fff (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit 8645888f12c25edaaac8e03fb5691cfcfbcdb9b2
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Mon May 22 16:08:49 2017 -0400

    11100: Update/add tests for --intermediate-output-ttl

diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py
index 8e32d2a..374f368 100644
--- a/sdk/cwl/arvados_cwl/arvcontainer.py
+++ b/sdk/cwl/arvados_cwl/arvcontainer.py
@@ -209,6 +209,7 @@ class ArvadosContainer(object):
             self.output_callback({}, "permanentFail")
 
     def done(self, record):
+        outputs = {}
         try:
             if self.output_ttl:
                 self.arvrunner.add_intermediate_output(record["output_uuid"])
@@ -238,7 +239,6 @@ class ArvadosContainer(object):
                                                            num_retries=self.arvrunner.num_retries)
                 done.logtail(logc, logger, "%s error log:" % self.arvrunner.label(self))
 
-            outputs = {}
             if container["output"]:
                 outputs = done.done_outputs(self, container, "/tmp", self.outdir, "/keep")
         except WorkflowException as e:
diff --git a/sdk/cwl/tests/test_container.py b/sdk/cwl/tests/test_container.py
index af05773..2492516 100644
--- a/sdk/cwl/tests/test_container.py
+++ b/sdk/cwl/tests/test_container.py
@@ -28,6 +28,7 @@ class TestContainer(unittest.TestCase):
             runner = mock.MagicMock()
             runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
             runner.ignore_docker_for_reuse = False
+            runner.intermediate_output_ttl = 0
 
             keepdocker.return_value = [("zzzzz-4zz18-zzzzzzzzzzzzzz3", "")]
             runner.api.collections().get().execute.return_value = {
@@ -72,6 +73,7 @@ class TestContainer(unittest.TestCase):
                         'state': 'Committed',
                         'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
                         'output_path': '/var/spool/cwl',
+                        'output_ttl': 0,
                         'container_image': 'arvados/jobs',
                         'command': ['ls', '/var/spool/cwl'],
                         'cwd': '/var/spool/cwl',
@@ -87,6 +89,7 @@ class TestContainer(unittest.TestCase):
         runner = mock.MagicMock()
         runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
         runner.ignore_docker_for_reuse = False
+        runner.intermediate_output_ttl = 3600
         document_loader, avsc_names, schema_metadata, metaschema_loader = cwltool.process.get_schema("v1.0")
 
         keepdocker.return_value = [("zzzzz-4zz18-zzzzzzzzzzzzzz3", "")]
@@ -110,6 +113,9 @@ class TestContainer(unittest.TestCase):
             }, {
                 "class": "http://arvados.org/cwl#PartitionRequirement",
                 "partition": "blurb"
+            }, {
+                "class": "http://arvados.org/cwl#IntermediateOutput",
+                "outputTTL": 7200
             }],
             "baseCommand": "ls"
         })
@@ -126,35 +132,36 @@ class TestContainer(unittest.TestCase):
         call_args, call_kwargs = runner.api.container_requests().create.call_args
 
         call_body_expected = {
-                'environment': {
-                    'HOME': '/var/spool/cwl',
-                    'TMPDIR': '/tmp'
-                },
-                'name': 'test_resource_requirements',
-                'runtime_constraints': {
-                    'vcpus': 3,
-                    'ram': 3145728000,
-                    'keep_cache_ram': 536870912,
-                    'API': True
-                },
-                'use_existing': True,
-                'priority': 1,
-                'mounts': {
-                    '/tmp': {'kind': 'tmp',
-                             "capacity": 4194304000 },
-                    '/var/spool/cwl': {'kind': 'tmp',
-                                       "capacity": 5242880000 }
-                },
-                'state': 'Committed',
-                'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
-                'output_path': '/var/spool/cwl',
-                'container_image': 'arvados/jobs',
-                'command': ['ls'],
-                'cwd': '/var/spool/cwl',
-                'scheduling_parameters': {
-                    'partitions': ['blurb']
-                },
-                'properties': {}
+            'environment': {
+                'HOME': '/var/spool/cwl',
+                'TMPDIR': '/tmp'
+            },
+            'name': 'test_resource_requirements',
+            'runtime_constraints': {
+                'vcpus': 3,
+                'ram': 3145728000,
+                'keep_cache_ram': 536870912,
+                'API': True
+            },
+            'use_existing': True,
+            'priority': 1,
+            'mounts': {
+                '/tmp': {'kind': 'tmp',
+                         "capacity": 4194304000 },
+                '/var/spool/cwl': {'kind': 'tmp',
+                                   "capacity": 5242880000 }
+            },
+            'state': 'Committed',
+            'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
+            'output_path': '/var/spool/cwl',
+            'output_ttl': 7200,
+            'container_image': 'arvados/jobs',
+            'command': ['ls'],
+            'cwd': '/var/spool/cwl',
+            'scheduling_parameters': {
+                'partitions': ['blurb']
+            },
+            'properties': {}
         }
 
         call_body = call_kwargs.get('body', None)
@@ -172,6 +179,7 @@ class TestContainer(unittest.TestCase):
         runner = mock.MagicMock()
         runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
         runner.ignore_docker_for_reuse = False
+        runner.intermediate_output_ttl = 0
         document_loader, avsc_names, schema_metadata, metaschema_loader = cwltool.process.get_schema("v1.0")
 
         keepdocker.return_value = [("zzzzz-4zz18-zzzzzzzzzzzzzz3", "")]
@@ -236,52 +244,53 @@ class TestContainer(unittest.TestCase):
         vwdmock.copy.assert_has_calls([mock.call('subdir', 'subdir', source_collection=sourcemock)])
 
         call_body_expected = {
-                'environment': {
-                    'HOME': '/var/spool/cwl',
-                    'TMPDIR': '/tmp'
+            'environment': {
+                'HOME': '/var/spool/cwl',
+                'TMPDIR': '/tmp'
+            },
+            'name': 'test_initial_work_dir',
+            'runtime_constraints': {
+                'vcpus': 1,
+                'ram': 1073741824
+            },
+            'use_existing': True,
+            'priority': 1,
+            'mounts': {
+                '/tmp': {'kind': 'tmp',
+                         "capacity": 1073741824 },
+                '/var/spool/cwl': {'kind': 'tmp',
+                                   "capacity": 1073741824 },
+                '/var/spool/cwl/foo': {
+                    'kind': 'collection',
+                    'path': 'foo',
+                    'portable_data_hash': '99999999999999999999999999999996+99'
                 },
-                'name': 'test_initial_work_dir',
-                'runtime_constraints': {
-                    'vcpus': 1,
-                    'ram': 1073741824
-                },
-                'use_existing': True,
-                'priority': 1,
-                'mounts': {
-                    '/tmp': {'kind': 'tmp',
-                             "capacity": 1073741824 },
-                    '/var/spool/cwl': {'kind': 'tmp',
-                                       "capacity": 1073741824 },
-                    '/var/spool/cwl/foo': {
-                        'kind': 'collection',
-                        'path': 'foo',
-                        'portable_data_hash': '99999999999999999999999999999996+99'
-                    },
-                    '/var/spool/cwl/foo2': {
-                        'kind': 'collection',
-                        'path': 'foo2',
-                        'portable_data_hash': '99999999999999999999999999999996+99'
-                    },
-                    '/var/spool/cwl/filename': {
-                        'kind': 'collection',
-                        'path': 'filename',
-                        'portable_data_hash': '99999999999999999999999999999996+99'
-                    },
-                    '/var/spool/cwl/subdir': {
-                        'kind': 'collection',
-                        'path': 'subdir',
-                        'portable_data_hash': '99999999999999999999999999999996+99'
-                    }
+                '/var/spool/cwl/foo2': {
+                    'kind': 'collection',
+                    'path': 'foo2',
+                    'portable_data_hash': '99999999999999999999999999999996+99'
                 },
-                'state': 'Committed',
-                'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
-                'output_path': '/var/spool/cwl',
-                'container_image': 'arvados/jobs',
-                'command': ['ls'],
-                'cwd': '/var/spool/cwl',
-                'scheduling_parameters': {
+                '/var/spool/cwl/filename': {
+                    'kind': 'collection',
+                    'path': 'filename',
+                    'portable_data_hash': '99999999999999999999999999999996+99'
                 },
-                'properties': {}
+                '/var/spool/cwl/subdir': {
+                    'kind': 'collection',
+                    'path': 'subdir',
+                    'portable_data_hash': '99999999999999999999999999999996+99'
+                }
+            },
+            'state': 'Committed',
+            'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
+            'output_path': '/var/spool/cwl',
+            'output_ttl': 0,
+            'container_image': 'arvados/jobs',
+            'command': ['ls'],
+            'cwd': '/var/spool/cwl',
+            'scheduling_parameters': {
+            },
+            'properties': {}
         }
 
         call_body = call_kwargs.get('body', None)
@@ -298,6 +307,7 @@ class TestContainer(unittest.TestCase):
         runner = mock.MagicMock()
         runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
         runner.ignore_docker_for_reuse = False
+        runner.intermediate_output_ttl = 0
 
         keepdocker.return_value = [("zzzzz-4zz18-zzzzzzzzzzzzzz3", "")]
         runner.api.collections().get().execute.return_value = {
@@ -357,6 +367,7 @@ class TestContainer(unittest.TestCase):
                     'state': 'Committed',
                     'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
                     'output_path': '/var/spool/cwl',
+                    'output_ttl': 0,
                     'container_image': 'arvados/jobs',
                     'command': ['ls', '/var/spool/cwl'],
                     'cwd': '/var/spool/cwl',
@@ -387,6 +398,7 @@ class TestContainer(unittest.TestCase):
         arvjob.collect_outputs = mock.MagicMock()
         arvjob.successCodes = [0]
         arvjob.outdir = "/var/spool/cwl"
+        arvjob.output_ttl = 3600
 
         arvjob.collect_outputs.return_value = {"out": "stuff"}
 
@@ -402,6 +414,7 @@ class TestContainer(unittest.TestCase):
 
         arvjob.collect_outputs.assert_called_with("keep:abc+123")
         arvjob.output_callback.assert_called_with({"out": "stuff"}, "success")
+        runner.add_intermediate_output.assert_called_with("zzzzz-4zz18-zzzzzzzzzzzzzz2")
 
     # The test passes no builder.resources
     # Hence the default resources will apply: {'cores': 1, 'ram': 1024, 'outdirSize': 1024, 'tmpdirSize': 1024}
@@ -412,6 +425,7 @@ class TestContainer(unittest.TestCase):
         runner = mock.MagicMock()
         runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
         runner.ignore_docker_for_reuse = False
+        runner.intermediate_output_ttl = 0
 
         keepdocker.return_value = [("zzzzz-4zz18-zzzzzzzzzzzzzz3", "")]
         runner.api.collections().get().execute.return_value = {
@@ -478,6 +492,7 @@ class TestContainer(unittest.TestCase):
                     'state': 'Committed',
                     'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
                     'output_path': '/var/spool/cwl',
+                    'output_ttl': 0,
                     'container_image': 'arvados/jobs',
                     'command': ['ls', '/var/spool/cwl'],
                     'cwd': '/var/spool/cwl',
diff --git a/sdk/cwl/tests/test_submit.py b/sdk/cwl/tests/test_submit.py
index 85c49c9..731b1d5 100644
--- a/sdk/cwl/tests/test_submit.py
+++ b/sdk/cwl/tests/test_submit.py
@@ -520,6 +520,30 @@ class TestSubmit(unittest.TestCase):
         self.assertEqual(capture_stdout.getvalue(),
                          stubs.expect_container_request_uuid + '\n')
 
+
+    @stubs
+    def test_submit_container_output_ttl(self, stubs):
+        capture_stdout = cStringIO.StringIO()
+        try:
+            exited = arvados_cwl.main(
+                ["--submit", "--no-wait", "--api=containers", "--debug", "--intermediate-output-ttl", "3600",
+                 "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+                capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
+            self.assertEqual(exited, 0)
+        except:
+            logging.exception("")
+
+        expect_container = copy.deepcopy(stubs.expect_container_spec)
+        expect_container["command"] = ['arvados-cwl-runner', '--local', '--api=containers', '--no-log-timestamps',
+                                       '--enable-reuse', '--on-error=continue',
+                                       "--intermediate-output-ttl=3600",
+                                       '/var/lib/cwl/workflow.json#main', '/var/lib/cwl/cwl.input.json']
+
+        stubs.api.container_requests().create.assert_called_with(
+            body=JsonDiffMatcher(expect_container))
+        self.assertEqual(capture_stdout.getvalue(),
+                         stubs.expect_container_request_uuid + '\n')
+
     @stubs
     def test_submit_container_output_tags(self, stubs):
         output_tags = "tag0,tag1,tag2"

commit 43dbdec9f0f6361e4705671bbdade34cd18adefa
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Mon May 22 14:32:08 2017 -0400

    11100: Implement & document arv:IntermediateOutput hint.

diff --git a/doc/user/cwl/cwl-extensions.html.textile.liquid b/doc/user/cwl/cwl-extensions.html.textile.liquid
index 8aef8ac..5e5e497 100644
--- a/doc/user/cwl/cwl-extensions.html.textile.liquid
+++ b/doc/user/cwl/cwl-extensions.html.textile.liquid
@@ -14,7 +14,7 @@ $namespaces:
   cwltool: "http://commonwl.org/cwltool#"
 </pre>
 
-Arvados extensions must go into the @hints@ section, for example:
+Arvados extensions should go into the @hints@ section, for example:
 
 <pre>
 hints:
@@ -27,6 +27,8 @@ hints:
   arv:APIRequirement: {}
   cwltool:LoadListingRequirement:
     loadListing: shallow_listing
+  arv:IntermediateOutput:
+    outputTTL: 3600
 </pre>
 
 h2. arv:RunInSingleContainer
@@ -56,7 +58,9 @@ table(table table-bordered table-condensed).
 
 h2. arv:APIRequirement
 
-Indicates that process wants to access to the Arvados API.  Will be granted limited network access and have @ARVADOS_API_HOST@ and @ARVADOS_API_TOKEN@ set in the environment.
+Indicates that process wants to access to the Arvados API.  Will be granted network access and have @ARVADOS_API_HOST@ and @ARVADOS_API_TOKEN@ set in the environment.  Tools which rely on the Arvados API being present should put @arv:APIRequirement@ in the @requirements@ section of the tool (rather than @hints@) to indicate that that it is not portable to non-Arvados CWL runners.
+
+Use @arv:APIRequirement@ in @hints@ to enable general (non-Arvados-specific) network access for a tool.
 
 h2. cwltool:LoadListingRequirement
 
@@ -71,3 +75,12 @@ table(table table-bordered table-condensed).
 *shallow_listing*: Only expand the first level of directory listing.  The @listing@ field on the toplevel Directory object will contain the directory contents, however @listing@ will not be defined on subdirectories.
 
 *deep_listing*: Recursively expand all levels of directory listing.  The @listing@ field will be provided on the toplevel object and all subdirectories.
+
+h2. arv:IntermediateOutput
+
+Specify desired handling of intermediate output collections.
+
+table(table table-bordered table-condensed).
+|_. Field |_. Type |_. Description |
+|outputTTL|int|If the value is greater than zero, consider intermediate output collections to be temporary and should be automatically trashed. Temporary collections will be trashed `outputTTL` seconds after creation, or on successful completion of workflow (whichever comes first).  A value of zero means intermediate output should be retained indefinitely (this is the default behavior).
+Note: arvados-cwl-runner currently does not take workflow dependencies into account when setting the TTL on an intermediate output collection. If the TTL is too short, it is possible for a collection to be trashed before before downstream steps that consume it are started.  The recommend minimum value for TTL is the time required to complete entire the workflow.|
diff --git a/sdk/cwl/arvados_cwl/__init__.py b/sdk/cwl/arvados_cwl/__init__.py
index 94d602d..a4e2f8a 100644
--- a/sdk/cwl/arvados_cwl/__init__.py
+++ b/sdk/cwl/arvados_cwl/__init__.py
@@ -212,7 +212,7 @@ class ArvCwlRunner(object):
         logger.info("Cleaning up intermediate output collections")
         for i in self.intermediate_output_collections:
             try:
-                self.api_client.collections().delete(uuid=i).execute(num_retries=self.num_retries)
+                self.api.collections().delete(uuid=i).execute(num_retries=self.num_retries)
             except:
                 logger.warn("Failed to delete intermediate output: %s", sys.exc_info()[1], exc_info=(sys.exc_info()[1] if self.debug else False))
             if sys.exc_info()[0] is KeyboardInterrupt:
@@ -533,7 +533,7 @@ class ArvCwlRunner(object):
             adjustDirObjs(self.final_output, partial(get_listing, self.fs_access))
             adjustFileObjs(self.final_output, partial(compute_checksums, self.fs_access))
 
-        if self.intermediate_output_ttl and self.final_status == "success":
+        if self.final_status == "success":
             self.trash_intermediate_output()
 
         return (self.final_output, self.final_status)
@@ -664,7 +664,8 @@ def add_arv_hints():
         "http://arvados.org/cwl#RuntimeConstraints",
         "http://arvados.org/cwl#PartitionRequirement",
         "http://arvados.org/cwl#APIRequirement",
-        "http://commonwl.org/cwltool#LoadListingRequirement"
+        "http://commonwl.org/cwltool#LoadListingRequirement",
+        "http://arvados.org/cwl#IntermediateOutput"
     ])
 
 def main(args, stdout, stderr, api_client=None, keep_client=None):
diff --git a/sdk/cwl/arvados_cwl/arv-cwl-schema.yml b/sdk/cwl/arvados_cwl/arv-cwl-schema.yml
index 4e48216..1af6e38 100644
--- a/sdk/cwl/arvados_cwl/arv-cwl-schema.yml
+++ b/sdk/cwl/arvados_cwl/arv-cwl-schema.yml
@@ -114,3 +114,33 @@ $graph:
       jsonldPredicate:
         _id: "@type"
         _type: "@vocab"
+
+- name: IntermediateOutput
+  type: record
+  extends: cwl:ProcessRequirement
+  inVocab: false
+  doc: |
+    Specify desired handling of intermediate output collections.
+  fields:
+    class:
+      type: string
+      doc: "Always 'arv:IntermediateOutput'"
+      jsonldPredicate:
+        _id: "@type"
+        _type: "@vocab"
+    outputTTL:
+      type: int
+      doc: |
+        If the value is greater than zero, consider intermediate output
+        collections to be temporary and should be automatically trashed.
+        Temporary collections will be trashed `outputTTL` seconds after
+        creation, or on successful completion of workflow (whichever comes
+        first).  A value of zero means intermediate output should be retained
+        indefinitely (this is the default behavior).
+
+        Note: arvados-cwl-runner currently does not take workflow dependencies
+        into account when setting the TTL on an intermediate output collection.
+        If the TTL is too short, it is possible for a collection to be trashed
+        before before downstream steps that consume it are started.  The
+        recommend minimum value for TTL is the time required to complete
+        entire the workflow.
diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py
index b195ce2..8e32d2a 100644
--- a/sdk/cwl/arvados_cwl/arvcontainer.py
+++ b/sdk/cwl/arvados_cwl/arvcontainer.py
@@ -43,7 +43,6 @@ class ArvadosContainer(object):
             "priority": 1,
             "state": "Committed",
             "properties": {},
-            "output_ttl", self.arvrunner.intermediate_output_ttl
         }
         runtime_constraints = {}
 
@@ -170,6 +169,16 @@ class ArvadosContainer(object):
         if partition_req:
             scheduling_parameters["partitions"] = aslist(partition_req["partition"])
 
+        intermediate_output_req, _ = get_feature(self, "http://arvados.org/cwl#IntermediateOutput")
+        if intermediate_output_req:
+            self.output_ttl = intermediate_output_req["outputTTL"]
+        else:
+            self.output_ttl = self.arvrunner.intermediate_output_ttl
+
+        if self.output_ttl < 0:
+            raise WorkflowError("Invalid value %d for output_ttl, cannot be less than zero" % container_request["output_ttl"])
+
+        container_request["output_ttl"] = self.output_ttl
         container_request["mounts"] = mounts
         container_request["runtime_constraints"] = runtime_constraints
         container_request["use_existing"] = kwargs.get("enable_reuse", True)
@@ -201,7 +210,8 @@ class ArvadosContainer(object):
 
     def done(self, record):
         try:
-            self.arvrunner.add_intermediate_output(record["output_uuid"])
+            if self.output_ttl:
+                self.arvrunner.add_intermediate_output(record["output_uuid"])
 
             container = self.arvrunner.api.containers().get(
                 uuid=record["container_uuid"]

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list