[ARVADOS] created: 80459d52161120ae8e33da140984d596271d5195

Git user git at public.curoverse.com
Mon Jun 26 10:45:21 EDT 2017


        at  80459d52161120ae8e33da140984d596271d5195 (commit)


commit 80459d52161120ae8e33da140984d596271d5195
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Mon Jun 26 10:45:16 2017 -0400

    11095: Add arv:ReuseRequirement hint.  Update tests & documentation.
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curoverse.com>

diff --git a/doc/user/cwl/cwl-extensions.html.textile.liquid b/doc/user/cwl/cwl-extensions.html.textile.liquid
index 8a62034..0ba1045 100644
--- a/doc/user/cwl/cwl-extensions.html.textile.liquid
+++ b/doc/user/cwl/cwl-extensions.html.textile.liquid
@@ -14,7 +14,7 @@ $namespaces:
   cwltool: "http://commonwl.org/cwltool#"
 </pre>
 
-Arvados extensions should go into the @hints@ section, for example:
+For portability, Arvados extensions should go into the @hints@ section of your CWL file, for example:
 
 <pre>
 hints:
@@ -29,8 +29,12 @@ hints:
     loadListing: shallow_listing
   arv:IntermediateOutput:
     outputTTL: 3600
+  arv:ReuseRequirement:
+    enableReuse: false
 </pre>
 
+The one exception to this is @arv:APIRequirement@, see note below.
+
 h2. arv:RunInSingleContainer
 
 Indicates that a subworkflow should run in a single container and not be scheduled as separate steps.
@@ -84,3 +88,11 @@ table(table table-bordered table-condensed).
 |_. Field |_. Type |_. Description |
 |outputTTL|int|If the value is greater than zero, consider intermediate output collections to be temporary and should be automatically trashed. Temporary collections will be trashed @outputTTL@ seconds after creation.  A value of zero means intermediate output should be retained indefinitely (this is the default behavior).
 Note: arvados-cwl-runner currently does not take workflow dependencies into account when setting the TTL on an intermediate output collection. If the TTL is too short, it is possible for a collection to be trashed before downstream steps that consume it are started.  The recommended minimum value for TTL is the expected duration of the entire the workflow.|
+
+h2. arv:ReuseRequirement
+
+Enable/disable work reuse for current process.  Default true (work reuse enabled).
+
+table(table table-bordered table-condensed).
+|_. Field |_. Type |_. Description |
+|enableReuse|boolean|Enable/disable work reuse for current process.  Default true (work reuse enabled).|
diff --git a/sdk/cwl/arvados_cwl/__init__.py b/sdk/cwl/arvados_cwl/__init__.py
index f7da563..64ec4e2 100644
--- a/sdk/cwl/arvados_cwl/__init__.py
+++ b/sdk/cwl/arvados_cwl/__init__.py
@@ -685,7 +685,8 @@ def add_arv_hints():
         "http://arvados.org/cwl#PartitionRequirement",
         "http://arvados.org/cwl#APIRequirement",
         "http://commonwl.org/cwltool#LoadListingRequirement",
-        "http://arvados.org/cwl#IntermediateOutput"
+        "http://arvados.org/cwl#IntermediateOutput",
+        "http://arvados.org/cwl#ReuseRequirement"
     ])
 
 def main(args, stdout, stderr, api_client=None, keep_client=None):
diff --git a/sdk/cwl/arvados_cwl/arv-cwl-schema.yml b/sdk/cwl/arvados_cwl/arv-cwl-schema.yml
index 6838c0f..3107628 100644
--- a/sdk/cwl/arvados_cwl/arv-cwl-schema.yml
+++ b/sdk/cwl/arvados_cwl/arv-cwl-schema.yml
@@ -149,3 +149,19 @@ $graph:
         be trashed before downstream steps that consume it are started.  The
         recommended minimum value for TTL is the expected duration of the
         entire the workflow.
+
+- name: ReuseRequirement
+  type: record
+  extends: cwl:ProcessRequirement
+  inVocab: false
+  doc: |
+    Enable/disable work reuse for current process.  Default true (work reuse enabled).
+  fields:
+    - name: class
+      type: string
+      doc: "Always 'arv:ReuseRequirement'"
+      jsonldPredicate:
+        _id: "@type"
+        _type: "@vocab"
+    - name: enableReuse
+      type: boolean
diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py
index b58a858..bf4aab5 100644
--- a/sdk/cwl/arvados_cwl/arvcontainer.py
+++ b/sdk/cwl/arvados_cwl/arvcontainer.py
@@ -184,9 +184,15 @@ class ArvadosContainer(object):
         container_request["output_ttl"] = self.output_ttl
         container_request["mounts"] = mounts
         container_request["runtime_constraints"] = runtime_constraints
-        container_request["use_existing"] = kwargs.get("enable_reuse", True)
         container_request["scheduling_parameters"] = scheduling_parameters
 
+        enable_reuse = kwargs.get("enable_reuse", True)
+        if enable_reuse:
+            reuse_req, _ = get_feature(self, "http://arvados.org/cwl#ReuseRequirement")
+            if reuse_req:
+                enable_reuse = reuse_req["enableReuse"]
+        container_request["use_existing"] = enable_reuse
+
         if kwargs.get("runnerjob", "").startswith("arvwf:"):
             wfuuid = kwargs["runnerjob"][6:kwargs["runnerjob"].index("#")]
             wfrecord = self.arvrunner.api.workflows().get(uuid=wfuuid).execute(num_retries=self.arvrunner.num_retries)
diff --git a/sdk/cwl/arvados_cwl/arvjob.py b/sdk/cwl/arvados_cwl/arvjob.py
index ab06386..877f423 100644
--- a/sdk/cwl/arvados_cwl/arvjob.py
+++ b/sdk/cwl/arvados_cwl/arvjob.py
@@ -124,6 +124,12 @@ class ArvadosJob(object):
         if not self.arvrunner.ignore_docker_for_reuse:
             filters.append(["docker_image_locator", "in docker", runtime_constraints["docker_image"]])
 
+        enable_reuse = kwargs.get("enable_reuse", True)
+        if enable_reuse:
+            reuse_req, _ = get_feature(self, "http://arvados.org/cwl#ReuseRequirement")
+            if reuse_req:
+                enable_reuse = reuse_req["enableReuse"]
+
         try:
             with Perf(metrics, "create %s" % self.name):
                 response = self.arvrunner.api.jobs().create(
@@ -137,7 +143,7 @@ class ArvadosJob(object):
                         "runtime_constraints": runtime_constraints
                     },
                     filters=filters,
-                    find_or_create=kwargs.get("enable_reuse", True)
+                    find_or_create=enable_reuse
                 ).execute(num_retries=self.arvrunner.num_retries)
 
             self.arvrunner.processes[response["uuid"]] = self
diff --git a/sdk/cwl/tests/arvados-tests.yml b/sdk/cwl/tests/arvados-tests.yml
index 87528b2..c38fb0b 100644
--- a/sdk/cwl/tests/arvados-tests.yml
+++ b/sdk/cwl/tests/arvados-tests.yml
@@ -82,3 +82,8 @@
   }
   tool: wf/listing_deep.cwl
   doc: test deep directory listing
+
+- job: null
+  output: {}
+  tool: noreuse.cwl
+  doc: "Test arv:ReuseRequirement"
diff --git a/sdk/cwl/tests/noreuse.cwl b/sdk/cwl/tests/noreuse.cwl
new file mode 100644
index 0000000..46771d1
--- /dev/null
+++ b/sdk/cwl/tests/noreuse.cwl
@@ -0,0 +1,16 @@
+cwlVersion: v1.0
+class: Workflow
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+inputs: []
+outputs: []
+steps:
+  step1:
+    in:
+      message:
+        default: "hello world"
+    out: [output]
+    hints:
+      arv:ReuseRequirement:
+        enableReuse: false
+    run: stdout.cwl
\ No newline at end of file
diff --git a/sdk/cwl/tests/test_container.py b/sdk/cwl/tests/test_container.py
index aab963d..b1f5bfe 100644
--- a/sdk/cwl/tests/test_container.py
+++ b/sdk/cwl/tests/test_container.py
@@ -116,6 +116,9 @@ class TestContainer(unittest.TestCase):
             }, {
                 "class": "http://arvados.org/cwl#IntermediateOutput",
                 "outputTTL": 7200
+            }, {
+                "class": "http://arvados.org/cwl#ReuseRequirement",
+                "enableReuse": False
             }],
             "baseCommand": "ls"
         })
@@ -127,7 +130,7 @@ class TestContainer(unittest.TestCase):
         arvtool.formatgraph = None
         for j in arvtool.job({}, mock.MagicMock(), basedir="", name="test_resource_requirements",
                              make_fs_access=make_fs_access, tmpdir="/tmp"):
-            j.run()
+            j.run(enable_reuse=True)
 
         call_args, call_kwargs = runner.api.container_requests().create.call_args
 
@@ -143,7 +146,7 @@ class TestContainer(unittest.TestCase):
                 'keep_cache_ram': 536870912,
                 'API': True
             },
-            'use_existing': True,
+            'use_existing': False,
             'priority': 1,
             'mounts': {
                 '/tmp': {'kind': 'tmp',
diff --git a/sdk/cwl/tests/test_job.py b/sdk/cwl/tests/test_job.py
index a71d1d8..d64381b 100644
--- a/sdk/cwl/tests/test_job.py
+++ b/sdk/cwl/tests/test_job.py
@@ -133,6 +133,10 @@ class TestJob(unittest.TestCase):
                 "outputDirType": "keep_output_dir"
             }, {
                 "class": "http://arvados.org/cwl#APIRequirement",
+            },
+            {
+                "class": "http://arvados.org/cwl#ReuseRequirement",
+                "enableReuse": False
             }],
             "baseCommand": "ls"
         }
@@ -142,7 +146,7 @@ class TestJob(unittest.TestCase):
                                                  make_fs_access=make_fs_access, loader=Loader({}))
         arvtool.formatgraph = None
         for j in arvtool.job({}, mock.MagicMock(), basedir="", make_fs_access=make_fs_access):
-            j.run()
+            j.run(enable_reuse=True)
         runner.api.jobs().create.assert_called_with(
             body=JsonDiffMatcher({
                 'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
@@ -166,7 +170,7 @@ class TestJob(unittest.TestCase):
                     'keep_cache_mb_per_task': 512
                 }
             }),
-            find_or_create=True,
+            find_or_create=False,
             filters=[['repository', '=', 'arvados'],
                      ['script', '=', 'crunchrunner'],
                      ['script_version', 'in git', 'a3f2cb186e437bfce0031b024b2157b73ed2717d'],

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list