[ARVADOS] updated: 2.3.2-31-g0677b9ccf

Git user git at public.arvados.org
Mon Feb 21 19:04:25 UTC 2022


Summary of changes:
 doc/user/cwl/cwl-run-options.html.textile.liquid |  1 +
 doc/user/cwl/cwl-runner.html.textile.liquid      |  8 +++
 sdk/cwl/arvados_cwl/__init__.py                  |  4 ++
 sdk/cwl/arvados_cwl/arvcontainer.py              |  3 +-
 sdk/cwl/arvados_cwl/arvdocker.py                 | 54 +++++++++++++-
 sdk/cwl/arvados_cwl/context.py                   |  1 +
 sdk/cwl/arvados_cwl/runner.py                    | 12 ++--
 sdk/cwl/setup.py                                 |  2 +-
 sdk/cwl/tests/test_container.py                  | 90 ++++++++++++++++++++++++
 sdk/cwl/tests/test_submit.py                     |  9 +--
 10 files changed, 173 insertions(+), 11 deletions(-)

       via  0677b9ccf4b9961baf982469ab20d8fc49022195 (commit)
      from  9015155cfdd845ce6a6bdceda8b0ae078cdd3103 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit 0677b9ccf4b9961baf982469ab20d8fc49022195
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Fri Feb 18 17:52:54 2022 -0500

    Merge branch '18773-check-image-id' refs #18773
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/doc/user/cwl/cwl-run-options.html.textile.liquid b/doc/user/cwl/cwl-run-options.html.textile.liquid
index a1c102593..d331dad87 100644
--- a/doc/user/cwl/cwl-run-options.html.textile.liquid
+++ b/doc/user/cwl/cwl-run-options.html.textile.liquid
@@ -51,6 +51,7 @@ table(table table-bordered table-condensed).
 |==--submit-runner-ram== SUBMIT_RUNNER_RAM|RAM (in MiB) required for the workflow runner (default 1024)|
 |==--submit-runner-image== SUBMIT_RUNNER_IMAGE|Docker image for workflow runner|
 |==--always-submit-runner==|When invoked with --submit --wait, always submit a runner to manage the workflow, even when only running a single CommandLineTool|
+|==--match-submitter-images==|Where Arvados has more than one Docker image of the same name, use image from the Docker instance on the submitting node.|
 |==--submit-request-uuid== UUID|Update and commit to supplied container request instead of creating a new one.|
 |==--submit-runner-cluster== CLUSTER_ID|Submit workflow runner to a remote cluster|
 |==--name NAME==|Name to use for workflow execution instance.|
diff --git a/doc/user/cwl/cwl-runner.html.textile.liquid b/doc/user/cwl/cwl-runner.html.textile.liquid
index b108de551..07663849a 100644
--- a/doc/user/cwl/cwl-runner.html.textile.liquid
+++ b/doc/user/cwl/cwl-runner.html.textile.liquid
@@ -113,6 +113,14 @@ h3. Work reuse
 
 Workflows submitted with @arvados-cwl-runner@ will take advantage of Arvados job reuse.  If you submit a workflow which is identical to one that has run before, it will short cut the execution and return the result of the previous run.  This also applies to individual workflow steps.  For example, a two step workflow where the first step has run before will reuse results for first step and only execute the new second step.  You can disable this behavior with @--disable-reuse at .
 
+h3(#docker). Docker images
+
+Docker images referenced by the workflow must be uploaded to Arvados.  This requires @docker@ to be installed and usable by the user running @arvados-cwl-runner at .  If the image is not present in the local Docker instance, @arvados-cwl-runner@ will first attempt to pull the image using @docker pull@, then upload it.
+
+If there is already a Docker image in Arvados with the same name, it will use the existing image.  In this case, the submitter will not use Docker.
+
+The @--match-submitter-images@ option will check the id of the image in the local Docker instance and compare it to the id of the image already in Arvados with the same name and tag.  If they are different, it will choose the image matching the local image id, which will be uploaded it if necessary.  This helpful for development, if you locally rebuild the image with the 'latest' tag, the @--match-submitter-images@ will ensure that the newer version is used.
+
 h3. Command line options
 
 See "arvados-cwl-runner options":{{site.baseurl}}/user/cwl/cwl-run-options.html
diff --git a/sdk/cwl/arvados_cwl/__init__.py b/sdk/cwl/arvados_cwl/__init__.py
index efac0e097..7b664ae94 100644
--- a/sdk/cwl/arvados_cwl/__init__.py
+++ b/sdk/cwl/arvados_cwl/__init__.py
@@ -152,6 +152,10 @@ def arg_parser():  # type: () -> argparse.ArgumentParser
                         help="When invoked with --submit --wait, always submit a runner to manage the workflow, even when only running a single CommandLineTool",
                         default=False)
 
+    parser.add_argument("--match-submitter-images", action="store_true",
+                        default=False, dest="match_local_docker",
+                        help="Where Arvados has more than one Docker image of the same name, use image from the Docker instance on the submitting node.")
+
     exgroup = parser.add_mutually_exclusive_group()
     exgroup.add_argument("--submit-request-uuid",
                          default=None,
diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py
index ae3c66889..c6a8a53d6 100644
--- a/sdk/cwl/arvados_cwl/arvcontainer.py
+++ b/sdk/cwl/arvados_cwl/arvcontainer.py
@@ -246,7 +246,8 @@ class ArvadosContainer(JobBase):
                                                                     runtimeContext.pull_image,
                                                                     runtimeContext.project_uuid,
                                                                     runtimeContext.force_docker_pull,
-                                                                    runtimeContext.tmp_outdir_prefix)
+                                                                    runtimeContext.tmp_outdir_prefix,
+                                                                    runtimeContext.match_local_docker)
 
         network_req, _ = self.get_requirement("NetworkAccess")
         if network_req:
diff --git a/sdk/cwl/arvados_cwl/arvdocker.py b/sdk/cwl/arvados_cwl/arvdocker.py
index 26408317c..04e2a4cff 100644
--- a/sdk/cwl/arvados_cwl/arvdocker.py
+++ b/sdk/cwl/arvados_cwl/arvdocker.py
@@ -6,6 +6,8 @@ import logging
 import sys
 import threading
 import copy
+import re
+import subprocess
 
 from schema_salad.sourceline import SourceLine
 
@@ -18,8 +20,44 @@ logger = logging.getLogger('arvados.cwl-runner')
 cached_lookups = {}
 cached_lookups_lock = threading.Lock()
 
+def determine_image_id(dockerImageId):
+    for line in (
+        subprocess.check_output(  # nosec
+            ["docker", "images", "--no-trunc", "--all"]
+        )
+        .decode("utf-8")
+        .splitlines()
+    ):
+        try:
+            match = re.match(r"^([^ ]+)\s+([^ ]+)\s+([^ ]+)", line)
+            split = dockerImageId.split(":")
+            if len(split) == 1:
+                split.append("latest")
+            elif len(split) == 2:
+                #  if split[1] doesn't  match valid tag names, it is a part of repository
+                if not re.match(r"[\w][\w.-]{0,127}", split[1]):
+                    split[0] = split[0] + ":" + split[1]
+                    split[1] = "latest"
+            elif len(split) == 3:
+                if re.match(r"[\w][\w.-]{0,127}", split[2]):
+                    split[0] = split[0] + ":" + split[1]
+                    split[1] = split[2]
+                    del split[2]
+
+            # check for repository:tag match or image id match
+            if match and (
+                (split[0] == match.group(1) and split[1] == match.group(2))
+                or dockerImageId == match.group(3)
+            ):
+                return match.group(3)
+        except ValueError:
+            pass
+
+    return None
+
+
 def arv_docker_get_image(api_client, dockerRequirement, pull_image, project_uuid,
-                         force_pull, tmp_outdir_prefix):
+                         force_pull, tmp_outdir_prefix, match_local_docker):
     """Check if a Docker image is available in Keep, if not, upload it using arv-keepdocker."""
 
     if "http://arvados.org/cwl#dockerCollectionPDH" in dockerRequirement:
@@ -46,6 +84,20 @@ def arv_docker_get_image(api_client, dockerRequirement, pull_image, project_uuid
                                                                 image_name=image_name,
                                                                 image_tag=image_tag)
 
+        if images and match_local_docker:
+            local_image_id = determine_image_id(dockerRequirement["dockerImageId"])
+            if local_image_id:
+                # find it in the list
+                found = False
+                for i in images:
+                    if i[1]["dockerhash"] == local_image_id:
+                        found = True
+                        images = [i]
+                        break
+                if not found:
+                    # force re-upload.
+                    images = []
+
         if not images:
             # Fetch Docker image if necessary.
             try:
diff --git a/sdk/cwl/arvados_cwl/context.py b/sdk/cwl/arvados_cwl/context.py
index 1e04dd577..4239dd3b5 100644
--- a/sdk/cwl/arvados_cwl/context.py
+++ b/sdk/cwl/arvados_cwl/context.py
@@ -36,6 +36,7 @@ class ArvRuntimeContext(RuntimeContext):
         self.cluster_target_id = 0
         self.always_submit_runner = False
         self.collection_cache_size = 256
+        self.match_local_docker = False
 
         super(ArvRuntimeContext, self).__init__(kwargs)
 
diff --git a/sdk/cwl/arvados_cwl/runner.py b/sdk/cwl/arvados_cwl/runner.py
index 7d6d287a2..ad17950a2 100644
--- a/sdk/cwl/arvados_cwl/runner.py
+++ b/sdk/cwl/arvados_cwl/runner.py
@@ -461,12 +461,14 @@ def upload_docker(arvrunner, tool):
                     "Option 'dockerOutputDirectory' of DockerRequirement not supported.")
             arvados_cwl.arvdocker.arv_docker_get_image(arvrunner.api, docker_req, True, arvrunner.project_uuid,
                                                        arvrunner.runtimeContext.force_docker_pull,
-                                                       arvrunner.runtimeContext.tmp_outdir_prefix)
+                                                       arvrunner.runtimeContext.tmp_outdir_prefix,
+                                                       arvrunner.runtimeContext.match_local_docker)
         else:
             arvados_cwl.arvdocker.arv_docker_get_image(arvrunner.api, {"dockerPull": "arvados/jobs:"+__version__},
                                                        True, arvrunner.project_uuid,
                                                        arvrunner.runtimeContext.force_docker_pull,
-                                                       arvrunner.runtimeContext.tmp_outdir_prefix)
+                                                       arvrunner.runtimeContext.tmp_outdir_prefix,
+                                                       arvrunner.runtimeContext.match_local_docker)
     elif isinstance(tool, cwltool.workflow.Workflow):
         for s in tool.steps:
             upload_docker(arvrunner, s.embedded_tool)
@@ -503,7 +505,8 @@ def packed_workflow(arvrunner, tool, merged_map):
                 v["http://arvados.org/cwl#dockerCollectionPDH"] = arvados_cwl.arvdocker.arv_docker_get_image(arvrunner.api, v, True,
                                                                                                              arvrunner.project_uuid,
                                                                                                              arvrunner.runtimeContext.force_docker_pull,
-                                                                                                             arvrunner.runtimeContext.tmp_outdir_prefix)
+                                                                                                             arvrunner.runtimeContext.tmp_outdir_prefix,
+                                                                                                             arvrunner.runtimeContext.match_local_docker)
             for l in v:
                 visit(v[l], cur_id)
         if isinstance(v, list):
@@ -610,7 +613,8 @@ def arvados_jobs_image(arvrunner, img):
     try:
         return arvados_cwl.arvdocker.arv_docker_get_image(arvrunner.api, {"dockerPull": img}, True, arvrunner.project_uuid,
                                                           arvrunner.runtimeContext.force_docker_pull,
-                                                          arvrunner.runtimeContext.tmp_outdir_prefix)
+                                                          arvrunner.runtimeContext.tmp_outdir_prefix,
+                                                          arvrunner.runtimeContext.match_local_docker)
     except Exception as e:
         raise Exception("Docker image %s is not available\n%s" % (img, e) )
 
diff --git a/sdk/cwl/setup.py b/sdk/cwl/setup.py
index e73923595..e126d170b 100644
--- a/sdk/cwl/setup.py
+++ b/sdk/cwl/setup.py
@@ -36,7 +36,7 @@ setup(name='arvados-cwl-runner',
       # file to determine what version of cwltool and schema-salad to
       # build.
       install_requires=[
-          'cwltool==3.1.20220210171524',
+          'cwltool==3.1.20220217222804',
           'schema-salad==8.2.20211116214159',
           'arvados-python-client{}'.format(pysdk_dep),
           'setuptools',
diff --git a/sdk/cwl/tests/test_container.py b/sdk/cwl/tests/test_container.py
index 1a2bd112f..3f05d5496 100644
--- a/sdk/cwl/tests/test_container.py
+++ b/sdk/cwl/tests/test_container.py
@@ -1014,6 +1014,96 @@ class TestContainer(unittest.TestCase):
                 }))
 
 
+    # The test passes no builder.resources
+    # Hence the default resources will apply: {'cores': 1, 'ram': 1024, 'outdirSize': 1024, 'tmpdirSize': 1024}
+    @mock.patch("arvados_cwl.arvdocker.determine_image_id")
+    @mock.patch("arvados.commands.keepdocker.list_images_in_arv")
+    def test_match_local_docker(self, keepdocker, determine_image_id):
+        arvados_cwl.add_arv_hints()
+        arv_docker_clear_cache()
+
+        runner = mock.MagicMock()
+        runner.ignore_docker_for_reuse = False
+        runner.intermediate_output_ttl = 0
+        runner.secret_store = cwltool.secrets.SecretStore()
+        runner.api._rootDesc = {"revision": "20210628"}
+
+        keepdocker.return_value = [("zzzzz-4zz18-zzzzzzzzzzzzzz4", {"dockerhash": "456"}),
+                                   ("zzzzz-4zz18-zzzzzzzzzzzzzz3", {"dockerhash": "123"})]
+        determine_image_id.side_effect = lambda x: "123"
+        def execute(uuid):
+            ex = mock.MagicMock()
+            lookup = {"zzzzz-4zz18-zzzzzzzzzzzzzz4": {"portable_data_hash": "99999999999999999999999999999994+99"},
+                      "zzzzz-4zz18-zzzzzzzzzzzzzz3": {"portable_data_hash": "99999999999999999999999999999993+99"}}
+            ex.execute.return_value = lookup[uuid]
+            return ex
+        runner.api.collections().get.side_effect = execute
+
+        tool = cmap({
+            "inputs": [],
+            "outputs": [],
+            "baseCommand": "echo",
+            "arguments": [],
+            "id": "",
+            "cwlVersion": "v1.2",
+            "class": "CommandLineTool"
+        })
+
+        loadingContext, runtimeContext = self.helper(runner, True)
+
+        arvtool = cwltool.load_tool.load_tool(tool, loadingContext)
+        arvtool.formatgraph = None
+
+        container_request = {
+            'environment': {
+                'HOME': '/var/spool/cwl',
+                'TMPDIR': '/tmp'
+            },
+            'name': 'test_run_True',
+            'runtime_constraints': {
+                'vcpus': 1,
+                'ram': 268435456
+            },
+            'use_existing': True,
+            'priority': 500,
+            'mounts': {
+                '/tmp': {'kind': 'tmp',
+                         "capacity": 1073741824
+                         },
+                '/var/spool/cwl': {'kind': 'tmp',
+                                   "capacity": 1073741824 }
+            },
+            'state': 'Committed',
+            'output_name': 'Output for step test_run_True',
+            'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
+            'output_path': '/var/spool/cwl',
+            'output_ttl': 0,
+            'container_image': '99999999999999999999999999999994+99',
+            'command': ['echo'],
+            'cwd': '/var/spool/cwl',
+            'scheduling_parameters': {},
+            'properties': {},
+            'secret_mounts': {},
+            'output_storage_classes': ["default"]
+        }
+
+        runtimeContext.match_local_docker = False
+        for j in arvtool.job({}, mock.MagicMock(), runtimeContext):
+            j.run(runtimeContext)
+            runner.api.container_requests().create.assert_called_with(
+                body=JsonDiffMatcher(container_request))
+
+        arv_docker_clear_cache()
+        runtimeContext.match_local_docker = True
+        container_request['container_image'] = '99999999999999999999999999999993+99'
+        container_request['name'] = 'test_run_True_2'
+        container_request['output_name'] = 'Output for step test_run_True_2'
+        for j in arvtool.job({}, mock.MagicMock(), runtimeContext):
+            j.run(runtimeContext)
+            runner.api.container_requests().create.assert_called_with(
+                body=JsonDiffMatcher(container_request))
+
+
 class TestWorkflow(unittest.TestCase):
     def setUp(self):
         cwltool.process._names = set()
diff --git a/sdk/cwl/tests/test_submit.py b/sdk/cwl/tests/test_submit.py
index 77f70851e..10443359b 100644
--- a/sdk/cwl/tests/test_submit.py
+++ b/sdk/cwl/tests/test_submit.py
@@ -67,10 +67,10 @@ def stubs(func):
 
         stubs.keep_client = keep_client2
         stubs.docker_images = {
-            "arvados/jobs:"+arvados_cwl.__version__: [("zzzzz-4zz18-zzzzzzzzzzzzzd3", "")],
-            "debian:buster-slim": [("zzzzz-4zz18-zzzzzzzzzzzzzd4", "")],
-            "arvados/jobs:123": [("zzzzz-4zz18-zzzzzzzzzzzzzd5", "")],
-            "arvados/jobs:latest": [("zzzzz-4zz18-zzzzzzzzzzzzzd6", "")],
+            "arvados/jobs:"+arvados_cwl.__version__: [("zzzzz-4zz18-zzzzzzzzzzzzzd3", {})],
+            "debian:buster-slim": [("zzzzz-4zz18-zzzzzzzzzzzzzd4", {})],
+            "arvados/jobs:123": [("zzzzz-4zz18-zzzzzzzzzzzzzd5", {})],
+            "arvados/jobs:latest": [("zzzzz-4zz18-zzzzzzzzzzzzzd6", {})],
         }
         def kd(a, b, image_name=None, image_tag=None):
             return stubs.docker_images.get("%s:%s" % (image_name, image_tag), [])
@@ -1062,6 +1062,7 @@ class TestSubmit(unittest.TestCase):
         arvrunner.project_uuid = ""
         api.return_value = mock.MagicMock()
         arvrunner.api = api.return_value
+        arvrunner.runtimeContext.match_local_docker = False
         arvrunner.api.links().list().execute.side_effect = ({"items": [{"created_at": "",
                                                                         "head_uuid": "zzzzz-4zz18-zzzzzzzzzzzzzzb",
                                                                         "link_class": "docker_image_repo+tag",

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list