[ARVADOS] created: 2.1.0-1925-g6d728a956

Git user git at public.arvados.org
Thu Feb 17 19:50:03 UTC 2022


        at  6d728a9567eda6e256359c3606a5de41f126a59f (commit)


commit 6d728a9567eda6e256359c3606a5de41f126a59f
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Thu Feb 17 14:37:51 2022 -0500

    18773: --match-submitter-images to compare the local docker image with Arvados
    
    Consults local list of Docker images to find the image id and chooses
    that exact one, uploading it if necessary.
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/sdk/cwl/arvados_cwl/__init__.py b/sdk/cwl/arvados_cwl/__init__.py
index 734945c0f..826467cc0 100644
--- a/sdk/cwl/arvados_cwl/__init__.py
+++ b/sdk/cwl/arvados_cwl/__init__.py
@@ -152,6 +152,10 @@ def arg_parser():  # type: () -> argparse.ArgumentParser
                         help="When invoked with --submit --wait, always submit a runner to manage the workflow, even when only running a single CommandLineTool",
                         default=False)
 
+    parser.add_argument("--match-submitter-images", action="store_true",
+                        default=False, dest="match_local_docker",
+                        help="Where Arvados has more than one Docker image of the same name, use image from the Docker instance on the submitting node.")
+
     exgroup = parser.add_mutually_exclusive_group()
     exgroup.add_argument("--submit-request-uuid",
                          default=None,
diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py
index 3c7e9cfaa..753c2c250 100644
--- a/sdk/cwl/arvados_cwl/arvcontainer.py
+++ b/sdk/cwl/arvados_cwl/arvcontainer.py
@@ -246,7 +246,8 @@ class ArvadosContainer(JobBase):
                                                                     runtimeContext.pull_image,
                                                                     runtimeContext.project_uuid,
                                                                     runtimeContext.force_docker_pull,
-                                                                    runtimeContext.tmp_outdir_prefix)
+                                                                    runtimeContext.tmp_outdir_prefix,
+                                                                    runtimeContext.match_local_docker)
 
         network_req, _ = self.get_requirement("NetworkAccess")
         if network_req:
diff --git a/sdk/cwl/arvados_cwl/arvdocker.py b/sdk/cwl/arvados_cwl/arvdocker.py
index 26408317c..04e2a4cff 100644
--- a/sdk/cwl/arvados_cwl/arvdocker.py
+++ b/sdk/cwl/arvados_cwl/arvdocker.py
@@ -6,6 +6,8 @@ import logging
 import sys
 import threading
 import copy
+import re
+import subprocess
 
 from schema_salad.sourceline import SourceLine
 
@@ -18,8 +20,44 @@ logger = logging.getLogger('arvados.cwl-runner')
 cached_lookups = {}
 cached_lookups_lock = threading.Lock()
 
+def determine_image_id(dockerImageId):
+    for line in (
+        subprocess.check_output(  # nosec
+            ["docker", "images", "--no-trunc", "--all"]
+        )
+        .decode("utf-8")
+        .splitlines()
+    ):
+        try:
+            match = re.match(r"^([^ ]+)\s+([^ ]+)\s+([^ ]+)", line)
+            split = dockerImageId.split(":")
+            if len(split) == 1:
+                split.append("latest")
+            elif len(split) == 2:
+                #  if split[1] doesn't  match valid tag names, it is a part of repository
+                if not re.match(r"[\w][\w.-]{0,127}", split[1]):
+                    split[0] = split[0] + ":" + split[1]
+                    split[1] = "latest"
+            elif len(split) == 3:
+                if re.match(r"[\w][\w.-]{0,127}", split[2]):
+                    split[0] = split[0] + ":" + split[1]
+                    split[1] = split[2]
+                    del split[2]
+
+            # check for repository:tag match or image id match
+            if match and (
+                (split[0] == match.group(1) and split[1] == match.group(2))
+                or dockerImageId == match.group(3)
+            ):
+                return match.group(3)
+        except ValueError:
+            pass
+
+    return None
+
+
 def arv_docker_get_image(api_client, dockerRequirement, pull_image, project_uuid,
-                         force_pull, tmp_outdir_prefix):
+                         force_pull, tmp_outdir_prefix, match_local_docker):
     """Check if a Docker image is available in Keep, if not, upload it using arv-keepdocker."""
 
     if "http://arvados.org/cwl#dockerCollectionPDH" in dockerRequirement:
@@ -46,6 +84,20 @@ def arv_docker_get_image(api_client, dockerRequirement, pull_image, project_uuid
                                                                 image_name=image_name,
                                                                 image_tag=image_tag)
 
+        if images and match_local_docker:
+            local_image_id = determine_image_id(dockerRequirement["dockerImageId"])
+            if local_image_id:
+                # find it in the list
+                found = False
+                for i in images:
+                    if i[1]["dockerhash"] == local_image_id:
+                        found = True
+                        images = [i]
+                        break
+                if not found:
+                    # force re-upload.
+                    images = []
+
         if not images:
             # Fetch Docker image if necessary.
             try:
diff --git a/sdk/cwl/arvados_cwl/context.py b/sdk/cwl/arvados_cwl/context.py
index 1e04dd577..4239dd3b5 100644
--- a/sdk/cwl/arvados_cwl/context.py
+++ b/sdk/cwl/arvados_cwl/context.py
@@ -36,6 +36,7 @@ class ArvRuntimeContext(RuntimeContext):
         self.cluster_target_id = 0
         self.always_submit_runner = False
         self.collection_cache_size = 256
+        self.match_local_docker = False
 
         super(ArvRuntimeContext, self).__init__(kwargs)
 
diff --git a/sdk/cwl/arvados_cwl/runner.py b/sdk/cwl/arvados_cwl/runner.py
index 7d6d287a2..ad17950a2 100644
--- a/sdk/cwl/arvados_cwl/runner.py
+++ b/sdk/cwl/arvados_cwl/runner.py
@@ -461,12 +461,14 @@ def upload_docker(arvrunner, tool):
                     "Option 'dockerOutputDirectory' of DockerRequirement not supported.")
             arvados_cwl.arvdocker.arv_docker_get_image(arvrunner.api, docker_req, True, arvrunner.project_uuid,
                                                        arvrunner.runtimeContext.force_docker_pull,
-                                                       arvrunner.runtimeContext.tmp_outdir_prefix)
+                                                       arvrunner.runtimeContext.tmp_outdir_prefix,
+                                                       arvrunner.runtimeContext.match_local_docker)
         else:
             arvados_cwl.arvdocker.arv_docker_get_image(arvrunner.api, {"dockerPull": "arvados/jobs:"+__version__},
                                                        True, arvrunner.project_uuid,
                                                        arvrunner.runtimeContext.force_docker_pull,
-                                                       arvrunner.runtimeContext.tmp_outdir_prefix)
+                                                       arvrunner.runtimeContext.tmp_outdir_prefix,
+                                                       arvrunner.runtimeContext.match_local_docker)
     elif isinstance(tool, cwltool.workflow.Workflow):
         for s in tool.steps:
             upload_docker(arvrunner, s.embedded_tool)
@@ -503,7 +505,8 @@ def packed_workflow(arvrunner, tool, merged_map):
                 v["http://arvados.org/cwl#dockerCollectionPDH"] = arvados_cwl.arvdocker.arv_docker_get_image(arvrunner.api, v, True,
                                                                                                              arvrunner.project_uuid,
                                                                                                              arvrunner.runtimeContext.force_docker_pull,
-                                                                                                             arvrunner.runtimeContext.tmp_outdir_prefix)
+                                                                                                             arvrunner.runtimeContext.tmp_outdir_prefix,
+                                                                                                             arvrunner.runtimeContext.match_local_docker)
             for l in v:
                 visit(v[l], cur_id)
         if isinstance(v, list):
@@ -610,7 +613,8 @@ def arvados_jobs_image(arvrunner, img):
     try:
         return arvados_cwl.arvdocker.arv_docker_get_image(arvrunner.api, {"dockerPull": img}, True, arvrunner.project_uuid,
                                                           arvrunner.runtimeContext.force_docker_pull,
-                                                          arvrunner.runtimeContext.tmp_outdir_prefix)
+                                                          arvrunner.runtimeContext.tmp_outdir_prefix,
+                                                          arvrunner.runtimeContext.match_local_docker)
     except Exception as e:
         raise Exception("Docker image %s is not available\n%s" % (img, e) )
 

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list