[ARVADOS] updated: 2.1.0-1960-g5eef457a7

Git user git at public.arvados.org
Mon Feb 21 14:45:44 UTC 2022


Summary of changes:
 .../scripts/create-ebs-volume-nvme.patch           | 63 ++++++++++++++++++++++
 1 file changed, 63 insertions(+)
 create mode 100644 tools/compute-images/scripts/create-ebs-volume-nvme.patch

  discards  a82ab3d2f9753481fb593331d58aecdc3df3141d (commit)
  discards  485956a33887efb4ff3c7b2714a2a195e28c7070 (commit)
  discards  e32eba7538014c707c2516cfd86c55230bba3325 (commit)
  discards  fdc23914e8c3934e2c44643a48134485674ec312 (commit)
  discards  da5fb83efeb937846fd9c1e3cd9b7f2c576a4258 (commit)
  discards  bb4f8555b3364d3b90a01f51a6a14ed1e70c4359 (commit)
  discards  aa102093124b75fee1a71571eb1ab7545652827f (commit)
  discards  61b747425ea233ef9b91575a17aa2fabc5965def (commit)
  discards  92ee058f2beb6876348f6373f4fb9be72a56bcf7 (commit)
  discards  77f70e68b72bd0325621eaac5da88c58e99ddfeb (commit)
  discards  75906f69902fd15c559df303aea7b54b4490f391 (commit)
  discards  b6031fa89be6c679ce23d21954de80fe4894d415 (commit)
  discards  608e8f79c3fb5cb7077fce4a0b497c5c93d6d6d0 (commit)
  discards  3d7125ec931701fa7367234f480da5a9fd81ca78 (commit)
  discards  4752421d3d4b3a0f6afe93ce3356961d1d81b494 (commit)
  discards  63645c871246a61a2148b259f10d2fedf30e8df8 (commit)
  discards  d6c1841ea8d87238fa18a673fb524985e826ae19 (commit)
       via  5eef457a768ec4c8426eaa9fd252d77c0a2eddde (commit)
       via  3fac6017ba16634a38d80a49e1ae1a47585e95b9 (commit)
       via  c77a4f8c25aa420b9c2173109601b55b3527a20d (commit)
       via  4018f619affd1b5a010950ccbb7148eaadc82466 (commit)
       via  367aa69a24ee56f77417f4b43b72987179eda809 (commit)
       via  ddce3cd88fad18b39eb3806ac1ec1c2306df1ae0 (commit)
       via  dc77ca59fb2315dbeede74ae901241f8792e0c5b (commit)
       via  d9410492974304b76f16b1c83a99190cd315421f (commit)
       via  bd354c220f5155b4ac539bf889d5813004be1b4b (commit)
       via  d6d3a01e41965d75c45ec187628385d14fa1b73e (commit)
       via  cda738f2e87224ac813a8241d838454a35951bd8 (commit)
       via  fe0b6872a1e3b2f14a039595877c949eb42edbbe (commit)
       via  feee123922e58c7977373dfdb1167b53d80338e3 (commit)
       via  ebc7387c7b443104aa162c4d34fb7a750cc8d5b4 (commit)

This update added new revisions after undoing existing revisions.  That is
to say, the old revision is not a strict subset of the new revision.  This
situation occurs when you --force push a change and generate a repository
containing something like this:

 * -- * -- B -- O -- O -- O (a82ab3d2f9753481fb593331d58aecdc3df3141d)
            \
             N -- N -- N (5eef457a768ec4c8426eaa9fd252d77c0a2eddde)

When this happens we assume that you've already had alert emails for all
of the O revisions, and so we here report only the revisions in the N
branch from the common base, B.

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit 5eef457a768ec4c8426eaa9fd252d77c0a2eddde
Author: Ward Vandewege <ward at curii.com>
Date:   Mon Feb 21 09:44:10 2022 -0500

    18772: add missing patch file.
    
    Arvados-DCO-1.1-Signed-off-by: Ward Vandewege <ward at curii.com>

diff --git a/tools/compute-images/scripts/create-ebs-volume-nvme.patch b/tools/compute-images/scripts/create-ebs-volume-nvme.patch
new file mode 100644
index 000000000..1448ae1f2
--- /dev/null
+++ b/tools/compute-images/scripts/create-ebs-volume-nvme.patch
@@ -0,0 +1,63 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+Make the create-ebs-volume script work with nvme devices.
+
+--- a/create-ebs-volume	2022-02-18 15:24:19.866607848 -0500
++++ b/create-ebs-volume	2022-02-18 16:23:17.931870970 -0500
+@@ -149,9 +152,20 @@
+     for letter in ${alphabet[@]}; do
+         # use /dev/xvdb* device names to avoid contention for /dev/sd* and /dev/xvda names
+         # only supported by HVM instances
+-        if [ ! -b "/dev/xvdb${letter}" ]; then
++        if [[ $created_volumes =~ .*/dev/xvdb${letter}.* ]]; then
++            continue
++        fi
+             echo "/dev/xvdb${letter}"
+             break
++    done
++}
++
++numbers=( {1..255} )
++function get_next_logical_nvme_device() {
++    for num in ${numbers[@]}; do
++        if [ ! -b "/dev/nvme${num}n1" ]; then
++            echo "/dev/nvme${num}"
++            break
+         fi
+     done
+ }
+@@ -243,10 +257,12 @@
+     
+     # check if there are available device names
+     local device=$(get_next_logical_device)
++    local nvme_device=$(get_next_logical_nvme_device)
+     if [ -z "$device" ]; then
+         error "no device names available for volume"
+     fi
+     logthis "next available device: $device"
++    logthis "next available nvme device: $nvme_device"
+ 
+     # create the volume
+     local tmpfile=$(mktemp /tmp/ebs-autoscale.create-volume.XXXXXXXXXX)
+@@ -323,8 +339,8 @@
+ 
+     logthis "waiting for volume $volume_id on filesystem"
+     while true; do
+-        if [ -e "$device" ]; then
+-            logthis "volume $volume_id on filesystem as $device"
++        if [ -e "$nvme_device" ]; then
++            logthis "volume $volume_id on filesystem as $nvme_device (aws device $device)"
+             break
+         fi
+         sleep 1
+@@ -338,7 +354,7 @@
+     > /dev/null
+     logthis "volume $volume_id DeleteOnTermination ENABLED"
+ 
+-    echo $device
++    echo "$nvme_device"n1
+ }
+ 
+ create_and_attach_volume

commit 3fac6017ba16634a38d80a49e1ae1a47585e95b9
Author: Ward Vandewege <ward at curii.com>
Date:   Mon Feb 21 09:32:08 2022 -0500

    18772: document the AWS EBS autoscaler support. Refactor the "Build a
           cloud compute node image" documentation page to improve the flow.
    
    Arvados-DCO-1.1-Signed-off-by: Ward Vandewege <ward at curii.com>

diff --git a/doc/install/crunch2-cloud/install-compute-node.html.textile.liquid b/doc/install/crunch2-cloud/install-compute-node.html.textile.liquid
index 89771514e..979bbad25 100644
--- a/doc/install/crunch2-cloud/install-compute-node.html.textile.liquid
+++ b/doc/install/crunch2-cloud/install-compute-node.html.textile.liquid
@@ -17,8 +17,11 @@ SPDX-License-Identifier: CC-BY-SA-3.0
 # "Create an SSH keypair":#sshkeypair
 # "Compute image requirements":#requirements
 # "The build script":#building
+# "DNS resolution":#dns-resolution
+# "NVIDIA GPU support":#nvidia
 # "Singularity mksquashfs configuration":#singularity_mksquashfs_configuration
 # "Build an AWS image":#aws
+## "Autoscaling compute node scratch space":#aws-ebs-autoscaler
 # "Build an Azure image":#azure
 
 h2(#introduction). Introduction
@@ -56,12 +59,6 @@ foktmqOY8MyctzFgXBpGTxPliGjqo8OkrOyQP2g+FL7v+Km31Xs61P8=
 </code></pre>
 </notextile>
 
-{% assign show_docker_warning = true %}
-
-{% include 'singularity_mksquashfs_configuration' %}
-
-The desired amount of memory to make available for @mksquashfs@ can be configured in an argument to "the build script":#building. It defaults to @256M at .
-
 h2(#requirements). Compute image requirements
 
 Arvados comes with a build script to automate the creation of a suitable compute node image (see "The build script":#building below). It is provided as a convenience. It is also possible to create a compute node image via other means. These are the requirements:
@@ -101,6 +98,8 @@ Options:
       VPC id for AWS, otherwise packer will pick the default one
   --aws-subnet-id
       Subnet id for AWS otherwise packer will pick the default one for the VPC
+  --aws-ebs-autoscale (default: false)
+      Install the AWS EBS autoscaler daemon.
   --gcp-project-id (default: false, required if building for GCP)
       GCP project id
   --gcp-account-file (default: false, required if building for GCP)
@@ -131,10 +130,29 @@ Options:
       Output debug information
 </code></pre></notextile>
 
-h2(#building). NVIDIA GPU support
+h2(#dns-resolution). DNS resolution
+
+Compute nodes must be able to resolve the hostnames of the API server and any keepstore servers to your internal IP addresses. You can do this by running an internal DNS resolver. The IP address of the resolver should be passed as the value for the @--resolver@ argument to "the build script":#building.
+
+Alternatively, the services could be hardcoded into an @/etc/hosts@ file. For example:
+
+<notextile><pre><code>10.20.30.40     <span class="userinput">ClusterID.example.com</span>
+10.20.30.41     <span class="userinput">keep1.ClusterID.example.com</span>
+10.20.30.42     <span class="userinput">keep2.ClusterID.example.com</span>
+</code></pre></notextile>
+
+Adding these lines to the @/etc/hosts@ file in the compute node image could be done with a small change to the Packer template and the @scripts/base.sh@ script, which will be left as an exercise for the reader.
+
+h2(#nvidia). NVIDIA GPU support
 
 If you plan on using instance types with NVIDIA GPUs, add @--nvidia-gpu-support@ to the build command line.  Arvados uses the same compute image for both GPU and non-GPU instance types.  The GPU tooling is ignored when using the image with a non-GPU instance type.
 
+{% assign show_docker_warning = true %}
+
+{% include 'singularity_mksquashfs_configuration' %}
+
+The desired amount of memory to make available for @mksquashfs@ can be configured in an argument to "the build script":#building. It defaults to @256M at .
+
 h2(#aws). Build an AWS image
 
 <notextile><pre><code>~$ <span class="userinput">./build.sh --json-file arvados-images-aws.json \
@@ -155,17 +173,26 @@ For @ClusterID@, fill in your cluster ID. The @VPC@ and @Subnet@ should be confi
 
 @ArvadosDispatchCloudPublicKeyPath@ should be replaced with the path to the ssh *public* key file generated in "Create an SSH keypair":#sshkeypair, above.
 
-Compute nodes must be able to resolve the hostnames of the API server and any keepstore servers to your internal IP addresses. You can do this by running an internal DNS resolver. The IP address of the resolver should replace the string @ResolverIP@ in the command above.
-
-Alternatively, the services could be hardcoded into an @/etc/hosts@ file. For example:
-
-<notextile><pre><code>10.20.30.40     <span class="userinput">ClusterID.example.com</span>
-10.20.30.41     <span class="userinput">keep1.ClusterID.example.com</span>
-10.20.30.42     <span class="userinput">keep2.ClusterID.example.com</span>
+h3(#aws-ebs-autoscaler). Autoscaling compute node scratch space
+
+If you want to add the AWS EBS autoscaler daemon in your images, add the @--aws-ebs-autoscale@ flag to the "the build script":#building. Doing so will make the compute image scratch space scale automatically as needed. The @Containers/InstanceTypes@ list should be modified so that all @AddedScratch@ lines are removed, and the @IncludedScratch@ value should be set to a (fictional) high number. This way, the scratch space requirements will be met by all the defined instance type. For example:
+
+<notextile><pre><code>    InstanceTypes:
+      c5large:
+        ProviderType: c5.large
+        VCPUs: 2
+        RAM: 4GiB
+        IncludedScratch: 16TB
+        Price: 0.085
+      m5large:
+        ProviderType: m5.large
+        VCPUs: 2
+        RAM: 8GiB
+        IncludedScratch: 16TB
+        Price: 0.096
+...
 </code></pre></notextile>
 
-Adding these lines to the @/etc/hosts@ file in the compute node image could be done with a small change to the Packer template and the @scripts/base.sh@ script, which will be left as an exercise for the reader.
-
 h2(#azure). Build an Azure image
 
 <notextile><pre><code>~$ <span class="userinput">./build.sh --json-file arvados-images-azure.json \
@@ -195,14 +222,3 @@ These secrets can be generated from the Azure portal, or with the cli using a co
 </code></pre></notextile>
 
 @ArvadosDispatchCloudPublicKeyPath@ should be replaced with the path to the ssh *public* key file generated in "Create an SSH keypair":#sshkeypair, above.
-
-Compute nodes must be able to resolve the hostnames of the API server and any keepstore servers to your internal IP addresses. You can do this by running an internal DNS resolver. The IP address of the resolver should replace the string @ResolverIP@ in the command above.
-
-Alternatively, the services could be hardcoded into an @/etc/hosts@ file. For example:
-
-<notextile><pre><code>10.20.30.40     <span class="userinput">ClusterID.example.com</span>
-10.20.30.41     <span class="userinput">keep1.ClusterID.example.com</span>
-10.20.30.42     <span class="userinput">keep2.ClusterID.example.com</span>
-</code></pre></notextile>
-
-Adding these lines to the @/etc/hosts@ file in the compute node image could be done with a small change to the Packer template and the @scripts/base.sh@ script, which will be left as an exercise for the reader.

commit c77a4f8c25aa420b9c2173109601b55b3527a20d
Author: Ward Vandewege <ward at curii.com>
Date:   Mon Feb 21 09:08:30 2022 -0500

    18772: consistently use bash comparison operators in build.sh
    
    Arvados-DCO-1.1-Signed-off-by: Ward Vandewege <ward at curii.com>

diff --git a/tools/compute-images/build.sh b/tools/compute-images/build.sh
index ea7676db1..769b9a5b5 100755
--- a/tools/compute-images/build.sh
+++ b/tools/compute-images/build.sh
@@ -193,7 +193,7 @@ while [ $# -gt 0 ]; do
 done
 
 
-if [[ "$JSON_FILE" == "" ]] || [[ ! -f "$JSON_FILE" ]]; then
+if [[ -z "$JSON_FILE" ]] || [[ ! -f "$JSON_FILE" ]]; then
   echo >&2 "$helpmessage"
   echo >&2
   echo >&2 "ERROR: packer json file not found"
@@ -209,7 +209,7 @@ if [[ -z "$ARVADOS_CLUSTER_ID" ]]; then
   exit 1
 fi
 
-if [[ "$PUBLIC_KEY_FILE" == "" ]] || [[ ! -f "$PUBLIC_KEY_FILE" ]]; then
+if [[ -z "$PUBLIC_KEY_FILE" ]] || [[ ! -f "$PUBLIC_KEY_FILE" ]]; then
   echo >&2 "$helpmessage"
   echo >&2
   echo >&2 "ERROR: public key file file not found"
@@ -228,66 +228,64 @@ fi
 
 EXTRA2=""
 
-if [[ "$AWS_SOURCE_AMI" != "" ]]; then
+if [[ -n "$AWS_SOURCE_AMI" ]]; then
   EXTRA2+=" -var aws_source_ami=$AWS_SOURCE_AMI"
 fi
-if [[ "$AWS_PROFILE" != "" ]]; then
+if [[ -n "$AWS_PROFILE" ]]; then
   EXTRA2+=" -var aws_profile=$AWS_PROFILE"
 fi
-if [[ "$AWS_VPC_ID" != "" ]]; then
+if [[ -n "$AWS_VPC_ID" ]]; then
   EXTRA2+=" -var vpc_id=$AWS_VPC_ID -var associate_public_ip_address=true "
 fi
-if [[ "$AWS_SUBNET_ID" != "" ]]; then
+if [[ -n "$AWS_SUBNET_ID" ]]; then
   EXTRA2+=" -var subnet_id=$AWS_SUBNET_ID -var associate_public_ip_address=true "
 fi
-if [[ "$AWS_DEFAULT_REGION" != "" ]]; then
+if [[ -n "$AWS_DEFAULT_REGION" ]]; then
   EXTRA2+=" -var aws_default_region=$AWS_DEFAULT_REGION"
 fi
-if [[ "$AWS_EBS_AUTOSCALE" != "" ]]; then
+if [[ -n "$AWS_EBS_AUTOSCALE" ]]; then
   EXTRA2+=" -var aws_ebs_autoscale=$AWS_EBS_AUTOSCALE"
 fi
-if [[ "$GCP_PROJECT_ID" != "" ]]; then
+if [[ -n "$GCP_PROJECT_ID" ]]; then
   EXTRA2+=" -var project_id=$GCP_PROJECT_ID"
 fi
-if [[ "$GCP_ACCOUNT_FILE" != "" ]]; then
+if [[ -n "$GCP_ACCOUNT_FILE" ]]; then
   EXTRA2+=" -var account_file=$GCP_ACCOUNT_FILE"
 fi
-if [[ "$GCP_ZONE" != "" ]]; then
+if [[ -n "$GCP_ZONE" ]]; then
   EXTRA2+=" -var zone=$GCP_ZONE"
 fi
-if [[ "$AZURE_RESOURCE_GROUP" != "" ]]; then
+if [[ -n "$AZURE_RESOURCE_GROUP" ]]; then
   EXTRA2+=" -var resource_group=$AZURE_RESOURCE_GROUP"
 fi
-if [[ "$AZURE_LOCATION" != "" ]]; then
+if [[ -n "$AZURE_LOCATION" ]]; then
   EXTRA2+=" -var location=$AZURE_LOCATION"
 fi
-if [[ "$AZURE_SKU" != "" ]]; then
+if [[ -n "$AZURE_SKU" ]]; then
   EXTRA2+=" -var image_sku=$AZURE_SKU"
 fi
-if [[ "$AZURE_CLOUD_ENVIRONMENT" != "" ]]; then
+if [[ -n "$AZURE_CLOUD_ENVIRONMENT" ]]; then
   EXTRA2+=" -var cloud_environment_name=$AZURE_CLOUD_ENVIRONMENT"
 fi
-if [[ "$SSH_USER" != "" ]]; then
+if [[ -n "$SSH_USER" ]]; then
   EXTRA2+=" -var ssh_user=$SSH_USER"
 fi
-if [[ "$RESOLVER" != "" ]]; then
+if [[ -n "$RESOLVER" ]]; then
   EXTRA2+=" -var resolver=$RESOLVER"
 fi
-if [[ "$REPOSUFFIX" != "" ]]; then
+if [[ -n "$REPOSUFFIX" ]]; then
   EXTRA2+=" -var reposuffix=$REPOSUFFIX"
 fi
-if [[ "$PUBLIC_KEY_FILE" != "" ]]; then
+if [[ -n "$PUBLIC_KEY_FILE" ]]; then
   EXTRA2+=" -var public_key_file=$PUBLIC_KEY_FILE"
 fi
-if [[ "$MKSQUASHFS_MEM" != "" ]]; then
+if [[ -n "$MKSQUASHFS_MEM" ]]; then
   EXTRA2+=" -var mksquashfs_mem=$MKSQUASHFS_MEM"
 fi
-if [[ "$NVIDIA_GPU_SUPPORT" != "" ]]; then
+if [[ -n "$NVIDIA_GPU_SUPPORT" ]]; then
   EXTRA2+=" -var nvidia_gpu_support=$NVIDIA_GPU_SUPPORT"
 fi
 
-
-
 echo
 packer version
 echo

commit 4018f619affd1b5a010950ccbb7148eaadc82466
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Fri Feb 18 16:00:15 2022 -0500

    18773: Fix tests.test_submit.TestSubmit.test_arvados_jobs_image
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/sdk/cwl/tests/test_submit.py b/sdk/cwl/tests/test_submit.py
index ce5e8c55c..10443359b 100644
--- a/sdk/cwl/tests/test_submit.py
+++ b/sdk/cwl/tests/test_submit.py
@@ -1062,6 +1062,7 @@ class TestSubmit(unittest.TestCase):
         arvrunner.project_uuid = ""
         api.return_value = mock.MagicMock()
         arvrunner.api = api.return_value
+        arvrunner.runtimeContext.match_local_docker = False
         arvrunner.api.links().list().execute.side_effect = ({"items": [{"created_at": "",
                                                                         "head_uuid": "zzzzz-4zz18-zzzzzzzzzzzzzzb",
                                                                         "link_class": "docker_image_repo+tag",

commit 367aa69a24ee56f77417f4b43b72987179eda809
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Fri Feb 18 14:24:14 2022 -0500

    18773: Add --match-submitter-images to docs
    
    Also update cwltool dependency for a bugfix.
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/doc/user/cwl/cwl-run-options.html.textile.liquid b/doc/user/cwl/cwl-run-options.html.textile.liquid
index a1c102593..d331dad87 100644
--- a/doc/user/cwl/cwl-run-options.html.textile.liquid
+++ b/doc/user/cwl/cwl-run-options.html.textile.liquid
@@ -51,6 +51,7 @@ table(table table-bordered table-condensed).
 |==--submit-runner-ram== SUBMIT_RUNNER_RAM|RAM (in MiB) required for the workflow runner (default 1024)|
 |==--submit-runner-image== SUBMIT_RUNNER_IMAGE|Docker image for workflow runner|
 |==--always-submit-runner==|When invoked with --submit --wait, always submit a runner to manage the workflow, even when only running a single CommandLineTool|
+|==--match-submitter-images==|Where Arvados has more than one Docker image of the same name, use image from the Docker instance on the submitting node.|
 |==--submit-request-uuid== UUID|Update and commit to supplied container request instead of creating a new one.|
 |==--submit-runner-cluster== CLUSTER_ID|Submit workflow runner to a remote cluster|
 |==--name NAME==|Name to use for workflow execution instance.|
diff --git a/doc/user/cwl/cwl-runner.html.textile.liquid b/doc/user/cwl/cwl-runner.html.textile.liquid
index b108de551..07663849a 100644
--- a/doc/user/cwl/cwl-runner.html.textile.liquid
+++ b/doc/user/cwl/cwl-runner.html.textile.liquid
@@ -113,6 +113,14 @@ h3. Work reuse
 
 Workflows submitted with @arvados-cwl-runner@ will take advantage of Arvados job reuse.  If you submit a workflow which is identical to one that has run before, it will short cut the execution and return the result of the previous run.  This also applies to individual workflow steps.  For example, a two step workflow where the first step has run before will reuse results for first step and only execute the new second step.  You can disable this behavior with @--disable-reuse at .
 
+h3(#docker). Docker images
+
+Docker images referenced by the workflow must be uploaded to Arvados.  This requires @docker@ to be installed and usable by the user running @arvados-cwl-runner at .  If the image is not present in the local Docker instance, @arvados-cwl-runner@ will first attempt to pull the image using @docker pull@, then upload it.
+
+If there is already a Docker image in Arvados with the same name, it will use the existing image.  In this case, the submitter will not use Docker.
+
+The @--match-submitter-images@ option will check the id of the image in the local Docker instance and compare it to the id of the image already in Arvados with the same name and tag.  If they are different, it will choose the image matching the local image id, which will be uploaded it if necessary.  This helpful for development, if you locally rebuild the image with the 'latest' tag, the @--match-submitter-images@ will ensure that the newer version is used.
+
 h3. Command line options
 
 See "arvados-cwl-runner options":{{site.baseurl}}/user/cwl/cwl-run-options.html
diff --git a/sdk/cwl/setup.py b/sdk/cwl/setup.py
index e1883d737..e126d170b 100644
--- a/sdk/cwl/setup.py
+++ b/sdk/cwl/setup.py
@@ -36,7 +36,7 @@ setup(name='arvados-cwl-runner',
       # file to determine what version of cwltool and schema-salad to
       # build.
       install_requires=[
-          'cwltool==3.1.20220217190813',
+          'cwltool==3.1.20220217222804',
           'schema-salad==8.2.20211116214159',
           'arvados-python-client{}'.format(pysdk_dep),
           'setuptools',

commit ddce3cd88fad18b39eb3806ac1ec1c2306df1ae0
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Fri Feb 18 11:57:00 2022 -0500

    18773: Add test for match_local_docker behavior
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/sdk/cwl/tests/test_container.py b/sdk/cwl/tests/test_container.py
index 21ae4a7af..72774daba 100644
--- a/sdk/cwl/tests/test_container.py
+++ b/sdk/cwl/tests/test_container.py
@@ -1117,6 +1117,97 @@ class TestContainer(unittest.TestCase):
                 }))
 
 
+    # The test passes no builder.resources
+    # Hence the default resources will apply: {'cores': 1, 'ram': 1024, 'outdirSize': 1024, 'tmpdirSize': 1024}
+    @mock.patch("arvados_cwl.arvdocker.determine_image_id")
+    @mock.patch("arvados.commands.keepdocker.list_images_in_arv")
+    def test_match_local_docker(self, keepdocker, determine_image_id):
+        arvados_cwl.add_arv_hints()
+        arv_docker_clear_cache()
+
+        runner = mock.MagicMock()
+        runner.ignore_docker_for_reuse = False
+        runner.intermediate_output_ttl = 0
+        runner.secret_store = cwltool.secrets.SecretStore()
+        runner.api._rootDesc = {"revision": "20210628"}
+
+        keepdocker.return_value = [("zzzzz-4zz18-zzzzzzzzzzzzzz4", {"dockerhash": "456"}),
+                                   ("zzzzz-4zz18-zzzzzzzzzzzzzz3", {"dockerhash": "123"})]
+        determine_image_id.side_effect = lambda x: "123"
+        def execute(uuid):
+            ex = mock.MagicMock()
+            lookup = {"zzzzz-4zz18-zzzzzzzzzzzzzz4": {"portable_data_hash": "99999999999999999999999999999994+99"},
+                      "zzzzz-4zz18-zzzzzzzzzzzzzz3": {"portable_data_hash": "99999999999999999999999999999993+99"}}
+            ex.execute.return_value = lookup[uuid]
+            return ex
+        runner.api.collections().get.side_effect = execute
+
+        tool = cmap({
+            "inputs": [],
+            "outputs": [],
+            "baseCommand": "echo",
+            "arguments": [],
+            "id": "",
+            "cwlVersion": "v1.2",
+            "class": "CommandLineTool"
+        })
+
+        loadingContext, runtimeContext = self.helper(runner, True)
+
+        arvtool = cwltool.load_tool.load_tool(tool, loadingContext)
+        arvtool.formatgraph = None
+
+        container_request = {
+            'environment': {
+                'HOME': '/var/spool/cwl',
+                'TMPDIR': '/tmp'
+            },
+            'name': 'test_run_True',
+            'runtime_constraints': {
+                'vcpus': 1,
+                'ram': 268435456
+            },
+            'use_existing': True,
+            'priority': 500,
+            'mounts': {
+                '/tmp': {'kind': 'tmp',
+                         "capacity": 1073741824
+                         },
+                '/var/spool/cwl': {'kind': 'tmp',
+                                   "capacity": 1073741824 }
+            },
+            'state': 'Committed',
+            'output_name': 'Output for step test_run_True',
+            'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
+            'output_path': '/var/spool/cwl',
+            'output_ttl': 0,
+            'container_image': '99999999999999999999999999999994+99',
+            'command': ['echo'],
+            'cwd': '/var/spool/cwl',
+            'scheduling_parameters': {},
+            'properties': {},
+            'secret_mounts': {},
+            'output_storage_classes': ["default"]
+        }
+
+        runtimeContext.match_local_docker = False
+        for j in arvtool.job({}, mock.MagicMock(), runtimeContext):
+            j.run(runtimeContext)
+            runner.api.container_requests().create.assert_called_with(
+                body=JsonDiffMatcher(container_request))
+
+        arv_docker_clear_cache()
+        runtimeContext.match_local_docker = True
+        container_request['container_image'] = '99999999999999999999999999999993+99'
+        container_request['name'] = 'test_run_True_2'
+        container_request['output_name'] = 'Output for step test_run_True_2'
+        for j in arvtool.job({}, mock.MagicMock(), runtimeContext):
+            j.run(runtimeContext)
+            runner.api.container_requests().create.assert_called_with(
+                body=JsonDiffMatcher(container_request))
+
+
+
 class TestWorkflow(unittest.TestCase):
     def setUp(self):
         cwltool.process._names = set()
diff --git a/sdk/cwl/tests/test_submit.py b/sdk/cwl/tests/test_submit.py
index 8a2aa7e34..ce5e8c55c 100644
--- a/sdk/cwl/tests/test_submit.py
+++ b/sdk/cwl/tests/test_submit.py
@@ -1089,25 +1089,6 @@ class TestSubmit(unittest.TestCase):
                          arvados_cwl.runner.arvados_jobs_image(arvrunner, "arvados/jobs:"+arvados_cwl.__version__))
 
 
-    @stubs
-    def test_match_local_docker(self, stubs):
-        stubs.docker_images["debian:buster-slim"] = [("zzzzz-4zz18-zzzzzzzzzzzzzd6", {"dockerhash": "456"}),
-                                                     ("zzzzz-4zz18-zzzzzzzzzzzzzd5", {"dockerhash": "123"})]
-
-        exited = arvados_cwl.main(
-            ["--submit", "--no-wait", "--api=containers", "--debug",
-                "tests/tool/submit_tool.cwl", "tests/submit_test_job.json"],
-            stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
-
-        expect_container = {
-        }
-
-        stubs.api.container_requests().create.assert_called_with(
-            body=JsonDiffMatcher(expect_container))
-        self.assertEqual(stubs.capture_stdout.getvalue(),
-                         stubs.expect_container_request_uuid + '\n')
-        self.assertEqual(exited, 0)
-
     @stubs
     def test_submit_secrets(self, stubs):
         exited = arvados_cwl.main(

commit dc77ca59fb2315dbeede74ae901241f8792e0c5b
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Thu Feb 17 15:31:17 2022 -0500

    18773: Test WIP
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/sdk/cwl/tests/test_submit.py b/sdk/cwl/tests/test_submit.py
index 77f70851e..8a2aa7e34 100644
--- a/sdk/cwl/tests/test_submit.py
+++ b/sdk/cwl/tests/test_submit.py
@@ -67,10 +67,10 @@ def stubs(func):
 
         stubs.keep_client = keep_client2
         stubs.docker_images = {
-            "arvados/jobs:"+arvados_cwl.__version__: [("zzzzz-4zz18-zzzzzzzzzzzzzd3", "")],
-            "debian:buster-slim": [("zzzzz-4zz18-zzzzzzzzzzzzzd4", "")],
-            "arvados/jobs:123": [("zzzzz-4zz18-zzzzzzzzzzzzzd5", "")],
-            "arvados/jobs:latest": [("zzzzz-4zz18-zzzzzzzzzzzzzd6", "")],
+            "arvados/jobs:"+arvados_cwl.__version__: [("zzzzz-4zz18-zzzzzzzzzzzzzd3", {})],
+            "debian:buster-slim": [("zzzzz-4zz18-zzzzzzzzzzzzzd4", {})],
+            "arvados/jobs:123": [("zzzzz-4zz18-zzzzzzzzzzzzzd5", {})],
+            "arvados/jobs:latest": [("zzzzz-4zz18-zzzzzzzzzzzzzd6", {})],
         }
         def kd(a, b, image_name=None, image_tag=None):
             return stubs.docker_images.get("%s:%s" % (image_name, image_tag), [])
@@ -1089,6 +1089,25 @@ class TestSubmit(unittest.TestCase):
                          arvados_cwl.runner.arvados_jobs_image(arvrunner, "arvados/jobs:"+arvados_cwl.__version__))
 
 
+    @stubs
+    def test_match_local_docker(self, stubs):
+        stubs.docker_images["debian:buster-slim"] = [("zzzzz-4zz18-zzzzzzzzzzzzzd6", {"dockerhash": "456"}),
+                                                     ("zzzzz-4zz18-zzzzzzzzzzzzzd5", {"dockerhash": "123"})]
+
+        exited = arvados_cwl.main(
+            ["--submit", "--no-wait", "--api=containers", "--debug",
+                "tests/tool/submit_tool.cwl", "tests/submit_test_job.json"],
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
+
+        expect_container = {
+        }
+
+        stubs.api.container_requests().create.assert_called_with(
+            body=JsonDiffMatcher(expect_container))
+        self.assertEqual(stubs.capture_stdout.getvalue(),
+                         stubs.expect_container_request_uuid + '\n')
+        self.assertEqual(exited, 0)
+
     @stubs
     def test_submit_secrets(self, stubs):
         exited = arvados_cwl.main(

commit d9410492974304b76f16b1c83a99190cd315421f
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Thu Feb 17 14:37:51 2022 -0500

    18773: --match-submitter-images to compare the local docker image with Arvados
    
    Consults local list of Docker images to find the image id and chooses
    that exact one, uploading it if necessary.
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/sdk/cwl/arvados_cwl/__init__.py b/sdk/cwl/arvados_cwl/__init__.py
index 734945c0f..826467cc0 100644
--- a/sdk/cwl/arvados_cwl/__init__.py
+++ b/sdk/cwl/arvados_cwl/__init__.py
@@ -152,6 +152,10 @@ def arg_parser():  # type: () -> argparse.ArgumentParser
                         help="When invoked with --submit --wait, always submit a runner to manage the workflow, even when only running a single CommandLineTool",
                         default=False)
 
+    parser.add_argument("--match-submitter-images", action="store_true",
+                        default=False, dest="match_local_docker",
+                        help="Where Arvados has more than one Docker image of the same name, use image from the Docker instance on the submitting node.")
+
     exgroup = parser.add_mutually_exclusive_group()
     exgroup.add_argument("--submit-request-uuid",
                          default=None,
diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py
index 3c7e9cfaa..753c2c250 100644
--- a/sdk/cwl/arvados_cwl/arvcontainer.py
+++ b/sdk/cwl/arvados_cwl/arvcontainer.py
@@ -246,7 +246,8 @@ class ArvadosContainer(JobBase):
                                                                     runtimeContext.pull_image,
                                                                     runtimeContext.project_uuid,
                                                                     runtimeContext.force_docker_pull,
-                                                                    runtimeContext.tmp_outdir_prefix)
+                                                                    runtimeContext.tmp_outdir_prefix,
+                                                                    runtimeContext.match_local_docker)
 
         network_req, _ = self.get_requirement("NetworkAccess")
         if network_req:
diff --git a/sdk/cwl/arvados_cwl/arvdocker.py b/sdk/cwl/arvados_cwl/arvdocker.py
index 26408317c..04e2a4cff 100644
--- a/sdk/cwl/arvados_cwl/arvdocker.py
+++ b/sdk/cwl/arvados_cwl/arvdocker.py
@@ -6,6 +6,8 @@ import logging
 import sys
 import threading
 import copy
+import re
+import subprocess
 
 from schema_salad.sourceline import SourceLine
 
@@ -18,8 +20,44 @@ logger = logging.getLogger('arvados.cwl-runner')
 cached_lookups = {}
 cached_lookups_lock = threading.Lock()
 
+def determine_image_id(dockerImageId):
+    for line in (
+        subprocess.check_output(  # nosec
+            ["docker", "images", "--no-trunc", "--all"]
+        )
+        .decode("utf-8")
+        .splitlines()
+    ):
+        try:
+            match = re.match(r"^([^ ]+)\s+([^ ]+)\s+([^ ]+)", line)
+            split = dockerImageId.split(":")
+            if len(split) == 1:
+                split.append("latest")
+            elif len(split) == 2:
+                #  if split[1] doesn't  match valid tag names, it is a part of repository
+                if not re.match(r"[\w][\w.-]{0,127}", split[1]):
+                    split[0] = split[0] + ":" + split[1]
+                    split[1] = "latest"
+            elif len(split) == 3:
+                if re.match(r"[\w][\w.-]{0,127}", split[2]):
+                    split[0] = split[0] + ":" + split[1]
+                    split[1] = split[2]
+                    del split[2]
+
+            # check for repository:tag match or image id match
+            if match and (
+                (split[0] == match.group(1) and split[1] == match.group(2))
+                or dockerImageId == match.group(3)
+            ):
+                return match.group(3)
+        except ValueError:
+            pass
+
+    return None
+
+
 def arv_docker_get_image(api_client, dockerRequirement, pull_image, project_uuid,
-                         force_pull, tmp_outdir_prefix):
+                         force_pull, tmp_outdir_prefix, match_local_docker):
     """Check if a Docker image is available in Keep, if not, upload it using arv-keepdocker."""
 
     if "http://arvados.org/cwl#dockerCollectionPDH" in dockerRequirement:
@@ -46,6 +84,20 @@ def arv_docker_get_image(api_client, dockerRequirement, pull_image, project_uuid
                                                                 image_name=image_name,
                                                                 image_tag=image_tag)
 
+        if images and match_local_docker:
+            local_image_id = determine_image_id(dockerRequirement["dockerImageId"])
+            if local_image_id:
+                # find it in the list
+                found = False
+                for i in images:
+                    if i[1]["dockerhash"] == local_image_id:
+                        found = True
+                        images = [i]
+                        break
+                if not found:
+                    # force re-upload.
+                    images = []
+
         if not images:
             # Fetch Docker image if necessary.
             try:
diff --git a/sdk/cwl/arvados_cwl/context.py b/sdk/cwl/arvados_cwl/context.py
index 1e04dd577..4239dd3b5 100644
--- a/sdk/cwl/arvados_cwl/context.py
+++ b/sdk/cwl/arvados_cwl/context.py
@@ -36,6 +36,7 @@ class ArvRuntimeContext(RuntimeContext):
         self.cluster_target_id = 0
         self.always_submit_runner = False
         self.collection_cache_size = 256
+        self.match_local_docker = False
 
         super(ArvRuntimeContext, self).__init__(kwargs)
 
diff --git a/sdk/cwl/arvados_cwl/runner.py b/sdk/cwl/arvados_cwl/runner.py
index 7d6d287a2..ad17950a2 100644
--- a/sdk/cwl/arvados_cwl/runner.py
+++ b/sdk/cwl/arvados_cwl/runner.py
@@ -461,12 +461,14 @@ def upload_docker(arvrunner, tool):
                     "Option 'dockerOutputDirectory' of DockerRequirement not supported.")
             arvados_cwl.arvdocker.arv_docker_get_image(arvrunner.api, docker_req, True, arvrunner.project_uuid,
                                                        arvrunner.runtimeContext.force_docker_pull,
-                                                       arvrunner.runtimeContext.tmp_outdir_prefix)
+                                                       arvrunner.runtimeContext.tmp_outdir_prefix,
+                                                       arvrunner.runtimeContext.match_local_docker)
         else:
             arvados_cwl.arvdocker.arv_docker_get_image(arvrunner.api, {"dockerPull": "arvados/jobs:"+__version__},
                                                        True, arvrunner.project_uuid,
                                                        arvrunner.runtimeContext.force_docker_pull,
-                                                       arvrunner.runtimeContext.tmp_outdir_prefix)
+                                                       arvrunner.runtimeContext.tmp_outdir_prefix,
+                                                       arvrunner.runtimeContext.match_local_docker)
     elif isinstance(tool, cwltool.workflow.Workflow):
         for s in tool.steps:
             upload_docker(arvrunner, s.embedded_tool)
@@ -503,7 +505,8 @@ def packed_workflow(arvrunner, tool, merged_map):
                 v["http://arvados.org/cwl#dockerCollectionPDH"] = arvados_cwl.arvdocker.arv_docker_get_image(arvrunner.api, v, True,
                                                                                                              arvrunner.project_uuid,
                                                                                                              arvrunner.runtimeContext.force_docker_pull,
-                                                                                                             arvrunner.runtimeContext.tmp_outdir_prefix)
+                                                                                                             arvrunner.runtimeContext.tmp_outdir_prefix,
+                                                                                                             arvrunner.runtimeContext.match_local_docker)
             for l in v:
                 visit(v[l], cur_id)
         if isinstance(v, list):
@@ -610,7 +613,8 @@ def arvados_jobs_image(arvrunner, img):
     try:
         return arvados_cwl.arvdocker.arv_docker_get_image(arvrunner.api, {"dockerPull": img}, True, arvrunner.project_uuid,
                                                           arvrunner.runtimeContext.force_docker_pull,
-                                                          arvrunner.runtimeContext.tmp_outdir_prefix)
+                                                          arvrunner.runtimeContext.tmp_outdir_prefix,
+                                                          arvrunner.runtimeContext.match_local_docker)
     except Exception as e:
         raise Exception("Docker image %s is not available\n%s" % (img, e) )
 

commit bd354c220f5155b4ac539bf889d5813004be1b4b
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Fri Feb 18 09:50:16 2022 -0500

    Fix arvbox-demo refs #18789
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/tools/arvbox/lib/arvbox/docker/common.sh b/tools/arvbox/lib/arvbox/docker/common.sh
index 5e16264c8..cb41227c9 100644
--- a/tools/arvbox/lib/arvbox/docker/common.sh
+++ b/tools/arvbox/lib/arvbox/docker/common.sh
@@ -13,7 +13,7 @@ export R_LIBS=/var/lib/Rlibs
 export HOME=$(getent passwd arvbox | cut -d: -f6)
 export ARVADOS_CONTAINER_PATH=/var/lib/arvados-arvbox
 export GEM_HOME=$HOME/.gem
-GEMLOCK=$GEM_HOME/gems.lock
+GEMLOCK=$HOME/gems.lock
 
 defaultdev=$(/sbin/ip route|awk '/default/ { print $5 }')
 dockerip=$(/sbin/ip route | grep default | awk '{ print $3 }')

commit d6d3a01e41965d75c45ec187628385d14fa1b73e
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Thu Feb 17 20:36:11 2022 -0500

    Readjust arvbox GEM_HOME to ~/.gem to avoid stepping on base image gems
    
    Instead of replacing the entire /var/lib/arvados/lib/ruby/gems
    directory, mount it in the place of the default user directory that
    'gem' already looks for.
    
    refs #18789
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/tools/arvbox/bin/arvbox b/tools/arvbox/bin/arvbox
index e7d03677e..e021b442f 100755
--- a/tools/arvbox/bin/arvbox
+++ b/tools/arvbox/bin/arvbox
@@ -143,7 +143,7 @@ docker_run_dev() {
            "--volume=$PG_DATA:/var/lib/postgresql:rw" \
            "--volume=$VAR_DATA:$ARVADOS_CONTAINER_PATH:rw" \
            "--volume=$PASSENGER:/var/lib/passenger:rw" \
-	   "--volume=$GEMS:/var/lib/arvados/lib/ruby/gems:rw" \
+	   "--volume=$GEMS:/var/lib/arvados-arvbox/.gem:rw" \
            "--volume=$PIPCACHE:/var/lib/pip:rw" \
            "--volume=$NPMCACHE:/var/lib/npm:rw" \
            "--volume=$GOSTUFF:/var/lib/gopath:rw" \
diff --git a/tools/arvbox/lib/arvbox/docker/common.sh b/tools/arvbox/lib/arvbox/docker/common.sh
index c44e7c410..5e16264c8 100644
--- a/tools/arvbox/lib/arvbox/docker/common.sh
+++ b/tools/arvbox/lib/arvbox/docker/common.sh
@@ -12,7 +12,8 @@ export npm_config_cache_min=Infinity
 export R_LIBS=/var/lib/Rlibs
 export HOME=$(getent passwd arvbox | cut -d: -f6)
 export ARVADOS_CONTAINER_PATH=/var/lib/arvados-arvbox
-GEMLOCK=/var/lib/arvados/lib/ruby/gems/gems.lock
+export GEM_HOME=$HOME/.gem
+GEMLOCK=$GEM_HOME/gems.lock
 
 defaultdev=$(/sbin/ip route|awk '/default/ { print $5 }')
 dockerip=$(/sbin/ip route | grep default | awk '{ print $3 }')
@@ -63,7 +64,7 @@ else
 fi
 
 run_bundler() {
-    flock $GEMLOCK /var/lib/arvados/bin/gem install --no-document bundler:$BUNDLER_VERSION
+    flock $GEMLOCK /var/lib/arvados/bin/gem install --no-document --user bundler:$BUNDLER_VERSION
     if test -f Gemfile.lock ; then
         frozen=--frozen
     else
diff --git a/tools/arvbox/lib/arvbox/docker/service/ready/run-service b/tools/arvbox/lib/arvbox/docker/service/ready/run-service
index 6ec788589..5007fe0be 100755
--- a/tools/arvbox/lib/arvbox/docker/service/ready/run-service
+++ b/tools/arvbox/lib/arvbox/docker/service/ready/run-service
@@ -63,7 +63,7 @@ fi
 
 if ! [[ -z "$waiting" ]] ; then
     if ps x | grep -v grep | grep "bundle install" > /dev/null; then
-        gemcount=$(ls /var/lib/arvados/lib/ruby/gems/*/gems 2>/dev/null | wc -l)
+        gemcount=$(ls /var/lib/arvados/lib/ruby/gems/*/gems /var/lib/arvados-arvbox/.gem/ruby/*/gems 2>/dev/null | wc -l)
 
         gemlockcount=0
         for l in /usr/src/arvados/services/api/Gemfile.lock \

commit cda738f2e87224ac813a8241d838454a35951bd8
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Thu Feb 17 15:49:45 2022 -0500

    Update cwltool dependency for restored Python 3.6 compatability
    
    refs #18766
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/sdk/cwl/setup.py b/sdk/cwl/setup.py
index e73923595..e1883d737 100644
--- a/sdk/cwl/setup.py
+++ b/sdk/cwl/setup.py
@@ -36,7 +36,7 @@ setup(name='arvados-cwl-runner',
       # file to determine what version of cwltool and schema-salad to
       # build.
       install_requires=[
-          'cwltool==3.1.20220210171524',
+          'cwltool==3.1.20220217190813',
           'schema-salad==8.2.20211116214159',
           'arvados-python-client{}'.format(pysdk_dep),
           'setuptools',

commit fe0b6872a1e3b2f14a039595877c949eb42edbbe
Author: Ward Vandewege <ward at curii.com>
Date:   Fri Feb 18 08:34:02 2022 -0500

    18676: make v2 token check more strict, and add the ["GET /"] scope to
           the new anonymous token code path.
    
    Arvados-DCO-1.1-Signed-off-by: Ward Vandewege <ward at curii.com>

diff --git a/lib/config/load.go b/lib/config/load.go
index 7136c60e4..8d498af17 100644
--- a/lib/config/load.go
+++ b/lib/config/load.go
@@ -358,7 +358,7 @@ func (ldr *Loader) checkToken(label, token string, mandatory bool, acceptV2 bool
 		if len(tmp) != 3 {
 			return fmt.Errorf("%s: unacceptable characters in token (only a-z, A-Z, 0-9 are acceptable)", label)
 		}
-		if strings.Index(token, "v2/") == -1 {
+		if !strings.HasPrefix(token, "v2/") {
 			return fmt.Errorf("%s: unacceptable characters in token (only a-z, A-Z, 0-9 are acceptable)", label)
 		}
 		ldr.Logger.Warnf("%s: token is a full V2 token, should just be a secret (remove everything up to and including the last forward slash)", label)
diff --git a/services/api/app/models/api_client_authorization.rb b/services/api/app/models/api_client_authorization.rb
index f8454029d..c74c1ce5b 100644
--- a/services/api/app/models/api_client_authorization.rb
+++ b/services/api/app/models/api_client_authorization.rb
@@ -130,7 +130,8 @@ class ApiClientAuthorization < ArvadosModel
       return ApiClientAuthorization.new(user: User.find_by_uuid(anonymous_user_uuid),
                                         uuid: Rails.configuration.ClusterID+"-gj3su-anonymouspublic",
                                         api_token: token,
-                                        api_client: anonymous_user_token_api_client)
+                                        api_client: anonymous_user_token_api_client,
+                                        scopes: ['GET /'])
     else
       return nil
     end

commit feee123922e58c7977373dfdb1167b53d80338e3
Author: Ward Vandewege <ward at curii.com>
Date:   Thu Feb 17 13:03:33 2022 -0500

    18676: be a bit more specific in testing for V2 tokens.
    
    Arvados-DCO-1.1-Signed-off-by: Ward Vandewege <ward at curii.com>

diff --git a/lib/config/load.go b/lib/config/load.go
index e412c87ff..7136c60e4 100644
--- a/lib/config/load.go
+++ b/lib/config/load.go
@@ -358,6 +358,9 @@ func (ldr *Loader) checkToken(label, token string, mandatory bool, acceptV2 bool
 		if len(tmp) != 3 {
 			return fmt.Errorf("%s: unacceptable characters in token (only a-z, A-Z, 0-9 are acceptable)", label)
 		}
+		if strings.Index(token, "v2/") == -1 {
+			return fmt.Errorf("%s: unacceptable characters in token (only a-z, A-Z, 0-9 are acceptable)", label)
+		}
 		ldr.Logger.Warnf("%s: token is a full V2 token, should just be a secret (remove everything up to and including the last forward slash)", label)
 		if !acceptableTokenRe.MatchString(tmp[2]) {
 			return fmt.Errorf("%s: unacceptable characters in V2 token secret (only a-z, A-Z, 0-9 are acceptable)", label)

commit ebc7387c7b443104aa162c4d34fb7a750cc8d5b4
Author: Ward Vandewege <ward at curii.com>
Date:   Thu Feb 17 11:07:20 2022 -0500

    18676: tolerate V2 anonymous tokens in config.yml, but generate a
           warning.
    
    Arvados-DCO-1.1-Signed-off-by: Ward Vandewege <ward at curii.com>

diff --git a/doc/admin/upgrading.html.textile.liquid b/doc/admin/upgrading.html.textile.liquid
index 5c2a36f4f..943bc3e0e 100644
--- a/doc/admin/upgrading.html.textile.liquid
+++ b/doc/admin/upgrading.html.textile.liquid
@@ -34,7 +34,7 @@ h2(#main). development main (as of 2022-02-10)
 
 h3. Anonymous token changes
 
-The anonymous token configured in @Users.AnonymousUserToken@ must now be 32 characters or longer. This was already the suggestion in the documentation, now it is enforced. The @script/get_anonymous_user_token.rb@ script that was needed to register the anonymous user token in the database has been removed. Registration of the anonymous token is no longer necessary.
+The anonymous token configured in @Users.AnonymousUserToken@ must now be 32 characters or longer. This was already the suggestion in the documentation, now it is enforced. The @script/get_anonymous_user_token.rb@ script that was needed to register the anonymous user token in the database has been removed. Registration of the anonymous token is no longer necessary. If the anonymous token in @config.yml@ is specified as a full V2 token, that will now generate a warning - it should be updated to list just the secret (i.e. the part after the last forward slash).
 
 h3. Preemptible instance types are used automatically, if any are configured
 
diff --git a/lib/config/load.go b/lib/config/load.go
index aa7520ca2..e412c87ff 100644
--- a/lib/config/load.go
+++ b/lib/config/load.go
@@ -299,10 +299,10 @@ func (ldr *Loader) Load() (*arvados.Config, error) {
 		for _, err = range []error{
 			ldr.checkClusterID(fmt.Sprintf("Clusters.%s", id), id, false),
 			ldr.checkClusterID(fmt.Sprintf("Clusters.%s.Login.LoginCluster", id), cc.Login.LoginCluster, true),
-			ldr.checkToken(fmt.Sprintf("Clusters.%s.ManagementToken", id), cc.ManagementToken, true),
-			ldr.checkToken(fmt.Sprintf("Clusters.%s.SystemRootToken", id), cc.SystemRootToken, true),
-			ldr.checkToken(fmt.Sprintf("Clusters.%s.Users.AnonymousUserToken", id), cc.Users.AnonymousUserToken, false),
-			ldr.checkToken(fmt.Sprintf("Clusters.%s.Collections.BlobSigningKey", id), cc.Collections.BlobSigningKey, true),
+			ldr.checkToken(fmt.Sprintf("Clusters.%s.ManagementToken", id), cc.ManagementToken, true, false),
+			ldr.checkToken(fmt.Sprintf("Clusters.%s.SystemRootToken", id), cc.SystemRootToken, true, false),
+			ldr.checkToken(fmt.Sprintf("Clusters.%s.Users.AnonymousUserToken", id), cc.Users.AnonymousUserToken, false, true),
+			ldr.checkToken(fmt.Sprintf("Clusters.%s.Collections.BlobSigningKey", id), cc.Collections.BlobSigningKey, true, false),
 			checkKeyConflict(fmt.Sprintf("Clusters.%s.PostgreSQL.Connection", id), cc.PostgreSQL.Connection),
 			ldr.checkEnum("Containers.LocalKeepLogsToContainerLog", cc.Containers.LocalKeepLogsToContainerLog, "none", "all", "errors"),
 			ldr.checkEmptyKeepstores(cc),
@@ -316,6 +316,11 @@ func (ldr *Loader) Load() (*arvados.Config, error) {
 				return nil, err
 			}
 		}
+		if strings.Count(cc.Users.AnonymousUserToken, "/") == 3 {
+			// V2 token, strip it to just a secret
+			tmp := strings.Split(cc.Users.AnonymousUserToken, "/")
+			cc.Users.AnonymousUserToken = tmp[2]
+		}
 	}
 	return &cfg, nil
 }
@@ -334,7 +339,7 @@ func (ldr *Loader) checkClusterID(label, clusterID string, emptyStringOk bool) e
 var acceptableTokenRe = regexp.MustCompile(`^[a-zA-Z0-9]+$`)
 var acceptableTokenLength = 32
 
-func (ldr *Loader) checkToken(label, token string, mandatory bool) error {
+func (ldr *Loader) checkToken(label, token string, mandatory bool, acceptV2 bool) error {
 	if len(token) == 0 {
 		if !mandatory {
 			// when a token is not mandatory, the acceptable length and content is only checked if its length is non-zero
@@ -345,7 +350,18 @@ func (ldr *Loader) checkToken(label, token string, mandatory bool) error {
 			}
 		}
 	} else if !acceptableTokenRe.MatchString(token) {
-		return fmt.Errorf("%s: unacceptable characters in token (only a-z, A-Z, 0-9 are acceptable)", label)
+		if !acceptV2 {
+			return fmt.Errorf("%s: unacceptable characters in token (only a-z, A-Z, 0-9 are acceptable)", label)
+		}
+		// Test for a proper V2 token
+		tmp := strings.SplitN(token, "/", 3)
+		if len(tmp) != 3 {
+			return fmt.Errorf("%s: unacceptable characters in token (only a-z, A-Z, 0-9 are acceptable)", label)
+		}
+		ldr.Logger.Warnf("%s: token is a full V2 token, should just be a secret (remove everything up to and including the last forward slash)", label)
+		if !acceptableTokenRe.MatchString(tmp[2]) {
+			return fmt.Errorf("%s: unacceptable characters in V2 token secret (only a-z, A-Z, 0-9 are acceptable)", label)
+		}
 	} else if len(token) < acceptableTokenLength {
 		if ldr.Logger != nil {
 			ldr.Logger.Warnf("%s: token is too short (should be at least %d characters)", label, acceptableTokenLength)

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list