[arvados] created: 2.7.1-20-g5ec8ceec97

Thu Feb 8 14:50:47 UTC 2024

at  5ec8ceec97c75ad8f9c9a850a5c248ebade33198 (commit)


commit 5ec8ceec97c75ad8f9c9a850a5c248ebade33198
Author: Brett Smith <brett.smith at curii.com>
Date:   Fri Feb 2 10:30:10 2024 -0500

    Merge branch '21417-keepdocker-oci-layout'
    
    Closes #21417.
    
    Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith at curii.com>

diff --git a/sdk/python/arvados/commands/keepdocker.py b/sdk/python/arvados/commands/keepdocker.py
index 7b7367080f..6823ee1bea 100644
--- a/sdk/python/arvados/commands/keepdocker.py
+++ b/sdk/python/arvados/commands/keepdocker.py
@@ -2,34 +2,29 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-from builtins import next
 import argparse
 import collections
 import datetime
 import errno
+import fcntl
 import json
+import logging
 import os
 import re
+import subprocess
 import sys
 import tarfile
 import tempfile
-import shutil
-import _strptime
-import fcntl
+
+import ciso8601
 from operator import itemgetter
 from stat import *
 
-import subprocess
-
 import arvados
+import arvados.config
 import arvados.util
 import arvados.commands._util as arv_cmd
 import arvados.commands.put as arv_put
-from arvados.collection import CollectionReader
-import ciso8601
-import logging
-import arvados.config
-
 from arvados._version import __version__
 
 logger = logging.getLogger('arvados.keepdocker')
@@ -356,6 +351,25 @@ def _uuid2pdh(api, uuid):
         select=['portable_data_hash'],
     ).execute()['items'][0]['portable_data_hash']
 
+def load_image_metadata(image_file):
+    """Load an image manifest and config from an archive
+
+    Given an image archive as an open binary file object, this function loads
+    the image manifest and configuration, deserializing each from JSON and
+    returning them in a 2-tuple of dicts.
+    """
+    image_file.seek(0)
+    with tarfile.open(fileobj=image_file) as image_tar:
+        with image_tar.extractfile('manifest.json') as manifest_file:
+            image_manifest_list = json.load(manifest_file)
+        # Because arv-keepdocker only saves one image, there should only be
+        # one manifest.  This extracts that from the list and raises
+        # ValueError if there's not exactly one.
+        image_manifest, = image_manifest_list
+        with image_tar.extractfile(image_manifest['Config']) as config_file:
+            image_config = json.load(config_file)
+    return image_manifest, image_config
+
 def main(arguments=None, stdout=sys.stdout, install_sig_handlers=True, api=None):
     args = arg_parser.parse_args(arguments)
     if api is None:
@@ -532,21 +546,9 @@ def main(arguments=None, stdout=sys.stdout, install_sig_handlers=True, api=None)
         # Managed properties could be already set
         coll_properties = api.collections().get(uuid=coll_uuid).execute(num_retries=args.retries).get('properties', {})
         coll_properties.update({"docker-image-repo-tag": image_repo_tag})
-
         api.collections().update(uuid=coll_uuid, body={"properties": coll_properties}).execute(num_retries=args.retries)
 
-        # Read the image metadata and make Arvados links from it.
-        image_file.seek(0)
-        image_tar = tarfile.open(fileobj=image_file)
-        image_hash_type, _, raw_image_hash = image_hash.rpartition(':')
-        if image_hash_type:
-            json_filename = raw_image_hash + '.json'
-        else:
-            json_filename = raw_image_hash + '/json'
-        json_file = image_tar.extractfile(image_tar.getmember(json_filename))
-        image_metadata = json.loads(json_file.read().decode('utf-8'))
-        json_file.close()
-        image_tar.close()
+        _, image_metadata = load_image_metadata(image_file)
         link_base = {'head_uuid': coll_uuid, 'properties': {}}
         if 'created' in image_metadata:
             link_base['properties']['image_timestamp'] = image_metadata['created']
diff --git a/sdk/python/tests/data/hello-world-ManifestV2-OCILayout.tar b/sdk/python/tests/data/hello-world-ManifestV2-OCILayout.tar
new file mode 100644
index 0000000000..a4b3d86390
Binary files /dev/null and b/sdk/python/tests/data/hello-world-ManifestV2-OCILayout.tar differ
diff --git a/sdk/python/tests/data/hello-world-ManifestV2.tar b/sdk/python/tests/data/hello-world-ManifestV2.tar
new file mode 100644
index 0000000000..b98e7c7acd
Binary files /dev/null and b/sdk/python/tests/data/hello-world-ManifestV2.tar differ
diff --git a/sdk/python/tests/data/hello-world-README.txt b/sdk/python/tests/data/hello-world-README.txt
new file mode 100644
index 0000000000..8c6a7de31e
--- /dev/null
+++ b/sdk/python/tests/data/hello-world-README.txt
@@ -0,0 +1,25 @@
+The hello-world-*.tar files are archived from the official Docker
+hello-world:latest image available on 2024-02-01,
+sha256:d2c94e258dcb3c5ac2798d32e1249e42ef01cba4841c2234249495f87264ac5a.
+<https://github.com/docker-library/hello-world/tree/a2269bdb107d086851a5e3d448cf47770b50bff7>
+
+Copyright (c) 2014 Docker, Inc.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/sdk/python/tests/test_arv_keepdocker.py b/sdk/python/tests/test_arv_keepdocker.py
index 526fd68727..9aebc03504 100644
--- a/sdk/python/tests/test_arv_keepdocker.py
+++ b/sdk/python/tests/test_arv_keepdocker.py
@@ -2,23 +2,24 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-from __future__ import absolute_import
 import arvados
 import collections
+import collections.abc
 import copy
 import hashlib
+import logging
 import mock
 import os
 import subprocess
 import sys
 import tempfile
 import unittest
-import logging
+from pathlib import Path
+
+import parameterized
 
 import arvados.commands.keepdocker as arv_keepdocker
 from . import arvados_testutil as tutil
-from . import run_test_server
-
 
 class StopTest(Exception):
     pass
@@ -226,3 +227,30 @@ class ArvKeepdockerTestCase(unittest.TestCase, tutil.VersionChecker):
         api().collections().update.assert_called_with(
             uuid=mocked_collection['uuid'],
             body={'properties': updated_properties})
+
+
+ at parameterized.parameterized_class(('filename',), [
+    ('hello-world-ManifestV2.tar',),
+    ('hello-world-ManifestV2-OCILayout.tar',),
+])
+class ImageMetadataTestCase(unittest.TestCase):
+    DATA_PATH = Path(__file__).parent / 'data'
+
+    @classmethod
+    def setUpClass(cls):
+        cls.image_file = (cls.DATA_PATH / cls.filename).open('rb')
+
+    @classmethod
+    def tearDownClass(cls):
+        cls.image_file.close()
+
+    def setUp(self):
+        self.manifest, self.config = arv_keepdocker.load_image_metadata(self.image_file)
+
+    def test_image_manifest(self):
+        self.assertIsInstance(self.manifest, collections.abc.Mapping)
+        self.assertEqual(self.manifest.get('RepoTags'), ['hello-world:latest'])
+
+    def test_image_config(self):
+        self.assertIsInstance(self.config, collections.abc.Mapping)
+        self.assertEqual(self.config.get('created'), '2023-05-02T16:49:27Z')

commit 9f3b192794007f74f71415c3cc4fbe049e98fbc2
Author: Brett Smith <brett.smith at curii.com>
Date:   Fri Feb 2 02:38:14 2024 -0500

    Merge branch '21429-remember-docker-state'
    
    Closes #21429.
    
    Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith at curii.com>

diff --git a/tools/compute-images/scripts/usr-local-bin-ensure-encrypted-partitions-aws-ebs-autoscale.sh b/tools/compute-images/scripts/usr-local-bin-ensure-encrypted-partitions-aws-ebs-autoscale.sh
index abc63a2e92..6f0970b17f 100644
--- a/tools/compute-images/scripts/usr-local-bin-ensure-encrypted-partitions-aws-ebs-autoscale.sh
+++ b/tools/compute-images/scripts/usr-local-bin-ensure-encrypted-partitions-aws-ebs-autoscale.sh
@@ -22,9 +22,16 @@ ensure_umount() {
 # First make sure docker is not using /tmp, then unmount everything under it.
 if [ -d /etc/sv/docker.io ]
 then
+  # TODO: Actually detect Docker state with runit
+  DOCKER_ACTIVE=true
   sv stop docker.io || service stop docker.io || true
 else
-  systemctl disable --now docker.service docker.socket || true
+  if systemctl --quiet is-active docker.service docker.socket; then
+    systemctl stop docker.service docker.socket || true
+    DOCKER_ACTIVE=true
+  else
+    DOCKER_ACTIVE=false
+  fi
 fi
 
 ensure_umount "$MOUNTPATH/docker/aufs"
@@ -38,13 +45,18 @@ cat <<EOF > /etc/docker/daemon.json
 }
 EOF
 
+if ! $DOCKER_ACTIVE; then
+  # Nothing else to do
+  exit 0
+fi
+
 # restart docker
 if [ -d /etc/sv/docker.io ]
 then
   ## runit
   sv up docker.io
 else
-  systemctl enable --now docker.service docker.socket
+  systemctl start docker.service docker.socket || true
 fi
 
 end=$((SECONDS+60))
diff --git a/tools/compute-images/scripts/usr-local-bin-ensure-encrypted-partitions.sh b/tools/compute-images/scripts/usr-local-bin-ensure-encrypted-partitions.sh
index a76dc12109..726ff0cdcd 100644
--- a/tools/compute-images/scripts/usr-local-bin-ensure-encrypted-partitions.sh
+++ b/tools/compute-images/scripts/usr-local-bin-ensure-encrypted-partitions.sh
@@ -119,9 +119,16 @@ mkfs.xfs -f "$CRYPTPATH"
 # First make sure docker is not using /tmp, then unmount everything under it.
 if [ -d /etc/sv/docker.io ]
 then
+  # TODO: Actually detect Docker state with runit
+  DOCKER_ACTIVE=true
   sv stop docker.io || service stop docker.io || true
 else
-  systemctl disable --now docker.service docker.socket || true
+  if systemctl --quiet is-active docker.service docker.socket; then
+    systemctl stop docker.service docker.socket || true
+    DOCKER_ACTIVE=true
+  else
+    DOCKER_ACTIVE=false
+  fi
 fi
 
 ensure_umount "$MOUNTPATH/docker/aufs"
@@ -137,13 +144,18 @@ cat <<EOF > /etc/docker/daemon.json
 }
 EOF
 
+if ! $DOCKER_ACTIVE; then
+  # Nothing else to do
+  exit 0
+fi
+
 # restart docker
 if [ -d /etc/sv/docker.io ]
 then
   ## runit
   sv up docker.io
 else
-  systemctl enable --now docker.service docker.socket || true
+  systemctl start docker.service docker.socket || true
 fi
 
 end=$((SECONDS+60))

commit d955c244e56a93710b4f939c1f9747777a86c846
Author: Tom Clegg <tom at curii.com>
Date:   Tue Jan 16 14:05:50 2024 -0500

    Merge branch '21379-user-activity-remote-collection'
    
    fixes #21379
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/tools/user-activity/arvados_user_activity/main.py b/tools/user-activity/arvados_user_activity/main.py
index ded96c3121..66d03b2041 100755
--- a/tools/user-activity/arvados_user_activity/main.py
+++ b/tools/user-activity/arvados_user_activity/main.py
@@ -96,6 +96,7 @@ collectionNameCache = {}
 def getCollectionName(arv, uuid, pdh):
     lookupField = uuid
     filters = [["uuid", "=", uuid]]
+    order = None
     cached = uuid in collectionNameCache
     # look up by uuid if it is available, fall back to look up by pdh
     if uuid is None or len(uuid) != 27:
@@ -105,10 +106,11 @@ def getCollectionName(arv, uuid, pdh):
         # name, if the uuid for the request is not known.
         lookupField = pdh
         filters = [["portable_data_hash", "=", pdh]]
+        order = "created_at"
         cached = pdh in collectionNameCache
 
     if not cached:
-        u = arv.collections().list(filters=filters, order="created_at", limit=1).execute().get("items")
+        u = arv.collections().list(filters=filters, order=order, limit=1, count="none").execute().get("items")
         if len(u) < 1:
             return "(deleted)"
         collectionNameCache[lookupField] = u[0]["name"]

commit dc9a9898c1453912dd067656516b007f63ad4a01
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Tue Jan 9 16:14:14 2024 -0500

    Add arvbox check for fs.inotify.max_user_watches, refs #21349
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/tools/arvbox/bin/arvbox b/tools/arvbox/bin/arvbox
index 358e61ca57..9effbe0fb2 100755
--- a/tools/arvbox/bin/arvbox
+++ b/tools/arvbox/bin/arvbox
@@ -431,6 +431,14 @@ check() {
             exit 1
         ;;
     esac
+
+    user_watches=$(/usr/sbin/sysctl fs.inotify.max_user_watches)
+    [[ $user_watches =~ fs.inotify.max_user_watches\ =\ ([0-9]+) ]] && value=${BASH_REMATCH[1]}
+    if [[ "$value" -lt 256000 ]] ; then
+	echo "Not enough file system listeners ($value), to fix this run:"
+	echo "sudo sh -c 'echo fs.inotify.max_user_watches=524288 >> /etc/sysctl.d/local.conf && sysctl --system'"
+	exit 1
+    fi
 }
 
 subcmd="$1"

commit 90785a12ee48dedf23228dcd468fdbf8332b2e24
Author: Tom Clegg <tom at curii.com>
Date:   Tue Jan 9 11:04:09 2024 -0500

    Merge branch '21036-keep-balance-metrics'
    
    fixes #21036
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/services/keep-balance/balance_run_test.go b/services/keep-balance/balance_run_test.go
index b7b3fb6123..3b1a10f7aa 100644
--- a/services/keep-balance/balance_run_test.go
+++ b/services/keep-balance/balance_run_test.go
@@ -555,6 +555,10 @@ func (s *runSuite) TestDryRun(c *check.C) {
 	c.Check(bal.stats.trashesDeferred, check.Not(check.Equals), 0)
 	c.Check(bal.stats.underrep.replicas, check.Not(check.Equals), 0)
 	c.Check(bal.stats.overrep.replicas, check.Not(check.Equals), 0)
+
+	metrics := arvadostest.GatherMetricsAsString(srv.Metrics.reg)
+	c.Check(metrics, check.Matches, `(?ms).*\narvados_keep_trash_entries_deferred_count [1-9].*`)
+	c.Check(metrics, check.Matches, `(?ms).*\narvados_keep_pull_entries_deferred_count [1-9].*`)
 }
 
 func (s *runSuite) TestCommit(c *check.C) {
@@ -592,6 +596,36 @@ func (s *runSuite) TestCommit(c *check.C) {
 	c.Check(metrics, check.Matches, `(?ms).*\narvados_keepbalance_changeset_compute_seconds_count 1\n.*`)
 	c.Check(metrics, check.Matches, `(?ms).*\narvados_keep_dedup_byte_ratio [1-9].*`)
 	c.Check(metrics, check.Matches, `(?ms).*\narvados_keep_dedup_block_ratio [1-9].*`)
+
+	for _, cat := range []string{
+		"dedup_byte_ratio", "dedup_block_ratio", "collection_bytes",
+		"referenced_bytes", "referenced_blocks", "reference_count",
+		"pull_entries_sent_count",
+		"trash_entries_sent_count",
+	} {
+		c.Check(metrics, check.Matches, `(?ms).*\narvados_keep_`+cat+` [1-9].*`)
+	}
+
+	for _, cat := range []string{
+		"pull_entries_deferred_count",
+		"trash_entries_deferred_count",
+	} {
+		c.Check(metrics, check.Matches, `(?ms).*\narvados_keep_`+cat+` 0\n.*`)
+	}
+
+	c.Check(metrics, check.Matches, `(?ms).*\narvados_keep_replicated_block_count{replicas="0"} [1-9].*`)
+	c.Check(metrics, check.Matches, `(?ms).*\narvados_keep_replicated_block_count{replicas="1"} [1-9].*`)
+	c.Check(metrics, check.Matches, `(?ms).*\narvados_keep_replicated_block_count{replicas="9"} 0\n.*`)
+
+	for _, sub := range []string{"replicas", "blocks", "bytes"} {
+		for _, cat := range []string{"needed", "unneeded", "unachievable", "pulling"} {
+			c.Check(metrics, check.Matches, `(?ms).*\narvados_keep_usage_`+sub+`{status="`+cat+`",storage_class="default"} [1-9].*`)
+		}
+		for _, cat := range []string{"total", "garbage", "transient", "overreplicated", "underreplicated", "unachievable", "balanced", "desired", "lost"} {
+			c.Check(metrics, check.Matches, `(?ms).*\narvados_keep_`+cat+`_`+sub+` [0-9].*`)
+		}
+	}
+	c.Logf("%s", metrics)
 }
 
 func (s *runSuite) TestChunkPrefix(c *check.C) {
diff --git a/services/keep-balance/metrics.go b/services/keep-balance/metrics.go
index 4683b67b98..02cee3955f 100644
--- a/services/keep-balance/metrics.go
+++ b/services/keep-balance/metrics.go
@@ -7,6 +7,7 @@ package keepbalance
 import (
 	"fmt"
 	"net/http"
+	"strconv"
 	"sync"
 
 	"github.com/prometheus/client_golang/prometheus"
@@ -17,18 +18,20 @@ type observer interface{ Observe(float64) }
 type setter interface{ Set(float64) }
 
 type metrics struct {
-	reg         *prometheus.Registry
-	statsGauges map[string]setter
-	observers   map[string]observer
-	setupOnce   sync.Once
-	mtx         sync.Mutex
+	reg            *prometheus.Registry
+	statsGauges    map[string]setter
+	statsGaugeVecs map[string]*prometheus.GaugeVec
+	observers      map[string]observer
+	setupOnce      sync.Once
+	mtx            sync.Mutex
 }
 
 func newMetrics(registry *prometheus.Registry) *metrics {
 	return &metrics{
-		reg:         registry,
-		statsGauges: map[string]setter{},
-		observers:   map[string]observer{},
+		reg:            registry,
+		statsGauges:    map[string]setter{},
+		statsGaugeVecs: map[string]*prometheus.GaugeVec{},
+		observers:      map[string]observer{},
 	}
 }
 
@@ -63,9 +66,24 @@ func (m *metrics) UpdateStats(s balancerStats) {
 		"transient":         {s.unref, "transient (unreferenced, new)"},
 		"overreplicated":    {s.overrep, "overreplicated"},
 		"underreplicated":   {s.underrep, "underreplicated"},
+		"unachievable":      {s.unachievable, "unachievable"},
+		"balanced":          {s.justright, "optimally balanced"},
+		"desired":           {s.desired, "desired"},
 		"lost":              {s.lost, "lost"},
 		"dedup_byte_ratio":  {s.dedupByteRatio(), "deduplication ratio, bytes referenced / bytes stored"},
 		"dedup_block_ratio": {s.dedupBlockRatio(), "deduplication ratio, blocks referenced / blocks stored"},
+		"collection_bytes":  {s.collectionBytes, "total apparent size of all collections"},
+		"referenced_bytes":  {s.collectionBlockBytes, "total size of unique referenced blocks"},
+		"reference_count":   {s.collectionBlockRefs, "block references in all collections"},
+		"referenced_blocks": {s.collectionBlocks, "blocks referenced by any collection"},
+
+		"pull_entries_sent_count":      {s.pulls, "total entries sent in pull lists"},
+		"pull_entries_deferred_count":  {s.pullsDeferred, "total entries deferred (not sent) in pull lists"},
+		"trash_entries_sent_count":     {s.trashes, "total entries sent in trash lists"},
+		"trash_entries_deferred_count": {s.trashesDeferred, "total entries deferred (not sent) in trash lists"},
+
+		"replicated_block_count": {s.replHistogram, "blocks with indicated number of replicas at last count"},
+		"usage":                  {s.classStats, "stored in indicated storage class"},
 	}
 	m.setupOnce.Do(func() {
 		// Register gauge(s) for each balancerStats field.
@@ -87,6 +105,29 @@ func (m *metrics) UpdateStats(s balancerStats) {
 				}
 			case int, int64, float64:
 				addGauge(name, gauge.Help)
+			case []int:
+				// replHistogram
+				gv := prometheus.NewGaugeVec(prometheus.GaugeOpts{
+					Namespace: "arvados",
+					Name:      name,
+					Subsystem: "keep",
+					Help:      gauge.Help,
+				}, []string{"replicas"})
+				m.reg.MustRegister(gv)
+				m.statsGaugeVecs[name] = gv
+			case map[string]replicationStats:
+				// classStats
+				for _, sub := range []string{"blocks", "bytes", "replicas"} {
+					name := name + "_" + sub
+					gv := prometheus.NewGaugeVec(prometheus.GaugeOpts{
+						Namespace: "arvados",
+						Name:      name,
+						Subsystem: "keep",
+						Help:      gauge.Help,
+					}, []string{"storage_class", "status"})
+					m.reg.MustRegister(gv)
+					m.statsGaugeVecs[name] = gv
+				}
 			default:
 				panic(fmt.Sprintf("bad gauge type %T", gauge.Value))
 			}
@@ -105,6 +146,38 @@ func (m *metrics) UpdateStats(s balancerStats) {
 			m.statsGauges[name].Set(float64(val))
 		case float64:
 			m.statsGauges[name].Set(float64(val))
+		case []int:
+			// replHistogram
+			for r, n := range val {
+				m.statsGaugeVecs[name].WithLabelValues(strconv.Itoa(r)).Set(float64(n))
+			}
+			// Record zero for higher-than-max-replication
+			// metrics, so we don't incorrectly continue
+			// to report stale metrics.
+			//
+			// For example, if we previously reported n=1
+			// for repl=6, but have since restarted
+			// keep-balance and the most replicated block
+			// now has repl=5, then the repl=6 gauge will
+			// still say n=1 until we clear it explicitly
+			// here.
+			for r := len(val); r < len(val)+4 || r < len(val)*2; r++ {
+				m.statsGaugeVecs[name].WithLabelValues(strconv.Itoa(r)).Set(0)
+			}
+		case map[string]replicationStats:
+			// classStats
+			for class, cs := range val {
+				for label, val := range map[string]blocksNBytes{
+					"needed":       cs.needed,
+					"unneeded":     cs.unneeded,
+					"pulling":      cs.pulling,
+					"unachievable": cs.unachievable,
+				} {
+					m.statsGaugeVecs[name+"_blocks"].WithLabelValues(class, label).Set(float64(val.blocks))
+					m.statsGaugeVecs[name+"_bytes"].WithLabelValues(class, label).Set(float64(val.bytes))
+					m.statsGaugeVecs[name+"_replicas"].WithLabelValues(class, label).Set(float64(val.replicas))
+				}
+			}
 		default:
 			panic(fmt.Sprintf("bad gauge type %T", gauge.Value))
 		}

commit d3c78a1ba3183ddcfd29b58525837dd4d6e0b092
Author: Tom Clegg <tom at curii.com>
Date:   Mon Jan 8 16:39:38 2024 -0500

    Merge branch '21285-max-gw-tunnels'
    
    refs #21285
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/doc/_includes/_hpc_max_gateway_tunnels.liquid b/doc/_includes/_hpc_max_gateway_tunnels.liquid
new file mode 100644
index 0000000000..ba8769c653
--- /dev/null
+++ b/doc/_includes/_hpc_max_gateway_tunnels.liquid
@@ -0,0 +1,18 @@
+{% comment %}
+Copyright (C) The Arvados Authors. All rights reserved.
+
+SPDX-License-Identifier: CC-BY-SA-3.0
+{% endcomment %}
+
+h3(#MaxGatewayTunnels). API.MaxGatewayTunnels
+
+Each Arvados container that runs on your HPC cluster will bring up a long-lived connection to the Arvados controller and keep it open for the entire duration of the container. This connection is used to access real-time container logs from Workbench, and to enable the "container shell":{{site.baseurl}}/install/container-shell-access.html feature.
+
+Set the @MaxGatewayTunnels@ config entry high enough to accommodate the maximum number of containers you expect to run concurrently on your HPC cluster, plus incoming container shell sessions.
+
+<notextile>
+<pre>    API:
+      MaxGatewayTunnels: 2000</pre>
+</notextile>
+
+Also, configure Nginx (and any other HTTP proxies or load balancers running between the HPC and Arvados controller) to allow the expected number of connections, i.e., @MaxConcurrentRequests + MaxQueuedRequests + MaxGatewayTunnels at .
diff --git a/doc/admin/upgrading.html.textile.liquid b/doc/admin/upgrading.html.textile.liquid
index 63c124b7c9..fe15518253 100644
--- a/doc/admin/upgrading.html.textile.liquid
+++ b/doc/admin/upgrading.html.textile.liquid
@@ -28,6 +28,14 @@ TODO: extract this information based on git commit messages and generate changel
 <div class="releasenotes">
 </notextile>
 
+h2(#2_7_2). v2.7.2 (2024-02-??)
+
+"previous: Upgrading to 2.7.1":#v2_7_1
+
+h3. Check MaxGatewayTunnels config
+
+If you use the LSF or Slurm dispatcher, ensure the new @API.MaxGatewayTunnels@ config entry is high enough to support the size of your cluster. See "LSF docs":{{site.baseurl}}/install/crunch2-lsf/install-dispatch.html#MaxGatewayTunnels or "Slurm docs":{{site.baseurl}}/install/crunch2-slurm/install-dispatch.html#MaxGatewayTunnels for details.
+
 h2(#2_7_1). v2.7.1 (2023-12-12)
 
 "previous: Upgrading to 2.7.0":#v2_7_0
diff --git a/doc/install/crunch2-lsf/install-dispatch.html.textile.liquid b/doc/install/crunch2-lsf/install-dispatch.html.textile.liquid
index d4328d89a3..fc4393d0b6 100644
--- a/doc/install/crunch2-lsf/install-dispatch.html.textile.liquid
+++ b/doc/install/crunch2-lsf/install-dispatch.html.textile.liquid
@@ -40,6 +40,8 @@ Add a DispatchLSF entry to the Services section, using the hostname where @arvad
 
 Review the following configuration parameters and adjust as needed.
 
+{% include 'hpc_max_gateway_tunnels' %}
+
 h3(#BsubSudoUser). Containers.LSF.BsubSudoUser
 
 arvados-dispatch-lsf uses @sudo@ to execute @bsub@, for example @sudo -E -u crunch bsub [...]@. This means the @crunch@ account must exist on the hosts where LSF jobs run ("execution hosts"), as well as on the host where you are installing the Arvados LSF dispatcher (the "submission host"). To use a user account other than @crunch@, configure @BsubSudoUser@:
diff --git a/doc/install/crunch2-slurm/install-dispatch.html.textile.liquid b/doc/install/crunch2-slurm/install-dispatch.html.textile.liquid
index 554f53dd38..16af80d127 100644
--- a/doc/install/crunch2-slurm/install-dispatch.html.textile.liquid
+++ b/doc/install/crunch2-slurm/install-dispatch.html.textile.liquid
@@ -41,6 +41,8 @@ Add a DispatchSLURM entry to the Services section, using the hostname where @cru
 
 The following configuration parameters are optional.
 
+{% include 'hpc_max_gateway_tunnels' %}
+
 h3(#PollPeriod). Containers.PollInterval
 
 crunch-dispatch-slurm polls the API server periodically for new containers to run.  The @PollInterval@ option controls how often this poll happens.  Set this to a string of numbers suffixed with one of the time units @ns@, @us@, @ms@, @s@, @m@, or @h at .  For example:
diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml
index 844e67bfcc..9004b3f64f 100644
--- a/lib/config/config.default.yml
+++ b/lib/config/config.default.yml
@@ -231,6 +231,10 @@ Clusters:
       # also effectively limited by MaxConcurrentRailsRequests (see
       # below) because most controller requests proxy through to the
       # RailsAPI service.
+      #
+      # HTTP proxies and load balancers downstream of arvados services
+      # should be configured to allow at least {MaxConcurrentRequest +
+      # MaxQueuedRequests + MaxGatewayTunnels} concurrent requests.
       MaxConcurrentRequests: 64
 
       # Maximum number of concurrent requests to process concurrently
@@ -250,6 +254,13 @@ Clusters:
       # the incoming request queue before returning 503.
       MaxQueueTimeForLockRequests: 2s
 
+      # Maximum number of active gateway tunnel connections. One slot
+      # is consumed by each "container shell" connection. If using an
+      # HPC dispatcher (LSF or Slurm), one slot is consumed by each
+      # running container.  These do not count toward
+      # MaxConcurrentRequests.
+      MaxGatewayTunnels: 1000
+
       # Fraction of MaxConcurrentRequests that can be "log create"
       # messages at any given time.  This is to prevent logging
       # updates from crowding out more important requests.
diff --git a/lib/config/export.go b/lib/config/export.go
index 674b37473e..ce947893fe 100644
--- a/lib/config/export.go
+++ b/lib/config/export.go
@@ -70,6 +70,7 @@ var whitelist = map[string]bool{
 	"API.LogCreateRequestFraction":             false,
 	"API.MaxConcurrentRailsRequests":           false,
 	"API.MaxConcurrentRequests":                false,
+	"API.MaxGatewayTunnels":                    false,
 	"API.MaxIndexDatabaseRead":                 false,
 	"API.MaxItemsPerResponse":                  true,
 	"API.MaxKeepBlobBuffers":                   false,
diff --git a/lib/controller/rpc/conn.go b/lib/controller/rpc/conn.go
index a8ecc57bba..9f518d9c7a 100644
--- a/lib/controller/rpc/conn.go
+++ b/lib/controller/rpc/conn.go
@@ -482,11 +482,11 @@ func (conn *Conn) socket(ctx context.Context, u *url.URL, upgradeHeader string,
 		} else {
 			message = fmt.Sprintf("%q", body)
 		}
-		return connresp, fmt.Errorf("server did not provide a tunnel: %s: %s", resp.Status, message)
+		return connresp, httpserver.ErrorWithStatus(fmt.Errorf("server did not provide a tunnel: %s: %s", resp.Status, message), resp.StatusCode)
 	}
 	if strings.ToLower(resp.Header.Get("Upgrade")) != upgradeHeader ||
 		strings.ToLower(resp.Header.Get("Connection")) != "upgrade" {
-		return connresp, fmt.Errorf("bad response from server: Upgrade %q Connection %q", resp.Header.Get("Upgrade"), resp.Header.Get("Connection"))
+		return connresp, httpserver.ErrorWithStatus(fmt.Errorf("bad response from server: Upgrade %q Connection %q", resp.Header.Get("Upgrade"), resp.Header.Get("Connection")), http.StatusBadGateway)
 	}
 	connresp.Conn = netconn
 	connresp.Bufrw = &bufio.ReadWriter{Reader: bufr, Writer: bufw}
diff --git a/lib/crunchrun/container_gateway.go b/lib/crunchrun/container_gateway.go
index 30f8957a2d..5b68e2c50e 100644
--- a/lib/crunchrun/container_gateway.go
+++ b/lib/crunchrun/container_gateway.go
@@ -220,7 +220,7 @@ func (gw *Gateway) runTunnel(addr string) error {
 		AuthSecret: gw.AuthSecret,
 	})
 	if err != nil {
-		return fmt.Errorf("error creating gateway tunnel: %s", err)
+		return fmt.Errorf("error creating gateway tunnel: %w", err)
 	}
 	mux, err := yamux.Client(tun.Conn, nil)
 	if err != nil {
diff --git a/lib/service/cmd.go b/lib/service/cmd.go
index 725f86f3bd..82e95fe0b4 100644
--- a/lib/service/cmd.go
+++ b/lib/service/cmd.go
@@ -16,6 +16,7 @@ import (
 	_ "net/http/pprof"
 	"net/url"
 	"os"
+	"regexp"
 	"strings"
 	"time"
 
@@ -148,32 +149,13 @@ func (c *command) RunCommand(prog string, args []string, stdin io.Reader, stdout
 		return 1
 	}
 
-	maxReqs := cluster.API.MaxConcurrentRequests
-	if maxRails := cluster.API.MaxConcurrentRailsRequests; maxRails > 0 &&
-		(maxRails < maxReqs || maxReqs == 0) &&
-		strings.HasSuffix(prog, "controller") {
-		// Ideally, we would accept up to
-		// MaxConcurrentRequests, and apply the
-		// MaxConcurrentRailsRequests limit only for requests
-		// that require calling upstream to RailsAPI. But for
-		// now we make the simplifying assumption that every
-		// controller request causes an upstream RailsAPI
-		// request.
-		maxReqs = maxRails
-	}
 	instrumented := httpserver.Instrument(reg, log,
 		httpserver.HandlerWithDeadline(cluster.API.RequestTimeout.Duration(),
 			httpserver.AddRequestIDs(
 				httpserver.Inspect(reg, cluster.ManagementToken,
 					httpserver.LogRequests(
 						interceptHealthReqs(cluster.ManagementToken, handler.CheckHealth,
-							&httpserver.RequestLimiter{
-								Handler:                    handler,
-								MaxConcurrent:              maxReqs,
-								MaxQueue:                   cluster.API.MaxQueuedRequests,
-								MaxQueueTimeForMinPriority: cluster.API.MaxQueueTimeForLockRequests.Duration(),
-								Priority:                   c.requestPriority,
-								Registry:                   reg}))))))
+							c.requestLimiter(handler, cluster, reg)))))))
 	srv := &httpserver.Server{
 		Server: http.Server{
 			Handler:     ifCollectionInHost(instrumented, instrumented.ServeAPI(cluster.ManagementToken, instrumented)),
@@ -212,7 +194,7 @@ func (c *command) RunCommand(prog string, args []string, stdin io.Reader, stdout
 		<-handler.Done()
 		srv.Close()
 	}()
-	go c.requestQueueDumpCheck(cluster, maxReqs, prog, reg, &srv.Server, logger)
+	go c.requestQueueDumpCheck(cluster, prog, reg, &srv.Server, logger)
 	err = srv.Wait()
 	if err != nil {
 		return 1
@@ -221,12 +203,13 @@ func (c *command) RunCommand(prog string, args []string, stdin io.Reader, stdout
 }
 
 // If SystemLogs.RequestQueueDumpDirectory is set, monitor the
-// server's incoming HTTP request queue size. When it exceeds 90% of
-// API.MaxConcurrentRequests, write the /_inspect/requests data to a
-// JSON file in the specified directory.
-func (c *command) requestQueueDumpCheck(cluster *arvados.Cluster, maxReqs int, prog string, reg *prometheus.Registry, srv *http.Server, logger logrus.FieldLogger) {
+// server's incoming HTTP request limiters. When the number of
+// concurrent requests in any queue ("api" or "tunnel") exceeds 90% of
+// its maximum slots, write the /_inspect/requests data to a JSON file
+// in the specified directory.
+func (c *command) requestQueueDumpCheck(cluster *arvados.Cluster, prog string, reg *prometheus.Registry, srv *http.Server, logger logrus.FieldLogger) {
 	outdir := cluster.SystemLogs.RequestQueueDumpDirectory
-	if outdir == "" || cluster.ManagementToken == "" || maxReqs < 1 {
+	if outdir == "" || cluster.ManagementToken == "" {
 		return
 	}
 	logger = logger.WithField("worker", "RequestQueueDump")
@@ -237,16 +220,29 @@ func (c *command) requestQueueDumpCheck(cluster *arvados.Cluster, maxReqs int, p
 			logger.WithError(err).Warn("error getting metrics")
 			continue
 		}
-		dump := false
+		cur := map[string]int{} // queue label => current
+		max := map[string]int{} // queue label => max
 		for _, mf := range mfs {
-			if mf.Name != nil && *mf.Name == "arvados_concurrent_requests" && len(mf.Metric) == 1 {
-				n := int(mf.Metric[0].GetGauge().GetValue())
-				if n > 0 && n >= maxReqs*9/10 {
-					dump = true
-					break
+			for _, m := range mf.GetMetric() {
+				for _, ml := range m.GetLabel() {
+					if ml.GetName() == "queue" {
+						n := int(m.GetGauge().GetValue())
+						if name := mf.GetName(); name == "arvados_concurrent_requests" {
+							cur[*ml.Value] = n
+						} else if name == "arvados_max_concurrent_requests" {
+							max[*ml.Value] = n
+						}
+					}
 				}
 			}
 		}
+		dump := false
+		for queue, n := range cur {
+			if n > 0 && max[queue] > 0 && n >= max[queue]*9/10 {
+				dump = true
+				break
+			}
+		}
 		if dump {
 			req, err := http.NewRequest("GET", "/_inspect/requests", nil)
 			if err != nil {
@@ -269,6 +265,67 @@ func (c *command) requestQueueDumpCheck(cluster *arvados.Cluster, maxReqs int, p
 	}
 }
 
+// Set up a httpserver.RequestLimiter with separate queues/streams for
+// API requests (obeying MaxConcurrentRequests etc) and gateway tunnel
+// requests (obeying MaxGatewayTunnels).
+func (c *command) requestLimiter(handler http.Handler, cluster *arvados.Cluster, reg *prometheus.Registry) http.Handler {
+	maxReqs := cluster.API.MaxConcurrentRequests
+	if maxRails := cluster.API.MaxConcurrentRailsRequests; maxRails > 0 &&
+		(maxRails < maxReqs || maxReqs == 0) &&
+		c.svcName == arvados.ServiceNameController {
+		// Ideally, we would accept up to
+		// MaxConcurrentRequests, and apply the
+		// MaxConcurrentRailsRequests limit only for requests
+		// that require calling upstream to RailsAPI. But for
+		// now we make the simplifying assumption that every
+		// controller request causes an upstream RailsAPI
+		// request.
+		maxReqs = maxRails
+	}
+	rqAPI := &httpserver.RequestQueue{
+		Label:                      "api",
+		MaxConcurrent:              maxReqs,
+		MaxQueue:                   cluster.API.MaxQueuedRequests,
+		MaxQueueTimeForMinPriority: cluster.API.MaxQueueTimeForLockRequests.Duration(),
+	}
+	rqTunnel := &httpserver.RequestQueue{
+		Label:         "tunnel",
+		MaxConcurrent: cluster.API.MaxGatewayTunnels,
+		MaxQueue:      0,
+	}
+	return &httpserver.RequestLimiter{
+		Handler:  handler,
+		Priority: c.requestPriority,
+		Registry: reg,
+		Queue: func(req *http.Request) *httpserver.RequestQueue {
+			if req.Method == http.MethodPost && reTunnelPath.MatchString(req.URL.Path) {
+				return rqTunnel
+			} else {
+				return rqAPI
+			}
+		},
+	}
+}
+
+// reTunnelPath matches paths of API endpoints that go in the "tunnel"
+// queue.
+var reTunnelPath = regexp.MustCompile(func() string {
+	rePathVar := regexp.MustCompile(`{.*?}`)
+	out := ""
+	for _, endpoint := range []arvados.APIEndpoint{
+		arvados.EndpointContainerGatewayTunnel,
+		arvados.EndpointContainerGatewayTunnelCompat,
+		arvados.EndpointContainerSSH,
+		arvados.EndpointContainerSSHCompat,
+	} {
+		if out != "" {
+			out += "|"
+		}
+		out += `\Q/` + rePathVar.ReplaceAllString(endpoint.Path, `\E[^/]*\Q`) + `\E`
+	}
+	return "^(" + out + ")$"
+}())
+
 func (c *command) requestPriority(req *http.Request, queued time.Time) int64 {
 	switch {
 	case req.Method == http.MethodPost && strings.HasPrefix(req.URL.Path, "/arvados/v1/containers/") && strings.HasSuffix(req.URL.Path, "/lock"):
diff --git a/lib/service/cmd_test.go b/lib/service/cmd_test.go
index 08b3a239dc..9ead90019e 100644
--- a/lib/service/cmd_test.go
+++ b/lib/service/cmd_test.go
@@ -17,6 +17,8 @@ import (
 	"net/url"
 	"os"
 	"strings"
+	"sync"
+	"sync/atomic"
 	"testing"
 	"time"
 
@@ -198,15 +200,24 @@ func (*Suite) TestCommand(c *check.C) {
 	c.Check(stderr.String(), check.Matches, `(?ms).*"msg":"CheckHealth called".*`)
 }
 
-func (s *Suite) TestDumpRequestsKeepweb(c *check.C) {
-	s.testDumpRequests(c, arvados.ServiceNameKeepweb, "MaxConcurrentRequests")
+func (s *Suite) TestTunnelPathRegexp(c *check.C) {
+	c.Check(reTunnelPath.MatchString(`/arvados/v1/connect/zzzzz-dz642-aaaaaaaaaaaaaaa/gateway_tunnel`), check.Equals, true)
+	c.Check(reTunnelPath.MatchString(`/arvados/v1/containers/zzzzz-dz642-aaaaaaaaaaaaaaa/gateway_tunnel`), check.Equals, true)
+	c.Check(reTunnelPath.MatchString(`/arvados/v1/connect/zzzzz-dz642-aaaaaaaaaaaaaaa/ssh`), check.Equals, true)
+	c.Check(reTunnelPath.MatchString(`/arvados/v1/containers/zzzzz-dz642-aaaaaaaaaaaaaaa/ssh`), check.Equals, true)
+	c.Check(reTunnelPath.MatchString(`/blah/arvados/v1/containers/zzzzz-dz642-aaaaaaaaaaaaaaa/ssh`), check.Equals, false)
+	c.Check(reTunnelPath.MatchString(`/arvados/v1/containers/zzzzz-dz642-aaaaaaaaaaaaaaa`), check.Equals, false)
 }
 
-func (s *Suite) TestDumpRequestsController(c *check.C) {
-	s.testDumpRequests(c, arvados.ServiceNameController, "MaxConcurrentRailsRequests")
+func (s *Suite) TestRequestLimitsAndDumpRequests_Keepweb(c *check.C) {
+	s.testRequestLimitAndDumpRequests(c, arvados.ServiceNameKeepweb, "MaxConcurrentRequests")
 }
 
-func (*Suite) testDumpRequests(c *check.C, serviceName arvados.ServiceName, maxReqsConfigKey string) {
+func (s *Suite) TestRequestLimitsAndDumpRequests_Controller(c *check.C) {
+	s.testRequestLimitAndDumpRequests(c, arvados.ServiceNameController, "MaxConcurrentRailsRequests")
+}
+
+func (*Suite) testRequestLimitAndDumpRequests(c *check.C, serviceName arvados.ServiceName, maxReqsConfigKey string) {
 	defer func(orig time.Duration) { requestQueueDumpCheckInterval = orig }(requestQueueDumpCheckInterval)
 	requestQueueDumpCheckInterval = time.Second / 10
 
@@ -218,6 +229,7 @@ func (*Suite) testDumpRequests(c *check.C, serviceName arvados.ServiceName, maxR
 	defer cf.Close()
 
 	max := 24
+	maxTunnels := 30
 	fmt.Fprintf(cf, `
 Clusters:
  zzzzz:
@@ -225,7 +237,8 @@ Clusters:
   ManagementToken: bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
   API:
    `+maxReqsConfigKey+`: %d
-   MaxQueuedRequests: 0
+   MaxQueuedRequests: 1
+   MaxGatewayTunnels: %d
   SystemLogs: {RequestQueueDumpDirectory: %q}
   Services:
    Controller:
@@ -234,14 +247,18 @@ Clusters:
    WebDAV:
     ExternalURL: "http://localhost:`+port+`"
     InternalURLs: {"http://localhost:`+port+`": {}}
-`, max, tmpdir)
+`, max, maxTunnels, tmpdir)
 	cf.Close()
 
 	started := make(chan bool, max+1)
 	hold := make(chan bool)
 	handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		started <- true
-		<-hold
+		if strings.Contains(r.URL.Path, "/ssh") || strings.Contains(r.URL.Path, "/gateway_tunnel") {
+			<-hold
+		} else {
+			started <- true
+			<-hold
+		}
 	})
 	healthCheck := make(chan bool, 1)
 	ctx, cancel := context.WithCancel(context.Background())
@@ -267,15 +284,59 @@ Clusters:
 	}
 	client := http.Client{}
 	deadline := time.Now().Add(time.Second * 2)
+	var activeReqs sync.WaitGroup
+
+	// Start some API reqs
+	var apiResp200, apiResp503 int64
 	for i := 0; i < max+1; i++ {
+		activeReqs.Add(1)
+		go func() {
+			defer activeReqs.Done()
+			target := "http://localhost:" + port + "/testpath"
+			resp, err := client.Get(target)
+			for err != nil && strings.Contains(err.Error(), "dial tcp") && deadline.After(time.Now()) {
+				time.Sleep(time.Second / 100)
+				resp, err = client.Get(target)
+			}
+			if c.Check(err, check.IsNil) {
+				if resp.StatusCode == http.StatusOK {
+					atomic.AddInt64(&apiResp200, 1)
+				} else if resp.StatusCode == http.StatusServiceUnavailable {
+					atomic.AddInt64(&apiResp503, 1)
+				}
+			}
+		}()
+	}
+
+	// Start some gateway tunnel reqs that don't count toward our
+	// API req limit
+	extraTunnelReqs := 20
+	var tunnelResp200, tunnelResp503 int64
+	var paths = []string{
+		"/" + strings.Replace(arvados.EndpointContainerSSH.Path, "{uuid}", "z1234-dz642-abcdeabcdeabcde", -1),
+		"/" + strings.Replace(arvados.EndpointContainerSSHCompat.Path, "{uuid}", "z1234-dz642-abcdeabcdeabcde", -1),
+		"/" + strings.Replace(arvados.EndpointContainerGatewayTunnel.Path, "{uuid}", "z1234-dz642-abcdeabcdeabcde", -1),
+		"/" + strings.Replace(arvados.EndpointContainerGatewayTunnelCompat.Path, "{uuid}", "z1234-dz642-abcdeabcdeabcde", -1),
+	}
+	for i := 0; i < maxTunnels+extraTunnelReqs; i++ {
+		i := i
+		activeReqs.Add(1)
 		go func() {
-			resp, err := client.Get("http://localhost:" + port + "/testpath")
+			defer activeReqs.Done()
+			target := "http://localhost:" + port + paths[i%len(paths)]
+			resp, err := client.Post(target, "application/octet-stream", nil)
 			for err != nil && strings.Contains(err.Error(), "dial tcp") && deadline.After(time.Now()) {
 				time.Sleep(time.Second / 100)
-				resp, err = client.Get("http://localhost:" + port + "/testpath")
+				resp, err = client.Post(target, "application/octet-stream", nil)
 			}
 			if c.Check(err, check.IsNil) {
-				c.Logf("resp StatusCode %d", resp.StatusCode)
+				if resp.StatusCode == http.StatusOK {
+					atomic.AddInt64(&tunnelResp200, 1)
+				} else if resp.StatusCode == http.StatusServiceUnavailable {
+					atomic.AddInt64(&tunnelResp503, 1)
+				} else {
+					c.Errorf("tunnel response code %d", resp.StatusCode)
+				}
 			}
 		}()
 	}
@@ -284,6 +345,10 @@ Clusters:
 		case <-started:
 		case <-time.After(time.Second):
 			c.Logf("%s", stderr.String())
+			c.Logf("apiResp200 %d", apiResp200)
+			c.Logf("apiResp503 %d", apiResp503)
+			c.Logf("tunnelResp200 %d", tunnelResp200)
+			c.Logf("tunnelResp503 %d", tunnelResp503)
 			c.Fatal("timed out")
 		}
 	}
@@ -300,6 +365,20 @@ Clusters:
 		var loaded []struct{ URL string }
 		err = json.Unmarshal(j, &loaded)
 		c.Check(err, check.IsNil)
+
+		for i := 0; i < len(loaded); i++ {
+			if strings.Contains(loaded[i].URL, "/ssh") || strings.Contains(loaded[i].URL, "/gateway_tunnel") {
+				// Filter out a gateway tunnel req
+				// that doesn't count toward our API
+				// req limit
+				if i < len(loaded)-1 {
+					copy(loaded[i:], loaded[i+1:])
+					i--
+				}
+				loaded = loaded[:len(loaded)-1]
+			}
+		}
+
 		if len(loaded) < max {
 			// Dumped when #requests was >90% but <100% of
 			// limit. If we stop now, we won't be able to
@@ -309,7 +388,7 @@ Clusters:
 			c.Logf("loaded dumped requests, but len %d < max %d -- still waiting", len(loaded), max)
 			continue
 		}
-		c.Check(loaded, check.HasLen, max)
+		c.Check(loaded, check.HasLen, max+1)
 		c.Check(loaded[0].URL, check.Equals, "/testpath")
 		break
 	}
@@ -328,7 +407,8 @@ Clusters:
 		c.Check(err, check.IsNil)
 		switch path {
 		case "/metrics":
-			c.Check(string(buf), check.Matches, `(?ms).*arvados_concurrent_requests `+fmt.Sprintf("%d", max)+`\n.*`)
+			c.Check(string(buf), check.Matches, `(?ms).*arvados_concurrent_requests{queue="api"} `+fmt.Sprintf("%d", max)+`\n.*`)
+			c.Check(string(buf), check.Matches, `(?ms).*arvados_queued_requests{priority="normal",queue="api"} 1\n.*`)
 		case "/_inspect/requests":
 			c.Check(string(buf), check.Matches, `(?ms).*"URL":"/testpath".*`)
 		default:
@@ -336,6 +416,11 @@ Clusters:
 		}
 	}
 	close(hold)
+	activeReqs.Wait()
+	c.Check(int(apiResp200), check.Equals, max+1)
+	c.Check(int(apiResp503), check.Equals, 0)
+	c.Check(int(tunnelResp200), check.Equals, maxTunnels)
+	c.Check(int(tunnelResp503), check.Equals, extraTunnelReqs)
 	cancel()
 }
 
diff --git a/sdk/go/arvados/config.go b/sdk/go/arvados/config.go
index e2ad7b089d..e39a1ff0aa 100644
--- a/sdk/go/arvados/config.go
+++ b/sdk/go/arvados/config.go
@@ -102,6 +102,7 @@ type Cluster struct {
 		MaxConcurrentRailsRequests       int
 		MaxConcurrentRequests            int
 		MaxQueuedRequests                int
+		MaxGatewayTunnels                int
 		MaxQueueTimeForLockRequests      Duration
 		LogCreateRequestFraction         float64
 		MaxKeepBlobBuffers               int
diff --git a/sdk/go/httpserver/request_limiter.go b/sdk/go/httpserver/request_limiter.go
index 9d501ab0eb..1e3316ed48 100644
--- a/sdk/go/httpserver/request_limiter.go
+++ b/sdk/go/httpserver/request_limiter.go
@@ -34,13 +34,8 @@ const metricsUpdateInterval = time.Second
 type RequestLimiter struct {
 	Handler http.Handler
 
-	// Maximum number of requests being handled at once. Beyond
-	// this limit, requests will be queued.
-	MaxConcurrent int
-
-	// Maximum number of requests in the queue. Beyond this limit,
-	// the lowest priority requests will return 503.
-	MaxQueue int
+	// Queue determines which queue a request is assigned to.
+	Queue func(req *http.Request) *RequestQueue
 
 	// Priority determines queue ordering. Requests with higher
 	// priority are handled first. Requests with equal priority
@@ -48,11 +43,6 @@ type RequestLimiter struct {
 	// handled FIFO.
 	Priority func(req *http.Request, queued time.Time) int64
 
-	// Return 503 for any request for which Priority() returns
-	// MinPriority if it spends longer than this in the queue
-	// before starting processing.
-	MaxQueueTimeForMinPriority time.Duration
-
 	// "concurrent_requests", "max_concurrent_requests",
 	// "queued_requests", and "max_queued_requests" metrics are
 	// registered with Registry, if it is not nil.
@@ -63,11 +53,32 @@ type RequestLimiter struct {
 	mQueueTimeout *prometheus.SummaryVec
 	mQueueUsage   *prometheus.GaugeVec
 	mtx           sync.Mutex
-	handling      int
-	queue         queue
+	rqs           map[*RequestQueue]bool // all RequestQueues in use
+}
+
+type RequestQueue struct {
+	// Label for metrics. No two queues should have the same label.
+	Label string
+
+	// Maximum number of requests being handled at once. Beyond
+	// this limit, requests will be queued.
+	MaxConcurrent int
+
+	// Maximum number of requests in the queue. Beyond this limit,
+	// the lowest priority requests will return 503.
+	MaxQueue int
+
+	// Return 503 for any request for which Priority() returns
+	// MinPriority if it spends longer than this in the queue
+	// before starting processing.
+	MaxQueueTimeForMinPriority time.Duration
+
+	queue    queue
+	handling int
 }
 
 type qent struct {
+	rq       *RequestQueue
 	queued   time.Time
 	priority int64
 	heappos  int
@@ -121,101 +132,96 @@ func (h *queue) remove(i int) {
 
 func (rl *RequestLimiter) setup() {
 	if rl.Registry != nil {
-		rl.Registry.MustRegister(prometheus.NewGaugeFunc(
-			prometheus.GaugeOpts{
-				Namespace: "arvados",
-				Name:      "concurrent_requests",
-				Help:      "Number of requests in progress",
-			},
-			func() float64 {
-				rl.mtx.Lock()
-				defer rl.mtx.Unlock()
-				return float64(rl.handling)
-			},
-		))
-		rl.Registry.MustRegister(prometheus.NewGaugeFunc(
-			prometheus.GaugeOpts{
-				Namespace: "arvados",
-				Name:      "max_concurrent_requests",
-				Help:      "Maximum number of concurrent requests",
-			},
-			func() float64 { return float64(rl.MaxConcurrent) },
-		))
+		mCurrentReqs := prometheus.NewGaugeVec(prometheus.GaugeOpts{
+			Namespace: "arvados",
+			Name:      "concurrent_requests",
+			Help:      "Number of requests in progress",
+		}, []string{"queue"})
+		rl.Registry.MustRegister(mCurrentReqs)
+		mMaxReqs := prometheus.NewGaugeVec(prometheus.GaugeOpts{
+			Namespace: "arvados",
+			Name:      "max_concurrent_requests",
+			Help:      "Maximum number of concurrent requests",
+		}, []string{"queue"})
+		rl.Registry.MustRegister(mMaxReqs)
+		mMaxQueue := prometheus.NewGaugeVec(prometheus.GaugeOpts{
+			Namespace: "arvados",
+			Name:      "max_queued_requests",
+			Help:      "Maximum number of queued requests",
+		}, []string{"queue"})
+		rl.Registry.MustRegister(mMaxQueue)
 		rl.mQueueUsage = prometheus.NewGaugeVec(prometheus.GaugeOpts{
 			Namespace: "arvados",
 			Name:      "queued_requests",
 			Help:      "Number of requests in queue",
-		}, []string{"priority"})
+		}, []string{"queue", "priority"})
 		rl.Registry.MustRegister(rl.mQueueUsage)
-		rl.Registry.MustRegister(prometheus.NewGaugeFunc(
-			prometheus.GaugeOpts{
-				Namespace: "arvados",
-				Name:      "max_queued_requests",
-				Help:      "Maximum number of queued requests",
-			},
-			func() float64 { return float64(rl.MaxQueue) },
-		))
 		rl.mQueueDelay = prometheus.NewSummaryVec(prometheus.SummaryOpts{
 			Namespace:  "arvados",
 			Name:       "queue_delay_seconds",
 			Help:       "Time spent in the incoming request queue before start of processing",
 			Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.95: 0.005, 0.99: 0.001},
-		}, []string{"priority"})
+		}, []string{"queue", "priority"})
 		rl.Registry.MustRegister(rl.mQueueDelay)
 		rl.mQueueTimeout = prometheus.NewSummaryVec(prometheus.SummaryOpts{
 			Namespace:  "arvados",
 			Name:       "queue_timeout_seconds",
 			Help:       "Time spent in the incoming request queue before client timed out or disconnected",
 			Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.95: 0.005, 0.99: 0.001},
-		}, []string{"priority"})
+		}, []string{"queue", "priority"})
 		rl.Registry.MustRegister(rl.mQueueTimeout)
 		go func() {
 			for range time.NewTicker(metricsUpdateInterval).C {
-				var low, normal, high int
 				rl.mtx.Lock()
-				for _, ent := range rl.queue {
-					switch {
-					case ent.priority < 0:
-						low++
-					case ent.priority > 0:
-						high++
-					default:
-						normal++
+				for rq := range rl.rqs {
+					var low, normal, high int
+					for _, ent := range rq.queue {
+						switch {
+						case ent.priority < 0:
+							low++
+						case ent.priority > 0:
+							high++
+						default:
+							normal++
+						}
 					}
+					mCurrentReqs.WithLabelValues(rq.Label).Set(float64(rq.handling))
+					mMaxReqs.WithLabelValues(rq.Label).Set(float64(rq.MaxConcurrent))
+					mMaxQueue.WithLabelValues(rq.Label).Set(float64(rq.MaxQueue))
+					rl.mQueueUsage.WithLabelValues(rq.Label, "low").Set(float64(low))
+					rl.mQueueUsage.WithLabelValues(rq.Label, "normal").Set(float64(normal))
+					rl.mQueueUsage.WithLabelValues(rq.Label, "high").Set(float64(high))
 				}
 				rl.mtx.Unlock()
-				rl.mQueueUsage.WithLabelValues("low").Set(float64(low))
-				rl.mQueueUsage.WithLabelValues("normal").Set(float64(normal))
-				rl.mQueueUsage.WithLabelValues("high").Set(float64(high))
 			}
 		}()
 	}
 }
 
 // caller must have lock
-func (rl *RequestLimiter) runqueue() {
+func (rq *RequestQueue) runqueue() {
 	// Handle entries from the queue as capacity permits
-	for len(rl.queue) > 0 && (rl.MaxConcurrent == 0 || rl.handling < rl.MaxConcurrent) {
-		rl.handling++
-		ent := rl.queue.removeMax()
+	for len(rq.queue) > 0 && (rq.MaxConcurrent == 0 || rq.handling < rq.MaxConcurrent) {
+		rq.handling++
+		ent := rq.queue.removeMax()
 		ent.ready <- true
 	}
 }
 
 // If the queue is too full, fail and remove the lowest-priority
 // entry. Caller must have lock. Queue must not be empty.
-func (rl *RequestLimiter) trimqueue() {
-	if len(rl.queue) <= rl.MaxQueue {
+func (rq *RequestQueue) trimqueue() {
+	if len(rq.queue) <= rq.MaxQueue {
 		return
 	}
 	min := 0
-	for i := range rl.queue {
-		if i == 0 || rl.queue.Less(min, i) {
+	for i := range rq.queue {
+		if i == 0 || rq.queue.Less(min, i) {
 			min = i
 		}
 	}
-	rl.queue[min].ready <- false
-	rl.queue.remove(min)
+	rq.queue[min].ready <- false
+	rq.queue.remove(min)
 }
 
 func (rl *RequestLimiter) enqueue(req *http.Request) *qent {
@@ -227,19 +233,24 @@ func (rl *RequestLimiter) enqueue(req *http.Request) *qent {
 		priority = rl.Priority(req, qtime)
 	}
 	ent := &qent{
+		rq:       rl.Queue(req),
 		queued:   qtime,
 		priority: priority,
 		ready:    make(chan bool, 1),
 		heappos:  -1,
 	}
-	if rl.MaxConcurrent == 0 || rl.MaxConcurrent > rl.handling {
+	if rl.rqs == nil {
+		rl.rqs = map[*RequestQueue]bool{}
+	}
+	rl.rqs[ent.rq] = true
+	if ent.rq.MaxConcurrent == 0 || ent.rq.MaxConcurrent > ent.rq.handling {
 		// fast path, skip the queue
-		rl.handling++
+		ent.rq.handling++
 		ent.ready <- true
 		return ent
 	}
-	rl.queue.add(ent)
-	rl.trimqueue()
+	ent.rq.queue.add(ent)
+	ent.rq.trimqueue()
 	return ent
 }
 
@@ -247,7 +258,7 @@ func (rl *RequestLimiter) remove(ent *qent) {
 	rl.mtx.Lock()
 	defer rl.mtx.Unlock()
 	if ent.heappos >= 0 {
-		rl.queue.remove(ent.heappos)
+		ent.rq.queue.remove(ent.heappos)
 		ent.ready <- false
 	}
 }
@@ -255,14 +266,14 @@ func (rl *RequestLimiter) remove(ent *qent) {
 func (rl *RequestLimiter) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
 	rl.setupOnce.Do(rl.setup)
 	ent := rl.enqueue(req)
-	SetResponseLogFields(req.Context(), logrus.Fields{"priority": ent.priority})
+	SetResponseLogFields(req.Context(), logrus.Fields{"priority": ent.priority, "queue": ent.rq.Label})
 	if ent.priority == MinPriority {
 		// Note that MaxQueueTime==0 does not cancel a req
 		// that skips the queue, because in that case
 		// rl.enqueue() has already fired ready<-true and
 		// rl.remove() is a no-op.
 		go func() {
-			time.Sleep(rl.MaxQueueTimeForMinPriority)
+			time.Sleep(ent.rq.MaxQueueTimeForMinPriority)
 			rl.remove(ent)
 		}()
 	}
@@ -273,7 +284,7 @@ func (rl *RequestLimiter) ServeHTTP(resp http.ResponseWriter, req *http.Request)
 		// we still need to wait for ent.ready, because
 		// sometimes runqueue() will have already decided to
 		// send true before our rl.remove() call, and in that
-		// case we'll need to decrement rl.handling below.
+		// case we'll need to decrement ent.rq.handling below.
 		ok = <-ent.ready
 	case ok = <-ent.ready:
 	}
@@ -298,7 +309,7 @@ func (rl *RequestLimiter) ServeHTTP(resp http.ResponseWriter, req *http.Request)
 		default:
 			qlabel = "normal"
 		}
-		series.WithLabelValues(qlabel).Observe(time.Now().Sub(ent.queued).Seconds())
+		series.WithLabelValues(ent.rq.Label, qlabel).Observe(time.Now().Sub(ent.queued).Seconds())
 	}
 
 	if !ok {
@@ -308,9 +319,9 @@ func (rl *RequestLimiter) ServeHTTP(resp http.ResponseWriter, req *http.Request)
 	defer func() {
 		rl.mtx.Lock()
 		defer rl.mtx.Unlock()
-		rl.handling--
+		ent.rq.handling--
 		// unblock the next waiting request
-		rl.runqueue()
+		ent.rq.runqueue()
 	}()
 	rl.Handler.ServeHTTP(resp, req)
 }
diff --git a/sdk/go/httpserver/request_limiter_test.go b/sdk/go/httpserver/request_limiter_test.go
index 55f13b4625..7366e1426b 100644
--- a/sdk/go/httpserver/request_limiter_test.go
+++ b/sdk/go/httpserver/request_limiter_test.go
@@ -34,7 +34,11 @@ func newTestHandler() *testHandler {
 
 func (s *Suite) TestRequestLimiter1(c *check.C) {
 	h := newTestHandler()
-	l := RequestLimiter{MaxConcurrent: 1, Handler: h}
+	rq := &RequestQueue{
+		MaxConcurrent: 1}
+	l := RequestLimiter{
+		Queue:   func(*http.Request) *RequestQueue { return rq },
+		Handler: h}
 	var wg sync.WaitGroup
 	resps := make([]*httptest.ResponseRecorder, 10)
 	for i := 0; i < 10; i++ {
@@ -94,7 +98,11 @@ func (s *Suite) TestRequestLimiter1(c *check.C) {
 
 func (*Suite) TestRequestLimiter10(c *check.C) {
 	h := newTestHandler()
-	l := RequestLimiter{MaxConcurrent: 10, Handler: h}
+	rq := &RequestQueue{
+		MaxConcurrent: 10}
+	l := RequestLimiter{
+		Queue:   func(*http.Request) *RequestQueue { return rq },
+		Handler: h}
 	var wg sync.WaitGroup
 	for i := 0; i < 10; i++ {
 		wg.Add(1)
@@ -114,29 +122,32 @@ func (*Suite) TestRequestLimiter10(c *check.C) {
 
 func (*Suite) TestRequestLimiterQueuePriority(c *check.C) {
 	h := newTestHandler()
-	rl := RequestLimiter{
+	rq := &RequestQueue{
 		MaxConcurrent: 1000,
 		MaxQueue:      200,
-		Handler:       h,
+	}
+	rl := RequestLimiter{
+		Handler: h,
+		Queue:   func(*http.Request) *RequestQueue { return rq },
 		Priority: func(r *http.Request, _ time.Time) int64 {
 			p, _ := strconv.ParseInt(r.Header.Get("Priority"), 10, 64)
 			return p
 		}}
 
 	c.Logf("starting initial requests")
-	for i := 0; i < rl.MaxConcurrent; i++ {
+	for i := 0; i < rq.MaxConcurrent; i++ {
 		go func() {
 			rl.ServeHTTP(httptest.NewRecorder(), &http.Request{Header: http.Header{"No-Priority": {"x"}}})
 		}()
 	}
 	c.Logf("waiting for initial requests to consume all MaxConcurrent slots")
-	for i := 0; i < rl.MaxConcurrent; i++ {
+	for i := 0; i < rq.MaxConcurrent; i++ {
 		<-h.inHandler
 	}
 
-	c.Logf("starting %d priority=MinPriority requests (should respond 503 immediately)", rl.MaxQueue)
+	c.Logf("starting %d priority=MinPriority requests (should respond 503 immediately)", rq.MaxQueue)
 	var wgX sync.WaitGroup
-	for i := 0; i < rl.MaxQueue; i++ {
+	for i := 0; i < rq.MaxQueue; i++ {
 		wgX.Add(1)
 		go func() {
 			defer wgX.Done()
@@ -147,13 +158,13 @@ func (*Suite) TestRequestLimiterQueuePriority(c *check.C) {
 	}
 	wgX.Wait()
 
-	c.Logf("starting %d priority=MinPriority requests (should respond 503 after 100 ms)", rl.MaxQueue)
+	c.Logf("starting %d priority=MinPriority requests (should respond 503 after 100 ms)", rq.MaxQueue)
 	// Usage docs say the caller isn't allowed to change fields
 	// after first use, but we secretly know it's OK to change
 	// this field on the fly as long as no requests are arriving
 	// concurrently.
-	rl.MaxQueueTimeForMinPriority = time.Millisecond * 100
-	for i := 0; i < rl.MaxQueue; i++ {
+	rq.MaxQueueTimeForMinPriority = time.Millisecond * 100
+	for i := 0; i < rq.MaxQueue; i++ {
 		wgX.Add(1)
 		go func() {
 			defer wgX.Done()
@@ -162,17 +173,17 @@ func (*Suite) TestRequestLimiterQueuePriority(c *check.C) {
 			rl.ServeHTTP(resp, &http.Request{Header: http.Header{"Priority": {fmt.Sprintf("%d", MinPriority)}}})
 			c.Check(resp.Code, check.Equals, http.StatusServiceUnavailable)
 			elapsed := time.Since(t0)
-			c.Check(elapsed > rl.MaxQueueTimeForMinPriority, check.Equals, true)
-			c.Check(elapsed < rl.MaxQueueTimeForMinPriority*10, check.Equals, true)
+			c.Check(elapsed > rq.MaxQueueTimeForMinPriority, check.Equals, true)
+			c.Check(elapsed < rq.MaxQueueTimeForMinPriority*10, check.Equals, true)
 		}()
 	}
 	wgX.Wait()
 
-	c.Logf("starting %d priority=1 and %d priority=1 requests", rl.MaxQueue, rl.MaxQueue)
+	c.Logf("starting %d priority=1 and %d priority=1 requests", rq.MaxQueue, rq.MaxQueue)
 	var wg1, wg2 sync.WaitGroup
-	wg1.Add(rl.MaxQueue)
-	wg2.Add(rl.MaxQueue)
-	for i := 0; i < rl.MaxQueue*2; i++ {
+	wg1.Add(rq.MaxQueue)
+	wg2.Add(rq.MaxQueue)
+	for i := 0; i < rq.MaxQueue*2; i++ {
 		i := i
 		go func() {
 			pri := (i & 1) + 1
@@ -192,12 +203,12 @@ func (*Suite) TestRequestLimiterQueuePriority(c *check.C) {
 	wg1.Wait()
 
 	c.Logf("allowing initial requests to proceed")
-	for i := 0; i < rl.MaxConcurrent; i++ {
+	for i := 0; i < rq.MaxConcurrent; i++ {
 		h.okToProceed <- struct{}{}
 	}
 
 	c.Logf("allowing queued priority=2 requests to proceed")
-	for i := 0; i < rl.MaxQueue; i++ {
+	for i := 0; i < rq.MaxQueue; i++ {
 		<-h.inHandler
 		h.okToProceed <- struct{}{}
 	}

commit f887a690cfe502ce12c4c9f16a3f1141241ea143
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Fri Jan 5 14:59:56 2024 -0500

    Merge branch '21290-sync-past-versions' refs #21290
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/services/api/app/models/collection.rb b/services/api/app/models/collection.rb
index b4660dbd35..16e85c0dd9 100644
--- a/services/api/app/models/collection.rb
+++ b/services/api/app/models/collection.rb
@@ -329,17 +329,7 @@ class Collection < ArvadosModel
   end
 
   def sync_past_versions
-    updates = self.syncable_updates
-    Collection.where('current_version_uuid = ? AND uuid != ?', self.uuid_before_last_save, self.uuid_before_last_save).each do |c|
-      c.attributes = updates
-      # Use a different validation context to skip the 'past_versions_cannot_be_updated'
-      # validator, as on this case it is legal to update some fields.
-      leave_modified_by_user_alone do
-        leave_modified_at_alone do
-          c.save(context: :update_old_versions)
-        end
-      end
-    end
+    Collection.where('current_version_uuid = ? AND uuid != ?', self.uuid_before_last_save, self.uuid_before_last_save).update_all self.syncable_updates
   end
 
   def versionable_updates?(attrs)

commit dc8f1e6ffdc021548d3013a2f6db45e11b2f451b
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Tue Jan 2 10:02:51 2024 -0500

    Merge branch '21059-signup-email' refs #21059
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/services/api/app/models/user.rb b/services/api/app/models/user.rb
index c73f31a99d..7def490618 100644
--- a/services/api/app/models/user.rb
+++ b/services/api/app/models/user.rb
@@ -31,8 +31,8 @@ class User < ArvadosModel
   after_update :setup_on_activate
 
   before_create :check_auto_admin
-  before_create :set_initial_username, :if => Proc.new {
-    username.nil? and email
+  before_validation :set_initial_username, :if => Proc.new {
+    new_record? && email
   }
   before_create :active_is_not_nil
   after_create :after_ownership_change
@@ -264,8 +264,7 @@ SELECT target_uuid, perm_level
   def setup(repo_name: nil, vm_uuid: nil, send_notification_email: nil)
     newly_invited = Link.where(tail_uuid: self.uuid,
                               head_uuid: all_users_group_uuid,
-                              link_class: 'permission',
-                              name: 'can_read').empty?
+                              link_class: 'permission').empty?
 
     # Add can_read link from this user to "all users" which makes this
     # user "invited", and (depending on config) a link in the opposite
@@ -387,6 +386,10 @@ SELECT target_uuid, perm_level
   end
 
   def set_initial_username(requested: false)
+    if new_record? and requested == false and self.username != nil and self.username != ""
+      requested = self.username
+    end
+
     if (!requested.is_a?(String) || requested.empty?) and email
       email_parts = email.partition("@")
       local_parts = email_parts.first.partition("+")
@@ -606,13 +609,64 @@ SELECT target_uuid, perm_level
 
   def self.update_remote_user remote_user
     remote_user = remote_user.symbolize_keys
+    remote_user_prefix = remote_user[:uuid][0..4]
+
+    # interaction between is_invited and is_active
+    #
+    # either can flag can be nil, true or false
+    #
+    # in all cases, we create the user if they don't exist.
+    #
+    # invited nil, active nil: don't call setup or unsetup.
+    #
+    # invited nil, active false: call unsetup
+    #
+    # invited nil, active true: call setup and activate them.
+    #
+    #
+    # invited false, active nil: call unsetup
+    #
+    # invited false, active false: call unsetup
+    #
+    # invited false, active true: call unsetup
+    #
+    #
+    # invited true, active nil: call setup but don't change is_active
+    #
+    # invited true, active false: call setup but don't change is_active
+    #
+    # invited true, active true: call setup and activate them.
+
+    should_setup = (remote_user_prefix == Rails.configuration.Login.LoginCluster or
+                    Rails.configuration.Users.AutoSetupNewUsers or
+                    Rails.configuration.Users.NewUsersAreActive or
+                    Rails.configuration.RemoteClusters[remote_user_prefix].andand["ActivateUsers"])
+
+    should_activate = (remote_user_prefix == Rails.configuration.Login.LoginCluster or
+                       Rails.configuration.Users.NewUsersAreActive or
+                       Rails.configuration.RemoteClusters[remote_user_prefix].andand["ActivateUsers"])
+
+    remote_should_be_unsetup = (remote_user[:is_invited] == nil && remote_user[:is_active] == false) ||
+                               (remote_user[:is_invited] == false)
+
+    remote_should_be_setup = should_setup && (
+      (remote_user[:is_invited] == nil && remote_user[:is_active] == true) ||
+      (remote_user[:is_invited] == false && remote_user[:is_active] == true) ||
+      (remote_user[:is_invited] == true))
+
+    remote_should_be_active = should_activate && remote_user[:is_invited] != false && remote_user[:is_active] == true
+
     begin
-      user = User.find_or_create_by(uuid: remote_user[:uuid])
+      user = User.create_with(email: remote_user[:email],
+                              username: remote_user[:username],
+                              first_name: remote_user[:first_name],
+                              last_name: remote_user[:last_name],
+                              is_active: remote_should_be_active
+      ).find_or_create_by(uuid: remote_user[:uuid])
     rescue ActiveRecord::RecordNotUnique
       retry
     end
 
-    remote_user_prefix = user.uuid[0..4]
     user.with_lock do
       needupdate = {}
       [:email, :username, :first_name, :last_name, :prefs].each do |k|
@@ -658,29 +712,19 @@ SELECT target_uuid, perm_level
         end
       end
 
-      if user.is_invited && remote_user[:is_invited] == false
-        # Remote user is not "invited" state, they should be unsetup, which
-        # also makes them inactive.
+      if remote_should_be_unsetup
+        # Remote user is not "invited" or "active" state on their home
+        # cluster, so they should be unsetup, which also makes them
+        # inactive.
         user.unsetup
       else
-        if !user.is_invited && remote_user[:is_invited] and
-          (remote_user_prefix == Rails.configuration.Login.LoginCluster or
-           Rails.configuration.Users.AutoSetupNewUsers or
-           Rails.configuration.Users.NewUsersAreActive or
-           Rails.configuration.RemoteClusters[remote_user_prefix].andand["ActivateUsers"])
-          # Remote user is 'invited' and should be set up
+        if !user.is_invited && remote_should_be_setup
           user.setup
         end
 
-        if !user.is_active && remote_user[:is_active] && user.is_invited and
-          (remote_user_prefix == Rails.configuration.Login.LoginCluster or
-           Rails.configuration.Users.NewUsersAreActive or
-           Rails.configuration.RemoteClusters[remote_user_prefix].andand["ActivateUsers"])
+        if !user.is_active && remote_should_be_active
           # remote user is active and invited, we need to activate them
           user.update!(is_active: true)
-        elsif user.is_active && remote_user[:is_active] == false
-          # remote user is not active, we need to de-activate them
-          user.update!(is_active: false)
         end
 
         if remote_user_prefix == Rails.configuration.Login.LoginCluster and
@@ -914,8 +958,9 @@ SELECT target_uuid, perm_level
 
   # Send admin notifications
   def send_admin_notifications
-    AdminNotifier.new_user(self).deliver_now
-    if not self.is_active then
+    if self.is_invited then
+      AdminNotifier.new_user(self).deliver_now
+    else
       AdminNotifier.new_inactive_user(self).deliver_now
     end
   end
diff --git a/services/api/app/views/admin_notifier/new_inactive_user.text.erb b/services/api/app/views/admin_notifier/new_inactive_user.text.erb
index afcf34da71..22298b1ce7 100644
--- a/services/api/app/views/admin_notifier/new_inactive_user.text.erb
+++ b/services/api/app/views/admin_notifier/new_inactive_user.text.erb
@@ -2,15 +2,16 @@
 
 SPDX-License-Identifier: AGPL-3.0 %>
 
+A new user has been created, but not set up.
 
-A new user landed on the inactive user page:
+  <%= @user.full_name %> <<%= @user.email %>> (<%= @user.username %>)
 
-  <%= @user.full_name %> <<%= @user.email %>>
+They will not be able to use Arvados unless set up by an admin.
 
 <% if Rails.configuration.Services.Workbench1.ExternalURL -%>
-Please see workbench for more information:
+Please see Workbench for more information:
 
-  <%= Rails.configuration.Services.Workbench1.ExternalURL %>
+  <%= URI::join(Rails.configuration.Services.Workbench1.ExternalURL, "user/#{@user.uuid}") %>
 
 <% end -%>
 Thanks,
diff --git a/services/api/app/views/admin_notifier/new_user.text.erb b/services/api/app/views/admin_notifier/new_user.text.erb
index 670b84b7c1..920906d833 100644
--- a/services/api/app/views/admin_notifier/new_user.text.erb
+++ b/services/api/app/views/admin_notifier/new_user.text.erb
@@ -2,22 +2,16 @@
 
 SPDX-License-Identifier: AGPL-3.0 %>
 
-<%
-  add_to_message = ''
-  if Rails.configuration.Users.AutoSetupNewUsers
-    add_to_message = @user.is_invited ? ' and setup' : ', but not setup'
-  end
-%>
-A new user has been created<%=add_to_message%>:
+A new user has been created and set up.
 
-  <%= @user.full_name %> <<%= @user.email %>>
+  <%= @user.full_name %> <<%= @user.email %>> (<%= @user.username %>)
 
-This user is <%= @user.is_active ? '' : 'NOT ' %>active.
+They are able to use Arvados.
 
 <% if Rails.configuration.Services.Workbench1.ExternalURL -%>
-Please see workbench for more information:
+Please see Workbench for more information:
 
-  <%= Rails.configuration.Services.Workbench1.ExternalURL %>
+  <%= URI::join(Rails.configuration.Services.Workbench1.ExternalURL, "user/#{@user.uuid}") %>
 
 <% end -%>
 Thanks,
diff --git a/services/api/app/views/user_notifier/account_is_setup.text.erb b/services/api/app/views/user_notifier/account_is_setup.text.erb
index 352ee7754e..3f04db8517 100644
--- a/services/api/app/views/user_notifier/account_is_setup.text.erb
+++ b/services/api/app/views/user_notifier/account_is_setup.text.erb
@@ -2,4 +2,4 @@
 
 SPDX-License-Identifier: AGPL-3.0 %>
 
-<%= ERB.new(Rails.configuration.Users.UserSetupMailText, 0, "-").result(binding) %>
+<%= ERB.new(Rails.configuration.Users.UserSetupMailText, trim_mode: "-").result(binding) %>
diff --git a/services/api/test/integration/remote_user_test.rb b/services/api/test/integration/remote_user_test.rb
index b0d1320fdf..f42fda4150 100644
--- a/services/api/test/integration/remote_user_test.rb
+++ b/services/api/test/integration/remote_user_test.rb
@@ -100,12 +100,15 @@ class RemoteUsersTest < ActionDispatch::IntegrationTest
       uuid: 'zbbbb-tpzed-000000000000001',
       email: 'foo at example.com',
       username: 'barney',
+      first_name: "Barney",
+      last_name: "Foo",
       is_admin: true,
       is_active: true,
       is_invited: true,
     }
     @stub_token_status = 200
     @stub_token_scopes = ["all"]
+    ActionMailer::Base.deliveries = []
   end
 
   teardown do
@@ -366,6 +369,12 @@ class RemoteUsersTest < ActionDispatch::IntegrationTest
 
   test 'get user from Login cluster' do
     Rails.configuration.Login.LoginCluster = 'zbbbb'
+    email_dest = ActiveSupport::OrderedOptions.new
+    email_dest[:'arvados-admin at example.com'] = ActiveSupport::OrderedOptions.new
+    Rails.configuration.Users.UserNotifierEmailBcc = email_dest
+    Rails.configuration.Users.NewUserNotificationRecipients = email_dest
+    Rails.configuration.Users.NewInactiveUserNotificationRecipients = email_dest
+
     get '/arvados/v1/users/current',
       params: {format: 'json'},
       headers: auth(remote: 'zbbbb')
@@ -375,14 +384,18 @@ class RemoteUsersTest < ActionDispatch::IntegrationTest
     assert_equal true, json_response['is_active']
     assert_equal 'foo at example.com', json_response['email']
     assert_equal 'barney', json_response['username']
+
+    assert_equal 2, ActionMailer::Base.deliveries.length
+    assert_equal "Welcome to Arvados - account enabled", ActionMailer::Base.deliveries[0].subject
+    assert_equal "[ARVADOS] New user created notification", ActionMailer::Base.deliveries[1].subject
   end
 
   [true, false].each do |trusted|
     [true, false].each do |logincluster|
-      [true, false].each do |admin|
-        [true, false].each do |active|
+      [true, false, nil].each do |admin|
+        [true, false, nil].each do |active|
           [true, false].each do |autosetup|
-            [true, false].each do |invited|
+            [true, false, nil].each do |invited|
               test "get invited=#{invited}, active=#{active}, admin=#{admin} user from #{if logincluster then "Login" else "peer" end} cluster when AutoSetupNewUsers=#{autosetup} ActivateUsers=#{trusted}" do
                 Rails.configuration.Login.LoginCluster = 'zbbbb' if logincluster
                 Rails.configuration.RemoteClusters['zbbbb'].ActivateUsers = trusted
@@ -400,9 +413,9 @@ class RemoteUsersTest < ActionDispatch::IntegrationTest
                     headers: auth(remote: 'zbbbb')
                 assert_response :success
                 assert_equal 'zbbbb-tpzed-000000000000001', json_response['uuid']
-                assert_equal (logincluster && admin && invited && active), json_response['is_admin']
-                assert_equal (invited and (logincluster || trusted || autosetup)), json_response['is_invited']
-                assert_equal (invited and (logincluster || trusted) and active), json_response['is_active']
+                assert_equal (logincluster && !!admin && (invited != false) && !!active), json_response['is_admin']
+                assert_equal ((invited == true || (invited == nil && !!active)) && (logincluster || trusted || autosetup)), json_response['is_invited']
+                assert_equal ((invited != false) && (logincluster || trusted) && !!active), json_response['is_active']
                 assert_equal 'foo at example.com', json_response['email']
                 assert_equal 'barney', json_response['username']
               end
diff --git a/services/api/test/unit/user_test.rb b/services/api/test/unit/user_test.rb
index 8a41fb3976..6f49b2b109 100644
--- a/services/api/test/unit/user_test.rb
+++ b/services/api/test/unit/user_test.rb
@@ -347,10 +347,12 @@ class UserTest < ActiveSupport::TestCase
   test "create new user with notifications" do
     set_user_from_auth :admin
 
+    Rails.configuration.Users.AutoSetupNewUsers = false
+
     create_user_and_verify_setup_and_notifications true, active_notify_list, inactive_notify_list, nil, nil
     create_user_and_verify_setup_and_notifications true, active_notify_list, empty_notify_list, nil, nil
     create_user_and_verify_setup_and_notifications true, empty_notify_list, empty_notify_list, nil, nil
-    create_user_and_verify_setup_and_notifications false, active_notify_list, inactive_notify_list, nil, nil
+    create_user_and_verify_setup_and_notifications false, empty_notify_list, inactive_notify_list, nil, nil
     create_user_and_verify_setup_and_notifications false, empty_notify_list, inactive_notify_list, nil, nil
     create_user_and_verify_setup_and_notifications false, empty_notify_list, empty_notify_list, nil, nil
   end
@@ -379,13 +381,13 @@ class UserTest < ActiveSupport::TestCase
     [false, empty_notify_list, empty_notify_list, "arvados at example.com", false, false, "arvados2"],
     [true, active_notify_list, inactive_notify_list, "arvados at example.com", false, false, "arvados2"],
     [true, active_notify_list, inactive_notify_list, "root at example.com", true, false, "root2"],
-    [false, active_notify_list, inactive_notify_list, "root at example.com", true, false, "root2"],
+    [false, active_notify_list, empty_notify_list, "root at example.com", true, false, "root2"],
     [true, active_notify_list, inactive_notify_list, "roo_t at example.com", false, true, "root2"],
     [false, empty_notify_list, empty_notify_list, "^^incorrect_format at example.com", true, true, "incorrectformat"],
     [true, active_notify_list, inactive_notify_list, "&4a_d9. at example.com", true, true, "ad9"],
     [true, active_notify_list, inactive_notify_list, "&4a_d9. at example.com", false, false, "ad9"],
-    [false, active_notify_list, inactive_notify_list, "&4a_d9. at example.com", true, true, "ad9"],
-    [false, active_notify_list, inactive_notify_list, "&4a_d9. at example.com", false, false, "ad9"],
+    [false, active_notify_list, empty_notify_list, "&4a_d9. at example.com", true, true, "ad9"],
+    [false, active_notify_list, empty_notify_list, "&4a_d9. at example.com", false, false, "ad9"],
   ].each do |active, new_user_recipients, inactive_recipients, email, auto_setup_vm, auto_setup_repo, expect_username|
     test "create new user with auto setup active=#{active} email=#{email} vm=#{auto_setup_vm} repo=#{auto_setup_repo}" do
       set_user_from_auth :admin

commit e41bfa99c026caee2bf6e6e7288c98d901009de8
Author: Tom Clegg <tom at curii.com>
Date:   Tue Jan 2 09:34:21 2024 -0500

    Merge branch '21206-ws-requesting-container-uuid'
    
    closes #21206
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/services/ws/session_v0_test.go b/services/ws/session_v0_test.go
index 72668950a5..7d15543c05 100644
--- a/services/ws/session_v0_test.go
+++ b/services/ws/session_v0_test.go
@@ -211,6 +211,41 @@ func (s *v0Suite) TestEventTypeDelete(c *check.C) {
 	}
 }
 
+func (s *v0Suite) TestEventPropertiesFields(c *check.C) {
+	ac := arvados.NewClientFromEnv()
+	ac.AuthToken = s.token
+
+	conn, r, w, err := s.testClient()
+	c.Assert(err, check.IsNil)
+	defer conn.Close()
+
+	c.Check(w.Encode(map[string]interface{}{
+		"method":  "subscribe",
+		"filters": [][]string{{"object_uuid", "=", arvadostest.RunningContainerUUID}},
+	}), check.IsNil)
+	s.expectStatus(c, r, 200)
+
+	err = ac.RequestAndDecode(nil, "POST", "arvados/v1/logs", s.jsonBody("log", map[string]interface{}{
+		"object_uuid": arvadostest.RunningContainerUUID,
+		"event_type":  "update",
+		"properties": map[string]interface{}{
+			"new_attributes": map[string]interface{}{
+				"name":                      "namevalue",
+				"requesting_container_uuid": "uuidvalue",
+				"state":                     "statevalue",
+			},
+		},
+	}), nil)
+	c.Assert(err, check.IsNil)
+
+	lg := s.expectLog(c, r)
+	c.Check(lg.ObjectUUID, check.Equals, arvadostest.RunningContainerUUID)
+	c.Check(lg.EventType, check.Equals, "update")
+	c.Check(lg.Properties["new_attributes"].(map[string]interface{})["requesting_container_uuid"], check.Equals, "uuidvalue")
+	c.Check(lg.Properties["new_attributes"].(map[string]interface{})["name"], check.Equals, "namevalue")
+	c.Check(lg.Properties["new_attributes"].(map[string]interface{})["state"], check.Equals, "statevalue")
+}
+
 // Trashing/deleting a collection produces an "update" event with
 // properties["new_attributes"]["is_trashed"] == true.
 func (s *v0Suite) TestTrashedCollection(c *check.C) {

commit 77981f7cf6b13f63241fdf9c57de70b366a8870d
Author: Brett Smith <brett.smith at curii.com>
Date:   Thu Dec 28 14:38:04 2023 -0500

    Copyedit scoped token creation example
    
    * Mark up user input
    * Use zzzzz cluster id
    
    No issue #.
    
    Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith at curii.com>

diff --git a/doc/admin/scoped-tokens.html.textile.liquid b/doc/admin/scoped-tokens.html.textile.liquid
index 0147aa168b..415f635dcd 100644
--- a/doc/admin/scoped-tokens.html.textile.liquid
+++ b/doc/admin/scoped-tokens.html.textile.liquid
@@ -42,14 +42,14 @@ h2. Creating a scoped token
 
 A scoped token can be created at the command line:
 
-<pre>
-$ arv api_client_authorization create --api-client-authorization '{"scopes": [["GET", "/arvados/v1/collections"], ["GET", "/arvados/v1/collections/"]]}'
+<notextile>
+<pre><code>$ <span class="userinput">arv api_client_authorization create --api-client-authorization '{"scopes": [["GET", "/arvados/v1/collections"], ["GET", "/arvados/v1/collections/"]]}'</span>
 {
- "href":"/api_client_authorizations/x1u39-gj3su-bizbsw0mx5pju3w",
+ "href":"/api_client_authorizations/zzzzz-gj3su-bizbsw0mx5pju3w",
  "kind":"arvados#apiClientAuthorization",
  "etag":"9yk144t0v6cvyp0342exoh2vq",
- "uuid":"x1u39-gj3su-bizbsw0mx5pju3w",
- "owner_uuid":"x1u39-tpzed-fr97h9t4m5jffxs",
+ "uuid":"zzzzz-gj3su-bizbsw0mx5pju3w",
+ "owner_uuid":"zzzzz-tpzed-fr97h9t4m5jffxs",
  "created_at":"2020-03-12T20:36:12.517375422Z",
  "modified_by_client_uuid":null,
  "modified_by_user_uuid":null,
@@ -73,6 +73,7 @@ $ arv api_client_authorization create --api-client-authorization '{"scopes": [["
   ]
  ]
 }
-</pre>
+</code></pre>
+</notextile>
 
 The response will include @api_token@ field which is the newly issued secret token.  It can be passed directly to the API server that issued it, or can be used to construct a @v2@ token.  A @v2@ format token is required if the token will be used to access other clusters in an Arvados federation.  An Arvados @v2@ format token consists of three fields separate by slashes: the prefix @v2@, followed by the token uuid, followed by the token secret.  For example: @v2/x1u39-gj3su-bizbsw0mx5pju3w/5a74htnoqwkhtfo2upekpfbsg04hv7cy5v4nowf7dtpxer086m at .

commit 55796d75eb2bf9dc1952eee5569960dc7bd334cd
Author: Brett Smith <brett.smith at curii.com>
Date:   Thu Dec 28 14:29:30 2023 -0500

    Use full paths for API documentation links
    
    Most of the cross-reference links in our documentation point to a
    specific page. These links are unusual in that they point to a
    directory. Lately the linkchecker has been intermittently complaining
    about them:
    
      URL        `file:///tmp/workspace/developer-run-tests-doc-and-sdk-R/doc/.site/api'
      Name       `API documentation'
      Parent URL file:///tmp/workspace/developer-run-tests-doc-and-sdk-R/doc/.site/admin/scoped-tokens.html, line 306, col 576
      Real URL   file:///tmp/workspace/developer-run-tests-doc-and-sdk-R/doc/.site/api/
      Warning    [file-missing-slash] Added trailing slash to
                 directory.
      Result     Valid: directory
    
    This causes a test failure. Spell out the rest of the link to prevent
    that. No issue #.
    
    Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith at curii.com>

diff --git a/doc/admin/scoped-tokens.html.textile.liquid b/doc/admin/scoped-tokens.html.textile.liquid
index de09b42615..0147aa168b 100644
--- a/doc/admin/scoped-tokens.html.textile.liquid
+++ b/doc/admin/scoped-tokens.html.textile.liquid
@@ -18,9 +18,9 @@ Another example is situations where admin access is required but there is risk o
 
 h2. Defining scopes
 
-A "scope" consists of a HTTP method and API path.  A token can have multiple scopes.  Token scopes act as a whitelist, and the API server checks the HTTP method and the API path of every request against the scopes of the request token.  Scopes are also described on the "API Authorization":{{site.baseurl}}/api/tokens.html#scopes page of the "API documentation":{{site.baseurl}}/api .
+A "scope" consists of a HTTP method and API path.  A token can have multiple scopes.  Token scopes act as a whitelist, and the API server checks the HTTP method and the API path of every request against the scopes of the request token.  Scopes are also described on the "API Authorization":{{site.baseurl}}/api/tokens.html#scopes page of the "API documentation":{{site.baseurl}}/api/index.html.
 
-These examples use @/arvados/v1/collections@, but can be applied to any endpoint.  Consult the "API documentation":{{site.baseurl}}/api to determine the endpoints for specific methods.
+These examples use @/arvados/v1/collections@, but can be applied to any endpoint.  Consult the "API documentation":{{site.baseurl}}/api/index.html to determine the endpoints for specific methods.
 
 The scope @["GET", "/arvados/v1/collections"]@ will allow only GET or HEAD requests for the list of collections.  Any other HTTP method or path (including requests for a specific collection record, eg a request with path @/arvados/v1/collections/zzzzz-4zz18-0123456789abcde@) will return a permission error.
 

commit 75db74e6d1fa136fed63571df6add0a099ae377c
Author: Lucas Di Pentima <lucas.dipentima at curii.com>
Date:   Tue Dec 19 13:17:13 2023 -0300

    Merge branch '21309-x-crypto-upgrade'. Closes #21309
    
    Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <lucas.dipentima at curii.com>

diff --git a/go.mod b/go.mod
index 218e2ddde8..0011d7970f 100644
--- a/go.mod
+++ b/go.mod
@@ -37,7 +37,7 @@ require (
 	github.com/prometheus/client_model v0.3.0
 	github.com/prometheus/common v0.39.0
 	github.com/sirupsen/logrus v1.8.1
-	golang.org/x/crypto v0.16.0
+	golang.org/x/crypto v0.17.0
 	golang.org/x/net v0.19.0
 	golang.org/x/oauth2 v0.11.0
 	golang.org/x/sys v0.15.0
diff --git a/go.sum b/go.sum
index 31ddd88621..fb2fe5e3f0 100644
--- a/go.sum
+++ b/go.sum
@@ -287,8 +287,8 @@ golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPh
 golang.org/x/crypto v0.0.0-20201002170205-7f63de1d35b0/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
 golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
 golang.org/x/crypto v0.0.0-20220314234659-1baeb1ce4c0b/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
-golang.org/x/crypto v0.16.0 h1:mMMrFzRSCF0GvB7Ne27XVtVAaXLrPmgPC7/v0tkwHaY=
-golang.org/x/crypto v0.16.0/go.mod h1:gCAAfMLgwOJRpTjQ2zCCt2OcSfYMTeZVSRtQlPC7Nq4=
+golang.org/x/crypto v0.17.0 h1:r8bRNjWL3GshPW3gkd+RpvzWrZAwPS49OmTGZ/uhM4k=
+golang.org/x/crypto v0.17.0/go.mod h1:gCAAfMLgwOJRpTjQ2zCCt2OcSfYMTeZVSRtQlPC7Nq4=
 golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
 golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
 golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=

commit 804d066f401bf8370edf13566fe4b219cd391f89
Author: Tom Clegg <tom at curii.com>
Date:   Mon Dec 18 16:19:53 2023 -0500

    Merge branch '21214-dav-virtual-projects'
    
    fixes #21214
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/doc/api/keep-webdav.html.textile.liquid b/doc/api/keep-webdav.html.textile.liquid
index f068a49c2c..e95d523b9d 100644
--- a/doc/api/keep-webdav.html.textile.liquid
+++ b/doc/api/keep-webdav.html.textile.liquid
@@ -35,6 +35,12 @@ The @users@ folder will return a listing of the users for whom the client has pe
 
 In addition to the @/by_id/@ path prefix, the collection or project can be specified using a path prefix of @/c=<uuid or pdh>/@ or (if the cluster is properly configured) as a virtual host.  This is described on "Keep-web URLs":keep-web-urls.html
 
+It is possible for a project or a "filter group":methods/groups.html#filter to appear as its own descendant in the @by_id@ and @users@ tree (a filter group may match itself, its own ancestor, another filter group that matches its ancestor, etc). When this happens, the descendant appears as an empty read-only directory. For example, if filter group @f@ matches its own parent @p@:
+* @/users/example/p/f@ will show the filter group's contents (matched projects and collections).
+* @/users/example/p/f/p@ will appear as an empty directory.
+* @/by_id/uuid_of_f/p@ will show the parent project's contents, including @f at .
+* @/by_id/uuid_of_f/p/f@ will appear as an empty directory.
+
 h3(#auth). Authentication mechanisms
 
 A token can be provided in an Authorization header as a @Bearer@ token:
diff --git a/doc/api/methods/groups.html.textile.liquid b/doc/api/methods/groups.html.textile.liquid
index 02e8bfbb3f..ef8e177f1c 100644
--- a/doc/api/methods/groups.html.textile.liquid
+++ b/doc/api/methods/groups.html.textile.liquid
@@ -46,7 +46,7 @@ The @frozen_by_uuid@ attribute can be cleared by an admin user. It can also be c
 
 The optional @API.FreezeProjectRequiresDescription@ and @API.FreezeProjectRequiresProperties@ configuration settings can be used to prevent users from freezing projects that have empty @description@ and/or specified @properties@ entries.
 
-h3. Filter groups
+h3(#filter). Filter groups
 
 @filter@ groups are virtual groups; they can not own other objects. Filter groups have a special @properties@ field named @filters@, which must be an array of filter conditions. See "list method filters":{{site.baseurl}}/api/methods.html#filters for details on the syntax of valid filters, but keep in mind that the attributes must include the object type (@collections@, @container_requests@, @groups@, @workflows@), separated with a dot from the field to be filtered on.
 
diff --git a/sdk/go/arvados/fs_base.go b/sdk/go/arvados/fs_base.go
index 274d207022..430a0d4c9b 100644
--- a/sdk/go/arvados/fs_base.go
+++ b/sdk/go/arvados/fs_base.go
@@ -13,6 +13,7 @@ import (
 	"net/http"
 	"os"
 	"path"
+	"path/filepath"
 	"strings"
 	"sync"
 	"time"
@@ -387,17 +388,28 @@ func (n *treenode) Size() int64 {
 }
 
 func (n *treenode) FileInfo() os.FileInfo {
-	n.Lock()
-	defer n.Unlock()
-	n.fileinfo.size = int64(len(n.inodes))
-	return n.fileinfo
+	n.RLock()
+	defer n.RUnlock()
+	fi := n.fileinfo
+	fi.size = int64(len(n.inodes))
+	return fi
 }
 
 func (n *treenode) Readdir() (fi []os.FileInfo, err error) {
+	// We need RLock to safely read n.inodes, but we must release
+	// it before calling FileInfo() on the child nodes. Otherwise,
+	// we risk deadlock when filter groups A and B match each
+	// other, concurrent Readdir() calls try to RLock them in
+	// opposite orders, and one cannot be RLocked a second time
+	// because a third caller is waiting for a write lock.
 	n.RLock()
-	defer n.RUnlock()
-	fi = make([]os.FileInfo, 0, len(n.inodes))
+	inodes := make([]inode, 0, len(n.inodes))
 	for _, inode := range n.inodes {
+		inodes = append(inodes, inode)
+	}
+	n.RUnlock()
+	fi = make([]os.FileInfo, 0, len(inodes))
+	for _, inode := range inodes {
 		fi = append(fi, inode.FileInfo())
 	}
 	return
@@ -468,7 +480,8 @@ func (fs *fileSystem) openFile(name string, flag int, perm os.FileMode) (*fileha
 		return nil, ErrSyncNotSupported
 	}
 	dirname, name := path.Split(name)
-	parent, err := rlookup(fs.root, dirname)
+	ancestors := map[inode]bool{}
+	parent, err := rlookup(fs.root, dirname, ancestors)
 	if err != nil {
 		return nil, err
 	}
@@ -533,6 +546,24 @@ func (fs *fileSystem) openFile(name string, flag int, perm os.FileMode) (*fileha
 			return nil, err
 		}
 	}
+	// If n and one of its parents/ancestors are [hardlinks to]
+	// the same node (e.g., a filter group that matches itself),
+	// open an "empty directory" node instead, so the inner
+	// hardlink appears empty. This is needed to ensure
+	// Open("a/b/c/x/x").Readdir() appears empty, matching the
+	// behavior of rlookup("a/b/c/x/x/z") => ErrNotExist.
+	if hl, ok := n.(*hardlink); (ok && ancestors[hl.inode]) || ancestors[n] {
+		n = &treenode{
+			fs:     n.FS(),
+			parent: parent,
+			inodes: nil,
+			fileinfo: fileinfo{
+				name:    name,
+				modTime: time.Now(),
+				mode:    0555 | os.ModeDir,
+			},
+		}
+	}
 	return &filehandle{
 		inode:    n,
 		append:   flag&os.O_APPEND != 0,
@@ -551,7 +582,7 @@ func (fs *fileSystem) Create(name string) (File, error) {
 
 func (fs *fileSystem) Mkdir(name string, perm os.FileMode) error {
 	dirname, name := path.Split(name)
-	n, err := rlookup(fs.root, dirname)
+	n, err := rlookup(fs.root, dirname, nil)
 	if err != nil {
 		return err
 	}
@@ -575,7 +606,7 @@ func (fs *fileSystem) Mkdir(name string, perm os.FileMode) error {
 }
 
 func (fs *fileSystem) Stat(name string) (os.FileInfo, error) {
-	node, err := rlookup(fs.root, name)
+	node, err := rlookup(fs.root, name, nil)
 	if err != nil {
 		return nil, err
 	}
@@ -704,7 +735,7 @@ func (fs *fileSystem) remove(name string, recursive bool) error {
 	if name == "" || name == "." || name == ".." {
 		return ErrInvalidArgument
 	}
-	dir, err := rlookup(fs.root, dirname)
+	dir, err := rlookup(fs.root, dirname, nil)
 	if err != nil {
 		return err
 	}
@@ -741,9 +772,31 @@ func (fs *fileSystem) MemorySize() int64 {
 // rlookup (recursive lookup) returns the inode for the file/directory
 // with the given name (which may contain "/" separators). If no such
 // file/directory exists, the returned node is nil.
-func rlookup(start inode, path string) (node inode, err error) {
+//
+// The visited map should be either nil or empty. If non-nil, all
+// nodes and hardlink targets visited by the given path will be added
+// to it.
+//
+// If a cycle is detected, the second occurrence of the offending node
+// will be replaced by an empty directory. For example, if "x" is a
+// filter group that matches itself, then rlookup("a/b/c/x") will
+// return the filter group, and rlookup("a/b/c/x/x") will return an
+// empty directory.
+func rlookup(start inode, path string, visited map[inode]bool) (node inode, err error) {
+	if visited == nil {
+		visited = map[inode]bool{}
+	}
 	node = start
+	// Clean up ./ and ../ and double-slashes, but (unlike
+	// filepath.Clean) retain a trailing slash, because looking up
+	// ".../regularfile/" should fail.
+	trailingSlash := strings.HasSuffix(path, "/")
+	path = filepath.Clean(path)
+	if trailingSlash && path != "/" {
+		path += "/"
+	}
 	for _, name := range strings.Split(path, "/") {
+		visited[node] = true
 		if node.IsDir() {
 			if name == "." || name == "" {
 				continue
@@ -761,6 +814,24 @@ func rlookup(start inode, path string) (node inode, err error) {
 		if node == nil || err != nil {
 			break
 		}
+		checknode := node
+		if hardlinked, ok := checknode.(*hardlink); ok {
+			checknode = hardlinked.inode
+		}
+		if visited[checknode] {
+			node = &treenode{
+				fs:     node.FS(),
+				parent: node.Parent(),
+				inodes: nil,
+				fileinfo: fileinfo{
+					name:    name,
+					modTime: time.Now(),
+					mode:    0555 | os.ModeDir,
+				},
+			}
+		} else {
+			visited[checknode] = true
+		}
 	}
 	if node == nil && err == nil {
 		err = os.ErrNotExist
diff --git a/sdk/go/arvados/fs_collection.go b/sdk/go/arvados/fs_collection.go
index 84ff69d6bd..052cc1aa37 100644
--- a/sdk/go/arvados/fs_collection.go
+++ b/sdk/go/arvados/fs_collection.go
@@ -457,7 +457,7 @@ func (fs *collectionFileSystem) Sync() error {
 }
 
 func (fs *collectionFileSystem) Flush(path string, shortBlocks bool) error {
-	node, err := rlookup(fs.fileSystem.root, path)
+	node, err := rlookup(fs.fileSystem.root, path, nil)
 	if err != nil {
 		return err
 	}
diff --git a/sdk/go/arvados/fs_lookup.go b/sdk/go/arvados/fs_lookup.go
index 2bb09995e1..7f22449318 100644
--- a/sdk/go/arvados/fs_lookup.go
+++ b/sdk/go/arvados/fs_lookup.go
@@ -48,7 +48,19 @@ func (ln *lookupnode) Readdir() ([]os.FileInfo, error) {
 			return nil, err
 		}
 		for _, child := range all {
-			_, err = ln.treenode.Child(child.FileInfo().Name(), func(inode) (inode, error) {
+			var name string
+			if hl, ok := child.(*hardlink); ok && hl.inode == ln {
+				// If child is a hardlink to its
+				// parent, FileInfo()->RLock() will
+				// deadlock, because we already have
+				// the write lock. In this situation
+				// we can safely access the hardlink's
+				// name directly.
+				name = hl.name
+			} else {
+				name = child.FileInfo().Name()
+			}
+			_, err = ln.treenode.Child(name, func(inode) (inode, error) {
 				return child, nil
 			})
 			if err != nil {
diff --git a/sdk/go/arvados/fs_project.go b/sdk/go/arvados/fs_project.go
index a68e83945e..df1d06e753 100644
--- a/sdk/go/arvados/fs_project.go
+++ b/sdk/go/arvados/fs_project.go
@@ -35,10 +35,11 @@ func (fs *customFileSystem) projectsLoadOne(parent inode, uuid, name string) (in
 		contents = CollectionList{}
 		err = fs.RequestAndDecode(&contents, "GET", "arvados/v1/groups/"+uuid+"/contents", nil, ResourceListParams{
 			Count: "none",
+			Order: "uuid",
 			Filters: []Filter{
 				{"name", "=", strings.Replace(name, subst, "/", -1)},
 				{"uuid", "is_a", []string{"arvados#collection", "arvados#group"}},
-				{"groups.group_class", "=", "project"},
+				{"groups.group_class", "in", []string{"project", "filter"}},
 			},
 			Select: []string{"uuid", "name", "modified_at", "properties"},
 		})
@@ -104,7 +105,7 @@ func (fs *customFileSystem) projectsLoadAll(parent inode, uuid string) ([]inode,
 			{"uuid", "is_a", class},
 		}
 		if class == "arvados#group" {
-			filters = append(filters, Filter{"group_class", "=", "project"})
+			filters = append(filters, Filter{"groups.group_class", "in", []string{"project", "filter"}})
 		}
 
 		params := ResourceListParams{
diff --git a/sdk/go/arvados/fs_project_test.go b/sdk/go/arvados/fs_project_test.go
index d3dac7a14f..5c2eb33d12 100644
--- a/sdk/go/arvados/fs_project_test.go
+++ b/sdk/go/arvados/fs_project_test.go
@@ -42,61 +42,94 @@ func (sc *spyingClient) RequestAndDecode(dst interface{}, method, path string, b
 func (s *SiteFSSuite) TestFilterGroup(c *check.C) {
 	// Make sure that a collection and group that match the filter are present,
 	// and that a group that does not match the filter is not present.
-	s.fs.MountProject("fg", fixtureThisFilterGroupUUID)
 
-	_, err := s.fs.OpenFile("/fg/baz_file", 0, 0)
-	c.Assert(err, check.IsNil)
+	checkOpen := func(path string, exists bool) {
+		f, err := s.fs.Open(path)
+		if exists {
+			if c.Check(err, check.IsNil) {
+				c.Check(f.Close(), check.IsNil)
+			}
+		} else {
+			c.Check(err, check.Equals, os.ErrNotExist)
+		}
+	}
 
-	_, err = s.fs.OpenFile("/fg/A Subproject", 0, 0)
-	c.Assert(err, check.IsNil)
+	checkDirContains := func(parent, child string, exists bool) {
+		f, err := s.fs.Open(parent)
+		if !c.Check(err, check.IsNil) {
+			return
+		}
+		ents, err := f.Readdir(-1)
+		if !c.Check(err, check.IsNil) {
+			return
+		}
+		for _, ent := range ents {
+			if !exists {
+				c.Check(ent.Name(), check.Not(check.Equals), child)
+				if child == "" {
+					// no children are expected
+					c.Errorf("child %q found in parent %q", child, parent)
+				}
+			} else if ent.Name() == child {
+				return
+			}
+		}
+		if exists {
+			c.Errorf("child %q not found in parent %q", child, parent)
+		}
+	}
 
-	_, err = s.fs.OpenFile("/fg/A Project", 0, 0)
-	c.Assert(err, check.Not(check.IsNil))
+	checkOpen("/users/active/This filter group/baz_file", true)
+	checkOpen("/users/active/This filter group/A Subproject", true)
+	checkOpen("/users/active/This filter group/A Project", false)
+	s.fs.MountProject("fg", fixtureThisFilterGroupUUID)
+	checkOpen("/fg/baz_file", true)
+	checkOpen("/fg/A Subproject", true)
+	checkOpen("/fg/A Project", false)
+	s.fs.MountProject("home", "")
+	checkOpen("/home/A filter group with an is_a collection filter/baz_file", true)
+	checkOpen("/home/A filter group with an is_a collection filter/baz_file/baz", true)
+	checkOpen("/home/A filter group with an is_a collection filter/A Subproject", false)
+	checkOpen("/home/A filter group with an is_a collection filter/A Project", false)
 
 	// An empty filter means everything that is visible should be returned.
+	checkOpen("/users/active/A filter group without filters/baz_file", true)
+	checkOpen("/users/active/A filter group without filters/A Subproject", true)
+	checkOpen("/users/active/A filter group without filters/A Project", true)
 	s.fs.MountProject("fg2", fixtureAFilterGroupTwoUUID)
+	checkOpen("/fg2/baz_file", true)
+	checkOpen("/fg2/A Subproject", true)
+	checkOpen("/fg2/A Project", true)
 
-	_, err = s.fs.OpenFile("/fg2/baz_file", 0, 0)
-	c.Assert(err, check.IsNil)
-
-	_, err = s.fs.OpenFile("/fg2/A Subproject", 0, 0)
-	c.Assert(err, check.IsNil)
-
-	_, err = s.fs.OpenFile("/fg2/A Project", 0, 0)
-	c.Assert(err, check.IsNil)
+	// If a filter group matches itself or one of its ancestors,
+	// the matched item appears as an empty directory.
+	checkDirContains("/users/active/A filter group without filters", "A filter group without filters", true)
+	checkOpen("/users/active/A filter group without filters/A filter group without filters", true)
+	checkOpen("/users/active/A filter group without filters/A filter group without filters/baz_file", false)
+	checkDirContains("/users/active/A filter group without filters/A filter group without filters", "", false)
 
 	// An 'is_a' 'arvados#collection' filter means only collections should be returned.
+	checkOpen("/users/active/A filter group with an is_a collection filter/baz_file", true)
+	checkOpen("/users/active/A filter group with an is_a collection filter/baz_file/baz", true)
+	checkOpen("/users/active/A filter group with an is_a collection filter/A Subproject", false)
+	checkOpen("/users/active/A filter group with an is_a collection filter/A Project", false)
 	s.fs.MountProject("fg3", fixtureAFilterGroupThreeUUID)
-
-	_, err = s.fs.OpenFile("/fg3/baz_file", 0, 0)
-	c.Assert(err, check.IsNil)
-
-	_, err = s.fs.OpenFile("/fg3/A Subproject", 0, 0)
-	c.Assert(err, check.Not(check.IsNil))
+	checkOpen("/fg3/baz_file", true)
+	checkOpen("/fg3/baz_file/baz", true)
+	checkOpen("/fg3/A Subproject", false)
 
 	// An 'exists' 'arvados#collection' filter means only collections with certain properties should be returned.
 	s.fs.MountProject("fg4", fixtureAFilterGroupFourUUID)
-
-	_, err = s.fs.Stat("/fg4/collection with list property with odd values")
-	c.Assert(err, check.IsNil)
-
-	_, err = s.fs.Stat("/fg4/collection with list property with even values")
-	c.Assert(err, check.IsNil)
+	checkOpen("/fg4/collection with list property with odd values", true)
+	checkOpen("/fg4/collection with list property with even values", true)
+	checkOpen("/fg4/baz_file", false)
 
 	// A 'contains' 'arvados#collection' filter means only collections with certain properties should be returned.
 	s.fs.MountProject("fg5", fixtureAFilterGroupFiveUUID)
-
-	_, err = s.fs.Stat("/fg5/collection with list property with odd values")
-	c.Assert(err, check.IsNil)
-
-	_, err = s.fs.Stat("/fg5/collection with list property with string value")
-	c.Assert(err, check.IsNil)
-
-	_, err = s.fs.Stat("/fg5/collection with prop2 5")
-	c.Assert(err, check.Not(check.IsNil))
-
-	_, err = s.fs.Stat("/fg5/collection with list property with even values")
-	c.Assert(err, check.Not(check.IsNil))
+	checkOpen("/fg5/collection with list property with odd values", true)
+	checkOpen("/fg5/collection with list property with string value", true)
+	checkOpen("/fg5/collection with prop2 5", false)
+	checkOpen("/fg5/collection with list property with even values", false)
 }
 
 func (s *SiteFSSuite) TestCurrentUserHome(c *check.C) {
diff --git a/sdk/go/arvados/fs_site.go b/sdk/go/arvados/fs_site.go
index a4a18837e0..d4f0241682 100644
--- a/sdk/go/arvados/fs_site.go
+++ b/sdk/go/arvados/fs_site.go
@@ -123,6 +123,10 @@ func (fs *customFileSystem) ForwardSlashNameSubstitution(repl string) {
 	fs.forwardSlashNameSubstitution = repl
 }
 
+func (fs *customFileSystem) MemorySize() int64 {
+	return fs.fileSystem.MemorySize() + fs.byIDRoot.MemorySize()
+}
+
 // SiteFileSystem returns a FileSystem that maps collections and other
 // Arvados objects onto a filesystem layout.
 //
@@ -386,3 +390,7 @@ func (hl *hardlink) FileInfo() os.FileInfo {
 	}
 	return fi
 }
+
+func (hl *hardlink) MemorySize() int64 {
+	return 64 + int64(len(hl.name))
+}
diff --git a/sdk/go/arvados/fs_site_test.go b/sdk/go/arvados/fs_site_test.go
index c7d6b2a464..2c86536b2f 100644
--- a/sdk/go/arvados/fs_site_test.go
+++ b/sdk/go/arvados/fs_site_test.go
@@ -185,6 +185,16 @@ func (s *SiteFSSuite) TestByUUIDAndPDH(c *check.C) {
 		names = append(names, fi.Name())
 	}
 	c.Check(names, check.DeepEquals, []string{"baz"})
+	f, err = s.fs.Open("/by_id/" + fixtureAProjectUUID + "/A Subproject/baz_file/baz")
+	c.Assert(err, check.IsNil)
+	err = f.Close()
+	c.Assert(err, check.IsNil)
+	_, err = s.fs.Open("/by_id/" + fixtureAProjectUUID + "/A Subproject/baz_file/baz/")
+	c.Assert(err, check.Equals, ErrNotADirectory)
+	_, err = s.fs.Open("/by_id/" + fixtureAProjectUUID + "/A Subproject/baz_file/baz/z")
+	c.Assert(err, check.Equals, ErrNotADirectory)
+	_, err = s.fs.Open("/by_id/" + fixtureAProjectUUID + "/A Subproject/baz_file/baz/..")
+	c.Assert(err, check.Equals, ErrNotADirectory)
 
 	_, err = s.fs.OpenFile("/by_id/"+fixtureNonexistentCollection, os.O_RDWR|os.O_CREATE, 0755)
 	c.Check(err, ErrorIs, ErrInvalidOperation)
diff --git a/sdk/go/arvadostest/fixtures.go b/sdk/go/arvadostest/fixtures.go
index ac12f7ae13..3b8a618fea 100644
--- a/sdk/go/arvadostest/fixtures.go
+++ b/sdk/go/arvadostest/fixtures.go
@@ -37,8 +37,9 @@ const (
 	StorageClassesDesiredArchiveConfirmedDefault = "zzzzz-4zz18-3t236wr12769qqa"
 	EmptyCollectionUUID                          = "zzzzz-4zz18-gs9ooj1h9sd5mde"
 
-	AProjectUUID    = "zzzzz-j7d0g-v955i6s2oi1cbso"
-	ASubprojectUUID = "zzzzz-j7d0g-axqo7eu9pwvna1x"
+	AProjectUUID     = "zzzzz-j7d0g-v955i6s2oi1cbso"
+	ASubprojectUUID  = "zzzzz-j7d0g-axqo7eu9pwvna1x"
+	AFilterGroupUUID = "zzzzz-j7d0g-thisfiltergroup"
 
 	FooAndBarFilesInDirUUID = "zzzzz-4zz18-foonbarfilesdir"
 	FooAndBarFilesInDirPDH  = "870369fc72738603c2fad16664e50e2d+58"
diff --git a/services/keep-web/handler_test.go b/services/keep-web/handler_test.go
index c14789889d..5a12e26e9d 100644
--- a/services/keep-web/handler_test.go
+++ b/services/keep-web/handler_test.go
@@ -1105,6 +1105,17 @@ func (s *IntegrationSuite) TestDirectoryListingWithNoAnonymousToken(c *check.C)
 }
 
 func (s *IntegrationSuite) testDirectoryListing(c *check.C) {
+	// The "ownership cycle" test fixtures are reachable from the
+	// "filter group without filters" group, causing webdav's
+	// walkfs to recurse indefinitely. Avoid that by deleting one
+	// of the bogus fixtures.
+	arv := arvados.NewClientFromEnv()
+	err := arv.RequestAndDecode(nil, "DELETE", "arvados/v1/groups/zzzzz-j7d0g-cx2al9cqkmsf1hs", nil, nil)
+	if err != nil {
+		c.Assert(err, check.FitsTypeOf, &arvados.TransactionError{})
+		c.Check(err.(*arvados.TransactionError).StatusCode, check.Equals, 404)
+	}
+
 	s.handler.Cluster.Services.WebDAVDownload.ExternalURL.Host = "download.example.com"
 	authHeader := http.Header{
 		"Authorization": {"OAuth2 " + arvadostest.ActiveToken},
@@ -1241,8 +1252,32 @@ func (s *IntegrationSuite) testDirectoryListing(c *check.C) {
 			expect:  []string{"waz"},
 			cutDirs: 2,
 		},
+		{
+			uri:     "download.example.com/users/active/This filter group/",
+			header:  authHeader,
+			expect:  []string{"A Subproject/"},
+			cutDirs: 3,
+		},
+		{
+			uri:     "download.example.com/users/active/This filter group/A Subproject",
+			header:  authHeader,
+			expect:  []string{"baz_file/"},
+			cutDirs: 4,
+		},
+		{
+			uri:     "download.example.com/by_id/" + arvadostest.AFilterGroupUUID,
+			header:  authHeader,
+			expect:  []string{"A Subproject/"},
+			cutDirs: 2,
+		},
+		{
+			uri:     "download.example.com/by_id/" + arvadostest.AFilterGroupUUID + "/A Subproject",
+			header:  authHeader,
+			expect:  []string{"baz_file/"},
+			cutDirs: 3,
+		},
 	} {
-		comment := check.Commentf("HTML: %q => %q", trial.uri, trial.expect)
+		comment := check.Commentf("HTML: %q redir %q => %q", trial.uri, trial.redirect, trial.expect)
 		resp := httptest.NewRecorder()
 		u := mustParseURL("//" + trial.uri)
 		req := &http.Request{
@@ -1278,6 +1313,7 @@ func (s *IntegrationSuite) testDirectoryListing(c *check.C) {
 		} else {
 			c.Check(resp.Code, check.Equals, http.StatusOK, comment)
 			for _, e := range trial.expect {
+				e = strings.Replace(e, " ", "%20", -1)
 				c.Check(resp.Body.String(), check.Matches, `(?ms).*href="./`+e+`".*`, comment)
 			}
 			c.Check(resp.Body.String(), check.Matches, `(?ms).*--cut-dirs=`+fmt.Sprintf("%d", trial.cutDirs)+` .*`, comment)
@@ -1310,6 +1346,12 @@ func (s *IntegrationSuite) testDirectoryListing(c *check.C) {
 		}
 		resp = httptest.NewRecorder()
 		s.handler.ServeHTTP(resp, req)
+		// This check avoids logging a big XML document in the
+		// event webdav throws a 500 error after sending
+		// headers for a 207.
+		if !c.Check(strings.HasSuffix(resp.Body.String(), "Internal Server Error"), check.Equals, false) {
+			continue
+		}
 		if trial.expect == nil {
 			c.Check(resp.Code, check.Equals, http.StatusUnauthorized, comment)
 		} else {
@@ -1320,6 +1362,7 @@ func (s *IntegrationSuite) testDirectoryListing(c *check.C) {
 				} else {
 					e = filepath.Join(u.Path, e)
 				}
+				e = strings.Replace(e, " ", "%20", -1)
 				c.Check(resp.Body.String(), check.Matches, `(?ms).*<D:href>`+e+`</D:href>.*`, comment)
 			}
 		}
diff --git a/services/keep-web/server_test.go b/services/keep-web/server_test.go
index b3d0b9b418..dd29c40082 100644
--- a/services/keep-web/server_test.go
+++ b/services/keep-web/server_test.go
@@ -476,7 +476,7 @@ func (s *IntegrationSuite) TestMetrics(c *check.C) {
 	c.Check(summaries["request_duration_seconds/get/200"].SampleCount, check.Equals, "3")
 	c.Check(summaries["request_duration_seconds/get/404"].SampleCount, check.Equals, "1")
 	c.Check(summaries["time_to_status_seconds/get/404"].SampleCount, check.Equals, "1")
-	c.Check(gauges["arvados_keepweb_sessions_cached_session_bytes//"].Value, check.Equals, float64(469))
+	c.Check(gauges["arvados_keepweb_sessions_cached_session_bytes//"].Value, check.Equals, float64(624))
 
 	// If the Host header indicates a collection, /metrics.json
 	// refers to a file in the collection -- the metrics handler

commit 69a31d4eb53f520a4accab6ba1e674086adb2efa
Author: Brett Smith <brett.smith at curii.com>
Date:   Mon Dec 18 13:48:50 2023 -0500

    Merge branch '21283-callable-api-module'
    
    Closes #21283.
    
    Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith at curii.com>

diff --git a/sdk/python/arvados/__init__.py b/sdk/python/arvados/__init__.py
index e90f381298..83f658201c 100644
--- a/sdk/python/arvados/__init__.py
+++ b/sdk/python/arvados/__init__.py
@@ -6,8 +6,8 @@
 This module provides the entire Python SDK for Arvados. The most useful modules
 include:
 
-* arvados.api - After you `import arvados`, you can call `arvados.api.api` as
-  `arvados.api` to construct a client object.
+* arvados.api - After you `import arvados`, you can call `arvados.api` as a
+  shortcut to the client constructor function `arvados.api.api`.
 
 * arvados.collection - The `arvados.collection.Collection` class provides a
   high-level interface to read and write collections. It coordinates sending
@@ -26,15 +26,24 @@ import types
 
 from collections import UserDict
 
-from .api import api, api_from_config, http_cache
+from . import api, errors, util
+from .api import api_from_config, http_cache
 from .collection import CollectionReader, CollectionWriter, ResumableCollectionWriter
 from arvados.keep import *
 from arvados.stream import *
 from .arvfile import StreamFileReader
 from .logging import log_format, log_date_format, log_handler
 from .retry import RetryLoop
-import arvados.errors as errors
-import arvados.util as util
+
+# Previous versions of the PySDK used to say `from .api import api`.  This
+# made it convenient to call the API client constructor, but difficult to
+# access the rest of the `arvados.api` module. The magic below fixes that
+# bug while retaining backwards compatibility: `arvados.api` is now the
+# module and you can import it normally, but we make that module callable so
+# all the existing code that says `arvados.api('v1', ...)` still works.
+class _CallableAPIModule(api.__class__):
+    __call__ = staticmethod(api.api)
+api.__class__ = _CallableAPIModule
 
 # Override logging module pulled in via `from ... import *`
 # so users can `import arvados.logging`.

commit bf70e3d7cd39fe1d85096d19122e7693159989db
Author: Brett Smith <brett.smith at curii.com>
Date:   Tue Dec 12 08:54:51 2023 -0500

    Merge branch '21277-github-pr-221'
    
    Closes #21277.
    
    Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith at curii.com>

diff --git a/sdk/java-v2/src/main/java/org/arvados/client/api/client/CountingFileRequestBody.java b/sdk/java-v2/src/main/java/org/arvados/client/api/client/CountingFileRequestBody.java
index 43fcdba5c6..d6eb033ff5 100644
--- a/sdk/java-v2/src/main/java/org/arvados/client/api/client/CountingFileRequestBody.java
+++ b/sdk/java-v2/src/main/java/org/arvados/client/api/client/CountingFileRequestBody.java
@@ -7,12 +7,9 @@
 
 package org.arvados.client.api.client;
 
-import okhttp3.MediaType;
-import okhttp3.RequestBody;
 import okio.BufferedSink;
 import okio.Okio;
 import okio.Source;
-import org.slf4j.Logger;
 
 import java.io.File;
 
@@ -20,32 +17,20 @@ import java.io.File;
  * Based on:
  * {@link} https://gist.github.com/eduardb/dd2dc530afd37108e1ac
  */
-public class CountingFileRequestBody extends RequestBody {
-
-    private static final int SEGMENT_SIZE = 2048; // okio.Segment.SIZE
-    private static final MediaType CONTENT_BINARY = MediaType.parse(com.google.common.net.MediaType.OCTET_STREAM.toString());
-
-    private final File file;
-    private final ProgressListener listener;
+public class CountingFileRequestBody extends CountingRequestBody<File> {
 
     CountingFileRequestBody(final File file, final ProgressListener listener) {
-        this.file = file;
-        this.listener = listener;
+        super(file, listener);
     }
 
     @Override
     public long contentLength() {
-        return file.length();
-    }
-
-    @Override
-    public MediaType contentType() {
-        return CONTENT_BINARY;
+        return requestBodyData.length();
     }
 
     @Override
     public void writeTo(BufferedSink sink) {
-        try (Source source = Okio.source(file)) {
+        try (Source source = Okio.source(requestBodyData)) {
             long total = 0;
             long read;
 
@@ -61,24 +46,4 @@ public class CountingFileRequestBody extends RequestBody {
             //ignore
         }
     }
-
-    static class TransferData {
-
-        private final Logger log = org.slf4j.LoggerFactory.getLogger(TransferData.class);
-        private int progressValue;
-        private long totalSize;
-
-        TransferData(long totalSize) {
-            this.progressValue = 0;
-            this.totalSize = totalSize;
-        }
-
-        void updateTransferProgress(long transferred) {
-            float progress = (transferred / (float) totalSize) * 100;
-            if (progressValue != (int) progress) {
-                progressValue = (int) progress;
-                log.debug("{} / {} / {}%", transferred, totalSize, progressValue);
-            }
-        }
-    }
 }
\ No newline at end of file
diff --git a/sdk/java-v2/src/main/java/org/arvados/client/api/client/CountingRequestBody.java b/sdk/java-v2/src/main/java/org/arvados/client/api/client/CountingRequestBody.java
new file mode 100644
index 0000000000..397a1e2306
--- /dev/null
+++ b/sdk/java-v2/src/main/java/org/arvados/client/api/client/CountingRequestBody.java
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) The Arvados Authors. All rights reserved.
+ *
+ * SPDX-License-Identifier: AGPL-3.0 OR Apache-2.0
+ *
+ */
+
+package org.arvados.client.api.client;
+
+import okhttp3.MediaType;
+import okhttp3.RequestBody;
+import org.slf4j.Logger;
+
+abstract class CountingRequestBody<T> extends RequestBody {
+
+    protected static final int SEGMENT_SIZE = 2048; // okio.Segment.SIZE
+    protected static final MediaType CONTENT_BINARY = MediaType.parse(com.google.common.net.MediaType.OCTET_STREAM.toString());
+
+    protected final ProgressListener listener;
+
+    protected final T requestBodyData;
+
+    CountingRequestBody(T file, final ProgressListener listener) {
+        this.requestBodyData = file;
+        this.listener = listener;
+    }
+
+    @Override
+    public MediaType contentType() {
+        return CONTENT_BINARY;
+    }
+
+    static class TransferData {
+
+        private final Logger log = org.slf4j.LoggerFactory.getLogger(TransferData.class);
+        private int progressValue;
+        private long totalSize;
+
+        TransferData(long totalSize) {
+            this.progressValue = 0;
+            this.totalSize = totalSize;
+        }
+
+        void updateTransferProgress(long transferred) {
+            float progress = (transferred / (float) totalSize) * 100;
+            if (progressValue != (int) progress) {
+                progressValue = (int) progress;
+                log.debug("{} / {} / {}%", transferred, totalSize, progressValue);
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/sdk/java-v2/src/main/java/org/arvados/client/api/client/CountingStreamRequestBody.java b/sdk/java-v2/src/main/java/org/arvados/client/api/client/CountingStreamRequestBody.java
new file mode 100644
index 0000000000..7c39371697
--- /dev/null
+++ b/sdk/java-v2/src/main/java/org/arvados/client/api/client/CountingStreamRequestBody.java
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) The Arvados Authors. All rights reserved.
+ *
+ * SPDX-License-Identifier: AGPL-3.0 OR Apache-2.0
+ *
+ */
+
+package org.arvados.client.api.client;
+
+import okio.BufferedSink;
+import okio.Okio;
+import okio.Source;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+
+public class CountingStreamRequestBody extends CountingRequestBody<InputStream> {
+
+    CountingStreamRequestBody(final InputStream inputStream, final ProgressListener listener) {
+        super(inputStream, listener);
+    }
+
+    @Override
+    public long contentLength() throws IOException {
+        return requestBodyData.available();
+    }
+
+    @Override
+    public void writeTo(BufferedSink sink) {
+        try (Source source = Okio.source(requestBodyData)) {
+            long total = 0;
+            long read;
+
+            while ((read = source.read(sink.buffer(), SEGMENT_SIZE)) != -1) {
+                total += read;
+                sink.flush();
+                listener.updateProgress(total);
+
+            }
+        } catch (RuntimeException rethrown) {
+            throw rethrown;
+        } catch (Exception ignored) {
+            //ignore
+        }
+    }
+}
\ No newline at end of file
diff --git a/sdk/java-v2/src/main/java/org/arvados/client/api/client/KeepServerApiClient.java b/sdk/java-v2/src/main/java/org/arvados/client/api/client/KeepServerApiClient.java
index a9306ca2ec..c1525e07a7 100644
--- a/sdk/java-v2/src/main/java/org/arvados/client/api/client/KeepServerApiClient.java
+++ b/sdk/java-v2/src/main/java/org/arvados/client/api/client/KeepServerApiClient.java
@@ -9,7 +9,7 @@ package org.arvados.client.api.client;
 
 import okhttp3.Request;
 import okhttp3.RequestBody;
-import org.arvados.client.api.client.CountingFileRequestBody.TransferData;
+import org.arvados.client.api.client.CountingRequestBody.TransferData;
 import org.arvados.client.common.Headers;
 import org.arvados.client.config.ConfigProvider;
 import org.slf4j.Logger;
diff --git a/sdk/java-v2/src/main/java/org/arvados/client/api/client/KeepWebApiClient.java b/sdk/java-v2/src/main/java/org/arvados/client/api/client/KeepWebApiClient.java
index 05d39e9e60..2c3168649f 100644
--- a/sdk/java-v2/src/main/java/org/arvados/client/api/client/KeepWebApiClient.java
+++ b/sdk/java-v2/src/main/java/org/arvados/client/api/client/KeepWebApiClient.java
@@ -13,6 +13,7 @@ import okhttp3.RequestBody;
 import org.arvados.client.config.ConfigProvider;
 
 import java.io.File;
+import java.io.InputStream;
 
 public class KeepWebApiClient extends BaseApiClient {
 
@@ -48,6 +49,16 @@ public class KeepWebApiClient extends BaseApiClient {
         return newCall(request);
     }
 
+    public String upload(String collectionUuid, InputStream inputStream, String fileName, ProgressListener progressListener) {
+        RequestBody requestBody = new CountingStreamRequestBody(inputStream, progressListener);
+
+        Request request = getRequestBuilder()
+                .url(getUrlBuilder(collectionUuid, fileName).build())
+                .put(requestBody)
+                .build();
+        return newCall(request);
+    }
+
     private HttpUrl.Builder getUrlBuilder(String collectionUuid, String filePathName) {
         return new HttpUrl.Builder()
                 .scheme(config.getApiProtocol())

commit 95f0646552707440777d76f5e0f6d73d3bea923a
Author: Brett Smith <brett.smith at curii.com>
Date:   Fri Dec 1 16:36:43 2023 -0500

    Merge branch '21219-java-sdk-pr-220'
    
    Closes #21219.
    
    Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith at curii.com>

diff --git a/sdk/java-v2/src/main/java/org/arvados/client/config/ExternalConfigProvider.java b/sdk/java-v2/src/main/java/org/arvados/client/config/ExternalConfigProvider.java
index d592b23ac3..e3d706ed0c 100644
--- a/sdk/java-v2/src/main/java/org/arvados/client/config/ExternalConfigProvider.java
+++ b/sdk/java-v2/src/main/java/org/arvados/client/config/ExternalConfigProvider.java
@@ -11,6 +11,10 @@ import java.io.File;
 
 public class ExternalConfigProvider implements ConfigProvider {
 
+    private static final int DEFAULT_CONNECTION_TIMEOUT = 60000;
+    private static final int DEFAULT_READ_TIMEOUT = 60000;
+    private static final int DEFAULT_WRITE_TIMEOUT = 60000;
+
     private boolean apiHostInsecure;
     private String keepWebHost;
     private int keepWebPort;
@@ -41,9 +45,9 @@ public class ExternalConfigProvider implements ConfigProvider {
         this.fileSplitDirectory = fileSplitDirectory;
         this.numberOfCopies = numberOfCopies;
         this.numberOfRetries = numberOfRetries;
-	this.connectTimeout = 60000;
-	this.readTimeout = 60000;
-	this.writeTimeout = 60000;
+	this.connectTimeout = DEFAULT_CONNECTION_TIMEOUT;
+	this.readTimeout = DEFAULT_READ_TIMEOUT;
+	this.writeTimeout = DEFAULT_WRITE_TIMEOUT;
     }
 
     ExternalConfigProvider(boolean apiHostInsecure, String keepWebHost, int keepWebPort, String apiHost, int apiPort,
@@ -156,6 +160,9 @@ public class ExternalConfigProvider implements ConfigProvider {
         private File fileSplitDirectory;
         private int numberOfCopies;
         private int numberOfRetries;
+        private int connectTimeout = DEFAULT_CONNECTION_TIMEOUT;
+        private int readTimeout = DEFAULT_READ_TIMEOUT;
+        private int writeTimeout = DEFAULT_WRITE_TIMEOUT;
 
         ExternalConfigProviderBuilder() {
         }
@@ -215,8 +222,23 @@ public class ExternalConfigProvider implements ConfigProvider {
             return this;
         }
 
+        public ExternalConfigProvider.ExternalConfigProviderBuilder connectTimeout(int connectTimeout) {
+            this.connectTimeout = connectTimeout;
+            return this;
+        }
+
+        public ExternalConfigProvider.ExternalConfigProviderBuilder readTimeout(int readTimeout) {
+            this.readTimeout = readTimeout;
+            return this;
+        }
+
+        public ExternalConfigProvider.ExternalConfigProviderBuilder writeTimeout(int writeTimeout) {
+            this.writeTimeout = writeTimeout;
+            return this;
+        }
+
         public ExternalConfigProvider build() {
-            return new ExternalConfigProvider(apiHostInsecure, keepWebHost, keepWebPort, apiHost, apiPort, apiToken, apiProtocol, fileSplitSize, fileSplitDirectory, numberOfCopies, numberOfRetries);
+            return new ExternalConfigProvider(apiHostInsecure, keepWebHost, keepWebPort, apiHost, apiPort, apiToken, apiProtocol, fileSplitSize, fileSplitDirectory, numberOfCopies, numberOfRetries, connectTimeout, readTimeout, writeTimeout);
         }
 
     }
diff --git a/sdk/java-v2/src/test/java/org/arvados/client/facade/ArvadosFacadeIntegrationTest.java b/sdk/java-v2/src/test/java/org/arvados/client/facade/ArvadosFacadeIntegrationTest.java
index 07269f7e7d..05ba8d1b09 100644
--- a/sdk/java-v2/src/test/java/org/arvados/client/facade/ArvadosFacadeIntegrationTest.java
+++ b/sdk/java-v2/src/test/java/org/arvados/client/facade/ArvadosFacadeIntegrationTest.java
@@ -223,6 +223,9 @@ public class ArvadosFacadeIntegrationTest extends ArvadosClientIntegrationTest {
                 .fileSplitDirectory(CONFIG.getFileSplitDirectory())
                 .numberOfCopies(CONFIG.getNumberOfCopies())
                 .numberOfRetries(CONFIG.getNumberOfRetries())
+                .connectTimeout(CONFIG.getConnectTimeout())
+                .readTimeout(CONFIG.getReadTimeout())
+                .writeTimeout(CONFIG.getWriteTimeout())
                 .build();
     }
 

commit 615111ebd99bab79ecafdf08f392553bcf9fd849
Author: Tom Clegg <tom at curii.com>
Date:   Fri Nov 10 17:18:13 2023 -0500

    20846: Avoid deadlock in unmount-and-replace race.
    
    refs #20846
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/services/fuse/arvados_fuse/unmount.py b/services/fuse/arvados_fuse/unmount.py
index 12d047a8f3..144c582ddc 100644
--- a/services/fuse/arvados_fuse/unmount.py
+++ b/services/fuse/arvados_fuse/unmount.py
@@ -154,6 +154,16 @@ def unmount(path, subtype=None, timeout=10, recursive=False):
             path = os.path.realpath(path)
             continue
         elif not mounted:
+            if was_mounted:
+                # This appears to avoid a race condition where we
+                # return control to the caller after running
+                # "fusermount -u -z" (see below), the caller (e.g.,
+                # arv-mount --replace) immediately tries to attach a
+                # new fuse mount at the same mount point, the
+                # lazy-unmount process unmounts that _new_ mount while
+                # it is being initialized, and the setup code waits
+                # forever for the new mount to be initialized.
+                time.sleep(1)
             return was_mounted
 
         if attempted:

commit 3bd0d1f3d3274630991682aaa417987f836aaa64
Author: Tom Clegg <tom at curii.com>
Date:   Wed Jan 3 13:58:17 2024 -0500

    21301: Tag 2.7.2+ SDK and CLI gems as incompatible with Ruby 3.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/sdk/cli/arvados-cli.gemspec b/sdk/cli/arvados-cli.gemspec
index 1ff841acdd..89c2e46ea9 100644
--- a/sdk/cli/arvados-cli.gemspec
+++ b/sdk/cli/arvados-cli.gemspec
@@ -38,7 +38,7 @@ Gem::Specification.new do |s|
   s.files       = ["bin/arv", "bin/arv-tag", "LICENSE-2.0.txt"]
   s.executables << "arv"
   s.executables << "arv-tag"
-  s.required_ruby_version = '>= 2.1.0'
+  s.required_ruby_version = '>= 2.1.0', '< 3'
   s.add_runtime_dependency 'arvados', '>= 1.4.1.20190320201707'
   # Our google-api-client dependency used to be < 0.9, but that could be
   # satisfied by the buggy 0.9.pre*, cf. https://dev.arvados.org/issues/9213
diff --git a/sdk/ruby/arvados.gemspec b/sdk/ruby/arvados.gemspec
index b196a1c33e..a3ae32d87d 100644
--- a/sdk/ruby/arvados.gemspec
+++ b/sdk/ruby/arvados.gemspec
@@ -37,7 +37,7 @@ Gem::Specification.new do |s|
   s.files       = ["lib/arvados.rb", "lib/arvados/google_api_client.rb",
                    "lib/arvados/collection.rb", "lib/arvados/keep.rb",
                    "README", "LICENSE-2.0.txt"]
-  s.required_ruby_version = '>= 1.8.7'
+  s.required_ruby_version = '>= 1.8.7', '< 3'
   s.add_dependency('activesupport', '>= 3')
   s.add_dependency('andand', '~> 1.3', '>= 1.3.3')
   # Our google-api-client dependency used to be < 0.9, but that could be

-----------------------------------------------------------------------


hooks/post-receive
--