[arvados] updated: 2.6.1-6-g34cf94d17

git repository hosting git at public.arvados.org
Mon May 1 16:25:25 UTC 2023


Summary of changes:
 sdk/cwl/arvados_cwl/arvworkflow.py                 | 29 +++++------------
 sdk/cwl/arvados_cwl/util.py                        | 15 +++++++++
 sdk/cwl/tests/test_util.py                         | 18 +++++++++-
 sdk/python/arvados/keep.py                         | 38 +++++++++++++++-------
 sdk/python/setup.py                                |  3 +-
 .../arvados/v1/container_requests_controller.rb    | 14 ++++++--
 .../arvados/v1/containers_controller.rb            | 15 +++++++--
 7 files changed, 92 insertions(+), 40 deletions(-)

       via  34cf94d17eac018904bc139afeac32293cfa6a09 (commit)
       via  9a03f0580c0b3fce6650e89ddba751af367f29ad (commit)
       via  50675131bdb2d46bd11797be7f64bf8a8c3d0150 (commit)
       via  b202135c52a840eb977ac64aacfb8e43dc292365 (commit)
      from  9dd94738ff21ccc2a8c640a6d970cd5e93252dbc (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit 34cf94d17eac018904bc139afeac32293cfa6a09
Author: Tom Clegg <tom at curii.com>
Date:   Mon May 1 12:07:45 2023 -0400

    Merge branch '20447-less-table-locking'
    
    fixes #20447
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/services/api/app/controllers/arvados/v1/container_requests_controller.rb b/services/api/app/controllers/arvados/v1/container_requests_controller.rb
index 75f70b1a2..586567cb2 100644
--- a/services/api/app/controllers/arvados/v1/container_requests_controller.rb
+++ b/services/api/app/controllers/arvados/v1/container_requests_controller.rb
@@ -38,10 +38,18 @@ class Arvados::V1::ContainerRequestsController < ApplicationController
   end
 
   def update
-    # Lock containers table to avoid deadlock in cascading priority update (see #20240)
-    Container.transaction do
-      ActiveRecord::Base.connection.execute "LOCK TABLE containers IN EXCLUSIVE MODE"
+    if (resource_attrs.keys - [:owner_uuid, :name, :description, :properties]).empty?
+      # If no attributes are being updated besides these, there are no
+      # cascading changes to other rows/tables, so we should just use
+      # row locking.
+      @object.reload(lock: true)
       super
+    else
+      # Lock containers table to avoid deadlock in cascading priority update (see #20240)
+      Container.transaction do
+        ActiveRecord::Base.connection.execute "LOCK TABLE containers IN EXCLUSIVE MODE"
+        super
+      end
     end
   end
 end
diff --git a/services/api/app/controllers/arvados/v1/containers_controller.rb b/services/api/app/controllers/arvados/v1/containers_controller.rb
index b06d65a36..83f99bf92 100644
--- a/services/api/app/controllers/arvados/v1/containers_controller.rb
+++ b/services/api/app/controllers/arvados/v1/containers_controller.rb
@@ -29,10 +29,19 @@ class Arvados::V1::ContainersController < ApplicationController
   end
 
   def update
-    # Lock containers table to avoid deadlock in cascading priority update (see #20240)
-    Container.transaction do
-      ActiveRecord::Base.connection.execute "LOCK TABLE containers IN EXCLUSIVE MODE"
+    if (resource_attrs.keys - [:cost, :gateway_address, :output_properties, :progress, :runtime_status]).empty?
+      # If no attributes are being updated besides these, there are no
+      # cascading changes to other rows/tables, so we should just use
+      # row locking.
+      @object.reload(lock: true)
       super
+    else
+      # Lock containers table to avoid deadlock in cascading priority
+      # update (see #20240)
+      Container.transaction do
+        ActiveRecord::Base.connection.execute "LOCK TABLE containers IN EXCLUSIVE MODE"
+        super
+      end
     end
   end
 

commit 9a03f0580c0b3fce6650e89ddba751af367f29ad
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Mon May 1 11:44:16 2023 -0400

    Merge branch '20462-workflow-prefix' refs #20462
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/sdk/cwl/arvados_cwl/arvworkflow.py b/sdk/cwl/arvados_cwl/arvworkflow.py
index 895676565..43d23e9b9 100644
--- a/sdk/cwl/arvados_cwl/arvworkflow.py
+++ b/sdk/cwl/arvados_cwl/arvworkflow.py
@@ -41,6 +41,7 @@ from .runner import (upload_dependencies, packed_workflow, upload_workflow_colle
 from .pathmapper import ArvPathMapper, trim_listing
 from .arvtool import ArvadosCommandTool, set_cluster_target
 from ._version import __version__
+from .util import common_prefix
 
 from .perf import Perf
 
@@ -235,6 +236,7 @@ def fix_schemadef(req, baseuri, urlexpander, merged_map, jobmapper, pdh):
             merged_map[mm].resolved[r] = rename
     return req
 
+
 def drop_ids(d):
     if isinstance(d, MutableSequence):
         for i, s in enumerate(d):
@@ -280,22 +282,7 @@ def upload_workflow(arvRunner, tool, job_order, project_uuid,
     # Find the longest common prefix among all the file names.  We'll
     # use this to recreate the directory structure in a keep
     # collection with correct relative references.
-    n = 7
-    allmatch = True
-    if firstfile:
-        while allmatch:
-            n += 1
-            for f in all_files:
-                if len(f)-1 < n:
-                    n -= 1
-                    allmatch = False
-                    break
-                if f[n] != firstfile[n]:
-                    allmatch = False
-                    break
-
-        while firstfile[n] != "/":
-            n -= 1
+    prefix = common_prefix(firstfile, all_files)
 
     col = arvados.collection.Collection(api_client=arvRunner.api)
 
@@ -332,22 +319,22 @@ def upload_workflow(arvRunner, tool, job_order, project_uuid,
         update_refs(result, w, tool.doc_loader.expand_url, merged_map, jobmapper, runtimeContext, "", "")
 
         # Write the updated file to the collection.
-        with col.open(w[n+1:], "wt") as f:
+        with col.open(w[len(prefix):], "wt") as f:
             if export_as_json:
                 json.dump(result, f, indent=4, separators=(',',': '))
             else:
                 yamlloader.dump(result, stream=f)
 
         # Also store a verbatim copy of the original files
-        with col.open(os.path.join("original", w[n+1:]), "wt") as f:
+        with col.open(os.path.join("original", w[len(prefix):]), "wt") as f:
             f.write(text)
 
 
     # Upload files referenced by $include directives, these are used
     # unchanged and don't need to be updated.
     for w in include_files:
-        with col.open(w[n+1:], "wb") as f1:
-            with col.open(os.path.join("original", w[n+1:]), "wb") as f3:
+        with col.open(w[len(prefix):], "wb") as f1:
+            with col.open(os.path.join("original", w[len(prefix):]), "wb") as f3:
                 with open(uri_file_path(w), "rb") as f2:
                     dat = f2.read(65536)
                     while dat:
@@ -361,7 +348,7 @@ def upload_workflow(arvRunner, tool, job_order, project_uuid,
     if git_info and git_info.get("http://arvados.org/cwl#gitDescribe"):
         toolname = "%s (%s)" % (toolname, git_info.get("http://arvados.org/cwl#gitDescribe"))
 
-    toolfile = tool.tool["id"][n+1:]
+    toolfile = tool.tool["id"][len(prefix):]
 
     properties = {
         "type": "workflow",
diff --git a/sdk/cwl/arvados_cwl/util.py b/sdk/cwl/arvados_cwl/util.py
index a0c34ea52..299f854ec 100644
--- a/sdk/cwl/arvados_cwl/util.py
+++ b/sdk/cwl/arvados_cwl/util.py
@@ -34,3 +34,18 @@ def get_current_container(api, num_retries=0, logger=None):
             raise e
 
     return current_container
+
+
+def common_prefix(firstfile, all_files):
+    common_parts = firstfile.split('/')
+    common_parts[-1] = ''
+    for f in all_files:
+        f_parts = f.split('/')
+        for index, (a, b) in enumerate(zip(common_parts, f_parts)):
+            if a != b:
+                common_parts = common_parts[:index + 1]
+                common_parts[-1] = ''
+                break
+        if not any(common_parts):
+            break
+    return '/'.join(common_parts)
diff --git a/sdk/cwl/tests/test_util.py b/sdk/cwl/tests/test_util.py
index 1209b88d8..bf3d6fe0e 100644
--- a/sdk/cwl/tests/test_util.py
+++ b/sdk/cwl/tests/test_util.py
@@ -11,6 +11,7 @@ import httplib2
 
 from arvados_cwl.util import *
 from arvados.errors import ApiError
+from arvados_cwl.util import common_prefix
 
 class MockDateTime(datetime.datetime):
     @classmethod
@@ -53,4 +54,19 @@ class TestUtil(unittest.TestCase):
         logger = mock.MagicMock()
 
         current_container = get_current_container(api, num_retries=0, logger=logger)
-        self.assertEqual(current_container, None)
\ No newline at end of file
+        self.assertEqual(current_container, None)
+
+    def test_common_prefix(self):
+        self.assertEqual(common_prefix("file:///foo/bar", ["file:///foo/bar/baz"]), "file:///foo/")
+        self.assertEqual(common_prefix("file:///foo", ["file:///foo", "file:///foo/bar", "file:///foo/bar/"]), "file:///")
+        self.assertEqual(common_prefix("file:///foo/", ["file:///foo/", "file:///foo/bar", "file:///foo/bar/"]), "file:///foo/")
+        self.assertEqual(common_prefix("file:///foo/bar", ["file:///foo/bar", "file:///foo/baz", "file:///foo/quux/q2"]), "file:///foo/")
+        self.assertEqual(common_prefix("file:///foo/bar/", ["file:///foo/bar/", "file:///foo/baz", "file:///foo/quux/q2"]), "file:///foo/")
+        self.assertEqual(common_prefix("file:///foo/bar/splat", ["file:///foo/bar/splat", "file:///foo/baz", "file:///foo/quux/q2"]), "file:///foo/")
+        self.assertEqual(common_prefix("file:///foo/bar/splat", ["file:///foo/bar/splat", "file:///nope", "file:///foo/quux/q2"]), "file:///")
+        self.assertEqual(common_prefix("file:///blub/foo", ["file:///blub/foo", "file:///blub/foo/bar", "file:///blub/foo/bar/"]), "file:///blub/")
+
+        # sanity check, the subsequent code strips off the prefix so
+        # just confirm the logic doesn't have a fencepost error
+        prefix = "file:///"
+        self.assertEqual("file:///foo/bar"[len(prefix):], "foo/bar")

commit 50675131bdb2d46bd11797be7f64bf8a8c3d0150
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Fri Apr 28 11:31:49 2023 -0400

    Merge branch '20422-cache-slot' refs #20422
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/sdk/python/arvados/keep.py b/sdk/python/arvados/keep.py
index 6804f355a..8658774cb 100644
--- a/sdk/python/arvados/keep.py
+++ b/sdk/python/arvados/keep.py
@@ -1174,21 +1174,37 @@ class KeepClient(object):
         try:
             locator = KeepLocator(loc_s)
             if method == "GET":
-                slot, first = self.block_cache.reserve_cache(locator.md5sum)
-                if not first:
+                while slot is None:
+                    slot, first = self.block_cache.reserve_cache(locator.md5sum)
+                    if first:
+                        # Fresh and empty "first time it is used" slot
+                        break
                     if prefetch:
-                        # this is request for a prefetch, if it is
-                        # already in flight, return immediately.
-                        # clear 'slot' to prevent finally block from
-                        # calling slot.set()
+                        # this is request for a prefetch to fill in
+                        # the cache, don't need to wait for the
+                        # result, so if it is already in flight return
+                        # immediately.  Clear 'slot' to prevent
+                        # finally block from calling slot.set()
                         slot = None
                         return None
-                    self.hits_counter.add(1)
+
                     blob = slot.get()
-                    if blob is None:
-                        raise arvados.errors.KeepReadError(
-                            "failed to read {}".format(loc_s))
-                    return blob
+                    if blob is not None:
+                        self.hits_counter.add(1)
+                        return blob
+
+                    # If blob is None, this means either
+                    #
+                    # (a) another thread was fetching this block and
+                    # failed with an error or
+                    #
+                    # (b) cache thrashing caused the slot to be
+                    # evicted (content set to None) by another thread
+                    # between the call to reserve_cache() and get().
+                    #
+                    # We'll handle these cases by reserving a new slot
+                    # and then doing a full GET request.
+                    slot = None
 
             self.misses_counter.add(1)
 

commit b202135c52a840eb977ac64aacfb8e43dc292365
Author: Brett Smith <brett.smith at curii.com>
Date:   Wed Apr 26 14:29:16 2023 -0400

    18799: Version PySDK's typing_extensions dependency
    
    3.7.4 added TypedDict, which is exactly what we use.
    Refs #18799, #20446.
    
    Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith at curii.com>

diff --git a/sdk/python/setup.py b/sdk/python/setup.py
index a9f7c80f9..e1c852123 100644
--- a/sdk/python/setup.py
+++ b/sdk/python/setup.py
@@ -54,7 +54,8 @@ setup(name='arvados-python-client',
           'httplib2 >=0.9.2, <0.20.2',
           'pycurl >=7.19.5.1, <7.45.0',
           'ruamel.yaml >=0.15.54, <0.17.22',
-          'setuptools',
+          'setuptools>=40.3.0',
+          'typing_extensions>=3.7.4; python_version<"3.8"',
           'ws4py >=0.4.2',
           'protobuf<4.0.0dev',
           'pyparsing<3',

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list