[arvados] updated: 2.7.0-6355-g1f54a67625

git repository hosting git at public.arvados.org
Sat Apr 6 00:20:30 UTC 2024


Summary of changes:
 sdk/python/arvados/arvfile.py | 26 ++++++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)

       via  1f54a6762575e1ff8da9861277dd8e44a7e87caf (commit)
      from  14088c5bd88ede15698ba8f5b8efbf08e18374ce (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit 1f54a6762575e1ff8da9861277dd8e44a7e87caf
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Fri Apr 5 20:20:03 2024 -0400

    21639: Wrap around read counter, have slightly less indirection
    
    Also add a comment about the "every 128 read ops" logic.
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/sdk/python/arvados/arvfile.py b/sdk/python/arvados/arvfile.py
index 666efb078d..e0e972b5c1 100644
--- a/sdk/python/arvados/arvfile.py
+++ b/sdk/python/arvados/arvfile.py
@@ -491,7 +491,7 @@ class _BlockManager(object):
         self._put_queue = None
         self._put_threads = None
         self.lock = threading.Lock()
-        self.prefetch_enabled = True
+        self.prefetch_lookahead = self._keep.num_prefetch_threads
         self.num_put_threads = put_threads or _BlockManager.DEFAULT_PUT_THREADS
         self.copies = copies
         self.storage_classes = storage_classes_func or (lambda: [])
@@ -803,7 +803,7 @@ class _BlockManager(object):
         """Initiate a background download of a block.
         """
 
-        if not self.prefetch_enabled:
+        if not self.prefetch_lookahead:
             return
 
         with self.lock:
@@ -1061,11 +1061,25 @@ class ArvadosFile(object):
             if size == 0 or offset >= self.size():
                 return b''
             readsegs = locators_and_ranges(self._segments, offset, size)
+
             prefetch = None
-            if self.parent._my_block_manager()._keep.num_prefetch_threads > 0 and (self._read_counter % 128) == 0:
-                prefetch = locators_and_ranges(self._segments, offset + size, config.KEEP_BLOCK_SIZE * self.parent._my_block_manager()._keep.num_prefetch_threads,
-                                               limit=(1+self.parent._my_block_manager()._keep.num_prefetch_threads))
-            self._read_counter += 1
+            prefetch_lookahead = self.parent._my_block_manager().prefetch_lookahead
+            if prefetch_lookahead:
+                # Doing prefetch on every read() call is surprisingly expensive
+                # when we're trying to deliver data at 600+ MiBps and want
+                # the read() fast path to be as lightweight as possible.
+                #
+                # Only prefetching every 128 read operations
+                # dramatically reduces the overhead while still
+                # getting the benefit of prefetching (e.g. when
+                # reading 128 KiB at a time, it checks for prefetch
+                # every 16 MiB).
+                self._read_counter = (self._read_counter+1) % 128
+                if self._read_counter == 1:
+                    prefetch = locators_and_ranges(self._segments,
+                                                   offset + size,
+                                                   config.KEEP_BLOCK_SIZE * prefetch_lookahead,
+                                                   limit=(1+prefetch_lookahead))
 
         locs = set()
         data = []

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list