[arvados] created: 2.7.0-6354-g14088c5bd8

git repository hosting git at public.arvados.org
Fri Apr 5 23:07:39 UTC 2024


        at  14088c5bd88ede15698ba8f5b8efbf08e18374ce (commit)


commit 14088c5bd88ede15698ba8f5b8efbf08e18374ce
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Fri Apr 5 18:53:37 2024 -0400

    21639: Reenable prefetch, but not on every read()
    
    Only do prefetch every 128 invocations of read().
    
    This should dramatically reduce the overhead of computing prefetch
    while still getting some or most of the benefits of prefetching.
    
    Indeed, benchmarking suggests that this prefetching strategy, by
    advising the kernel to map blocks into RAM, may actually improve
    throughput on the high end.
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/sdk/python/arvados/arvfile.py b/sdk/python/arvados/arvfile.py
index 0cc7d25a33..666efb078d 100644
--- a/sdk/python/arvados/arvfile.py
+++ b/sdk/python/arvados/arvfile.py
@@ -825,7 +825,7 @@ class ArvadosFile(object):
     """
 
     __slots__ = ('parent', 'name', '_writers', '_committed',
-                 '_segments', 'lock', '_current_bblock', 'fuse_entry')
+                 '_segments', 'lock', '_current_bblock', 'fuse_entry', '_read_counter')
 
     def __init__(self, parent, name, stream=[], segments=[]):
         """
@@ -846,6 +846,7 @@ class ArvadosFile(object):
         for s in segments:
             self._add_segment(stream, s.locator, s.range_size)
         self._current_bblock = None
+        self._read_counter = 0
 
     def writable(self):
         return self.parent.writable()
@@ -1060,8 +1061,11 @@ class ArvadosFile(object):
             if size == 0 or offset >= self.size():
                 return b''
             readsegs = locators_and_ranges(self._segments, offset, size)
-            if self.parent._my_block_manager()._keep.num_prefetch_threads > 0:
-                prefetch = locators_and_ranges(self._segments, offset + size, config.KEEP_BLOCK_SIZE * self.parent._my_block_manager()._keep.num_prefetch_threads, limit=32)
+            prefetch = None
+            if self.parent._my_block_manager()._keep.num_prefetch_threads > 0 and (self._read_counter % 128) == 0:
+                prefetch = locators_and_ranges(self._segments, offset + size, config.KEEP_BLOCK_SIZE * self.parent._my_block_manager()._keep.num_prefetch_threads,
+                                               limit=(1+self.parent._my_block_manager()._keep.num_prefetch_threads))
+            self._read_counter += 1
 
         locs = set()
         data = []
@@ -1074,7 +1078,7 @@ class ArvadosFile(object):
             else:
                 break
 
-        if self.parent._my_block_manager()._keep.num_prefetch_threads > 0:
+        if prefetch:
             for lr in prefetch:
                 if lr.locator not in locs:
                     self.parent._my_block_manager().block_prefetch(lr.locator)
diff --git a/sdk/python/arvados/keep.py b/sdk/python/arvados/keep.py
index a824621079..d1be6b931e 100644
--- a/sdk/python/arvados/keep.py
+++ b/sdk/python/arvados/keep.py
@@ -1181,6 +1181,8 @@ class KeepClient(object):
                         # result, so if it is already in flight return
                         # immediately.  Clear 'slot' to prevent
                         # finally block from calling slot.set()
+                        if slot.ready.is_set():
+                            slot.get()
                         slot = None
                         return None
 
diff --git a/services/fuse/arvados_fuse/command.py b/services/fuse/arvados_fuse/command.py
index 1398b92e87..f52121d862 100644
--- a/services/fuse/arvados_fuse/command.py
+++ b/services/fuse/arvados_fuse/command.py
@@ -490,13 +490,6 @@ class Mount(object):
                                                       disk_cache=self.args.disk_cache,
                                                       disk_cache_dir=self.args.disk_cache_dir)
 
-            # Profiling indicates that prefetching has more of a
-            # negative impact on the read() fast path (by requiring it
-            # to do more work and take additional locks) than benefit.
-            # Also, the kernel does some readahead itself, which has a
-            # similar effect.
-            prefetch_threads = 0
-
             self.api = arvados.safeapi.ThreadSafeApiCache(
                 apiconfig=arvados.config.settings(),
                 api_params={
@@ -504,7 +497,6 @@ class Mount(object):
                 },
                 keep_params={
                     'block_cache': block_cache,
-                    'num_prefetch_threads': prefetch_threads,
                     'num_retries': self.args.retries,
                 },
                 version='v1',

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list