[arvados] updated: 2.7.0-6355-g1f54a67625
git repository hosting
git at public.arvados.org
Sat Apr 6 00:20:30 UTC 2024
Summary of changes:
sdk/python/arvados/arvfile.py | 26 ++++++++++++++++++++------
1 file changed, 20 insertions(+), 6 deletions(-)
via 1f54a6762575e1ff8da9861277dd8e44a7e87caf (commit)
from 14088c5bd88ede15698ba8f5b8efbf08e18374ce (commit)
Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.
commit 1f54a6762575e1ff8da9861277dd8e44a7e87caf
Author: Peter Amstutz <peter.amstutz at curii.com>
Date: Fri Apr 5 20:20:03 2024 -0400
21639: Wrap around read counter, have slightly less indirection
Also add a comment about the "every 128 read ops" logic.
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>
diff --git a/sdk/python/arvados/arvfile.py b/sdk/python/arvados/arvfile.py
index 666efb078d..e0e972b5c1 100644
--- a/sdk/python/arvados/arvfile.py
+++ b/sdk/python/arvados/arvfile.py
@@ -491,7 +491,7 @@ class _BlockManager(object):
self._put_queue = None
self._put_threads = None
self.lock = threading.Lock()
- self.prefetch_enabled = True
+ self.prefetch_lookahead = self._keep.num_prefetch_threads
self.num_put_threads = put_threads or _BlockManager.DEFAULT_PUT_THREADS
self.copies = copies
self.storage_classes = storage_classes_func or (lambda: [])
@@ -803,7 +803,7 @@ class _BlockManager(object):
"""Initiate a background download of a block.
"""
- if not self.prefetch_enabled:
+ if not self.prefetch_lookahead:
return
with self.lock:
@@ -1061,11 +1061,25 @@ class ArvadosFile(object):
if size == 0 or offset >= self.size():
return b''
readsegs = locators_and_ranges(self._segments, offset, size)
+
prefetch = None
- if self.parent._my_block_manager()._keep.num_prefetch_threads > 0 and (self._read_counter % 128) == 0:
- prefetch = locators_and_ranges(self._segments, offset + size, config.KEEP_BLOCK_SIZE * self.parent._my_block_manager()._keep.num_prefetch_threads,
- limit=(1+self.parent._my_block_manager()._keep.num_prefetch_threads))
- self._read_counter += 1
+ prefetch_lookahead = self.parent._my_block_manager().prefetch_lookahead
+ if prefetch_lookahead:
+ # Doing prefetch on every read() call is surprisingly expensive
+ # when we're trying to deliver data at 600+ MiBps and want
+ # the read() fast path to be as lightweight as possible.
+ #
+ # Only prefetching every 128 read operations
+ # dramatically reduces the overhead while still
+ # getting the benefit of prefetching (e.g. when
+ # reading 128 KiB at a time, it checks for prefetch
+ # every 16 MiB).
+ self._read_counter = (self._read_counter+1) % 128
+ if self._read_counter == 1:
+ prefetch = locators_and_ranges(self._segments,
+ offset + size,
+ config.KEEP_BLOCK_SIZE * prefetch_lookahead,
+ limit=(1+prefetch_lookahead))
locs = set()
data = []
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list