[arvados] created: 2.7.0-6602-ga41610d3f3
git repository hosting
git at public.arvados.org
Tue May 21 15:55:25 UTC 2024
at a41610d3f36852e9abf7eaa03e91d170c1c441b4 (commit)
commit a41610d3f36852e9abf7eaa03e91d170c1c441b4
Author: Peter Amstutz <peter.amstutz at curii.com>
Date: Mon May 20 16:24:41 2024 -0400
21718: Add "return_memoryview" flag to read() and readfrom()
If return_memoryview is true, always return memoryview. This makes
for more consistent read() and readfrom() behavior.
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>
diff --git a/sdk/python/arvados/arvfile.py b/sdk/python/arvados/arvfile.py
index de20254351..0834b09ea3 100644
--- a/sdk/python/arvados/arvfile.py
+++ b/sdk/python/arvados/arvfile.py
@@ -1041,13 +1041,22 @@ class ArvadosFile(object):
# size == self.size()
pass
- def readfrom(self, offset, size, num_retries, exact=False):
+ def readfrom(self, offset, size, num_retries, exact=False, return_memoryview=False):
"""Read up to `size` bytes from the file starting at `offset`.
- :exact:
- If False (default), return less data than requested if the read
- crosses a block boundary and the next block isn't cached. If True,
- only return less data than requested when hitting EOF.
+ Arguments:
+
+ * exact: bool --- If False (default), return less data than
+ requested if the read crosses a block boundary and the next
+ block isn't cached. If True, only return less data than
+ requested when hitting EOF.
+
+ * return_memoryview: bool --- If False (default), return a
+ `bytes` object, which may entail making a copy in some
+ situations. If True, return a `memoryview` object, which may
+ avoid making a copy but may be incompatible with code
+ expecting a `bytes` object.
+
"""
with self.lock:
@@ -1092,9 +1101,15 @@ class ArvadosFile(object):
locs.add(lr.locator)
if len(data) == 1:
- return data[0]
+ if return_memoryview:
+ return data[0]
+ else:
+ return data[0].tobytes()
else:
- return b''.join(data)
+ if return_memoryview:
+ return memoryview(b''.join(data))
+ else:
+ return b''.join(data)
@must_be_writable
@synchronized
@@ -1259,33 +1274,40 @@ class ArvadosFileReader(ArvadosFileReaderBase):
@_FileLikeObjectBase._before_close
@retry_method
- def read(self, size=None, num_retries=None):
+ def read(self, size=None, num_retries=None, return_memoryview=False):
"""Read up to `size` bytes from the file and return the result.
Starts at the current file position. If `size` is None or -1, read the
entire remainder of the file.
"""
- if size is None:
+ if size is None or size == -1:
data = []
- rd = self.arvadosfile.readfrom(self._filepos, config.KEEP_BLOCK_SIZE, num_retries)
+ #
+ # specify exact=False, return_memoryview=True here so that we
+ # only copy data once into the final buffer.
+ #
+ rd = self.arvadosfile.readfrom(self._filepos, config.KEEP_BLOCK_SIZE, num_retries, exact=False, return_memoryview=True)
while rd:
data.append(rd)
self._filepos += len(rd)
- rd = self.arvadosfile.readfrom(self._filepos, config.KEEP_BLOCK_SIZE, num_retries)
- return b''.join(data)
+ rd = self.arvadosfile.readfrom(self._filepos, config.KEEP_BLOCK_SIZE, num_retries, exact=False, return_memoryview=True)
+ if return_memoryview:
+ return memoryview(b''.join(data))
+ else:
+ return b''.join(data)
else:
- data = self.arvadosfile.readfrom(self._filepos, size, num_retries, exact=True)
+ data = self.arvadosfile.readfrom(self._filepos, size, num_retries, exact=True, return_memoryview=return_memoryview)
self._filepos += len(data)
return data
@_FileLikeObjectBase._before_close
@retry_method
- def readfrom(self, offset, size, num_retries=None):
+ def readfrom(self, offset, size, num_retries=None, return_memoryview=False):
"""Read up to `size` bytes from the stream, starting at the specified file offset.
This method does not change the file position.
"""
- return self.arvadosfile.readfrom(offset, size, num_retries)
+ return self.arvadosfile.readfrom(offset, size, num_retries, return_memoryview=return_memoryview)
def flush(self):
pass
diff --git a/sdk/python/arvados/collection.py b/sdk/python/arvados/collection.py
index 1050d4c093..d539d9006d 100644
--- a/sdk/python/arvados/collection.py
+++ b/sdk/python/arvados/collection.py
@@ -336,7 +336,7 @@ class RichCollectionBase(CollectionBase):
self,
path: str,
mode: str="r",
- encoding: Optional[str]=None,
+ encoding: Optional[str]=None
) -> IO:
"""Open a file-like object within the collection
@@ -356,6 +356,7 @@ class RichCollectionBase(CollectionBase):
* encoding: str | None --- The text encoding of the file. Only used
when the file is opened in text mode. The default is
platform-dependent.
+
"""
if not re.search(r'^[rwa][bt]?\+?$', mode):
raise errors.ArgumentError("Invalid mode {!r}".format(mode))
diff --git a/services/fuse/arvados_fuse/fusefile.py b/services/fuse/arvados_fuse/fusefile.py
index fce6c9b614..a34d36eb72 100644
--- a/services/fuse/arvados_fuse/fusefile.py
+++ b/services/fuse/arvados_fuse/fusefile.py
@@ -61,7 +61,7 @@ class FuseArvadosFile(File):
def readfrom(self, off, size, num_retries=0):
with llfuse.lock_released:
- return self.arvfile.readfrom(off, size, num_retries, exact=True)
+ return self.arvfile.readfrom(off, size, num_retries, exact=True, return_memoryview=True)
def writeto(self, off, buf, num_retries=0):
with llfuse.lock_released:
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list