[arvados] created: 2.7.0-6602-ga41610d3f3

git repository hosting git at public.arvados.org
Tue May 21 15:55:25 UTC 2024


        at  a41610d3f36852e9abf7eaa03e91d170c1c441b4 (commit)


commit a41610d3f36852e9abf7eaa03e91d170c1c441b4
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Mon May 20 16:24:41 2024 -0400

    21718: Add "return_memoryview" flag to read() and readfrom()
    
    If return_memoryview is true, always return memoryview.  This makes
    for more consistent read() and readfrom() behavior.
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/sdk/python/arvados/arvfile.py b/sdk/python/arvados/arvfile.py
index de20254351..0834b09ea3 100644
--- a/sdk/python/arvados/arvfile.py
+++ b/sdk/python/arvados/arvfile.py
@@ -1041,13 +1041,22 @@ class ArvadosFile(object):
             # size == self.size()
             pass
 
-    def readfrom(self, offset, size, num_retries, exact=False):
+    def readfrom(self, offset, size, num_retries, exact=False, return_memoryview=False):
         """Read up to `size` bytes from the file starting at `offset`.
 
-        :exact:
-         If False (default), return less data than requested if the read
-         crosses a block boundary and the next block isn't cached.  If True,
-         only return less data than requested when hitting EOF.
+        Arguments:
+
+        * exact: bool --- If False (default), return less data than
+          requested if the read crosses a block boundary and the next
+          block isn't cached.  If True, only return less data than
+          requested when hitting EOF.
+
+        * return_memoryview: bool --- If False (default) return a
+          `bytes` object, which may entail making a copy in some
+          situations.  If True, return a `memoryview` object which may
+          avoid making a copy, but may be incompatible with code
+          expecting a `bytes` object.
+
         """
 
         with self.lock:
@@ -1092,9 +1101,15 @@ class ArvadosFile(object):
                     locs.add(lr.locator)
 
         if len(data) == 1:
-            return data[0]
+            if return_memoryview:
+                return data[0]
+            else:
+                return data[0].tobytes()
         else:
-            return b''.join(data)
+            if return_memoryview:
+                return memoryview(b''.join(data))
+            else:
+                return b''.join(data)
 
     @must_be_writable
     @synchronized
@@ -1259,33 +1274,40 @@ class ArvadosFileReader(ArvadosFileReaderBase):
 
     @_FileLikeObjectBase._before_close
     @retry_method
-    def read(self, size=None, num_retries=None):
+    def read(self, size=None, num_retries=None, return_memoryview=False):
         """Read up to `size` bytes from the file and return the result.
 
         Starts at the current file position.  If `size` is None, read the
         entire remainder of the file.
         """
-        if size is None:
+        if size is None or size == -1:
             data = []
-            rd = self.arvadosfile.readfrom(self._filepos, config.KEEP_BLOCK_SIZE, num_retries)
+            #
+            # specify exact=False, return_memoryview=True here so that we
+            # only copy data once into the final buffer.
+            #
+            rd = self.arvadosfile.readfrom(self._filepos, config.KEEP_BLOCK_SIZE, num_retries, exact=False, return_memoryview=True)
             while rd:
                 data.append(rd)
                 self._filepos += len(rd)
-                rd = self.arvadosfile.readfrom(self._filepos, config.KEEP_BLOCK_SIZE, num_retries)
-            return b''.join(data)
+                rd = self.arvadosfile.readfrom(self._filepos, config.KEEP_BLOCK_SIZE, num_retries, exact=False, return_memoryview=True)
+            if return_memoryview:
+                return memoryview(b''.join(data))
+            else:
+                return b''.join(data)
         else:
-            data = self.arvadosfile.readfrom(self._filepos, size, num_retries, exact=True)
+            data = self.arvadosfile.readfrom(self._filepos, size, num_retries, exact=True, return_memoryview=return_memoryview)
             self._filepos += len(data)
             return data
 
     @_FileLikeObjectBase._before_close
     @retry_method
-    def readfrom(self, offset, size, num_retries=None):
+    def readfrom(self, offset, size, num_retries=None, return_memoryview=False):
         """Read up to `size` bytes from the stream, starting at the specified file offset.
 
         This method does not change the file position.
         """
-        return self.arvadosfile.readfrom(offset, size, num_retries)
+        return self.arvadosfile.readfrom(offset, size, num_retries, return_memoryview=return_memoryview)
 
     def flush(self):
         pass
diff --git a/sdk/python/arvados/collection.py b/sdk/python/arvados/collection.py
index 1050d4c093..d539d9006d 100644
--- a/sdk/python/arvados/collection.py
+++ b/sdk/python/arvados/collection.py
@@ -336,7 +336,7 @@ class RichCollectionBase(CollectionBase):
             self,
             path: str,
             mode: str="r",
-            encoding: Optional[str]=None,
+            encoding: Optional[str]=None
     ) -> IO:
         """Open a file-like object within the collection
 
@@ -356,6 +356,7 @@ class RichCollectionBase(CollectionBase):
         * encoding: str | None --- The text encoding of the file. Only used
           when the file is opened in text mode. The default is
           platform-dependent.
+
         """
         if not re.search(r'^[rwa][bt]?\+?$', mode):
             raise errors.ArgumentError("Invalid mode {!r}".format(mode))
diff --git a/services/fuse/arvados_fuse/fusefile.py b/services/fuse/arvados_fuse/fusefile.py
index fce6c9b614..a34d36eb72 100644
--- a/services/fuse/arvados_fuse/fusefile.py
+++ b/services/fuse/arvados_fuse/fusefile.py
@@ -61,7 +61,7 @@ class FuseArvadosFile(File):
 
     def readfrom(self, off, size, num_retries=0):
         with llfuse.lock_released:
-            return self.arvfile.readfrom(off, size, num_retries, exact=True)
+            return self.arvfile.readfrom(off, size, num_retries, exact=True, return_memoryview=True)
 
     def writeto(self, off, buf, num_retries=0):
         with llfuse.lock_released:

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list