[ARVADOS] created: 4570521e9d639806b1f25c93822d5cf0dbfe42d0

git at public.curoverse.com git at public.curoverse.com
Thu Apr 30 12:41:53 EDT 2015


        at  4570521e9d639806b1f25c93822d5cf0dbfe42d0 (commit)


commit 4570521e9d639806b1f25c93822d5cf0dbfe42d0
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Thu Apr 30 11:18:24 2015 -0400

    ArvadosFileReader object always tries to return the exact amount of data asked for.
    
    This avoids breaking callers such as gzip that assume read() always returns
    the exact amount of data requested unless it hits EOF.  (From the Python file
    docs for read(): Note that this method may call the underlying C function
    fread() more than once in an effort to acquire as close to size bytes as
    possible.) closes #5856

diff --git a/sdk/python/arvados/arvfile.py b/sdk/python/arvados/arvfile.py
index 7742c45..ff900b7 100644
--- a/sdk/python/arvados/arvfile.py
+++ b/sdk/python/arvados/arvfile.py
@@ -715,8 +715,15 @@ class ArvadosFile(object):
         elif size > self.size():
             raise IOError("truncate() does not support extending the file size")
 
-    def readfrom(self, offset, size, num_retries):
-        """Read upto `size` bytes from the file starting at `offset`."""
+
+    def readfrom(self, offset, size, num_retries, exact=False):
+        """Read up to `size` bytes from the file starting at `offset`.
+
+        :exact:
+         If False (default), return less data than requested if the read
+         crosses a block boundary and the next block isn't cached.  If True,
+         only return less data than requested when hitting EOF.
+        """
 
         with self.lock:
             if size == 0 or offset >= self.size():
@@ -729,7 +736,7 @@ class ArvadosFile(object):
 
         data = []
         for lr in readsegs:
-            block = self.parent._my_block_manager().get_block_contents(lr.locator, num_retries=num_retries, cache_only=bool(data))
+            block = self.parent._my_block_manager().get_block_contents(lr.locator, num_retries=num_retries, cache_only=(bool(data) and not exact))
             if block:
                 data.append(block[lr.segment_offset:lr.segment_offset+lr.segment_size])
             else:
@@ -868,7 +875,7 @@ class ArvadosFileReader(ArvadosFileReaderBase):
     @retry_method
     def read(self, size, num_retries=None):
         """Read up to `size` bytes from the stream, starting at the current file position."""
-        data = self.arvadosfile.readfrom(self._filepos, size, num_retries)
+        data = self.arvadosfile.readfrom(self._filepos, size, num_retries, exact=True)
         self._filepos += len(data)
         return data
 
diff --git a/sdk/python/tests/test_arvfile.py b/sdk/python/tests/test_arvfile.py
index 3bba841..4e7dd75 100644
--- a/sdk/python/tests/test_arvfile.py
+++ b/sdk/python/tests/test_arvfile.py
@@ -440,20 +440,30 @@ class ArvadosFileReaderTestCase(StreamFileReaderTestCase):
         af = ArvadosFile(ArvadosFileReaderTestCase.MockParent(blocks, nocache), stream=stream, segments=[Range(1, 0, 3), Range(6, 3, 3), Range(11, 6, 3)])
         return ArvadosFileReader(af, "count.txt")
 
+    def test_read_crosses_blocks(self):
+        # read() needs to return all the data requested if possible, even if it
+        # crosses uncached blocks: https://arvados.org/issues/5856
+        sfile = self.make_count_reader(nocache=True)
+        self.assertEqual('12345678', sfile.read(8))
+
     def test_read_returns_first_block(self):
-        # read() calls will be aligned on block boundaries - see #3663.
+        # Override StreamFileReaderTestCase.test_read_returns_first_block
         sfile = self.make_count_reader(nocache=True)
-        self.assertEqual('123', sfile.read(10))
+        self.assertEqual('123', sfile.arvadosfile.readfrom(0, 10, 0))
 
     def test_successive_reads(self):
+        # Override StreamFileReaderTestCase.test_successive_reads
         sfile = self.make_count_reader(nocache=True)
-        for expect in ['123', '456', '789', '']:
-            self.assertEqual(expect, sfile.read(10))
+        self.assertEqual('123', sfile.arvadosfile.readfrom(0, 10, 0))
+        self.assertEqual('456', sfile.arvadosfile.readfrom(3, 10, 0))
+        self.assertEqual('789', sfile.arvadosfile.readfrom(6, 10, 0))
+        self.assertEqual('', sfile.arvadosfile.readfrom(9, 10, 0))
 
     def test_tell_after_block_read(self):
+        # Override StreamFileReaderTestCase.test_tell_after_block_read
         sfile = self.make_count_reader(nocache=True)
-        sfile.read(5)
-        self.assertEqual(3, sfile.tell())
+        self.assertEqual('12345678', sfile.read(8))
+        self.assertEqual(8, sfile.tell())
 
     def test_prefetch(self):
         keep = ArvadosFileWriterTestCase.MockKeep({"2e9ec317e197819358fbc43afca7d837+8": "01234567", "e8dc4081b13434b45189a720b77b6818+8": "abcdefgh"})

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list