[ARVADOS] created: c807aef7059b253512ba5da5d535e89e800b9c11
git at public.curoverse.com
git at public.curoverse.com
Thu Apr 30 11:29:57 EDT 2015
at c807aef7059b253512ba5da5d535e89e800b9c11 (commit)
commit c807aef7059b253512ba5da5d535e89e800b9c11
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Thu Apr 30 11:18:24 2015 -0400
ArvadosFileReader.read() now always tries to return the exact amount of data asked for.
This avoids breaking callers such as gzip that assume read() returns exactly the
requested amount of data unless it reaches EOF. (From the Python file docs for read(): "Note that
this method may call the underlying C function fread() more than once in an
effort to acquire as close to size bytes as possible.") closes #5856
diff --git a/sdk/python/arvados/arvfile.py b/sdk/python/arvados/arvfile.py
index 7742c45..ff900b7 100644
--- a/sdk/python/arvados/arvfile.py
+++ b/sdk/python/arvados/arvfile.py
@@ -715,8 +715,15 @@ class ArvadosFile(object):
elif size > self.size():
raise IOError("truncate() does not support extending the file size")
- def readfrom(self, offset, size, num_retries):
- """Read upto `size` bytes from the file starting at `offset`."""
+
+ def readfrom(self, offset, size, num_retries, exact=False):
+ """Read upto `size` bytes from the file starting at `offset`.
+
+ :exact:
+ If False (default), return less data than requested if the read
+ crosses a block boundary and the next block isn't cached. If True,
+ only return less data than requested when hitting EOF.
+ """
with self.lock:
if size == 0 or offset >= self.size():
@@ -729,7 +736,7 @@ class ArvadosFile(object):
data = []
for lr in readsegs:
- block = self.parent._my_block_manager().get_block_contents(lr.locator, num_retries=num_retries, cache_only=bool(data))
+ block = self.parent._my_block_manager().get_block_contents(lr.locator, num_retries=num_retries, cache_only=(bool(data) and not exact))
if block:
data.append(block[lr.segment_offset:lr.segment_offset+lr.segment_size])
else:
@@ -868,7 +875,7 @@ class ArvadosFileReader(ArvadosFileReaderBase):
@retry_method
def read(self, size, num_retries=None):
"""Read up to `size` bytes from the stream, starting at the current file position."""
- data = self.arvadosfile.readfrom(self._filepos, size, num_retries)
+ data = self.arvadosfile.readfrom(self._filepos, size, num_retries, exact=True)
self._filepos += len(data)
return data
diff --git a/sdk/python/tests/test_arvfile.py b/sdk/python/tests/test_arvfile.py
index 3bba841..9d31c81 100644
--- a/sdk/python/tests/test_arvfile.py
+++ b/sdk/python/tests/test_arvfile.py
@@ -440,20 +440,11 @@ class ArvadosFileReaderTestCase(StreamFileReaderTestCase):
af = ArvadosFile(ArvadosFileReaderTestCase.MockParent(blocks, nocache), stream=stream, segments=[Range(1, 0, 3), Range(6, 3, 3), Range(11, 6, 3)])
return ArvadosFileReader(af, "count.txt")
- def test_read_returns_first_block(self):
- # read() calls will be aligned on block boundaries - see #3663.
+ def test_read_crosses_blocks(self):
+ # read() needs to return all the data requested if possible, even if it
+ # crosses uncached blocks: https://arvados.org/issues/5856
sfile = self.make_count_reader(nocache=True)
- self.assertEqual('123', sfile.read(10))
-
- def test_successive_reads(self):
- sfile = self.make_count_reader(nocache=True)
- for expect in ['123', '456', '789', '']:
- self.assertEqual(expect, sfile.read(10))
-
- def test_tell_after_block_read(self):
- sfile = self.make_count_reader(nocache=True)
- sfile.read(5)
- self.assertEqual(3, sfile.tell())
+ self.assertEqual('01234567', sfile.read(8))
def test_prefetch(self):
keep = ArvadosFileWriterTestCase.MockKeep({"2e9ec317e197819358fbc43afca7d837+8": "01234567", "e8dc4081b13434b45189a720b77b6818+8": "abcdefgh"})
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list