[ARVADOS] created: 0c7f9737786fd64177f8210585ec620402d0d9b6

Git user git at public.curoverse.com
Thu Mar 17 18:47:35 EDT 2016


        at  0c7f9737786fd64177f8210585ec620402d0d9b6 (commit)


commit 0c7f9737786fd64177f8210585ec620402d0d9b6
Author: Brett Smith <brett at curoverse.com>
Date:   Thu Mar 17 18:03:21 2016 -0400

    7156/7852: Bugfix _filepos tracking in PySDK readline method.
    
    Make sure self._filepos is always in the right position to return
    uncached data before another read method could be called, to avoid
    inconsistent results when interleaving those calls with readline
    calls.
    
    Closes #7156, #7852.

diff --git a/sdk/python/arvados/arvfile.py b/sdk/python/arvados/arvfile.py
index 71af644..b78c63e 100644
--- a/sdk/python/arvados/arvfile.py
+++ b/sdk/python/arvados/arvfile.py
@@ -108,6 +108,7 @@ class ArvadosFileReaderBase(_FileLikeObjectBase):
         cache_pos, cache_data = self._readline_cache
         if self.tell() == cache_pos:
             data = [cache_data]
+            self._filepos += len(cache_data)
         else:
             data = ['']
         data_size = len(data[-1])
@@ -123,6 +124,7 @@ class ArvadosFileReaderBase(_FileLikeObjectBase):
         except ValueError:
             nextline_index = len(data)
         nextline_index = min(nextline_index, size)
+        self._filepos -= len(data) - nextline_index
         self._readline_cache = (self.tell(), data[nextline_index:])
         return data[:nextline_index]
 

commit 35d6cf5e57eeeb90edd158f08a4bbccf0d17baba
Author: radhika <radhika at curoverse.com>
Date:   Wed Feb 3 12:19:54 2016 -0500

    7852: Added one more test, "test_readline_then_readall", which does a readline followed by a readall.
    This test fails in exactly the same way as "test_readline_then_readlines".

diff --git a/sdk/python/tests/test_stream.py b/sdk/python/tests/test_stream.py
index 2c31ef9..624f1b8 100644
--- a/sdk/python/tests/test_stream.py
+++ b/sdk/python/tests/test_stream.py
@@ -191,6 +191,12 @@ class StreamFileReaderTestCase(unittest.TestCase):
         data = reader.readlines()
         self.assertEqual(['two\n', '\n', 'three\n', 'four\n', '\n'], data)
 
+    def test_readline_then_readall(self):
+        reader = self.make_newlines_reader()
+        data = reader.readline()
+        self.assertEqual('one\n', data)
+        self.assertEqual(''.join(['two\n', '\n', 'three\n', 'four\n', '\n']), ''.join(reader.readall()))
+
 
 class StreamRetryTestMixin(object):
     # Define reader_for(coll_name, **kwargs)

commit 8d4a67a10f27ef3de0c1b7baae2b4c48fdda74e6
Author: radhika <radhika at curoverse.com>
Date:   Mon Jan 25 16:37:24 2016 -0500

    7852: Add a failing test that calls readline and then readlines. The expectation is that the second call,
    readlines, fetches all the remaining lines after the first line. However, the readlines call only
    fetches the data after the "first block" that was read during the first readline call. It appears that
    self._readline_cache and self._filepos, which are set during the first readline, play a role here.

diff --git a/sdk/python/tests/test_stream.py b/sdk/python/tests/test_stream.py
index 6c3bd61..2c31ef9 100644
--- a/sdk/python/tests/test_stream.py
+++ b/sdk/python/tests/test_stream.py
@@ -184,6 +184,13 @@ class StreamFileReaderTestCase(unittest.TestCase):
     def test_bz2_decompression(self):
         self.check_decompression('bz2', bz2.compress)
 
+    def test_readline_then_readlines(self):
+        reader = self.make_newlines_reader()
+        data = reader.readline()
+        self.assertEqual('one\n', data)
+        data = reader.readlines()
+        self.assertEqual(['two\n', '\n', 'three\n', 'four\n', '\n'], data)
+
 
 class StreamRetryTestMixin(object):
     # Define reader_for(coll_name, **kwargs)

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list