[ARVADOS] updated: 6527508bc378867fd4d5e1af20ffd7868cec5f62

git at public.curoverse.com git at public.curoverse.com
Fri Oct 24 18:17:34 EDT 2014


Summary of changes:
 sdk/python/arvados/stream.py | 66 ++++++++++++++++++++------------------------
 1 file changed, 30 insertions(+), 36 deletions(-)

       via  6527508bc378867fd4d5e1af20ffd7868cec5f62 (commit)
      from  5fa3808d7587cc7a72acebef991233008f108a0b (commit)

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.


commit 6527508bc378867fd4d5e1af20ffd7868cec5f62
Author: Brett Smith <brett at curoverse.com>
Date:   Fri Oct 24 18:09:27 2014 -0400

    3603: Fix up duplicate readlines() in StreamFileReader.

diff --git a/sdk/python/arvados/stream.py b/sdk/python/arvados/stream.py
index 52cc6a8..f958ead 100644
--- a/sdk/python/arvados/stream.py
+++ b/sdk/python/arvados/stream.py
@@ -107,7 +107,7 @@ class StreamFileReader(ArvadosFileBase):
         self._max_segsize = max(seg[1] for seg in segments)
         self._filepos = 0L
         self.num_retries = stream.num_retries
-        self._readline_cache = (-1, '')
+        self._readline_cache = (-1, None, None)
 
     def __iter__(self):
         return self.readlines()
@@ -166,7 +166,9 @@ class StreamFileReader(ArvadosFileBase):
 
     @ArvadosFileBase._before_close
     @retry_method
-    def readall(self, size=2**20, num_retries=None):
+    def readall(self, size=None, num_retries=None):
+        if size is None:
+            size = self._max_segsize
         while True:
             data = self.read(size, num_retries=num_retries)
             if data == '':
@@ -175,43 +177,42 @@ class StreamFileReader(ArvadosFileBase):
 
     @ArvadosFileBase._before_close
     @retry_method
-    def readline(self, num_retries=None):
-        if self.tell() == self._readline_cache[0]:
-            data = [self._readline_cache[1]]
-        else:
-            data = [self.read(self._max_segsize, num_retries=num_retries)]
-        while data[-1] and ('\n' not in data[-1]):
-            data.append(self.read(self._max_segsize, num_retries=num_retries))
+    def readline(self, read_iter=None, num_retries=None):
+        cache_usable = ((self.tell() == self._readline_cache[0]) and
+                        ((read_iter is None) or
+                         (read_iter is self._readline_cache[1])))
+        data = []
+        if cache_usable:
+            read_iter = self._readline_cache[1]
+            data.append(self._readline_cache[2])
+        elif read_iter is None:
+            read_iter = self.readall(num_retries=num_retries)
+        while (not data) or ('\n' not in data[-1]):
+            try:
+                data.append(next(read_iter))
+            except StopIteration:
+                break
         data = ''.join(data)
         try:
             nextline_index = data.index('\n') + 1
         except ValueError:
             nextline_index = len(data)
-        line = data[:nextline_index]
-        rest = data[nextline_index:]
-        self._readline_cache = (self.tell(), rest)
-        return line
-
-    @ArvadosFileBase._before_close
-    @retry_method
-    def readlines(self, num_retries=None):
-        while True:
-            data = self.readline(num_retries=num_retries)
-            if not data:
-                break
-            yield data
+        self._readline_cache = (self.tell(), read_iter, data[nextline_index:])
+        return data[:nextline_index]
 
     @ArvadosFileBase._before_close
     @retry_method
     def decompress(self, decompress, size, num_retries=None):
         for segment in self.readall(size, num_retries):
             data = decompress(segment)
-            if data and data != '':
+            if data:
                 yield data
 
     @ArvadosFileBase._before_close
     @retry_method
-    def readall_decompressed(self, size=2**20, num_retries=None):
+    def readall_decompressed(self, size=None, num_retries=None):
+        if size is None:
+            size = self._max_segsize
         self.seek(0)
         if re.search('\.bz2$', self.name):
             dc = bz2.BZ2Decompressor()
@@ -228,18 +229,11 @@ class StreamFileReader(ArvadosFileBase):
     @retry_method
     def readlines(self, decompress=True, num_retries=None):
         read_func = self.readall_decompressed if decompress else self.readall
-        data = ''
-        for newdata in read_func(num_retries=num_retries):
-            data += newdata
-            sol = 0
-            while True:
-                eol = data.find("\n", sol)
-                if eol < 0:
-                    break
-                yield data[sol:eol+1]
-                sol = eol+1
-            data = data[sol:]
-        if data != '':
+        read_iter = read_func(num_retries=num_retries)
+        while True:
+            data = self.readline(read_iter, num_retries=num_retries)
+            if not data:
+                break
             yield data
 
     def as_manifest(self):

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list