[ARVADOS] updated: 6527508bc378867fd4d5e1af20ffd7868cec5f62
git at public.curoverse.com
git at public.curoverse.com
Fri Oct 24 18:17:34 EDT 2014
Summary of changes:
sdk/python/arvados/stream.py | 66 ++++++++++++++++++++------------------------
1 file changed, 30 insertions(+), 36 deletions(-)
via 6527508bc378867fd4d5e1af20ffd7868cec5f62 (commit)
from 5fa3808d7587cc7a72acebef991233008f108a0b (commit)
Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.
commit 6527508bc378867fd4d5e1af20ffd7868cec5f62
Author: Brett Smith <brett at curoverse.com>
Date: Fri Oct 24 18:09:27 2014 -0400
3603: Fix up duplicate readlines() in StreamFileReader.
diff --git a/sdk/python/arvados/stream.py b/sdk/python/arvados/stream.py
index 52cc6a8..f958ead 100644
--- a/sdk/python/arvados/stream.py
+++ b/sdk/python/arvados/stream.py
@@ -107,7 +107,7 @@ class StreamFileReader(ArvadosFileBase):
self._max_segsize = max(seg[1] for seg in segments)
self._filepos = 0L
self.num_retries = stream.num_retries
- self._readline_cache = (-1, '')
+ self._readline_cache = (-1, None, None)
def __iter__(self):
return self.readlines()
@@ -166,7 +166,9 @@ class StreamFileReader(ArvadosFileBase):
@ArvadosFileBase._before_close
@retry_method
- def readall(self, size=2**20, num_retries=None):
+ def readall(self, size=None, num_retries=None):
+ if size is None:
+ size = self._max_segsize
while True:
data = self.read(size, num_retries=num_retries)
if data == '':
@@ -175,43 +177,42 @@ class StreamFileReader(ArvadosFileBase):
@ArvadosFileBase._before_close
@retry_method
- def readline(self, num_retries=None):
- if self.tell() == self._readline_cache[0]:
- data = [self._readline_cache[1]]
- else:
- data = [self.read(self._max_segsize, num_retries=num_retries)]
- while data[-1] and ('\n' not in data[-1]):
- data.append(self.read(self._max_segsize, num_retries=num_retries))
+ def readline(self, read_iter=None, num_retries=None):
+ cache_usable = ((self.tell() == self._readline_cache[0]) and
+ ((read_iter is None) or
+ (read_iter is self._readline_cache[1])))
+ data = []
+ if cache_usable:
+ read_iter = self._readline_cache[1]
+ data.append(self._readline_cache[2])
+ elif read_iter is None:
+ read_iter = self.readall(num_retries=num_retries)
+ while (not data) or ('\n' not in data[-1]):
+ try:
+ data.append(next(read_iter))
+ except StopIteration:
+ break
data = ''.join(data)
try:
nextline_index = data.index('\n') + 1
except ValueError:
nextline_index = len(data)
- line = data[:nextline_index]
- rest = data[nextline_index:]
- self._readline_cache = (self.tell(), rest)
- return line
-
- @ArvadosFileBase._before_close
- @retry_method
- def readlines(self, num_retries=None):
- while True:
- data = self.readline(num_retries=num_retries)
- if not data:
- break
- yield data
+ self._readline_cache = (self.tell(), read_iter, data[nextline_index:])
+ return data[:nextline_index]
@ArvadosFileBase._before_close
@retry_method
def decompress(self, decompress, size, num_retries=None):
for segment in self.readall(size, num_retries):
data = decompress(segment)
- if data and data != '':
+ if data:
yield data
@ArvadosFileBase._before_close
@retry_method
- def readall_decompressed(self, size=2**20, num_retries=None):
+ def readall_decompressed(self, size=None, num_retries=None):
+ if size is None:
+ size = self._max_segsize
self.seek(0)
if re.search('\.bz2$', self.name):
dc = bz2.BZ2Decompressor()
@@ -228,18 +229,11 @@ class StreamFileReader(ArvadosFileBase):
@retry_method
def readlines(self, decompress=True, num_retries=None):
read_func = self.readall_decompressed if decompress else self.readall
- data = ''
- for newdata in read_func(num_retries=num_retries):
- data += newdata
- sol = 0
- while True:
- eol = data.find("\n", sol)
- if eol < 0:
- break
- yield data[sol:eol+1]
- sol = eol+1
- data = data[sol:]
- if data != '':
+ read_iter = read_func(num_retries=num_retries)
+ while True:
+ data = self.readline(read_iter, num_retries=num_retries)
+ if not data:
+ break
yield data
def as_manifest(self):
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list