[ARVADOS] created: 492074f1c328c7057b4673f84ae9371ad3e8fdf2
git at public.curoverse.com
git at public.curoverse.com
Mon Dec 1 19:30:41 EST 2014
at 492074f1c328c7057b4673f84ae9371ad3e8fdf2 (commit)
commit 492074f1c328c7057b4673f84ae9371ad3e8fdf2
Author: Tom Clegg <tom at curoverse.com>
Date: Mon Dec 1 19:05:58 2014 -0500
Bring back StreamReader.tokens() method used by one_task_per_input_stream(). No issue #
diff --git a/sdk/python/arvados/stream.py b/sdk/python/arvados/stream.py
index c263dd8..cabf1ec 100644
--- a/sdk/python/arvados/stream.py
+++ b/sdk/python/arvados/stream.py
@@ -319,15 +319,21 @@ class StreamReader(object):
data.append(self._keep.get(locator, num_retries=num_retries)[segmentoffset:segmentoffset+segmentsize])
return ''.join(data)
- def manifest_text(self, strip=False):
- manifest_text = [self.name().replace(' ', '\\040')]
+ def tokens(self, strip=False):
+ tokens = [self.name().replace(' ', '\\040')]
if strip:
for d in self._data_locators:
m = re.match(r'^[0-9a-f]{32}\+\d+', d[LOCATOR])
- manifest_text.append(m.group(0))
+ tokens.append(m.group(0))
else:
- manifest_text.extend([d[LOCATOR] for d in self._data_locators])
- manifest_text.extend([' '.join(["{}:{}:{}".format(seg[LOCATOR], seg[BLOCKSIZE], f.name().replace(' ', '\\040'))
- for seg in f.segments])
- for f in self._files.values()])
- return ' '.join(manifest_text) + '\n'
+ tokens.extend([d[LOCATOR] for d in self._data_locators])
+ for f in self._files.values():
+ for seg in f.segments:
+ tokens.append("{}:{}:{}".format(
+ seg[LOCATOR],
+ seg[BLOCKSIZE],
+ f.name().replace(' ', '\\040')))
+ return tokens
+
+ def manifest_text(self, strip=False):
+ return ' '.join(self.tokens(strip=strip)) + '\n'
diff --git a/sdk/python/tests/test_stream.py b/sdk/python/tests/test_stream.py
index 08a3d28..d73dbc1 100644
--- a/sdk/python/tests/test_stream.py
+++ b/sdk/python/tests/test_stream.py
@@ -243,6 +243,12 @@ class StreamReaderTestCase(unittest.TestCase, StreamRetryTestMixin):
reader = StreamReader(line.split())
self.assertEqual(line + '\n', reader.manifest_text())
+ def test_stream_tokens_without_keep_client(self):
+ mtext = self.manifest_for('multilevel_collection_1')
+ for line in mtext.rstrip('\n').split('\n'):
+ reader = StreamReader(line.split())
+ self.assertEqual(line.split(), reader.tokens())
+
class StreamFileReadTestCase(unittest.TestCase, StreamRetryTestMixin):
def reader_for(self, coll_name, **kwargs):
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits mailing list