[ARVADOS] created: 492074f1c328c7057b4673f84ae9371ad3e8fdf2

git at public.curoverse.com git at public.curoverse.com
Mon Dec 1 19:30:41 EST 2014


        at  492074f1c328c7057b4673f84ae9371ad3e8fdf2 (commit)


commit 492074f1c328c7057b4673f84ae9371ad3e8fdf2
Author: Tom Clegg <tom at curoverse.com>
Date:   Mon Dec 1 19:05:58 2014 -0500

    Bring back StreamReader.tokens() method used by one_task_per_input_stream(). No issue #

diff --git a/sdk/python/arvados/stream.py b/sdk/python/arvados/stream.py
index c263dd8..cabf1ec 100644
--- a/sdk/python/arvados/stream.py
+++ b/sdk/python/arvados/stream.py
@@ -319,15 +319,21 @@ class StreamReader(object):
             data.append(self._keep.get(locator, num_retries=num_retries)[segmentoffset:segmentoffset+segmentsize])
         return ''.join(data)
 
-    def manifest_text(self, strip=False):
-        manifest_text = [self.name().replace(' ', '\\040')]
+    def tokens(self, strip=False):
+        tokens = [self.name().replace(' ', '\\040')]
         if strip:
             for d in self._data_locators:
                 m = re.match(r'^[0-9a-f]{32}\+\d+', d[LOCATOR])
-                manifest_text.append(m.group(0))
+                tokens.append(m.group(0))
         else:
-            manifest_text.extend([d[LOCATOR] for d in self._data_locators])
-        manifest_text.extend([' '.join(["{}:{}:{}".format(seg[LOCATOR], seg[BLOCKSIZE], f.name().replace(' ', '\\040'))
-                                        for seg in f.segments])
-                              for f in self._files.values()])
-        return ' '.join(manifest_text) + '\n'
+            tokens.extend([d[LOCATOR] for d in self._data_locators])
+        for f in self._files.values():
+            for seg in f.segments:
+                tokens.append("{}:{}:{}".format(
+                    seg[LOCATOR],
+                    seg[BLOCKSIZE],
+                    f.name().replace(' ', '\\040')))
+        return tokens
+
+    def manifest_text(self, strip=False):
+        return ' '.join(self.tokens(strip=strip)) + '\n'
diff --git a/sdk/python/tests/test_stream.py b/sdk/python/tests/test_stream.py
index 08a3d28..d73dbc1 100644
--- a/sdk/python/tests/test_stream.py
+++ b/sdk/python/tests/test_stream.py
@@ -243,6 +243,12 @@ class StreamReaderTestCase(unittest.TestCase, StreamRetryTestMixin):
             reader = StreamReader(line.split())
             self.assertEqual(line + '\n', reader.manifest_text())
 
+    def test_stream_tokens_without_keep_client(self):
+        mtext = self.manifest_for('multilevel_collection_1')
+        for line in mtext.rstrip('\n').split('\n'):
+            reader = StreamReader(line.split())
+            self.assertEqual(line.split(), reader.tokens())
+
 
 class StreamFileReadTestCase(unittest.TestCase, StreamRetryTestMixin):
     def reader_for(self, coll_name, **kwargs):

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list