[ARVADOS] updated: d5caf622ea700e4a36c03cff8073abd89bccc796

git at public.curoverse.com git at public.curoverse.com
Mon Oct 27 09:30:36 EDT 2014


Summary of changes:
 sdk/python/arvados/collection.py     | 52 +++++++++++++++------
 sdk/python/arvados/stream.py         | 24 +++++++---
 sdk/python/arvados/util.py           | 16 -------
 sdk/python/tests/test_collections.py | 89 ------------------------------------
 sdk/python/tests/test_stream.py      | 14 ++++++
 5 files changed, 70 insertions(+), 125 deletions(-)

       via  d5caf622ea700e4a36c03cff8073abd89bccc796 (commit)
       via  55a92a2169047b3e4b80d22063424eb45da80651 (commit)
       via  aec1913f949adb5c8c482112d26322f4db5edea2 (commit)
       via  deeb84da688e2ca8c9371b448b0121b1310ece37 (commit)
      from  35ae6cc34dce46c51209696fca3ec4294bb47bfd (commit)

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.


commit d5caf622ea700e4a36c03cff8073abd89bccc796
Author: Brett Smith <brett at curoverse.com>
Date:   Mon Oct 27 09:30:19 2014 -0400

    3603: Improve StreamFileReader.readline() readability.

diff --git a/sdk/python/arvados/stream.py b/sdk/python/arvados/stream.py
index a2c0dac..9f3e2d4 100644
--- a/sdk/python/arvados/stream.py
+++ b/sdk/python/arvados/stream.py
@@ -186,10 +186,11 @@ class StreamFileReader(ArvadosFileBase):
     @ArvadosFileBase._before_close
     @retry_method
     def readline(self, read_iter=None, num_retries=None):
-        if (((read_iter is None) or (read_iter is self._readline_cache[1])) and
-              (self.tell() == self._readline_cache[0])):
-            read_iter = self._readline_cache[1]
-            data = [self._readline_cache[2]]
+        cache_pos, cache_iter, cache_data = self._readline_cache
+        if (((read_iter is None) or (read_iter is cache_iter)) and
+              (self.tell() == cache_pos)):
+            read_iter = cache_iter
+            data = [cache_data]
         else:
             if read_iter is None:
                 read_iter = self.readall_decompressed(num_retries=num_retries)

commit 55a92a2169047b3e4b80d22063424eb45da80651
Author: Brett Smith <brett at curoverse.com>
Date:   Mon Oct 27 09:23:19 2014 -0400

    3603: Improve Collection.open signatures and document.

diff --git a/sdk/python/arvados/collection.py b/sdk/python/arvados/collection.py
index 69822e1..fec043a 100644
--- a/sdk/python/arvados/collection.py
+++ b/sdk/python/arvados/collection.py
@@ -168,24 +168,30 @@ class CollectionReader(CollectionBase):
         self._manifest_text = ''.join([StreamReader(stream, keep=self._my_keep()).manifest_text() for stream in self._streams])
         #print "result", self._manifest_text
 
-    def open(self, stream_name, file_name=None):
+    def open(self, streampath, filename=None):
+        """open(streampath[, filename]) -> file-like object
+
+        Pass in the path of a file to read from the Collection, either as a
+        single string or as two separate stream name and file name arguments.
+        This method returns a file-like object to read that file.
+        """
         self._populate()
-        if file_name is None:
-            stream_name, file_name = split(stream_name)
+        if filename is None:
+            streampath, filename = split(streampath)
         keep_client = self._my_keep()
         for stream_s in self._streams:
             stream = StreamReader(stream_s, keep_client,
                                   num_retries=self.num_retries)
-            if stream.name() == stream_name:
+            if stream.name() == streampath:
                 break
         else:
             raise ValueError("stream '{}' not found in Collection".
-                             format(stream_name))
+                             format(streampath))
         try:
-            return stream.files()[file_name]
+            return stream.files()[filename]
         except KeyError:
             raise ValueError("file '{}' not found in Collection stream '{}'".
-                             format(stream_name, file_name))
+                             format(filename, streampath))
 
     def all_streams(self):
         self._populate()
@@ -369,17 +375,33 @@ class CollectionWriter(CollectionBase):
         while self._data_buffer_len >= self.KEEP_BLOCK_SIZE:
             self.flush_data()
 
-    def open(self, stream_name, file_name=None):
-        if file_name is None:
-            stream_name, file_name = split(stream_name)
+    def open(self, streampath, filename=None):
+        """open(streampath[, filename]) -> file-like object
+
+        Pass in the path of a file to write to the Collection, either as a
+        single string or as two separate stream name and file name arguments.
+        This method returns a file-like object you can write to add it to the
+        Collection.
+
+        You may only have one file object from the Collection open at a time,
+        so be sure to close the object when you're done.  Using the object in
+        a with statement makes that easy::
+
+          with cwriter.open('./doc/page1.txt') as outfile:
+              outfile.write(page1_data)
+          with cwriter.open('./doc/page2.txt') as outfile:
+              outfile.write(page2_data)
+        """
+        if filename is None:
+            streampath, filename = split(streampath)
         if self._last_open and not self._last_open.closed:
             raise errors.AssertionError(
                 "can't open '{}' when '{}' is still open".format(
-                    file_name, self._last_open.name))
-        if stream_name != self.current_stream_name():
-            self.start_new_stream(stream_name)
-        self.set_current_file_name(file_name)
-        self._last_open = _WriterFile(self, file_name)
+                    filename, self._last_open.name))
+        if streampath != self.current_stream_name():
+            self.start_new_stream(streampath)
+        self.set_current_file_name(filename)
+        self._last_open = _WriterFile(self, filename)
         return self._last_open
 
     def flush_data(self):

commit aec1913f949adb5c8c482112d26322f4db5edea2
Author: Brett Smith <brett at curoverse.com>
Date:   Mon Oct 27 09:04:36 2014 -0400

    3603: Remove duplicate stream tests from test_collections.

diff --git a/sdk/python/tests/test_collections.py b/sdk/python/tests/test_collections.py
index 2d35df8..2c64c40 100644
--- a/sdk/python/tests/test_collections.py
+++ b/sdk/python/tests/test_collections.py
@@ -348,71 +348,6 @@ class ArvadosCollectionsTest(run_test_server.TestCaseWithServers,
         self.assertEqual(arvados.locators_and_ranges(blocks, 11, 15), [['b', 15, 1, 14],
                                                                        ['c', 5, 0, 1]])
 
-    def test_file_stream(self):
-        content = 'abcdefghijklmnopqrstuvwxyz0123456789'
-        msr = tutil.MockStreamReader('.', content)
-        segments = [[0, 10, 0],
-                    [10, 15, 10],
-                    [25, 5, 25]]
-
-        sfr = arvados.StreamFileReader(msr, segments, "test")
-
-        self.assertEqual(sfr.name(), "test")
-        self.assertEqual(sfr.size(), 30)
-
-        self.assertEqual(sfr.readfrom(0, 30), content[0:30])
-        self.assertEqual(sfr.readfrom(2, 30), content[2:30])
-
-        self.assertEqual(sfr.readfrom(2, 8), content[2:10])
-        self.assertEqual(sfr.readfrom(0, 10), content[0:10])
-
-        self.assertEqual(sfr.tell(), 0)
-        self.assertEqual(sfr.read(5), content[0:5])
-        self.assertEqual(sfr.tell(), 5)
-        self.assertEqual(sfr.read(5), content[5:10])
-        self.assertEqual(sfr.tell(), 10)
-        self.assertEqual(sfr.read(5), content[10:15])
-        self.assertEqual(sfr.tell(), 15)
-        self.assertEqual(sfr.read(5), content[15:20])
-        self.assertEqual(sfr.tell(), 20)
-        self.assertEqual(sfr.read(5), content[20:25])
-        self.assertEqual(sfr.tell(), 25)
-        self.assertEqual(sfr.read(5), content[25:30])
-        self.assertEqual(sfr.tell(), 30)
-        self.assertEqual(sfr.read(5), '')
-        self.assertEqual(sfr.tell(), 30)
-
-        segments = [[26, 10, 0],
-                    [0, 15, 10],
-                    [15, 5, 25]]
-
-        sfr = arvados.StreamFileReader(msr, segments, "test")
-
-        self.assertEqual(sfr.size(), 30)
-
-        self.assertEqual(sfr.readfrom(0, 30), content[26:36] + content[0:20])
-        self.assertEqual(sfr.readfrom(2, 30), content[28:36] + content[0:20])
-
-        self.assertEqual(sfr.readfrom(2, 8), content[28:36])
-        self.assertEqual(sfr.readfrom(0, 10), content[26:36])
-
-        self.assertEqual(sfr.tell(), 0)
-        self.assertEqual(sfr.read(5), content[26:31])
-        self.assertEqual(sfr.tell(), 5)
-        self.assertEqual(sfr.read(5), content[31:36])
-        self.assertEqual(sfr.tell(), 10)
-        self.assertEqual(sfr.read(5), content[0:5])
-        self.assertEqual(sfr.tell(), 15)
-        self.assertEqual(sfr.read(5), content[5:10])
-        self.assertEqual(sfr.tell(), 20)
-        self.assertEqual(sfr.read(5), content[10:15])
-        self.assertEqual(sfr.tell(), 25)
-        self.assertEqual(sfr.read(5), content[15:20])
-        self.assertEqual(sfr.tell(), 30)
-        self.assertEqual(sfr.read(5), '')
-        self.assertEqual(sfr.tell(), 30)
-
-
     class MockKeep(object):
         def __init__(self, content, num_retries=0):
             self.content = content
@@ -444,30 +379,6 @@ class ArvadosCollectionsTest(run_test_server.TestCaseWithServers,
         self.assertEqual(sr.readfrom(25, 5), content[25:30])
         self.assertEqual(sr.readfrom(30, 5), '')
 
-    def test_file_reader(self):
-        keepblocks = {'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa+10': 'abcdefghij',
-                      'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb+15': 'klmnopqrstuvwxy',
-                      'cccccccccccccccccccccccccccccccc+5': 'z0123'}
-        mk = self.MockKeep(keepblocks)
-
-        sr = arvados.StreamReader([".", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa+10", "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb+15", "cccccccccccccccccccccccccccccccc+5", "0:10:foo", "15:10:foo"], mk)
-
-        content = 'abcdefghijpqrstuvwxy'
-
-        f = sr.files()["foo"]
-
-        # f.read() calls will be aligned on block boundaries (as a
-        # result of ticket #3663).
-
-        f.seek(0)
-        self.assertEqual(f.read(20), content[0:10])
-
-        f.seek(0)
-        self.assertEqual(f.read(6), content[0:6])
-        self.assertEqual(f.read(6), content[6:10])
-        self.assertEqual(f.read(6), content[10:16])
-        self.assertEqual(f.read(6), content[16:20])
-
     def test_extract_file(self):
         m1 = """. 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt
 . 085c37f02916da1cad16f93c54d899b7+41 0:41:md6sum.txt
diff --git a/sdk/python/tests/test_stream.py b/sdk/python/tests/test_stream.py
index cb7e352..e272845 100644
--- a/sdk/python/tests/test_stream.py
+++ b/sdk/python/tests/test_stream.py
@@ -17,6 +17,7 @@ class StreamFileReaderTestCase(unittest.TestCase):
                                 'count.txt')
 
     def test_read_returns_first_block(self):
+        # read() calls will be aligned on block boundaries - see #3663.
         sfile = self.make_count_reader()
         self.assertEqual('123', sfile.read(10))
 
@@ -73,6 +74,19 @@ class StreamFileReaderTestCase(unittest.TestCase):
         sfile.seek(2, os.SEEK_END)
         self.assertEqual(9, sfile.tell())
 
+    def test_size(self):
+        self.assertEqual(9, self.make_count_reader().size())
+
+    def test_tell_after_block_read(self):
+        sfile = self.make_count_reader()
+        sfile.read(5)
+        self.assertEqual(3, sfile.tell())
+
+    def test_tell_after_small_read(self):
+        sfile = self.make_count_reader()
+        sfile.read(1)
+        self.assertEqual(1, sfile.tell())
+
     def test_no_read_after_close(self):
         sfile = self.make_count_reader()
         sfile.close()

commit deeb84da688e2ca8c9371b448b0121b1310ece37
Author: Brett Smith <brett at curoverse.com>
Date:   Mon Oct 27 09:01:32 2014 -0400

    3603: Style improvements.
    
    * Rename util.splitstream to stream.split, and make the signature more
      like os.path.split.
    * NameAttribute becomes _NameAttribute since it's internal.

diff --git a/sdk/python/arvados/collection.py b/sdk/python/arvados/collection.py
index 97fedb7..69822e1 100644
--- a/sdk/python/arvados/collection.py
+++ b/sdk/python/arvados/collection.py
@@ -7,7 +7,7 @@ from stat import *
 
 from .arvfile import ArvadosFileBase
 from keep import *
-from stream import *
+from .stream import StreamReader, split
 import config
 import errors
 import util
@@ -170,7 +170,8 @@ class CollectionReader(CollectionBase):
 
     def open(self, stream_name, file_name=None):
         self._populate()
-        stream_name, file_name = util.splitstream(stream_name, file_name)
+        if file_name is None:
+            stream_name, file_name = split(stream_name)
         keep_client = self._my_keep()
         for stream_s in self._streams:
             stream = StreamReader(stream_s, keep_client,
@@ -369,7 +370,8 @@ class CollectionWriter(CollectionBase):
             self.flush_data()
 
     def open(self, stream_name, file_name=None):
-        stream_name, file_name = util.splitstream(stream_name, file_name)
+        if file_name is None:
+            stream_name, file_name = split(stream_name)
         if self._last_open and not self._last_open.closed:
             raise errors.AssertionError(
                 "can't open '{}' when '{}' is still open".format(
diff --git a/sdk/python/arvados/stream.py b/sdk/python/arvados/stream.py
index e99f660..a2c0dac 100644
--- a/sdk/python/arvados/stream.py
+++ b/sdk/python/arvados/stream.py
@@ -90,9 +90,20 @@ def locators_and_ranges(data_locators, range_start, range_size, debug=False):
         i += 1
     return resp
 
+def split(path):
+    """split(path) -> streamname, filename
+
+    Separate the stream name and file name in a /-separated stream path.
+    If no stream name is available, assume '.'.
+    """
+    try:
+        stream_name, file_name = path.rsplit('/', 1)
+    except ValueError:  # No / in string
+        stream_name, file_name = '.', path
+    return stream_name, file_name
 
 class StreamFileReader(ArvadosFileBase):
-    class NameAttribute(str):
+    class _NameAttribute(str):
         # The Python file API provides a plain .name attribute.
         # Older SDK provided a name() method.
         # This class provides both, for maximum compatibility.
@@ -101,7 +112,7 @@ class StreamFileReader(ArvadosFileBase):
 
 
     def __init__(self, stream, segments, name):
-        super(StreamFileReader, self).__init__(self.NameAttribute(name), 'rb')
+        super(StreamFileReader, self).__init__(self._NameAttribute(name), 'rb')
         self._stream = stream
         self.segments = segments
         self._filepos = 0L
diff --git a/sdk/python/arvados/util.py b/sdk/python/arvados/util.py
index 9950528..2609f11 100644
--- a/sdk/python/arvados/util.py
+++ b/sdk/python/arvados/util.py
@@ -348,19 +348,3 @@ def list_all(fn, num_retries=0, **kwargs):
         items_available = c['items_available']
         offset = c['offset'] + len(c['items'])
     return items
-
-def splitstream(s, filename=None):
-    """splitstream(s, filename=None) -> streamname, filename
-
-    Normalize a /-separated stream path.
-    If filename is None, extract it from the end of s.
-    If no stream name is available, assume '.'.
-    """
-    if filename is not None:
-        streamname = s or '.'
-    else:
-        try:
-            streamname, filename = s.rsplit('/', 1)
-        except ValueError:  # No / in string
-            streamname, filename = '.', s
-    return streamname, filename

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list