[ARVADOS] updated: 3070d89e1e52cbc42a04560f5db2630c0c4cbd6b

git at public.curoverse.com git at public.curoverse.com
Mon Feb 23 09:42:19 EST 2015


Summary of changes:
 sdk/python/arvados/_ranges.py        |  8 ++--
 sdk/python/arvados/collection.py     | 81 ++++++++++++++++++++++++++----------
 sdk/python/tests/test_arvfile.py     | 13 ++++--
 sdk/python/tests/test_collections.py | 11 +++++
 4 files changed, 82 insertions(+), 31 deletions(-)

       via  3070d89e1e52cbc42a04560f5db2630c0c4cbd6b (commit)
      from  93d38f3cb3d7874ed3723409357d8dd5e8d618a5 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit 3070d89e1e52cbc42a04560f5db2630c0c4cbd6b
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Mon Feb 23 09:44:27 2015 -0500

    4823: Handle edge cases of files named '.' so that the FUSE test passes.  Added
    tests for invalid manifests.  Defer populating CollectionReader streams until
    needed to avoid extra copy of the manifest.

diff --git a/sdk/python/arvados/_ranges.py b/sdk/python/arvados/_ranges.py
index 1862d94..15f0173 100644
--- a/sdk/python/arvados/_ranges.py
+++ b/sdk/python/arvados/_ranges.py
@@ -82,8 +82,8 @@ def locators_and_ranges(data_locators, range_start, range_size):
     if range_size == 0:
         return []
     resp = []
-    range_start = long(range_start)
-    range_size = long(range_size)
+    range_start = range_start
+    range_size = range_size
     range_end = range_start + range_size
 
     i = first_block(data_locators, range_start, range_size)
@@ -149,8 +149,8 @@ def replace_range(data_locators, new_range_start, new_range_size, new_locator, n
     if new_range_size == 0:
         return
 
-    new_range_start = long(new_range_start)
-    new_range_size = long(new_range_size)
+    new_range_start = new_range_start
+    new_range_size = new_range_size
     new_range_end = new_range_start + new_range_size
 
     if len(data_locators) == 0:
diff --git a/sdk/python/arvados/collection.py b/sdk/python/arvados/collection.py
index 08b9deb..6d5dd4f 100644
--- a/sdk/python/arvados/collection.py
+++ b/sdk/python/arvados/collection.py
@@ -526,8 +526,6 @@ class SynchronizedCollectionBase(CollectionBase):
         """
 
         pathcomponents = path.split("/")
-        if pathcomponents[0] == '.':
-            del pathcomponents[0]
 
         if pathcomponents and pathcomponents[0]:
             item = self._items.get(pathcomponents[0])
@@ -567,8 +565,6 @@ class SynchronizedCollectionBase(CollectionBase):
 
         """
         pathcomponents = path.split("/")
-        if pathcomponents[0] == '.':
-            del pathcomponents[0]
 
         if pathcomponents and pathcomponents[0]:
             item = self._items.get(pathcomponents[0])
@@ -721,9 +717,6 @@ class SynchronizedCollectionBase(CollectionBase):
           Specify whether to remove non-empty subcollections (True), or raise an error (False).
         """
         pathcomponents = path.split("/")
-        if pathcomponents[0] == '.':
-            # Remove '.' from the front of the path
-            del pathcomponents[0]
 
         if len(pathcomponents) > 0:
             item = self._items.get(pathcomponents[0])
@@ -994,11 +987,11 @@ class Collection(SynchronizedCollectionBase):
     """
 
     def __init__(self, manifest_locator_or_text=None,
-                 parent=None,
-                 apiconfig=None,
                  api_client=None,
                  keep_client=None,
                  num_retries=None,
+                 parent=None,
+                 apiconfig=None,
                  block_manager=None):
         """Collection constructor.
 
@@ -1200,6 +1193,27 @@ class Collection(SynchronizedCollectionBase):
         """
         return self._api_response
 
+    def find_or_create(self, path, create_type):
+        """See `SynchronizedCollectionBase.find_or_create`"""
+        if path == ".":
+            return self
+        else:
+            return super(Collection, self).find_or_create(path[2:] if path.startswith("./") else path, create_type)
+
+    def find(self, path):
+        """See `SynchronizedCollectionBase.find`"""
+        if path == ".":
+            return self
+        else:
+            return super(Collection, self).find(path[2:] if path.startswith("./") else path)
+
+    def remove(self, path, recursive=False):
+        """See `SynchronizedCollectionBase.remove`"""
+        if path == ".":
+            raise errors.ArgumentError("Cannot remove '.'")
+        else:
+            return super(Collection, self).remove(path[2:] if path.startswith("./") else path, recursive)
+
     @must_be_writable
     @synchronized
     @retry_method
@@ -1306,9 +1320,8 @@ class Collection(SynchronizedCollectionBase):
         if len(self) > 0:
             raise ArgumentError("Can only import manifest into an empty collection")
 
-        into_collection = self
-        save_sync = into_collection.sync_mode()
-        into_collection._sync = None
+        save_sync = self.sync_mode()
+        self._sync = None
 
         STREAM_NAME = 0
         BLOCKS = 1
@@ -1345,8 +1358,12 @@ class Collection(SynchronizedCollectionBase):
                     pos = long(s.group(1))
                     size = long(s.group(2))
                     name = s.group(3).replace('\\040', ' ')
-                    f = into_collection.find_or_create("%s/%s" % (stream_name, name), FILE)
-                    f.add_segment(blocks, pos, size)
+                    filepath = os.path.join(stream_name, name)
+                    f = self.find_or_create(filepath, FILE)
+                    if isinstance(f, ArvadosFile):
+                        f.add_segment(blocks, pos, size)
+                    else:
+                        raise errors.SyntaxError("File %s conflicts with stream of the same name.", filepath)
                 else:
                     # error!
                     raise errors.SyntaxError("Invalid manifest format")
@@ -1355,8 +1372,8 @@ class Collection(SynchronizedCollectionBase):
                 stream_name = None
                 state = STREAM_NAME
 
-        into_collection.set_unmodified()
-        into_collection._sync = save_sync
+        self.set_unmodified()
+        self._sync = save_sync
 
 
 class Subcollection(SynchronizedCollectionBase):
@@ -1422,14 +1439,31 @@ class CollectionReader(Collection):
 
         # Backwards compatability with old CollectionReader
         # all_streams() and all_files()
-        if self._manifest_text:
-            self._streams = [sline.split()
-                             for sline in self._manifest_text.split("\n")
-                             if sline]
-        else:
-            self._streams = []
+        self._streams = None
+
+    def _populate_streams(orig_func):
+        @functools.wraps(orig_func)
+        def populate_streams_wrapper(self, *args, **kwargs):
+            # Defer populating self._streams until needed since it creates a copy of the manifest.
+            if self._streams is None:
+                if self._manifest_text:
+                    self._streams = [sline.split()
+                                     for sline in self._manifest_text.split("\n")
+                                     if sline]
+                else:
+                    self._streams = []
+            return orig_func(self, *args, **kwargs)
+        return populate_streams_wrapper
 
+    @_populate_streams
     def normalize(self):
+        """Normalize the streams returned by `all_streams`.
+
+        This method is kept for backwards compatability and only affects the
+        behavior of `all_streams()` and `all_files()`
+
+        """
+
         # Rearrange streams
         streams = {}
         for s in self.all_streams():
@@ -1444,11 +1478,12 @@ class CollectionReader(Collection):
 
         self._streams = [normalize_stream(s, streams[s])
                          for s in sorted(streams)]
-
+    @_populate_streams
     def all_streams(self):
         return [StreamReader(s, self._my_keep(), num_retries=self.num_retries)
                 for s in self._streams]
 
+    @_populate_streams
     def all_files(self):
         for s in self.all_streams():
             for f in s.all_files():
diff --git a/sdk/python/tests/test_arvfile.py b/sdk/python/tests/test_arvfile.py
index b6e8426..9c67fbd 100644
--- a/sdk/python/tests/test_arvfile.py
+++ b/sdk/python/tests/test_arvfile.py
@@ -66,11 +66,16 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
                              api_client=api, keep_client=keep) as c:
             writer = c.open("count.txt", "r+")
             self.assertEqual(writer.size(), 10)
-            writer.seek(5)
-            self.assertEqual("56789", writer.read(8))
+            self.assertEqual("0123456789", writer.read(12))
+
             writer.truncate(8)
-            writer.seek(5, os.SEEK_SET)
-            self.assertEqual("567", writer.read(8))
+
+            # Make sure reading off the end doesn't break
+            self.assertEqual("", writer.read(12))
+
+            self.assertEqual(writer.size(), 8)
+            writer.seek(0, os.SEEK_SET)
+            self.assertEqual("01234567", writer.read(12))
 
             self.assertEqual(None, c._manifest_locator)
             self.assertEqual(True, c.modified())
diff --git a/sdk/python/tests/test_collections.py b/sdk/python/tests/test_collections.py
index aebfbd0..bc94f0d 100644
--- a/sdk/python/tests/test_collections.py
+++ b/sdk/python/tests/test_collections.py
@@ -817,6 +817,17 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
         self.assertEqual(m1, CollectionReader(m1).manifest_text(normalize=False))
         self.assertEqual(". 5348b82a029fd9e971a811ce1f71360b+43 085c37f02916da1cad16f93c54d899b7+41 8b22da26f9f433dea0a10e5ec66d73ba+43 0:127:md5sum.txt\n", CollectionReader(m1).manifest_text(normalize=True))
 
+    def test_init_manifest_with_collision(self):
+        m1 = """. 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt
+./md5sum.txt 085c37f02916da1cad16f93c54d899b7+41 0:41:md5sum.txt
+"""
+        with self.assertRaises(IOError):
+            self.assertEqual(m1, CollectionReader(m1).manifest_text(normalize=False))
+
+    def test_init_manifest_with_error(self):
+        m1 = """. 0:43:md5sum.txt"""
+        with self.assertRaises(arvados.errors.ArgumentError):
+            self.assertEqual(m1, CollectionReader(m1).manifest_text(normalize=False))
 
     def test_remove(self):
         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n')

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list