[ARVADOS] updated: 5109db362064053ed6711169d6c414b2cb4e22fb

git at public.curoverse.com git at public.curoverse.com
Tue Dec 30 14:50:28 EST 2014


Summary of changes:
 sdk/python/arvados/arvfile.py    |  93 +++++----
 sdk/python/arvados/collection.py | 118 +++++++-----
 sdk/python/tests/test_arvfile.py | 395 +++++++++++++++++++++++++++++----------
 3 files changed, 424 insertions(+), 182 deletions(-)

       via  5109db362064053ed6711169d6c414b2cb4e22fb (commit)
      from  02e9754a68a5816458d517b8f5012530cf17ebba (commit)

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.


commit 5109db362064053ed6711169d6c414b2cb4e22fb
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Tue Dec 30 14:51:55 2014 -0500

    3198: Many tests.  Fixed lots of bugs.

diff --git a/sdk/python/arvados/arvfile.py b/sdk/python/arvados/arvfile.py
index 1342601..8369272 100644
--- a/sdk/python/arvados/arvfile.py
+++ b/sdk/python/arvados/arvfile.py
@@ -251,10 +251,9 @@ class BlockManager(object):
         self._bufferblocks = {}
         self._put_queue = None
         self._put_errors = None
-        self._threads = None
-        self._continue_worker = True
+        self._put_threads = None
         self._prefetch_queue = None
-        self._prefetch_thread = None
+        self._prefetch_threads = None
 
     def alloc_bufferblock(self, blockid=None, starting_size=2**14):
         if blockid is None:
@@ -263,25 +262,49 @@ class BlockManager(object):
         self._bufferblocks[bb.blockid] = bb
         return bb
 
+    def stop_threads(self):
+        if self._put_threads is not None:
+            for t in self._put_threads:
+                self._put_queue.put(None)
+            for t in self._put_threads:
+                t.join()
+        self._put_threads = None
+        self._put_queue = None
+        self._put_errors = None
+
+        if self._prefetch_threads is not None:
+            for t in self._prefetch_threads:
+                self._prefetch_queue.put(None)
+            for t in self._prefetch_threads:
+                t.join()
+        self._prefetch_threads = None
+        self._prefetch_queue = None
+
     def commit_bufferblock(self, block):
         def worker(self):
-            while self._continue_worker:
+            while True:
                 try:
                     b = self._put_queue.get()
-                    b._locator = self._keep.put(item)
+                    if b is None:
+                        return
+                    b._locator = self._keep.put(b.buffer_view[0:b.write_pointer].tobytes())
                     b.state = BufferBlock.COMMITTED
                     b.buffer_view = None
                     b.buffer_block = None
                 except Exception as e:
-                    self._error.put(e)
+                    print e
+                    self._put_errors.put(e)
                 finally:
-                    self._queue.task_done()
+                    if self._put_queue is not None:
+                        self._put_queue.task_done()
 
-        if self._threads is None:
+        if self._put_threads is None:
             self._put_queue = Queue.Queue()
             self._put_errors = Queue.Queue()
-            self._threads = [threading.Thread(target=worker, args=(self,)),
-                             threading.Thread(target=worker, args=(self,))]
+            self._put_threads = [threading.Thread(target=worker, args=(self,)),
+                                threading.Thread(target=worker, args=(self,))]
+            self._put_threads[0].start()
+            self._put_threads[1].start()
 
         block.state = BufferBlock.PENDING
         self._put_queue.put(block)
@@ -296,42 +319,46 @@ class BlockManager(object):
         return self._keep.get(locator, num_retries=num_retries)
 
     def commit_all(self):
-        for k,v in self._bufferblocks:
+        for k,v in self._bufferblocks.items():
             if v.state == BufferBlock.WRITABLE:
                 self.commit_bufferblock(v)
-        self._put_queue.join()
-        if not self._errors.empty():
-            e = []
-            try:
-                while True:
-                    e.append(self._errors.get(False))
-            except Queue.Empty:
-                pass
-            raise AsyncKeepWriteErrors(e)
+        if self._put_queue is not None:
+            self._put_queue.join()
+            if not self._put_errors.empty():
+                e = []
+                try:
+                    while True:
+                        e.append(self._put_errors.get(False))
+                except Queue.Empty:
+                    pass
+                raise AsyncKeepWriteErrors(e)
 
     def block_prefetch(self, locator):
-        def worker(keep, loc):
-            while self._continue_worker:
+        def worker(self):
+            while True:
                 try:
                     b = self._prefetch_queue.get()
-                    keep.get(loc)
+                    if b is None:
+                        return
+                    self._keep.get(b)
                 except:
                     pass
 
         if locator in self._bufferblocks:
             return
-        if self._prefetch_thread is None:
+        if self._prefetch_threads is None:
             self._prefetch_queue = Queue.Queue()
-            self._prefetch_thread = threading.Thread(target=worker, args=(self,))
+            self._prefetch_threads = [threading.Thread(target=worker, args=(self,))]
+            self._prefetch_threads[0].start()
         self._prefetch_queue.put(locator)
 
 class ArvadosFile(object):
-    def __init__(self, block_manager, stream=[], segments=[], keep=None):
+    def __init__(self, parent, stream=[], segments=[], keep=None):
         '''
         stream: a list of Range objects representing a block stream
         segments: a list of Range objects representing segments
         '''
-        self.bbm = block_manager
+        self.parent = parent
         self._modified = True
         self._segments = []
         for s in segments:
@@ -371,12 +398,12 @@ class ArvadosFile(object):
         data = []
 
         for lr in locators_and_ranges(self._segments, offset, size + config.KEEP_BLOCK_SIZE):
-            self.bbm.block_prefetch(lr.locator)
+            self.parent._my_block_manager().block_prefetch(lr.locator)
 
         for lr in locators_and_ranges(self._segments, offset, size):
             # TODO: if data is empty, wait on block get, otherwise only
             # get more data if the block is already in the cache.
-            data.append(self.bbm.get_block(lr.locator, num_retries=num_retries)[lr.segment_offset:lr.segment_offset+lr.segment_size])
+            data.append(self.parent._my_block_manager().get_block(lr.locator, num_retries=num_retries)[lr.segment_offset:lr.segment_offset+lr.segment_size])
         return ''.join(data)
 
     def _repack_writes(self):
@@ -395,7 +422,7 @@ class ArvadosFile(object):
         if write_total < self._current_bblock.size():
             # There is more data in the buffer block than is actually accounted for by segments, so
             # re-pack into a new buffer by copying over to a new buffer block.
-            new_bb = self.bbm.alloc_bufferblock(self._current_bblock.blockid, starting_size=write_total)
+            new_bb = self.parent._my_block_manager().alloc_bufferblock(self._current_bblock.blockid, starting_size=write_total)
             for t in bufferblock_segs:
                 new_bb.append(self._current_bblock.buffer_view[t.segment_offset:t.segment_offset+t.range_size].tobytes())
                 t.segment_offset = new_bb.size() - t.range_size
@@ -415,13 +442,13 @@ class ArvadosFile(object):
         self._modified = True
 
         if self._current_bblock is None or self._current_bblock.state != BufferBlock.WRITABLE:
-            self._current_bblock = self.bbm.alloc_bufferblock()
+            self._current_bblock = self.parent._my_block_manager().alloc_bufferblock()
 
         if (self._current_bblock.size() + len(data)) > config.KEEP_BLOCK_SIZE:
             self._repack_writes()
             if (self._current_bblock.size() + len(data)) > config.KEEP_BLOCK_SIZE:
-                self.bbm.commit_bufferblock(self._current_bblock)
-                self._current_bblock = self.bbm.alloc_bufferblock()
+                self.parent._my_block_manager().commit_bufferblock(self._current_bblock)
+                self._current_bblock = self.parent._my_block_manager().alloc_bufferblock()
 
         self._current_bblock.append(data)
         replace_range(self._segments, offset, len(data), self._current_bblock.blockid, self._current_bblock.write_pointer - len(data))
diff --git a/sdk/python/arvados/collection.py b/sdk/python/arvados/collection.py
index 85c7ad7..6602ed1 100644
--- a/sdk/python/arvados/collection.py
+++ b/sdk/python/arvados/collection.py
@@ -2,6 +2,7 @@ import functools
 import logging
 import os
 import re
+import errno
 
 from collections import deque
 from stat import *
@@ -640,20 +641,20 @@ class ResumableCollectionWriter(CollectionWriter):
 
 
 class Collection(CollectionBase):
-    def __init__(self, manifest_locator_or_text=None, api_client=None,
+    def __init__(self, parent=None, manifest_locator_or_text=None, api_client=None,
                  keep_client=None, num_retries=0, block_manager=None):
 
+        self._parent = parent
         self._items = None
         self._api_client = api_client
         self._keep_client = keep_client
+        self._block_manager = block_manager
+
         self.num_retries = num_retries
         self._manifest_locator = None
         self._manifest_text = None
         self._api_response = None
 
-        if block_manager is None:
-            self.block_manager = BlockManager(keep_client)
-
         if manifest_locator_or_text:
             if re.match(util.keep_locator_pattern, manifest_locator_or_text):
                 self._manifest_locator = manifest_locator_or_text
@@ -665,6 +666,29 @@ class Collection(CollectionBase):
                 raise errors.ArgumentError(
                     "Argument to CollectionReader must be a manifest or a collection UUID")
 
+    def _my_api(self):
+        if self._api_client is None:
+            if self._parent is not None:
+                return self._parent._my_api()
+            self._api_client = arvados.api('v1')
+            self._keep_client = None  # Make a new one with the new api.
+        return self._api_client
+
+    def _my_keep(self):
+        if self._keep_client is None:
+            if self._parent is not None:
+                return self._parent._my_keep()
+            self._keep_client = KeepClient(api_client=self._my_api(),
+                                           num_retries=self.num_retries)
+        return self._keep_client
+
+    def _my_block_manager(self):
+        if self._block_manager is None:
+            if self._parent is not None:
+                return self._parent._my_block_manager()
+            self._block_manager = BlockManager(self._my_keep())
+        return self._block_manager
+
     def _populate_from_api_server(self):
         # As in KeepClient itself, we must wait until the last
         # possible moment to instantiate an API client, in order to
@@ -674,10 +698,7 @@ class Collection(CollectionBase):
         # clause, just like any other Collection lookup
         # failure. Return an exception, or None if successful.
         try:
-            if self._api_client is None:
-                self._api_client = arvados.api('v1')
-                self._keep_client = None  # Make a new one with the new api.
-            self._api_response = self._api_client.collections().get(
+            self._api_response = self._my_api().collections().get(
                 uuid=self._manifest_locator).execute(
                     num_retries=self.num_retries)
             self._manifest_text = self._api_response['manifest_text']
@@ -742,7 +763,9 @@ class Collection(CollectionBase):
         return self
 
     def __exit__(self, exc_type, exc_value, traceback):
-        self.save()
+        self.save(no_locator=True)
+        if self._block_manager is not None:
+            self._block_manager.stop_threads()
 
     @_populate_first
     def find(self, path, create=False):
@@ -756,13 +779,13 @@ class Collection(CollectionBase):
                 # item must be a file
                 if item is None and create:
                     # create new file
-                    item = ArvadosFile(self.block_manager, keep=self._keep_client)
+                    item = ArvadosFile(self, keep=self._keep_client)
                     self._items[p[0]] = item
                 return item
             else:
                 if item is None and create:
                     # create new collection
-                    item = Collection(api_client=self._api_client, keep=self._keep_client, num_retries=self.num_retries, block_manager=self.block_manager)
+                    item = Collection(parent=self, num_retries=self.num_retries)
                     self._items[p[0]] = item
                 del p[0]
                 return item.find("/".join(p), create=create)
@@ -787,9 +810,9 @@ class Collection(CollectionBase):
 
         f = self.find(path, create=create)
         if f is None:
-            raise ArgumentError("File not found")
+            raise IOError((errno.ENOENT, "File not found"))
         if not isinstance(f, ArvadosFile):
-            raise ArgumentError("Path must refer to a file.")
+            raise IOError((errno.EISDIR, "Path must refer to a file."))
 
         if mode[0] == "w":
             f.truncate(0)
@@ -821,15 +844,11 @@ class Collection(CollectionBase):
 
     @_populate_first
     def __getitem__(self, k):
-        r = self.find(k)
-        if r:
-            return r
-        else:
-            raise KeyError(k)
+        return self._items[k]
 
     @_populate_first
     def __contains__(self, k):
-        return self.find(k) is not None
+        return k in self._items
 
     @_populate_first
     def __len__(self):
@@ -837,21 +856,7 @@ class Collection(CollectionBase):
 
     @_populate_first
     def __delitem__(self, p):
-        p = path.split("/")
-        if p[0] == '.':
-            del p[0]
-
-        if len(p) > 0:
-            item = self._items.get(p[0])
-            if item is None:
-                raise NotFoundError()
-            if len(p) == 1:
-                del self._items[p[0]]
-            else:
-                del p[0]
-                del item["/".join(p)]
-        else:
-            raise NotFoundError()
+        del self._items[p]
 
     @_populate_first
     def keys(self):
@@ -866,6 +871,28 @@ class Collection(CollectionBase):
         return self._items.items()
 
     @_populate_first
+    def exists(self, path):
+        return self.find(path) != None
+
+    @_populate_first
+    def remove(self, path):
+        p = path.split("/")
+        if p[0] == '.':
+            del p[0]
+
+        if len(p) > 0:
+            item = self._items.get(p[0])
+            if item is None:
+                raise IOError((errno.ENOENT, "File not found"))
+            if len(p) == 1:
+                del self._items[p[0]]
+            else:
+                del p[0]
+                item.remove("/".join(p))
+        else:
+            raise IOError((errno.ENOENT, "File not found"))
+
+    @_populate_first
     def manifest_text(self, strip=False, normalize=False):
         if self.modified() or self._manifest_text is None or normalize:
             return export_manifest(self, stream_name=".", portable_locators=strip)
@@ -880,31 +907,29 @@ class Collection(CollectionBase):
         return hashlib.md5(stripped).hexdigest() + '+' + str(len(stripped))
 
     @_populate_first
-    def commit_bufferblocks(self):
-        pass
-
-    @_populate_first
-    def save(self):
+    def save(self, no_locator=False):
         if self.modified():
+            self._my_block_manager().commit_all()
             self._my_keep().put(self.manifest_text(strip=True))
-            if re.match(util.collection_uuid_pattern, self._manifest_locator):
-                self._api_response = self._api_client.collections().update(
+            if self._manifest_locator is not None and re.match(util.collection_uuid_pattern, self._manifest_locator):
+                self._api_response = self._my_api().collections().update(
                     uuid=self._manifest_locator,
                     body={'manifest_text': self.manifest_text(strip=False)}
                     ).execute(
                         num_retries=self.num_retries)
-            else:
+            elif not no_locator:
                 raise AssertionError("Collection manifest_locator must be a collection uuid.  Use save_as() for new collections.")
             self.set_unmodified()
 
     @_populate_first
     def save_as(self, name, owner_uuid=None):
+        self._my_block_manager().commit_all()
         self._my_keep().put(self.manifest_text(strip=True))
         body = {"manifest_text": self.manifest_text(strip=False),
                 "name": name}
         if owner_uuid:
             body["owner_uuid"] = owner_uuid
-        self._api_response = self._api_client.collections().create(body=body).execute(num_retries=self.num_retries)
+        self._api_response = self._my_api().collections().create(body=body).execute(num_retries=self.num_retries)
         self._manifest_locator = self._api_response["uuid"]
         self.set_unmodified()
 
@@ -976,12 +1001,13 @@ def export_manifest(item, stream_name=".", portable_locators=False):
             for s in v._segments:
                 loc = s.locator
                 if loc.startswith("bufferblock"):
-                    loc = v.bbm._bufferblocks[loc].locator()
+                    loc = v.parent._my_block_manager()._bufferblocks[loc].locator()
                 st.append(LocatorAndRange(loc, locator_block_size(loc),
                                      s.segment_offset, s.range_size))
             stream[k] = st
-        buf += ' '.join(normalize_stream(stream_name, stream))
-        buf += "\n"
+        if stream:
+            buf += ' '.join(normalize_stream(stream_name, stream))
+            buf += "\n"
         for k in [s for s in sorted_keys if isinstance(item[s], Collection)]:
             buf += export_manifest(item[k], stream_name=os.path.join(stream_name, k))
     elif isinstance(item, ArvadosFile):
diff --git a/sdk/python/tests/test_arvfile.py b/sdk/python/tests/test_arvfile.py
index 9edb2c5..7ccdf32 100644
--- a/sdk/python/tests/test_arvfile.py
+++ b/sdk/python/tests/test_arvfile.py
@@ -18,133 +18,322 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
     class MockKeep(object):
         def __init__(self, blocks):
             self.blocks = blocks
+            self.requests = []
         def get(self, locator, num_retries=0):
-            return self.blocks[locator]
+            self.requests.append(locator)
+            return self.blocks.get(locator)
         def put(self, data):
             pdh = "%s+%i" % (hashlib.md5(data).hexdigest(), len(data))
             self.blocks[pdh] = str(data)
             return pdh
 
+    class MockApi(object):
+        def __init__(self, b, r):
+            self.b = b
+            self.r = r
+        class MockCollections(object):
+            def __init__(self, b, r):
+                self.b = b
+                self.r = r
+            class Execute(object):
+                def __init__(self, r):
+                    self.r = r
+                def execute(self, num_retries=None):
+                    return self.r
+            def create(self, body=None):
+                if body != self.b:
+                    raise Exception("Body %s does not match expectation %s" % (body, self.b))
+                return ArvadosFileWriterTestCase.MockApi.MockCollections.Execute(self.r)
+            def update(self, uuid=None, body=None):
+                return ArvadosFileWriterTestCase.MockApi.MockCollections.Execute(self.r)
+        def collections(self):
+            return ArvadosFileWriterTestCase.MockApi.MockCollections(self.b, self.r)
+
+
     def test_truncate(self):
-        c = import_manifest('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
-                              keep=ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"}))
-        writer = c.open("count.txt", "r+")
-        self.assertEqual(writer.size(), 10)
-        writer.seek(5)
-        self.assertEqual("56789", writer.read(8))
-        writer.truncate(8)
-        writer.seek(5, os.SEEK_SET)
-        self.assertEqual("567", writer.read(8))
+        keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
+        api = ArvadosFileWriterTestCase.MockApi({"name":"test_truncate",
+                                                 "manifest_text":". 781e5e245d69b566979b86e28d23f2c7+10 0:8:count.txt\n"},
+                                                {"uuid":"zzzzz-4zz18-mockcollection0"})
+        with import_manifest('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
+                             api_client=api, keep=keep) as c:
+            writer = c.open("count.txt", "r+")
+            self.assertEqual(writer.size(), 10)
+            writer.seek(5)
+            self.assertEqual("56789", writer.read(8))
+            writer.truncate(8)
+            writer.seek(5, os.SEEK_SET)
+            self.assertEqual("567", writer.read(8))
+
+            self.assertEqual(None, c._manifest_locator)
+            self.assertEqual(True, c.modified())
+            c.save_as("test_truncate")
+            self.assertEqual("zzzzz-4zz18-mockcollection0", c._manifest_locator)
+            self.assertEqual(False, c.modified())
 
     def test_append(self):
-        c = import_manifest('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
-                              keep=ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"}))
-        writer = c.open("count.txt", "r+")
-        writer.seek(5, os.SEEK_SET)
-        self.assertEqual("56789", writer.read(8))
-        writer.seek(10, os.SEEK_SET)
-        writer.write("foo")
-        self.assertEqual(writer.size(), 13)
-        writer.seek(5, os.SEEK_SET)
-        self.assertEqual("56789foo", writer.read(8))
+        keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
+        api = ArvadosFileWriterTestCase.MockApi({"name":"test_append",
+                                                 "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:13:count.txt\n"},
+                                                {"uuid":"zzzzz-4zz18-mockcollection0"})
+        with import_manifest('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
+                             api_client=api, keep=keep) as c:
+            writer = c.open("count.txt", "r+")
+            writer.seek(5, os.SEEK_SET)
+            self.assertEqual("56789", writer.read(8))
+            writer.seek(10, os.SEEK_SET)
+            writer.write("foo")
+            self.assertEqual(writer.size(), 13)
+            writer.seek(5, os.SEEK_SET)
+            self.assertEqual("56789foo", writer.read(8))
+
+            self.assertEqual(None, c._manifest_locator)
+            self.assertEqual(True, c.modified())
+            self.assertEqual(None, keep.get("acbd18db4cc2f85cedef654fccc4a4d8+3"))
+            c.save_as("test_append")
+            self.assertEqual("zzzzz-4zz18-mockcollection0", c._manifest_locator)
+            self.assertEqual(False, c.modified())
+            self.assertEqual("foo", keep.get("acbd18db4cc2f85cedef654fccc4a4d8+3"))
 
     def test_write0(self):
-        c = import_manifest('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
-                              keep=ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"}))
-        writer = c.open("count.txt", "r+")
-        self.assertEqual("0123456789", writer.readfrom(0, 13))
-        writer.seek(0, os.SEEK_SET)
-        writer.write("foo")
-        self.assertEqual(writer.size(), 10)
-        self.assertEqual("foo3456789", writer.readfrom(0, 13))
-        self.assertEqual(". acbd18db4cc2f85cedef654fccc4a4d8+3 781e5e245d69b566979b86e28d23f2c7+10 0:3:count.txt 6:7:count.txt\n", export_manifest(c))
+        keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
+        with import_manifest('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
+                             keep=keep) as c:
+            writer = c.open("count.txt", "r+")
+            self.assertEqual("0123456789", writer.readfrom(0, 13))
+            writer.seek(0, os.SEEK_SET)
+            writer.write("foo")
+            self.assertEqual(writer.size(), 10)
+            self.assertEqual("foo3456789", writer.readfrom(0, 13))
+            self.assertEqual(". acbd18db4cc2f85cedef654fccc4a4d8+3 781e5e245d69b566979b86e28d23f2c7+10 0:3:count.txt 6:7:count.txt\n", export_manifest(c))
 
     def test_write1(self):
-        c = import_manifest('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
-                              keep=ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"}))
-        writer = c.open("count.txt", "r+")
-        self.assertEqual("0123456789", writer.readfrom(0, 13))
-        writer.seek(3, os.SEEK_SET)
-        writer.write("foo")
-        self.assertEqual(writer.size(), 10)
-        self.assertEqual("012foo6789", writer.readfrom(0, 13))
-        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:count.txt 10:3:count.txt 6:4:count.txt\n", export_manifest(c))
+        keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
+        with import_manifest('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
+                             keep=keep) as c:
+            writer = c.open("count.txt", "r+")
+            self.assertEqual("0123456789", writer.readfrom(0, 13))
+            writer.seek(3, os.SEEK_SET)
+            writer.write("foo")
+            self.assertEqual(writer.size(), 10)
+            self.assertEqual("012foo6789", writer.readfrom(0, 13))
+            self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:count.txt 10:3:count.txt 6:4:count.txt\n", export_manifest(c))
 
     def test_write2(self):
-        c = import_manifest('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
-                              keep=ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"}))
-        writer = c.open("count.txt", "r+")
-        self.assertEqual("0123456789", writer.readfrom(0, 13))
-        writer.seek(7, os.SEEK_SET)
-        writer.write("foo")
-        self.assertEqual(writer.size(), 10)
-        self.assertEqual("0123456foo", writer.readfrom(0, 13))
-        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:7:count.txt 10:3:count.txt\n", export_manifest(c))
+        keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
+        with import_manifest('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
+                             keep=keep) as c:
+            writer = c.open("count.txt", "r+")
+            self.assertEqual("0123456789", writer.readfrom(0, 13))
+            writer.seek(7, os.SEEK_SET)
+            writer.write("foo")
+            self.assertEqual(writer.size(), 10)
+            self.assertEqual("0123456foo", writer.readfrom(0, 13))
+            self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:7:count.txt 10:3:count.txt\n", export_manifest(c))
 
     def test_write3(self):
-        c = import_manifest('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt 0:10:count.txt\n',
-                              keep=ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"}))
-        writer = c.open("count.txt", "r+")
-        self.assertEqual("012345678901234", writer.readfrom(0, 15))
-        writer.seek(7, os.SEEK_SET)
-        writer.write("foobar")
-        self.assertEqual(writer.size(), 20)
-        self.assertEqual("0123456foobar34", writer.readfrom(0, 15))
-        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 3858f62230ac3c915f300c664312c63f+6 0:7:count.txt 10:6:count.txt 3:7:count.txt\n", export_manifest(c))
+        keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
+        with import_manifest('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt 0:10:count.txt\n',
+                             keep=keep) as c:
+            writer = c.open("count.txt", "r+")
+            self.assertEqual("012345678901234", writer.readfrom(0, 15))
+            writer.seek(7, os.SEEK_SET)
+            writer.write("foobar")
+            self.assertEqual(writer.size(), 20)
+            self.assertEqual("0123456foobar34", writer.readfrom(0, 15))
+            self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 3858f62230ac3c915f300c664312c63f+6 0:7:count.txt 10:6:count.txt 3:7:count.txt\n", export_manifest(c))
 
     def test_write4(self):
-        c = import_manifest('. 781e5e245d69b566979b86e28d23f2c7+10 0:4:count.txt 0:4:count.txt 0:4:count.txt',
-                              keep=ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"}))
-        writer = c.open("count.txt", "r+")
-        self.assertEqual("012301230123", writer.readfrom(0, 15))
-        writer.seek(2, os.SEEK_SET)
-        writer.write("abcdefg")
-        self.assertEqual(writer.size(), 12)
-        self.assertEqual("01abcdefg123", writer.readfrom(0, 15))
-        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 7ac66c0f148de9519b8bd264312c4d64+7 0:2:count.txt 10:7:count.txt 1:3:count.txt\n", export_manifest(c))
+        keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
+        with import_manifest('. 781e5e245d69b566979b86e28d23f2c7+10 0:4:count.txt 0:4:count.txt 0:4:count.txt',
+                             keep=keep) as c:
+            writer = c.open("count.txt", "r+")
+            self.assertEqual("012301230123", writer.readfrom(0, 15))
+            writer.seek(2, os.SEEK_SET)
+            writer.write("abcdefg")
+            self.assertEqual(writer.size(), 12)
+            self.assertEqual("01abcdefg123", writer.readfrom(0, 15))
+            self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 7ac66c0f148de9519b8bd264312c4d64+7 0:2:count.txt 10:7:count.txt 1:3:count.txt\n", export_manifest(c))
 
     def test_write_large(self):
-        c = import_manifest('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
-                              keep=ArvadosFileWriterTestCase.MockKeep({}))
-        writer = c.open("count.txt", "r+")
-        text = ''.join(["0123456789" for a in xrange(0, 100)])
-        for b in xrange(0, 100000):
-            writer.write(text)
-        self.assertEqual(writer.size(), 100000000)
-        self.assertEqual(". a5de24f4417cfba9d5825eadc2f4ca49+67108000 598cc1a4ccaef8ab6e4724d87e675d78+32892000 0:100000000:count.txt\n", export_manifest(c))
+        keep = ArvadosFileWriterTestCase.MockKeep({})
+        api = ArvadosFileWriterTestCase.MockApi({"name":"test_write_large",
+                                                 "manifest_text": ". a5de24f4417cfba9d5825eadc2f4ca49+67108000 598cc1a4ccaef8ab6e4724d87e675d78+32892000 0:100000000:count.txt\n"},
+                                                {"uuid":"zzzzz-4zz18-mockcollection0"})
+        with import_manifest('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
+                             api_client=api, keep=keep) as c:
+            writer = c.open("count.txt", "r+")
+            text = ''.join(["0123456789" for a in xrange(0, 100)])
+            for b in xrange(0, 100000):
+                writer.write(text)
+            self.assertEqual(writer.size(), 100000000)
+
+            self.assertEqual(None, c._manifest_locator)
+            self.assertEqual(True, c.modified())
+            c.save_as("test_write_large")
+            self.assertEqual("zzzzz-4zz18-mockcollection0", c._manifest_locator)
+            self.assertEqual(False, c.modified())
 
     def test_write_rewrite0(self):
-        c = import_manifest('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
-                              keep=ArvadosFileWriterTestCase.MockKeep({}))
-        writer = c.open("count.txt", "r+")
-        for b in xrange(0, 10):
-            writer.seek(0, os.SEEK_SET)
-            writer.write("0123456789")
-        writer.arvadosfile._repack_writes()
-        self.assertEqual(writer.size(), 10)
-        self.assertEqual("0123456789", writer.readfrom(0, 20))
-        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n", export_manifest(c))
+        keep = ArvadosFileWriterTestCase.MockKeep({})
+        with import_manifest('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
+                             keep=keep) as c:
+            writer = c.open("count.txt", "r+")
+            for b in xrange(0, 10):
+                writer.seek(0, os.SEEK_SET)
+                writer.write("0123456789")
+            writer.arvadosfile._repack_writes()
+            self.assertEqual(writer.size(), 10)
+            self.assertEqual("0123456789", writer.readfrom(0, 20))
+            self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n", export_manifest(c))
 
     def test_write_rewrite1(self):
-        c = import_manifest('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt',
-                              keep=ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"}))
-        writer = c.open("count.txt", "r+")
-        for b in xrange(0, 10):
-            writer.seek(10, os.SEEK_SET)
-            writer.write("abcdefghij")
-        writer.arvadosfile._repack_writes()
-        self.assertEqual(writer.size(), 20)
-        self.assertEqual("0123456789abcdefghij", writer.readfrom(0, 20))
-        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 a925576942e94b2ef57a066101b48876+10 0:20:count.txt\n", export_manifest(c))
+        keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
+        with import_manifest('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt',
+                             keep=keep) as c:
+            writer = c.open("count.txt", "r+")
+            for b in xrange(0, 10):
+                writer.seek(10, os.SEEK_SET)
+                writer.write("abcdefghij")
+            writer.arvadosfile._repack_writes()
+            self.assertEqual(writer.size(), 20)
+            self.assertEqual("0123456789abcdefghij", writer.readfrom(0, 20))
+            self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 a925576942e94b2ef57a066101b48876+10 0:20:count.txt\n", export_manifest(c))
 
     def test_write_rewrite2(self):
-        c = import_manifest('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt',
-                              keep=ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"}))
-        writer = c.open("count.txt", "r+")
-        for b in xrange(0, 10):
-            writer.seek(5, os.SEEK_SET)
-            writer.write("abcdefghij")
-        writer.arvadosfile._repack_writes()
-        self.assertEqual(writer.size(), 15)
-        self.assertEqual("01234abcdefghij", writer.readfrom(0, 20))
-        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 a925576942e94b2ef57a066101b48876+10 0:5:count.txt 10:10:count.txt\n", export_manifest(c))
+        keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
+        with import_manifest('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt',
+                             keep=keep) as c:
+            writer = c.open("count.txt", "r+")
+            for b in xrange(0, 10):
+                writer.seek(5, os.SEEK_SET)
+                writer.write("abcdefghij")
+            writer.arvadosfile._repack_writes()
+            self.assertEqual(writer.size(), 15)
+            self.assertEqual("01234abcdefghij", writer.readfrom(0, 20))
+            self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 a925576942e94b2ef57a066101b48876+10 0:5:count.txt 10:10:count.txt\n", export_manifest(c))
+
+    def test_write_large_rewrite0(self):
+        keep = ArvadosFileWriterTestCase.MockKeep({})
+        api = ArvadosFileWriterTestCase.MockApi({"name":"test_write_large",
+                                                 "manifest_text": ". 37400a68af9abdd76ca5bf13e819e42a+32892003 a5de24f4417cfba9d5825eadc2f4ca49+67108000 32892000:3:count.txt 32892006:67107997:count.txt 0:32892000:count.txt\n"},
+                                                {"uuid":"zzzzz-4zz18-mockcollection0"})
+        with import_manifest('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
+                             api_client=api, keep=keep) as c:
+            writer = c.open("count.txt", "r+")
+            text = ''.join(["0123456789" for a in xrange(0, 100)])
+            for b in xrange(0, 100000):
+                writer.write(text)
+            writer.seek(0, os.SEEK_SET)
+            writer.write("foo")
+            self.assertEqual(writer.size(), 100000000)
+
+            self.assertEqual(None, c._manifest_locator)
+            self.assertEqual(True, c.modified())
+            c.save_as("test_write_large")
+            self.assertEqual("zzzzz-4zz18-mockcollection0", c._manifest_locator)
+            self.assertEqual(False, c.modified())
+
+    def test_create(self):
+        keep = ArvadosFileWriterTestCase.MockKeep({})
+        api = ArvadosFileWriterTestCase.MockApi({"name":"test_create",
+                                                 "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n"},
+                                                {"uuid":"zzzzz-4zz18-mockcollection0"})
+        with arvados.Collection(api_client=api, keep_client=keep) as c:
+            writer = c.open("count.txt", "w+")
+            self.assertEqual(writer.size(), 0)
+            writer.write("01234567")
+            self.assertEqual(writer.size(), 8)
+
+            self.assertEqual(None, c._manifest_locator)
+            self.assertEqual(True, c.modified())
+            self.assertEqual(None, keep.get("2e9ec317e197819358fbc43afca7d837+8"))
+            c.save_as("test_create")
+            self.assertEqual("zzzzz-4zz18-mockcollection0", c._manifest_locator)
+            self.assertEqual(False, c.modified())
+            self.assertEqual("01234567", keep.get("2e9ec317e197819358fbc43afca7d837+8"))
+
+
+    def test_create_subdir(self):
+        keep = ArvadosFileWriterTestCase.MockKeep({})
+        api = ArvadosFileWriterTestCase.MockApi({"name":"test_create",
+                                                 "manifest_text":"./foo/bar 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n"},
+                                                {"uuid":"zzzzz-4zz18-mockcollection0"})
+        with arvados.Collection(api_client=api, keep_client=keep) as c:
+            writer = c.open("foo/bar/count.txt", "w+")
+            writer.write("01234567")
+            c.save_as("test_create")
+
+    def test_overwrite(self):
+        keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
+        api = ArvadosFileWriterTestCase.MockApi({"name":"test_overwrite",
+                                                 "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n"},
+                                                {"uuid":"zzzzz-4zz18-mockcollection0"})
+        with import_manifest('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
+                             api_client=api, keep=keep) as c:
+            writer = c.open("count.txt", "w+")
+            self.assertEqual(writer.size(), 0)
+            writer.write("01234567")
+            self.assertEqual(writer.size(), 8)
+
+            self.assertEqual(None, c._manifest_locator)
+            self.assertEqual(True, c.modified())
+            c.save_as("test_overwrite")
+            self.assertEqual("zzzzz-4zz18-mockcollection0", c._manifest_locator)
+            self.assertEqual(False, c.modified())
+
+    def test_file_not_found(self):
+        with import_manifest('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n') as c:
+            with self.assertRaises(IOError):
+                writer = c.open("nocount.txt", "r")
+
+    def test_cannot_open_directory(self):
+        with import_manifest('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n') as c:
+            with self.assertRaises(IOError):
+                writer = c.open(".", "r")
+
+    def test_create_multiple(self):
+        keep = ArvadosFileWriterTestCase.MockKeep({})
+        api = ArvadosFileWriterTestCase.MockApi({"name":"test_create_multiple",
+                                                 "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 e8dc4081b13434b45189a720b77b6818+8 0:8:count1.txt 8:8:count2.txt\n"},
+                                                {"uuid":"zzzzz-4zz18-mockcollection0"})
+        with arvados.Collection(api_client=api, keep_client=keep) as c:
+            w1 = c.open("count1.txt", "w")
+            w2 = c.open("count2.txt", "w")
+            w1.write("01234567")
+            w2.write("abcdefgh")
+            self.assertEqual(w1.size(), 8)
+            self.assertEqual(w2.size(), 8)
+
+            self.assertEqual(None, c._manifest_locator)
+            self.assertEqual(True, c.modified())
+            self.assertEqual(None, keep.get("2e9ec317e197819358fbc43afca7d837+8"))
+            c.save_as("test_create_multiple")
+            self.assertEqual("zzzzz-4zz18-mockcollection0", c._manifest_locator)
+            self.assertEqual(False, c.modified())
+            self.assertEqual("01234567", keep.get("2e9ec317e197819358fbc43afca7d837+8"))
+
+    def test_remove(self):
+        with import_manifest('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n') as c:
+            self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n", export_manifest(c))
+            self.assertTrue("count1.txt" in c)
+            c.remove("count1.txt")
+            self.assertFalse("count1.txt" in c)
+            self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", export_manifest(c))
+
+    def test_remove_in_subdir(self):
+        with import_manifest('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n') as c:
+            c.remove("foo/count2.txt")
+            self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", export_manifest(c))
+
+    def test_remove_subdir(self):
+        with import_manifest('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n') as c:
+            c.remove("foo")
+            self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", export_manifest(c))
+
+    def test_prefetch(self):
+        keep = ArvadosFileWriterTestCase.MockKeep({"2e9ec317e197819358fbc43afca7d837+8": "01234567", "e8dc4081b13434b45189a720b77b6818+8": "abcdefgh"})
+        with import_manifest(". 2e9ec317e197819358fbc43afca7d837+8 e8dc4081b13434b45189a720b77b6818+8 0:16:count.txt\n", keep=keep) as c:
+            r = c.open("count.txt", "r")
+            self.assertEqual("0123", r.read(4))
+        self.assertEqual(["2e9ec317e197819358fbc43afca7d837+8", "2e9ec317e197819358fbc43afca7d837+8", "e8dc4081b13434b45189a720b77b6818+8"], keep.requests)

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list