[ARVADOS] updated: 5109db362064053ed6711169d6c414b2cb4e22fb
git at public.curoverse.com
git at public.curoverse.com
Tue Dec 30 14:50:28 EST 2014
Summary of changes:
sdk/python/arvados/arvfile.py | 93 +++++----
sdk/python/arvados/collection.py | 118 +++++++-----
sdk/python/tests/test_arvfile.py | 395 +++++++++++++++++++++++++++++----------
3 files changed, 424 insertions(+), 182 deletions(-)
via 5109db362064053ed6711169d6c414b2cb4e22fb (commit)
from 02e9754a68a5816458d517b8f5012530cf17ebba (commit)
Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.
commit 5109db362064053ed6711169d6c414b2cb4e22fb
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Tue Dec 30 14:51:55 2014 -0500
3198: Many tests. Fixed lots of bugs.
diff --git a/sdk/python/arvados/arvfile.py b/sdk/python/arvados/arvfile.py
index 1342601..8369272 100644
--- a/sdk/python/arvados/arvfile.py
+++ b/sdk/python/arvados/arvfile.py
@@ -251,10 +251,9 @@ class BlockManager(object):
self._bufferblocks = {}
self._put_queue = None
self._put_errors = None
- self._threads = None
- self._continue_worker = True
+ self._put_threads = None
self._prefetch_queue = None
- self._prefetch_thread = None
+ self._prefetch_threads = None
def alloc_bufferblock(self, blockid=None, starting_size=2**14):
if blockid is None:
@@ -263,25 +262,49 @@ class BlockManager(object):
self._bufferblocks[bb.blockid] = bb
return bb
+ def stop_threads(self):
+ if self._put_threads is not None:
+ for t in self._put_threads:
+ self._put_queue.put(None)
+ for t in self._put_threads:
+ t.join()
+ self._put_threads = None
+ self._put_queue = None
+ self._put_errors = None
+
+ if self._prefetch_threads is not None:
+ for t in self._prefetch_threads:
+ self._prefetch_queue.put(None)
+ for t in self._prefetch_threads:
+ t.join()
+ self._prefetch_threads = None
+ self._prefetch_queue = None
+
def commit_bufferblock(self, block):
def worker(self):
- while self._continue_worker:
+ while True:
try:
b = self._put_queue.get()
- b._locator = self._keep.put(item)
+ if b is None:
+ return
+ b._locator = self._keep.put(b.buffer_view[0:b.write_pointer].tobytes())
b.state = BufferBlock.COMMITTED
b.buffer_view = None
b.buffer_block = None
except Exception as e:
- self._error.put(e)
+ print e
+ self._put_errors.put(e)
finally:
- self._queue.task_done()
+ if self._put_queue is not None:
+ self._put_queue.task_done()
- if self._threads is None:
+ if self._put_threads is None:
self._put_queue = Queue.Queue()
self._put_errors = Queue.Queue()
- self._threads = [threading.Thread(target=worker, args=(self,)),
- threading.Thread(target=worker, args=(self,))]
+ self._put_threads = [threading.Thread(target=worker, args=(self,)),
+ threading.Thread(target=worker, args=(self,))]
+ self._put_threads[0].start()
+ self._put_threads[1].start()
block.state = BufferBlock.PENDING
self._put_queue.put(block)
@@ -296,42 +319,46 @@ class BlockManager(object):
return self._keep.get(locator, num_retries=num_retries)
def commit_all(self):
- for k,v in self._bufferblocks:
+ for k,v in self._bufferblocks.items():
if v.state == BufferBlock.WRITABLE:
self.commit_bufferblock(v)
- self._put_queue.join()
- if not self._errors.empty():
- e = []
- try:
- while True:
- e.append(self._errors.get(False))
- except Queue.Empty:
- pass
- raise AsyncKeepWriteErrors(e)
+ if self._put_queue is not None:
+ self._put_queue.join()
+ if not self._put_errors.empty():
+ e = []
+ try:
+ while True:
+ e.append(self._put_errors.get(False))
+ except Queue.Empty:
+ pass
+ raise AsyncKeepWriteErrors(e)
def block_prefetch(self, locator):
- def worker(keep, loc):
- while self._continue_worker:
+ def worker(self):
+ while True:
try:
b = self._prefetch_queue.get()
- keep.get(loc)
+ if b is None:
+ return
+ self._keep.get(b)
except:
pass
if locator in self._bufferblocks:
return
- if self._prefetch_thread is None:
+ if self._prefetch_threads is None:
self._prefetch_queue = Queue.Queue()
- self._prefetch_thread = threading.Thread(target=worker, args=(self,))
+ self._prefetch_threads = [threading.Thread(target=worker, args=(self,))]
+ self._prefetch_threads[0].start()
self._prefetch_queue.put(locator)
class ArvadosFile(object):
- def __init__(self, block_manager, stream=[], segments=[], keep=None):
+ def __init__(self, parent, stream=[], segments=[], keep=None):
'''
stream: a list of Range objects representing a block stream
segments: a list of Range objects representing segments
'''
- self.bbm = block_manager
+ self.parent = parent
self._modified = True
self._segments = []
for s in segments:
@@ -371,12 +398,12 @@ class ArvadosFile(object):
data = []
for lr in locators_and_ranges(self._segments, offset, size + config.KEEP_BLOCK_SIZE):
- self.bbm.block_prefetch(lr.locator)
+ self.parent._my_block_manager().block_prefetch(lr.locator)
for lr in locators_and_ranges(self._segments, offset, size):
# TODO: if data is empty, wait on block get, otherwise only
# get more data if the block is already in the cache.
- data.append(self.bbm.get_block(lr.locator, num_retries=num_retries)[lr.segment_offset:lr.segment_offset+lr.segment_size])
+ data.append(self.parent._my_block_manager().get_block(lr.locator, num_retries=num_retries)[lr.segment_offset:lr.segment_offset+lr.segment_size])
return ''.join(data)
def _repack_writes(self):
@@ -395,7 +422,7 @@ class ArvadosFile(object):
if write_total < self._current_bblock.size():
# There is more data in the buffer block than is actually accounted for by segments, so
# re-pack into a new buffer by copying over to a new buffer block.
- new_bb = self.bbm.alloc_bufferblock(self._current_bblock.blockid, starting_size=write_total)
+ new_bb = self.parent._my_block_manager().alloc_bufferblock(self._current_bblock.blockid, starting_size=write_total)
for t in bufferblock_segs:
new_bb.append(self._current_bblock.buffer_view[t.segment_offset:t.segment_offset+t.range_size].tobytes())
t.segment_offset = new_bb.size() - t.range_size
@@ -415,13 +442,13 @@ class ArvadosFile(object):
self._modified = True
if self._current_bblock is None or self._current_bblock.state != BufferBlock.WRITABLE:
- self._current_bblock = self.bbm.alloc_bufferblock()
+ self._current_bblock = self.parent._my_block_manager().alloc_bufferblock()
if (self._current_bblock.size() + len(data)) > config.KEEP_BLOCK_SIZE:
self._repack_writes()
if (self._current_bblock.size() + len(data)) > config.KEEP_BLOCK_SIZE:
- self.bbm.commit_bufferblock(self._current_bblock)
- self._current_bblock = self.bbm.alloc_bufferblock()
+ self.parent._my_block_manager().commit_bufferblock(self._current_bblock)
+ self._current_bblock = self.parent._my_block_manager().alloc_bufferblock()
self._current_bblock.append(data)
replace_range(self._segments, offset, len(data), self._current_bblock.blockid, self._current_bblock.write_pointer - len(data))
diff --git a/sdk/python/arvados/collection.py b/sdk/python/arvados/collection.py
index 85c7ad7..6602ed1 100644
--- a/sdk/python/arvados/collection.py
+++ b/sdk/python/arvados/collection.py
@@ -2,6 +2,7 @@ import functools
import logging
import os
import re
+import errno
from collections import deque
from stat import *
@@ -640,20 +641,20 @@ class ResumableCollectionWriter(CollectionWriter):
class Collection(CollectionBase):
- def __init__(self, manifest_locator_or_text=None, api_client=None,
+ def __init__(self, parent=None, manifest_locator_or_text=None, api_client=None,
keep_client=None, num_retries=0, block_manager=None):
+ self._parent = parent
self._items = None
self._api_client = api_client
self._keep_client = keep_client
+ self._block_manager = block_manager
+
self.num_retries = num_retries
self._manifest_locator = None
self._manifest_text = None
self._api_response = None
- if block_manager is None:
- self.block_manager = BlockManager(keep_client)
-
if manifest_locator_or_text:
if re.match(util.keep_locator_pattern, manifest_locator_or_text):
self._manifest_locator = manifest_locator_or_text
@@ -665,6 +666,29 @@ class Collection(CollectionBase):
raise errors.ArgumentError(
"Argument to CollectionReader must be a manifest or a collection UUID")
+ def _my_api(self):
+ if self._api_client is None:
+ if self._parent is not None:
+ return self._parent._my_api()
+ self._api_client = arvados.api('v1')
+ self._keep_client = None # Make a new one with the new api.
+ return self._api_client
+
+ def _my_keep(self):
+ if self._keep_client is None:
+ if self._parent is not None:
+ return self._parent._my_keep()
+ self._keep_client = KeepClient(api_client=self._my_api(),
+ num_retries=self.num_retries)
+ return self._keep_client
+
+ def _my_block_manager(self):
+ if self._block_manager is None:
+ if self._parent is not None:
+ return self._parent._my_block_manager()
+ self._block_manager = BlockManager(self._my_keep())
+ return self._block_manager
+
def _populate_from_api_server(self):
# As in KeepClient itself, we must wait until the last
# possible moment to instantiate an API client, in order to
@@ -674,10 +698,7 @@ class Collection(CollectionBase):
# clause, just like any other Collection lookup
# failure. Return an exception, or None if successful.
try:
- if self._api_client is None:
- self._api_client = arvados.api('v1')
- self._keep_client = None # Make a new one with the new api.
- self._api_response = self._api_client.collections().get(
+ self._api_response = self._my_api().collections().get(
uuid=self._manifest_locator).execute(
num_retries=self.num_retries)
self._manifest_text = self._api_response['manifest_text']
@@ -742,7 +763,9 @@ class Collection(CollectionBase):
return self
def __exit__(self, exc_type, exc_value, traceback):
- self.save()
+ self.save(no_locator=True)
+ if self._block_manager is not None:
+ self._block_manager.stop_threads()
@_populate_first
def find(self, path, create=False):
@@ -756,13 +779,13 @@ class Collection(CollectionBase):
# item must be a file
if item is None and create:
# create new file
- item = ArvadosFile(self.block_manager, keep=self._keep_client)
+ item = ArvadosFile(self, keep=self._keep_client)
self._items[p[0]] = item
return item
else:
if item is None and create:
# create new collection
- item = Collection(api_client=self._api_client, keep=self._keep_client, num_retries=self.num_retries, block_manager=self.block_manager)
+ item = Collection(parent=self, num_retries=self.num_retries)
self._items[p[0]] = item
del p[0]
return item.find("/".join(p), create=create)
@@ -787,9 +810,9 @@ class Collection(CollectionBase):
f = self.find(path, create=create)
if f is None:
- raise ArgumentError("File not found")
+ raise IOError((errno.ENOENT, "File not found"))
if not isinstance(f, ArvadosFile):
- raise ArgumentError("Path must refer to a file.")
+ raise IOError((errno.EISDIR, "Path must refer to a file."))
if mode[0] == "w":
f.truncate(0)
@@ -821,15 +844,11 @@ class Collection(CollectionBase):
@_populate_first
def __getitem__(self, k):
- r = self.find(k)
- if r:
- return r
- else:
- raise KeyError(k)
+ return self._items[k]
@_populate_first
def __contains__(self, k):
- return self.find(k) is not None
+ return k in self._items
@_populate_first
def __len__(self):
@@ -837,21 +856,7 @@ class Collection(CollectionBase):
@_populate_first
def __delitem__(self, p):
- p = path.split("/")
- if p[0] == '.':
- del p[0]
-
- if len(p) > 0:
- item = self._items.get(p[0])
- if item is None:
- raise NotFoundError()
- if len(p) == 1:
- del self._items[p[0]]
- else:
- del p[0]
- del item["/".join(p)]
- else:
- raise NotFoundError()
+ del self._items[p]
@_populate_first
def keys(self):
@@ -866,6 +871,28 @@ class Collection(CollectionBase):
return self._items.items()
@_populate_first
+ def exists(self, path):
+ return self.find(path) != None
+
+ @_populate_first
+ def remove(self, path):
+ p = path.split("/")
+ if p[0] == '.':
+ del p[0]
+
+ if len(p) > 0:
+ item = self._items.get(p[0])
+ if item is None:
+ raise IOError((errno.ENOENT, "File not found"))
+ if len(p) == 1:
+ del self._items[p[0]]
+ else:
+ del p[0]
+ item.remove("/".join(p))
+ else:
+ raise IOError((errno.ENOENT, "File not found"))
+
+ @_populate_first
def manifest_text(self, strip=False, normalize=False):
if self.modified() or self._manifest_text is None or normalize:
return export_manifest(self, stream_name=".", portable_locators=strip)
@@ -880,31 +907,29 @@ class Collection(CollectionBase):
return hashlib.md5(stripped).hexdigest() + '+' + str(len(stripped))
@_populate_first
- def commit_bufferblocks(self):
- pass
-
- @_populate_first
- def save(self):
+ def save(self, no_locator=False):
if self.modified():
+ self._my_block_manager().commit_all()
self._my_keep().put(self.manifest_text(strip=True))
- if re.match(util.collection_uuid_pattern, self._manifest_locator):
- self._api_response = self._api_client.collections().update(
+ if self._manifest_locator is not None and re.match(util.collection_uuid_pattern, self._manifest_locator):
+ self._api_response = self._my_api().collections().update(
uuid=self._manifest_locator,
body={'manifest_text': self.manifest_text(strip=False)}
).execute(
num_retries=self.num_retries)
- else:
+ elif not no_locator:
raise AssertionError("Collection manifest_locator must be a collection uuid. Use save_as() for new collections.")
self.set_unmodified()
@_populate_first
def save_as(self, name, owner_uuid=None):
+ self._my_block_manager().commit_all()
self._my_keep().put(self.manifest_text(strip=True))
body = {"manifest_text": self.manifest_text(strip=False),
"name": name}
if owner_uuid:
body["owner_uuid"] = owner_uuid
- self._api_response = self._api_client.collections().create(body=body).execute(num_retries=self.num_retries)
+ self._api_response = self._my_api().collections().create(body=body).execute(num_retries=self.num_retries)
self._manifest_locator = self._api_response["uuid"]
self.set_unmodified()
@@ -976,12 +1001,13 @@ def export_manifest(item, stream_name=".", portable_locators=False):
for s in v._segments:
loc = s.locator
if loc.startswith("bufferblock"):
- loc = v.bbm._bufferblocks[loc].locator()
+ loc = v.parent._my_block_manager()._bufferblocks[loc].locator()
st.append(LocatorAndRange(loc, locator_block_size(loc),
s.segment_offset, s.range_size))
stream[k] = st
- buf += ' '.join(normalize_stream(stream_name, stream))
- buf += "\n"
+ if stream:
+ buf += ' '.join(normalize_stream(stream_name, stream))
+ buf += "\n"
for k in [s for s in sorted_keys if isinstance(item[s], Collection)]:
buf += export_manifest(item[k], stream_name=os.path.join(stream_name, k))
elif isinstance(item, ArvadosFile):
diff --git a/sdk/python/tests/test_arvfile.py b/sdk/python/tests/test_arvfile.py
index 9edb2c5..7ccdf32 100644
--- a/sdk/python/tests/test_arvfile.py
+++ b/sdk/python/tests/test_arvfile.py
@@ -18,133 +18,322 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
class MockKeep(object):
def __init__(self, blocks):
self.blocks = blocks
+ self.requests = []
def get(self, locator, num_retries=0):
- return self.blocks[locator]
+ self.requests.append(locator)
+ return self.blocks.get(locator)
def put(self, data):
pdh = "%s+%i" % (hashlib.md5(data).hexdigest(), len(data))
self.blocks[pdh] = str(data)
return pdh
+ class MockApi(object):
+ def __init__(self, b, r):
+ self.b = b
+ self.r = r
+ class MockCollections(object):
+ def __init__(self, b, r):
+ self.b = b
+ self.r = r
+ class Execute(object):
+ def __init__(self, r):
+ self.r = r
+ def execute(self, num_retries=None):
+ return self.r
+ def create(self, body=None):
+ if body != self.b:
+ raise Exception("Body %s does not match expectation %s" % (body, self.b))
+ return ArvadosFileWriterTestCase.MockApi.MockCollections.Execute(self.r)
+ def update(self, uuid=None, body=None):
+ return ArvadosFileWriterTestCase.MockApi.MockCollections.Execute(self.r)
+ def collections(self):
+ return ArvadosFileWriterTestCase.MockApi.MockCollections(self.b, self.r)
+
+
def test_truncate(self):
- c = import_manifest('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
- keep=ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"}))
- writer = c.open("count.txt", "r+")
- self.assertEqual(writer.size(), 10)
- writer.seek(5)
- self.assertEqual("56789", writer.read(8))
- writer.truncate(8)
- writer.seek(5, os.SEEK_SET)
- self.assertEqual("567", writer.read(8))
+ keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
+ api = ArvadosFileWriterTestCase.MockApi({"name":"test_truncate",
+ "manifest_text":". 781e5e245d69b566979b86e28d23f2c7+10 0:8:count.txt\n"},
+ {"uuid":"zzzzz-4zz18-mockcollection0"})
+ with import_manifest('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
+ api_client=api, keep=keep) as c:
+ writer = c.open("count.txt", "r+")
+ self.assertEqual(writer.size(), 10)
+ writer.seek(5)
+ self.assertEqual("56789", writer.read(8))
+ writer.truncate(8)
+ writer.seek(5, os.SEEK_SET)
+ self.assertEqual("567", writer.read(8))
+
+ self.assertEqual(None, c._manifest_locator)
+ self.assertEqual(True, c.modified())
+ c.save_as("test_truncate")
+ self.assertEqual("zzzzz-4zz18-mockcollection0", c._manifest_locator)
+ self.assertEqual(False, c.modified())
def test_append(self):
- c = import_manifest('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
- keep=ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"}))
- writer = c.open("count.txt", "r+")
- writer.seek(5, os.SEEK_SET)
- self.assertEqual("56789", writer.read(8))
- writer.seek(10, os.SEEK_SET)
- writer.write("foo")
- self.assertEqual(writer.size(), 13)
- writer.seek(5, os.SEEK_SET)
- self.assertEqual("56789foo", writer.read(8))
+ keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
+ api = ArvadosFileWriterTestCase.MockApi({"name":"test_append",
+ "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:13:count.txt\n"},
+ {"uuid":"zzzzz-4zz18-mockcollection0"})
+ with import_manifest('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
+ api_client=api, keep=keep) as c:
+ writer = c.open("count.txt", "r+")
+ writer.seek(5, os.SEEK_SET)
+ self.assertEqual("56789", writer.read(8))
+ writer.seek(10, os.SEEK_SET)
+ writer.write("foo")
+ self.assertEqual(writer.size(), 13)
+ writer.seek(5, os.SEEK_SET)
+ self.assertEqual("56789foo", writer.read(8))
+
+ self.assertEqual(None, c._manifest_locator)
+ self.assertEqual(True, c.modified())
+ self.assertEqual(None, keep.get("acbd18db4cc2f85cedef654fccc4a4d8+3"))
+ c.save_as("test_append")
+ self.assertEqual("zzzzz-4zz18-mockcollection0", c._manifest_locator)
+ self.assertEqual(False, c.modified())
+ self.assertEqual("foo", keep.get("acbd18db4cc2f85cedef654fccc4a4d8+3"))
def test_write0(self):
- c = import_manifest('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
- keep=ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"}))
- writer = c.open("count.txt", "r+")
- self.assertEqual("0123456789", writer.readfrom(0, 13))
- writer.seek(0, os.SEEK_SET)
- writer.write("foo")
- self.assertEqual(writer.size(), 10)
- self.assertEqual("foo3456789", writer.readfrom(0, 13))
- self.assertEqual(". acbd18db4cc2f85cedef654fccc4a4d8+3 781e5e245d69b566979b86e28d23f2c7+10 0:3:count.txt 6:7:count.txt\n", export_manifest(c))
+ keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
+ with import_manifest('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
+ keep=keep) as c:
+ writer = c.open("count.txt", "r+")
+ self.assertEqual("0123456789", writer.readfrom(0, 13))
+ writer.seek(0, os.SEEK_SET)
+ writer.write("foo")
+ self.assertEqual(writer.size(), 10)
+ self.assertEqual("foo3456789", writer.readfrom(0, 13))
+ self.assertEqual(". acbd18db4cc2f85cedef654fccc4a4d8+3 781e5e245d69b566979b86e28d23f2c7+10 0:3:count.txt 6:7:count.txt\n", export_manifest(c))
def test_write1(self):
- c = import_manifest('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
- keep=ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"}))
- writer = c.open("count.txt", "r+")
- self.assertEqual("0123456789", writer.readfrom(0, 13))
- writer.seek(3, os.SEEK_SET)
- writer.write("foo")
- self.assertEqual(writer.size(), 10)
- self.assertEqual("012foo6789", writer.readfrom(0, 13))
- self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:count.txt 10:3:count.txt 6:4:count.txt\n", export_manifest(c))
+ keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
+ with import_manifest('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
+ keep=keep) as c:
+ writer = c.open("count.txt", "r+")
+ self.assertEqual("0123456789", writer.readfrom(0, 13))
+ writer.seek(3, os.SEEK_SET)
+ writer.write("foo")
+ self.assertEqual(writer.size(), 10)
+ self.assertEqual("012foo6789", writer.readfrom(0, 13))
+ self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:count.txt 10:3:count.txt 6:4:count.txt\n", export_manifest(c))
def test_write2(self):
- c = import_manifest('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
- keep=ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"}))
- writer = c.open("count.txt", "r+")
- self.assertEqual("0123456789", writer.readfrom(0, 13))
- writer.seek(7, os.SEEK_SET)
- writer.write("foo")
- self.assertEqual(writer.size(), 10)
- self.assertEqual("0123456foo", writer.readfrom(0, 13))
- self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:7:count.txt 10:3:count.txt\n", export_manifest(c))
+ keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
+ with import_manifest('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
+ keep=keep) as c:
+ writer = c.open("count.txt", "r+")
+ self.assertEqual("0123456789", writer.readfrom(0, 13))
+ writer.seek(7, os.SEEK_SET)
+ writer.write("foo")
+ self.assertEqual(writer.size(), 10)
+ self.assertEqual("0123456foo", writer.readfrom(0, 13))
+ self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:7:count.txt 10:3:count.txt\n", export_manifest(c))
def test_write3(self):
- c = import_manifest('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt 0:10:count.txt\n',
- keep=ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"}))
- writer = c.open("count.txt", "r+")
- self.assertEqual("012345678901234", writer.readfrom(0, 15))
- writer.seek(7, os.SEEK_SET)
- writer.write("foobar")
- self.assertEqual(writer.size(), 20)
- self.assertEqual("0123456foobar34", writer.readfrom(0, 15))
- self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 3858f62230ac3c915f300c664312c63f+6 0:7:count.txt 10:6:count.txt 3:7:count.txt\n", export_manifest(c))
+ keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
+ with import_manifest('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt 0:10:count.txt\n',
+ keep=keep) as c:
+ writer = c.open("count.txt", "r+")
+ self.assertEqual("012345678901234", writer.readfrom(0, 15))
+ writer.seek(7, os.SEEK_SET)
+ writer.write("foobar")
+ self.assertEqual(writer.size(), 20)
+ self.assertEqual("0123456foobar34", writer.readfrom(0, 15))
+ self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 3858f62230ac3c915f300c664312c63f+6 0:7:count.txt 10:6:count.txt 3:7:count.txt\n", export_manifest(c))
def test_write4(self):
- c = import_manifest('. 781e5e245d69b566979b86e28d23f2c7+10 0:4:count.txt 0:4:count.txt 0:4:count.txt',
- keep=ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"}))
- writer = c.open("count.txt", "r+")
- self.assertEqual("012301230123", writer.readfrom(0, 15))
- writer.seek(2, os.SEEK_SET)
- writer.write("abcdefg")
- self.assertEqual(writer.size(), 12)
- self.assertEqual("01abcdefg123", writer.readfrom(0, 15))
- self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 7ac66c0f148de9519b8bd264312c4d64+7 0:2:count.txt 10:7:count.txt 1:3:count.txt\n", export_manifest(c))
+ keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
+ with import_manifest('. 781e5e245d69b566979b86e28d23f2c7+10 0:4:count.txt 0:4:count.txt 0:4:count.txt',
+ keep=keep) as c:
+ writer = c.open("count.txt", "r+")
+ self.assertEqual("012301230123", writer.readfrom(0, 15))
+ writer.seek(2, os.SEEK_SET)
+ writer.write("abcdefg")
+ self.assertEqual(writer.size(), 12)
+ self.assertEqual("01abcdefg123", writer.readfrom(0, 15))
+ self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 7ac66c0f148de9519b8bd264312c4d64+7 0:2:count.txt 10:7:count.txt 1:3:count.txt\n", export_manifest(c))
def test_write_large(self):
- c = import_manifest('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
- keep=ArvadosFileWriterTestCase.MockKeep({}))
- writer = c.open("count.txt", "r+")
- text = ''.join(["0123456789" for a in xrange(0, 100)])
- for b in xrange(0, 100000):
- writer.write(text)
- self.assertEqual(writer.size(), 100000000)
- self.assertEqual(". a5de24f4417cfba9d5825eadc2f4ca49+67108000 598cc1a4ccaef8ab6e4724d87e675d78+32892000 0:100000000:count.txt\n", export_manifest(c))
+ keep = ArvadosFileWriterTestCase.MockKeep({})
+ api = ArvadosFileWriterTestCase.MockApi({"name":"test_write_large",
+ "manifest_text": ". a5de24f4417cfba9d5825eadc2f4ca49+67108000 598cc1a4ccaef8ab6e4724d87e675d78+32892000 0:100000000:count.txt\n"},
+ {"uuid":"zzzzz-4zz18-mockcollection0"})
+ with import_manifest('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
+ api_client=api, keep=keep) as c:
+ writer = c.open("count.txt", "r+")
+ text = ''.join(["0123456789" for a in xrange(0, 100)])
+ for b in xrange(0, 100000):
+ writer.write(text)
+ self.assertEqual(writer.size(), 100000000)
+
+ self.assertEqual(None, c._manifest_locator)
+ self.assertEqual(True, c.modified())
+ c.save_as("test_write_large")
+ self.assertEqual("zzzzz-4zz18-mockcollection0", c._manifest_locator)
+ self.assertEqual(False, c.modified())
def test_write_rewrite0(self):
- c = import_manifest('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
- keep=ArvadosFileWriterTestCase.MockKeep({}))
- writer = c.open("count.txt", "r+")
- for b in xrange(0, 10):
- writer.seek(0, os.SEEK_SET)
- writer.write("0123456789")
- writer.arvadosfile._repack_writes()
- self.assertEqual(writer.size(), 10)
- self.assertEqual("0123456789", writer.readfrom(0, 20))
- self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n", export_manifest(c))
+ keep = ArvadosFileWriterTestCase.MockKeep({})
+ with import_manifest('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
+ keep=keep) as c:
+ writer = c.open("count.txt", "r+")
+ for b in xrange(0, 10):
+ writer.seek(0, os.SEEK_SET)
+ writer.write("0123456789")
+ writer.arvadosfile._repack_writes()
+ self.assertEqual(writer.size(), 10)
+ self.assertEqual("0123456789", writer.readfrom(0, 20))
+ self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n", export_manifest(c))
def test_write_rewrite1(self):
- c = import_manifest('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt',
- keep=ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"}))
- writer = c.open("count.txt", "r+")
- for b in xrange(0, 10):
- writer.seek(10, os.SEEK_SET)
- writer.write("abcdefghij")
- writer.arvadosfile._repack_writes()
- self.assertEqual(writer.size(), 20)
- self.assertEqual("0123456789abcdefghij", writer.readfrom(0, 20))
- self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 a925576942e94b2ef57a066101b48876+10 0:20:count.txt\n", export_manifest(c))
+ keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
+ with import_manifest('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt',
+ keep=keep) as c:
+ writer = c.open("count.txt", "r+")
+ for b in xrange(0, 10):
+ writer.seek(10, os.SEEK_SET)
+ writer.write("abcdefghij")
+ writer.arvadosfile._repack_writes()
+ self.assertEqual(writer.size(), 20)
+ self.assertEqual("0123456789abcdefghij", writer.readfrom(0, 20))
+ self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 a925576942e94b2ef57a066101b48876+10 0:20:count.txt\n", export_manifest(c))
def test_write_rewrite2(self):
- c = import_manifest('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt',
- keep=ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"}))
- writer = c.open("count.txt", "r+")
- for b in xrange(0, 10):
- writer.seek(5, os.SEEK_SET)
- writer.write("abcdefghij")
- writer.arvadosfile._repack_writes()
- self.assertEqual(writer.size(), 15)
- self.assertEqual("01234abcdefghij", writer.readfrom(0, 20))
- self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 a925576942e94b2ef57a066101b48876+10 0:5:count.txt 10:10:count.txt\n", export_manifest(c))
+ keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
+ with import_manifest('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt',
+ keep=keep) as c:
+ writer = c.open("count.txt", "r+")
+ for b in xrange(0, 10):
+ writer.seek(5, os.SEEK_SET)
+ writer.write("abcdefghij")
+ writer.arvadosfile._repack_writes()
+ self.assertEqual(writer.size(), 15)
+ self.assertEqual("01234abcdefghij", writer.readfrom(0, 20))
+ self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 a925576942e94b2ef57a066101b48876+10 0:5:count.txt 10:10:count.txt\n", export_manifest(c))
+
+ def test_write_large_rewrite0(self):
+ keep = ArvadosFileWriterTestCase.MockKeep({})
+ api = ArvadosFileWriterTestCase.MockApi({"name":"test_write_large",
+ "manifest_text": ". 37400a68af9abdd76ca5bf13e819e42a+32892003 a5de24f4417cfba9d5825eadc2f4ca49+67108000 32892000:3:count.txt 32892006:67107997:count.txt 0:32892000:count.txt\n"},
+ {"uuid":"zzzzz-4zz18-mockcollection0"})
+ with import_manifest('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
+ api_client=api, keep=keep) as c:
+ writer = c.open("count.txt", "r+")
+ text = ''.join(["0123456789" for a in xrange(0, 100)])
+ for b in xrange(0, 100000):
+ writer.write(text)
+ writer.seek(0, os.SEEK_SET)
+ writer.write("foo")
+ self.assertEqual(writer.size(), 100000000)
+
+ self.assertEqual(None, c._manifest_locator)
+ self.assertEqual(True, c.modified())
+ c.save_as("test_write_large")
+ self.assertEqual("zzzzz-4zz18-mockcollection0", c._manifest_locator)
+ self.assertEqual(False, c.modified())
+
+ def test_create(self):
+ keep = ArvadosFileWriterTestCase.MockKeep({})
+ api = ArvadosFileWriterTestCase.MockApi({"name":"test_create",
+ "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n"},
+ {"uuid":"zzzzz-4zz18-mockcollection0"})
+ with arvados.Collection(api_client=api, keep_client=keep) as c:
+ writer = c.open("count.txt", "w+")
+ self.assertEqual(writer.size(), 0)
+ writer.write("01234567")
+ self.assertEqual(writer.size(), 8)
+
+ self.assertEqual(None, c._manifest_locator)
+ self.assertEqual(True, c.modified())
+ self.assertEqual(None, keep.get("2e9ec317e197819358fbc43afca7d837+8"))
+ c.save_as("test_create")
+ self.assertEqual("zzzzz-4zz18-mockcollection0", c._manifest_locator)
+ self.assertEqual(False, c.modified())
+ self.assertEqual("01234567", keep.get("2e9ec317e197819358fbc43afca7d837+8"))
+
+
+ def test_create_subdir(self):
+ keep = ArvadosFileWriterTestCase.MockKeep({})
+ api = ArvadosFileWriterTestCase.MockApi({"name":"test_create",
+ "manifest_text":"./foo/bar 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n"},
+ {"uuid":"zzzzz-4zz18-mockcollection0"})
+ with arvados.Collection(api_client=api, keep_client=keep) as c:
+ writer = c.open("foo/bar/count.txt", "w+")
+ writer.write("01234567")
+ c.save_as("test_create")
+
+ def test_overwrite(self):
+ keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
+ api = ArvadosFileWriterTestCase.MockApi({"name":"test_overwrite",
+ "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n"},
+ {"uuid":"zzzzz-4zz18-mockcollection0"})
+ with import_manifest('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
+ api_client=api, keep=keep) as c:
+ writer = c.open("count.txt", "w+")
+ self.assertEqual(writer.size(), 0)
+ writer.write("01234567")
+ self.assertEqual(writer.size(), 8)
+
+ self.assertEqual(None, c._manifest_locator)
+ self.assertEqual(True, c.modified())
+ c.save_as("test_overwrite")
+ self.assertEqual("zzzzz-4zz18-mockcollection0", c._manifest_locator)
+ self.assertEqual(False, c.modified())
+
+ def test_file_not_found(self):
+ with import_manifest('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n') as c:
+ with self.assertRaises(IOError):
+ writer = c.open("nocount.txt", "r")
+
+ def test_cannot_open_directory(self):
+ with import_manifest('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n') as c:
+ with self.assertRaises(IOError):
+ writer = c.open(".", "r")
+
+ def test_create_multiple(self):
+ keep = ArvadosFileWriterTestCase.MockKeep({})
+ api = ArvadosFileWriterTestCase.MockApi({"name":"test_create_multiple",
+ "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 e8dc4081b13434b45189a720b77b6818+8 0:8:count1.txt 8:8:count2.txt\n"},
+ {"uuid":"zzzzz-4zz18-mockcollection0"})
+ with arvados.Collection(api_client=api, keep_client=keep) as c:
+ w1 = c.open("count1.txt", "w")
+ w2 = c.open("count2.txt", "w")
+ w1.write("01234567")
+ w2.write("abcdefgh")
+ self.assertEqual(w1.size(), 8)
+ self.assertEqual(w2.size(), 8)
+
+ self.assertEqual(None, c._manifest_locator)
+ self.assertEqual(True, c.modified())
+ self.assertEqual(None, keep.get("2e9ec317e197819358fbc43afca7d837+8"))
+ c.save_as("test_create_multiple")
+ self.assertEqual("zzzzz-4zz18-mockcollection0", c._manifest_locator)
+ self.assertEqual(False, c.modified())
+ self.assertEqual("01234567", keep.get("2e9ec317e197819358fbc43afca7d837+8"))
+
+ def test_remove(self):
+ with import_manifest('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n') as c:
+ self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n", export_manifest(c))
+ self.assertTrue("count1.txt" in c)
+ c.remove("count1.txt")
+ self.assertFalse("count1.txt" in c)
+ self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", export_manifest(c))
+
+ def test_remove_in_subdir(self):
+ with import_manifest('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n') as c:
+ c.remove("foo/count2.txt")
+ self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", export_manifest(c))
+
+ def test_remove_subdir(self):
+ with import_manifest('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n') as c:
+ c.remove("foo")
+ self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", export_manifest(c))
+
+ def test_prefetch(self):
+ keep = ArvadosFileWriterTestCase.MockKeep({"2e9ec317e197819358fbc43afca7d837+8": "01234567", "e8dc4081b13434b45189a720b77b6818+8": "abcdefgh"})
+ with import_manifest(". 2e9ec317e197819358fbc43afca7d837+8 e8dc4081b13434b45189a720b77b6818+8 0:16:count.txt\n", keep=keep) as c:
+ r = c.open("count.txt", "r")
+ self.assertEqual("0123", r.read(4))
+ self.assertEqual(["2e9ec317e197819358fbc43afca7d837+8", "2e9ec317e197819358fbc43afca7d837+8", "e8dc4081b13434b45189a720b77b6818+8"], keep.requests)
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list