[ARVADOS] updated: 31568ea331306a574be758fc60b090ecee3bc005
Git user
git at public.curoverse.com
Mon Oct 10 07:55:33 EDT 2016
Summary of changes:
sdk/python/arvados/arvfile.py | 28 ++++++++++++++++++----------
sdk/python/tests/test_collections.py | 2 +-
2 files changed, 19 insertions(+), 11 deletions(-)
via 31568ea331306a574be758fc60b090ecee3bc005 (commit)
via c9180edfa0d7306b9a533a0ed15dd90eca5cfae1 (commit)
via 84f2296c5c6563f2d61a208d5c427d98003bfecd (commit)
via 3513c7def7eacdeef16c355f1b9be93830dcf946 (commit)
via 14a9cddd966bd1035c48e5fbac5065555ad7bb92 (commit)
via 800a66e4d96c1fb341d643549d871d36e598ea31 (commit)
from bf6ef981cea7e923a085c0a9231cebb379c7560a (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
commit 31568ea331306a574be758fc60b090ecee3bc005
Author: Lucas Di Pentima <lucas at curoverse.com>
Date: Mon Oct 10 08:45:31 2016 -0300
9701: Added clarifying comments to the small block searching list comprehension.
diff --git a/sdk/python/arvados/arvfile.py b/sdk/python/arvados/arvfile.py
index a23f453..5bac10e 100644
--- a/sdk/python/arvados/arvfile.py
+++ b/sdk/python/arvados/arvfile.py
@@ -555,6 +555,9 @@ class _BlockManager(object):
def repack_small_blocks(self, force=False, sync=False):
"""Packs small blocks together before uploading"""
# Search blocks ready for getting packed together before being committed to Keep.
+ # A WRITABLE block always has an owner.
+ # A WRITABLE block with its owner.closed() implies that its
+ # size is <= KEEP_BLOCK_SIZE/2.
small_blocks = [b for b in self._bufferblocks.values() if b.state() == _BufferBlock.WRITABLE and b.owner.closed()]
if len(small_blocks) <= 1:
# Not enough small blocks for repacking
commit c9180edfa0d7306b9a533a0ed15dd90eca5cfae1
Author: Lucas Di Pentima <lucas at curoverse.com>
Date: Mon Oct 10 08:40:17 2016 -0300
9701: Better bufferblock id generation.
diff --git a/sdk/python/arvados/arvfile.py b/sdk/python/arvados/arvfile.py
index c72225b..a23f453 100644
--- a/sdk/python/arvados/arvfile.py
+++ b/sdk/python/arvados/arvfile.py
@@ -11,6 +11,7 @@ import errno
import re
import logging
import collections
+import uuid
from .errors import KeepWriteError, AssertionError, ArgumentError
from .keep import KeepLocator
@@ -432,7 +433,7 @@ class _BlockManager(object):
"""
if blockid is None:
- blockid = "bufferblock%i" % len(self._bufferblocks)
+ blockid = "%s" % uuid.uuid4()
bufferblock = _BufferBlock(blockid, starting_capacity=starting_capacity, owner=owner)
self._bufferblocks[bufferblock.blockid] = bufferblock
return bufferblock
@@ -562,7 +563,7 @@ class _BlockManager(object):
# Check if there are enough small blocks for filling up one in full
pending_write_size = sum([b.size() for b in small_blocks])
if force or (pending_write_size >= config.KEEP_BLOCK_SIZE):
- new_bb = _BufferBlock("bufferblock%i" % len(self._bufferblocks), 2**14, None)
+ new_bb = _BufferBlock("%s" % uuid.uuid4(), 2**14, None)
self._bufferblocks[new_bb.blockid] = new_bb
while len(small_blocks) > 0 and (new_bb.write_pointer + small_blocks[0].size()) <= config.KEEP_BLOCK_SIZE:
bb = small_blocks.pop(0)
commit 84f2296c5c6563f2d61a208d5c427d98003bfecd
Author: Lucas Di Pentima <lucas at curoverse.com>
Date: Mon Oct 10 08:33:40 2016 -0300
9701: Superfluous variable eliminated
diff --git a/sdk/python/arvados/arvfile.py b/sdk/python/arvados/arvfile.py
index 85366d2..c72225b 100644
--- a/sdk/python/arvados/arvfile.py
+++ b/sdk/python/arvados/arvfile.py
@@ -564,13 +564,14 @@ class _BlockManager(object):
if force or (pending_write_size >= config.KEEP_BLOCK_SIZE):
new_bb = _BufferBlock("bufferblock%i" % len(self._bufferblocks), 2**14, None)
self._bufferblocks[new_bb.blockid] = new_bb
- size = 0
- while len(small_blocks) > 0 and (size + small_blocks[0].size()) <= config.KEEP_BLOCK_SIZE:
+ while len(small_blocks) > 0 and (new_bb.write_pointer + small_blocks[0].size()) <= config.KEEP_BLOCK_SIZE:
bb = small_blocks.pop(0)
- size += bb.size()
arvfile = bb.owner
new_bb.append(bb.buffer_view[0:bb.write_pointer].tobytes())
- arvfile.set_segments([Range(new_bb.blockid, 0, bb.size(), size-bb.size())])
+ arvfile.set_segments([Range(new_bb.blockid,
+ 0,
+ bb.size(),
+ new_bb.write_pointer - bb.size())])
bb.clear()
del self._bufferblocks[bb.blockid]
self.commit_bufferblock(new_bb, sync=sync)
commit 3513c7def7eacdeef16c355f1b9be93830dcf946
Author: Lucas Di Pentima <lucas at curoverse.com>
Date: Mon Oct 10 08:18:05 2016 -0300
9701: Use a collections.OrderedDict instead of a simple dict to hold bufferblocks so that the order of the packed files is consistent. Updated the related test.
diff --git a/sdk/python/arvados/arvfile.py b/sdk/python/arvados/arvfile.py
index 3d5e921..85366d2 100644
--- a/sdk/python/arvados/arvfile.py
+++ b/sdk/python/arvados/arvfile.py
@@ -10,6 +10,7 @@ import copy
import errno
import re
import logging
+import collections
from .errors import KeepWriteError, AssertionError, ArgumentError
from .keep import KeepLocator
@@ -405,7 +406,7 @@ class _BlockManager(object):
def __init__(self, keep, copies=None):
"""keep: KeepClient object to use"""
self._keep = keep
- self._bufferblocks = {}
+ self._bufferblocks = collections.OrderedDict()
self._put_queue = None
self._put_threads = None
self._prefetch_queue = None
diff --git a/sdk/python/tests/test_collections.py b/sdk/python/tests/test_collections.py
index 0767f2a..8d3e9d6 100644
--- a/sdk/python/tests/test_collections.py
+++ b/sdk/python/tests/test_collections.py
@@ -1119,7 +1119,7 @@ class NewCollectionTestCaseWithServers(run_test_server.TestCaseWithServers):
big.write("x" * 1024 * 1024 * 33) # 33 MB > KEEP_BLOCK_SIZE/2
self.assertEqual(
c.manifest_text("."),
- '. 2d303c138c118af809f39319e5d507e9+34603008 e62e558e58131771aae2fd0175cdbf2a+13 0:34603008:bigfile.txt 34603011:10:count.txt 34603008:3:foo.txt\n')
+ '. 2d303c138c118af809f39319e5d507e9+34603008 a8430a058b8fbf408e1931b794dbd6fb+13 0:34603008:bigfile.txt 34603008:10:count.txt 34603018:3:foo.txt\n')
class CollectionCreateUpdateTest(run_test_server.TestCaseWithServers):
commit 14a9cddd966bd1035c48e5fbac5065555ad7bb92
Author: Lucas Di Pentima <lucas at curoverse.com>
Date: Fri Oct 7 17:59:16 2016 -0300
9701: Simplifying small bufferblock query
diff --git a/sdk/python/arvados/arvfile.py b/sdk/python/arvados/arvfile.py
index 7100d05..3d5e921 100644
--- a/sdk/python/arvados/arvfile.py
+++ b/sdk/python/arvados/arvfile.py
@@ -552,8 +552,8 @@ class _BlockManager(object):
@synchronized
def repack_small_blocks(self, force=False, sync=False):
"""Packs small blocks together before uploading"""
- # Search blocks ready for getting packed together before being committed to Keep
- small_blocks = [b for b in self._bufferblocks.values() if b.state() == _BufferBlock.WRITABLE and b.owner and b.owner.closed() and b.owner.size() <= (config.KEEP_BLOCK_SIZE / 2)]
+ # Search blocks ready for getting packed together before being committed to Keep.
+ small_blocks = [b for b in self._bufferblocks.values() if b.state() == _BufferBlock.WRITABLE and b.owner.closed()]
if len(small_blocks) <= 1:
# Not enough small blocks for repacking
return
commit 800a66e4d96c1fb341d643549d871d36e598ea31
Author: Lucas Di Pentima <lucas at curoverse.com>
Date: Fri Oct 7 17:27:41 2016 -0300
9701: Set repack_small_blocks() method as @synchronized
diff --git a/sdk/python/arvados/arvfile.py b/sdk/python/arvados/arvfile.py
index a043bee..7100d05 100644
--- a/sdk/python/arvados/arvfile.py
+++ b/sdk/python/arvados/arvfile.py
@@ -549,6 +549,7 @@ class _BlockManager(object):
def __exit__(self, exc_type, exc_value, traceback):
self.stop_threads()
+ @synchronized
def repack_small_blocks(self, force=False, sync=False):
"""Packs small blocks together before uploading"""
# Search blocks ready for getting packed together before being committed to Keep
@@ -652,8 +653,9 @@ class _BlockManager(object):
are uploaded. Raises KeepWriteError() if any blocks failed to upload.
"""
+ self.repack_small_blocks(force=True, sync=True)
+
with self.lock:
- self.repack_small_blocks(force=True, sync=True)
items = self._bufferblocks.items()
for k,v in items:
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list