[ARVADOS] updated: 31568ea331306a574be758fc60b090ecee3bc005

Git user git at public.curoverse.com
Mon Oct 10 07:55:33 EDT 2016


Summary of changes:
 sdk/python/arvados/arvfile.py        | 28 ++++++++++++++++++----------
 sdk/python/tests/test_collections.py |  2 +-
 2 files changed, 19 insertions(+), 11 deletions(-)

       via  31568ea331306a574be758fc60b090ecee3bc005 (commit)
       via  c9180edfa0d7306b9a533a0ed15dd90eca5cfae1 (commit)
       via  84f2296c5c6563f2d61a208d5c427d98003bfecd (commit)
       via  3513c7def7eacdeef16c355f1b9be93830dcf946 (commit)
       via  14a9cddd966bd1035c48e5fbac5065555ad7bb92 (commit)
       via  800a66e4d96c1fb341d643549d871d36e598ea31 (commit)
      from  bf6ef981cea7e923a085c0a9231cebb379c7560a (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit 31568ea331306a574be758fc60b090ecee3bc005
Author: Lucas Di Pentima <lucas at curoverse.com>
Date:   Mon Oct 10 08:45:31 2016 -0300

    9701: Added clarifying comments to the small block searching list comprehension.

diff --git a/sdk/python/arvados/arvfile.py b/sdk/python/arvados/arvfile.py
index a23f453..5bac10e 100644
--- a/sdk/python/arvados/arvfile.py
+++ b/sdk/python/arvados/arvfile.py
@@ -555,6 +555,9 @@ class _BlockManager(object):
     def repack_small_blocks(self, force=False, sync=False):
         """Packs small blocks together before uploading"""
         # Search blocks ready for getting packed together before being committed to Keep.
+        # A WRITABLE block always has an owner.
+        # A WRITABLE block with its owner.closed() implies that its
+        # size is <= KEEP_BLOCK_SIZE/2.
         small_blocks = [b for b in self._bufferblocks.values() if b.state() == _BufferBlock.WRITABLE and b.owner.closed()]
         if len(small_blocks) <= 1:
             # Not enough small blocks for repacking

commit c9180edfa0d7306b9a533a0ed15dd90eca5cfae1
Author: Lucas Di Pentima <lucas at curoverse.com>
Date:   Mon Oct 10 08:40:17 2016 -0300

    9701: Better bufferblock id generation.

diff --git a/sdk/python/arvados/arvfile.py b/sdk/python/arvados/arvfile.py
index c72225b..a23f453 100644
--- a/sdk/python/arvados/arvfile.py
+++ b/sdk/python/arvados/arvfile.py
@@ -11,6 +11,7 @@ import errno
 import re
 import logging
 import collections
+import uuid
 
 from .errors import KeepWriteError, AssertionError, ArgumentError
 from .keep import KeepLocator
@@ -432,7 +433,7 @@ class _BlockManager(object):
 
         """
         if blockid is None:
-            blockid = "bufferblock%i" % len(self._bufferblocks)
+            blockid = "%s" % uuid.uuid4()
         bufferblock = _BufferBlock(blockid, starting_capacity=starting_capacity, owner=owner)
         self._bufferblocks[bufferblock.blockid] = bufferblock
         return bufferblock
@@ -562,7 +563,7 @@ class _BlockManager(object):
         # Check if there are enough small blocks for filling up one in full
         pending_write_size = sum([b.size() for b in small_blocks])
         if force or (pending_write_size >= config.KEEP_BLOCK_SIZE):
-            new_bb = _BufferBlock("bufferblock%i" % len(self._bufferblocks), 2**14, None)
+            new_bb = _BufferBlock("%s" % uuid.uuid4(), 2**14, None)
             self._bufferblocks[new_bb.blockid] = new_bb
             while len(small_blocks) > 0 and (new_bb.write_pointer + small_blocks[0].size()) <= config.KEEP_BLOCK_SIZE:
                 bb = small_blocks.pop(0)

commit 84f2296c5c6563f2d61a208d5c427d98003bfecd
Author: Lucas Di Pentima <lucas at curoverse.com>
Date:   Mon Oct 10 08:33:40 2016 -0300

    9701: Superfluous variable eliminated

diff --git a/sdk/python/arvados/arvfile.py b/sdk/python/arvados/arvfile.py
index 85366d2..c72225b 100644
--- a/sdk/python/arvados/arvfile.py
+++ b/sdk/python/arvados/arvfile.py
@@ -564,13 +564,14 @@ class _BlockManager(object):
         if force or (pending_write_size >= config.KEEP_BLOCK_SIZE):
             new_bb = _BufferBlock("bufferblock%i" % len(self._bufferblocks), 2**14, None)
             self._bufferblocks[new_bb.blockid] = new_bb
-            size = 0
-            while len(small_blocks) > 0 and (size + small_blocks[0].size()) <= config.KEEP_BLOCK_SIZE:
+            while len(small_blocks) > 0 and (new_bb.write_pointer + small_blocks[0].size()) <= config.KEEP_BLOCK_SIZE:
                 bb = small_blocks.pop(0)
-                size += bb.size()
                 arvfile = bb.owner
                 new_bb.append(bb.buffer_view[0:bb.write_pointer].tobytes())
-                arvfile.set_segments([Range(new_bb.blockid, 0, bb.size(), size-bb.size())])
+                arvfile.set_segments([Range(new_bb.blockid,
+                                            0,
+                                            bb.size(),
+                                            new_bb.write_pointer - bb.size())])
                 bb.clear()
                 del self._bufferblocks[bb.blockid]
             self.commit_bufferblock(new_bb, sync=sync)

commit 3513c7def7eacdeef16c355f1b9be93830dcf946
Author: Lucas Di Pentima <lucas at curoverse.com>
Date:   Mon Oct 10 08:18:05 2016 -0300

    9701: Use a collections.OrderedDict instead of a simple dict to hold bufferblocks so that the packed files order is consistent. Updated related test.

diff --git a/sdk/python/arvados/arvfile.py b/sdk/python/arvados/arvfile.py
index 3d5e921..85366d2 100644
--- a/sdk/python/arvados/arvfile.py
+++ b/sdk/python/arvados/arvfile.py
@@ -10,6 +10,7 @@ import copy
 import errno
 import re
 import logging
+import collections
 
 from .errors import KeepWriteError, AssertionError, ArgumentError
 from .keep import KeepLocator
@@ -405,7 +406,7 @@ class _BlockManager(object):
     def __init__(self, keep, copies=None):
         """keep: KeepClient object to use"""
         self._keep = keep
-        self._bufferblocks = {}
+        self._bufferblocks = collections.OrderedDict()
         self._put_queue = None
         self._put_threads = None
         self._prefetch_queue = None
diff --git a/sdk/python/tests/test_collections.py b/sdk/python/tests/test_collections.py
index 0767f2a..8d3e9d6 100644
--- a/sdk/python/tests/test_collections.py
+++ b/sdk/python/tests/test_collections.py
@@ -1119,7 +1119,7 @@ class NewCollectionTestCaseWithServers(run_test_server.TestCaseWithServers):
             big.write("x" * 1024 * 1024 * 33) # 33 MB > KEEP_BLOCK_SIZE/2
         self.assertEqual(
             c.manifest_text("."),
-            '. 2d303c138c118af809f39319e5d507e9+34603008 e62e558e58131771aae2fd0175cdbf2a+13 0:34603008:bigfile.txt 34603011:10:count.txt 34603008:3:foo.txt\n')
+            '. 2d303c138c118af809f39319e5d507e9+34603008 a8430a058b8fbf408e1931b794dbd6fb+13 0:34603008:bigfile.txt 34603008:10:count.txt 34603018:3:foo.txt\n')
 
 
 class CollectionCreateUpdateTest(run_test_server.TestCaseWithServers):

commit 14a9cddd966bd1035c48e5fbac5065555ad7bb92
Author: Lucas Di Pentima <lucas at curoverse.com>
Date:   Fri Oct 7 17:59:16 2016 -0300

    9701: Simplifying small bufferblock query

diff --git a/sdk/python/arvados/arvfile.py b/sdk/python/arvados/arvfile.py
index 7100d05..3d5e921 100644
--- a/sdk/python/arvados/arvfile.py
+++ b/sdk/python/arvados/arvfile.py
@@ -552,8 +552,8 @@ class _BlockManager(object):
     @synchronized
     def repack_small_blocks(self, force=False, sync=False):
         """Packs small blocks together before uploading"""
-        # Search blocks ready for getting packed together before being committed to Keep
-        small_blocks = [b for b in self._bufferblocks.values() if b.state() == _BufferBlock.WRITABLE and b.owner and b.owner.closed() and b.owner.size() <= (config.KEEP_BLOCK_SIZE / 2)]
+        # Search blocks ready for getting packed together before being committed to Keep.
+        small_blocks = [b for b in self._bufferblocks.values() if b.state() == _BufferBlock.WRITABLE and b.owner.closed()]
         if len(small_blocks) <= 1:
             # Not enough small blocks for repacking
             return

commit 800a66e4d96c1fb341d643549d871d36e598ea31
Author: Lucas Di Pentima <lucas at curoverse.com>
Date:   Fri Oct 7 17:27:41 2016 -0300

    9701: Set repack_small_blocks() method as @synchronized

diff --git a/sdk/python/arvados/arvfile.py b/sdk/python/arvados/arvfile.py
index a043bee..7100d05 100644
--- a/sdk/python/arvados/arvfile.py
+++ b/sdk/python/arvados/arvfile.py
@@ -549,6 +549,7 @@ class _BlockManager(object):
     def __exit__(self, exc_type, exc_value, traceback):
         self.stop_threads()
 
+    @synchronized
     def repack_small_blocks(self, force=False, sync=False):
         """Packs small blocks together before uploading"""
         # Search blocks ready for getting packed together before being committed to Keep
@@ -652,8 +653,9 @@ class _BlockManager(object):
         are uploaded.  Raises KeepWriteError() if any blocks failed to upload.
 
         """
+        self.repack_small_blocks(force=True, sync=True)
+
         with self.lock:
-            self.repack_small_blocks(force=True, sync=True)
             items = self._bufferblocks.items()
 
         for k,v in items:

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list