[ARVADOS] created: 7116da151dc8bfd5ac1a9b016b2ed6e4c35572f7

Git user git at public.curoverse.com
Thu Apr 20 17:45:43 EDT 2017


        at  7116da151dc8bfd5ac1a9b016b2ed6e4c35572f7 (commit)


commit 7116da151dc8bfd5ac1a9b016b2ed6e4c35572f7
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Thu Apr 20 17:45:37 2017 -0400

    11507: Put bufferblocks into DELETED state when deleted so they don't get
    reused accidentally if there are dangling references.  When packing small
    blocks, update segments with the block hash instead of the bufferblock id to
    avoid dangling references.

diff --git a/sdk/python/arvados/arvfile.py b/sdk/python/arvados/arvfile.py
index aad3ce1..accaaa0 100644
--- a/sdk/python/arvados/arvfile.py
+++ b/sdk/python/arvados/arvfile.py
@@ -289,6 +289,7 @@ class _BufferBlock(object):
     PENDING = 1
     COMMITTED = 2
     ERROR = 3
+    DELETED = 4
 
     def __init__(self, blockid, starting_capacity, owner):
         """
@@ -383,6 +384,7 @@ class _BufferBlock(object):
 
     @synchronized
     def clear(self):
+        self._state = _BufferBlock.DELETED
         self.owner = None
         self.buffer_block = None
         self.buffer_view = None
@@ -460,7 +462,7 @@ class _BlockManager(object):
 
     def _alloc_bufferblock(self, blockid=None, starting_capacity=2**14, owner=None):
         if blockid is None:
-            blockid = "%s" % uuid.uuid4()
+            blockid = str(uuid.uuid4())
         bufferblock = _BufferBlock(blockid, starting_capacity=starting_capacity, owner=owner)
         self._bufferblocks[bufferblock.blockid] = bufferblock
         return bufferblock
@@ -476,7 +478,7 @@ class _BlockManager(object):
           ArvadosFile that owns the new block
 
         """
-        new_blockid = "bufferblock%i" % len(self._bufferblocks)
+        new_blockid = str(uuid.uuid4())
         bufferblock = block.clone(new_blockid, owner)
         self._bufferblocks[bufferblock.blockid] = bufferblock
         return bufferblock
@@ -607,18 +609,21 @@ class _BlockManager(object):
                 return
 
             new_bb = self._alloc_bufferblock()
+            files = []
             while len(small_blocks) > 0 and (new_bb.write_pointer + small_blocks[0].size()) <= config.KEEP_BLOCK_SIZE:
                 bb = small_blocks.pop(0)
                 arvfile = bb.owner
                 self._pending_write_size -= bb.size()
                 new_bb.append(bb.buffer_view[0:bb.write_pointer].tobytes())
-                arvfile.set_segments([Range(new_bb.blockid,
-                                            0,
-                                            bb.size(),
-                                            new_bb.write_pointer - bb.size())])
-                self._delete_bufferblock(bb.blockid)
+                files.append((bb, new_bb.write_pointer - bb.size()))
+
             self.commit_bufferblock(new_bb, sync=sync)
 
+            for fn in files:
+                bb = fn[0]
+                bb.owner.set_segments([Range(new_bb.locator(), 0, bb.size(), fn[1])])
+                self._delete_bufferblock(bb.blockid)
+
     def commit_bufferblock(self, block, sync):
         """Initiate a background upload of a bufferblock.
 
@@ -1088,7 +1093,8 @@ class ArvadosFile(object):
         if self._current_bblock and self._current_bblock.state() != _BufferBlock.COMMITTED:
             if self._current_bblock.state() == _BufferBlock.WRITABLE:
                 self._repack_writes(num_retries)
-            self.parent._my_block_manager().commit_bufferblock(self._current_bblock, sync=sync)
+            if self._current_bblock.state() != _BufferBlock.DELETED:
+                self.parent._my_block_manager().commit_bufferblock(self._current_bblock, sync=sync)
 
         if sync:
             to_delete = set()
diff --git a/sdk/python/tests/test_collections.py b/sdk/python/tests/test_collections.py
index 906043f..7421850 100644
--- a/sdk/python/tests/test_collections.py
+++ b/sdk/python/tests/test_collections.py
@@ -1167,7 +1167,7 @@ class NewCollectionTestCaseWithServers(run_test_server.TestCaseWithServers):
 
         self.assertEqual(
             c.manifest_text(),
-            '. a8430a058b8fbf408e1931b794dbd6fb+13 0:10:count.txt 10:3:foo.txt\n')
+            '. 900150983cd24fb0d6963f7d28e17f72+3 a8430a058b8fbf408e1931b794dbd6fb+13 0:3:count.txt 6:7:count.txt 13:3:foo.txt\n')
 
 
 class CollectionCreateUpdateTest(run_test_server.TestCaseWithServers):

commit bef5d901f00648e703bb6a3ad58fa481a610ffd7
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Thu Apr 20 17:10:30 2017 -0400

    11507: Test cases that reproduce bug

diff --git a/sdk/python/tests/test_collections.py b/sdk/python/tests/test_collections.py
index 0e3d5e1..906043f 100644
--- a/sdk/python/tests/test_collections.py
+++ b/sdk/python/tests/test_collections.py
@@ -1111,7 +1111,7 @@ class NewCollectionTestCaseWithServers(run_test_server.TestCaseWithServers):
 
     def test_only_small_blocks_are_packed_together(self):
         c = Collection()
-        # Write a couple of small files, 
+        # Write a couple of small files,
         f = c.open("count.txt", "w")
         f.write("0123456789")
         f.close(flush=False)
@@ -1126,6 +1126,49 @@ class NewCollectionTestCaseWithServers(run_test_server.TestCaseWithServers):
             c.manifest_text("."),
             '. 2d303c138c118af809f39319e5d507e9+34603008 a8430a058b8fbf408e1931b794dbd6fb+13 0:34603008:bigfile.txt 34603008:10:count.txt 34603018:3:foo.txt\n')
 
+    def test_flush_after_small_block_packing(self):
+        c = Collection()
+        # Write a couple of small files,
+        f = c.open("count.txt", "w")
+        f.write("0123456789")
+        f.close(flush=False)
+        foo = c.open("foo.txt", "w")
+        foo.write("foo")
+        foo.close(flush=False)
+
+        self.assertEqual(
+            c.manifest_text(),
+            '. a8430a058b8fbf408e1931b794dbd6fb+13 0:10:count.txt 10:3:foo.txt\n')
+
+        f = c.open("count.txt", "r+")
+        f.close(flush=True)
+
+        self.assertEqual(
+            c.manifest_text(),
+            '. a8430a058b8fbf408e1931b794dbd6fb+13 0:10:count.txt 10:3:foo.txt\n')
+
+    def test_write_after_small_block_packing2(self):
+        c = Collection()
+        # Write a couple of small files,
+        f = c.open("count.txt", "w")
+        f.write("0123456789")
+        f.close(flush=False)
+        foo = c.open("foo.txt", "w")
+        foo.write("foo")
+        foo.close(flush=False)
+
+        self.assertEqual(
+            c.manifest_text(),
+            '. a8430a058b8fbf408e1931b794dbd6fb+13 0:10:count.txt 10:3:foo.txt\n')
+
+        f = c.open("count.txt", "r+")
+        f.write("abc")
+        f.close(flush=False)
+
+        self.assertEqual(
+            c.manifest_text(),
+            '. a8430a058b8fbf408e1931b794dbd6fb+13 0:10:count.txt 10:3:foo.txt\n')
+
 
 class CollectionCreateUpdateTest(run_test_server.TestCaseWithServers):
     MAIN_SERVER = {}

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list