[ARVADOS] updated: 1d61934faad7093807fd3024096ca54d3bc24ea3
Git user
git at public.curoverse.com
Thu Apr 20 10:41:29 EDT 2017
Summary of changes:
sdk/python/arvados/arvfile.py | 16 ++++++++++------
1 file changed, 10 insertions(+), 6 deletions(-)
via 1d61934faad7093807fd3024096ca54d3bc24ea3 (commit)
from aed7702a67426dfd9d24b512c90df8e909162179 (commit)
Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.
commit 1d61934faad7093807fd3024096ca54d3bc24ea3
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Thu Apr 20 10:41:05 2017 -0400
11510: Repack writes any time there's more than one segment referencing the same bufferblock.
diff --git a/sdk/python/arvados/arvfile.py b/sdk/python/arvados/arvfile.py
index 33e55ad..4129a15 100644
--- a/sdk/python/arvados/arvfile.py
+++ b/sdk/python/arvados/arvfile.py
@@ -994,14 +994,18 @@ class ArvadosFile(object):
"""
segs = self._segments
- # Sum up the segments to get the total bytes of the file referencing
- # into the buffer block.
+ # Collect the segments that reference the buffer block.
bufferblock_segs = [s for s in segs if s.locator == self._current_bblock.blockid]
- write_total = sum([s.range_size for s in bufferblock_segs])
- if write_total < self._current_bblock.size():
- # There is more data in the buffer block than is actually accounted for by segments, so
- # re-pack into a new buffer by copying over to a new buffer block.
+ if len(bufferblock_segs) > 1:
+ # Collect total data referenced by segments (could be smaller than
+ # bufferblock size if a portion of the file was written and
+ # then overwritten).
+ write_total = sum([s.range_size for s in bufferblock_segs])
+
+ # If there's more than one segment referencing this block, it is
+ # due to out-of-order writes and will produce a fragmented
+ # manifest, so try to optimize by re-packing into a new buffer.
contents = self.parent._my_block_manager().get_block_contents(self._current_bblock.blockid, num_retries)
new_bb = self.parent._my_block_manager().alloc_bufferblock(self._current_bblock.blockid, starting_capacity=write_total, owner=self)
for t in bufferblock_segs:
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list