[ARVADOS] updated: b3cdfeea8341ac3b22ec84d58645523a59eceba9

git at public.curoverse.com
Fri Dec 19 16:36:08 EST 2014


Summary of changes:
 sdk/python/arvados/arvfile.py | 74 +++++++++++++++----------------------------
 1 file changed, 26 insertions(+), 48 deletions(-)

       via  b3cdfeea8341ac3b22ec84d58645523a59eceba9 (commit)
      from  e78607008c39aa88ccf6e95d7c6dfcc20a52a2ed (commit)

The revisions listed above that are new to this repository have not
appeared in any other notification email, so we list them in full below.


commit b3cdfeea8341ac3b22ec84d58645523a59eceba9
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Fri Dec 19 16:36:46 2014 -0500

    3198: Writing files works

diff --git a/sdk/python/arvados/arvfile.py b/sdk/python/arvados/arvfile.py
index d2b174f..3c71336 100644
--- a/sdk/python/arvados/arvfile.py
+++ b/sdk/python/arvados/arvfile.py
@@ -220,8 +220,11 @@ class BufferBlock(object):
         self.buffer_view[self.write_pointer:self.write_pointer+len(data)] = data
         self.write_pointer += len(data)
 
+    def size(self):
+        return self.write_pointer
+
     def calculate_locator(self):
-        return "%s+%i" % (hashlib.md5(self.buffer_view[0:self.write_pointer]).hexdigest(), self.write_pointer)
+        return "%s+%i" % (hashlib.md5(self.buffer_view[0:self.write_pointer]).hexdigest(), self.size())
 
 
 class ArvadosFile(object):
@@ -292,53 +295,28 @@ class ArvadosFile(object):
     def _repack_writes(self):
         pass
          # TODO: fixme
-#         '''Test if the buffer block has more data than is referenced by actual segments
-#         (this happens when a buffered write over-writes a file range written in
-#         a previous buffered write).  Re-pack the buffer block for efficiency
-#         and to avoid leaking information.
-#         '''
-#         segs = self._files.values()[0].segments
-
-#         bufferblock_segs = []
-#         i = 0
-#         tmp_segs = copy.copy(segs)
-#         while i < len(tmp_segs):
-#             # Go through each segment and identify segments that include the buffer block
-#             s = tmp_segs[i]
-#             if s[LOCATOR] < self.current_bblock.locator_list_entry.range_start and (s[LOCATOR] + s.range_size) > self.current_bblock.locator_list_entry.range_start:
-#                 # The segment straddles the previous block and the current buffer block.  Split the segment.
-#                 b1 = self.current_bblock.locator_list_entry.range_start - s[LOCATOR]
-#                 b2 = (s[LOCATOR] + s.range_size) - self.current_bblock.locator_list_entry.range_start
-#                 bb_seg = [self.current_bblock.locator_list_entry.range_start, b2, s.range_start+b1]
-#                 tmp_segs[i] = [s[LOCATOR], b1, s.range_start]
-#                 tmp_segs.insert(i+1, bb_seg)
-#                 bufferblock_segs.append(bb_seg)
-#                 i += 1
-#             elif s[LOCATOR] >= self.current_bblock.locator_list_entry.range_start:
-#                 # The segment's data is in the buffer block.
-#                 bufferblock_segs.append(s)
-#             i += 1
-
-#         # Now sum up the segments to get the total bytes
-#         # of the file referencing into the buffer block.
-#         write_total = sum([s.range_size for s in bufferblock_segs])
-
-#         if write_total < self.current_bblock.locator_list_entry.range_size:
-#             # There is more data in the buffer block than is actually accounted for by segments, so
-#             # re-pack into a new buffer by copying over to a new buffer block.
-#             new_bb = BufferBlock(self.current_bblock.locator,
-#                                  self.current_bblock.locator_list_entry.range_start,
-#                                  starting_size=write_total)
-#             for t in bufferblock_segs:
-#                 t_start = t[LOCATOR] - self.current_bblock.locator_list_entry.range_start
-#                 t_end = t_start + t.range_size
-#                 t[0] = self.current_bblock.locator_list_entry.range_start + new_bb.write_pointer
-#                 new_bb.append(self.current_bblock.buffer_block[t_start:t_end])
-
-#             self.current_bblock = new_bb
-#             self.bufferblocks[self.current_bblock.locator] = self.current_bblock
-#             self._data_locators[-1] = self.current_bblock.locator_list_entry
-#             self._files.values()[0].segments = tmp_segs
+        '''Test if the buffer block has more data than is referenced by actual segments
+        (this happens when a buffered write over-writes a file range written in
+        a previous buffered write).  Re-pack the buffer block for efficiency
+        and to avoid leaking information.
+        '''
+        segs = self._segments
+
+        # Sum up the segments to get the total bytes of the file referencing
+        # into the buffer block.
+        bufferblock_segs = [s for s in segs if s.locator == self._current_bblock.locator]
+        write_total = sum([s.range_size for s in bufferblock_segs])
+
+        if write_total < self._current_bblock.size():
+            # There is more data in the buffer block than is actually accounted for by segments, so
+            # re-pack into a new buffer by copying over to a new buffer block.
+            new_bb = BufferBlock(self._current_bblock.locator, starting_size=write_total)
+            for t in bufferblock_segs:
+                new_bb.append(self._current_bblock.buffer_view[t.segment_offset:t.segment_offset+t.range_size].tobytes())
+                t.segment_offset = new_bb.size() - t.range_size
+
+            self._current_bblock = new_bb
+            self._bufferblocks[self._current_bblock.locator] = self._current_bblock
 
 
     def writeto(self, offset, data, num_retries):
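
For readers skimming the diff: the locator built by calculate_locator() follows the
Keep convention of "<md5 hex digest>+<size in bytes>", and the new size() accessor
supplies the byte count.  A minimal standalone sketch of that calculation (the
function name here is illustrative, not part of the SDK):

    import hashlib

    def compute_locator(data):
        # Keep-style locator: md5 hex digest of the bytes, "+", then the byte count.
        return "%s+%i" % (hashlib.md5(data).hexdigest(), len(data))

    print(compute_locator(b""))         # d41d8cd98f00b204e9800998ecf8427e+0
    print(compute_locator(b"buffered")) # <md5 of "buffered">+8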

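The new _repack_writes() body copies only the byte ranges that file segments still
reference into a fresh buffer block, so bytes orphaned by an over-write are dropped
rather than leaked into the stored block.  A rough self-contained sketch of the same
idea, using simplified stand-in classes rather than the SDK's BufferBlock and
segment types:

    class Segment(object):
        """A file segment pointing into a buffer: offset within the buffer plus length."""
        def __init__(self, segment_offset, range_size):
            self.segment_offset = segment_offset
            self.range_size = range_size

    def repack(buffer_bytes, segments):
        """Copy only the still-referenced ranges into a new buffer and update
        each segment's offset to its position in that buffer."""
        new_buf = bytearray()
        for s in segments:
            data = buffer_bytes[s.segment_offset:s.segment_offset + s.range_size]
            s.segment_offset = len(new_buf)
            new_buf.extend(data)
        return bytes(new_buf)

    # Two 5-byte writes to the same file range: the buffer holds 10 bytes, but only
    # the second write ("bbbbb", at buffer offset 5) is still referenced.
    buf = b"aaaaabbbbb"
    segs = [Segment(5, 5)]
    buf = repack(buf, segs)
    assert buf == b"bbbbb" and segs[0].segment_offset == 0

As in the committed code, a real implementation would only repack when the referenced
total is smaller than the buffer block's write pointer.
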
-----------------------------------------------------------------------


hooks/post-receive