[ARVADOS] updated: b5efdd0afbe7795b036dc19b8f7d6b8a32da52df

Wed Dec 17 14:50:46 EST 2014

Summary of changes:
 sdk/python/arvados/ranges.py    | 76 +++++++++++++++++++++++------------------
 sdk/python/tests/test_stream.py | 52 +++++++++++++++++++++++++++-
 2 files changed, 93 insertions(+), 35 deletions(-)

       via  b5efdd0afbe7795b036dc19b8f7d6b8a32da52df (commit)
      from  b0e2fe9d0a18d5e809bc8c0d3382e0e023cb949c (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit b5efdd0afbe7795b036dc19b8f7d6b8a32da52df
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Wed Dec 17 14:52:04 2014 -0500

    3198: Random access updates work.

diff --git a/sdk/python/arvados/ranges.py b/sdk/python/arvados/ranges.py
index fe9c04b..dbd5e3d 100644
--- a/sdk/python/arvados/ranges.py
+++ b/sdk/python/arvados/ranges.py
@@ -87,11 +87,11 @@ def locators_and_ranges(data_locators, range_start, range_size, debug=False):
 
 def replace_range(data_locators, range_start, range_size, new_locator, debug=False):
     '''
-    Replace a range with a new block.
-    data_locators: list of [locator, block_size, block_start], assumes that blocks are in order and contigous
+    Replace a file segment range with a new segment.
+    data_locators: list of [locator, segment_size, segment_start], assumes that segments are in order and contiguous
     range_start: start of range
     range_size: size of range
-    new_locator: locator for new block to be inserted
+    new_locator: locator for new segment to be inserted
     !!! data_locators will be updated in place !!!
     '''
     if range_size == 0:
@@ -103,8 +103,8 @@ def replace_range(data_locators, range_start, range_size, new_locator, debug=Fal
 
     last = data_locators[-1]
     if (last[OFFSET]+last[BLOCKSIZE]) == range_start:
-        # append new block
-        data_locators.append([new_locator, range_size, range_start])
+        # extend last segment
+        last[BLOCKSIZE] += range_size
         return
 
     i = first_block(data_locators, range_start, range_size, debug)
@@ -112,38 +112,46 @@ def replace_range(data_locators, range_start, range_size, new_locator, debug=Fal
         return
 
     while i < len(data_locators):
-        locator, block_size, block_start = data_locators[i]
-        block_end = block_start + block_size
+        locator, segment_size, segment_start = data_locators[i]
+        segment_end = segment_start + segment_size
         if debug:
-            print locator, "range_start", range_start, "block_start", block_start, "range_end", range_end, "block_end", block_end
-        if range_end <= block_start:
-            # range ends before this block starts, so don't look at any more locators
+            print locator, "range_start", range_start, "segment_start", segment_start, "range_end", range_end, "segment_end", segment_end
+        if range_end <= segment_start:
+            # range ends before this segment starts, so don't look at any more locators
             break
 
-        #if range_start >= block_end:
-            # range starts after this block ends, so go to next block
-            # we should always start at the first block due to the binary above, so this test is redundant
+        #if range_start >= segment_end:
+            # range starts after this segment ends, so go to next segment
+            # we should always start at the first segment due to the binary search above, so this test is redundant
             #next
 
-        if range_start >= block_start and range_end <= block_end:
-            # range starts and ends in this block
-            # split block into 3 pieces
-            #resp.append([locator, block_size, range_start - block_start, range_size])
-            pass
-        elif range_start >= block_start and range_end > block_end:
-            # range starts in this block
-            # split block into 2 pieces
-            #resp.append([locator, block_size, range_start - block_start, block_end - range_start])
-            pass
-        elif range_start < block_start and range_end > block_end:
-            # range starts in a previous block and extends to further blocks
-            # zero out this block
-            #resp.append([locator, block_size, 0L, block_size])
-            pass
-        elif range_start < block_start and range_end <= block_end:
-            # range starts in a previous block and ends in this block
-            # split into 2 pieces
-            #resp.append([locator, block_size, 0L, range_end - block_start])
-            pass
-        block_start = block_end
+        if range_start >= segment_start and range_end <= segment_end:
+            # range starts and ends in this segment
+            # split segment into 3 pieces
+            if (range_start-segment_start) > 0:
+                data_locators[i] = [locator, (range_start-segment_start), segment_start]
+                data_locators.insert(i+1, [new_locator, range_size, range_start])
+            else:
+                data_locators[i] = [new_locator, range_size, range_start]
+                i -= 1
+            if (segment_end-range_end) > 0:
+                data_locators.insert(i+2, [(locator + (range_start-segment_start) + range_size), (segment_end-range_end), range_end])
+            return
+        elif range_start >= segment_start and range_end > segment_end:
+            # range starts in this segment
+            # split segment into 2 pieces
+            data_locators[i] = [locator, (range_start-segment_start), segment_start]
+            data_locators.insert(i+1, [new_locator, range_size, range_start])
+            i += 1
+        elif range_start < segment_start and range_end > segment_end:
+            # range starts in a previous segment and extends to further segments
+            # delete this segment
+            del data_locators[i]
+            i -= 1
+        elif range_start < segment_start and range_end <= segment_end:
+            # range starts in a previous segment and ends in this segment
+            # move the starting point of this segment up, and shrink it.
+            data_locators[i] = [locator+(range_end-segment_start), (segment_end-range_end), range_end]
+            return
+        segment_start = segment_end
         i += 1
diff --git a/sdk/python/tests/test_stream.py b/sdk/python/tests/test_stream.py
index baafc32..30f08ba 100644
--- a/sdk/python/tests/test_stream.py
+++ b/sdk/python/tests/test_stream.py
@@ -315,7 +315,57 @@ class StreamFileWriterTestCase(unittest.TestCase):
         writer.seek(10)
         writer.write("foo")
         self.assertEqual("56789foo", writer.readfrom(5, 8))
-        #print arvados.normalize_stream(".", {"count.txt": stream.locators_and_ranges(0, stream.size())})
+
+    def test_write0(self):
+        stream = StreamWriter(['.', '781e5e245d69b566979b86e28d23f2c7+10', '0:10:count.txt'],
+                              keep=StreamWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"}))
+        writer = stream.files()["count.txt"]
+        self.assertEqual("0123456789", writer.readfrom(0, 13))
+        writer.seek(0)
+        writer.write("foo")
+        self.assertEqual("foo3456789", writer.readfrom(0, 13))
+        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 bufferblock0 10:3:count.txt 3:7:count.txt\n", stream.manifest_text())
+
+    def test_write1(self):
+        stream = StreamWriter(['.', '781e5e245d69b566979b86e28d23f2c7+10', '0:10:count.txt'],
+                              keep=StreamWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"}))
+        writer = stream.files()["count.txt"]
+        self.assertEqual("0123456789", writer.readfrom(0, 13))
+        writer.seek(3)
+        writer.write("foo")
+        self.assertEqual("012foo6789", writer.readfrom(0, 13))
+        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 bufferblock0 0:3:count.txt 10:3:count.txt 6:4:count.txt\n", stream.manifest_text())
+
+    def test_write2(self):
+        stream = StreamWriter(['.', '781e5e245d69b566979b86e28d23f2c7+10', '0:10:count.txt'],
+                              keep=StreamWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"}))
+        writer = stream.files()["count.txt"]
+        self.assertEqual("0123456789", writer.readfrom(0, 13))
+        writer.seek(7)
+        writer.write("foo")
+        self.assertEqual("0123456foo", writer.readfrom(0, 13))
+        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 bufferblock0 0:7:count.txt 10:3:count.txt\n", stream.manifest_text())
+
+    def test_write3(self):
+        stream = StreamWriter(['.', '781e5e245d69b566979b86e28d23f2c7+10', '0:10:count.txt', '0:10:count.txt'],
+                              keep=StreamWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"}))
+        writer = stream.files()["count.txt"]
+        self.assertEqual("012345678901234", writer.readfrom(0, 15))
+        writer.seek(7)
+        writer.write("foobar")
+        print stream.manifest_text()
+        self.assertEqual("0123456foobar34", writer.readfrom(0, 15))
+        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 bufferblock0 0:7:count.txt 10:6:count.txt 3:7:count.txt\n", stream.manifest_text())
+
+    def test_write4(self):
+        stream = StreamWriter(['.', '781e5e245d69b566979b86e28d23f2c7+10', '0:4:count.txt', '0:4:count.txt', '0:4:count.txt'],
+                              keep=StreamWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"}))
+        writer = stream.files()["count.txt"]
+        self.assertEqual("012301230123", writer.readfrom(0, 15))
+        writer.seek(2)
+        writer.write("abcdefg")
+        self.assertEqual("01abcdefg123", writer.readfrom(0, 15))
+        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 bufferblock0 0:2:count.txt 10:7:count.txt 1:3:count.txt\n", stream.manifest_text())
 
 if __name__ == '__main__':
     unittest.main()

-----------------------------------------------------------------------


hooks/post-receive
--