[ARVADOS] created: 04951581a941697d68cdaf9af6661c3c412f1bce

Git user git at public.curoverse.com
Tue Apr 18 16:40:56 EDT 2017


        at  04951581a941697d68cdaf9af6661c3c412f1bce (commit)


commit 04951581a941697d68cdaf9af6661c3c412f1bce
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Tue Apr 18 16:40:51 2017 -0400

    11510: Add support for sparse writes to ArvadosFile.
    
    Allow seeking past end of file.  Allow using truncate() to extend file size.  Add zero padding to fill in gaps.

diff --git a/sdk/python/arvados/arvfile.py b/sdk/python/arvados/arvfile.py
index 9db19b0..d337785 100644
--- a/sdk/python/arvados/arvfile.py
+++ b/sdk/python/arvados/arvfile.py
@@ -96,7 +96,9 @@ class ArvadosFileReaderBase(_FileLikeObjectBase):
             pos += self._filepos
         elif whence == os.SEEK_END:
             pos += self.size()
-        self._filepos = min(max(pos, 0L), self.size())
+        if pos < 0L:
+            raise IOError(errno.EINVAL, "Tried to seek to negative file offset.")
+        self._filepos = pos
 
     def tell(self):
         return self._filepos
@@ -428,6 +430,7 @@ class _BlockManager(object):
         self.copies = copies
         self._pending_write_size = 0
         self.threads_lock = threading.Lock()
+        self.padding_block = None
 
     @synchronized
     def alloc_bufferblock(self, blockid=None, starting_capacity=2**14, owner=None):
@@ -654,6 +657,17 @@ class _BlockManager(object):
         return self._bufferblocks.get(locator)
 
     @synchronized
+    def get_padding_block(self):
+        """Get a bufferblock 64 MB in size consisting of all zeros, used as padding
+        when using truncate() to extend the size of a file."""
+
+        if self.padding_block is None:
+            self.padding_block = self._alloc_bufferblock(starting_capacity=config.KEEP_BLOCK_SIZE)
+            self.padding_block.write_pointer = config.KEEP_BLOCK_SIZE
+            self.commit_bufferblock(self.padding_block, False)
+        return self.padding_block
+
+    @synchronized
     def delete_bufferblock(self, locator):
         self._delete_bufferblock(locator)
 
@@ -900,11 +914,11 @@ class ArvadosFile(object):
     @must_be_writable
     @synchronized
     def truncate(self, size):
-        """Shrink the size of the file.
+        """Shrink or expand the size of the file.
 
         If `size` is less than the size of the file, the file contents after
         `size` will be discarded.  If `size` is greater than the current size
-        of the file, an IOError will be raised.
+        of the file, it will be filled with zero bytes.
 
         """
         if size < self.size():
@@ -925,7 +939,17 @@ class ArvadosFile(object):
             self._segments = new_segs
             self.set_committed(False)
         elif size > self.size():
-            raise IOError(errno.EINVAL, "truncate() does not support extending the file size")
+            padding = self.parent._my_block_manager().get_padding_block()
+            diff = size - self.size()
+            while diff > config.KEEP_BLOCK_SIZE:
+                self._segments.append(Range(padding.blockid, self.size(), config.KEEP_BLOCK_SIZE, 0))
+                diff -= config.KEEP_BLOCK_SIZE
+            if diff > 0:
+                self._segments.append(Range(padding.blockid, self.size(), diff, 0))
+            self.set_committed(False)
+        else:
+            # size == self.size()
+            pass
 
     def readfrom(self, offset, size, num_retries, exact=False):
         """Read up to `size` bytes from the file starting at `offset`.
@@ -998,8 +1022,12 @@ class ArvadosFile(object):
         if len(data) == 0:
             return
 
+        print "Writing", len(data), "bytes to offset", offset, "current size is", self.size()
+
         if offset > self.size():
-            raise ArgumentError("Offset is past the end of the file")
+            print "Need to extend to", offset
+            self.truncate(offset)
+            print "Size is now", self.size()
 
         if len(data) > config.KEEP_BLOCK_SIZE:
             # Chunk it up into smaller writes
diff --git a/sdk/python/arvados/config.py b/sdk/python/arvados/config.py
index 8f2b265..0c4ccb0 100644
--- a/sdk/python/arvados/config.py
+++ b/sdk/python/arvados/config.py
@@ -15,6 +15,9 @@ else:
 KEEP_BLOCK_SIZE = 2**26
 EMPTY_BLOCK_LOCATOR = 'd41d8cd98f00b204e9800998ecf8427e+0'
 
+# This is the 64 MiB block consisting entirely of zeros
+PADDING_BLOCK_LOCATOR = '7f614da9329cd3aebf59b91aadc30bf0+67108864'
+
 def initialize(config_file=default_config_file):
     global _settings
     _settings = {}
diff --git a/sdk/python/tests/test_arvfile.py b/sdk/python/tests/test_arvfile.py
index 1b66935..95106a7 100644
--- a/sdk/python/tests/test_arvfile.py
+++ b/sdk/python/tests/test_arvfile.py
@@ -91,6 +91,34 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
             self.assertFalse(c.modified())
 
+
+    def test_truncate2(self):
+        keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
+        api = ArvadosFileWriterTestCase.MockApi({"name":"test_truncate2",
+                                                 "manifest_text":". 781e5e245d69b566979b86e28d23f2c7+10 7f614da9329cd3aebf59b91aadc30bf0+67108864 0:12:count.txt\n",
+                                                 "replication_desired":None},
+                                                {"uuid":"zzzzz-4zz18-mockcollection0",
+                                                 "manifest_text":". 781e5e245d69b566979b86e28d23f2c7+10 7f614da9329cd3aebf59b91aadc30bf0+67108864 0:12:count.txt\n",
+                                                 "portable_data_hash":"272da898abdf86ddc71994835e3155f8+95"})
+        with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
+                             api_client=api, keep_client=keep) as c:
+            writer = c.open("count.txt", "r+")
+            self.assertEqual(writer.size(), 10)
+            self.assertEqual("0123456789", writer.read(12))
+
+            # extend file size
+            writer.truncate(12)
+
+            self.assertEqual(writer.size(), 12)
+            writer.seek(0, os.SEEK_SET)
+            self.assertEqual(b"0123456789\x00\x00", writer.read(12))
+
+            self.assertIsNone(c.manifest_locator())
+            self.assertTrue(c.modified())
+            c.save_new("test_truncate2")
+            self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
+            self.assertFalse(c.modified())
+
     def test_write_to_end(self):
         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
         api = ArvadosFileWriterTestCase.MockApi({"name":"test_append",
@@ -268,6 +296,40 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
 
             self.assertEqual(c.manifest_text(), ". 781e5e245d69b566979b86e28d23f2c7+10 48dd23ea1645fd47d789804d71b5bb8e+67108864 77c57dc6ac5a10bb2205caaa73187994+32891126 0:100000000:count.txt\n")
 
+    def test_sparse_write(self):
+        keep = ArvadosFileWriterTestCase.MockKeep({})
+        api = ArvadosFileWriterTestCase.MockApi({}, {})
+        with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
+                             api_client=api, keep_client=keep) as c:
+            writer = c.open("count.txt", "r+")
+            self.assertEqual(writer.size(), 0)
+
+            text = "0123456789"
+            writer.seek(2)
+            writer.write(text)
+            self.assertEqual(writer.size(), 12)
+            writer.seek(0, os.SEEK_SET)
+            self.assertEqual(writer.read(), b"\x00\x00"+text)
+
+            self.assertEqual(c.manifest_text(), ". 7f614da9329cd3aebf59b91aadc30bf0+67108864 781e5e245d69b566979b86e28d23f2c7+10 0:2:count.txt 67108864:10:count.txt\n")
+
+
+    def test_sparse_write2(self):
+        keep = ArvadosFileWriterTestCase.MockKeep({})
+        api = ArvadosFileWriterTestCase.MockApi({}, {})
+        with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
+                             api_client=api, keep_client=keep) as c:
+            writer = c.open("count.txt", "r+")
+            self.assertEqual(writer.size(), 0)
+
+            text = "0123456789"
+            writer.seek((arvados.config.KEEP_BLOCK_SIZE*2) + 2)
+            writer.write(text)
+            self.assertEqual(writer.size(), (arvados.config.KEEP_BLOCK_SIZE*2) + 12)
+            writer.seek(0, os.SEEK_SET)
+
+            self.assertEqual(c.manifest_text(), ". 7f614da9329cd3aebf59b91aadc30bf0+67108864 781e5e245d69b566979b86e28d23f2c7+10 0:67108864:count.txt 0:67108864:count.txt 0:2:count.txt 67108864:10:count.txt\n")
+
     def test_rewrite_on_empty_file(self):
         keep = ArvadosFileWriterTestCase.MockKeep({})
         with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
diff --git a/sdk/python/tests/test_stream.py b/sdk/python/tests/test_stream.py
index 624f1b8..d4b2207 100644
--- a/sdk/python/tests/test_stream.py
+++ b/sdk/python/tests/test_stream.py
@@ -76,7 +76,8 @@ class StreamFileReaderTestCase(unittest.TestCase):
     def test_seek_max_size(self):
         sfile = self.make_count_reader()
         sfile.seek(2, os.SEEK_END)
-        self.assertEqual(9, sfile.tell())
+        # POSIX permits seeking past end of file.
+        self.assertEqual(11, sfile.tell())
 
     def test_size(self):
         self.assertEqual(9, self.make_count_reader().size())

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list