[ARVADOS] created: 1.2.0-459-g3138f9673

Git user git at public.curoverse.com
Thu Nov 29 14:29:49 EST 2018


        at  3138f9673f6cdd557800e6252b6e9d605ddcf2b8 (commit)


commit 3138f9673f6cdd557800e6252b6e9d605ddcf2b8
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Thu Nov 29 13:38:28 2018 -0500

    11419: Support text modes in Collection.open().
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/sdk/python/arvados/arvfile.py b/sdk/python/arvados/arvfile.py
index 3281d78e2..ef3cd5273 100644
--- a/sdk/python/arvados/arvfile.py
+++ b/sdk/python/arvados/arvfile.py
@@ -15,6 +15,7 @@ import copy
 import errno
 import functools
 import hashlib
+import locale
 import logging
 import os
 import queue
@@ -57,10 +58,13 @@ class UnownedBlockError(Exception):
 
 
 class _FileLikeObjectBase(object):
-    def __init__(self, name, mode):
+    def __init__(self, name, mode, encoding=None):
         self.name = name
         self.mode = mode
         self.closed = False
+        if encoding is None:
+            encoding = locale.getpreferredencoding(False)
+        self.encoding = encoding
 
     @staticmethod
     def _before_close(orig_func):
@@ -86,11 +90,8 @@ class _FileLikeObjectBase(object):
 
 
 class ArvadosFileReaderBase(_FileLikeObjectBase):
-    def __init__(self, name, mode, num_retries=None):
-        super(ArvadosFileReaderBase, self).__init__(name, mode)
-        self._binary = 'b' in mode
-        if sys.version_info >= (3, 0) and not self._binary:
-            raise NotImplementedError("text mode {!r} is not implemented".format(mode))
+    def __init__(self, name, mode, num_retries=None, encoding=None):
+        super(ArvadosFileReaderBase, self).__init__(name, mode, encoding=encoding)
         self._filepos = 0
         self.num_retries = num_retries
         self._readline_cache = (None, None)
@@ -1268,8 +1269,8 @@ class ArvadosFileReader(ArvadosFileReaderBase):
 
     """
 
-    def __init__(self, arvadosfile, mode="r", num_retries=None):
-        super(ArvadosFileReader, self).__init__(arvadosfile.name, mode=mode, num_retries=num_retries)
+    def __init__(self, arvadosfile, mode="r", num_retries=None, encoding=None):
+        super(ArvadosFileReader, self).__init__(arvadosfile.name, mode=mode, num_retries=num_retries, encoding=encoding)
         self.arvadosfile = arvadosfile
 
     def size(self):
@@ -1278,6 +1279,11 @@ class ArvadosFileReader(ArvadosFileReaderBase):
     def stream_name(self):
         return self.arvadosfile.parent.stream_name()
 
+    def readinto(self, b):
+        data = self.read(len(b))
+        b[:len(data)] = data
+        return len(data)
+
     @_FileLikeObjectBase._before_close
     @retry_method
     def read(self, size=None, num_retries=None):
@@ -1320,8 +1326,8 @@ class ArvadosFileWriter(ArvadosFileReader):
 
     """
 
-    def __init__(self, arvadosfile, mode, num_retries=None):
-        super(ArvadosFileWriter, self).__init__(arvadosfile, mode=mode, num_retries=num_retries)
+    def __init__(self, arvadosfile, mode, num_retries=None, encoding=None):
+        super(ArvadosFileWriter, self).__init__(arvadosfile, mode=mode, num_retries=num_retries, encoding=encoding)
         self.arvadosfile.add_writer(self)
 
     def writable(self):
diff --git a/sdk/python/arvados/collection.py b/sdk/python/arvados/collection.py
index 48fdaf03e..8241d4f6d 100644
--- a/sdk/python/arvados/collection.py
+++ b/sdk/python/arvados/collection.py
@@ -7,16 +7,18 @@ from future.utils import listitems, listvalues, viewkeys
 from builtins import str
 from past.builtins import basestring
 from builtins import object
+import ciso8601
+import datetime
+import errno
 import functools
+import hashlib
+import io
 import logging
 import os
 import re
-import errno
-import hashlib
-import datetime
-import ciso8601
-import time
+import sys
 import threading
+import time
 
 from collections import deque
 from stat import *
@@ -35,6 +37,21 @@ from arvados.retry import retry_method
 
 _logger = logging.getLogger('arvados.collection')
 
+
+if sys.version_info >= (3, 0):
+    TextIOWrapper = io.TextIOWrapper
+else:
+    class TextIOWrapper(io.TextIOWrapper):
+        """To maintain backward compatibility, cast str to unicode in
+        write('foo').
+
+        """
+        def write(self, data):
+            if isinstance(data, basestring):
+                data = unicode(data)
+            return super(TextIOWrapper, self).write(data)
+
+
 class CollectionBase(object):
     """Abstract base class for Collection classes."""
 
@@ -654,7 +671,7 @@ class RichCollectionBase(CollectionBase):
 
         return self.find_or_create(path, COLLECTION)
 
-    def open(self, path, mode="r"):
+    def open(self, path, mode="r", encoding=None):
         """Open a file-like object for access.
 
         :path:
@@ -676,17 +693,20 @@ class RichCollectionBase(CollectionBase):
             opens for reading and writing.  All writes are appended to
             the end of the file.  Writing does not affect the file pointer for
             reading.
+
         """
 
         if not re.search(r'^[rwa][bt]?\+?$', mode):
             raise errors.ArgumentError("Invalid mode {!r}".format(mode))
 
         if mode[0] == 'r' and '+' not in mode:
+            writing = False
             fclass = ArvadosFileReader
             arvfile = self.find(path)
         elif not self.writable():
             raise IOError(errno.EROFS, "Collection is read only")
         else:
+            writing = True
             fclass = ArvadosFileWriter
             arvfile = self.find_or_create(path, FILE)
 
@@ -698,7 +718,18 @@ class RichCollectionBase(CollectionBase):
         if mode[0] == 'w':
             arvfile.truncate(0)
 
-        return fclass(arvfile, mode=mode, num_retries=self.num_retries)
+        wrap_text_mode = 'b' not in mode
+        if wrap_text_mode:
+            mode = 'b' + mode
+
+        f = fclass(arvfile, mode=mode, num_retries=self.num_retries, encoding=encoding)
+        if wrap_text_mode:
+            if writing:
+                f = io.BufferedRandom(raw=f)
+            else:
+                f = io.BufferedReader(raw=f)
+            f = TextIOWrapper(buffer=f)
+        return f
 
     def modified(self):
         """Determine if the collection has been modified since last commited."""
diff --git a/sdk/python/tests/test_collections.py b/sdk/python/tests/test_collections.py
index ac18c44c6..de0100674 100644
--- a/sdk/python/tests/test_collections.py
+++ b/sdk/python/tests/test_collections.py
@@ -836,17 +836,58 @@ class CollectionOpenModes(run_test_server.TestCaseWithServers):
         with c.open('foo', 'wb') as f:
             f.write('foo')
         for mode in ['r', 'rt', 'r+', 'rt+', 'w', 'wt', 'a', 'at']:
-            if sys.version_info >= (3, 0):
-                with self.assertRaises(NotImplementedError):
-                    c.open('foo', mode)
-            else:
-                with c.open('foo', mode) as f:
-                    if mode[0] == 'r' and '+' not in mode:
-                        self.assertEqual('foo', f.read(3))
-                    else:
-                        f.write('bar')
-                        f.seek(-3, os.SEEK_CUR)
-                        self.assertEqual('bar', f.read(3))
+            with c.open('foo', mode) as f:
+                if mode[0] == 'r' and '+' not in mode:
+                    self.assertEqual('foo', f.read(3))
+                else:
+                    f.write('bar')
+                    f.seek(0, os.SEEK_SET)
+                    self.assertEqual('bar', f.read(3))
+
+
+class TextModes(run_test_server.TestCaseWithServers):
+
+    def setUp(self):
+        arvados.config.KEEP_BLOCK_SIZE = 4
+        if sys.version_info < (3, 0):
+            import unicodedata
+            self.sailboat = unicodedata.lookup('SAILBOAT')
+            self.snowman = unicodedata.lookup('SNOWMAN')
+        else:
+            self.sailboat = '\N{SAILBOAT}'
+            self.snowman = '\N{SNOWMAN}'
+
+    def tearDown(self):
+        arvados.config.KEEP_BLOCK_SIZE = 2 ** 26
+
+    def test_read_sailboat_across_block_boundary(self):
+        c = Collection()
+        f = c.open('sailboats', 'wb')
+        data = self.sailboat.encode('utf-8')
+        f.write(data)
+        f.write(data[:1])
+        f.write(data[1:])
+        f.write(b'\n')
+        f.close()
+        self.assertRegex(c.portable_manifest_text(), r'\+4 .*\+3 ')
+
+        f = c.open('sailboats', 'r')
+        string = f.readline()
+        self.assertEqual(string, self.sailboat+self.sailboat+'\n')
+        f.close()
+
+    def test_write_snowman_across_block_boundary(self):
+        c = Collection()
+        f = c.open('snowmany', 'w')
+        data = self.snowman
+        f.write(data+data+'\n'+data+'\n')
+        f.close()
+        self.assertRegex(c.portable_manifest_text(), r'\+4 .*\+4 .*\+3 ')
+
+        f = c.open('snowmany', 'r')
+        self.assertEqual(f.readline(), self.snowman+self.snowman+'\n')
+        self.assertEqual(f.readline(), self.snowman+'\n')
+        f.close()
 
 
 class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list