[ARVADOS] created: 1.2.0-459-g3138f9673
Git user
git at public.curoverse.com
Thu Nov 29 14:29:49 EST 2018
at 3138f9673f6cdd557800e6252b6e9d605ddcf2b8 (commit)
commit 3138f9673f6cdd557800e6252b6e9d605ddcf2b8
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Thu Nov 29 13:38:28 2018 -0500
11419: Support text modes in Collection.open().
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/sdk/python/arvados/arvfile.py b/sdk/python/arvados/arvfile.py
index 3281d78e2..ef3cd5273 100644
--- a/sdk/python/arvados/arvfile.py
+++ b/sdk/python/arvados/arvfile.py
@@ -15,6 +15,7 @@ import copy
import errno
import functools
import hashlib
+import locale
import logging
import os
import queue
@@ -57,10 +58,13 @@ class UnownedBlockError(Exception):
class _FileLikeObjectBase(object):
- def __init__(self, name, mode):
+ def __init__(self, name, mode, encoding=None):
self.name = name
self.mode = mode
self.closed = False
+ if encoding is None:
+ encoding = locale.getpreferredencoding(False)
+ self.encoding = encoding
@staticmethod
def _before_close(orig_func):
@@ -86,11 +90,8 @@ class _FileLikeObjectBase(object):
class ArvadosFileReaderBase(_FileLikeObjectBase):
- def __init__(self, name, mode, num_retries=None):
- super(ArvadosFileReaderBase, self).__init__(name, mode)
- self._binary = 'b' in mode
- if sys.version_info >= (3, 0) and not self._binary:
- raise NotImplementedError("text mode {!r} is not implemented".format(mode))
+ def __init__(self, name, mode, num_retries=None, encoding=None):
+ super(ArvadosFileReaderBase, self).__init__(name, mode, encoding=encoding)
self._filepos = 0
self.num_retries = num_retries
self._readline_cache = (None, None)
@@ -1268,8 +1269,8 @@ class ArvadosFileReader(ArvadosFileReaderBase):
"""
- def __init__(self, arvadosfile, mode="r", num_retries=None):
- super(ArvadosFileReader, self).__init__(arvadosfile.name, mode=mode, num_retries=num_retries)
+ def __init__(self, arvadosfile, mode="r", num_retries=None, encoding=None):
+ super(ArvadosFileReader, self).__init__(arvadosfile.name, mode=mode, num_retries=num_retries, encoding=encoding)
self.arvadosfile = arvadosfile
def size(self):
@@ -1278,6 +1279,11 @@ class ArvadosFileReader(ArvadosFileReaderBase):
def stream_name(self):
return self.arvadosfile.parent.stream_name()
+ def readinto(self, b):
+ data = self.read(len(b))
+ b[:len(data)] = data
+ return len(data)
+
@_FileLikeObjectBase._before_close
@retry_method
def read(self, size=None, num_retries=None):
@@ -1320,8 +1326,8 @@ class ArvadosFileWriter(ArvadosFileReader):
"""
- def __init__(self, arvadosfile, mode, num_retries=None):
- super(ArvadosFileWriter, self).__init__(arvadosfile, mode=mode, num_retries=num_retries)
+ def __init__(self, arvadosfile, mode, num_retries=None, encoding=None):
+ super(ArvadosFileWriter, self).__init__(arvadosfile, mode=mode, num_retries=num_retries, encoding=encoding)
self.arvadosfile.add_writer(self)
def writable(self):
diff --git a/sdk/python/arvados/collection.py b/sdk/python/arvados/collection.py
index 48fdaf03e..8241d4f6d 100644
--- a/sdk/python/arvados/collection.py
+++ b/sdk/python/arvados/collection.py
@@ -7,16 +7,18 @@ from future.utils import listitems, listvalues, viewkeys
from builtins import str
from past.builtins import basestring
from builtins import object
+import ciso8601
+import datetime
+import errno
import functools
+import hashlib
+import io
import logging
import os
import re
-import errno
-import hashlib
-import datetime
-import ciso8601
-import time
+import sys
import threading
+import time
from collections import deque
from stat import *
@@ -35,6 +37,21 @@ from arvados.retry import retry_method
_logger = logging.getLogger('arvados.collection')
+
+if sys.version_info >= (3, 0):
+ TextIOWrapper = io.TextIOWrapper
+else:
+ class TextIOWrapper(io.TextIOWrapper):
+ """To maintain backward compatibility, cast str to unicode in
+ write('foo').
+
+ """
+ def write(self, data):
+ if isinstance(data, basestring):
+ data = unicode(data)
+ return super(TextIOWrapper, self).write(data)
+
+
class CollectionBase(object):
"""Abstract base class for Collection classes."""
@@ -654,7 +671,7 @@ class RichCollectionBase(CollectionBase):
return self.find_or_create(path, COLLECTION)
- def open(self, path, mode="r"):
+ def open(self, path, mode="r", encoding=None):
"""Open a file-like object for access.
:path:
@@ -676,17 +693,20 @@ class RichCollectionBase(CollectionBase):
opens for reading and writing. All writes are appended to
the end of the file. Writing does not affect the file pointer for
reading.
+
"""
if not re.search(r'^[rwa][bt]?\+?$', mode):
raise errors.ArgumentError("Invalid mode {!r}".format(mode))
if mode[0] == 'r' and '+' not in mode:
+ writing = False
fclass = ArvadosFileReader
arvfile = self.find(path)
elif not self.writable():
raise IOError(errno.EROFS, "Collection is read only")
else:
+ writing = True
fclass = ArvadosFileWriter
arvfile = self.find_or_create(path, FILE)
@@ -698,7 +718,18 @@ class RichCollectionBase(CollectionBase):
if mode[0] == 'w':
arvfile.truncate(0)
- return fclass(arvfile, mode=mode, num_retries=self.num_retries)
+ wrap_text_mode = 'b' not in mode
+ if wrap_text_mode:
+ mode = 'b' + mode
+
+ f = fclass(arvfile, mode=mode, num_retries=self.num_retries, encoding=encoding)
+ if wrap_text_mode:
+ if writing:
+ f = io.BufferedRandom(raw=f)
+ else:
+ f = io.BufferedReader(raw=f)
+ f = TextIOWrapper(buffer=f)
+ return f
def modified(self):
"""Determine if the collection has been modified since last committed."""
diff --git a/sdk/python/tests/test_collections.py b/sdk/python/tests/test_collections.py
index ac18c44c6..de0100674 100644
--- a/sdk/python/tests/test_collections.py
+++ b/sdk/python/tests/test_collections.py
@@ -836,17 +836,58 @@ class CollectionOpenModes(run_test_server.TestCaseWithServers):
with c.open('foo', 'wb') as f:
f.write('foo')
for mode in ['r', 'rt', 'r+', 'rt+', 'w', 'wt', 'a', 'at']:
- if sys.version_info >= (3, 0):
- with self.assertRaises(NotImplementedError):
- c.open('foo', mode)
- else:
- with c.open('foo', mode) as f:
- if mode[0] == 'r' and '+' not in mode:
- self.assertEqual('foo', f.read(3))
- else:
- f.write('bar')
- f.seek(-3, os.SEEK_CUR)
- self.assertEqual('bar', f.read(3))
+ with c.open('foo', mode) as f:
+ if mode[0] == 'r' and '+' not in mode:
+ self.assertEqual('foo', f.read(3))
+ else:
+ f.write('bar')
+ f.seek(0, os.SEEK_SET)
+ self.assertEqual('bar', f.read(3))
+
+
+class TextModes(run_test_server.TestCaseWithServers):
+
+ def setUp(self):
+ arvados.config.KEEP_BLOCK_SIZE = 4
+ if sys.version_info < (3, 0):
+ import unicodedata
+ self.sailboat = unicodedata.lookup('SAILBOAT')
+ self.snowman = unicodedata.lookup('SNOWMAN')
+ else:
+ self.sailboat = '\N{SAILBOAT}'
+ self.snowman = '\N{SNOWMAN}'
+
+ def tearDown(self):
+ arvados.config.KEEP_BLOCK_SIZE = 2 ** 26
+
+ def test_read_sailboat_across_block_boundary(self):
+ c = Collection()
+ f = c.open('sailboats', 'wb')
+ data = self.sailboat.encode('utf-8')
+ f.write(data)
+ f.write(data[:1])
+ f.write(data[1:])
+ f.write(b'\n')
+ f.close()
+ self.assertRegex(c.portable_manifest_text(), r'\+4 .*\+3 ')
+
+ f = c.open('sailboats', 'r')
+ string = f.readline()
+ self.assertEqual(string, self.sailboat+self.sailboat+'\n')
+ f.close()
+
+ def test_write_snowman_across_block_boundary(self):
+ c = Collection()
+ f = c.open('snowmany', 'w')
+ data = self.snowman
+ f.write(data+data+'\n'+data+'\n')
+ f.close()
+ self.assertRegex(c.portable_manifest_text(), r'\+4 .*\+4 .*\+3 ')
+
+ f = c.open('snowmany', 'r')
+ self.assertEqual(f.readline(), self.snowman+self.snowman+'\n')
+ self.assertEqual(f.readline(), self.snowman+'\n')
+ f.close()
class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list