[ARVADOS] updated: 1.3.0-67-gdaf8e382e
Git user
git at public.curoverse.com
Fri Dec 21 16:57:36 EST 2018
Summary of changes:
sdk/python/arvados/_normalize_stream.py | 9 +++++++--
sdk/python/arvados/collection.py | 15 ++++++---------
sdk/python/tests/test_collections.py | 9 ++++++++-
3 files changed, 21 insertions(+), 12 deletions(-)
via daf8e382e4d37d8a73cc2b6166c2013cb7572f77 (commit)
from 3016fbd5fba0273350311d336f1f7c929f6fc20f (commit)
The revisions listed above that are new to this repository have
not appeared in any other notification email, so they are listed
in full below.
commit daf8e382e4d37d8a73cc2b6166c2013cb7572f77
Author: Lucas Di Pentima <ldipentima at veritasgenetics.com>
Date: Fri Dec 21 18:56:31 2018 -0300
14539: Move escaping to normalize_manifest.
Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <ldipentima at veritasgenetics.com>
diff --git a/sdk/python/arvados/_normalize_stream.py b/sdk/python/arvados/_normalize_stream.py
index 47b66c82d..9caef764e 100644
--- a/sdk/python/arvados/_normalize_stream.py
+++ b/sdk/python/arvados/_normalize_stream.py
@@ -5,6 +5,11 @@
from __future__ import absolute_import
from . import config
+import re
+
+def escape(path):
+ return re.sub('\\\\([0-3][0-7][0-7])', lambda m: '\\134'+m.group(1), path).replace(' ', '\\040')
+
def normalize_stream(stream_name, stream):
"""Take manifest stream and return a list of tokens in normalized format.
@@ -16,7 +21,7 @@ def normalize_stream(stream_name, stream):
"""
- stream_name = stream_name.replace(' ', '\\040')
+ stream_name = escape(stream_name)
stream_tokens = [stream_name]
sortedfiles = list(stream.keys())
sortedfiles.sort()
@@ -38,7 +43,7 @@ def normalize_stream(stream_name, stream):
for streamfile in sortedfiles:
# Add in file segments
current_span = None
- fout = streamfile.replace(' ', '\\040')
+ fout = escape(streamfile)
for segment in stream[streamfile]:
# Collapse adjacent segments
streamoffset = blocks[segment.locator] + segment.segment_offset
diff --git a/sdk/python/arvados/collection.py b/sdk/python/arvados/collection.py
index a24552909..7ad07cc60 100644
--- a/sdk/python/arvados/collection.py
+++ b/sdk/python/arvados/collection.py
@@ -26,7 +26,7 @@ from stat import *
from .arvfile import split, _FileLikeObjectBase, ArvadosFile, ArvadosFileWriter, ArvadosFileReader, WrappableFile, _BlockManager, synchronized, must_be_writable, NoopLock
from .keep import KeepLocator, KeepClient
from .stream import StreamReader
-from ._normalize_stream import normalize_stream
+from ._normalize_stream import normalize_stream, escape
from ._ranges import Range, LocatorAndRange
from .safeapi import ThreadSafeApiCache
import arvados.config as config
@@ -562,11 +562,6 @@ class RichCollectionBase(CollectionBase):
def stream_name(self):
raise NotImplementedError()
- def _unescape_manifest_path(self, path):
- return re.sub('\\\\([0-3][0-7][0-7])', lambda m: chr(int(m.group(1), 8)), path)
-
- def _escape_manifest_path(self, path):
- return path.replace('\\', '\\134').replace(' ', '\\040')
@synchronized
def has_remote_blocks(self):
@@ -606,8 +601,6 @@ class RichCollectionBase(CollectionBase):
pathcomponents = path.split("/", 1)
if pathcomponents[0]:
- # Escape '\' & ' ' chars on path names
- pathcomponents[0] = self._escape_manifest_path(pathcomponents[0])
item = self._items.get(pathcomponents[0])
if len(pathcomponents) == 1:
if item is None:
@@ -1717,6 +1710,9 @@ class Collection(RichCollectionBase):
_block_re = re.compile(r'[0-9a-f]{32}\+(\d+)(\+\S+)*')
_segment_re = re.compile(r'(\d+):(\d+):(\S+)')
+ def _unescape_manifest_path(self, path):
+ return re.sub('\\\\([0-3][0-7][0-7])', lambda m: chr(int(m.group(1), 8)), path)
+
@synchronized
def _import_manifest(self, manifest_text):
"""Import a manifest into a `Collection`.
@@ -1844,7 +1840,8 @@ class Subcollection(RichCollectionBase):
def _get_manifest_text(self, stream_name, strip, normalize, only_committed=False):
"""Encode empty directories by using an \056-named (".") empty file"""
if len(self._items) == 0:
- return "%s %s 0:0:\\056\n" % (stream_name, config.EMPTY_BLOCK_LOCATOR)
+ return "%s %s 0:0:\\056\n" % (
+ escape(stream_name), config.EMPTY_BLOCK_LOCATOR)
return super(Subcollection, self)._get_manifest_text(stream_name,
strip, normalize,
only_committed)
diff --git a/sdk/python/tests/test_collections.py b/sdk/python/tests/test_collections.py
index bfc1d1970..3a4dabfea 100644
--- a/sdk/python/tests/test_collections.py
+++ b/sdk/python/tests/test_collections.py
@@ -952,12 +952,19 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
self.assertIs(c.find("./nonexistant.txt"), None)
self.assertIs(c.find("./nonexistantsubdir/nonexistant.txt"), None)
- def test_escaped_paths_dont_get_unescaped(self):
+ def test_escaped_paths_dont_get_unescaped_on_manifest(self):
# Dir & file names are literally '\056' (escaped form: \134056)
manifest = './\\134056\\040Test d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\134056\n'
c = Collection(manifest)
self.assertEqual(c.portable_manifest_text(), manifest)
+ def test_escaped_paths_do_get_unescaped_on_listing(self):
+ # Dir & file names are literally '\056' (escaped form: \134056)
+ manifest = './\\134056\\040Test d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\134056\n'
+ c = Collection(manifest)
+ self.assertIn('\\056 Test', c.keys())
+ self.assertIn('\\056', c['\\056 Test'].keys())
+
def test_make_empty_dir_with_escaped_chars(self):
c = Collection()
c.mkdirs('./Empty\\056Dir')
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list