[ARVADOS] created: 1.3.0-71-g13e7ad813
Git user
git at public.curoverse.com
Fri Dec 21 17:01:24 EST 2018
at 13e7ad8135a0bafc3d1d225ff7e4c62de4f3b43f (commit)
commit 13e7ad8135a0bafc3d1d225ff7e4c62de4f3b43f
Author: Lucas Di Pentima <ldipentima at veritasgenetics.com>
Date: Thu Dec 20 16:32:39 2018 -0300
14539: Escape backslash & space chars on file/dir names, with tests.
Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <ldipentima at veritasgenetics.com>
diff --git a/sdk/python/arvados/_normalize_stream.py b/sdk/python/arvados/_normalize_stream.py
index 47b66c82d..9caef764e 100644
--- a/sdk/python/arvados/_normalize_stream.py
+++ b/sdk/python/arvados/_normalize_stream.py
@@ -5,6 +5,11 @@
from __future__ import absolute_import
from . import config
+import re
+
+def escape(path):
+ return re.sub('\\\\([0-3][0-7][0-7])', lambda m: '\\134'+m.group(1), path).replace(' ', '\\040')
+
def normalize_stream(stream_name, stream):
"""Take manifest stream and return a list of tokens in normalized format.
@@ -16,7 +21,7 @@ def normalize_stream(stream_name, stream):
"""
- stream_name = stream_name.replace(' ', '\\040')
+ stream_name = escape(stream_name)
stream_tokens = [stream_name]
sortedfiles = list(stream.keys())
sortedfiles.sort()
@@ -38,7 +43,7 @@ def normalize_stream(stream_name, stream):
for streamfile in sortedfiles:
# Add in file segments
current_span = None
- fout = streamfile.replace(' ', '\\040')
+ fout = escape(streamfile)
for segment in stream[streamfile]:
# Collapse adjacent segments
streamoffset = blocks[segment.locator] + segment.segment_offset
diff --git a/sdk/python/arvados/collection.py b/sdk/python/arvados/collection.py
index c2517c618..7ad07cc60 100644
--- a/sdk/python/arvados/collection.py
+++ b/sdk/python/arvados/collection.py
@@ -26,7 +26,7 @@ from stat import *
from .arvfile import split, _FileLikeObjectBase, ArvadosFile, ArvadosFileWriter, ArvadosFileReader, WrappableFile, _BlockManager, synchronized, must_be_writable, NoopLock
from .keep import KeepLocator, KeepClient
from .stream import StreamReader
-from ._normalize_stream import normalize_stream
+from ._normalize_stream import normalize_stream, escape
from ._ranges import Range, LocatorAndRange
from .safeapi import ThreadSafeApiCache
import arvados.config as config
@@ -562,6 +562,7 @@ class RichCollectionBase(CollectionBase):
def stream_name(self):
raise NotImplementedError()
+
@synchronized
def has_remote_blocks(self):
"""Recursively check for a +R segment locator signature."""
@@ -600,9 +601,6 @@ class RichCollectionBase(CollectionBase):
pathcomponents = path.split("/", 1)
if pathcomponents[0]:
- # Don't allow naming files/dirs \\056
- if pathcomponents[0] == "\\056":
- raise IOError(errno.EINVAL, "Invalid name", pathcomponents[0])
item = self._items.get(pathcomponents[0])
if len(pathcomponents) == 1:
if item is None:
@@ -1842,7 +1840,8 @@ class Subcollection(RichCollectionBase):
def _get_manifest_text(self, stream_name, strip, normalize, only_committed=False):
"""Encode empty directories by using an \056-named (".") empty file"""
if len(self._items) == 0:
- return "%s %s 0:0:\\056\n" % (stream_name, config.EMPTY_BLOCK_LOCATOR)
+ return "%s %s 0:0:\\056\n" % (
+ escape(stream_name), config.EMPTY_BLOCK_LOCATOR)
return super(Subcollection, self)._get_manifest_text(stream_name,
strip, normalize,
only_committed)
diff --git a/sdk/python/tests/test_collections.py b/sdk/python/tests/test_collections.py
index 55dd0838f..3a4dabfea 100644
--- a/sdk/python/tests/test_collections.py
+++ b/sdk/python/tests/test_collections.py
@@ -952,23 +952,36 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
self.assertIs(c.find("./nonexistant.txt"), None)
self.assertIs(c.find("./nonexistantsubdir/nonexistant.txt"), None)
+ def test_escaped_paths_dont_get_unescaped_on_manifest(self):
+ # Dir & file names are literally '\056' (escaped form: \134056)
+ manifest = './\\134056\\040Test d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\134056\n'
+ c = Collection(manifest)
+ self.assertEqual(c.portable_manifest_text(), manifest)
+
+ def test_escaped_paths_do_get_unescaped_on_listing(self):
+ # Dir & file names are literally '\056' (escaped form: \134056)
+ manifest = './\\134056\\040Test d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\134056\n'
+ c = Collection(manifest)
+ self.assertIn('\\056 Test', c.keys())
+ self.assertIn('\\056', c['\\056 Test'].keys())
+
+ def test_make_empty_dir_with_escaped_chars(self):
+ c = Collection()
+ c.mkdirs('./Empty\\056Dir')
+ self.assertEqual(c.portable_manifest_text(),
+ './Empty\\134056Dir d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n')
+
+ def test_make_empty_dir_with_spaces(self):
+ c = Collection()
+ c.mkdirs('./foo bar/baz waz')
+ self.assertEqual(c.portable_manifest_text(),
+ './foo\\040bar/baz\\040waz d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n')
+
def test_remove_in_subdir(self):
c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
c.remove("foo/count2.txt")
self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n", c.portable_manifest_text())
- def test_create_dot_file(self):
- c = Collection()
- with self.assertRaises(IOError):
- with c.open("./dir/\\056", "wb") as f:
- f.write("Should not be allowed")
-
- def test_create_file_inside_dot_dir(self):
- c = Collection()
- with self.assertRaises(IOError):
- with c.open("./dir/\\056/foo", "wb") as f:
- f.write("Should not be allowed")
-
def test_remove_empty_subdir(self):
c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
c.remove("foo/count2.txt")
commit bf5c9ddab89fb7392950c9f6edd83e5e497969f2
Author: Lucas Di Pentima <ldipentima at veritasgenetics.com>
Date: Mon Dec 17 21:54:07 2018 -0300
14539: Fixes fuse tests.
Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <ldipentima at veritasgenetics.com>
diff --git a/services/fuse/tests/test_mount.py b/services/fuse/tests/test_mount.py
index fb282d1aa..6a00cdb6b 100644
--- a/services/fuse/tests/test_mount.py
+++ b/services/fuse/tests/test_mount.py
@@ -158,7 +158,7 @@ class FuseMagicTest(MountTestBase):
self.assertIn(self.testcollection,
llfuse.listdir(os.path.join(self.mounttmp, 'by_id')))
self.assertIn(self.test_project, mount_ls)
- self.assertIn(self.test_project,
+ self.assertIn(self.test_project,
llfuse.listdir(os.path.join(self.mounttmp, 'by_id')))
with self.assertRaises(OSError):
@@ -617,7 +617,8 @@ class FuseRmTest(MountTestBase):
# Can't have empty directories :-( so manifest will be empty.
collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()
- self.assertEqual(collection2["manifest_text"], "")
+ self.assertRegexpMatches(collection2["manifest_text"],
+ r'./testdir d41d8cd98f00b204e9800998ecf8427e\+0\+A\S+ 0:0:\\056\n')
self.pool.apply(fuseRmTestHelperRmdir, (self.mounttmp,))
@@ -674,7 +675,7 @@ class FuseMvFileTest(MountTestBase):
collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()
self.assertRegexpMatches(collection2["manifest_text"],
- r'\. 86fb269d190d2c85f6e0468ceca42a20\+12\+A\S+ 0:12:file1\.txt$')
+ r'\. 86fb269d190d2c85f6e0468ceca42a20\+12\+A\S+ 0:12:file1\.txt\n\./testdir d41d8cd98f00b204e9800998ecf8427e\+0\+A\S+ 0:0:\\056\n')
def fuseRenameTestHelper(mounttmp):
commit 3b0327f9a02dcde5bac5ec37433ff6fb1c42dc51
Author: Lucas Di Pentima <ldipentima at veritasgenetics.com>
Date: Mon Dec 17 20:35:31 2018 -0300
14539: Tests that '\056' cannot be used as a file or directory name.
Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <ldipentima at veritasgenetics.com>
diff --git a/build/run-tests.sh b/build/run-tests.sh
index cd44347cb..50c578269 100755
--- a/build/run-tests.sh
+++ b/build/run-tests.sh
@@ -37,7 +37,7 @@ CONFIGSRC=path Dir with api server config files to copy into source tree.
(If none given, leave config files alone in source tree.)
services/api_test="TEST=test/functional/arvados/v1/collections_controller_test.rb"
Restrict apiserver tests to the given file
-sdk/python_test="--test-suite test.test_keep_locator"
+sdk/python_test="--test-suite tests.test_keep_locator"
Restrict Python SDK tests to the given class
apps/workbench_test="TEST=test/integration/pipeline_instances_test.rb"
Restrict Workbench tests to the given file
diff --git a/sdk/python/tests/test_collections.py b/sdk/python/tests/test_collections.py
index b5c0c1c52..55dd0838f 100644
--- a/sdk/python/tests/test_collections.py
+++ b/sdk/python/tests/test_collections.py
@@ -957,6 +957,18 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
c.remove("foo/count2.txt")
self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n", c.portable_manifest_text())
+ def test_create_dot_file(self):
+ c = Collection()
+ with self.assertRaises(IOError):
+ with c.open("./dir/\\056", "wb") as f:
+ f.write("Should not be allowed")
+
+ def test_create_file_inside_dot_dir(self):
+ c = Collection()
+ with self.assertRaises(IOError):
+ with c.open("./dir/\\056/foo", "wb") as f:
+ f.write("Should not be allowed")
+
def test_remove_empty_subdir(self):
c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
c.remove("foo/count2.txt")
commit 7d804c0b62975b4059dea757dbc2fbd0320c1497
Author: Lucas Di Pentima <ldipentima at veritasgenetics.com>
Date: Mon Dec 17 18:18:52 2018 -0300
14539: Persists empty directories by adding a '\056' empty file.
Also:
* Fixes test
* Errors out when trying to open a file/dir with '\056' on its path.
Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <ldipentima at veritasgenetics.com>
diff --git a/sdk/python/arvados/collection.py b/sdk/python/arvados/collection.py
index 627f0346d..c2517c618 100644
--- a/sdk/python/arvados/collection.py
+++ b/sdk/python/arvados/collection.py
@@ -600,6 +600,9 @@ class RichCollectionBase(CollectionBase):
pathcomponents = path.split("/", 1)
if pathcomponents[0]:
+ # Don't allow naming files/dirs \\056
+ if pathcomponents[0] == "\\056":
+ raise IOError(errno.EINVAL, "Invalid name", pathcomponents[0])
item = self._items.get(pathcomponents[0])
if len(pathcomponents) == 1:
if item is None:
@@ -1058,7 +1061,9 @@ class RichCollectionBase(CollectionBase):
if stream:
buf.append(" ".join(normalize_stream(stream_name, stream)) + "\n")
for dirname in [s for s in sorted_keys if isinstance(self[s], RichCollectionBase)]:
- buf.append(self[dirname].manifest_text(stream_name=os.path.join(stream_name, dirname), strip=strip, normalize=True, only_committed=only_committed))
+ buf.append(self[dirname].manifest_text(
+ stream_name=os.path.join(stream_name, dirname),
+ strip=strip, normalize=True, only_committed=only_committed))
return "".join(buf)
else:
if strip:
@@ -1833,6 +1838,15 @@ class Subcollection(RichCollectionBase):
self.name = newname
self.lock = self.parent.root_collection().lock
+ @synchronized
+ def _get_manifest_text(self, stream_name, strip, normalize, only_committed=False):
+ """Encode empty directories by using an \056-named (".") empty file"""
+ if len(self._items) == 0:
+ return "%s %s 0:0:\\056\n" % (stream_name, config.EMPTY_BLOCK_LOCATOR)
+ return super(Subcollection, self)._get_manifest_text(stream_name,
+ strip, normalize,
+ only_committed)
+
class CollectionReader(Collection):
"""A read-only collection object.
diff --git a/sdk/python/tests/test_collections.py b/sdk/python/tests/test_collections.py
index de0100674..b5c0c1c52 100644
--- a/sdk/python/tests/test_collections.py
+++ b/sdk/python/tests/test_collections.py
@@ -955,7 +955,7 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
def test_remove_in_subdir(self):
c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
c.remove("foo/count2.txt")
- self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.portable_manifest_text())
+ self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n", c.portable_manifest_text())
def test_remove_empty_subdir(self):
c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list