[ARVADOS] updated: 1aa683826a4e9d1956167f812e12a9386de58c81

Thu Nov 13 14:53:24 EST 2014

Summary of changes:
 sdk/python/arvados/collection.py      | 24 ++++++++++++-----
 sdk/python/arvados/stream.py          |  7 ++---
 sdk/python/tests/test_collections.py  |  4 +++
 services/api/app/models/collection.rb |  6 ++---
 services/fuse/setup.py                |  2 +-
 services/fuse/tests/test_mount.py     | 51 ++++++++++++++++++++++++-----------
 6 files changed, 65 insertions(+), 29 deletions(-)

       via  1aa683826a4e9d1956167f812e12a9386de58c81 (commit)
       via  f78caf95ed818b476a1b9de1c4fc868aa020f185 (commit)
       via  069b22b2d1196d0437253e60e616840ea141e1a9 (commit)
       via  4d01f18d6a1129fb3f2f0efc4bda31795519fda2 (commit)
       via  e363a7973a911a6484b24d4e7f92f363d4d3e521 (commit)
      from  53192c327dcce6159d21f6cc27f7d5c0bfc9e7b0 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit 1aa683826a4e9d1956167f812e12a9386de58c81
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Nov 13 14:45:17 2014 -0500

    4363: Compare unicode strings (ignore utf-8 encoding differences). Bump fuse->sdk version dependency.

diff --git a/services/fuse/setup.py b/services/fuse/setup.py
index 1e72fee..5cf9d23 100644
--- a/services/fuse/setup.py
+++ b/services/fuse/setup.py
@@ -36,7 +36,7 @@ setup(name='arvados_fuse',
         'bin/arv-mount'
         ],
       install_requires=[
-        'arvados-python-client>=0.1.20141103223015.68dae83',
+        'arvados-python-client>=0.1.20141113192857.f78caf9',
         'llfuse',
         'python-daemon'
         ],
diff --git a/services/fuse/tests/test_mount.py b/services/fuse/tests/test_mount.py
index 7e5325d..262e85a 100644
--- a/services/fuse/tests/test_mount.py
+++ b/services/fuse/tests/test_mount.py
@@ -79,7 +79,7 @@ class FuseMountTest(MountTestBase):
 
         self._utf8 = ["\xe2\x9c\x8c",     # victory sign
                       "\xe2\x9b\xb5",     # sailboat
-                      "\xf0\x9f\x98\xb1", # scream
+                      # "\xf0\x9f\x98\xb1", # scream. doesn't work!!
                       ]
         cw.start_new_stream('edgecases/utf8')
         for f in self._utf8:
@@ -93,7 +93,8 @@ class FuseMountTest(MountTestBase):
         path = self.mounttmp
         if subdir:
             path = os.path.join(path, subdir)
-        self.assertEqual(sorted(expect_content), sorted(os.listdir(path)))
+        self.assertEqual(sorted([fn.decode('utf-8') for fn in expect_content]),
+                         sorted([fn.decode('utf-8') for fn in os.listdir(path)]))
 
     def runTest(self):
         # Create the request handler

commit f78caf95ed818b476a1b9de1c4fc868aa020f185
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Nov 13 14:28:57 2014 -0500

    4363: Do not blow up when manifests contain utf-8 characters.

diff --git a/sdk/python/arvados/collection.py b/sdk/python/arvados/collection.py
index 0f49438..0fe7b3e 100644
--- a/sdk/python/arvados/collection.py
+++ b/sdk/python/arvados/collection.py
@@ -42,14 +42,14 @@ def normalize_stream(s, stream):
                 if segmentoffset == current_span[1]:
                     current_span[1] += segment[arvados.SEGMENTSIZE]
                 else:
-                    stream_tokens.append("{0}:{1}:{2}".format(current_span[0], current_span[1] - current_span[0], fout))
+                    stream_tokens.append(u"{0}:{1}:{2}".format(current_span[0], current_span[1] - current_span[0], fout))
                     current_span = [segmentoffset, segmentoffset + segment[arvados.SEGMENTSIZE]]
 
         if current_span is not None:
-            stream_tokens.append("{0}:{1}:{2}".format(current_span[0], current_span[1] - current_span[0], fout))
+            stream_tokens.append(u"{0}:{1}:{2}".format(current_span[0], current_span[1] - current_span[0], fout))
 
         if not stream[f]:
-            stream_tokens.append("0:0:{0}".format(fout))
+            stream_tokens.append(u"0:0:{0}".format(fout))
 
     return stream_tokens
 
@@ -186,9 +186,14 @@ class CollectionReader(CollectionBase):
                     self._manifest_locator,
                     error_via_api,
                     error_via_keep))
-        self._streams = [sline.split()
-                         for sline in self._manifest_text.split("\n")
-                         if sline]
+        if type(self._manifest_text) == unicode:
+            unicode_manifest = self._manifest_text
+        else:
+            unicode_manifest = self._manifest_text.decode('utf-8')
+        self._streams = [
+            sline.split()
+            for sline in unicode_manifest.split("\n")
+            if sline]
 
     def normalize(self):
         self._populate()
@@ -211,7 +216,8 @@ class CollectionReader(CollectionBase):
         # Regenerate the manifest text based on the normalized streams
         self._manifest_text = ''.join(
             [StreamReader(stream, keep=self._my_keep()).manifest_text()
-             for stream in self._streams])
+             for stream in self._streams]
+            ).encode('utf-8')
 
     def open(self, streampath, filename=None):
         """open(streampath[, filename]) -> file-like object
diff --git a/sdk/python/arvados/stream.py b/sdk/python/arvados/stream.py
index c263dd8..cd8153e 100644
--- a/sdk/python/arvados/stream.py
+++ b/sdk/python/arvados/stream.py
@@ -108,11 +108,12 @@ class StreamFileReader(ArvadosFileBase):
         # Older SDK provided a name() method.
         # This class provides both, for maximum compatibility.
         def __call__(self):
-            return self
+            return self.decode('utf-8')
 
 
     def __init__(self, stream, segments, name):
-        super(StreamFileReader, self).__init__(self._NameAttribute(name), 'rb')
+        super(StreamFileReader, self).__init__(
+            self._NameAttribute(name.encode('utf-8')), 'rb')
         self._stream = stream
         self.segments = segments
         self._filepos = 0L
@@ -327,7 +328,7 @@ class StreamReader(object):
                 manifest_text.append(m.group(0))
         else:
             manifest_text.extend([d[LOCATOR] for d in self._data_locators])
-        manifest_text.extend([' '.join(["{}:{}:{}".format(seg[LOCATOR], seg[BLOCKSIZE], f.name().replace(' ', '\\040'))
+        manifest_text.extend([' '.join([u"{}:{}:{}".format(seg[LOCATOR], seg[BLOCKSIZE], f.name().replace(' ', '\\040'))
                                         for seg in f.segments])
                               for f in self._files.values()])
         return ' '.join(manifest_text) + '\n'
diff --git a/sdk/python/tests/test_collections.py b/sdk/python/tests/test_collections.py
index 254a29f..be62ad4 100644
--- a/sdk/python/tests/test_collections.py
+++ b/sdk/python/tests/test_collections.py
@@ -209,6 +209,10 @@ class ArvadosCollectionsTest(run_test_server.TestCaseWithServers,
 """
         self.assertEqual(arvados.CollectionReader(m8, self.api_client).manifest_text(normalize=True), m8)
 
+        m_utf8 = """./\xe2\x9b\xb5 59ca0efa9f5633cb0371bbc0355478d8+13 0:13:\xf0\x9f\x98\xb1
+"""
+        self.assertEqual(arvados.CollectionReader(m_utf8, self.api_client).manifest_text(normalize=True), m_utf8)
+
     def test_locators_and_ranges(self):
         blocks2 = [['a', 10, 0],
                   ['b', 10, 10],
diff --git a/services/api/app/models/collection.rb b/services/api/app/models/collection.rb
index accd2cc..28fc70e 100644
--- a/services/api/app/models/collection.rb
+++ b/services/api/app/models/collection.rb
@@ -76,7 +76,7 @@ class Collection < ArvadosModel
 
   def set_portable_data_hash
     if (self.portable_data_hash.nil? or (self.portable_data_hash == "") or (manifest_text_changed? and !portable_data_hash_changed?))
-      self.portable_data_hash = "#{Digest::MD5.hexdigest(manifest_text)}+#{manifest_text.length}"
+      self.portable_data_hash = "#{Digest::MD5.hexdigest(manifest_text)}+#{manifest_text.bytesize}"
     elsif portable_data_hash_changed?
       begin
         loc = Keep::Locator.parse!(self.portable_data_hash)
@@ -84,7 +84,7 @@ class Collection < ArvadosModel
         if loc.size
           self.portable_data_hash = loc.to_s
         else
-          self.portable_data_hash = "#{loc.hash}+#{self.manifest_text.length}"
+          self.portable_data_hash = "#{loc.hash}+#{self.manifest_text.bytesize}"
         end
       rescue ArgumentError => e
         errors.add(:portable_data_hash, "#{e}")
@@ -96,7 +96,7 @@ class Collection < ArvadosModel
 
   def ensure_hash_matches_manifest_text
     if manifest_text_changed? or portable_data_hash_changed?
-      computed_hash = "#{Digest::MD5.hexdigest(manifest_text)}+#{manifest_text.length}"
+      computed_hash = "#{Digest::MD5.hexdigest(manifest_text)}+#{manifest_text.bytesize}"
       unless computed_hash == portable_data_hash
         logger.debug "(computed) '#{computed_hash}' != '#{portable_data_hash}' (provided)"
         errors.add(:portable_data_hash, "does not match hash of manifest_text")
diff --git a/services/fuse/tests/test_mount.py b/services/fuse/tests/test_mount.py
index 3a59084..7e5325d 100644
--- a/services/fuse/tests/test_mount.py
+++ b/services/fuse/tests/test_mount.py
@@ -77,6 +77,15 @@ class FuseMountTest(MountTestBase):
             cw.start_new_file('x/x')
             cw.write('x')
 
+        self._utf8 = ["\xe2\x9c\x8c",     # victory sign
+                      "\xe2\x9b\xb5",     # sailboat
+                      "\xf0\x9f\x98\xb1", # scream
+                      ]
+        cw.start_new_stream('edgecases/utf8')
+        for f in self._utf8:
+            cw.start_new_file(f)
+            cw.write(f)
+
         self.testcollection = cw.finish()
         self.api.collections().create(body={"manifest_text":cw.manifest_text()}).execute()
 
@@ -105,9 +114,10 @@ class FuseMountTest(MountTestBase):
         self.assertDirContents('dir2', ['thing5.txt', 'thing6.txt', 'dir3'])
         self.assertDirContents('dir2/dir3', ['thing7.txt', 'thing8.txt'])
         self.assertDirContents('edgecases',
-                               "dirs/:/_/__/.../-/*/\x01\\/ ".split("/"))
+                               "dirs/utf8/:/_/__/.../-/*/\x01\\/ ".split("/"))
         self.assertDirContents('edgecases/dirs',
                                ":/__/.../-/*/\x01\\/ ".split("/"))
+        self.assertDirContents('edgecases/utf8', self._utf8)
 
         files = {'thing1.txt': 'data 1',
                  'thing2.txt': 'data 2',

commit 069b22b2d1196d0437253e60e616840ea141e1a9
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Nov 13 11:54:36 2014 -0500

    4363: Test edge cases as directory names, too.

diff --git a/services/fuse/tests/test_mount.py b/services/fuse/tests/test_mount.py
index eb89a2a..3a59084 100644
--- a/services/fuse/tests/test_mount.py
+++ b/services/fuse/tests/test_mount.py
@@ -72,6 +72,11 @@ class FuseMountTest(MountTestBase):
             cw.start_new_file(f)
             cw.write('x')
 
+        for f in ":/../.../-/*/\x01\\/ ".split("/"):
+            cw.start_new_stream('edgecases/dirs/' + f)
+            cw.start_new_file('x/x')
+            cw.write('x')
+
         self.testcollection = cw.finish()
         self.api.collections().create(body={"manifest_text":cw.manifest_text()}).execute()
 
@@ -100,7 +105,9 @@ class FuseMountTest(MountTestBase):
         self.assertDirContents('dir2', ['thing5.txt', 'thing6.txt', 'dir3'])
         self.assertDirContents('dir2/dir3', ['thing7.txt', 'thing8.txt'])
         self.assertDirContents('edgecases',
-                               ":/_/__/.../-/*/\x01\\/ ".split("/"))
+                               "dirs/:/_/__/.../-/*/\x01\\/ ".split("/"))
+        self.assertDirContents('edgecases/dirs',
+                               ":/__/.../-/*/\x01\\/ ".split("/"))
 
         files = {'thing1.txt': 'data 1',
                  'thing2.txt': 'data 2',

commit 4d01f18d6a1129fb3f2f0efc4bda31795519fda2
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Nov 13 11:42:08 2014 -0500

    4363: Test that munged filenames show up in os.listdir().

diff --git a/services/fuse/tests/test_mount.py b/services/fuse/tests/test_mount.py
index f9d06de..eb89a2a 100644
--- a/services/fuse/tests/test_mount.py
+++ b/services/fuse/tests/test_mount.py
@@ -67,9 +67,20 @@ class FuseMountTest(MountTestBase):
         cw.start_new_file('thing8.txt')
         cw.write("data 8")
 
+        cw.start_new_stream('edgecases')
+        for f in ":/./../.../-/*/\x01\\/ ".split("/"):
+            cw.start_new_file(f)
+            cw.write('x')
+
         self.testcollection = cw.finish()
         self.api.collections().create(body={"manifest_text":cw.manifest_text()}).execute()
 
+    def assertDirContents(self, subdir, expect_content):
+        path = self.mounttmp
+        if subdir:
+            path = os.path.join(path, subdir)
+        self.assertEqual(sorted(expect_content), sorted(os.listdir(path)))
+
     def runTest(self):
         # Create the request handler
         operations = fuse.Operations(os.getuid(), os.getgid())
@@ -83,21 +94,13 @@ class FuseMountTest(MountTestBase):
         operations.initlock.wait()
 
         # now check some stuff
-        d1 = os.listdir(self.mounttmp)
-        d1.sort()
-        self.assertEqual(['dir1', 'dir2', 'thing1.txt', 'thing2.txt'], d1)
-
-        d2 = os.listdir(os.path.join(self.mounttmp, 'dir1'))
-        d2.sort()
-        self.assertEqual(['thing3.txt', 'thing4.txt'], d2)
-
-        d3 = os.listdir(os.path.join(self.mounttmp, 'dir2'))
-        d3.sort()
-        self.assertEqual(['dir3', 'thing5.txt', 'thing6.txt'], d3)
-
-        d4 = os.listdir(os.path.join(self.mounttmp, 'dir2/dir3'))
-        d4.sort()
-        self.assertEqual(['thing7.txt', 'thing8.txt'], d4)
+        self.assertDirContents(None, ['thing1.txt', 'thing2.txt',
+                                      'edgecases', 'dir1', 'dir2'])
+        self.assertDirContents('dir1', ['thing3.txt', 'thing4.txt'])
+        self.assertDirContents('dir2', ['thing5.txt', 'thing6.txt', 'dir3'])
+        self.assertDirContents('dir2/dir3', ['thing7.txt', 'thing8.txt'])
+        self.assertDirContents('edgecases',
+                               ":/_/__/.../-/*/\x01\\/ ".split("/"))
 
         files = {'thing1.txt': 'data 1',
                  'thing2.txt': 'data 2',

commit e363a7973a911a6484b24d4e7f92f363d4d3e521
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Nov 13 11:40:23 2014 -0500

    4363: Reject NUL characters in filenames in CollectionWriter.

diff --git a/sdk/python/arvados/collection.py b/sdk/python/arvados/collection.py
index fa782a1..0f49438 100644
--- a/sdk/python/arvados/collection.py
+++ b/sdk/python/arvados/collection.py
@@ -472,6 +472,10 @@ class CollectionWriter(CollectionBase):
             raise errors.AssertionError(
                 "Manifest filenames cannot contain whitespace: %s" %
                 newfilename)
+        elif re.search(r'\x00', newfilename):
+            raise errors.AssertionError(
+                "Manifest filenames cannot contain NUL characters: %s" %
+                newfilename)
         self._current_file_name = newfilename
 
     def current_file_name(self):

-----------------------------------------------------------------------


hooks/post-receive
--