[ARVADOS] updated: 9f2325b6c76c7e57d9410bb18f804f4754ae85b3

git at public.curoverse.com git at public.curoverse.com
Sat Jun 13 01:12:19 EDT 2015


Summary of changes:
 sdk/ruby/lib/arvados/keep.rb        | 32 ++++++++++----
 sdk/ruby/test/test_keep_manifest.rb | 84 ++++++++++++++++++++++++++++---------
 2 files changed, 87 insertions(+), 29 deletions(-)

       via  9f2325b6c76c7e57d9410bb18f804f4754ae85b3 (commit)
      from  b92d4bfa6f4378756df5d28360bb708e9251917a (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit 9f2325b6c76c7e57d9410bb18f804f4754ae85b3
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Jun 11 01:58:49 2015 -0400

    6277: Catch whitespace errors, and "." and ".." in paths. Rename valid? to validate!.

diff --git a/sdk/ruby/lib/arvados/keep.rb b/sdk/ruby/lib/arvados/keep.rb
index bcab5fc..62f231f 100644
--- a/sdk/ruby/lib/arvados/keep.rb
+++ b/sdk/ruby/lib/arvados/keep.rb
@@ -97,8 +97,8 @@ module Keep
   end
 
   class Manifest
-    STREAM_REGEXP = /(\.)((\/+.*[^\/])*)$/
-    FILE_REGEXP = /^[[:digit:]]+:[[:digit:]]+:(?!\/).*[^\/]$/
+    STRICT_STREAM_TOKEN_REGEXP = /^(\.)(\/[^\/\s]+)*$/
+    STRICT_FILE_TOKEN_REGEXP = /^[[:digit:]]+:[[:digit:]]+:([^\s\/]+(\/[^\s\/]+)*)$/
 
     # Class to parse a manifest text and provide common views of that data.
     def initialize(manifest_text)
@@ -228,22 +228,21 @@ module Keep
       false
     end
 
-    # Verify that a given manifest is valid as per the manifest format definition.
-    # Valid format: stream name + one or more locators + one or more files for each stream in manifest.
+    # Verify that a given manifest is valid according to
     # https://arvados.org/projects/arvados/wiki/Keep_manifest_format
-    def self.valid?(manifest)
-      raise ArgumentError.new "Invalid manifest: does not end with new line" if !manifest.end_with?("\n")
+    def self.validate! manifest
+      raise ArgumentError.new "Invalid manifest: does not end with newline" if !manifest.end_with?("\n")
       line_count = 0
       manifest.each_line do |line|
         line_count += 1
 
-        words = line.split(/[[:space:]]/)
+        words = line[0..-2].split(/ /)
         raise ArgumentError.new "Manifest invalid for stream #{line_count}: missing stream name" if words.empty?
 
         count = 0
 
         word = words.shift
-        count += 1 if word =~ STREAM_REGEXP and !word.include? '//'
+        count += 1 if word =~ STRICT_STREAM_TOKEN_REGEXP and word !~ /\/\.\.?(\/|$)/
         raise ArgumentError.new "Manifest invalid for stream #{line_count}: missing or invalid stream name #{word.inspect if word}" if count != 1
 
         count = 0
@@ -255,7 +254,7 @@ module Keep
         raise ArgumentError.new "Manifest invalid for stream #{line_count}: missing or invalid locator #{word.inspect if word}" if count == 0
 
         count = 0
-        while(word =~ FILE_REGEXP and !word.include? '//')
+        while word =~ STRICT_FILE_TOKEN_REGEXP and ($~[1].split('/') & ['..','.']).empty?
           word = words.shift
           count += 1
         end
@@ -265,6 +264,21 @@ module Keep
         elsif count == 0
           raise ArgumentError.new "Manifest invalid for stream #{line_count}: no file tokens"
         end
+
+        # Ruby's split() method silently drops trailing empty tokens
+        # (which are not allowed by the manifest format) so we have to
+        # check trailing spaces manually.
+        raise ArgumentError.new "Manifest invalid for stream #{line_count}: trailing space" if line.end_with? " \n"
+      end
+      true
+    end
+
+    def self.valid? manifest
+      begin
+        validate! manifest
+        true
+      rescue ArgumentError
+        false
       end
     end
   end
diff --git a/sdk/ruby/test/test_keep_manifest.rb b/sdk/ruby/test/test_keep_manifest.rb
index 18a2b06..620bec5 100644
--- a/sdk/ruby/test/test_keep_manifest.rb
+++ b/sdk/ruby/test/test_keep_manifest.rb
@@ -281,19 +281,61 @@ class ManifestTest < Minitest::Test
   end
 
   [
+    [true, ". d41d8cd98f00b204e9800998ecf8427e 0:0:abc.txt\n"],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:abc.txt\n"],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e a41d8cd98f00b204e9800998ecf8427e+0 0:0:abc.txt\n"], # 2 locators
+    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo/bar.txt\n"],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:.foo.txt\n"],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:.foo\n"],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:...\n"],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:.../.foo./.../bar\n"],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo/...\n"],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo/.../bar\n"],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo/.bar/baz.txt\n"],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo/bar./baz.txt\n"],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 000000000000000000000000000000:0777:foo.txt\n"],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:0:0\n"],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\040\n"],
+    [true, ". 00000000000000000000000000000000+0 0:0:0\n"],
+    [true, ". 00000000000000000000000000000000+0 0:0:d41d8cd98f00b204e9800998ecf8427e+0+Ad41d8cd98f00b204e9800998ecf8427e00000000 at ffffffff\n"],
     [false, '. d41d8cd98f00b204e9800998ecf8427e 0:0:abc.txt',
-      "Invalid manifest: does not end with new line"],
+      "Invalid manifest: does not end with newline"],
     [false, "abc d41d8cd98f00b204e9800998ecf8427e 0:0:abc.txt\n",
       "invalid stream name \"abc\""],
+    [false, "abc/./foo d41d8cd98f00b204e9800998ecf8427e 0:0:abc.txt\n",
+      "invalid stream name \"abc/./foo\""],
+    [false, "./abc/../foo d41d8cd98f00b204e9800998ecf8427e 0:0:abc.txt\n",
+      "invalid stream name \"./abc/../foo\""],
+    [false, "./abc/. d41d8cd98f00b204e9800998ecf8427e 0:0:abc.txt\n",
+      "invalid stream name \"./abc/.\""],
+    [false, "./abc/.. d41d8cd98f00b204e9800998ecf8427e 0:0:abc.txt\n",
+      "invalid stream name \"./abc/..\""],
+    [false, "./abc/./foo d41d8cd98f00b204e9800998ecf8427e 0:0:abc.txt\n",
+      "invalid stream name \"./abc/./foo\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e 0:0:.\n",
+      "invalid file token \"0:0:.\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e 0:0:..\n",
+      "invalid file token \"0:0:..\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e 0:0:./abc.txt\n",
+      "invalid file token \"0:0:./abc.txt\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e 0:0:../abc.txt\n",
+      "invalid file token \"0:0:../abc.txt\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e 0:0:abc.txt/.\n",
+      "invalid file token \"0:0:abc.txt/.\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e 0:0:abc.txt/..\n",
+      "invalid file token \"0:0:abc.txt/..\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e 0:0:a/./bc.txt\n",
+      "invalid file token \"0:0:a/./bc.txt\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e 0:0:a/../bc.txt\n",
+      "invalid file token \"0:0:a/../bc.txt\""],
+    [false, "./abc/./foo d41d8cd98f00b204e9800998ecf8427e 0:0:abc.txt\n",
+      "invalid stream name \"./abc/./foo\""],
     [false, "d41d8cd98f00b204e9800998ecf8427e+0 0:0:abc.txt\n",
       "invalid stream name \"d41d8cd98f00b204e9800998ecf8427e+0\""],
     [false, ". d41d8cd98f00b204e9800998ecf8427 0:0:abc.txt\n",
       "invalid locator \"d41d8cd98f00b204e9800998ecf8427\""],
     [false, ". d41d8cd98f00b204e9800998ecf8427e\n",
       "Manifest invalid for stream 1: no file tokens"],
-    [true, ". d41d8cd98f00b204e9800998ecf8427e 0:0:abc.txt\n"],
-    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:abc.txt\n"],
-    [true, ". d41d8cd98f00b204e9800998ecf8427e a41d8cd98f00b204e9800998ecf8427e+0 0:0:abc.txt\n"], # 2 locators
     [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:abc.txt\n/dir1 d41d8cd98f00b204e9800998ecf842 0:0:abc.txt\n",
       "Manifest invalid for stream 2: missing or invalid stream name \"/dir1\""],
     [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:abc.txt\n./dir1 d41d8cd98f00b204e9800998ecf842 0:0:abc.txt\n",
@@ -304,12 +346,6 @@ class ManifestTest < Minitest::Test
       "Manifest invalid for stream 2: invalid file token \"0:abc.txt\""],
     [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:abc.txt\n./dir1 a41d8cd98f00b204e9800998ecf8427e+0 0:0:abc.txt xyz.txt\n",
       "Manifest invalid for stream 2: invalid file token \"xyz.txt\""],
-    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo/bar.txt\n"],
-    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 000000000000000000000000000000:0777:foo.txt\n"],
-    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:0:0\n"],
-    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\040\n"],
-    [true, ". 00000000000000000000000000000000+0 0:0:0\n"],
-    [true, ". 00000000000000000000000000000000+0 0:0:d41d8cd98f00b204e9800998ecf8427e+0+Ad41d8cd98f00b204e9800998ecf8427e00000000 at ffffffff\n"],
     [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt d41d8cd98f00b204e9800998ecf8427e+0\n",
       "Manifest invalid for stream 1: invalid file token \"d41d8cd98f00b204e9800998ecf8427e+0\""],
     [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:\n",
@@ -320,26 +356,33 @@ class ManifestTest < Minitest::Test
       "Manifest invalid for stream 1: missing or invalid locator \"0:0:foo.txt\""],
     [false, ". 0:0:foo.txt\n", "Manifest invalid for stream 1: missing or invalid locator \"0:0:foo.txt\""],
     [false, ".\n", "Manifest invalid for stream 1: missing or invalid locator"],
-    [false, ".", "Invalid manifest: does not end with new line"],
+    [false, ".", "Invalid manifest: does not end with newline"],
     [false, ". \n", "Manifest invalid for stream 1: missing or invalid locator"],
     [false, ".  \n", "Manifest invalid for stream 1: missing or invalid locator"],
-# This does not fail since \t is interpreted as one white space character and hence is split upon
-#    [false, ".\td41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\n", "Manifest invalid for stream 1: no file tokens"],
     [false, " . d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\n", "Manifest invalid for stream 1: missing or invalid stream name"],
-# This does not fail since the white space after foo.txt is used to split upon
-#    [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt \n", "Manifest invalid for stream 1: no file tokens"],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt \n", "stream 1: trailing space"],
+   # TAB and other tricky whitespace characters:
+    [false, "\v. d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\n", "Manifest invalid for stream 1: missing or invalid stream name \"\\v."],
+    [false, "./foo\vbar d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\n", "Manifest invalid for stream 1: missing or invalid stream name \"./foo\\vbar"],
+    [false, "\t. d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\n", "Manifest invalid for stream 1: missing or invalid stream name \"\\t"],
+    [false, ".\td41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\n", "Manifest invalid for stream 1: missing or invalid stream name \".\\t"],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\t\n", "stream 1: invalid file token \"0:0:foo.txt\\t\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0\t 0:0:foo.txt\n", "stream 1: missing or invalid locator \"d41d8cd98f00b204e9800998ecf8427e+0\\t\""],
+    [false, "./foo\tbar d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\n", "stream 1: missing or invalid stream name \"./foo\\tbar\""],
+   # other whitespace errors:
     [false, ". d41d8cd98f00b204e9800998ecf8427e+0  0:0:foo.txt\n",
-      "Manifest invalid for stream 1: invalid file token"],
+      "Manifest invalid for stream 1: invalid file token \"\""],
     [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\n \n",
       "Manifest invalid for stream 2: missing stream name"],
     [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\n\n",
       "Manifest invalid for stream 2: missing stream name"],
     [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\n ",
-      "Invalid manifest: does not end with new line"],
+      "Invalid manifest: does not end with newline"],
     [false, "\n. d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\n",
       "Manifest invalid for stream 1: missing stream name"],
     [false, " \n. d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\n",
       "Manifest invalid for stream 1: missing stream name"],
+   # empty file and stream name components:
     [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:/foo.txt\n",
       "Manifest invalid for stream 1: invalid file token \"0:0:/foo.txt\""],
     [false, "./ d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\n",
@@ -355,12 +398,13 @@ class ManifestTest < Minitest::Test
     [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo/\n",
       "Manifest invalid for stream 1: invalid file token \"0:0:foo/\""],
   ].each do |ok, manifest, expected_error=nil|
-    define_method "test_manifest_valid_#{ok}_#{manifest}_and_expect_error_#{expected_error}" do
+    define_method "test_validate manifest #{manifest.inspect}" do
+      assert_equal ok, Keep::Manifest.valid?(manifest)
       if ok
-        assert Keep::Manifest.valid? manifest
+        assert Keep::Manifest.validate! manifest
       else
         begin
-          Keep::Manifest.valid? manifest
+          Keep::Manifest.validate! manifest
         rescue ArgumentError => e
           msg = e.message
         end

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list