[ARVADOS] created: 1.3.0-139-g7b86c93c1
Git user
git at public.curoverse.com
Mon Jan 7 09:16:51 EST 2019
at 7b86c93c104581fbf7a1fc9bfeb1ae300b9ee29e (commit)
commit 7b86c93c104581fbf7a1fc9bfeb1ae300b9ee29e
Author: Lucas Di Pentima <ldipentima at veritasgenetics.com>
Date: Mon Jan 7 11:12:01 2019 -0300
14482: Handle escaped chars on manifest validation.
* Escaped & unescaped chars are treated equally, except for \040.
* Empty '.' (and \056) named file tokens are accepted.
Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <ldipentima at veritasgenetics.com>
diff --git a/sdk/ruby/lib/arvados/keep.rb b/sdk/ruby/lib/arvados/keep.rb
index ce1331ac1..2d444d074 100644
--- a/sdk/ruby/lib/arvados/keep.rb
+++ b/sdk/ruby/lib/arvados/keep.rb
@@ -103,6 +103,7 @@ module Keep
class Manifest
STRICT_STREAM_TOKEN_REGEXP = /^(\.)(\/[^\/\s]+)*$/
STRICT_FILE_TOKEN_REGEXP = /^[[:digit:]]+:[[:digit:]]+:([^\s\/]+(\/[^\s\/]+)*)$/
+ EMPTY_DOT_FILE_TOKEN_REGEXP = /^0:0:\.$/
# Class to parse a manifest text and provide common views of that data.
def initialize(manifest_text)
@@ -131,18 +132,25 @@ module Keep
end
end
- def unescape(s)
+ def self.unescape(s, except=[])
+ return nil if s.nil?
+
# Parse backslash escapes in a Keep manifest stream or file name.
s.gsub(/\\(\\|[0-7]{3})/) do |_|
- case $1
- when '\\'
+ if $1 == '\\'
'\\'
+ elsif except.include? $1
+ $1
else
$1.to_i(8).chr
end
end
end
+ def unescape(s)
+ self.class.unescape(s)
+ end
+
def split_file_token token
start_pos, filesize, filename = token.split(':', 3)
if filename.nil?
@@ -250,7 +258,8 @@ module Keep
count = 0
word = words.shift
- count += 1 if word =~ STRICT_STREAM_TOKEN_REGEXP and word !~ /\/\.\.?(\/|$)/
+ unescaped_word = unescape(word, except=["040"])
+ count += 1 if unescaped_word =~ STRICT_STREAM_TOKEN_REGEXP and unescaped_word !~ /\/\.\.?(\/|$)/
raise ArgumentError.new "Manifest invalid for stream #{line_count}: missing or invalid stream name #{word.inspect if word}" if count != 1
count = 0
@@ -262,7 +271,8 @@ module Keep
raise ArgumentError.new "Manifest invalid for stream #{line_count}: missing or invalid locator #{word.inspect if word}" if count == 0
count = 0
- while word =~ STRICT_FILE_TOKEN_REGEXP and ($~[1].split('/') & ['..','.']).empty?
+ while unescape(word) =~ EMPTY_DOT_FILE_TOKEN_REGEXP or
+ (unescape(word, except=["040"]) =~ STRICT_FILE_TOKEN_REGEXP and ($~[1].split('/') & ['..', '.']).empty?)
word = words.shift
count += 1
end
diff --git a/sdk/ruby/test/test_keep_manifest.rb b/sdk/ruby/test/test_keep_manifest.rb
index 71a24a28c..bd1312701 100644
--- a/sdk/ruby/test/test_keep_manifest.rb
+++ b/sdk/ruby/test/test_keep_manifest.rb
@@ -320,6 +320,7 @@ class ManifestTest < Minitest::Test
[true, ". 00000000000000000000000000000000+0 0:0:0\n"],
[true, ". 00000000000000000000000000000000+0 0:0:d41d8cd98f00b204e9800998ecf8427e+0+Ad41d8cd98f00b204e9800998ecf8427e00000000 at ffffffff\n"],
[true, ". d41d8cd98f00b204e9800998ecf8427e+0+Ad41d8cd98f00b204e9800998ecf8427e00000000 at ffffffff 0:0:empty.txt\n"],
+ [true, "./empty_dir d41d8cd98f00b204e9800998ecf8427e+0 0:0:.\n"],
[false, '. d41d8cd98f00b204e9800998ecf8427e 0:0:abc.txt',
"Invalid manifest: does not end with newline"],
[false, "abc d41d8cd98f00b204e9800998ecf8427e 0:0:abc.txt\n",
@@ -334,8 +335,9 @@ class ManifestTest < Minitest::Test
"invalid stream name \"./abc/..\""],
[false, "./abc/./foo d41d8cd98f00b204e9800998ecf8427e 0:0:abc.txt\n",
"invalid stream name \"./abc/./foo\""],
- [false, ". d41d8cd98f00b204e9800998ecf8427e 0:0:.\n",
- "invalid file token \"0:0:.\""],
+ # non-empty '.'-named file tokens aren't acceptable. Empty ones are used as empty dir placeholders.
+ [false, ". 8cf8463b34caa8ac871a52d5dd7ad1ef+1 0:1:.\n",
+ "invalid file token \"0:1:.\""],
[false, ". d41d8cd98f00b204e9800998ecf8427e 0:0:..\n",
"invalid file token \"0:0:..\""],
[false, ". d41d8cd98f00b204e9800998ecf8427e 0:0:./abc.txt\n",
@@ -429,6 +431,18 @@ class ManifestTest < Minitest::Test
"Manifest invalid for stream 1: invalid file token \"0:0:foo//bar.txt\""],
[false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo/\n",
"Manifest invalid for stream 1: invalid file token \"0:0:foo/\""],
+ # escaped chars
+ [true, "./empty_dir d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n"],
+ [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\\057bar\n"],
+ [true, ".\\057Data d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n"],
+ [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\\057/bar\n",
+ "Manifest invalid for stream 1: invalid file token \"0:0:foo\\\\057/bar\""],
+ [false, ".\\057/Data d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n",
+ "Manifest invalid for stream 1: missing or invalid stream name \".\\\\057/Data\""],
+ [true, "./Data\\040Folder d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n"],
+ [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\057foo/bar\n",
+ "Manifest invalid for stream 1: invalid file token \"0:0:\\\\057foo/bar\""],
+ [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\134057foo/bar\n"],
].each do |ok, manifest, expected_error=nil|
define_method "test_validate manifest #{manifest.inspect}" do
assert_equal ok, Keep::Manifest.valid?(manifest)
commit 58f1b2d26ba08e28307f45731be157ad61feb8ce
Author: Lucas Di Pentima <ldipentima at veritasgenetics.com>
Date: Fri Jan 4 18:37:15 2019 -0300
14482: Don't list empty dir placeholder files on collections listings.
Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <ldipentima at veritasgenetics.com>
diff --git a/apps/workbench/app/models/collection.rb b/apps/workbench/app/models/collection.rb
index 09af60fb9..f5aef841e 100644
--- a/apps/workbench/app/models/collection.rb
+++ b/apps/workbench/app/models/collection.rb
@@ -68,8 +68,8 @@ class Collection < ArvadosBase
.sort.flat_map do |parts|
[parts + [nil]] + dir_to_tree.call(File.join(parts))
end
- # Then extend that list with files in this directory.
- subnodes + tree[File.split(dirname)]
+ # Then extend that list with files in this directory, except the empty dir placeholders (0:0:. files).
+ subnodes + tree[File.split(dirname)].reject { |_, basename, size| (basename == '.') and (size == 0) }
end
dir_to_tree.call('.')
end
commit 73e01bb3eaef21828ec2ae802440077752b5b7e9
Author: Lucas Di Pentima <ldipentima at veritasgenetics.com>
Date: Fri Jan 4 18:34:58 2019 -0300
14482: Enhance code readability
Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <ldipentima at veritasgenetics.com>
diff --git a/sdk/ruby/lib/arvados/keep.rb b/sdk/ruby/lib/arvados/keep.rb
index b2096b5ea..ce1331ac1 100644
--- a/sdk/ruby/lib/arvados/keep.rb
+++ b/sdk/ruby/lib/arvados/keep.rb
@@ -162,15 +162,15 @@ module Keep
elsif in_file_tokens or not Locator.valid? token
in_file_tokens = true
- file_tokens = split_file_token(token)
+ start_pos, file_size, file_name = split_file_token(token)
stream_name_adjuster = ''
- if file_tokens[2].include?('/') # '/' in filename
- parts = file_tokens[2].rpartition('/')
- stream_name_adjuster = parts[1] + parts[0] # /dir_parts
- file_tokens[2] = parts[2]
+ if file_name.include?('/') # '/' in filename
+ dirname, sep, basename = file_name.rpartition('/')
+ stream_name_adjuster = sep + dirname # /dir_parts
+ file_name = basename
end
- yield [stream_name + stream_name_adjuster] + file_tokens
+ yield [stream_name + stream_name_adjuster, start_pos, file_size, file_name]
end
end
end
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits mailing list