[ARVADOS] updated: cfb2585c5c1744e62eb57c6178b563f477020090

git at public.curoverse.com git at public.curoverse.com
Wed Mar 11 11:14:35 EDT 2015


Summary of changes:
 sdk/ruby/lib/arvados/collection.rb | 266 +++++++++++++++++++------------------
 sdk/ruby/test/test_collection.rb   | 112 ++++++++++------
 2 files changed, 206 insertions(+), 172 deletions(-)

       via  cfb2585c5c1744e62eb57c6178b563f477020090 (commit)
      from  dad9a13b4942f50e9a05d796281770de75eb8660 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit cfb2585c5c1744e62eb57c6178b563f477020090
Author: Brett Smith <brett at curoverse.com>
Date:   Tue Mar 10 17:26:43 2015 -0400

    Fixups from code review.

diff --git a/sdk/ruby/lib/arvados/collection.rb b/sdk/ruby/lib/arvados/collection.rb
index 67c0580..ec0f443 100644
--- a/sdk/ruby/lib/arvados/collection.rb
+++ b/sdk/ruby/lib/arvados/collection.rb
@@ -3,17 +3,9 @@ require "arvados/keep"
 module Arv
   class Collection
     def initialize(manifest_text="")
-      @tree = CollectionStream.new(".")
-      @manifest_text = ""
+      @manifest_text = manifest_text
       @modified = false
-      import_manifest(manifest_text)
-    end
-
-    def manifest_text
-      @manifest_text ||= @tree.manifest_text
-    end
-
-    def import_manifest(manifest_text)
+      @root = CollectionRoot.new
       manifest = Keep::Manifest.new(manifest_text)
       manifest.each_line do |stream_root, locators, file_specs|
         if stream_root.empty? or locators.empty? or file_specs.empty?
@@ -22,16 +14,14 @@ module Arv
         loc_list = LocatorList.new(locators)
         file_specs.map { |s| manifest.split_file_token(s) }.
             each do |file_start, file_len, file_path|
-          @tree.file_at(normalize_path(stream_root, file_path)).
+          @root.file_at(normalize_path(stream_root, file_path)).
             add_segment(loc_list.segment(file_start, file_len))
         end
       end
-      if @manifest_text == ""
-        @manifest_text = manifest_text
-      else
-        modified
-      end
-      self
+    end
+
+    def manifest_text
+      @manifest_text ||= @root.manifest_text
     end
 
     def modified?
@@ -44,12 +34,13 @@ module Arv
     end
 
     def normalize
-      @manifest_text = @tree.manifest_text
+      @manifest_text = @root.manifest_text
       self
     end
 
     def cp_r(source, target, source_collection=nil)
-      copy(:merge, source, target, source_collection)
+      opts = {descend_target: !source.end_with?("/")}
+      copy(:merge, source.chomp("/"), target, source_collection, opts)
     end
 
     def rename(source, target)
@@ -67,12 +58,7 @@ module Arv
     protected
 
     def find(*parts)
-      normpath = normalize_path(*parts)
-      if normpath.empty?
-        [@tree, nil]
-      else
-        @tree.find(normpath)
-      end
+      @root.find(normalize_path(*parts))
     end
 
     private
@@ -85,11 +71,16 @@ module Arv
 
     def normalize_path(*parts)
       path = File.join(*parts)
-      raise ArgumentError.new("empty path") if path.empty?
-      path.sub(/^\.(\/|$)/, "")
+      if path.empty?
+        raise ArgumentError.new("empty path")
+      elsif (path == ".") or path.start_with?("./")
+        path
+      else
+        "./#{path}"
+      end
     end
 
-    def copy(copy_method, source, target, source_collection=nil)
+    def copy(copy_method, source, target, source_collection=nil, opts={})
       # Find the item at path `source` in `source_collection`, find the
       # destination stream at path `target`, and use `copy_method` to copy
       # the found object there.  If a block is passed in, it will be called
@@ -101,28 +92,26 @@ module Arv
       if (source_collection.equal?(self) and
           (src_stream.path == dst_stream.path) and (src_tail == dst_tail))
         return self
-      elsif src_tail.nil?
-        src_item = src_stream
-        src_tail = src_stream.name
-      else
-        src_item = src_stream[src_tail]
       end
+      src_item = src_stream[src_tail]
       dst_tail ||= src_tail
       check_method = "check_can_#{copy_method}".to_sym
-      begin
-        # Find out if `target` refers to a stream we should copy into.
-        tail_stream = dst_stream[dst_tail]
-        tail_stream.send(check_method, src_item, src_tail)
-      rescue Errno::ENOENT, Errno::ENOTDIR
-        # It does not.  Check that we can copy `source` to the full
-        # path specified by `target`.
+      target_name = nil
+      if opts.fetch(:descend_target, true)
+        begin
+          # Find out if `target` refers to a stream we should copy into.
+          tail_stream = dst_stream[dst_tail]
+          tail_stream.send(check_method, src_item, src_tail)
+          # Yes it does.  Copy the item at `source` into it with the same name.
+          dst_stream = tail_stream
+          target_name = src_tail
+        rescue Errno::ENOENT, Errno::ENOTDIR
+          # It does not.  We'll fall back to writing to `target` below.
+        end
+      end
+      if target_name.nil?
         dst_stream.send(check_method, src_item, dst_tail)
         target_name = dst_tail
-      else
-        # Yes, `target` is a stream.  Copy the item at `source` into it with
-        # the same name.
-        dst_stream = tail_stream
-        target_name = src_tail
       end
       # At this point, we know the operation will work.  Call any block as
       # a pre-copy hook.
@@ -130,12 +119,7 @@ module Arv
         yield
         # Re-find the destination stream, in case the block removed
         # the original (that's how rename is implemented).
-        dst_path = normalize_path(dst_stream.path)
-        if dst_path.empty?
-          dst_stream = @tree
-        else
-          dst_stream = @tree.stream_at(dst_path)
-        end
+        dst_stream = @root.stream_at(dst_stream.path)
       end
       dst_stream.send(copy_method, src_item, target_name)
       modified
@@ -143,17 +127,7 @@ module Arv
 
     def remove(path, opts={})
       stream, name = find(path)
-      if name.nil?
-        if not opts[:recursive]
-          raise Errno::EISDIR.new(@tree.path)
-        elsif @tree.leaf?
-          return self
-        else
-          @tree = CollectionStream.new(".")
-        end
-      else
-        stream.delete(name, opts)
-      end
+      stream.delete(name, opts)
       modified
     end
 
@@ -170,24 +144,13 @@ module Arv
     end
 
     class LocatorList
-      def initialize(locators=[])
-        @ranges = []
-        @loc_ranges = {}
-        @last_loc_range = nil
-        extend(locators)
-      end
-
-      def manifest_s
-        @loc_ranges.keys.join(" ")
-      end
-
-      def extend(locators)
-        locators.each do |loc_s|
-          @ranges << new_range_after(@ranges.last, loc_s)
-          unless @loc_ranges.include?(loc_s)
-            @loc_ranges[loc_s] = new_range_after(@last_loc_range, loc_s)
-            @last_loc_range = @loc_ranges[loc_s]
-          end
+      # LocatorList efficiently builds LocatorSegments from a stream manifest.
+      def initialize(locators)
+        next_start = 0
+        @ranges = locators.map do |loc_s|
+          new_range = LocatorRange.new(loc_s, next_start)
+          next_start = new_range.end
+          new_range
         end
       end
 
@@ -205,43 +168,8 @@ module Arv
                            length)
       end
 
-      def specs_for(filename, segment)
-        # Given an escaped filename and a LocatorSegment, add the
-        # locators stored in the Segment, then return the smallest
-        # possible array of file spec strings to build the file from
-        # locators in the list.
-        extend(segment.locators)
-        start_pos = segment.start_pos
-        length = segment.length
-        start_loc = segment.locators.first
-        prev_loc = start_loc
-        result = []
-        # Build a list of file specs by iterating through the segment's
-        # locators and preparing a file spec for each contiguous range.
-        segment.locators[1..-1].each do |loc_s|
-          range = @loc_ranges[loc_s]
-          if range.begin != @loc_ranges[prev_loc].end
-            range_start, range_length =
-              start_and_length_at(start_loc, prev_loc, start_pos, length)
-            result << "#{range_start}:#{range_length}:#{filename}"
-            start_pos = 0
-            length -= range_length
-            start_loc = loc_s
-          end
-          prev_loc = loc_s
-        end
-        range_start, range_length =
-          start_and_length_at(start_loc, prev_loc, start_pos, length)
-        result << "#{range_start}:#{range_length}:#{filename}"
-        result
-      end
-
       private
 
-      def new_range_after(prev_range, loc_s)
-        LocatorRange.new(loc_s, (prev_range.nil?) ? 0 : prev_range.end)
-      end
-
       def search_for_byte(target, start_index=0)
         # Do a binary search for byte `target` in the list of locators,
         # starting from `start_index`.  Return the index of the range in
@@ -262,12 +190,6 @@ module Arv
           end
         end
       end
-
-      def start_and_length_at(start_key, end_key, start_pos, length)
-        range_begin = @loc_ranges[start_key].begin + start_pos
-        range_length = [@loc_ranges[end_key].end - range_begin, length].min
-        [range_begin, range_length]
-      end
     end
 
     class CollectionItem
@@ -408,7 +330,7 @@ module Arv
       end
 
       def add_copy(src_item, key)
-        items[key] = src_item.copy_named("#{path}/#{key}")
+        self[key] = src_item.copy_named("#{path}/#{key}")
       end
 
       def merge(src_item, key)
@@ -423,7 +345,11 @@ module Arv
           dest = self[key]
           error = nil
           # Copy as much as possible, then raise any error encountered.
-          src_item.items.each_pair do |sub_key, sub_item|
+          # Start with streams for a depth-first merge.
+          src_items = src_item.items.each_pair.sort_by do |_, sub_item|
+            (sub_item.file?) ? 1 : 0
+          end
+          src_items.each do |sub_key, sub_item|
             begin
               dest.merge(sub_item, sub_key)
             rescue Errno::ENOTDIR => error
@@ -447,13 +373,17 @@ module Arv
 
       private
 
+      def []=(key, item)
+        items[key] = item
+      end
+
       def get_or_new(key, klass)
         # Return the collection item at `key` and ensure that it's a `klass`.
         # If `key` does not exist, create a new `klass` there.
         # If the value for `key` is not a `klass`, raise an ArgumentError.
         item = items[key]
         if item.nil?
-          items[key] = klass.new("#{path}/#{key}")
+          self[key] = klass.new("#{path}/#{key}")
         elsif not item.is_a?(klass)
           raise ArgumentError.
             new("in stream %p, %p is a %s, not a %s" %
@@ -464,6 +394,41 @@ module Arv
       end
     end
 
+    class CollectionRoot < CollectionStream
+      def initialize
+        super("")
+        setup
+      end
+
+      def delete(name, opts={})
+        super
+        # If that didn't fail, it deleted the . stream.  Recreate it.
+        setup
+      end
+
+      def check_can_merge(src_item, key)
+        if items.include?(key)
+          super
+        else
+          raise_root_write_error(key)
+        end
+      end
+
+      private
+
+      def setup
+        items["."] = CollectionStream.new(".")
+      end
+
+      def raise_root_write_error(key)
+        raise ArgumentError.new("can't write to %p at collection root" % key)
+      end
+
+      def []=(key, item)
+        raise_root_write_error(key)
+      end
+    end
+
     class StreamManifest
       # Build a manifest text for a single stream, without substreams.
       # The manifest includes files in the order they're added.  If you want
@@ -471,14 +436,15 @@ module Arv
 
       def initialize(name)
         @name = name
-        @locators = LocatorList.new
+        @loc_ranges = {}
+        @loc_range_start = 0
         @file_specs = []
       end
 
       def add_file(coll_file)
         coll_file.each_segment do |segment|
-          @file_specs += @locators.specs_for(escape_name(coll_file.name),
-                                             segment)
+          extend_locator_ranges(segment.locators)
+          extend_file_specs(coll_file.name, segment)
         end
       end
 
@@ -486,18 +452,62 @@ module Arv
         if @file_specs.empty?
           ""
         else
-          "%s %s %s\n" % [escape_name(@name), @locators.manifest_s,
+          "%s %s %s\n" % [escape_name(@name),
+                          @loc_ranges.keys.join(" "),
                           @file_specs.join(" ")]
         end
       end
 
       private
 
+      def extend_locator_ranges(locators)
+        locators.
+            select { |loc_s| not @loc_ranges.include?(loc_s) }.
+            each do |loc_s|
+          @loc_ranges[loc_s] = LocatorRange.new(loc_s, @loc_range_start)
+          @loc_range_start = @loc_ranges[loc_s].end
+        end
+      end
+
+      def extend_file_specs(filename, segment)
+        # Given a filename and a LocatorSegment, add the smallest
+        # possible array of file spec strings to @file_specs that
+        # builds the file from available locators.
+        filename = escape_name(filename)
+        start_pos = segment.start_pos
+        length = segment.length
+        start_loc = segment.locators.first
+        prev_loc = start_loc
+        # Build a list of file specs by iterating through the segment's
+        # locators and preparing a file spec for each contiguous range.
+        segment.locators[1..-1].each do |loc_s|
+          range = @loc_ranges[loc_s]
+          if range.begin != @loc_ranges[prev_loc].end
+            range_start, range_length =
+              start_and_length_at(start_loc, prev_loc, start_pos, length)
+            @file_specs << "#{range_start}:#{range_length}:#{filename}"
+            start_pos = 0
+            length -= range_length
+            start_loc = loc_s
+          end
+          prev_loc = loc_s
+        end
+        range_start, range_length =
+          start_and_length_at(start_loc, prev_loc, start_pos, length)
+        @file_specs << "#{range_start}:#{range_length}:#{filename}"
+      end
+
       def escape_name(name)
         name.gsub(/\\/, "\\\\\\\\").gsub(/\s/) do |s|
           s.each_byte.map { |c| "\\%03o" % c }.join("")
         end
       end
+
+      def start_and_length_at(start_key, end_key, start_pos, length)
+        range_begin = @loc_ranges[start_key].begin + start_pos
+        range_length = [@loc_ranges[end_key].end - range_begin, length].min
+        [range_begin, range_length]
+      end
     end
   end
 end
diff --git a/sdk/ruby/test/test_collection.rb b/sdk/ruby/test/test_collection.rb
index e31b009..3dd1ab3 100644
--- a/sdk/ruby/test/test_collection.rb
+++ b/sdk/ruby/test/test_collection.rb
@@ -48,54 +48,34 @@ class CollectionTest < Minitest::Test
     assert_equal(NONNORMALIZED_MANIFEST, coll.manifest_text)
   end
 
-  def test_no_implicit_normalization_from_first_import
-    coll = Arv::Collection.new
-    coll.import_manifest(NONNORMALIZED_MANIFEST)
-    assert_equal(NONNORMALIZED_MANIFEST, coll.manifest_text)
-  end
-
-  ### .import_manifest
+  ### .normalize
 
   def test_non_posix_path_handling
-    block = random_block(9)
-    coll = Arv::Collection.new("./.. #{block} 0:5:.\n")
-    coll.import_manifest("./.. #{block} 5:4:..\n")
-    assert_equal("./.. #{block} 0:5:. 5:4:..\n", coll.manifest_text)
+    m_text = "./.. #{random_block(9)} 0:5:. 5:4:..\n"
+    coll = Arv::Collection.new(m_text.dup)
+    coll.normalize
+    assert_equal(m_text, coll.manifest_text)
   end
 
   def test_escaping_through_normalization
     coll = Arv::Collection.new(MANY_ESCAPES_MANIFEST)
-    coll.import_manifest(MANY_ESCAPES_MANIFEST)
+    coll.normalize
     # The result should simply duplicate the file spec.
     # The source file spec has an unescaped backslash in it.
     # It's OK for the Collection class to properly escape that.
     expect_text = MANY_ESCAPES_MANIFEST.sub(/ \d+:\d+:\S+/) do |file_spec|
-      file_spec.gsub(/([^\\])(\\[^\\\d])/, '\1\\\\\2') * 2
+      file_spec.gsub(/([^\\])(\\[^\\\d])/, '\1\\\\\2')
     end
     assert_equal(expect_text, coll.manifest_text)
   end
 
-  def test_concatenation_from_multiple_imports(file_name="file.txt",
-                                               out_name=nil)
-    out_name ||= file_name
-    blocks = random_blocks(2, 9)
-    coll = Arv::Collection.new
-    blocks.each do |block|
-      coll.import_manifest(". #{block} 1:8:#{file_name}\n")
-    end
-    assert_equal(". #{blocks.join(' ')} 1:8:#{out_name} 10:8:#{out_name}\n",
-                 coll.manifest_text)
-  end
-
-  def test_concatenation_from_multiple_escaped_imports
-    test_concatenation_from_multiple_imports('a\040\141.txt', 'a\040a.txt')
-  end
-
   def test_concatenation_with_locator_overlap(over_index=0)
     blocks = random_blocks(4, 2)
-    coll = Arv::Collection.new(". #{blocks.join(' ')} 0:8:file\n")
-    coll.import_manifest(". #{blocks[over_index, 2].join(' ')} 0:4:file\n")
-    assert_equal(". #{blocks.join(' ')} 0:8:file #{over_index * 2}:4:file\n",
+    blocks_s = blocks.join(" ")
+    coll = Arv::Collection.new(". %s 0:8:file\n. %s 0:4:file\n" %
+                               [blocks_s, blocks[over_index, 2].join(" ")])
+    coll.normalize
+    assert_equal(". #{blocks_s} 0:8:file #{over_index * 2}:4:file\n",
                  coll.manifest_text)
   end
 
@@ -109,14 +89,14 @@ class CollectionTest < Minitest::Test
 
   def test_concatenation_with_partial_locator_overlap
     blocks = random_blocks(3, 3)
-    coll = Arv::Collection.new(". #{blocks[0, 2].join(' ')} 0:6:overlap\n")
-    coll.import_manifest(". #{blocks[1, 2].join(' ')} 0:6:overlap\n")
+    coll = Arv::Collection
+      .new(". %s 0:6:overlap\n. %s 0:6:overlap\n" %
+           [blocks[0, 2].join(" "), blocks[1, 2].join(" ")])
+    coll.normalize
     assert_equal(". #{blocks.join(' ')} 0:6:overlap 3:6:overlap\n",
                  coll.manifest_text)
   end
 
-  ### .normalize
-
   def test_normalize
     block = random_block
     coll = Arv::Collection.new(". #{block} 0:0:f2 0:0:f1\n")
@@ -280,7 +260,8 @@ class CollectionTest < Minitest::Test
   def test_adding_to_root_after_copy
     coll = Arv::Collection.new(SIMPLEST_MANIFEST)
     coll.cp_r(".", "./root")
-    coll.import_manifest(COLON_FILENAME_MANIFEST)
+    src_coll = Arv::Collection.new(COLON_FILENAME_MANIFEST)
+    coll.cp_r("./file:test.txt", ".", src_coll)
     got_lines = coll.manifest_text.lines
     assert_equal(2, got_lines.size)
     assert_match(/^\. \S{33,} \S{33,} 0:9:file:test\.txt 9:9:simple\.txt\n/,
@@ -334,6 +315,54 @@ class CollectionTest < Minitest::Test
     assert_equal(src_text, src_coll.manifest_text)
   end
 
+  def test_copy_stream_contents
+    coll = Arv::Collection.new(MULTILEVEL_MANIFEST)
+    coll.cp_r("./dir0/subdir/", "./dir1/subdir")
+    expect_lines = MULTILEVEL_MANIFEST.lines
+    expect_lines[4] = expect_lines[2].sub("./dir0/", "./dir1/")
+    assert_equal(expect_lines.join(""), coll.manifest_text)
+  end
+
+  def test_copy_stream_contents_into_root
+    coll = Arv::Collection.new(TWO_BY_TWO_MANIFEST_S)
+    coll.cp_r("./s1/", ".")
+    assert_equal(". %s 0:5:f1 14:4:f2 5:4:f3\n%s" %
+                 [TWO_BY_TWO_BLOCKS.reverse.join(" "),
+                  TWO_BY_TWO_MANIFEST_A.last],
+                 coll.manifest_text)
+  end
+
+  def test_copy_root_contents_into_stream
+    # This is especially fun, because we're copying a parent into its child.
+    # Make sure that happens depth-first.
+    coll = Arv::Collection.new(TWO_BY_TWO_MANIFEST_S)
+    coll.cp_r("./", "./s1")
+    assert_equal("%s./s1 %s 0:5:f1 5:4:f2 14:4:f3\n%s" %
+                 [TWO_BY_TWO_MANIFEST_A.first, TWO_BY_TWO_BLOCKS.join(" "),
+                  TWO_BY_TWO_MANIFEST_A.last.sub("./s1 ", "./s1/s1 ")],
+                 coll.manifest_text)
+  end
+
+  def test_copy_stream_contents_across_collections
+    block = random_block(8)
+    src_coll = Arv::Collection.new("./s1 #{block} 0:8:f1\n")
+    dst_coll = Arv::Collection.new(TWO_BY_TWO_MANIFEST_S)
+    dst_coll.cp_r("./s1/", "./s1", src_coll)
+    assert_equal("%s./s1 %s %s 0:8:f1 13:4:f3\n" %
+                 [TWO_BY_TWO_MANIFEST_A.first, block, TWO_BY_TWO_BLOCKS.last],
+                 dst_coll.manifest_text)
+  end
+
+  def test_copy_root_contents_across_collections
+    block = random_block(8)
+    src_coll = Arv::Collection.new(". #{block} 0:8:f1\n")
+    dst_coll = Arv::Collection.new(TWO_BY_TWO_MANIFEST_S)
+    dst_coll.cp_r("./", ".", src_coll)
+    assert_equal(". %s %s 0:8:f1 13:4:f2\n%s" %
+                 [block, TWO_BY_TWO_BLOCKS.first, TWO_BY_TWO_MANIFEST_A.last],
+                 dst_coll.manifest_text)
+  end
+
   def test_copy_empty_source_path_raises_ArgumentError(src="", dst="./s1")
     coll = Arv::Collection.new(SIMPLEST_MANIFEST)
     assert_raises(ArgumentError) do
@@ -449,7 +478,8 @@ class CollectionTest < Minitest::Test
   def test_adding_to_root_after_rename
     coll = Arv::Collection.new(SIMPLEST_MANIFEST)
     coll.rename(".", "./root")
-    coll.import_manifest(SIMPLEST_MANIFEST)
+    src_coll = Arv::Collection.new(SIMPLEST_MANIFEST)
+    coll.cp_r("./simple.txt", ".", src_coll)
     assert_equal(SIMPLEST_MANIFEST + SIMPLEST_MANIFEST.sub(". ", "./root "),
                  coll.manifest_text)
   end
@@ -578,12 +608,6 @@ class CollectionTest < Minitest::Test
     test_new_collection_unmodified(SIMPLEST_MANIFEST)
   end
 
-  def test_collection_unmodified_after_initial_import
-    test_new_collection_unmodified do |coll|
-      coll.import_manifest(SIMPLEST_MANIFEST)
-    end
-  end
-
   def test_collection_unmodified_after_mark
     test_new_collection_unmodified(SIMPLEST_MANIFEST) do |coll|
       coll.cp_r("./simple.txt", "./copy")

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list