[ARVADOS] updated: 36f6241516d81c726bb7439650cf6ec56e6d6525

git at public.curoverse.com git at public.curoverse.com
Thu Mar 12 11:35:59 EDT 2015


Summary of changes:
 sdk/ruby/lib/arvados/collection.rb  | 513 +++++++++++++++++++++++++++++
 sdk/ruby/test/sdk_fixtures.rb       |  65 ++++
 sdk/ruby/test/test_collection.rb    | 641 ++++++++++++++++++++++++++++++++++++
 sdk/ruby/test/test_keep_manifest.rb |  52 +--
 4 files changed, 1233 insertions(+), 38 deletions(-)
 create mode 100644 sdk/ruby/lib/arvados/collection.rb
 create mode 100644 sdk/ruby/test/sdk_fixtures.rb
 create mode 100644 sdk/ruby/test/test_collection.rb

       via  36f6241516d81c726bb7439650cf6ec56e6d6525 (commit)
       via  3a38dfbc1b558c069511d2c1c5292bcd8690689b (commit)
       via  5fd7a6b976b93d15a3a66c7610fc4d8b824bb31e (commit)
      from  20f3649bb6c55b6ae1d6e89b65561c424bbd7b91 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit 36f6241516d81c726bb7439650cf6ec56e6d6525
Merge: 20f3649 3a38dfb
Author: Brett Smith <brett at curoverse.com>
Date:   Thu Mar 12 11:35:35 2015 -0400

    Merge branch '5104-ruby-sdk-collections-wip'
    
    Closes #5104, #5423.


commit 3a38dfbc1b558c069511d2c1c5292bcd8690689b
Author: Brett Smith <brett at curoverse.com>
Date:   Sun Mar 8 17:12:40 2015 -0400

    5104: Add Collection class to the Ruby SDK.
    
    This provides an interface to do filesystem-like operations on a tree
    expressed in a manifest.  It currently supports copy, rename, and
    remove operations, and will give us common ground for further work in
    Ruby components.

diff --git a/sdk/ruby/lib/arvados/collection.rb b/sdk/ruby/lib/arvados/collection.rb
new file mode 100644
index 0000000..ec0f443
--- /dev/null
+++ b/sdk/ruby/lib/arvados/collection.rb
@@ -0,0 +1,513 @@
+require "arvados/keep"
+
+module Arv
+  class Collection
+    def initialize(manifest_text="")
+      @manifest_text = manifest_text
+      @modified = false
+      @root = CollectionRoot.new
+      manifest = Keep::Manifest.new(manifest_text)
+      manifest.each_line do |stream_root, locators, file_specs|
+        if stream_root.empty? or locators.empty? or file_specs.empty?
+          raise ArgumentError.new("manifest text includes malformed line")
+        end
+        loc_list = LocatorList.new(locators)
+        file_specs.map { |s| manifest.split_file_token(s) }.
+            each do |file_start, file_len, file_path|
+          @root.file_at(normalize_path(stream_root, file_path)).
+            add_segment(loc_list.segment(file_start, file_len))
+        end
+      end
+    end
+
+    def manifest_text
+      @manifest_text ||= @root.manifest_text
+    end
+
+    def modified?
+      @modified
+    end
+
+    def unmodified
+      @modified = false
+      self
+    end
+
+    def normalize
+      @manifest_text = @root.manifest_text
+      self
+    end
+
+    def cp_r(source, target, source_collection=nil)
+      opts = {descend_target: !source.end_with?("/")}
+      copy(:merge, source.chomp("/"), target, source_collection, opts)
+    end
+
+    def rename(source, target)
+      copy(:add_copy, source, target) { rm_r(source) }
+    end
+
+    def rm(source)
+      remove(source)
+    end
+
+    def rm_r(source)
+      remove(source, recursive: true)
+    end
+
+    protected
+
+    def find(*parts)
+      @root.find(normalize_path(*parts))
+    end
+
+    private
+
+    def modified
+      @manifest_text = nil
+      @modified = true
+      self
+    end
+
+    def normalize_path(*parts)
+      path = File.join(*parts)
+      if path.empty?
+        raise ArgumentError.new("empty path")
+      elsif (path == ".") or path.start_with?("./")
+        path
+      else
+        "./#{path}"
+      end
+    end
+
+    def copy(copy_method, source, target, source_collection=nil, opts={})
+      # Find the item at path `source` in `source_collection`, find the
+      # destination stream at path `target`, and use `copy_method` to copy
+      # the found object there.  If a block is passed in, it will be called
+      # right before we do the actual copy, after we confirm that everything
+      # is found and can be copied.
+      source_collection = self if source_collection.nil?
+      src_stream, src_tail = source_collection.find(source)
+      dst_stream, dst_tail = find(target)
+      if (source_collection.equal?(self) and
+          (src_stream.path == dst_stream.path) and (src_tail == dst_tail))
+        return self
+      end
+      src_item = src_stream[src_tail]
+      dst_tail ||= src_tail
+      check_method = "check_can_#{copy_method}".to_sym
+      target_name = nil
+      if opts.fetch(:descend_target, true)
+        begin
+          # Find out if `target` refers to a stream we should copy into.
+          tail_stream = dst_stream[dst_tail]
+          tail_stream.send(check_method, src_item, src_tail)
+          # Yes it does.  Copy the item at `source` into it with the same name.
+          dst_stream = tail_stream
+          target_name = src_tail
+        rescue Errno::ENOENT, Errno::ENOTDIR
+          # It does not.  We'll fall back to writing to `target` below.
+        end
+      end
+      if target_name.nil?
+        dst_stream.send(check_method, src_item, dst_tail)
+        target_name = dst_tail
+      end
+      # At this point, we know the operation will work.  Call any block as
+      # a pre-copy hook.
+      if block_given?
+        yield
+        # Re-find the destination stream, in case the block removed
+        # the original (that's how rename is implemented).
+        dst_stream = @root.stream_at(dst_stream.path)
+      end
+      dst_stream.send(copy_method, src_item, target_name)
+      modified
+    end
+
+    def remove(path, opts={})
+      stream, name = find(path)
+      stream.delete(name, opts)
+      modified
+    end
+
+    LocatorSegment = Struct.new(:locators, :start_pos, :length)
+
+    class LocatorRange < Range
+      attr_reader :locator
+
+      def initialize(loc_s, start)
+        @locator = loc_s
+        range_end = start + Keep::Locator.parse(loc_s).size.to_i
+        super(start, range_end, false)
+      end
+    end
+
+    class LocatorList
+      # LocatorList efficiently builds LocatorSegments from a stream manifest.
+      def initialize(locators)
+        next_start = 0
+        @ranges = locators.map do |loc_s|
+          new_range = LocatorRange.new(loc_s, next_start)
+          next_start = new_range.end
+          new_range
+        end
+      end
+
+      def segment(start_pos, length)
+        # Return a LocatorSegment that captures `length` bytes from `start_pos`.
+        start_index = search_for_byte(start_pos)
+        if length == 0
+          end_index = start_index
+        else
+          end_index = search_for_byte(start_pos + length - 1, start_index)
+        end
+        seg_ranges = @ranges[start_index..end_index]
+        LocatorSegment.new(seg_ranges.map(&:locator),
+                           start_pos - seg_ranges.first.begin,
+                           length)
+      end
+
+      private
+
+      def search_for_byte(target, start_index=0)
+        # Do a binary search for byte `target` in the list of locators,
+        # starting from `start_index`.  Return the index of the range in
+        # @ranges that contains the byte.
+        lo = start_index
+        hi = @ranges.size
+        loop do
+          ii = (lo + hi) / 2
+          range = @ranges[ii]
+          if range.include?(target)
+            return ii
+          elsif ii == lo
+            raise RangeError.new("%i not in segment" % target)
+          elsif target < range.begin
+            hi = ii
+          else
+            lo = ii
+          end
+        end
+      end
+    end
+
+    class CollectionItem
+      attr_reader :path, :name
+
+      def initialize(path)
+        @path = path
+        @name = File.basename(path)
+      end
+    end
+
+    class CollectionFile < CollectionItem
+      def initialize(path)
+        super
+        @segments = []
+      end
+
+      def self.human_name
+        "file"
+      end
+
+      def file?
+        true
+      end
+
+      def leaf?
+        true
+      end
+
+      def add_segment(segment)
+        @segments << segment
+      end
+
+      def each_segment(&block)
+        @segments.each(&block)
+      end
+
+      def check_can_add_copy(src_item, name)
+        raise Errno::ENOTDIR.new(path)
+      end
+
+      alias_method :check_can_merge, :check_can_add_copy
+
+      def copy_named(copy_path)
+        copy = self.class.new(copy_path)
+        each_segment { |segment| copy.add_segment(segment) }
+        copy
+      end
+    end
+
+    class CollectionStream < CollectionItem
+      def initialize(path)
+        super
+        @items = {}
+      end
+
+      def self.human_name
+        "stream"
+      end
+
+      def file?
+        false
+      end
+
+      def leaf?
+        items.empty?
+      end
+
+      def [](key)
+        items[key] or
+          raise Errno::ENOENT.new("%p not found in %p" % [key, path])
+      end
+
+      def delete(name, opts={})
+        item = self[name]
+        if item.file? or opts[:recursive]
+          items.delete(name)
+        else
+          raise Errno::EISDIR.new(path)
+        end
+      end
+
+      def find(find_path)
+        # Given a POSIX-style path, return the CollectionStream that
+        # contains the object at that path, and the name of the object
+        # inside it.
+        components = find_path.split("/")
+        tail = components.pop
+        [components.reduce(self, :[]), tail]
+      end
+
+      def stream_at(find_path)
+        key, rest = find_path.split("/", 2)
+        next_stream = get_or_new(key, CollectionStream)
+        if rest.nil?
+          next_stream
+        else
+          next_stream.stream_at(rest)
+        end
+      end
+
+      def file_at(find_path)
+        stream_path, _, file_name = find_path.rpartition("/")
+        if stream_path.empty?
+          get_or_new(file_name, CollectionFile)
+        else
+          stream_at(stream_path).file_at(file_name)
+        end
+      end
+
+      def manifest_text
+        # Return a string with the normalized manifest text for this stream,
+        # including all substreams.
+        file_keys, stream_keys = items.keys.sort.partition do |key|
+          items[key].file?
+        end
+        my_line = StreamManifest.new(path)
+        file_keys.each do |file_name|
+          my_line.add_file(items[file_name])
+        end
+        sub_lines = stream_keys.map do |sub_name|
+          items[sub_name].manifest_text
+        end
+        my_line.to_s + sub_lines.join("")
+      end
+
+      def check_can_add_copy(src_item, key)
+        if existing = check_can_merge(src_item, key) and not existing.leaf?
+          raise Errno::ENOTEMPTY.new(existing.path)
+        end
+      end
+
+      def check_can_merge(src_item, key)
+        if existing = items[key] and (existing.class != src_item.class)
+          raise Errno::ENOTDIR.new(existing.path)
+        end
+        existing
+      end
+
+      def add_copy(src_item, key)
+        self[key] = src_item.copy_named("#{path}/#{key}")
+      end
+
+      def merge(src_item, key)
+        # Do a recursive copy of the collection item `src_item` to destination
+        # `key`.  If a simple copy is safe, do that; otherwise, recursively
+        # merge the contents of the stream `src_item` into the stream at
+        # `key`.
+        begin
+          check_can_add_copy(src_item, key)
+          add_copy(src_item, key)
+        rescue Errno::ENOTEMPTY
+          dest = self[key]
+          error = nil
+          # Copy as much as possible, then raise any error encountered.
+          # Start with streams for a depth-first merge.
+          src_items = src_item.items.each_pair.sort_by do |_, sub_item|
+            (sub_item.file?) ? 1 : 0
+          end
+          src_items.each do |sub_key, sub_item|
+            begin
+              dest.merge(sub_item, sub_key)
+            rescue Errno::ENOTDIR => error
+            end
+          end
+          raise error unless error.nil?
+        end
+      end
+
+      def copy_named(copy_path)
+        copy = self.class.new(copy_path)
+        items.each_pair do |key, item|
+          copy.add_copy(item, key)
+        end
+        copy
+      end
+
+      protected
+
+      attr_reader :items
+
+      private
+
+      def []=(key, item)
+        items[key] = item
+      end
+
+      def get_or_new(key, klass)
+        # Return the collection item at `key` and ensure that it's a `klass`.
+        # If `key` does not exist, create a new `klass` there.
+        # If the value for `key` is not a `klass`, raise an ArgumentError.
+        item = items[key]
+        if item.nil?
+          self[key] = klass.new("#{path}/#{key}")
+        elsif not item.is_a?(klass)
+          raise ArgumentError.
+            new("in stream %p, %p is a %s, not a %s" %
+                [path, key, items[key].class.human_name, klass.human_name])
+        else
+          item
+        end
+      end
+    end
+
+    class CollectionRoot < CollectionStream
+      def initialize
+        super("")
+        setup
+      end
+
+      def delete(name, opts={})
+        super
+        # If that didn't fail, it deleted the . stream.  Recreate it.
+        setup
+      end
+
+      def check_can_merge(src_item, key)
+        if items.include?(key)
+          super
+        else
+          raise_root_write_error(key)
+        end
+      end
+
+      private
+
+      def setup
+        items["."] = CollectionStream.new(".")
+      end
+
+      def raise_root_write_error(key)
+        raise ArgumentError.new("can't write to %p at collection root" % key)
+      end
+
+      def []=(key, item)
+        raise_root_write_error(key)
+      end
+    end
+
+    class StreamManifest
+      # Build a manifest text for a single stream, without substreams.
+      # The manifest includes files in the order they're added.  If you want
+      # a normalized manifest, add files in lexical order by name.
+
+      def initialize(name)
+        @name = name
+        @loc_ranges = {}
+        @loc_range_start = 0
+        @file_specs = []
+      end
+
+      def add_file(coll_file)
+        coll_file.each_segment do |segment|
+          extend_locator_ranges(segment.locators)
+          extend_file_specs(coll_file.name, segment)
+        end
+      end
+
+      def to_s
+        if @file_specs.empty?
+          ""
+        else
+          "%s %s %s\n" % [escape_name(@name),
+                          @loc_ranges.keys.join(" "),
+                          @file_specs.join(" ")]
+        end
+      end
+
+      private
+
+      def extend_locator_ranges(locators)
+        locators.
+            select { |loc_s| not @loc_ranges.include?(loc_s) }.
+            each do |loc_s|
+          @loc_ranges[loc_s] = LocatorRange.new(loc_s, @loc_range_start)
+          @loc_range_start = @loc_ranges[loc_s].end
+        end
+      end
+
+      def extend_file_specs(filename, segment)
+        # Given a filename and a LocatorSegment, add the smallest
+        # possible array of file spec strings to @file_specs that
+        # builds the file from available locators.
+        filename = escape_name(filename)
+        start_pos = segment.start_pos
+        length = segment.length
+        start_loc = segment.locators.first
+        prev_loc = start_loc
+        # Build a list of file specs by iterating through the segment's
+        # locators and preparing a file spec for each contiguous range.
+        segment.locators[1..-1].each do |loc_s|
+          range = @loc_ranges[loc_s]
+          if range.begin != @loc_ranges[prev_loc].end
+            range_start, range_length =
+              start_and_length_at(start_loc, prev_loc, start_pos, length)
+            @file_specs << "#{range_start}:#{range_length}:#{filename}"
+            start_pos = 0
+            length -= range_length
+            start_loc = loc_s
+          end
+          prev_loc = loc_s
+        end
+        range_start, range_length =
+          start_and_length_at(start_loc, prev_loc, start_pos, length)
+        @file_specs << "#{range_start}:#{range_length}:#{filename}"
+      end
+
+      def escape_name(name)
+        name.gsub(/\\/, "\\\\\\\\").gsub(/\s/) do |s|
+          s.each_byte.map { |c| "\\%03o" % c }.join("")
+        end
+      end
+
+      def start_and_length_at(start_key, end_key, start_pos, length)
+        range_begin = @loc_ranges[start_key].begin + start_pos
+        range_length = [@loc_ranges[end_key].end - range_begin, length].min
+        [range_begin, range_length]
+      end
+    end
+  end
+end
diff --git a/sdk/ruby/test/sdk_fixtures.rb b/sdk/ruby/test/sdk_fixtures.rb
index 8af81cd..52d7377 100644
--- a/sdk/ruby/test/sdk_fixtures.rb
+++ b/sdk/ruby/test/sdk_fixtures.rb
@@ -7,6 +7,10 @@ module SDKFixtures
     def random_block(size=nil)
       sprintf("%032x+%d", rand(16 ** 32), size || rand(64 * 1024 * 1024))
     end
+
+    def random_blocks(count, size=nil)
+      (0...count).map { |_| random_block(size) }
+    end
   end
 
   extend StaticMethods
@@ -50,4 +54,12 @@ module SDKFixtures
   NONNORMALIZED_MANIFEST =
     ["./dir2 #{random_block} 0:0:z 0:0:y 0:0:x",
      "./dir1 #{random_block} 0:0:p 0:0:o 0:0:n\n"].join("\n")
+
+  ### Non-tree manifests
+  # These manifests follow the spec, but they express a structure that can't
+  # be represented by a POSIX filesystem tree.  For example, there's a
+  # name conflict between a stream and a filename.
+  NAME_CONFLICT_MANIFEST =
+    [". #{random_block(9)} 0:9:conflict",
+     "./conflict #{random_block} 0:0:name\n"].join("\n")
 end
diff --git a/sdk/ruby/test/test_collection.rb b/sdk/ruby/test/test_collection.rb
new file mode 100644
index 0000000..3dd1ab3
--- /dev/null
+++ b/sdk/ruby/test/test_collection.rb
@@ -0,0 +1,641 @@
+require "arvados/collection"
+require "minitest/autorun"
+require "sdk_fixtures"
+
+class CollectionTest < Minitest::Test
+  include SDKFixtures
+
+  TWO_BY_TWO_BLOCKS = SDKFixtures.random_blocks(2, 9)
+  TWO_BY_TWO_MANIFEST_A =
+    [". #{TWO_BY_TWO_BLOCKS.first} 0:5:f1 5:4:f2\n",
+     "./s1 #{TWO_BY_TWO_BLOCKS.last} 0:5:f1 5:4:f3\n"]
+  TWO_BY_TWO_MANIFEST_S = TWO_BY_TWO_MANIFEST_A.join("")
+
+  ### .new
+
+  def test_empty_construction
+    coll = Arv::Collection.new
+    assert_equal("", coll.manifest_text)
+  end
+
+  def test_successful_construction
+    [:SIMPLEST_MANIFEST, :MULTIBLOCK_FILE_MANIFEST, :MULTILEVEL_MANIFEST].
+        each do |manifest_name|
+      manifest_text = SDKFixtures.const_get(manifest_name)
+      coll = Arv::Collection.new(manifest_text)
+      assert_equal(manifest_text, coll.manifest_text,
+                   "did not get same manifest back out from #{manifest_name}")
+    end
+  end
+
+  def test_non_manifest_construction_error
+    ["word", ". abc def", ". #{random_block} 0:", ". / !"].each do |m_text|
+      assert_raises(ArgumentError,
+                    "built collection from manifest #{m_text.inspect}") do
+        Arv::Collection.new(m_text)
+      end
+    end
+  end
+
+  def test_file_directory_conflict_construction_error
+    assert_raises(ArgumentError) do
+      Arv::Collection.new(NAME_CONFLICT_MANIFEST)
+    end
+  end
+
+  def test_no_implicit_normalization
+    coll = Arv::Collection.new(NONNORMALIZED_MANIFEST)
+    assert_equal(NONNORMALIZED_MANIFEST, coll.manifest_text)
+  end
+
+  ### .normalize
+
+  def test_non_posix_path_handling
+    m_text = "./.. #{random_block(9)} 0:5:. 5:4:..\n"
+    coll = Arv::Collection.new(m_text.dup)
+    coll.normalize
+    assert_equal(m_text, coll.manifest_text)
+  end
+
+  def test_escaping_through_normalization
+    coll = Arv::Collection.new(MANY_ESCAPES_MANIFEST)
+    coll.normalize
+    # The result should simply duplicate the file spec.
+    # The source file spec has an unescaped backslash in it.
+    # It's OK for the Collection class to properly escape that.
+    expect_text = MANY_ESCAPES_MANIFEST.sub(/ \d+:\d+:\S+/) do |file_spec|
+      file_spec.gsub(/([^\\])(\\[^\\\d])/, '\1\\\\\2')
+    end
+    assert_equal(expect_text, coll.manifest_text)
+  end
+
+  def test_concatenation_with_locator_overlap(over_index=0)
+    blocks = random_blocks(4, 2)
+    blocks_s = blocks.join(" ")
+    coll = Arv::Collection.new(". %s 0:8:file\n. %s 0:4:file\n" %
+                               [blocks_s, blocks[over_index, 2].join(" ")])
+    coll.normalize
+    assert_equal(". #{blocks_s} 0:8:file #{over_index * 2}:4:file\n",
+                 coll.manifest_text)
+  end
+
+  def test_concatenation_with_middle_locator_overlap
+    test_concatenation_with_locator_overlap(1)
+  end
+
+  def test_concatenation_with_end_locator_overlap
+    test_concatenation_with_locator_overlap(2)
+  end
+
+  def test_concatenation_with_partial_locator_overlap
+    blocks = random_blocks(3, 3)
+    coll = Arv::Collection
+      .new(". %s 0:6:overlap\n. %s 0:6:overlap\n" %
+           [blocks[0, 2].join(" "), blocks[1, 2].join(" ")])
+    coll.normalize
+    assert_equal(". #{blocks.join(' ')} 0:6:overlap 3:6:overlap\n",
+                 coll.manifest_text)
+  end
+
+  def test_normalize
+    block = random_block
+    coll = Arv::Collection.new(". #{block} 0:0:f2 0:0:f1\n")
+    coll.normalize
+    assert_equal(". #{block} 0:0:f1 0:0:f2\n", coll.manifest_text)
+  end
+
+  def test_normalization_file_spans_two_whole_blocks(file_specs="0:10:f1",
+                                                     num_blocks=2)
+    blocks = random_blocks(num_blocks, 5)
+    m_text = ". #{blocks.join(' ')} #{file_specs}\n"
+    coll = Arv::Collection.new(m_text.dup)
+    coll.normalize
+    assert_equal(m_text, coll.manifest_text)
+  end
+
+  def test_normalization_file_fits_beginning_block
+    test_normalization_file_spans_two_whole_blocks("0:7:f1")
+  end
+
+  def test_normalization_file_fits_end_block
+    test_normalization_file_spans_two_whole_blocks("3:7:f1")
+  end
+
+  def test_normalization_file_spans_middle
+    test_normalization_file_spans_two_whole_blocks("3:5:f1")
+  end
+
+  def test_normalization_file_spans_three_whole_blocks
+    test_normalization_file_spans_two_whole_blocks("0:15:f1", 3)
+  end
+
+  def test_normalization_file_skips_bytes
+    test_normalization_file_spans_two_whole_blocks("0:3:f1 5:5:f1")
+  end
+
+  def test_normalization_file_inserts_bytes
+    test_normalization_file_spans_two_whole_blocks("0:3:f1 5:3:f1 3:2:f1")
+  end
+
+  def test_normalization_file_duplicates_bytes
+    test_normalization_file_spans_two_whole_blocks("2:3:f1 2:3:f1", 1)
+  end
+
+  def test_normalization_dedups_locators
+    blocks = random_blocks(2, 5)
+    coll = Arv::Collection.new(". %s %s 1:8:f1 11:8:f1\n" %
+                               [blocks.join(" "), blocks.reverse.join(" ")])
+    coll.normalize
+    assert_equal(". #{blocks.join(' ')} 1:8:f1 6:4:f1 0:4:f1\n",
+                 coll.manifest_text)
+  end
+
+  ### .cp_r
+
+  def test_simple_file_copy
+    coll = Arv::Collection.new(SIMPLEST_MANIFEST)
+    coll.cp_r("./simple.txt", "./new")
+    assert_equal(SIMPLEST_MANIFEST.sub(" 0:9:", " 0:9:new 0:9:"),
+                 coll.manifest_text)
+  end
+
+  def test_copy_file_into_other_stream(target="./s1/f2", basename="f2")
+    coll = Arv::Collection.new(TWO_BY_TWO_MANIFEST_S)
+    coll.cp_r("./f2", target)
+    expected = "%s./s1 %s 0:5:f1 14:4:%s 5:4:f3\n" %
+      [TWO_BY_TWO_MANIFEST_A.first,
+       TWO_BY_TWO_BLOCKS.reverse.join(" "), basename]
+    assert_equal(expected, coll.manifest_text)
+  end
+
+  def test_implicit_copy_file_into_other_stream
+    test_copy_file_into_other_stream("./s1")
+  end
+
+  def test_copy_file_into_other_stream_with_new_name
+    test_copy_file_into_other_stream("./s1/f2a", "f2a")
+  end
+
+  def test_copy_file_over_in_other_stream(target="./s1/f1")
+    coll = Arv::Collection.new(TWO_BY_TWO_MANIFEST_S)
+    coll.cp_r("./f1", target)
+    expected = "%s./s1 %s 0:5:f1 14:4:f3\n" %
+      [TWO_BY_TWO_MANIFEST_A.first, TWO_BY_TWO_BLOCKS.join(" ")]
+    assert_equal(expected, coll.manifest_text)
+  end
+
+  def test_implicit_copy_file_over_in_other_stream
+    test_copy_file_over_in_other_stream("./s1")
+  end
+
+  def test_simple_stream_copy
+    coll = Arv::Collection.new(TWO_BY_TWO_MANIFEST_S)
+    coll.cp_r("./s1", "./sNew")
+    new_line = TWO_BY_TWO_MANIFEST_A.last.sub("./s1 ", "./sNew ")
+    assert_equal(TWO_BY_TWO_MANIFEST_S + new_line, coll.manifest_text)
+  end
+
+  def test_copy_stream_into_other_stream(target="./dir2/subdir",
+                                         basename="subdir")
+    coll = Arv::Collection.new(MULTILEVEL_MANIFEST)
+    coll.cp_r("./dir1/subdir", target)
+    new_line = MULTILEVEL_MANIFEST.lines[4].sub("./dir1/subdir ",
+                                                "./dir2/#{basename} ")
+    assert_equal(MULTILEVEL_MANIFEST + new_line, coll.manifest_text)
+  end
+
+  def test_implicit_copy_stream_into_other_stream
+    test_copy_stream_into_other_stream("./dir2")
+  end
+
+  def test_copy_stream_into_other_stream_with_new_name
+    test_copy_stream_into_other_stream("./dir2/newsub", "newsub")
+  end
+
+  def test_copy_stream_over_empty_stream
+    coll = Arv::Collection.new(MULTILEVEL_MANIFEST)
+    (1..3).each do |file_num|
+      coll.rm("./dir0/subdir/file#{file_num}")
+    end
+    coll.cp_r("./dir1/subdir", "./dir0")
+    expected = MULTILEVEL_MANIFEST.lines
+    expected[2] = expected[4].sub("./dir1/", "./dir0/")
+    assert_equal(expected.join(""), coll.manifest_text)
+  end
+
+  def test_copy_stream_over_file_raises_ENOTDIR
+    coll = Arv::Collection.new(TWO_BY_TWO_MANIFEST_S)
+    assert_raises(Errno::ENOTDIR) do
+      coll.cp_r("./s1", "./f2")
+    end
+  end
+
+  def test_copy_stream_over_nonempty_stream_merges_and_overwrites
+    blocks = random_blocks(3, 9)
+    manifest_a =
+      ["./subdir #{blocks[0]} 0:1:s1 1:2:zero\n",
+       "./zdir #{blocks[1]} 0:9:zfile\n",
+       "./zdir/subdir #{blocks[2]} 0:1:s2 1:2:zero\n"]
+    coll = Arv::Collection.new(manifest_a.join(""))
+    coll.cp_r("./subdir", "./zdir")
+    manifest_a[2] = "./zdir/subdir %s %s 0:1:s1 9:1:s2 1:2:zero\n" %
+      [blocks[0], blocks[2]]
+    assert_equal(manifest_a.join(""), coll.manifest_text)
+  end
+
+  def test_copy_stream_into_substream(source="./dir1",
+                                      target="./dir1/subdir/dir1")
+    coll = Arv::Collection.new(MULTILEVEL_MANIFEST)
+    coll.cp_r(source, target)
+    expected = MULTILEVEL_MANIFEST.lines.flat_map do |line|
+      [line, line.gsub(/^#{Regexp.escape(source)}([\/ ])/, "#{target}\\1")].uniq
+    end
+    assert_equal(expected.sort.join(""), coll.manifest_text)
+  end
+
+  def test_copy_root
+    test_copy_stream_into_substream(".", "./root")
+  end
+
+  def test_adding_to_root_after_copy
+    coll = Arv::Collection.new(SIMPLEST_MANIFEST)
+    coll.cp_r(".", "./root")
+    src_coll = Arv::Collection.new(COLON_FILENAME_MANIFEST)
+    coll.cp_r("./file:test.txt", ".", src_coll)
+    got_lines = coll.manifest_text.lines
+    assert_equal(2, got_lines.size)
+    assert_match(/^\. \S{33,} \S{33,} 0:9:file:test\.txt 9:9:simple\.txt\n/,
+                 got_lines.first)
+    assert_equal(SIMPLEST_MANIFEST.sub(". ", "./root "), got_lines.last)
+  end
+
+  def test_copy_chaining
+    coll = Arv::Collection.new(SIMPLEST_MANIFEST)
+    coll.cp_r("./simple.txt", "./a").cp_r("./a", "./b")
+    assert_equal(SIMPLEST_MANIFEST.sub(" 0:9:", " 0:9:a 0:9:b 0:9:"),
+                 coll.manifest_text)
+  end
+
+  def prep_two_collections_for_copy(src_stream, dst_stream)
+    blocks = random_blocks(2, 8)
+    src_text = "#{src_stream} #{blocks.first} 0:8:f1\n"
+    dst_text = "#{dst_stream} #{blocks.last} 0:8:f2\n"
+    return [blocks, src_text, dst_text,
+            Arv::Collection.new(src_text.dup),
+            Arv::Collection.new(dst_text.dup)]
+  end
+
+  def test_copy_file_from_other_collection(src_stream=".", dst_stream="./s1")
+    blocks, src_text, dst_text, src_coll, dst_coll =
+      prep_two_collections_for_copy(src_stream, dst_stream)
+    dst_coll.cp_r("#{src_stream}/f1", dst_stream, src_coll)
+    assert_equal("#{dst_stream} #{blocks.join(' ')} 0:8:f1 8:8:f2\n",
+                 dst_coll.manifest_text)
+    assert_equal(src_text, src_coll.manifest_text)
+  end
+
+  def test_copy_file_from_other_collection_to_root
+    test_copy_file_from_other_collection("./s1", ".")
+  end
+
+  def test_copy_stream_from_other_collection
+    blocks, src_text, dst_text, src_coll, dst_coll =
+      prep_two_collections_for_copy("./s2", "./s1")
+    dst_coll.cp_r("./s2", "./s1", src_coll)
+    assert_equal(dst_text + src_text.sub("./s2 ", "./s1/s2 "),
+                 dst_coll.manifest_text)
+    assert_equal(src_text, src_coll.manifest_text)
+  end
+
+  def test_copy_stream_from_other_collection_to_root
+    blocks, src_text, dst_text, src_coll, dst_coll =
+      prep_two_collections_for_copy("./s1", ".")
+    dst_coll.cp_r("./s1", ".", src_coll)
+    assert_equal(dst_text + src_text, dst_coll.manifest_text)
+    assert_equal(src_text, src_coll.manifest_text)
+  end
+
+  def test_copy_stream_contents
+    coll = Arv::Collection.new(MULTILEVEL_MANIFEST)
+    coll.cp_r("./dir0/subdir/", "./dir1/subdir")
+    expect_lines = MULTILEVEL_MANIFEST.lines
+    expect_lines[4] = expect_lines[2].sub("./dir0/", "./dir1/")
+    assert_equal(expect_lines.join(""), coll.manifest_text)
+  end
+
+  def test_copy_stream_contents_into_root
+    coll = Arv::Collection.new(TWO_BY_TWO_MANIFEST_S)
+    coll.cp_r("./s1/", ".")
+    assert_equal(". %s 0:5:f1 14:4:f2 5:4:f3\n%s" %
+                 [TWO_BY_TWO_BLOCKS.reverse.join(" "),
+                  TWO_BY_TWO_MANIFEST_A.last],
+                 coll.manifest_text)
+  end
+
+  def test_copy_root_contents_into_stream
+    # This is especially fun, because we're copying a parent into its child.
+    # Make sure that happens depth-first.
+    coll = Arv::Collection.new(TWO_BY_TWO_MANIFEST_S)
+    coll.cp_r("./", "./s1")
+    assert_equal("%s./s1 %s 0:5:f1 5:4:f2 14:4:f3\n%s" %
+                 [TWO_BY_TWO_MANIFEST_A.first, TWO_BY_TWO_BLOCKS.join(" "),
+                  TWO_BY_TWO_MANIFEST_A.last.sub("./s1 ", "./s1/s1 ")],
+                 coll.manifest_text)
+  end
+
+  def test_copy_stream_contents_across_collections
+    block = random_block(8)
+    src_coll = Arv::Collection.new("./s1 #{block} 0:8:f1\n")
+    dst_coll = Arv::Collection.new(TWO_BY_TWO_MANIFEST_S)
+    dst_coll.cp_r("./s1/", "./s1", src_coll)
+    assert_equal("%s./s1 %s %s 0:8:f1 13:4:f3\n" %
+                 [TWO_BY_TWO_MANIFEST_A.first, block, TWO_BY_TWO_BLOCKS.last],
+                 dst_coll.manifest_text)
+  end
+
+  def test_copy_root_contents_across_collections
+    block = random_block(8)
+    src_coll = Arv::Collection.new(". #{block} 0:8:f1\n")
+    dst_coll = Arv::Collection.new(TWO_BY_TWO_MANIFEST_S)
+    dst_coll.cp_r("./", ".", src_coll)
+    assert_equal(". %s %s 0:8:f1 13:4:f2\n%s" %
+                 [block, TWO_BY_TWO_BLOCKS.first, TWO_BY_TWO_MANIFEST_A.last],
+                 dst_coll.manifest_text)
+  end
+
+  def test_copy_empty_source_path_raises_ArgumentError(src="", dst="./s1")
+    coll = Arv::Collection.new(SIMPLEST_MANIFEST)
+    assert_raises(ArgumentError) do
+      coll.cp_r(src, dst)
+    end
+  end
+
+  def test_copy_empty_destination_path_raises_ArgumentError
+    test_copy_empty_source_path_raises_ArgumentError(".", "")
+  end
+
+  ### .rename
+
+  def test_simple_file_rename
+    coll = Arv::Collection.new(SIMPLEST_MANIFEST)
+    coll.rename("./simple.txt", "./new")
+    assert_equal(SIMPLEST_MANIFEST.sub(":simple.txt", ":new"),
+                 coll.manifest_text)
+  end
+
+  def test_rename_file_into_other_stream(target="./s1/f2", basename="f2")
+    coll = Arv::Collection.new(TWO_BY_TWO_MANIFEST_S)
+    coll.rename("./f2", target)
+    expected = ". %s 0:5:f1\n./s1 %s 0:5:f1 14:4:%s 5:4:f3\n" %
+      [TWO_BY_TWO_BLOCKS.first,
+       TWO_BY_TWO_BLOCKS.reverse.join(" "), basename]
+    assert_equal(expected, coll.manifest_text)
+  end
+
+  def test_implicit_rename_file_into_other_stream
+    test_rename_file_into_other_stream("./s1")
+  end
+
+  def test_rename_file_into_other_stream_with_new_name
+    test_rename_file_into_other_stream("./s1/f2a", "f2a")
+  end
+
+  def test_rename_file_over_in_other_stream(target="./s1/f1")
+    coll = Arv::Collection.new(TWO_BY_TWO_MANIFEST_S)
+    coll.rename("./f1", target)
+    expected = ". %s 5:4:f2\n./s1 %s 0:5:f1 14:4:f3\n" %
+      [TWO_BY_TWO_BLOCKS.first, TWO_BY_TWO_BLOCKS.join(" ")]
+    assert_equal(expected, coll.manifest_text)
+  end
+
+  def test_implicit_rename_file_over_in_other_stream
+    test_rename_file_over_in_other_stream("./s1")
+  end
+
+  def test_simple_stream_rename
+    coll = Arv::Collection.new(TWO_BY_TWO_MANIFEST_S)
+    coll.rename("./s1", "./newS")
+    assert_equal(TWO_BY_TWO_MANIFEST_S.sub("\n./s1 ", "\n./newS "),
+                 coll.manifest_text)
+  end
+
+  def test_rename_stream_into_other_stream(target="./dir2/subdir",
+                                           basename="subdir")
+    coll = Arv::Collection.new(MULTILEVEL_MANIFEST)
+    coll.rename("./dir1/subdir", target)
+    expected = MULTILEVEL_MANIFEST.lines
+    replaced_line = expected.delete_at(4)
+    expected << replaced_line.sub("./dir1/subdir ", "./dir2/#{basename} ")
+    assert_equal(expected.join(""), coll.manifest_text)
+  end
+
+  def test_implicit_rename_stream_into_other_stream
+    test_rename_stream_into_other_stream("./dir2")
+  end
+
+  def test_rename_stream_into_other_stream_with_new_name
+    test_rename_stream_into_other_stream("./dir2/newsub", "newsub")
+  end
+
+  def test_rename_stream_over_empty_stream
+    coll = Arv::Collection.new(MULTILEVEL_MANIFEST)
+    (1..3).each do |file_num|
+      coll.rm("./dir0/subdir/file#{file_num}")
+    end
+    coll.rename("./dir1/subdir", "./dir0")
+    expected = MULTILEVEL_MANIFEST.lines
+    expected[2] = expected.delete_at(4).sub("./dir1/", "./dir0/")
+    assert_equal(expected.sort.join(""), coll.manifest_text)
+  end
+
+  def test_rename_stream_over_file_raises_ENOTDIR
+    coll = Arv::Collection.new(TWO_BY_TWO_MANIFEST_S)
+    assert_raises(Errno::ENOTDIR) do
+      coll.rename("./s1", "./f2")
+    end
+  end
+
+  def test_rename_stream_over_nonempty_stream_raises_ENOTEMPTY
+    coll = Arv::Collection.new(MULTILEVEL_MANIFEST)
+    assert_raises(Errno::ENOTEMPTY) do
+      coll.rename("./dir1/subdir", "./dir0")
+    end
+  end
+
+  def test_rename_stream_into_substream(source="./dir1",
+                                        target="./dir1/subdir/dir1")
+    coll = Arv::Collection.new(MULTILEVEL_MANIFEST)
+    coll.rename(source, target)
+    assert_equal(MULTILEVEL_MANIFEST.gsub(/^#{Regexp.escape(source)}([\/ ])/m,
+                                          "#{target}\\1"),
+                 coll.manifest_text)
+  end
+
+  def test_rename_root
+    test_rename_stream_into_substream(".", "./root")
+  end
+
+  def test_adding_to_root_after_rename
+    coll = Arv::Collection.new(SIMPLEST_MANIFEST)
+    coll.rename(".", "./root")
+    src_coll = Arv::Collection.new(SIMPLEST_MANIFEST)
+    coll.cp_r("./simple.txt", ".", src_coll)
+    assert_equal(SIMPLEST_MANIFEST + SIMPLEST_MANIFEST.sub(". ", "./root "),
+                 coll.manifest_text)
+  end
+
+  def test_rename_chaining
+    coll = Arv::Collection.new(SIMPLEST_MANIFEST)
+    coll.rename("./simple.txt", "./x").rename("./x", "./simple.txt")
+    assert_equal(SIMPLEST_MANIFEST, coll.manifest_text)
+  end
+
+  ### .rm
+
+  def test_simple_remove
+    coll = Arv::Collection.new(TWO_BY_TWO_MANIFEST_S.dup)
+    coll.rm("./f2")
+    assert_equal(TWO_BY_TWO_MANIFEST_S.sub(" 5:4:f2", ""), coll.manifest_text)
+  end
+
+  def empty_stream_and_assert(expect_index=0)
+    coll = Arv::Collection.new(TWO_BY_TWO_MANIFEST_S)
+    yield coll
+    assert_equal(TWO_BY_TWO_MANIFEST_A[expect_index], coll.manifest_text)
+  end
+
+  def test_remove_all_files_in_substream
+    empty_stream_and_assert do |coll|
+      coll.rm("./s1/f1")
+      coll.rm("./s1/f3")
+    end
+  end
+
+  def test_remove_all_files_in_root_stream
+    empty_stream_and_assert(1) do |coll|
+      coll.rm("./f1")
+      coll.rm("./f2")
+    end
+  end
+
+  def test_chaining_removes
+    empty_stream_and_assert do |coll|
+      coll.rm("./s1/f1").rm("./s1/f3")
+    end
+  end
+
+  def test_remove_last_file
+    coll = Arv::Collection.new(SIMPLEST_MANIFEST)
+    coll.rm("./simple.txt")
+    assert_equal("", coll.manifest_text)
+  end
+
+  def test_remove_nonexistent_file_raises_ENOENT(path="./NoSuchFile",
+                                                 method=:rm)
+    coll = Arv::Collection.new(SIMPLEST_MANIFEST)
+    assert_raises(Errno::ENOENT) do
+      coll.send(method, path)
+    end
+  end
+
+  def test_remove_from_nonexistent_stream_raises_ENOENT
+    test_remove_nonexistent_file_raises_ENOENT("./NoSuchStream/simple.txt")
+  end
+
+  def test_remove_stream_raises_EISDIR(path="./s1")
+    coll = Arv::Collection.new(TWO_BY_TWO_MANIFEST_S)
+    assert_raises(Errno::EISDIR) do
+      coll.rm(path)
+    end
+  end
+
+  def test_remove_root_raises_EISDIR
+    test_remove_stream_raises_EISDIR(".")
+  end
+
+  def test_remove_empty_string_raises_ArgumentError
+    coll = Arv::Collection.new(SIMPLEST_MANIFEST)
+    assert_raises(ArgumentError) do
+      coll.rm("")
+    end
+  end
+
+  ### rm_r
+
+  def test_recursive_remove
+    empty_stream_and_assert do |coll|
+      coll.rm_r("./s1")
+    end
+  end
+
+  def test_recursive_remove_on_files
+    empty_stream_and_assert do |coll|
+      coll.rm_r("./s1/f1")
+      coll.rm_r("./s1/f3")
+    end
+  end
+
+  def test_recursive_remove_root
+    coll = Arv::Collection.new(MULTILEVEL_MANIFEST)
+    coll.rm_r(".")
+    assert_equal("", coll.manifest_text)
+  end
+
+  def test_rm_r_nonexistent_file_raises_ENOENT(path="./NoSuchFile")
+    test_remove_nonexistent_file_raises_ENOENT("./NoSuchFile", :rm_r)
+  end
+
+  def test_rm_r_from_nonexistent_stream_raises_ENOENT
+    test_remove_nonexistent_file_raises_ENOENT("./NoSuchStream/file", :rm_r)
+  end
+
+  def test_rm_r_empty_string_raises_ArgumentError
+    coll = Arv::Collection.new(SIMPLEST_MANIFEST)
+    assert_raises(ArgumentError) do
+      coll.rm_r("")
+    end
+  end
+
+  ### .modified?
+
+  def test_new_collection_unmodified(*args)
+    coll = Arv::Collection.new(*args)
+    yield coll if block_given?
+    refute(coll.modified?)
+  end
+
+  def test_collection_unmodified_after_instantiation
+    test_new_collection_unmodified(SIMPLEST_MANIFEST)
+  end
+
+  def test_collection_unmodified_after_mark
+    test_new_collection_unmodified(SIMPLEST_MANIFEST) do |coll|
+      coll.cp_r("./simple.txt", "./copy")
+      coll.unmodified
+    end
+  end
+
+  def check_collection_modified
+    coll = Arv::Collection.new(SIMPLEST_MANIFEST)
+    yield coll
+    assert(coll.modified?)
+  end
+
+  def test_collection_modified_after_copy
+    check_collection_modified do |coll|
+      coll.cp_r("./simple.txt", "./copy")
+    end
+  end
+
+  def test_collection_modified_after_remove
+    check_collection_modified do |coll|
+      coll.rm("./simple.txt")
+    end
+  end
+
+  def test_collection_modified_after_rename
+    check_collection_modified do |coll|
+      coll.rename("./simple.txt", "./newname")
+    end
+  end
+end

commit 5fd7a6b976b93d15a3a66c7610fc4d8b824bb31e
Author: Brett Smith <brett at curoverse.com>
Date:   Thu Mar 5 12:44:02 2015 -0500

    5104: Refactor Ruby SDK test infrastructure into sharable module.

diff --git a/sdk/ruby/test/sdk_fixtures.rb b/sdk/ruby/test/sdk_fixtures.rb
new file mode 100644
index 0000000..8af81cd
--- /dev/null
+++ b/sdk/ruby/test/sdk_fixtures.rb
@@ -0,0 +1,53 @@
+require "yaml"
+
+module SDKFixtures
+  module StaticMethods
+    # SDKFixtures will use these as class methods, and install them as
+    # instance methods on the test classes.
+    def random_block(size=nil)
+      sprintf("%032x+%d", rand(16 ** 32), size || rand(64 * 1024 * 1024))
+    end
+  end
+
+  extend StaticMethods
+
+  def self.included(base)
+    base.include(StaticMethods)
+  end
+
+  @@fixtures = {}
+  def fixtures name
+    @@fixtures[name] ||=
+      begin
+        path = File.
+          expand_path("../../../../services/api/test/fixtures/#{name}.yml",
+                      __FILE__)
+        file = IO.read(path)
+        trim_index = file.index('# Test Helper trims the rest of the file')
+        file = file[0, trim_index] if trim_index
+        YAML.load(file)
+      end
+  end
+
+  ### Valid manifests
+  SIMPLEST_MANIFEST = ". #{random_block(9)} 0:9:simple.txt\n"
+  MULTIBLOCK_FILE_MANIFEST =
+    [". #{random_block(8)} 0:4:repfile 4:4:uniqfile",
+     "./s1 #{random_block(6)} 0:3:repfile 3:3:uniqfile",
+     ". #{random_block(8)} 0:7:uniqfile2 7:1:repfile\n"].join("\n")
+  MULTILEVEL_MANIFEST =
+    [". #{random_block(9)} 0:3:file1 3:3:file2 6:3:file3\n",
+     "./dir0 #{random_block(9)} 0:3:file1 3:3:file2 6:3:file3\n",
+     "./dir0/subdir #{random_block(9)} 0:3:file1 3:3:file2 6:3:file3\n",
+     "./dir1 #{random_block(9)} 0:3:file1 3:3:file2 6:3:file3\n",
+     "./dir1/subdir #{random_block(9)} 0:3:file1 3:3:file2 6:3:file3\n",
+     "./dir2 #{random_block(9)} 0:3:file1 3:3:file2 6:3:file3\n"].join("")
+  COLON_FILENAME_MANIFEST = ". #{random_block(9)} 0:9:file:test.txt\n"
+  # Filename is `a a.txt`.
+  ESCAPED_FILENAME_MANIFEST = ". #{random_block(9)} 0:9:a\\040\\141.txt\n"
+  MANY_ESCAPES_MANIFEST =
+    "./dir\\040name #{random_block(9)} 0:9:file\\\\name\\011\\here.txt\n"
+  NONNORMALIZED_MANIFEST =
+    ["./dir2 #{random_block} 0:0:z 0:0:y 0:0:x",
+     "./dir1 #{random_block} 0:0:p 0:0:o 0:0:n\n"].join("\n")
+end
diff --git a/sdk/ruby/test/test_keep_manifest.rb b/sdk/ruby/test/test_keep_manifest.rb
index 8ad8134..2ad0941 100644
--- a/sdk/ruby/test/test_keep_manifest.rb
+++ b/sdk/ruby/test/test_keep_manifest.rb
@@ -1,22 +1,9 @@
-require "minitest/autorun"
 require "arvados/keep"
-require "yaml"
-
-def random_block(size=nil)
-  sprintf("%032x+%d", rand(16 ** 32), size || rand(64 * 1024 * 1024))
-end
+require "minitest/autorun"
+require "sdk_fixtures"
 
 class ManifestTest < Minitest::Test
-  SIMPLEST_MANIFEST = ". #{random_block(9)} 0:9:simple.txt\n"
-  MULTIBLOCK_FILE_MANIFEST =
-    [". #{random_block(8)} 0:4:repfile 4:4:uniqfile",
-     "./s1 #{random_block(6)} 0:3:repfile 3:3:uniqfile",
-     ". #{random_block(8)} 0:7:uniqfile2 7:1:repfile\n"].join("\n")
-  MULTILEVEL_MANIFEST =
-    [". #{random_block(9)} 0:3:file1 3:3:file2 6:3:file3\n",
-     "./dir1 #{random_block(9)} 0:3:file1 3:3:file2 6:3:file3\n",
-     "./dir1/subdir #{random_block(9)} 0:3:file1 3:3:file2 6:3:file3\n",
-     "./dir2 #{random_block(9)} 0:3:file1 3:3:file2 6:3:file3\n"].join("")
+  include SDKFixtures
 
   def check_stream(stream, exp_name, exp_blocks, exp_files)
     assert_equal(exp_name, stream.first)
@@ -51,7 +38,8 @@ class ManifestTest < Minitest::Test
       seen << stream
       assert_equal(3, files.size, "wrong file count for stream #{stream}")
     end
-    assert_equal(4, seen.size, "wrong number of streams")
+    assert_equal(MULTILEVEL_MANIFEST.count("\n"), seen.size,
+                 "wrong number of streams")
   end
 
   def test_empty_each_line
@@ -87,13 +75,12 @@ class ManifestTest < Minitest::Test
   end
 
   def test_backslash_escape_parsing
-    m_text = "./dir\\040name #{random_block} 0:0:file\\\\name\\011\\here.txt\n"
-    manifest = Keep::Manifest.new(m_text)
+    manifest = Keep::Manifest.new(MANY_ESCAPES_MANIFEST)
     streams = manifest.each_line.to_a
     assert_equal(1, streams.size, "wrong number of streams with whitespace")
     assert_equal("./dir name", streams.first.first,
                  "wrong stream name with whitespace")
-    assert_equal(["0:0:file\\name\t\\here.txt"], streams.first.last,
+    assert_equal(["0:9:file\\name\t\\here.txt"], streams.first.last,
                  "wrong filename(s) with whitespace")
   end
 
@@ -118,12 +105,12 @@ class ManifestTest < Minitest::Test
   end
 
   def test_files_with_colons_in_names
-    manifest = Keep::Manifest.new(". #{random_block(9)} 0:9:file:test.txt\n")
+    manifest = Keep::Manifest.new(COLON_FILENAME_MANIFEST)
     assert_equal([[".", "file:test.txt", 9]], manifest.files)
   end
 
   def test_files_with_escape_sequence_in_filename
-    manifest = Keep::Manifest.new(". #{random_block(9)} 0:9:a\\040\\141.txt\n")
+    manifest = Keep::Manifest.new(ESCAPED_FILENAME_MANIFEST)
     assert_equal([[".", "a a.txt", 9]], manifest.files)
   end
 
@@ -177,11 +164,11 @@ class ManifestTest < Minitest::Test
   end
 
   def test_has_file_with_spaces
-    manifest = Keep::Manifest.new(". #{random_block(3)} 0:3:a\\040b\\040c\n")
-    assert(manifest.has_file?("./a b c"), "one-arg 'a b c' not found")
-    assert(manifest.has_file?(".", "a b c"), "two-arg 'a b c' not found")
-    refute(manifest.has_file?("a\\040b\\040c"), "one-arg unescaped found")
-    refute(manifest.has_file?(".", "a\\040b\\040c"), "two-arg unescaped found")
+    manifest = Keep::Manifest.new(ESCAPED_FILENAME_MANIFEST)
+    assert(manifest.has_file?("./a a.txt"), "one-arg path not found")
+    assert(manifest.has_file?(".", "a a.txt"), "two-arg path not found")
+    refute(manifest.has_file?("a\\040\\141"), "one-arg unescaped found")
+    refute(manifest.has_file?(".", "a\\040\\141"), "two-arg unescaped found")
   end
 
   def test_parse_all_fixtures
@@ -208,15 +195,4 @@ class ManifestTest < Minitest::Test
       assert !file_name.empty?, "empty file_name in #{name} fixture"
     end
   end
-
-  @@fixtures = nil
-  def fixtures name
-    return @@fixtures if @@fixtures
-    path = File.expand_path("../../../../services/api/test/fixtures/#{name}.yml",
-                            __FILE__)
-    file = IO.read(path)
-    trim_index = file.index('# Test Helper trims the rest of the file')
-    file = file[0, trim_index] if trim_index
-    @@fixtures = YAML.load(file)
-  end
 end

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list