[ARVADOS] updated: a8ef8be836b1805b4e35922ea787caad7b81f689

git at public.curoverse.com git at public.curoverse.com
Mon Apr 13 13:22:46 EDT 2015


Summary of changes:
 apps/workbench/Gemfile                             |   2 +-
 apps/workbench/Gemfile.lock                        |   4 +-
 .../app/controllers/actions_controller.rb          | 202 +++++++++------------
 .../views/application/_title_and_buttons.html.erb  |  24 +--
 .../test/controllers/actions_controller_test.rb    |  69 ++++---
 sdk/ruby/lib/arvados/collection.rb                 |  55 ++++--
 sdk/ruby/test/test_collection.rb                   |  87 ++++++++-
 7 files changed, 266 insertions(+), 177 deletions(-)

       via  a8ef8be836b1805b4e35922ea787caad7b81f689 (commit)
       via  43aa02f5af636f874bac5ffe96cff0061bcd6a44 (commit)
       via  ee60fe9a20238dfac97dd988380c0149a84c372a (commit)
       via  ef2572886ea8693f16316cbfe537053f106a2bb5 (commit)
       via  95298001afb4cb471d16bd181a8487ebe58bb0d4 (commit)
       via  0acda438a00257099b07141f21ad18cf92f03355 (commit)
       via  8893e8cf0dfd542508e9e45d715ee0165b249bd8 (commit)
       via  c17712a6fd8264e3b9fecfedc5124a9b6df00a14 (commit)
      from  ec8879aa56810dfc6475ad8cdc56770ff91f84f2 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit a8ef8be836b1805b4e35922ea787caad7b81f689
Author: Brett Smith <brett at curoverse.com>
Date:   Mon Apr 13 13:22:37 2015 -0400

    5614: Update Workbench Gemfile for new Ruby SDK.
    
    Refs #5614.

diff --git a/apps/workbench/Gemfile b/apps/workbench/Gemfile
index b51f674..91c21f1 100644
--- a/apps/workbench/Gemfile
+++ b/apps/workbench/Gemfile
@@ -1,7 +1,7 @@
 source 'https://rubygems.org'
 
 gem 'rails', '~> 4.1.0'
-gem 'arvados', '>= 0.1.20150313191637'
+gem 'arvados', '>= 0.1.20150413172135'
 
 gem 'sqlite3'
 
diff --git a/apps/workbench/Gemfile.lock b/apps/workbench/Gemfile.lock
index 19b2857..59d72f4 100644
--- a/apps/workbench/Gemfile.lock
+++ b/apps/workbench/Gemfile.lock
@@ -40,7 +40,7 @@ GEM
     andand (1.3.3)
     angularjs-rails (1.3.8)
     arel (5.0.1.20140414130214)
-    arvados (0.1.20150313191637)
+    arvados (0.1.20150413172135)
       activesupport (>= 3.2.13)
       andand (~> 1.3, >= 1.3.3)
       google-api-client (~> 0.6.3, >= 0.6.3)
@@ -258,7 +258,7 @@ DEPENDENCIES
   RedCloth
   andand
   angularjs-rails
-  arvados (>= 0.1.20150313191637)
+  arvados (>= 0.1.20150413172135)
   bootstrap-sass (~> 3.1.0)
   bootstrap-tab-history-rails
   bootstrap-x-editable-rails

commit 43aa02f5af636f874bac5ffe96cff0061bcd6a44
Merge: ec8879a ee60fe9
Author: Brett Smith <brett at curoverse.com>
Date:   Mon Apr 13 13:21:15 2015 -0400

    Merge branch '5614-workbench-optimize-combine-collections-wip'
    
    Refs #5614, #4943.  Closes #5686.


commit ee60fe9a20238dfac97dd988380c0149a84c372a
Author: Brett Smith <brett at curoverse.com>
Date:   Wed Apr 8 16:37:58 2015 -0400

    5614: Improve Workbench combine collections performance.
    
    * Get links with API list calls, instead of fetching each one
      individually.
    
    * Get a list mapping portable data hashes to UUIDs, and add a single
      UUID per portable data hash to the fetch list.  This helps us avoid
      downloading multiple copies the same manifest text, and is probably
      the single-biggest win in this entire commit for most use cases.
    
    * Use the Ruby SDK to build the new collection.  This lets us avoid
      spawning new arv-normalize processes, and piping large manifests to
      them.  It also lets us build the entire collection and normalize
      only when we're done.
    
    * Use Oj instead of JSON.

diff --git a/apps/workbench/app/controllers/actions_controller.rb b/apps/workbench/app/controllers/actions_controller.rb
index 7737a3c..e6ef6eb 100644
--- a/apps/workbench/app/controllers/actions_controller.rb
+++ b/apps/workbench/app/controllers/actions_controller.rb
@@ -1,3 +1,5 @@
+require "arvados/collection"
+
 class ActionsController < ApplicationController
 
   skip_filter :require_thread_api_token, only: [:report_issue_popup, :report_issue]
@@ -100,141 +102,113 @@ class ActionsController < ApplicationController
     end
   end
 
-  def arv_normalize mt, *opts
-    r = ""
-    env = Hash[ENV].
-      merge({'ARVADOS_API_HOST' =>
-              arvados_api_client.arvados_v1_base.
-              sub(/\/arvados\/v1/, '').
-              sub(/^https?:\/\//, ''),
-              'ARVADOS_API_TOKEN' => 'x',
-              'ARVADOS_API_HOST_INSECURE' =>
-              Rails.configuration.arvados_insecure_https ? 'true' : 'false'
-            })
-    IO.popen([env, 'arv-normalize'] + opts, 'w+b') do |io|
-      io.write mt
-      io.close_write
-      while buf = io.read(2**16)
-        r += buf
-      end
+  expose_action :combine_selected_files_into_collection do
+    link_uuids, coll_ids = params["selection"].partition do |sel_s|
+      ArvadosBase::resource_class_for_uuid(sel_s) == Link
     end
-    r
-  end
 
-  expose_action :combine_selected_files_into_collection do
-    uuids = []
-    pdhs = []
-    files = []
-    params["selection"].each do |s|
-      a = ArvadosBase::resource_class_for_uuid s
-      if a == Link
-        begin
-          if (m = CollectionsHelper.match(Link.find(s).head_uuid))
-            pdhs.append(m[1] + m[2])
-            files.append(m)
-          end
-        rescue
+    unless link_uuids.empty?
+      Link.select([:head_uuid]).where(uuid: link_uuids).each do |link|
+        if ArvadosBase::resource_class_for_uuid(link.head_uuid) == Collection
+          coll_ids << link.head_uuid
         end
-      elsif (m = CollectionsHelper.match(s))
-        pdhs.append(m[1] + m[2])
-        files.append(m)
-      elsif (m = CollectionsHelper.match_uuid_with_optional_filepath(s))
-        uuids.append(m[1])
-        files.append(m)
       end
     end
 
-    pdhs = pdhs.uniq
-    uuids = uuids.uniq
-    chash = {}
-
-    Collection.select([:uuid, :manifest_text]).where(uuid: uuids).each do |c|
-      chash[c.uuid] = c
+    uuids = []
+    pdhs = []
+    source_paths = Hash.new { |hash, key| hash[key] = [] }
+    coll_ids.each do |coll_id|
+      if m = CollectionsHelper.match(coll_id)
+        key = m[1] + m[2]
+        pdhs << key
+        source_paths[key] << m[4]
+      elsif m = CollectionsHelper.match_uuid_with_optional_filepath(coll_id)
+        key = m[1]
+        uuids << key
+        source_paths[key] << m[4]
+      end
     end
 
-    Collection.select([:portable_data_hash, :manifest_text]).where(portable_data_hash: pdhs).each do |c|
-      chash[c.portable_data_hash] = c
+    unless pdhs.empty?
+      Collection.where(portable_data_hash: pdhs.uniq).
+          select([:uuid, :portable_data_hash]).each do |coll|
+        unless source_paths[coll.portable_data_hash].empty?
+          uuids << coll.uuid
+          source_paths[coll.uuid] = source_paths.delete(coll.portable_data_hash)
+        end
+      end
     end
 
-    combined = ""
-    files_in_dirs = {}
-    files.each do |m|
-      mt = chash[m[1]+m[2]].andand.manifest_text
-      if not m[4].nil? and m[4].size > 1
-        manifest_files = files_in_dirs['.']
-        if !manifest_files
-          manifest_files = []
-          files_in_dirs['.'] = manifest_files
-        end
-        manifest_file = m[4].split('/')[-1]
-        uniq_file = derive_unique_filename(manifest_file, manifest_files)
-        normalized = arv_normalize mt, '--extract', ".#{m[4]}"
-        normalized = normalized.gsub(/(\d+:\d+:)(#{Regexp.quote manifest_file})/) {|s| "#{$1}#{uniq_file}" }
-        combined += normalized
-        manifest_files << uniq_file
+    new_coll = Arv::Collection.new
+    Collection.where(uuid: uuids.uniq).
+        select([:uuid, :manifest_text]).each do |coll|
+      src_coll = Arv::Collection.new(coll.manifest_text)
+      src_pathlist = source_paths[coll.uuid]
+      if src_pathlist.any?(&:blank?)
+        src_pathlist = src_coll.each_file_path
+        destdir = nil
       else
-        mt = arv_normalize mt
-        manifest_streams = mt.split "\n"
-        adjusted_streams = []
-        manifest_streams.each do |stream|
-          manifest_parts = stream.split
-          adjusted_parts = []
-          manifest_files = files_in_dirs[manifest_parts[0]]
-          if !manifest_files
-            manifest_files = []
-            files_in_dirs[manifest_parts[0]] = manifest_files
-          end
-
-          manifest_parts.each do |part|
-            part_match = /(\d+:\d+:)(\S+)/.match(part)
-            if part_match
-              uniq_file = derive_unique_filename(part_match[2], manifest_files)
-              adjusted_parts << "#{part_match[1]}#{uniq_file}" 
-              manifest_files << uniq_file
-            else
-              adjusted_parts << part
-            end
-          end
-          adjusted_streams << adjusted_parts.join(' ')
+        destdir = "."
+      end
+      src_pathlist.each do |src_path|
+        src_path = src_path.sub(/^(\.\/|\/|)/, "./")
+        src_stream, _, basename = src_path.rpartition("/")
+        dst_stream = destdir || src_stream
+        # Generate a unique name by adding (1), (2), etc. to it.
+        # If the filename has a dot that's not at the beginning, insert the
+        # number just before that.  Otherwise, append the number to the name.
+        if match = basename.match(/[^\.]\./)
+          suffix_start = match.begin(0) + 1
+        else
+          suffix_start = basename.size
         end
-        adjusted_streams.each do |stream|
-          combined += (stream + "\n")
+        suffix_size = 0
+        dst_path = nil
+        loop.each_with_index do |_, try_count|
+          dst_path = "#{dst_stream}/#{basename}"
+          break unless new_coll.exist?(dst_path)
+          uniq_suffix = "(#{try_count + 1})"
+          basename[suffix_start, suffix_size] = uniq_suffix
+          suffix_size = uniq_suffix.size
         end
+        new_coll.cp_r(src_path, dst_path, src_coll)
       end
     end
 
-    normalized = arv_normalize combined
-    newc = Collection.new({:manifest_text => normalized})
-    newc.name = newc.name || "Collection created at #{Time.now.localtime}"
+    coll_attrs = {
+      manifest_text: new_coll.manifest_text,
+      name: "Collection created at #{Time.now.localtime}",
+    }
+    flash = {}
 
     # set owner_uuid to current project, provided it is writable
-    current_project_writable = false
-    action_data = JSON.parse(params['action_data']) if params['action_data']
-    if action_data && action_data['current_project_uuid']
-      current_project = Group.find(action_data['current_project_uuid']) rescue nil
-      if (current_project && current_project.writable_by.andand.include?(current_user.uuid))
-        newc.owner_uuid = action_data['current_project_uuid']
-        current_project_writable = true
-      end
+    action_data = Oj.load(params['action_data'] || "{}")
+    if action_data['current_project_uuid'] and
+        current_project = Group.find?(action_data['current_project_uuid']) and
+        current_project.writable_by.andand.include?(current_user.uuid)
+      coll_attrs[:owner_uuid] = current_project.uuid
+      flash[:message] =
+        "Created new collection in the project #{current_project.name}."
+    else
+      flash[:message] = "Created new collection in your Home project."
     end
 
-    newc.save!
-
-    chash.each do |k,v|
-      l = Link.new({
-                     tail_uuid: k,
-                     head_uuid: newc.uuid,
-                     link_class: "provenance",
-                     name: "provided"
-                   })
-      l.save!
+    newc = Collection.create!(coll_attrs)
+    source_paths.each_key do |src_uuid|
+      unless Link.create({
+                           tail_uuid: src_uuid,
+                           head_uuid: newc.uuid,
+                           link_class: "provenance",
+                           name: "provided",
+                         })
+        flash[:error] = "
+An error occurred when saving provenance information for this collection.
+You can try recreating the collection to get a copy with full provenance data."
+        break
+      end
     end
-
-    msg = current_project_writable ?
-              "Created new collection in the project #{current_project.name}." :
-              "Created new collection in your Home project."
-
-    redirect_to newc, flash: {'message' => msg}
+    redirect_to(newc, flash: flash)
   end
 
   def report_issue_popup

commit ef2572886ea8693f16316cbfe537053f106a2bb5
Author: Brett Smith <brett at curoverse.com>
Date:   Mon Apr 13 11:59:51 2015 -0400

    5614: Workbench renders error flash separately from others.

diff --git a/apps/workbench/app/views/application/_title_and_buttons.html.erb b/apps/workbench/app/views/application/_title_and_buttons.html.erb
index 31ff2e6..398f248 100644
--- a/apps/workbench/app/views/application/_title_and_buttons.html.erb
+++ b/apps/workbench/app/views/application/_title_and_buttons.html.erb
@@ -52,15 +52,17 @@
   <% end %>
 <% end %>
 
-<%
-  # Display any flash messages in an alert. If there is any entry with "error" key, alert-danger is used.
-  flash_msg = ''
-  flash_msg_is_error = false
-  flash.each do |msg|
-    flash_msg_is_error ||= (msg[0]=='error')
-    flash_msg += ('<p class="contain-align-left">' + msg[1] + '</p>')
-  end
-  if flash_msg != ''
-%>
-<div class="flash-message alert <%= flash_msg_is_error ? 'alert-danger' : 'alert-warning' %>"><%=flash_msg.html_safe%></div>
+<% unless flash["error"].blank? %>
+<div class="flash-message alert alert-danger" role="alert">
+  <p class="contain-align-left"><%= flash["error"] %></p>
+</div>
+<% flash.delete("error") %>
+<% end %>
+
+<% unless flash.empty? %>
+<div class="flash-message alert alert-warning">
+  <% flash.each do |_, msg| %>
+  <p class="contain-align-left"><%= msg %></p>
+  <% end %>
+</div>
 <% end %>

commit 95298001afb4cb471d16bd181a8487ebe58bb0d4
Author: Brett Smith <brett at curoverse.com>
Date:   Wed Apr 8 16:00:57 2015 -0400

    5614: Use assert_includes more in Workbench tests for better diagnostics.

diff --git a/apps/workbench/test/controllers/actions_controller_test.rb b/apps/workbench/test/controllers/actions_controller_test.rb
index 8745d96..26ef67b 100644
--- a/apps/workbench/test/controllers/actions_controller_test.rb
+++ b/apps/workbench/test/controllers/actions_controller_test.rb
@@ -27,19 +27,18 @@ class ActionsControllerTest < ActionController::TestCase
 
     assert_response 302   # collection created and redirected to new collection page
 
-    assert response.headers['Location'].include? '/collections/'
+    assert_includes(response.headers['Location'], '/collections/')
     new_collection_uuid = response.headers['Location'].split('/')[-1]
 
     use_token :active
     collection = Collection.select([:uuid, :manifest_text]).where(uuid: new_collection_uuid).first
     manifest_text = collection['manifest_text']
-    assert manifest_text.include?('foo'), 'Not found foo in new collection manifest text'
-    assert manifest_text.include?('bar'), 'Not found bar in new collection manifest text'
-    assert manifest_text.include?('baz'), 'Not found baz in new collection manifest text'
-    assert manifest_text.include?('0:0:file1 0:0:file2 0:0:file3'),
-                'Not found 0:0:file1 0:0:file2 0:0:file3 in new collection manifest text'
-    assert manifest_text.include?('dir1/subdir'), 'Not found dir1/subdir in new collection manifest text'
-    assert manifest_text.include?('dir2'), 'Not found dir2 in new collection manifest text'
+    assert_includes(manifest_text, "foo")
+    assert_includes(manifest_text, "bar")
+    assert_includes(manifest_text, "baz")
+    assert_includes(manifest_text, "0:0:file1 0:0:file2 0:0:file3")
+    assert_includes(manifest_text, "dir1/subdir")
+    assert_includes(manifest_text, "dir2")
   end
 
   test "combine files  with repeated names into new collection" do
@@ -55,21 +54,19 @@ class ActionsControllerTest < ActionController::TestCase
 
     assert_response 302   # collection created and redirected to new collection page
 
-    assert response.headers['Location'].include? '/collections/'
+    assert_includes(response.headers['Location'], '/collections/')
     new_collection_uuid = response.headers['Location'].split('/')[-1]
 
     use_token :active
     collection = Collection.select([:uuid, :manifest_text]).where(uuid: new_collection_uuid).first
     manifest_text = collection['manifest_text']
-    assert manifest_text.include?('foo'), 'Not found foo in new collection manifest text'
-    assert manifest_text.include?('foo(1)'), 'Not found foo(1) in new collection manifest text'
-    assert manifest_text.include?('foo(2)'), 'Not found foo(2) in new collection manifest text'
-    assert manifest_text.include?('bar'), 'Not found bar in new collection manifest text'
-    assert manifest_text.include?('baz'), 'Not found baz in new collection manifest text'
-    assert manifest_text.include?('0:0:file1 0:0:file2 0:0:file3'),
-                'Not found 0:0:file1 0:0:file2 0:0:file3 in new collection manifest text'
-    assert manifest_text.include?('dir1/subdir'), 'Not found dir1/subdir in new collection manifest text'
-    assert manifest_text.include?('dir2'), 'Not found dir2 in new collection manifest text'
+    assert_includes(manifest_text, "foo(1)")
+    assert_includes(manifest_text, "foo(2)")
+    assert_includes(manifest_text, "bar")
+    assert_includes(manifest_text, "baz")
+    assert_includes(manifest_text, "0:0:file1 0:0:file2 0:0:file3")
+    assert_includes(manifest_text, "dir1/subdir")
+    assert_includes(manifest_text, "dir2")
   end
 
   test "combine collections with repeated filenames in almost similar directories and expect files with proper suffixes" do
@@ -90,26 +87,26 @@ class ActionsControllerTest < ActionController::TestCase
     collection = Collection.select([:uuid, :manifest_text]).where(uuid: new_collection_uuid).first
     manifest_text = collection['manifest_text']
 
-    assert manifest_text.include?('foo'), 'Not found foo in new collection manifest text'
-    assert manifest_text.include?('foo(1)'), 'Not found foo(1) in new collection manifest text'
+    assert_includes(manifest_text, 'foo')
+    assert_includes(manifest_text, 'foo(1)')
 
     streams = manifest_text.split "\n"
     streams.each do |stream|
       if stream.start_with? './dir1'
         # dir1 stream
-        assert stream.include?(':alice(1)'), "Not found: alice(1) in dir1 in manifest text #{manifest_text}"
-        assert stream.include?(':alice.txt'), "Not found: alice.txt in dir1 in manifest text #{manifest_text}"
-        assert stream.include?(':alice(1).txt'), "Not found: alice(1).txt in dir1 in manifest text #{manifest_text}"
-        assert stream.include?(':bob.txt'), "Not found: bob.txt in dir1 in manifest text #{manifest_text}"
-        assert stream.include?(':carol.txt'), "Not found: carol.txt in dir1 in manifest text #{manifest_text}"
+        assert_includes(stream, ':alice(1)')
+        assert_includes(stream, ':alice.txt')
+        assert_includes(stream, ':alice(1).txt')
+        assert_includes(stream, ':bob.txt')
+        assert_includes(stream, ':carol.txt')
       elsif stream.start_with? './dir2'
         # dir2 stream
-        assert stream.include?(':alice.txt'), "Not found: alice.txt in dir2 in manifest text #{manifest_text}"
-        assert stream.include?(':alice(1).txt'), "Not found: alice(1).txt in dir2 in manifest text #{manifest_text}"
+        assert_includes(stream, ':alice.txt')
+        assert_includes(stream, ':alice(1).txt')
       elsif stream.start_with? '. '
         # . stream
-        assert stream.include?(':foo'), "Not found: foo in . in manifest text #{manifest_text}"
-        assert stream.include?(':foo(1)'), "Not found: foo(1) in . in manifest text #{manifest_text}"
+        assert_includes(stream, ':foo')
+        assert_includes(stream, ':foo(1)')
       end
     end
   end
@@ -123,7 +120,7 @@ class ActionsControllerTest < ActionController::TestCase
 
     assert_response 302   # collection created and redirected to new collection page
 
-    assert response.headers['Location'].include? '/collections/'
+    assert_includes(response.headers['Location'], '/collections/')
     new_collection_uuid = response.headers['Location'].split('/')[-1]
 
     use_token :active
@@ -134,12 +131,12 @@ class ActionsControllerTest < ActionController::TestCase
     assert_equal 2, streams.length
     streams.each do |stream|
       if stream.start_with? './dir1'
-        assert stream.include?('foo'), 'Not found: foo in dir1'
+        assert_includes(stream, 'foo')
       elsif stream.start_with? '. '
-        assert stream.include?('foo'), 'Not found: foo in .'
+        assert_includes(stream, 'foo')
       end
     end
-    assert !manifest_text.include?('foo(1)'), 'Found foo(1) in new collection manifest text'
+    refute_includes(manifest_text, 'foo(1)')
   end
 
   test "combine foo files from two different collection streams and expect proper filename suffixes" do
@@ -151,7 +148,7 @@ class ActionsControllerTest < ActionController::TestCase
 
     assert_response 302   # collection created and redirected to new collection page
 
-    assert response.headers['Location'].include? '/collections/'
+    assert_includes(response.headers['Location'], '/collections/')
     new_collection_uuid = response.headers['Location'].split('/')[-1]
 
     use_token :active
@@ -160,7 +157,7 @@ class ActionsControllerTest < ActionController::TestCase
 
     streams = manifest_text.split "\n"
     assert_equal 1, streams.length, "Incorrect number of streams in #{manifest_text}"
-    assert manifest_text.include?('foo'), "Not found foo in new collection manifest text #{manifest_text}"
-    assert manifest_text.include?('foo(1)'), "Not found foo(1) in new collection manifest text #{manifest_text}"
+    assert_includes(manifest_text, 'foo')
+    assert_includes(manifest_text, 'foo(1)')
   end
 end

commit 0acda438a00257099b07141f21ad18cf92f03355
Author: Brett Smith <brett at curoverse.com>
Date:   Wed Apr 8 15:42:53 2015 -0400

    5614: Ruby SDK cp_r method creates streams as needed.

diff --git a/sdk/ruby/lib/arvados/collection.rb b/sdk/ruby/lib/arvados/collection.rb
index 9670f37..07b7519 100644
--- a/sdk/ruby/lib/arvados/collection.rb
+++ b/sdk/ruby/lib/arvados/collection.rb
@@ -14,8 +14,13 @@ module Arv
         loc_list = LocatorList.new(locators)
         file_specs.map { |s| manifest.split_file_token(s) }.
             each do |file_start, file_len, file_path|
-          @root.file_at(normalize_path(stream_root, file_path)).
-            add_segment(loc_list.segment(file_start, file_len))
+          begin
+            @root.file_at(normalize_path(stream_root, file_path)).
+              add_segment(loc_list.segment(file_start, file_len))
+          rescue Errno::ENOTDIR, Errno::EISDIR => error
+            raise ArgumentError.new("%p is both a stream and file" %
+                                    error.to_s.partition(" - ").last)
+          end
         end
       end
     end
@@ -101,13 +106,19 @@ module Arv
       # is found and can be copied.
       source_collection = self if source_collection.nil?
       src_stream, src_tail = source_collection.find(source)
-      dst_stream, dst_tail = find(target)
+      dst_stream_path, _, dst_tail = normalize_path(target).rpartition("/")
+      if dst_stream_path.empty?
+        dst_stream, dst_tail = @root.find(dst_tail)
+        dst_tail ||= src_tail
+      else
+        dst_stream = @root.stream_at(dst_stream_path)
+        dst_tail = src_tail if dst_tail.empty?
+      end
       if (source_collection.equal?(self) and
           (src_stream.path == dst_stream.path) and (src_tail == dst_tail))
         return self
       end
       src_item = src_stream[src_tail]
-      dst_tail ||= src_tail
       check_method = "check_can_#{copy_method}".to_sym
       target_name = nil
       if opts.fetch(:descend_target, true)
@@ -307,7 +318,7 @@ module Arv
 
       def stream_at(find_path)
         key, rest = find_path.split("/", 2)
-        next_stream = get_or_new(key, CollectionStream)
+        next_stream = get_or_new(key, CollectionStream, Errno::ENOTDIR)
         if rest.nil?
           next_stream
         else
@@ -318,7 +329,7 @@ module Arv
       def file_at(find_path)
         stream_path, _, file_name = find_path.rpartition("/")
         if stream_path.empty?
-          get_or_new(file_name, CollectionFile)
+          get_or_new(file_name, CollectionFile, Errno::EISDIR)
         else
           stream_at(stream_path).file_at(file_name)
         end
@@ -401,17 +412,15 @@ module Arv
         items[key] = item
       end
 
-      def get_or_new(key, klass)
+      def get_or_new(key, klass, err_class)
         # Return the collection item at `key` and ensure that it's a `klass`.
         # If `key` does not exist, create a new `klass` there.
-        # If the value for `key` is not a `klass`, raise an ArgumentError.
+        # If the value for `key` is not a `klass`, raise an `err_class`.
         item = items[key]
         if item.nil?
           self[key] = klass.new("#{path}/#{key}")
         elsif not item.is_a?(klass)
-          raise ArgumentError.
-            new("in stream %p, %p is a %s, not a %s" %
-                [path, key, items[key].class.human_name, klass.human_name])
+          raise err_class.new(item.path)
         else
           item
         end
diff --git a/sdk/ruby/test/test_collection.rb b/sdk/ruby/test/test_collection.rb
index e96054b..e2a39bc 100644
--- a/sdk/ruby/test/test_collection.rb
+++ b/sdk/ruby/test/test_collection.rb
@@ -223,13 +223,17 @@ class CollectionTest < Minitest::Test
     assert_equal(expected.join(""), coll.manifest_text)
   end
 
-  def test_copy_stream_over_file_raises_ENOTDIR
+  def test_copy_stream_over_file_raises_ENOTDIR(source="./s1", target="./f2")
     coll = Arv::Collection.new(TWO_BY_TWO_MANIFEST_S)
     assert_raises(Errno::ENOTDIR) do
-      coll.cp_r("./s1", "./f2")
+      coll.cp_r(source, target)
     end
   end
 
+  def test_copy_file_under_file_raises_ENOTDIR
+    test_copy_stream_over_file_raises_ENOTDIR("./f1", "./f2/newfile")
+  end
+
   def test_copy_stream_over_nonempty_stream_merges_and_overwrites
     blocks = random_blocks(3, 9)
     manifest_a =
@@ -323,6 +327,20 @@ class CollectionTest < Minitest::Test
     assert_equal(expect_lines.join(""), coll.manifest_text)
   end
 
+  def test_copy_file_into_new_stream_with_implicit_filename
+    coll = Arv::Collection.new(SIMPLEST_MANIFEST)
+    coll.cp_r("./simple.txt", "./new/")
+    assert_equal(SIMPLEST_MANIFEST + SIMPLEST_MANIFEST.sub(". ", "./new "),
+                 coll.manifest_text)
+  end
+
+  def test_copy_file_into_new_stream_with_explicit_filename
+    coll = Arv::Collection.new(SIMPLEST_MANIFEST)
+    coll.cp_r("./simple.txt", "./new/newfile.txt")
+    new_line = SIMPLEST_MANIFEST.sub(". ", "./new ").sub(":simple", ":newfile")
+    assert_equal(SIMPLEST_MANIFEST + new_line, coll.manifest_text)
+  end
+
   def test_copy_stream_contents_into_root
     coll = Arv::Collection.new(TWO_BY_TWO_MANIFEST_S)
     coll.cp_r("./s1/", ".")

commit 8893e8cf0dfd542508e9e45d715ee0165b249bd8
Author: Brett Smith <brett at curoverse.com>
Date:   Tue Apr 7 15:50:31 2015 -0400

    5614: Add Collection#each_file to Ruby SDK.

diff --git a/sdk/ruby/lib/arvados/collection.rb b/sdk/ruby/lib/arvados/collection.rb
index b3e7b7e..9670f37 100644
--- a/sdk/ruby/lib/arvados/collection.rb
+++ b/sdk/ruby/lib/arvados/collection.rb
@@ -43,6 +43,10 @@ module Arv
       copy(:merge, source.chomp("/"), target, source_collection, opts)
     end
 
+    def each_file_path(&block)
+      @root.each_file_path(&block)
+    end
+
     def exist?(path)
       begin
         substream, item = find(path)
@@ -281,6 +285,17 @@ module Arv
         end
       end
 
+      def each_file_path
+        return to_enum(__method__) unless block_given?
+        items.each_value do |item|
+          if item.file?
+            yield item.path
+          else
+            item.each_file_path { |path| yield path }
+          end
+        end
+      end
+
       def find(find_path)
         # Given a POSIX-style path, return the CollectionStream that
         # contains the object at that path, and the name of the object
diff --git a/sdk/ruby/test/test_collection.rb b/sdk/ruby/test/test_collection.rb
index a6aa1cf..e96054b 100644
--- a/sdk/ruby/test/test_collection.rb
+++ b/sdk/ruby/test/test_collection.rb
@@ -374,6 +374,40 @@ class CollectionTest < Minitest::Test
     test_copy_empty_source_path_raises_ArgumentError(".", "")
   end
 
+  ### .each_file_path
+
+  def test_each_file_path
+    coll = Arv::Collection.new(TWO_BY_TWO_MANIFEST_S)
+    if block_given?
+      result = yield(coll)
+    else
+      result = []
+      coll.each_file_path { |path| result << path }
+    end
+    assert_equal(["./f1", "./f2", "./s1/f1", "./s1/f3"], result.sort)
+  end
+
+  def test_each_file_path_without_block
+    test_each_file_path { |coll| coll.each_file_path.to_a }
+  end
+
+  def test_each_file_path_empty_collection
+    assert_empty(Arv::Collection.new.each_file_path.to_a)
+  end
+
+  def test_each_file_path_after_collection_emptied
+    coll = Arv::Collection.new(SIMPLEST_MANIFEST)
+    coll.rm("simple.txt")
+    assert_empty(coll.each_file_path.to_a)
+  end
+
+  def test_each_file_path_deduplicates_manifest_listings
+    coll = Arv::Collection.new(MULTIBLOCK_FILE_MANIFEST)
+    assert_equal(["./repfile", "./s1/repfile", "./s1/uniqfile",
+                  "./uniqfile", "./uniqfile2"],
+                 coll.each_file_path.to_a.sort)
+  end
+
   ### .exist?
 
   def test_exist(test_method=:assert, path="f2")
@@ -394,7 +428,7 @@ class CollectionTest < Minitest::Test
   end
 
   def test_path_inside_stream_not_exist
-    test_exist(:refute, "s1/nonexistent")
+    test_exist(:refute, "s1/f2")
   end
 
   def test_path_under_file_not_exist

commit c17712a6fd8264e3b9fecfedc5124a9b6df00a14
Author: Brett Smith <brett at curoverse.com>
Date:   Tue Apr 7 15:29:40 2015 -0400

    5614: Add Collection#exist? to Ruby SDK.

diff --git a/sdk/ruby/lib/arvados/collection.rb b/sdk/ruby/lib/arvados/collection.rb
index ec0f443..b3e7b7e 100644
--- a/sdk/ruby/lib/arvados/collection.rb
+++ b/sdk/ruby/lib/arvados/collection.rb
@@ -43,6 +43,15 @@ module Arv
       copy(:merge, source.chomp("/"), target, source_collection, opts)
     end
 
+    def exist?(path)
+      begin
+        substream, item = find(path)
+        not (substream.leaf? or substream[item].nil?)
+      rescue Errno::ENOENT, Errno::ENOTDIR
+        false
+      end
+    end
+
     def rename(source, target)
       copy(:add_copy, source, target) { rm_r(source) }
     end
diff --git a/sdk/ruby/test/test_collection.rb b/sdk/ruby/test/test_collection.rb
index 3dd1ab3..a6aa1cf 100644
--- a/sdk/ruby/test/test_collection.rb
+++ b/sdk/ruby/test/test_collection.rb
@@ -374,6 +374,37 @@ class CollectionTest < Minitest::Test
     test_copy_empty_source_path_raises_ArgumentError(".", "")
   end
 
+  ### .exist?
+
+  def test_exist(test_method=:assert, path="f2")
+    coll = Arv::Collection.new(TWO_BY_TWO_MANIFEST_S)
+    send(test_method, coll.exist?(path))
+  end
+
+  def test_file_not_exist
+    test_exist(:refute, "f3")
+  end
+
+  def test_stream_exist
+    test_exist(:assert, "s1")
+  end
+
+  def test_file_inside_stream_exist
+    test_exist(:assert, "s1/f1")
+  end
+
+  def test_path_inside_stream_not_exist
+    test_exist(:refute, "s1/nonexistent")
+  end
+
+  def test_path_under_file_not_exist
+    test_exist(:refute, "f2/nonexistent")
+  end
+
+  def test_deep_substreams_not_exist
+    test_exist(:refute, "a/b/c/d/e/f/g")
+  end
+
   ### .rename
 
   def test_simple_file_rename

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list