[ARVADOS] created: b1670d8bb221c397f443a508e083b6948a9dcc5b

git at public.curoverse.com git at public.curoverse.com
Wed Sep 3 17:58:03 EDT 2014


        at  b1670d8bb221c397f443a508e083b6948a9dcc5b (commit)


commit b1670d8bb221c397f443a508e083b6948a9dcc5b
Author: Brett Smith <brett at curoverse.com>
Date:   Wed Sep 3 17:52:00 2014 -0400

    3720: Limit Workbench file rendering for large Collections.
    
    Rendering too many files can cause rendering to take too long, and
    there's not much point because it can really strain browsers too.
    Arbitrarily cap rendering at 10,000 files.

diff --git a/apps/workbench/app/views/collections/_show_files.html.erb b/apps/workbench/app/views/collections/_show_files.html.erb
index d888b2e..e592ee4 100644
--- a/apps/workbench/app/views/collections/_show_files.html.erb
+++ b/apps/workbench/app/views/collections/_show_files.html.erb
@@ -4,7 +4,7 @@
 <% else %>
   <ul id="collection_files" class="collection_files">
   <% dirstack = [file_tree.first.first] %>
-  <% file_tree.each_with_index do |(dirname, filename, size), index| %>
+  <% file_tree.take(10000).each_with_index do |(dirname, filename, size), index| %>
     <% file_path = CollectionsHelper::file_path([dirname, filename]) %>
     <% while dirstack.any? and (dirstack.last != dirname) %>
       <% dirstack.pop %></ul></li>
diff --git a/apps/workbench/app/views/collections/show_file_links.html.erb b/apps/workbench/app/views/collections/show_file_links.html.erb
index 042e4ff..a829d8f 100644
--- a/apps/workbench/app/views/collections/show_file_links.html.erb
+++ b/apps/workbench/app/views/collections/show_file_links.html.erb
@@ -44,10 +44,11 @@ the entire collection with wget, try:</p>
 
 <h2>File Listing</h2>
 
-<% if @object.andand.files_tree.andand.any? %>
+<% file_tree = @object.andand.files_tree %>
+<% if file_tree.andand.any? %>
   <ul id="collection_files" class="collection_files">
-  <% dirstack = [@object.files_tree.first.first] %>
-  <% @object.files_tree.each_with_index do |(dirname, filename, size), index| %>
+  <% dirstack = [file_tree.first.first] %>
+  <% file_tree.take(10000).each_with_index do |(dirname, filename, size), index| %>
     <% file_path = CollectionsHelper::file_path([dirname, filename]) %>
     <% while dirstack.any? and (dirstack.last != dirname) %>
       <% dirstack.pop %></ul></li>

commit 79ad5e9693cccd5505fa81ca5c0e113a73e25ab7
Author: Brett Smith <brett at curoverse.com>
Date:   Fri Aug 29 17:37:13 2014 -0400

    3720: Refactor manifest parsing from API server to Ruby SDK.
    
    To date, the API server has been parsing manifests and returning
    parsed information in the files attribute.  That's convenient, but
    causes performance problems in large Collections.  Profiling indicates
    that most of the API server's response time is spent in the
    as_api_response method; rendering the same information twice really
    hurts.
    
    This commits removes the API server's need to always parse manifests,
    and moves the parsing code to the Ruby SDK.  Both the API server and
    Workbench use this Gem to parse manifests on an as-needed basis.
    This, combined with improvements to the new Keep::Manifest class, lays
    the groundwork for future performance gains.

diff --git a/apps/workbench/Gemfile b/apps/workbench/Gemfile
index a289303..6e33113 100644
--- a/apps/workbench/Gemfile
+++ b/apps/workbench/Gemfile
@@ -3,6 +3,8 @@ source 'https://rubygems.org'
 gem 'rails', '~> 4.1.0'
 gem 'minitest', '>= 5.0.0'
 
+gem 'arvados', '>= 0.1.20140903145725'
+
 # Bundle edge Rails instead:
 # gem 'rails', :git => 'git://github.com/rails/rails.git'
 
diff --git a/apps/workbench/Gemfile.lock b/apps/workbench/Gemfile.lock
index 3daa501..1d3531b 100644
--- a/apps/workbench/Gemfile.lock
+++ b/apps/workbench/Gemfile.lock
@@ -36,8 +36,19 @@ GEM
       minitest (~> 5.1)
       thread_safe (~> 0.1)
       tzinfo (~> 1.1)
+    addressable (2.3.6)
     andand (1.3.3)
     arel (5.0.1.20140414130214)
+    arvados (0.1.20140903145725)
+      activesupport (>= 3.2.13)
+      andand
+      google-api-client (~> 0.6.3)
+      json (>= 1.7.7)
+      jwt (>= 0.1.5, < 1.0.0)
+    autoparse (0.3.3)
+      addressable (>= 2.3.1)
+      extlib (>= 0.9.15)
+      multi_json (>= 1.0.0)
     bootstrap-sass (3.1.0.1)
       sass (~> 3.2)
     bootstrap-x-editable-rails (1.5.1.1)
@@ -70,7 +81,20 @@ GEM
     deep_merge (1.0.1)
     erubis (2.7.0)
     execjs (2.0.2)
+    extlib (0.9.16)
+    faraday (0.8.9)
+      multipart-post (~> 1.2.0)
     ffi (1.9.3)
+    google-api-client (0.6.4)
+      addressable (>= 2.3.2)
+      autoparse (>= 0.3.3)
+      extlib (>= 0.9.15)
+      faraday (~> 0.8.4)
+      jwt (>= 0.1.5)
+      launchy (>= 2.1.1)
+      multi_json (>= 1.0.0)
+      signet (~> 0.4.5)
+      uuidtools (>= 2.1.0)
     headless (1.0.1)
     highline (1.6.20)
     hike (1.2.3)
@@ -80,6 +104,10 @@ GEM
       railties (>= 3.0, < 5.0)
       thor (>= 0.14, < 2.0)
     json (1.8.1)
+    jwt (0.1.13)
+      multi_json (>= 1.5)
+    launchy (2.4.2)
+      addressable (~> 2.3)
     less (2.4.0)
       commonjs (~> 0.2.7)
     less-rails (2.4.2)
@@ -93,6 +121,7 @@ GEM
     mini_portile (0.5.2)
     minitest (5.3.3)
     multi_json (1.10.0)
+    multipart-post (1.2.0)
     net-scp (1.1.2)
       net-ssh (>= 2.6.5)
     net-sftp (2.1.2)
@@ -151,6 +180,11 @@ GEM
       multi_json (~> 1.0)
       rubyzip (~> 1.0)
       websocket (~> 1.0.4)
+    signet (0.4.5)
+      addressable (>= 2.2.3)
+      faraday (~> 0.8.1)
+      jwt (>= 0.1.5)
+      multi_json (>= 1.0.0)
     simplecov (0.7.1)
       multi_json (~> 1.0)
       simplecov-html (~> 0.7.1)
@@ -182,6 +216,7 @@ GEM
     uglifier (2.3.1)
       execjs (>= 0.3.0)
       json (>= 1.8.0)
+    uuidtools (2.1.5)
     websocket (1.0.7)
     websocket-driver (0.3.2)
     wiselinks (1.2.1)
@@ -194,6 +229,7 @@ PLATFORMS
 DEPENDENCIES
   RedCloth
   andand
+  arvados (>= 0.1.20140903145725)
   bootstrap-sass (~> 3.1.0)
   bootstrap-x-editable-rails
   capybara
diff --git a/apps/workbench/app/controllers/collections_controller.rb b/apps/workbench/app/controllers/collections_controller.rb
index 010cd22..41b4771 100644
--- a/apps/workbench/app/controllers/collections_controller.rb
+++ b/apps/workbench/app/controllers/collections_controller.rb
@@ -72,10 +72,14 @@ class CollectionsController < ApplicationController
   end
 
   def index
+    # API server index doesn't return manifest_text by default, but our
+    # callers want it unless otherwise specified.
+    @select ||= Collection.columns.map(&:name)
+    base_search = Collection.select(@select)
     if params[:search].andand.length.andand > 0
       tags = Link.where(any: ['contains', params[:search]])
-      @collections = (Collection.where(uuid: tags.collect(&:head_uuid)) |
-                      Collection.where(any: ['contains', params[:search]])).
+      @collections = (base_search.where(uuid: tags.collect(&:head_uuid)) |
+                      base_search.where(any: ['contains', params[:search]])).
         uniq { |c| c.uuid }
     else
       if params[:limit]
@@ -90,7 +94,7 @@ class CollectionsController < ApplicationController
         offset = 0
       end
 
-      @collections = Collection.limit(limit).offset(offset)
+      @collections = base_search.limit(limit).offset(offset)
     end
     @links = Link.limit(1000).
       where(head_uuid: @collections.collect(&:uuid))
@@ -275,10 +279,9 @@ class CollectionsController < ApplicationController
 
   def file_in_collection?(collection, filename)
     target = CollectionsHelper.file_path(File.split(filename))
-    collection.files.each do |file_spec|
-      return true if (CollectionsHelper.file_path(file_spec) == target)
+    collection.manifest.each_file.any? do |file_spec|
+      CollectionsHelper.file_path(file_spec) == target
     end
-    false
   end
 
   def file_enumerator(opts)
diff --git a/apps/workbench/app/models/collection.rb b/apps/workbench/app/models/collection.rb
index 78b0c36..24e6da5 100644
--- a/apps/workbench/app/models/collection.rb
+++ b/apps/workbench/app/models/collection.rb
@@ -1,3 +1,5 @@
+require "arvados/keep"
+
 class Collection < ArvadosBase
   MD5_EMPTY = 'd41d8cd98f00b204e9800998ecf8427e'
 
@@ -18,25 +20,33 @@ class Collection < ArvadosBase
     true
   end
 
+  def manifest
+    Keep::Manifest.new(manifest_text || "")
+  end
+
+  def files
+    # This method provides backwards compatibility for code that relied on
+    # the old files field in API results.  New code should use manifest
+    # methods directly.
+    if @files.nil? or manifest_text_changed?
+      @files = manifest.each_file.to_a
+    end
+    @files
+  end
+
   def content_summary
     ApplicationController.helpers.human_readable_bytes_html(total_bytes) + " " + super
   end
 
   def total_bytes
-    if files
-      tot = 0
-      files.each do |file|
-        tot += file[2]
-      end
-      tot
-    else
-      0
-    end
+    manifest.each_file.inject(0) { |sum, filespec| sum + filespec.last }
   end
 
   def files_tree
-    return [] if files.empty?
-    tree = files.group_by { |file_spec| File.split(file_spec.first) }
+    tree = manifest.each_file.group_by do |file_spec|
+      File.split(file_spec.first)
+    end
+    return [] if tree.empty?
     # Fill in entries for empty directories.
     tree.keys.map { |basedir, _| File.split(basedir) }.each do |splitdir|
       until tree.include?(splitdir)
diff --git a/sdk/ruby/arvados.gemspec b/sdk/ruby/arvados.gemspec
index 3670ccd..53ab7ce 100644
--- a/sdk/ruby/arvados.gemspec
+++ b/sdk/ruby/arvados.gemspec
@@ -12,7 +12,7 @@ Gem::Specification.new do |s|
   s.authors     = ["Arvados Authors"]
   s.email       = 'gem-dev at curoverse.com'
   s.licenses    = ['Apache License, Version 2.0']
-  s.files       = ["lib/arvados.rb"]
+  s.files       = ["lib/arvados.rb", "lib/arvados/keep.rb"]
   s.required_ruby_version = '>= 2.1.0'
   s.add_dependency('google-api-client', '~> 0.6.3')
   s.add_dependency('activesupport', '>= 3.2.13')
diff --git a/sdk/ruby/lib/arvados/keep.rb b/sdk/ruby/lib/arvados/keep.rb
new file mode 100644
index 0000000..6b18ddf
--- /dev/null
+++ b/sdk/ruby/lib/arvados/keep.rb
@@ -0,0 +1,134 @@
+module Keep
+  class Locator
+    # A Locator is used to parse and manipulate Keep locator strings.
+    #
+    # Locators obey the following syntax:
+    #
+    #   locator      ::= address hint*
+    #   address      ::= digest size-hint
+    #   digest       ::= <32 hexadecimal digits>
+    #   size-hint    ::= "+" [0-9]+
+    #   hint         ::= "+" hint-type hint-content
+    #   hint-type    ::= [A-Z]
+    #   hint-content ::= [A-Za-z0-9 at _-]+
+    #
+    # Individual hints may have their own required format:
+    #
+    #   sign-hint      ::= "+A" <40 lowercase hex digits> "@" sign-timestamp
+    #   sign-timestamp ::= <8 lowercase hex digits>
+    attr_reader :hash, :hints, :size
+
+    def initialize(hasharg, sizearg, hintarg)
+      @hash = hasharg
+      @size = sizearg
+      @hints = hintarg
+    end
+
+    # Locator.parse returns a Locator object parsed from the string tok.
+    # Returns nil if tok could not be parsed as a valid locator.
+    def self.parse(tok)
+      begin
+        Locator.parse!(tok)
+      rescue ArgumentError => e
+        nil
+      end
+    end
+
+    # Locator.parse! returns a Locator object parsed from the string tok,
+    # raising an ArgumentError if tok cannot be parsed.
+    def self.parse!(tok)
+      if tok.nil? or tok.empty?
+        raise ArgumentError.new "locator is nil or empty"
+      end
+
+      m = /^([[:xdigit:]]{32})(\+([[:digit:]]+))?(\+([[:upper:]][[:alnum:]+ at _-]*))?$/.match(tok.strip)
+      unless m
+        raise ArgumentError.new "not a valid locator #{tok}"
+      end
+
+      tokhash, _, toksize, _, trailer = m[1..5]
+      tokhints = []
+      if trailer
+        trailer.split('+').each do |hint|
+          if hint =~ /^[[:upper:]][[:alnum:]@_-]+$/
+            tokhints.push(hint)
+          else
+            raise ArgumentError.new "unknown hint #{hint}"
+          end
+        end
+      end
+
+      Locator.new(tokhash, toksize, tokhints)
+    end
+
+    # Returns the signature hint supplied with this locator,
+    # or nil if the locator was not signed.
+    def signature
+      @hints.grep(/^A/).first
+    end
+
+    # Returns an unsigned Locator.
+    def without_signature
+      Locator.new(@hash, @size, @hints.reject { |o| o.start_with?("A") })
+    end
+
+    def strip_hints
+      Locator.new(@hash, @size, [])
+    end
+
+    def strip_hints!
+      @hints = []
+      self
+    end
+
+    def to_s
+      if @size
+        [ @hash, @size, *@hints ].join('+')
+      else
+        [ @hash, *@hints ].join('+')
+      end
+    end
+  end
+
+  class Manifest
+    # Class to parse a manifest text and provide common views of that data.
+    def initialize(manifest_text)
+      @text = manifest_text
+    end
+
+    def each_stream
+      return to_enum(__method__) unless block_given?
+      @text.each_line do |line|
+        tokens = line.split
+        stream_name = unescape(tokens.shift)
+        blocks = []
+        while loc = Locator.parse(tokens.first)
+          blocks << loc
+          tokens.shift
+        end
+        yield [stream_name, blocks, tokens.map { |s| unescape(s) }]
+      end
+    end
+
+    def each_file
+      return to_enum(__method__) unless block_given?
+      each_stream do |streamname, blocklist, filelist|
+        filelist.each do |filespec|
+          start_pos, filesize, filename = filespec.split(':', 3)
+          yield [streamname, filename, filesize.to_i]
+        end
+      end
+    end
+
+    def unescape(s)
+      s.gsub(/\\(\\|[0-7]{3})/) do |_|
+        case $1
+        when '\\'
+          '\\'
+        else
+          $1.to_i(8).chr
+        end
+      end
+    end
+  end
+end
diff --git a/sdk/ruby/test/test_keep_manifest.rb b/sdk/ruby/test/test_keep_manifest.rb
new file mode 100644
index 0000000..99417b5
--- /dev/null
+++ b/sdk/ruby/test/test_keep_manifest.rb
@@ -0,0 +1,87 @@
+require "minitest/autorun"
+require "arvados/keep"
+
+def random_block(size=nil)
+  sprintf("%032x+%d", rand(16 ** 32), size || rand(64 * 1024 * 1024))
+end
+
+class ManifestTest < Minitest::Test
+  SIMPLEST_MANIFEST = ". #{random_block(9)} 0:9:simple.txt\n"
+  MULTILEVEL_MANIFEST =
+    [". #{random_block(9)} 0:3:file1 3:3:file2 6:3:file3\n",
+     "./dir1 #{random_block(9)} 0:3:file1 3:3:file2 6:3:file3\n",
+     "./dir1/subdir #{random_block(9)} 0:3:file1 3:3:file2 6:3:file3\n",
+     "./dir2 #{random_block(9)} 0:3:file1 3:3:file2 6:3:file3\n"].join("")
+
+  def test_simple_each_stream_array
+    manifest = Keep::Manifest.new(SIMPLEST_MANIFEST)
+    stream_name, block_s, file = SIMPLEST_MANIFEST.strip.split
+    stream_a = manifest.each_stream.to_a
+    assert_equal(1, stream_a.size, "wrong number of streams")
+    assert_equal(stream_name, stream_a[0][0])
+    assert_equal([block_s], stream_a[0][1].map(&:to_s))
+    assert_equal([file], stream_a[0][2])
+  end
+
+  def test_simple_each_stream_block
+    manifest = Keep::Manifest.new(SIMPLEST_MANIFEST)
+    result = []
+    manifest.each_stream do |stream, blocks, files|
+      result << files
+    end
+    assert_equal([[SIMPLEST_MANIFEST.split.last]], result,
+                 "wrong result from each_stream block")
+  end
+
+  def test_multilevel_each_stream
+    manifest = Keep::Manifest.new(MULTILEVEL_MANIFEST)
+    seen = []
+    manifest.each_stream do |stream, blocks, files|
+      refute(seen.include?(stream),
+             "each_stream already yielded stream #{stream}")
+      seen << stream
+      assert_equal(3, files.size, "wrong file count for stream #{stream}")
+    end
+    assert_equal(4, seen.size, "wrong number of streams")
+  end
+
+  def test_empty_each_stream
+    assert_empty(Keep::Manifest.new("").each_stream.to_a)
+  end
+
+  def test_backslash_escape_parsing
+    m_text = "./dir\\040name #{random_block} 0:0:file\\\\name\\011\\here.txt\n"
+    manifest = Keep::Manifest.new(m_text)
+    streams = manifest.each_stream.to_a
+    assert_equal(1, streams.size, "wrong number of streams with whitespace")
+    assert_equal("./dir name", streams.first.first,
+                 "wrong stream name with whitespace")
+    assert_equal(["0:0:file\\name\t\\here.txt"], streams.first.last,
+                 "wrong filename(s) with whitespace")
+  end
+
+  def test_simple_each_file_array
+    manifest = Keep::Manifest.new(SIMPLEST_MANIFEST)
+    assert_equal([[".", "simple.txt", 9]], manifest.each_file.to_a)
+  end
+
+  def test_multilevel_each_file
+    manifest = Keep::Manifest.new(MULTILEVEL_MANIFEST)
+    seen = Hash.new { |this, key| this[key] = [] }
+    manifest.each_file do |stream, basename, size|
+      refute(seen[stream].include?(basename),
+             "each_file repeated #{stream}/#{basename}")
+      seen[stream] << basename
+      assert_equal(3, size, "wrong size for #{stream}/#{basename}")
+    end
+    seen.each_pair do |stream, basenames|
+      assert_equal(%w(file1 file2 file3), basenames.sort,
+                   "wrong file list for #{stream}")
+    end
+  end
+
+  def test_each_file_handles_filenames_with_colons
+    manifest = Keep::Manifest.new(". #{random_block(9)} 0:9:file:test.txt\n")
+    assert_equal([[".", "file:test.txt", 9]], manifest.each_file.to_a)
+  end
+end
diff --git a/services/api/Gemfile b/services/api/Gemfile
index 592d00d..3966705 100644
--- a/services/api/Gemfile
+++ b/services/api/Gemfile
@@ -71,6 +71,7 @@ gem 'database_cleaner'
 
 gem 'themes_for_rails'
 
+gem 'arvados', '>= 0.1.20140903145725'
 gem 'arvados-cli', '>= 0.1.20140827170424'
 
 # pg_power lets us use partial indexes in schema.rb in Rails 3
diff --git a/services/api/Gemfile.lock b/services/api/Gemfile.lock
index 6167d7a..62fb6c1 100644
--- a/services/api/Gemfile.lock
+++ b/services/api/Gemfile.lock
@@ -35,7 +35,7 @@ GEM
     addressable (2.3.6)
     andand (1.3.3)
     arel (3.0.3)
-    arvados (0.1.20140827170424)
+    arvados (0.1.20140903145725)
       activesupport (>= 3.2.13)
       andand
       google-api-client (~> 0.6.3)
@@ -223,6 +223,7 @@ PLATFORMS
 DEPENDENCIES
   acts_as_api
   andand
+  arvados (>= 0.1.20140903145725)
   arvados-cli (>= 0.1.20140827170424)
   coffee-rails (~> 3.2.0)
   database_cleaner
diff --git a/services/api/app/controllers/arvados/v1/collections_controller.rb b/services/api/app/controllers/arvados/v1/collections_controller.rb
index 3055132..45331a3 100644
--- a/services/api/app/controllers/arvados/v1/collections_controller.rb
+++ b/services/api/app/controllers/arvados/v1/collections_controller.rb
@@ -1,6 +1,8 @@
+require "arvados/keep"
+
 class Arvados::V1::CollectionsController < ApplicationController
   def create
-    if resource_attrs[:uuid] and (loc = Locator.parse(resource_attrs[:uuid]))
+    if resource_attrs[:uuid] and (loc = Keep::Locator.parse(resource_attrs[:uuid]))
       resource_attrs[:portable_data_hash] = loc.to_s
       resource_attrs.delete :uuid
     end
@@ -8,15 +10,13 @@ class Arvados::V1::CollectionsController < ApplicationController
   end
 
   def find_object_by_uuid
-    if loc = Locator.parse(params[:id])
+    if loc = Keep::Locator.parse(params[:id])
       loc.strip_hints!
       if c = Collection.readable_by(*@read_users).where({ portable_data_hash: loc.to_s }).limit(1).first
         @object = {
           uuid: c.portable_data_hash,
           portable_data_hash: c.portable_data_hash,
           manifest_text: c.manifest_text,
-          files: c.files,
-          data_size: c.data_size
         }
       end
     else
@@ -51,7 +51,7 @@ class Arvados::V1::CollectionsController < ApplicationController
       end
     when String
       return if sp.empty?
-      if loc = Locator.parse(sp)
+      if loc = Keep::Locator.parse(sp)
         search_edges(visited, loc.to_s, :search_up)
       end
     end
@@ -62,7 +62,7 @@ class Arvados::V1::CollectionsController < ApplicationController
       return
     end
 
-    if loc = Locator.parse(uuid)
+    if loc = Keep::Locator.parse(uuid)
       loc.strip_hints!
       return if visited[loc.to_s]
     end
@@ -74,8 +74,6 @@ class Arvados::V1::CollectionsController < ApplicationController
       if c = Collection.readable_by(*@read_users).where(portable_data_hash: loc.to_s).limit(1).first
         visited[loc.to_s] = {
           portable_data_hash: c.portable_data_hash,
-          files: c.files,
-          data_size: c.data_size
         }
       end
 
diff --git a/services/api/app/models/collection.rb b/services/api/app/models/collection.rb
index d25536e..18a25ec 100644
--- a/services/api/app/models/collection.rb
+++ b/services/api/app/models/collection.rb
@@ -1,3 +1,5 @@
+require 'arvados/keep'
+
 class Collection < ArvadosModel
   include HasUuid
   include KindAndEtag
@@ -9,8 +11,6 @@ class Collection < ArvadosModel
   validate :ensure_hash_matches_manifest_text
 
   api_accessible :user, extend: :common do |t|
-    t.add :data_size
-    t.add :files
     t.add :name
     t.add :description
     t.add :properties
@@ -18,12 +18,6 @@ class Collection < ArvadosModel
     t.add :manifest_text
   end
 
-  def self.attributes_required_columns
-    super.merge({ "data_size" => ["manifest_text"],
-                  "files" => ["manifest_text"],
-                })
-  end
-
   def check_signatures
     return false if self.manifest_text.nil?
 
@@ -48,7 +42,7 @@ class Collection < ArvadosModel
             # filenames. Safety first!
           elsif Blob.verify_signature tok, signing_opts
             # OK.
-          elsif Locator.parse(tok).andand.signature
+          elsif Keep::Locator.parse(tok).andand.signature
             # Signature provided, but verify_signature did not like it.
             logger.warn "Invalid signature on locator #{tok}"
             raise ArvadosModel::PermissionDeniedError
@@ -82,7 +76,7 @@ class Collection < ArvadosModel
       self.portable_data_hash = "#{Digest::MD5.hexdigest(manifest_text)}+#{manifest_text.length}"
     elsif portable_data_hash_changed?
       begin
-        loc = Locator.parse!(self.portable_data_hash)
+        loc = Keep::Locator.parse!(self.portable_data_hash)
         loc.strip_hints!
         if loc.size
           self.portable_data_hash = loc.to_s
@@ -125,81 +119,11 @@ class Collection < ArvadosModel
     end
   end
 
-  def data_size
-    inspect_manifest_text if @data_size.nil? or manifest_text_changed?
-    @data_size
-  end
-
-  def files
-    inspect_manifest_text if @files.nil? or manifest_text_changed?
-    @files
-  end
-
-  def inspect_manifest_text
-    if !manifest_text
-      @data_size = false
-      @files = []
-      return
-    end
-
-    @data_size = 0
-    tmp = {}
-
-    manifest_text.split("\n").each do |stream|
-      toks = stream.split(" ")
-
-      stream = toks[0].gsub /\\(\\|[0-7]{3})/ do |escape_sequence|
-        case $1
-        when '\\' '\\'
-        else $1.to_i(8).chr
-        end
-      end
-
-      toks[1..-1].each do |tok|
-        if (re = tok.match /^[0-9a-f]{32}/)
-          blocksize = nil
-          tok.split('+')[1..-1].each do |hint|
-            if !blocksize and hint.match /^\d+$/
-              blocksize = hint.to_i
-            end
-            if (re = hint.match /^GS(\d+)$/)
-              blocksize = re[1].to_i
-            end
-          end
-          @data_size = false if !blocksize
-          @data_size += blocksize if @data_size
-        else
-          if (re = tok.match /^(\d+):(\d+):(\S+)$/)
-            filename = re[3].gsub /\\(\\|[0-7]{3})/ do |escape_sequence|
-              case $1
-              when '\\' '\\'
-              else $1.to_i(8).chr
-              end
-            end
-            fn = stream + '/' + filename
-            i = re[2].to_i
-            if tmp[fn]
-              tmp[fn] += i
-            else
-              tmp[fn] = i
-            end
-          end
-        end
-      end
-    end
-
-    @files = []
-    tmp.each do |k, v|
-      re = k.match(/^(.+)\/(.+)/)
-      @files << [re[1], re[2], v]
-    end
-  end
-
   def self.munge_manifest_locators(manifest)
     # Given a manifest text and a block, yield each locator,
     # and replace it with whatever the block returns.
     manifest.andand.gsub!(/ [[:xdigit:]]{32}(\+[[:digit:]]+)?(\+\S+)/) do |word|
-      if loc = Locator.parse(word.strip)
+      if loc = Keep::Locator.parse(word.strip)
         " " + yield(loc)
       else
         " " + word
@@ -234,12 +158,18 @@ class Collection < ArvadosModel
 
     # If the search term is a Collection locator that contains one file
     # that looks like a Docker image, return it.
-    if loc = Locator.parse(search_term)
+    if loc = Keep::Locator.parse(search_term)
       loc.strip_hints!
       coll_match = readable_by(*readers).where(portable_data_hash: loc.to_s).limit(1).first
-      if coll_match and (coll_match.files.size == 1) and
-          (coll_match.files[0][1] =~ /^[0-9A-Fa-f]{64}\.tar$/)
-        return [coll_match]
+      if coll_match
+        # Check if the Collection contains exactly one file whose name
+        # looks like a saved Docker image.  We only take up to two
+        # files, because any number >1 means there's no match.
+        coll_files = Keep::Manifest.new(coll_match.manifest_text).each_file.take(2)
+        if (coll_files.size == 1) and
+            (coll_files[0][1] =~ /^[0-9A-Fa-f]{64}\.tar$/)
+          return [coll_match]
+        end
       end
     end
 
diff --git a/services/api/app/models/locator.rb b/services/api/app/models/locator.rb
deleted file mode 100644
index 22dfc2c..0000000
--- a/services/api/app/models/locator.rb
+++ /dev/null
@@ -1,104 +0,0 @@
-# A Locator is used to parse and manipulate Keep locator strings.
-#
-# Locators obey the following syntax:
-#
-#   locator      ::= address hint*
-#   address      ::= digest size-hint
-#   digest       ::= <32 hexadecimal digits>
-#   size-hint    ::= "+" [0-9]+
-#   hint         ::= "+" hint-type hint-content
-#   hint-type    ::= [A-Z]
-#   hint-content ::= [A-Za-z0-9 at _-]+
-#
-# Individual hints may have their own required format:
-#
-#   sign-hint      ::= "+A" <40 lowercase hex digits> "@" sign-timestamp
-#   sign-timestamp ::= <8 lowercase hex digits>
-
-class Locator
-  def initialize(hasharg, sizearg, hintarg)
-    @hash = hasharg
-    @size = sizearg
-    @hints = hintarg
-  end
-
-  # Locator.parse returns a Locator object parsed from the string tok.
-  # Returns nil if tok could not be parsed as a valid locator.
-  def self.parse(tok)
-    begin
-      Locator.parse!(tok)
-    rescue ArgumentError => e
-      nil
-    end
-  end
-
-  # Locator.parse! returns a Locator object parsed from the string tok,
-  # raising an ArgumentError if tok cannot be parsed.
-  def self.parse!(tok)
-    if tok.nil? or tok.empty?
-      raise ArgumentError.new "locator is nil or empty"
-    end
-
-    m = /^([[:xdigit:]]{32})(\+([[:digit:]]+))?(\+([[:upper:]][[:alnum:]+ at _-]*))?$/.match(tok.strip)
-    unless m
-      raise ArgumentError.new "not a valid locator #{tok}"
-    end
-    unless m[2]
-      Rails.logger.debug ArgumentError.new "missing size hint on #{tok}"
-    end
-
-    tokhash, _, toksize, _, trailer = m[1..5]
-    tokhints = []
-    if trailer
-      trailer.split('+').each do |hint|
-        if hint =~ /^[[:upper:]][[:alnum:]@_-]+$/
-          tokhints.push(hint)
-        else
-          raise ArgumentError.new "unknown hint #{hint}"
-        end
-      end
-    end
-
-    Locator.new(tokhash, toksize, tokhints)
-  end
-
-  # Returns the signature hint supplied with this locator,
-  # or nil if the locator was not signed.
-  def signature
-    @hints.grep(/^A/).first
-  end
-
-  # Returns an unsigned Locator.
-  def without_signature
-    Locator.new(@hash, @size, @hints.reject { |o| o.start_with?("A") })
-  end
-
-  def strip_hints
-    Locator.new(@hash, @size, [])
-  end
-
-  def strip_hints!
-    @hints = []
-    self
-  end
-
-  def hash
-    @hash
-  end
-
-  def size
-    @size
-  end
-
-  def hints
-    @hints
-  end
-
-  def to_s
-    if @size
-      [ @hash, @size, *@hints ].join('+')
-    else
-      [ @hash, *@hints ].join('+')
-    end
-  end
-end
diff --git a/services/api/test/functional/arvados/v1/collections_controller_test.rb b/services/api/test/functional/arvados/v1/collections_controller_test.rb
index 65dfca5..edc7041 100644
--- a/services/api/test/functional/arvados/v1/collections_controller_test.rb
+++ b/services/api/test/functional/arvados/v1/collections_controller_test.rb
@@ -26,31 +26,8 @@ class Arvados::V1::CollectionsControllerTest < ActionController::TestCase
            "basic Collections index included manifest_text")
   end
 
-  test "can get non-database fields via index select" do
-    authorize_with :active
-    get(:index, filters: [["uuid", "=", collections(:foo_file).uuid]],
-        select: %w(uuid owner_uuid files))
-    assert_response :success
-    assert_equal(1, json_response["items"].andand.size,
-                 "wrong number of items returned for index")
-    assert_equal([[".", "foo", 3]], json_response["items"].first["files"],
-                 "wrong file list in index result")
-  end
-
-  test "can select only non-database fields for index" do
-    authorize_with :active
-    get(:index, select: %w(data_size files))
-    assert_response :success
-    assert(json_response["items"].andand.any?, "no items found in index")
-    json_response["items"].each do |coll|
-      assert_equal(coll["data_size"],
-                   coll["files"].inject(0) { |size, fspec| size + fspec.last },
-                   "mismatch between data size and file list")
-    end
-  end
-
   test "index with manifest_text selected returns signed locators" do
-    columns = %w(uuid owner_uuid data_size files manifest_text)
+    columns = %w(uuid owner_uuid manifest_text)
     authorize_with :active
     get :index, select: columns
     assert_response :success
@@ -139,29 +116,6 @@ EOS
     # Remove any permission hints in the response before comparing it to the source.
     stripped_manifest = resp['manifest_text'].gsub(/\+A[A-Za-z0-9 at _-]+/, '')
     assert_equal test_collection[:manifest_text], stripped_manifest
-    assert_equal 9, resp['data_size']
-    assert_equal [['.', 'foo.txt', 0],
-                  ['.', 'bar.txt', 6],
-                  ['./baz', 'bar.txt', 3]], resp['files']
-  end
-
-  test "list of files is correct for empty manifest" do
-    authorize_with :active
-    test_collection = {
-      manifest_text: "",
-      portable_data_hash: "d41d8cd98f00b204e9800998ecf8427e+0"
-    }
-    post :create, {
-      collection: test_collection
-    }
-    assert_response :success
-
-    get :show, {
-      id: "d41d8cd98f00b204e9800998ecf8427e+0"
-    }
-    assert_response :success
-    resp = JSON.parse(@response.body)
-    assert_equal [], resp['files']
   end
 
   test "create with owner_uuid set to owned group" do
@@ -385,7 +339,6 @@ EOS
       assert_not_nil assigns(:object)
       resp = JSON.parse(@response.body)
       assert_equal manifest_uuid, resp['portable_data_hash']
-      assert_equal 48, resp['data_size']
       # All of the locators in the output must be signed.
       resp['manifest_text'].lines.each do |entry|
         m = /([[:xdigit:]]{32}\+\S+)/.match(entry)
@@ -433,7 +386,6 @@ EOS
     assert_not_nil assigns(:object)
     resp = JSON.parse(@response.body)
     assert_equal manifest_uuid, resp['portable_data_hash']
-    assert_equal 48, resp['data_size']
     # All of the locators in the output must be signed.
     resp['manifest_text'].lines.each do |entry|
       m = /([[:xdigit:]]{32}\+\S+)/.match(entry)
@@ -522,7 +474,6 @@ EOS
     assert_not_nil assigns(:object)
     resp = JSON.parse(@response.body)
     assert_equal manifest_uuid, resp['portable_data_hash']
-    assert_equal 48, resp['data_size']
 
     # The manifest in the response will have had permission hints added.
     # Remove any permission hints in the response before comparing it to the source.
@@ -561,7 +512,6 @@ EOS
     assert_not_nil assigns(:object)
     resp = JSON.parse(@response.body)
     assert_equal manifest_uuid, resp['portable_data_hash']
-    assert_equal 48, resp['data_size']
     # All of the locators in the output must be signed.
     # Each line is of the form "path locator locator ... 0:0:file.txt"
     # entry.split[1..-2] will yield just the tokens in the middle of the line

commit b9533bd0187b757d4be01b76c1d11ebcad56696d
Author: Brett Smith <brett at curoverse.com>
Date:   Wed Sep 3 17:11:51 2014 -0400

    3720: arvados-cli Gemfile refers to gemspec.
    
    This change helps avoid loops where you have to have arvados-cli
    installed to build arvados-cli.

diff --git a/sdk/cli/Gemfile b/sdk/cli/Gemfile
index e8fa5f1..638a00c 100644
--- a/sdk/cli/Gemfile
+++ b/sdk/cli/Gemfile
@@ -1,4 +1,4 @@
 source 'https://rubygems.org'
+gemspec
 gem 'minitest', '>= 5.0.0'
 gem 'rake'
-gem 'arvados-cli'
diff --git a/sdk/cli/Gemfile.lock b/sdk/cli/Gemfile.lock
index 14b2172..a435b05 100644
--- a/sdk/cli/Gemfile.lock
+++ b/sdk/cli/Gemfile.lock
@@ -1,24 +1,36 @@
-GEM
-  remote: https://rubygems.org/
+PATH
+  remote: .
   specs:
-    activesupport (3.2.16)
-      i18n (~> 0.6, >= 0.6.4)
-      multi_json (~> 1.0)
-    addressable (2.3.5)
-    andand (1.3.3)
-    arvados-cli (0.1.20140211100007)
+    arvados-cli (0.1.20140903145725)
       activesupport (~> 3.2, >= 3.2.13)
       andand (~> 1.3, >= 1.3.3)
+      arvados (~> 0.1.0)
       curb (~> 0.8)
       google-api-client (~> 0.6.3)
       json (~> 1.7, >= 1.7.7)
+      jwt (>= 0.1.5, < 1.0.0)
       oj (~> 2.0, >= 2.0.3)
       trollop (~> 2.0)
+
+GEM
+  remote: https://rubygems.org/
+  specs:
+    activesupport (3.2.17)
+      i18n (~> 0.6, >= 0.6.4)
+      multi_json (~> 1.0)
+    addressable (2.3.6)
+    andand (1.3.3)
+    arvados (0.1.20140903145725)
+      activesupport (>= 3.2.13)
+      andand
+      google-api-client (~> 0.6.3)
+      json (>= 1.7.7)
+      jwt (>= 0.1.5, < 1.0.0)
     autoparse (0.3.3)
       addressable (>= 2.3.1)
       extlib (>= 0.9.15)
       multi_json (>= 1.0.0)
-    curb (0.8.5)
+    curb (0.8.6)
     extlib (0.9.16)
     faraday (0.8.9)
       multipart-post (~> 1.2.0)
@@ -32,16 +44,16 @@ GEM
       multi_json (>= 1.0.0)
       signet (~> 0.4.5)
       uuidtools (>= 2.1.0)
-    i18n (0.6.9)
+    i18n (0.6.11)
     json (1.8.1)
-    jwt (0.1.11)
+    jwt (0.1.13)
       multi_json (>= 1.5)
     launchy (2.4.2)
       addressable (~> 2.3)
     minitest (5.2.3)
-    multi_json (1.8.4)
+    multi_json (1.10.1)
     multipart-post (1.2.0)
-    oj (2.5.4)
+    oj (2.10.2)
     rake (10.1.1)
     signet (0.4.5)
       addressable (>= 2.2.3)
@@ -49,12 +61,12 @@ GEM
       jwt (>= 0.1.5)
       multi_json (>= 1.0.0)
     trollop (2.0)
-    uuidtools (2.1.4)
+    uuidtools (2.1.5)
 
 PLATFORMS
   ruby
 
 DEPENDENCIES
-  arvados-cli
+  arvados-cli!
   minitest (>= 5.0.0)
   rake

commit 2569b6b26a36641d2434ed6fe947eb34d15eaf97
Author: Brett Smith <brett at curoverse.com>
Date:   Wed Sep 3 14:57:25 2014 -0400

    3720: Clean up+skip Ruby SDK tests.
    
    First I changed these to get them passing again.  With that done, I
    added the skip.  We're not currently running these tests, and getting
    them going the right way again (e.g., with run_test_server.py or
    mocks) is out of scope for my current task.

diff --git a/sdk/ruby/test/test_big_request.rb b/sdk/ruby/test/test_big_request.rb
index d62141a..84700fd 100644
--- a/sdk/ruby/test/test_big_request.rb
+++ b/sdk/ruby/test/test_big_request.rb
@@ -3,31 +3,24 @@ require 'arvados'
 require 'digest/md5'
 
 class TestBigRequest < Minitest::Test
-  def setup
-    begin
-      Dir.mkdir './tmp'
-    rescue Errno::EEXIST
-    end
-    @@arv = Arvados.new
-  end
-
   def boring_manifest nblocks
     x = '.'
     (0..nblocks).each do |z|
       x += ' d41d8cd98f00b204e9800998ecf8427e+0'
     end
-    x += "0:0:foo.txt\n"
+    x += " 0:0:foo.txt\n"
     x
   end
 
   def test_create_manifest nblocks=1
+    skip "Test needs an API server to run against"
     manifest_text = boring_manifest nblocks
     uuid = Digest::MD5.hexdigest(manifest_text) + '+' + manifest_text.size.to_s
-    c = @@arv.collection.create(collection: {
-                                  uuid: uuid,
-                                  manifest_text: manifest_text
-                                })
-    assert_equal uuid, c[:uuid]
+    c = Arvados.new.collection.create(collection: {
+                                        uuid: uuid,
+                                        manifest_text: manifest_text,
+                                      })
+    assert_equal uuid, c[:portable_data_hash]
   end
 
   def test_create_big_manifest

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list