[ARVADOS] updated: dab62e9006a117b29314f1c2db9aa10b665cc4d7

git at public.curoverse.com git at public.curoverse.com
Mon May 25 13:47:33 EDT 2015


Summary of changes:
 apps/workbench/app/models/arvados_api_client.rb    |  8 ++-
 apps/workbench/config/application.default.yml      |  3 +
 .../test/controllers/big_collection_test.rb        | 49 ---------------
 apps/workbench/test/helpers/manifest_examples.rb   |  1 +
 .../collection_unit_test.rb                        | 66 ++++++++++++++++++++
 .../collections_controller_test.rb                 | 71 ++++++++++++++++++++++
 apps/workbench/test/unit/big_collection_test.rb    | 41 -------------
 services/api/app/models/blob.rb                    | 13 ++--
 services/api/app/models/collection.rb              |  2 -
 services/api/test/helpers/manifest_examples.rb     | 31 ++++++++++
 .../integration/collections_performance_test.rb    | 20 +++---
 .../api/test/unit/collection_performance_test.rb   | 34 +++++++++--
 12 files changed, 227 insertions(+), 112 deletions(-)
 delete mode 100644 apps/workbench/test/controllers/big_collection_test.rb
 create mode 120000 apps/workbench/test/helpers/manifest_examples.rb
 create mode 100644 apps/workbench/test/integration_performance/collection_unit_test.rb
 create mode 100644 apps/workbench/test/integration_performance/collections_controller_test.rb
 delete mode 100644 apps/workbench/test/unit/big_collection_test.rb
 create mode 100644 services/api/test/helpers/manifest_examples.rb

  discards  46b43471b3c39ac187f8f81a211b74842b360ba4 (commit)
       via  dab62e9006a117b29314f1c2db9aa10b665cc4d7 (commit)
       via  ccfa3ebf6aa31f59cf6289669e542174c77a2e2a (commit)
       via  cc988c81ad543eb4e8ad88416e4c2fcb937abd11 (commit)
       via  0ded94e10f668c961c257199d7b6d08af234b75a (commit)

This update added new revisions after undoing existing revisions.  That is
to say, the old revision is not a strict subset of the new revision.  This
situation occurs when you --force push a change and generate a repository
containing something like this:

 * -- * -- B -- O -- O -- O (46b43471b3c39ac187f8f81a211b74842b360ba4)
            \
             N -- N -- N (dab62e9006a117b29314f1c2db9aa10b665cc4d7)

When this happens we assume that you've already had alert emails for all
of the O revisions, and so we here report only the revisions in the N
branch from the common base, B.

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit dab62e9006a117b29314f1c2db9aa10b665cc4d7
Author: Tom Clegg <tom at curoverse.com>
Date:   Mon May 25 13:25:22 2015 -0400

    6087: Add big-manifest tests, with some finer-grained performance numbers on stderr.

diff --git a/apps/workbench/app/models/arvados_api_client.rb b/apps/workbench/app/models/arvados_api_client.rb
index 17c5ec6..ca09aa7 100644
--- a/apps/workbench/app/models/arvados_api_client.rb
+++ b/apps/workbench/app/models/arvados_api_client.rb
@@ -134,7 +134,7 @@ class ArvadosApiClient
 
     header = {"Accept" => "application/json"}
 
-    profile_checkpoint { "Prepare request #{url} #{query[:uuid]} #{query[:where]} #{query[:filters]} #{query[:order]}" }
+    profile_checkpoint { "Prepare request #{query["_method"] or "POST"} #{url} #{query[:uuid]} #{query.inspect[0,256]}" }
     msg = @client_mtx.synchronize do
       begin
         @api_client.post(url, query, header: header)
@@ -143,6 +143,12 @@ class ArvadosApiClient
       end
     end
     profile_checkpoint 'API transaction'
+    if @@profiling_enabled
+      if msg.headers['X-Runtime']
+        Rails.logger.info "API server: #{msg.headers['X-Runtime']} runtime reported"
+      end
+      Rails.logger.info "Content-Encoding #{msg.headers['Content-Encoding'].inspect}, Content-Length #{msg.headers['Content-Length'].inspect}, actual content size #{msg.content.size}"
+    end
 
     begin
       resp = Oj.load(msg.content, :symbol_keys => true)
diff --git a/apps/workbench/test/helpers/manifest_examples.rb b/apps/workbench/test/helpers/manifest_examples.rb
new file mode 120000
index 0000000..cb908ef
--- /dev/null
+++ b/apps/workbench/test/helpers/manifest_examples.rb
@@ -0,0 +1 @@
+../../../../services/api/test/helpers/manifest_examples.rb
\ No newline at end of file
diff --git a/apps/workbench/test/helpers/time_block.rb b/apps/workbench/test/helpers/time_block.rb
new file mode 120000
index 0000000..afb43e7
--- /dev/null
+++ b/apps/workbench/test/helpers/time_block.rb
@@ -0,0 +1 @@
+../../../../services/api/test/helpers/time_block.rb
\ No newline at end of file
diff --git a/apps/workbench/test/integration_performance/collection_unit_test.rb b/apps/workbench/test/integration_performance/collection_unit_test.rb
new file mode 100644
index 0000000..991757e
--- /dev/null
+++ b/apps/workbench/test/integration_performance/collection_unit_test.rb
@@ -0,0 +1,66 @@
+require 'test_helper'
+require 'helpers/manifest_examples'
+require 'helpers/time_block'
+
+class Blob
+end
+
+class BigCollectionTest < ActiveSupport::TestCase
+  include ManifestExamples
+
+  setup do
+    Blob.stubs(:sign_locator).returns 'd41d8cd98f00b204e9800998ecf8427e+0'
+  end
+
+  teardown do
+    Thread.current[:arvados_api_client] = nil
+  end
+
+  # You can try with compress=false here too, but at last check it
+  # didn't make a significant difference.
+  [true].each do |compress|
+    test "crud cycle for collection with big manifest (compress=#{compress})" do
+      Rails.configuration.api_response_compression = compress
+      Thread.current[:arvados_api_client] = nil
+      crudtest
+    end
+  end
+
+  def crudtest
+    use_token :active
+    bigmanifest = time_block 'build example' do
+      make_manifest(streams: 100,
+                    files_per_stream: 100,
+                    blocks_per_file: 30,
+                    bytes_per_block: 0)
+    end
+    c = time_block "new (manifest size = #{bigmanifest.length>>20}MiB)" do
+      Collection.new manifest_text: bigmanifest
+    end
+    time_block 'create' do
+      c.save!
+    end
+    time_block 'read' do
+      Collection.find c.uuid
+    end
+    time_block 'read(cached)' do
+      Collection.find c.uuid
+    end
+    time_block 'list' do
+      list = Collection.select(['uuid', 'manifest_text']).filter [['uuid','=',c.uuid]]
+      assert_equal 1, list.count
+      assert_equal c.uuid, list.first.uuid
+      assert_not_nil list.first.manifest_text
+    end
+    time_block 'update' do
+      c.manifest_text += ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:empty.txt\n"
+      c.save!
+    end
+    time_block 'delete' do
+      c.destroy
+    end
+    time_block 'read(404)' do
+      assert_empty Collection.filter([['uuid','=',c.uuid]])
+    end
+  end
+end
diff --git a/apps/workbench/test/integration_performance/collections_controller_test.rb b/apps/workbench/test/integration_performance/collections_controller_test.rb
new file mode 100644
index 0000000..0a4cd6b
--- /dev/null
+++ b/apps/workbench/test/integration_performance/collections_controller_test.rb
@@ -0,0 +1,71 @@
+require 'test_helper'
+require 'helpers/manifest_examples'
+require 'helpers/time_block'
+
+class Blob
+end
+
+class BigCollectionsControllerTest < ActionController::TestCase
+  include ManifestExamples
+
+  setup do
+    Blob.stubs(:sign_locator).returns 'd41d8cd98f00b204e9800998ecf8427e+0'
+  end
+
+  test "combine two big and two small collections" do
+    @controller = ActionsController.new
+    bigmanifest1 = time_block 'build example' do
+      make_manifest(streams: 100,
+                    files_per_stream: 100,
+                    blocks_per_file: 30,
+                    bytes_per_block: 0)
+    end
+    bigmanifest2 = bigmanifest1.sub '.txt', '.txt2'
+    smallmanifest1 = ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:small1.txt\n"
+    smallmanifest2 = ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:small2.txt\n"
+    totalsize = bigmanifest1.length + bigmanifest2.length +
+      smallmanifest1.length + smallmanifest2.length
+    parts = time_block "create (total #{totalsize>>20}MiB)" do
+      use_token :active do
+        {
+          big1: Collection.create(manifest_text: bigmanifest1),
+          big2: Collection.create(manifest_text: bigmanifest2),
+          small1: Collection.create(manifest_text: smallmanifest1),
+          small2: Collection.create(manifest_text: smallmanifest2),
+        }
+      end
+    end
+    time_block 'combine' do
+      post :combine_selected_files_into_collection, {
+        selection: [parts[:big1].uuid,
+                    parts[:big2].uuid,
+                    parts[:small1].uuid + '/small1.txt',
+                    parts[:small2].uuid + '/small2.txt',
+                   ],
+        format: :html
+      }, session_for(:active)
+    end
+    assert_response :redirect
+  end
+
+  [:json, :html].each do |format|
+    test "show collection with big manifest (#{format})" do
+      bigmanifest = time_block 'build example' do
+        make_manifest(streams: 100,
+                      files_per_stream: 100,
+                      blocks_per_file: 30,
+                      bytes_per_block: 0)
+      end
+      @controller = CollectionsController.new
+      c = time_block "create (manifest size #{bigmanifest.length>>20}MiB)" do
+        use_token :active do
+          Collection.create(manifest_text: bigmanifest)
+        end
+      end
+      time_block 'show' do
+        get :show, {id: c.uuid, format: format}, session_for(:active)
+      end
+      assert_response :success
+    end
+  end
+end
diff --git a/services/api/test/helpers/manifest_examples.rb b/services/api/test/helpers/manifest_examples.rb
new file mode 100644
index 0000000..08712eb
--- /dev/null
+++ b/services/api/test/helpers/manifest_examples.rb
@@ -0,0 +1,31 @@
+module ManifestExamples
+  def make_manifest opts={}
+    opts = {
+      bytes_per_block: 1,
+      blocks_per_file: 1,
+      files_per_stream: 1,
+      streams: 1,
+    }.merge(opts)
+    datablip = "x" * opts[:bytes_per_block]
+    locator = Blob.sign_locator(Digest::MD5.hexdigest(datablip) +
+                                '+' + datablip.length.to_s,
+                                api_token: opts[:api_token])
+    filesize = datablip.length * opts[:blocks_per_file]
+    txt = ''
+    (1..opts[:streams]).each do |s|
+      streamtoken = "./stream#{s}"
+      streamsize = 0
+      blocktokens = []
+      filetokens = []
+      (1..opts[:files_per_stream]).each do |f|
+        filetokens << " #{streamsize}:#{filesize}:file#{f}.txt"
+        (1..opts[:blocks_per_file]).each do |b|
+          blocktokens << locator
+        end
+        streamsize += filesize
+      end
+      txt << ([streamtoken] + blocktokens + filetokens).join(' ') + "\n"
+    end
+    txt
+  end
+end
diff --git a/services/api/test/helpers/time_block.rb b/services/api/test/helpers/time_block.rb
new file mode 100644
index 0000000..a3b03ff
--- /dev/null
+++ b/services/api/test/helpers/time_block.rb
@@ -0,0 +1,11 @@
+class ActiveSupport::TestCase
+  def time_block label
+    t0 = Time.now
+    begin
+      yield
+    ensure
+      t1 = Time.now
+      $stderr.puts "#{t1 - t0}s #{label}"
+    end
+  end
+end
diff --git a/services/api/test/integration/collections_performance_test.rb b/services/api/test/integration/collections_performance_test.rb
new file mode 100644
index 0000000..7b790b7
--- /dev/null
+++ b/services/api/test/integration/collections_performance_test.rb
@@ -0,0 +1,40 @@
+require 'test_helper'
+require 'helpers/manifest_examples'
+require 'helpers/time_block'
+
+class CollectionsApiPerformanceTest < ActionDispatch::IntegrationTest
+  include ManifestExamples
+
+  test "crud cycle for a collection with a big manifest" do
+    bigmanifest = time_block 'make example' do
+      make_manifest(streams: 100,
+                    files_per_stream: 100,
+                    blocks_per_file: 10,
+                    bytes_per_block: 2**26,
+                    api_token: api_token(:active))
+    end
+    json = time_block "JSON encode #{bigmanifest.length>>20}MiB manifest" do
+      Oj.dump({manifest_text: bigmanifest})
+    end
+    time_block 'create' do
+      post '/arvados/v1/collections', {collection: json}, auth(:active)
+      assert_response :success
+    end
+    uuid = json_response['uuid']
+    time_block 'read' do
+      get '/arvados/v1/collections/' + uuid, {}, auth(:active)
+      assert_response :success
+    end
+    time_block 'list' do
+      get '/arvados/v1/collections', {select: ['manifest_text'], filters: [['uuid', '=', uuid]].to_json}, auth(:active)
+      assert_response :success
+    end
+    time_block 'update' do
+      put '/arvados/v1/collections/' + uuid, {collection: json}, auth(:active)
+      assert_response :success
+    end
+    time_block 'delete' do
+      delete '/arvados/v1/collections/' + uuid, {}, auth(:active)
+    end
+  end
+end
diff --git a/services/api/test/unit/collection_performance_test.rb b/services/api/test/unit/collection_performance_test.rb
new file mode 100644
index 0000000..d3e755d
--- /dev/null
+++ b/services/api/test/unit/collection_performance_test.rb
@@ -0,0 +1,61 @@
+require 'test_helper'
+require 'helpers/manifest_examples'
+require 'helpers/time_block'
+
+class CollectionModelPerformanceTest < ActiveSupport::TestCase
+  include ManifestExamples
+
+  setup do
+    # The Collection model needs to have a current token, not just a
+    # current user, to sign & verify manifests:
+    Thread.current[:api_client_authorization] =
+      api_client_authorizations(:active)
+  end
+
+  teardown do
+    Thread.current[:api_client_authorization] = nil
+  end
+
+  # "create read render update delete", not a typo
+  test "crrud cycle for a collection with a big manifest)" do
+    bigmanifest = time_block 'make example' do
+      make_manifest(streams: 100,
+                    files_per_stream: 100,
+                    blocks_per_file: 10,
+                    bytes_per_block: 2**26,
+                    api_token: api_token(:active))
+    end
+    act_as_user users(:active) do
+      c = time_block "new (manifest_text is #{bigmanifest.length>>20}MiB)" do
+        Collection.new manifest_text: bigmanifest.dup
+      end
+      time_block 'check signatures' do
+        c.check_signatures
+      end
+      time_block 'check signatures + save' do
+        c.save!
+      end
+      c = time_block 'read' do
+        Collection.find_by_uuid(c.uuid)
+      end
+      time_block 'sign' do
+        c.signed_manifest_text
+      end
+      time_block 'sign + render' do
+        resp = c.as_api_response(nil)
+      end
+      loc = Blob.sign_locator(Digest::MD5.hexdigest('foo') + '+3',
+                              api_token: api_token(:active))
+      # Note Collection's strip_manifest_text method has now removed
+      # the signatures from c.manifest_text, so we have to start from
+      # bigmanifest again here instead of just appending with "+=".
+      c.manifest_text = bigmanifest.dup + ". #{loc} 0:3:foo.txt\n"
+      time_block 'update' do
+        c.save!
+      end
+      time_block 'delete' do
+        c.destroy
+      end
+    end
+  end
+end

commit ccfa3ebf6aa31f59cf6289669e542174c77a2e2a
Author: Tom Clegg <tom at curoverse.com>
Date:   Mon May 25 10:35:02 2015 -0400

    6087: Use HTTPClient's compression feature (instead of adding the
    Content-Encoding header ourselves). Rename config knob to describe
    purpose instead of implementation.

diff --git a/apps/workbench/app/models/arvados_api_client.rb b/apps/workbench/app/models/arvados_api_client.rb
index aa3269a..17c5ec6 100644
--- a/apps/workbench/app/models/arvados_api_client.rb
+++ b/apps/workbench/app/models/arvados_api_client.rb
@@ -91,6 +91,9 @@ class ArvadosApiClient
           # Use system CA certificates
           @api_client.ssl_config.add_trust_ca('/etc/ssl/certs')
         end
+        if Rails.configuration.api_response_compression
+          @api_client.transparent_gzip_decompression = true
+        end
       end
     end
 
@@ -130,9 +133,6 @@ class ArvadosApiClient
     end
 
     header = {"Accept" => "application/json"}
-    if Rails.configuration.include_accept_encoding_header_in_api_requests
-      header["Accept-Encoding"] = "gzip, deflate"
-    end
 
     profile_checkpoint { "Prepare request #{url} #{query[:uuid]} #{query[:where]} #{query[:filters]} #{query[:order]}" }
     msg = @client_mtx.synchronize do
diff --git a/apps/workbench/config/application.default.yml b/apps/workbench/config/application.default.yml
index 4061ee8..8226a63 100644
--- a/apps/workbench/config/application.default.yml
+++ b/apps/workbench/config/application.default.yml
@@ -209,5 +209,5 @@ common:
   # in the directory where your API server is running.
   anonymous_user_token: false
 
-  # Enable response payload compression in Arvados API requests.
-  include_accept_encoding_header_in_api_requests: true
+  # Ask Arvados API server to compress its response payloads.
+  api_response_compression: true

commit cc988c81ad543eb4e8ad88416e4c2fcb937abd11
Author: Tom Clegg <tom at curoverse.com>
Date:   Mon May 25 10:23:53 2015 -0400

    6087: Compute portable_data_hash only once during check_signatures.

diff --git a/services/api/app/models/collection.rb b/services/api/app/models/collection.rb
index 015efed..f7a715a 100644
--- a/services/api/app/models/collection.rb
+++ b/services/api/app/models/collection.rb
@@ -51,7 +51,8 @@ class Collection < ArvadosModel
     # subsequent passes without checking any signatures. This is
     # important because the signatures have probably been stripped off
     # by the time we get to a second validation pass!
-    return true if @signatures_checked and @signatures_checked == compute_pdh
+    computed_pdh = compute_pdh
+    return true if @signatures_checked and @signatures_checked == computed_pdh
 
     if self.manifest_text_changed?
       # Check permissions on the collection manifest.
@@ -87,7 +88,7 @@ class Collection < ArvadosModel
         end
       end
     end
-    @signatures_checked = compute_pdh
+    @signatures_checked = computed_pdh
   end
 
   def strip_manifest_text

commit 0ded94e10f668c961c257199d7b6d08af234b75a
Author: Tom Clegg <tom at curoverse.com>
Date:   Mon May 25 10:19:31 2015 -0400

    6087: Use app-configured key by default for blob signing and verification.

diff --git a/services/api/app/models/blob.rb b/services/api/app/models/blob.rb
index 7ae13ef..56cdfb8 100644
--- a/services/api/app/models/blob.rb
+++ b/services/api/app/models/blob.rb
@@ -28,8 +28,8 @@ class Blob
   # Blob.sign_locator: return a signed and timestamped blob locator.
   #
   # The 'opts' argument should include:
-  #   [required] :key       - the Arvados server-side blobstore key
-  #   [required] :api_token - user's API token
+  #   [required] :api_token - API token (signatures only work for this token)
+  #   [optional] :key       - the Arvados server-side blobstore key
   #   [optional] :ttl       - number of seconds before signature should expire
   #   [optional] :expire    - unix timestamp when signature should expire
   #
@@ -44,14 +44,16 @@ class Blob
       end
       timestamp = opts[:expire]
     else
-      timestamp = db_current_time.to_i + (opts[:ttl] || 1209600)
+      timestamp = db_current_time.to_i +
+        (opts[:ttl] || Rails.configuration.blob_signature_ttl)
     end
     timestamp_hex = timestamp.to_s(16)
     # => "53163cb4"
 
     # Generate a signature.
     signature =
-      generate_signature opts[:key], blob_hash, opts[:api_token], timestamp_hex
+      generate_signature((opts[:key] or Rails.configuration.blob_signing_key),
+                         blob_hash, opts[:api_token], timestamp_hex)
 
     blob_locator + '+A' + signature + '@' + timestamp_hex
   end
@@ -96,7 +98,8 @@ class Blob
     end
 
     my_signature =
-      generate_signature opts[:key], blob_hash, opts[:api_token], timestamp
+      generate_signature((opts[:key] or Rails.configuration.blob_signing_key),
+                         blob_hash, opts[:api_token], timestamp)
 
     if my_signature != given_signature
       raise Blob::InvalidSignatureError.new 'Signature is invalid.'
diff --git a/services/api/app/models/collection.rb b/services/api/app/models/collection.rb
index 7f93e20..015efed 100644
--- a/services/api/app/models/collection.rb
+++ b/services/api/app/models/collection.rb
@@ -59,7 +59,6 @@ class Collection < ArvadosModel
       # which will return 403 Permission denied to the client.
       api_token = current_api_client_authorization.andand.api_token
       signing_opts = {
-        key: Rails.configuration.blob_signing_key,
         api_token: api_token,
         now: db_current_time.to_i,
       }
@@ -194,7 +193,6 @@ class Collection < ArvadosModel
 
   def self.sign_manifest manifest, token
     signing_opts = {
-      key: Rails.configuration.blob_signing_key,
       api_token: token,
       expire: db_current_time.to_i + Rails.configuration.blob_signature_ttl,
     }

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list