[ARVADOS] created: 622a60e878900b94d71555d72bced70660f065bf

git at public.curoverse.com git at public.curoverse.com
Tue Jun 2 22:26:54 EDT 2015


        at  622a60e878900b94d71555d72bced70660f065bf (commit)


commit 622a60e878900b94d71555d72bced70660f065bf
Merge: 96069fd ac4a24f
Author: radhika <radhika at curoverse.com>
Date:   Tue Jun 2 22:26:22 2015 -0400

    Merge branch 'master' into 6203-collection-perf-api


commit 96069fd4bf8c8ad2b8549000ad20d66b366de403
Merge: 11af4c6 469356d
Author: radhika <radhika at curoverse.com>
Date:   Tue Jun 2 09:44:08 2015 -0400

    Merge branch 'master' into 6087-collection-timing


commit 11af4c66d27b5f192965b7402a982eaa30431a0d
Merge: 8884a9f 178d3f3
Author: radhika <radhika at curoverse.com>
Date:   Mon Jun 1 10:05:56 2015 -0400

    Merge branch 'master' into 6087-collection-timing


commit 8884a9fff1ee4d5f6df3fc3ef43aed3a9eec9ea2
Merge: dab62e9 7a53d87
Author: radhika <radhika at curoverse.com>
Date:   Thu May 28 14:53:44 2015 -0400

    Merge branch 'master' into 6087-collection-timing


commit dab62e9006a117b29314f1c2db9aa10b665cc4d7
Author: Tom Clegg <tom at curoverse.com>
Date:   Mon May 25 13:25:22 2015 -0400

    6087: Add big-manifest tests, with some finer-grained performance numbers on stderr.

diff --git a/apps/workbench/app/models/arvados_api_client.rb b/apps/workbench/app/models/arvados_api_client.rb
index 17c5ec6..ca09aa7 100644
--- a/apps/workbench/app/models/arvados_api_client.rb
+++ b/apps/workbench/app/models/arvados_api_client.rb
@@ -134,7 +134,7 @@ class ArvadosApiClient
 
     header = {"Accept" => "application/json"}
 
-    profile_checkpoint { "Prepare request #{url} #{query[:uuid]} #{query[:where]} #{query[:filters]} #{query[:order]}" }
+    profile_checkpoint { "Prepare request #{query["_method"] or "POST"} #{url} #{query[:uuid]} #{query.inspect[0,256]}" }
     msg = @client_mtx.synchronize do
       begin
         @api_client.post(url, query, header: header)
@@ -143,6 +143,12 @@ class ArvadosApiClient
       end
     end
     profile_checkpoint 'API transaction'
+    if @@profiling_enabled
+      if msg.headers['X-Runtime']
+        Rails.logger.info "API server: #{msg.headers['X-Runtime']} runtime reported"
+      end
+      Rails.logger.info "Content-Encoding #{msg.headers['Content-Encoding'].inspect}, Content-Length #{msg.headers['Content-Length'].inspect}, actual content size #{msg.content.size}"
+    end
 
     begin
       resp = Oj.load(msg.content, :symbol_keys => true)
diff --git a/apps/workbench/test/helpers/manifest_examples.rb b/apps/workbench/test/helpers/manifest_examples.rb
new file mode 120000
index 0000000..cb908ef
--- /dev/null
+++ b/apps/workbench/test/helpers/manifest_examples.rb
@@ -0,0 +1 @@
+../../../../services/api/test/helpers/manifest_examples.rb
\ No newline at end of file
diff --git a/apps/workbench/test/helpers/time_block.rb b/apps/workbench/test/helpers/time_block.rb
new file mode 120000
index 0000000..afb43e7
--- /dev/null
+++ b/apps/workbench/test/helpers/time_block.rb
@@ -0,0 +1 @@
+../../../../services/api/test/helpers/time_block.rb
\ No newline at end of file
diff --git a/apps/workbench/test/integration_performance/collection_unit_test.rb b/apps/workbench/test/integration_performance/collection_unit_test.rb
new file mode 100644
index 0000000..991757e
--- /dev/null
+++ b/apps/workbench/test/integration_performance/collection_unit_test.rb
@@ -0,0 +1,66 @@
+require 'test_helper'
+require 'helpers/manifest_examples'
+require 'helpers/time_block'
+
+class Blob
+end
+
+class BigCollectionTest < ActiveSupport::TestCase
+  include ManifestExamples
+
+  setup do
+    Blob.stubs(:sign_locator).returns 'd41d8cd98f00b204e9800998ecf8427e+0'
+  end
+
+  teardown do
+    Thread.current[:arvados_api_client] = nil
+  end
+
+  # You can try with compress=false here too, but at last check it
+  # didn't make a significant difference.
+  [true].each do |compress|
+    test "crud cycle for collection with big manifest (compress=#{compress})" do
+      Rails.configuration.api_response_compression = compress
+      Thread.current[:arvados_api_client] = nil
+      crudtest
+    end
+  end
+
+  def crudtest
+    use_token :active
+    bigmanifest = time_block 'build example' do
+      make_manifest(streams: 100,
+                    files_per_stream: 100,
+                    blocks_per_file: 30,
+                    bytes_per_block: 0)
+    end
+    c = time_block "new (manifest size = #{bigmanifest.length>>20}MiB)" do
+      Collection.new manifest_text: bigmanifest
+    end
+    time_block 'create' do
+      c.save!
+    end
+    time_block 'read' do
+      Collection.find c.uuid
+    end
+    time_block 'read(cached)' do
+      Collection.find c.uuid
+    end
+    time_block 'list' do
+      list = Collection.select(['uuid', 'manifest_text']).filter [['uuid','=',c.uuid]]
+      assert_equal 1, list.count
+      assert_equal c.uuid, list.first.uuid
+      assert_not_nil list.first.manifest_text
+    end
+    time_block 'update' do
+      c.manifest_text += ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:empty.txt\n"
+      c.save!
+    end
+    time_block 'delete' do
+      c.destroy
+    end
+    time_block 'read(404)' do
+      assert_empty Collection.filter([['uuid','=',c.uuid]])
+    end
+  end
+end
diff --git a/apps/workbench/test/integration_performance/collections_controller_test.rb b/apps/workbench/test/integration_performance/collections_controller_test.rb
new file mode 100644
index 0000000..0a4cd6b
--- /dev/null
+++ b/apps/workbench/test/integration_performance/collections_controller_test.rb
@@ -0,0 +1,71 @@
+require 'test_helper'
+require 'helpers/manifest_examples'
+require 'helpers/time_block'
+
+class Blob
+end
+
+class BigCollectionsControllerTest < ActionController::TestCase
+  include ManifestExamples
+
+  setup do
+    Blob.stubs(:sign_locator).returns 'd41d8cd98f00b204e9800998ecf8427e+0'
+  end
+
+  test "combine two big and two small collections" do
+    @controller = ActionsController.new
+    bigmanifest1 = time_block 'build example' do
+      make_manifest(streams: 100,
+                    files_per_stream: 100,
+                    blocks_per_file: 30,
+                    bytes_per_block: 0)
+    end
+    bigmanifest2 = bigmanifest1.sub '.txt', '.txt2'
+    smallmanifest1 = ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:small1.txt\n"
+    smallmanifest2 = ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:small2.txt\n"
+    totalsize = bigmanifest1.length + bigmanifest2.length +
+      smallmanifest1.length + smallmanifest2.length
+    parts = time_block "create (total #{totalsize>>20}MiB)" do
+      use_token :active do
+        {
+          big1: Collection.create(manifest_text: bigmanifest1),
+          big2: Collection.create(manifest_text: bigmanifest2),
+          small1: Collection.create(manifest_text: smallmanifest1),
+          small2: Collection.create(manifest_text: smallmanifest2),
+        }
+      end
+    end
+    time_block 'combine' do
+      post :combine_selected_files_into_collection, {
+        selection: [parts[:big1].uuid,
+                    parts[:big2].uuid,
+                    parts[:small1].uuid + '/small1.txt',
+                    parts[:small2].uuid + '/small2.txt',
+                   ],
+        format: :html
+      }, session_for(:active)
+    end
+    assert_response :redirect
+  end
+
+  [:json, :html].each do |format|
+    test "show collection with big manifest (#{format})" do
+      bigmanifest = time_block 'build example' do
+        make_manifest(streams: 100,
+                      files_per_stream: 100,
+                      blocks_per_file: 30,
+                      bytes_per_block: 0)
+      end
+      @controller = CollectionsController.new
+      c = time_block "create (manifest size #{bigmanifest.length>>20}MiB)" do
+        use_token :active do
+          Collection.create(manifest_text: bigmanifest)
+        end
+      end
+      time_block 'show' do
+        get :show, {id: c.uuid, format: format}, session_for(:active)
+      end
+      assert_response :success
+    end
+  end
+end
diff --git a/services/api/test/helpers/manifest_examples.rb b/services/api/test/helpers/manifest_examples.rb
new file mode 100644
index 0000000..08712eb
--- /dev/null
+++ b/services/api/test/helpers/manifest_examples.rb
@@ -0,0 +1,31 @@
+module ManifestExamples
+  def make_manifest opts={}
+    opts = {
+      bytes_per_block: 1,
+      blocks_per_file: 1,
+      files_per_stream: 1,
+      streams: 1,
+    }.merge(opts)
+    datablip = "x" * opts[:bytes_per_block]
+    locator = Blob.sign_locator(Digest::MD5.hexdigest(datablip) +
+                                '+' + datablip.length.to_s,
+                                api_token: opts[:api_token])
+    filesize = datablip.length * opts[:blocks_per_file]
+    txt = ''
+    (1..opts[:streams]).each do |s|
+      streamtoken = "./stream#{s}"
+      streamsize = 0
+      blocktokens = []
+      filetokens = []
+      (1..opts[:files_per_stream]).each do |f|
+        filetokens << " #{streamsize}:#{filesize}:file#{f}.txt"
+        (1..opts[:blocks_per_file]).each do |b|
+          blocktokens << locator
+        end
+        streamsize += filesize
+      end
+      txt << ([streamtoken] + blocktokens + filetokens).join(' ') + "\n"
+    end
+    txt
+  end
+end
diff --git a/services/api/test/helpers/time_block.rb b/services/api/test/helpers/time_block.rb
new file mode 100644
index 0000000..a3b03ff
--- /dev/null
+++ b/services/api/test/helpers/time_block.rb
@@ -0,0 +1,11 @@
+class ActiveSupport::TestCase
+  def time_block label
+    t0 = Time.now
+    begin
+      yield
+    ensure
+      t1 = Time.now
+      $stderr.puts "#{t1 - t0}s #{label}"
+    end
+  end
+end
diff --git a/services/api/test/integration/collections_performance_test.rb b/services/api/test/integration/collections_performance_test.rb
new file mode 100644
index 0000000..7b790b7
--- /dev/null
+++ b/services/api/test/integration/collections_performance_test.rb
@@ -0,0 +1,40 @@
+require 'test_helper'
+require 'helpers/manifest_examples'
+require 'helpers/time_block'
+
+class CollectionsApiPerformanceTest < ActionDispatch::IntegrationTest
+  include ManifestExamples
+
+  test "crud cycle for a collection with a big manifest" do
+    bigmanifest = time_block 'make example' do
+      make_manifest(streams: 100,
+                    files_per_stream: 100,
+                    blocks_per_file: 10,
+                    bytes_per_block: 2**26,
+                    api_token: api_token(:active))
+    end
+    json = time_block "JSON encode #{bigmanifest.length>>20}MiB manifest" do
+      Oj.dump({manifest_text: bigmanifest})
+    end
+    time_block 'create' do
+      post '/arvados/v1/collections', {collection: json}, auth(:active)
+      assert_response :success
+    end
+    uuid = json_response['uuid']
+    time_block 'read' do
+      get '/arvados/v1/collections/' + uuid, {}, auth(:active)
+      assert_response :success
+    end
+    time_block 'list' do
+      get '/arvados/v1/collections', {select: ['manifest_text'], filters: [['uuid', '=', uuid]].to_json}, auth(:active)
+      assert_response :success
+    end
+    time_block 'update' do
+      put '/arvados/v1/collections/' + uuid, {collection: json}, auth(:active)
+      assert_response :success
+    end
+    time_block 'delete' do
+      delete '/arvados/v1/collections/' + uuid, {}, auth(:active)
+    end
+  end
+end
diff --git a/services/api/test/unit/collection_performance_test.rb b/services/api/test/unit/collection_performance_test.rb
new file mode 100644
index 0000000..d3e755d
--- /dev/null
+++ b/services/api/test/unit/collection_performance_test.rb
@@ -0,0 +1,61 @@
+require 'test_helper'
+require 'helpers/manifest_examples'
+require 'helpers/time_block'
+
+class CollectionModelPerformanceTest < ActiveSupport::TestCase
+  include ManifestExamples
+
+  setup do
+    # The Collection model needs to have a current token, not just a
+    # current user, to sign & verify manifests:
+    Thread.current[:api_client_authorization] =
+      api_client_authorizations(:active)
+  end
+
+  teardown do
+    Thread.current[:api_client_authorization] = nil
+  end
+
+  # "create read render update delete", not a typo
+  test "crrud cycle for a collection with a big manifest)" do
+    bigmanifest = time_block 'make example' do
+      make_manifest(streams: 100,
+                    files_per_stream: 100,
+                    blocks_per_file: 10,
+                    bytes_per_block: 2**26,
+                    api_token: api_token(:active))
+    end
+    act_as_user users(:active) do
+      c = time_block "new (manifest_text is #{bigmanifest.length>>20}MiB)" do
+        Collection.new manifest_text: bigmanifest.dup
+      end
+      time_block 'check signatures' do
+        c.check_signatures
+      end
+      time_block 'check signatures + save' do
+        c.save!
+      end
+      c = time_block 'read' do
+        Collection.find_by_uuid(c.uuid)
+      end
+      time_block 'sign' do
+        c.signed_manifest_text
+      end
+      time_block 'sign + render' do
+        resp = c.as_api_response(nil)
+      end
+      loc = Blob.sign_locator(Digest::MD5.hexdigest('foo') + '+3',
+                              api_token: api_token(:active))
+      # Note Collection's strip_manifest_text method has now removed
+      # the signatures from c.manifest_text, so we have to start from
+      # bigmanifest again here instead of just appending with "+=".
+      c.manifest_text = bigmanifest.dup + ". #{loc} 0:3:foo.txt\n"
+      time_block 'update' do
+        c.save!
+      end
+      time_block 'delete' do
+        c.destroy
+      end
+    end
+  end
+end

commit ccfa3ebf6aa31f59cf6289669e542174c77a2e2a
Author: Tom Clegg <tom at curoverse.com>
Date:   Mon May 25 10:35:02 2015 -0400

    6087: Use HTTPClient's compression feature (instead of adding the
    Content-Encoding header ourselves). Rename config knob to describe
    purpose instead of implementation.

diff --git a/apps/workbench/app/models/arvados_api_client.rb b/apps/workbench/app/models/arvados_api_client.rb
index aa3269a..17c5ec6 100644
--- a/apps/workbench/app/models/arvados_api_client.rb
+++ b/apps/workbench/app/models/arvados_api_client.rb
@@ -91,6 +91,9 @@ class ArvadosApiClient
           # Use system CA certificates
           @api_client.ssl_config.add_trust_ca('/etc/ssl/certs')
         end
+        if Rails.configuration.api_response_compression
+          @api_client.transparent_gzip_decompression = true
+        end
       end
     end
 
@@ -130,9 +133,6 @@ class ArvadosApiClient
     end
 
     header = {"Accept" => "application/json"}
-    if Rails.configuration.include_accept_encoding_header_in_api_requests
-      header["Accept-Encoding"] = "gzip, deflate"
-    end
 
     profile_checkpoint { "Prepare request #{url} #{query[:uuid]} #{query[:where]} #{query[:filters]} #{query[:order]}" }
     msg = @client_mtx.synchronize do
diff --git a/apps/workbench/config/application.default.yml b/apps/workbench/config/application.default.yml
index 4061ee8..8226a63 100644
--- a/apps/workbench/config/application.default.yml
+++ b/apps/workbench/config/application.default.yml
@@ -209,5 +209,5 @@ common:
   # in the directory where your API server is running.
   anonymous_user_token: false
 
-  # Enable response payload compression in Arvados API requests.
-  include_accept_encoding_header_in_api_requests: true
+  # Ask Arvados API server to compress its response payloads.
+  api_response_compression: true

commit cc988c81ad543eb4e8ad88416e4c2fcb937abd11
Author: Tom Clegg <tom at curoverse.com>
Date:   Mon May 25 10:23:53 2015 -0400

    6087: Compute portable_data_hash only once during check_signatures.

diff --git a/services/api/app/models/collection.rb b/services/api/app/models/collection.rb
index 015efed..f7a715a 100644
--- a/services/api/app/models/collection.rb
+++ b/services/api/app/models/collection.rb
@@ -51,7 +51,8 @@ class Collection < ArvadosModel
     # subsequent passes without checking any signatures. This is
     # important because the signatures have probably been stripped off
     # by the time we get to a second validation pass!
-    return true if @signatures_checked and @signatures_checked == compute_pdh
+    computed_pdh = compute_pdh
+    return true if @signatures_checked and @signatures_checked == computed_pdh
 
     if self.manifest_text_changed?
       # Check permissions on the collection manifest.
@@ -87,7 +88,7 @@ class Collection < ArvadosModel
         end
       end
     end
-    @signatures_checked = compute_pdh
+    @signatures_checked = computed_pdh
   end
 
   def strip_manifest_text

commit 0ded94e10f668c961c257199d7b6d08af234b75a
Author: Tom Clegg <tom at curoverse.com>
Date:   Mon May 25 10:19:31 2015 -0400

    6087: Use app-configured key by default for blob signing and verification.

diff --git a/services/api/app/models/blob.rb b/services/api/app/models/blob.rb
index 7ae13ef..56cdfb8 100644
--- a/services/api/app/models/blob.rb
+++ b/services/api/app/models/blob.rb
@@ -28,8 +28,8 @@ class Blob
   # Blob.sign_locator: return a signed and timestamped blob locator.
   #
   # The 'opts' argument should include:
-  #   [required] :key       - the Arvados server-side blobstore key
-  #   [required] :api_token - user's API token
+  #   [required] :api_token - API token (signatures only work for this token)
+  #   [optional] :key       - the Arvados server-side blobstore key
   #   [optional] :ttl       - number of seconds before signature should expire
   #   [optional] :expire    - unix timestamp when signature should expire
   #
@@ -44,14 +44,16 @@ class Blob
       end
       timestamp = opts[:expire]
     else
-      timestamp = db_current_time.to_i + (opts[:ttl] || 1209600)
+      timestamp = db_current_time.to_i +
+        (opts[:ttl] || Rails.configuration.blob_signature_ttl)
     end
     timestamp_hex = timestamp.to_s(16)
     # => "53163cb4"
 
     # Generate a signature.
     signature =
-      generate_signature opts[:key], blob_hash, opts[:api_token], timestamp_hex
+      generate_signature((opts[:key] or Rails.configuration.blob_signing_key),
+                         blob_hash, opts[:api_token], timestamp_hex)
 
     blob_locator + '+A' + signature + '@' + timestamp_hex
   end
@@ -96,7 +98,8 @@ class Blob
     end
 
     my_signature =
-      generate_signature opts[:key], blob_hash, opts[:api_token], timestamp
+      generate_signature((opts[:key] or Rails.configuration.blob_signing_key),
+                         blob_hash, opts[:api_token], timestamp)
 
     if my_signature != given_signature
       raise Blob::InvalidSignatureError.new 'Signature is invalid.'
diff --git a/services/api/app/models/collection.rb b/services/api/app/models/collection.rb
index 7f93e20..015efed 100644
--- a/services/api/app/models/collection.rb
+++ b/services/api/app/models/collection.rb
@@ -59,7 +59,6 @@ class Collection < ArvadosModel
       # which will return 403 Permission denied to the client.
       api_token = current_api_client_authorization.andand.api_token
       signing_opts = {
-        key: Rails.configuration.blob_signing_key,
         api_token: api_token,
         now: db_current_time.to_i,
       }
@@ -194,7 +193,6 @@ class Collection < ArvadosModel
 
   def self.sign_manifest manifest, token
     signing_opts = {
-      key: Rails.configuration.blob_signing_key,
       api_token: token,
       expire: db_current_time.to_i + Rails.configuration.blob_signature_ttl,
     }

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list