[ARVADOS] updated: e8f91c87ef238ddf6bd96914a196d2d90825e8ee

git at public.curoverse.com git at public.curoverse.com
Fri May 1 16:49:40 EDT 2015


Summary of changes:
 .../api/app/controllers/application_controller.rb  | 37 +++++++++++++++++---
 .../arvados/v1/collections_controller.rb           |  4 +++
 services/api/app/models/arvados_model.rb           |  7 ++++
 services/api/config/application.default.yml        | 10 ++++++
 .../arvados/v1/collections_controller_test.rb      | 39 ++++++++++++++++++++++
 5 files changed, 93 insertions(+), 4 deletions(-)

       via  e8f91c87ef238ddf6bd96914a196d2d90825e8ee (commit)
       via  3f51fa4899fe43c29e1bf49d7911a40eb41a55e8 (commit)
      from  91c467665ad493108b69c660d5424c77cce5668f (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit e8f91c87ef238ddf6bd96914a196d2d90825e8ee
Merge: 91c4676 3f51fa4
Author: Brett Smith <brett at curoverse.com>
Date:   Fri May 1 16:48:42 2015 -0400

    Merge branch '5834-api-max-response-size-wip'
    
    Closes #5834, #5871.


commit 3f51fa4899fe43c29e1bf49d7911a40eb41a55e8
Author: Brett Smith <brett at curoverse.com>
Date:   Thu Apr 30 18:23:21 2015 -0400

    5834: Limit how much manifest text data API server will load for index.
    
    This prevents situations where clients cause the API server to balloon
    in memory use by requesting an index including many large manifest
    texts.  Data Manager has been doing this unwittingly lately.

diff --git a/services/api/app/controllers/application_controller.rb b/services/api/app/controllers/application_controller.rb
index 69c03bd..6810d91 100644
--- a/services/api/app/controllers/application_controller.rb
+++ b/services/api/app/controllers/application_controller.rb
@@ -191,9 +191,18 @@ class ApplicationController < ActionController::Base
            }.merge opts)
   end
 
+  def self.limit_index_columns_read
+    # This method returns a list of column names.
+    # If an index request reads that column from the database,
+    # find_objects_for_index will only fetch objects until it reads
+    # max_index_database_read bytes of data from those columns.
+    []
+  end
+
   def find_objects_for_index
     @objects ||= model_class.readable_by(*@read_users)
     apply_where_limit_order_params
+    limit_database_read if (action_name == "index")
   end
 
   def apply_filters model_class=nil
@@ -268,10 +277,7 @@ class ApplicationController < ActionController::Base
         # Map attribute names in @select to real column names, resolve
         # those to fully-qualified SQL column names, and pass the
         # resulting string to the select method.
-        api_column_map = model_class.attributes_required_columns
-        columns_list = @select.
-          flat_map { |attr| api_column_map[attr] }.
-          uniq.
+        columns_list = model_class.columns_for_attributes(@select).
           map { |s| "#{ar_table_name}.#{ActiveRecord::Base.connection.quote_column_name s}" }
         @objects = @objects.select(columns_list.join(", "))
       end
@@ -289,6 +295,29 @@ class ApplicationController < ActionController::Base
     @objects = @objects.uniq(@distinct) if not @distinct.nil?
   end
 
+  def limit_database_read
+    limit_columns = self.class.limit_index_columns_read
+    limit_columns &= model_class.columns_for_attributes(@select) if @select
+    return if limit_columns.empty?
+    model_class.transaction do
+      limit_query = @objects.
+        select("(%s) as read_length" %
+               limit_columns.map { |s| "length(#{s})" }.join(" + "))
+      new_limit = 0
+      read_total = 0
+      limit_query.find_each do |record|
+        new_limit += 1
+        read_total += record.read_length.to_i
+        break if ((read_total >= Rails.configuration.max_index_database_read) or
+                  (new_limit >= @limit))
+      end
+      @limit = new_limit
+      @objects = @objects.limit(@limit)
+      # Force @objects to run its query inside this transaction.
+      @objects.each { |_| break }
+    end
+  end
+
   def resource_attrs
     return @attrs if @attrs
     @attrs = params[resource_name]
diff --git a/services/api/app/controllers/arvados/v1/collections_controller.rb b/services/api/app/controllers/arvados/v1/collections_controller.rb
index 956de8e..44733cd 100644
--- a/services/api/app/controllers/arvados/v1/collections_controller.rb
+++ b/services/api/app/controllers/arvados/v1/collections_controller.rb
@@ -1,6 +1,10 @@
 require "arvados/keep"
 
 class Arvados::V1::CollectionsController < ApplicationController
+  def self.limit_index_columns_read
+    ["manifest_text"]
+  end
+
   def create
     if resource_attrs[:uuid] and (loc = Keep::Locator.parse(resource_attrs[:uuid]))
       resource_attrs[:portable_data_hash] = loc.to_s
diff --git a/services/api/app/models/arvados_model.rb b/services/api/app/models/arvados_model.rb
index 02e9386..936f823 100644
--- a/services/api/app/models/arvados_model.rb
+++ b/services/api/app/models/arvados_model.rb
@@ -103,6 +103,13 @@ class ArvadosModel < ActiveRecord::Base
     api_column_map
   end
 
+  def self.columns_for_attributes(select_attributes)
+    # Given an array of attribute names to select, return an array of column
+    # names that must be fetched from the database to satisfy the request.
+    api_column_map = attributes_required_columns
+    select_attributes.flat_map { |attr| api_column_map[attr] }.uniq
+  end
+
   def self.default_orders
     ["#{table_name}.modified_at desc", "#{table_name}.uuid"]
   end
diff --git a/services/api/config/application.default.yml b/services/api/config/application.default.yml
index e7dbf29..006ac55 100644
--- a/services/api/config/application.default.yml
+++ b/services/api/config/application.default.yml
@@ -276,6 +276,16 @@ common:
   # actually enforce the desired maximum request size on the server side.
   max_request_size: 134217728
 
+  # Stop collecting records for an index request after we read this much
+  # data (in bytes) from large database columns.
+  # Currently only `GET /collections` respects this parameter, when the
+  # user requests an index that includes manifest_text.  Once the API
+  # server collects records with a total manifest_text size at or above
+  # this amount, it returns those results immediately.
+  # Note this is a threshold, not a limit.  Record collection stops
+  # *after* reading this much data.
+  max_index_database_read: 134217728
+
   # When you run the db:delete_old_job_logs task, it will find jobs that
   # have been finished for at least this many seconds, and delete their
   # stderr logs from the logs table.
diff --git a/services/api/test/functional/arvados/v1/collections_controller_test.rb b/services/api/test/functional/arvados/v1/collections_controller_test.rb
index 54ffe66..3257e49 100644
--- a/services/api/test/functional/arvados/v1/collections_controller_test.rb
+++ b/services/api/test/functional/arvados/v1/collections_controller_test.rb
@@ -91,6 +91,45 @@ class Arvados::V1::CollectionsControllerTest < ActionController::TestCase
     assert_equal 99999, resp['offset']
   end
 
+  def request_capped_index(params={})
+    authorize_with :user1_with_load
+    coll1 = collections(:collection_1_of_201)
+    Rails.configuration.max_index_database_read =
+      yield(coll1.manifest_text.size)
+    get :index, {
+      select: %w(uuid manifest_text),
+      filters: [["owner_uuid", "=", coll1.owner_uuid]],
+      limit: 300,
+    }.merge(params)
+  end
+
+  test "index with manifest_text limited by max_index_database_read" do
+    request_capped_index() { |size| (size * 3) + 1 }
+    assert_response :success
+    assert_equal(4, json_response["items"].size)
+    assert_equal(4, json_response["limit"])
+    assert_equal(201, json_response["items_available"])
+  end
+
+  test "max_index_database_read does not interfere with limit" do
+    request_capped_index(limit: 5) { |size| size * 20 }
+    assert_response :success
+    assert_equal(5, json_response["items"].size)
+    assert_equal(5, json_response["limit"])
+    assert_equal(201, json_response["items_available"])
+  end
+
+  test "max_index_database_read does not interfere with order" do
+    request_capped_index(order: "name DESC") { |size| (size * 15) + 1 }
+    assert_response :success
+    assert_equal(16, json_response["items"].size)
+    assert_empty(json_response["items"].reject do |coll|
+                   coll["name"] !~ /^Collection_9/
+                 end)
+    assert_equal(16, json_response["limit"])
+    assert_equal(201, json_response["items_available"])
+  end
+
   test "admin can create collection with unsigned manifest" do
     authorize_with :admin
     test_collection = {

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list