[ARVADOS] created: f87f91cb064dddfca21dca5150f19f1df555577c

git at public.curoverse.com git at public.curoverse.com
Thu Apr 30 18:23:35 EDT 2015


        at  f87f91cb064dddfca21dca5150f19f1df555577c (commit)


commit f87f91cb064dddfca21dca5150f19f1df555577c
Author: Brett Smith <brett at curoverse.com>
Date:   Thu Apr 30 18:23:21 2015 -0400

    5834: Limit how much manifest text data API server will load for index.
    
    This prevents situations where clients cause the API server to balloon
    in memory use by requesting an index including many large manifest
    texts.  Data Manager has been doing this unwittingly lately.

diff --git a/services/api/app/controllers/application_controller.rb b/services/api/app/controllers/application_controller.rb
index 69c03bd..6810d91 100644
--- a/services/api/app/controllers/application_controller.rb
+++ b/services/api/app/controllers/application_controller.rb
@@ -191,9 +191,18 @@ class ApplicationController < ActionController::Base
            }.merge opts)
   end
 
+  def self.limit_index_columns_read
+    # This method returns a list of column names.
+    # If an index request reads that column from the database,
+    # find_objects_for_index will only fetch objects until it reads
+    # max_index_database_read bytes of data from those columns.
+    []
+  end
+
   def find_objects_for_index
     @objects ||= model_class.readable_by(*@read_users)
     apply_where_limit_order_params
+    limit_database_read if (action_name == "index")
   end
 
   def apply_filters model_class=nil
@@ -268,10 +277,7 @@ class ApplicationController < ActionController::Base
         # Map attribute names in @select to real column names, resolve
         # those to fully-qualified SQL column names, and pass the
         # resulting string to the select method.
-        api_column_map = model_class.attributes_required_columns
-        columns_list = @select.
-          flat_map { |attr| api_column_map[attr] }.
-          uniq.
+        columns_list = model_class.columns_for_attributes(@select).
           map { |s| "#{ar_table_name}.#{ActiveRecord::Base.connection.quote_column_name s}" }
         @objects = @objects.select(columns_list.join(", "))
       end
@@ -289,6 +295,29 @@ class ApplicationController < ActionController::Base
     @objects = @objects.uniq(@distinct) if not @distinct.nil?
   end
 
+  def limit_database_read
+    limit_columns = self.class.limit_index_columns_read
+    limit_columns &= model_class.columns_for_attributes(@select) if @select
+    return if limit_columns.empty?
+    model_class.transaction do
+      limit_query = @objects.
+        select("(%s) as read_length" %
+               limit_columns.map { |s| "length(#{s})" }.join(" + "))
+      new_limit = 0
+      read_total = 0
+      limit_query.find_each do |record|
+        new_limit += 1
+        read_total += record.read_length.to_i
+        break if ((read_total >= Rails.configuration.max_index_database_read) or
+                  (new_limit >= @limit))
+      end
+      @limit = new_limit
+      @objects = @objects.limit(@limit)
+      # Force @objects to run its query inside this transaction.
+      @objects.each { |_| break }
+    end
+  end
+
   def resource_attrs
     return @attrs if @attrs
     @attrs = params[resource_name]
diff --git a/services/api/app/controllers/arvados/v1/collections_controller.rb b/services/api/app/controllers/arvados/v1/collections_controller.rb
index 956de8e..44733cd 100644
--- a/services/api/app/controllers/arvados/v1/collections_controller.rb
+++ b/services/api/app/controllers/arvados/v1/collections_controller.rb
@@ -1,6 +1,10 @@
 require "arvados/keep"
 
 class Arvados::V1::CollectionsController < ApplicationController
+  def self.limit_index_columns_read
+    ["manifest_text"]
+  end
+
   def create
     if resource_attrs[:uuid] and (loc = Keep::Locator.parse(resource_attrs[:uuid]))
       resource_attrs[:portable_data_hash] = loc.to_s
diff --git a/services/api/app/models/arvados_model.rb b/services/api/app/models/arvados_model.rb
index 02e9386..936f823 100644
--- a/services/api/app/models/arvados_model.rb
+++ b/services/api/app/models/arvados_model.rb
@@ -103,6 +103,13 @@ class ArvadosModel < ActiveRecord::Base
     api_column_map
   end
 
+  def self.columns_for_attributes(select_attributes)
+    # Given an array of attribute names to select, return an array of column
+    # names that must be fetched from the database to satisfy the request.
+    api_column_map = attributes_required_columns
+    select_attributes.flat_map { |attr| api_column_map[attr] }.uniq
+  end
+
   def self.default_orders
     ["#{table_name}.modified_at desc", "#{table_name}.uuid"]
   end
diff --git a/services/api/config/application.default.yml b/services/api/config/application.default.yml
index e7dbf29..0a6fb17 100644
--- a/services/api/config/application.default.yml
+++ b/services/api/config/application.default.yml
@@ -276,6 +276,14 @@ common:
   # actually enforce the desired maximum request size on the server side.
   max_request_size: 134217728
 
+  # Limit how much data we read from large database columns (in bytes)
+  # to answer an index request.
+  # Currently only `GET /collections` respects this parameter, when the
+  # user requests an index that includes manifest_text.  The API server
+  # will find matching records that include this many bytes of
+  # manifest texts, then return a result.
+  max_index_database_read: 134217728
+
   # When you run the db:delete_old_job_logs task, it will find jobs that
   # have been finished for at least this many seconds, and delete their
   # stderr logs from the logs table.
diff --git a/services/api/test/functional/arvados/v1/collections_controller_test.rb b/services/api/test/functional/arvados/v1/collections_controller_test.rb
index 54ffe66..3257e49 100644
--- a/services/api/test/functional/arvados/v1/collections_controller_test.rb
+++ b/services/api/test/functional/arvados/v1/collections_controller_test.rb
@@ -91,6 +91,45 @@ class Arvados::V1::CollectionsControllerTest < ActionController::TestCase
     assert_equal 99999, resp['offset']
   end
 
+  def request_capped_index(params={})
+    authorize_with :user1_with_load
+    coll1 = collections(:collection_1_of_201)
+    Rails.configuration.max_index_database_read =
+      yield(coll1.manifest_text.size)
+    get :index, {
+      select: %w(uuid manifest_text),
+      filters: [["owner_uuid", "=", coll1.owner_uuid]],
+      limit: 300,
+    }.merge(params)
+  end
+
+  test "index with manifest_text limited by max_index_database_read" do
+    request_capped_index() { |size| (size * 3) + 1 }
+    assert_response :success
+    assert_equal(4, json_response["items"].size)
+    assert_equal(4, json_response["limit"])
+    assert_equal(201, json_response["items_available"])
+  end
+
+  test "max_index_database_read does not interfere with limit" do
+    request_capped_index(limit: 5) { |size| size * 20 }
+    assert_response :success
+    assert_equal(5, json_response["items"].size)
+    assert_equal(5, json_response["limit"])
+    assert_equal(201, json_response["items_available"])
+  end
+
+  test "max_index_database_read does not interfere with order" do
+    request_capped_index(order: "name DESC") { |size| (size * 15) + 1 }
+    assert_response :success
+    assert_equal(16, json_response["items"].size)
+    assert_empty(json_response["items"].reject do |coll|
+                   coll["name"] !~ /^Collection_9/
+                 end)
+    assert_equal(16, json_response["limit"])
+    assert_equal(201, json_response["items_available"])
+  end
+
   test "admin can create collection with unsigned manifest" do
     authorize_with :admin
     test_collection = {

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list