[ARVADOS] created: cb109bfddd08bd8136b75e90b681e4af3d60ea30
Git user
git at public.curoverse.com
Fri May 5 15:31:39 EDT 2017
at cb109bfddd08bd8136b75e90b681e4af3d60ea30 (commit)
commit cb109bfddd08bd8136b75e90b681e4af3d60ea30
Author: Tom Clegg <tom at curoverse.com>
Date: Fri May 5 14:09:54 2017 -0400
11629: Limit database reads for all list responses, not just index.
diff --git a/services/api/app/controllers/application_controller.rb b/services/api/app/controllers/application_controller.rb
index 831bcce..2bf7952 100644
--- a/services/api/app/controllers/application_controller.rb
+++ b/services/api/app/controllers/application_controller.rb
@@ -178,18 +178,9 @@ class ApplicationController < ActionController::Base
}.merge opts)
end
- def self.limit_index_columns_read
- # This method returns a list of column names.
- # If an index request reads that column from the database,
- # find_objects_for_index will only fetch objects until it reads
- # max_index_database_read bytes of data from those columns.
- []
- end
-
def find_objects_for_index
@objects ||= model_class.readable_by(*@read_users)
apply_where_limit_order_params
- limit_database_read if (action_name == "index")
end
def apply_filters model_class=nil
@@ -278,8 +269,14 @@ class ApplicationController < ActionController::Base
@objects = @objects.uniq(@distinct) if not @distinct.nil?
end
- def limit_database_read
- limit_columns = self.class.limit_index_columns_read
+ # limit_database_read ensures @objects (which must be an
+ # ActiveRelation) does not return too many results to fit in memory,
+ # by previewing the results and calling @objects.limit() if
+ # necessary.
+ def limit_database_read(model_class:)
+ return if @limit == 0 || @limit == 1
+ model_class ||= self.model_class
+ limit_columns = model_class.limit_index_columns_read
limit_columns &= model_class.columns_for_attributes(@select) if @select
return if limit_columns.empty?
model_class.transaction do
@@ -294,12 +291,12 @@ class ApplicationController < ActionController::Base
read_total += record.read_length.to_i
if read_total >= Rails.configuration.max_index_database_read
new_limit -= 1 if new_limit > 1
+ @limit = new_limit
break
elsif new_limit >= @limit
break
end
end
- @limit = new_limit
@objects = @objects.limit(@limit)
# Force @objects to run its query inside this transaction.
@objects.each { |_| break }
@@ -461,7 +458,10 @@ class ApplicationController < ActionController::Base
end
accept_param_as_json :reader_tokens, Array
- def object_list
+ def object_list(model_class:)
+ if @objects.respond_to?(:except)
+ limit_database_read(model_class: model_class)
+ end
list = {
:kind => "arvados##{(@response_resource_name || resource_name).camelize(:lower)}List",
:etag => "",
@@ -485,7 +485,7 @@ class ApplicationController < ActionController::Base
end
def render_list
- send_json object_list
+ send_json object_list(model_class: self.model_class)
end
def remote_ip
diff --git a/services/api/app/controllers/arvados/v1/collections_controller.rb b/services/api/app/controllers/arvados/v1/collections_controller.rb
index 2beb1e7..939ca21 100644
--- a/services/api/app/controllers/arvados/v1/collections_controller.rb
+++ b/services/api/app/controllers/arvados/v1/collections_controller.rb
@@ -3,10 +3,6 @@ require "arvados/keep"
class Arvados::V1::CollectionsController < ApplicationController
include DbCurrentTime
- def self.limit_index_columns_read
- ["manifest_text"]
- end
-
def create
if resource_attrs[:uuid] and (loc = Keep::Locator.parse(resource_attrs[:uuid]))
resource_attrs[:portable_data_hash] = loc.to_s
diff --git a/services/api/app/controllers/arvados/v1/groups_controller.rb b/services/api/app/controllers/arvados/v1/groups_controller.rb
index ded28c7..7b0c0f0 100644
--- a/services/api/app/controllers/arvados/v1/groups_controller.rb
+++ b/services/api/app/controllers/arvados/v1/groups_controller.rb
@@ -138,13 +138,20 @@ class Arvados::V1::GroupsController < ApplicationController
@objects = klass.readable_by(*@read_users).
order(request_order).where(where_conds)
- @limit = limit_all - all_objects.count
+ klass_limit = limit_all - all_objects.count
+ @limit = klass_limit
apply_where_limit_order_params klass
- klass_object_list = object_list
+ klass_object_list = object_list(model_class: klass)
klass_items_available = klass_object_list[:items_available] || 0
@items_available += klass_items_available
@offset = [@offset - klass_items_available, 0].max
all_objects += klass_object_list[:items]
+
+ if klass_object_list[:limit] < klass_limit
+ # object_list() had to reduce @limit to comply with
+ # max_index_database_read. We have to stop now.
+ break
+ end
end
@objects = all_objects
diff --git a/services/api/app/models/arvados_model.rb b/services/api/app/models/arvados_model.rb
index 84897d0..89f9a88 100644
--- a/services/api/app/models/arvados_model.rb
+++ b/services/api/app/models/arvados_model.rb
@@ -184,6 +184,14 @@ class ArvadosModel < ActiveRecord::Base
["id", "uuid"]
end
+ def self.limit_index_columns_read
+ # This method returns a list of column names.
+ # If an index request reads that column from the database,
+ # APIs that return lists will only fetch objects until reaching
+ # max_index_database_read bytes of data from those columns.
+ []
+ end
+
# If current user can manage the object, return an array of uuids of
# users and groups that have permission to write the object. The
# first two elements are always [self.owner_uuid, current user's
diff --git a/services/api/app/models/collection.rb b/services/api/app/models/collection.rb
index 33f6bc2..ebaaecf 100644
--- a/services/api/app/models/collection.rb
+++ b/services/api/app/models/collection.rb
@@ -65,6 +65,10 @@ class Collection < ArvadosModel
super + ["updated_at", "file_names"]
end
+ def self.limit_index_columns_read
+ ["manifest_text"]
+ end
+
FILE_TOKEN = /^[[:digit:]]+:[[:digit:]]+:/
def check_signatures
return false if self.manifest_text.nil?
diff --git a/services/api/app/models/container.rb b/services/api/app/models/container.rb
index 9420ef3..15a9c50 100644
--- a/services/api/app/models/container.rb
+++ b/services/api/app/models/container.rb
@@ -66,6 +66,10 @@ class Container < ArvadosModel
Running => [Complete, Cancelled]
}
+ def self.limit_index_columns_read
+ ["mounts"]
+ end
+
def state_transitions
State_transitions
end
diff --git a/services/api/app/models/container_request.rb b/services/api/app/models/container_request.rb
index 628ef88..4387dfa 100644
--- a/services/api/app/models/container_request.rb
+++ b/services/api/app/models/container_request.rb
@@ -73,6 +73,10 @@ class ContainerRequest < ArvadosModel
:runtime_constraints, :state, :container_uuid, :use_existing,
:scheduling_parameters, :output_name, :output_ttl]
+ def self.limit_index_columns_read
+ ["mounts"]
+ end
+
def state_transitions
State_transitions
end
diff --git a/services/api/app/models/workflow.rb b/services/api/app/models/workflow.rb
index 54fcf9b..91040e3 100644
--- a/services/api/app/models/workflow.rb
+++ b/services/api/app/models/workflow.rb
@@ -43,4 +43,8 @@ class Workflow < ArvadosModel
def self.full_text_searchable_columns
super - ["definition"]
end
+
+ def self.limit_index_columns_read
+ ["definition"]
+ end
end
diff --git a/services/api/config/application.default.yml b/services/api/config/application.default.yml
index 8118914..32c7b24 100644
--- a/services/api/config/application.default.yml
+++ b/services/api/config/application.default.yml
@@ -195,12 +195,10 @@ common:
# normally be returned in a single response).
# Note 1: This setting never reduces the number of returned rows to
# zero, no matter how big the first data row is.
- # Note 2: Currently, this only limits the
- # arvados.v1.collections.list API (GET /arvados/v1/collections), and
- # only takes the size of manifest_text into account. Other fields
- # (e.g., "properties" hashes) are not counted against this limit
- # when returning collections, and the limit is not applied at all
- # for other data types.
+ # Note 2: Currently, this is only checked against a specific set of
+ # columns that tend to get large (collections.manifest_text,
+ # containers.mounts, workflows.definition). Other fields (e.g.,
+ # "properties" hashes) are not counted against this limit.
max_index_database_read: 134217728
# Maximum number of items to return when responding to a APIs that
diff --git a/services/api/test/functional/arvados/v1/groups_controller_test.rb b/services/api/test/functional/arvados/v1/groups_controller_test.rb
index 646089d..4c5810d 100644
--- a/services/api/test/functional/arvados/v1/groups_controller_test.rb
+++ b/services/api/test/functional/arvados/v1/groups_controller_test.rb
@@ -462,4 +462,23 @@ class Arvados::V1::GroupsControllerTest < ActionController::TestCase
}
check_project_contents_response %w'arvados#pipelineInstance arvados#job'
end
+
+ test 'get contents with low max_index_database_read' do
+ Rails.configuration.max_index_database_read = 12
+ authorize_with :active
+ get :contents, {
+ id: groups(:aproject).uuid,
+ format: :json,
+ }
+ assert_response :success
+ assert_operator(json_response['items'].count,
+ :<, json_response['items_available'])
+ collections = 0
+ json_response['items'].each do |item|
+ if item['kind'] == 'arvados#collection'
+ collections += 1
+ end
+ end
+ assert_equal 1, collections
+ end
end
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list