[ARVADOS] created: cb109bfddd08bd8136b75e90b681e4af3d60ea30

Git user git at public.curoverse.com
Fri May 5 15:31:39 EDT 2017


        at  cb109bfddd08bd8136b75e90b681e4af3d60ea30 (commit)


commit cb109bfddd08bd8136b75e90b681e4af3d60ea30
Author: Tom Clegg <tom at curoverse.com>
Date:   Fri May 5 14:09:54 2017 -0400

    11629: Limit database reads for all list responses, not just index.

diff --git a/services/api/app/controllers/application_controller.rb b/services/api/app/controllers/application_controller.rb
index 831bcce..2bf7952 100644
--- a/services/api/app/controllers/application_controller.rb
+++ b/services/api/app/controllers/application_controller.rb
@@ -178,18 +178,9 @@ class ApplicationController < ActionController::Base
            }.merge opts)
   end
 
-  def self.limit_index_columns_read
-    # This method returns a list of column names.
-    # If an index request reads that column from the database,
-    # find_objects_for_index will only fetch objects until it reads
-    # max_index_database_read bytes of data from those columns.
-    []
-  end
-
   def find_objects_for_index
     @objects ||= model_class.readable_by(*@read_users)
     apply_where_limit_order_params
-    limit_database_read if (action_name == "index")
   end
 
   def apply_filters model_class=nil
@@ -278,8 +269,14 @@ class ApplicationController < ActionController::Base
     @objects = @objects.uniq(@distinct) if not @distinct.nil?
   end
 
-  def limit_database_read
-    limit_columns = self.class.limit_index_columns_read
+  # limit_database_read ensures @objects (which must be an
+  # ActiveRelation) does not return too many results to fit in memory,
+  # by previewing the results and calling @objects.limit() if
+  # necessary.
+  def limit_database_read(model_class:)
+    return if @limit == 0 || @limit == 1
+    model_class ||= self.model_class
+    limit_columns = model_class.limit_index_columns_read
     limit_columns &= model_class.columns_for_attributes(@select) if @select
     return if limit_columns.empty?
     model_class.transaction do
@@ -294,12 +291,12 @@ class ApplicationController < ActionController::Base
         read_total += record.read_length.to_i
         if read_total >= Rails.configuration.max_index_database_read
           new_limit -= 1 if new_limit > 1
+          @limit = new_limit
           break
         elsif new_limit >= @limit
           break
         end
       end
-      @limit = new_limit
       @objects = @objects.limit(@limit)
       # Force @objects to run its query inside this transaction.
       @objects.each { |_| break }
@@ -461,7 +458,10 @@ class ApplicationController < ActionController::Base
   end
   accept_param_as_json :reader_tokens, Array
 
-  def object_list
+  def object_list(model_class:)
+    if @objects.respond_to?(:except)
+      limit_database_read(model_class: model_class)
+    end
     list = {
       :kind  => "arvados##{(@response_resource_name || resource_name).camelize(:lower)}List",
       :etag => "",
@@ -485,7 +485,7 @@ class ApplicationController < ActionController::Base
   end
 
   def render_list
-    send_json object_list
+    send_json object_list(model_class: self.model_class)
   end
 
   def remote_ip
diff --git a/services/api/app/controllers/arvados/v1/collections_controller.rb b/services/api/app/controllers/arvados/v1/collections_controller.rb
index 2beb1e7..939ca21 100644
--- a/services/api/app/controllers/arvados/v1/collections_controller.rb
+++ b/services/api/app/controllers/arvados/v1/collections_controller.rb
@@ -3,10 +3,6 @@ require "arvados/keep"
 class Arvados::V1::CollectionsController < ApplicationController
   include DbCurrentTime
 
-  def self.limit_index_columns_read
-    ["manifest_text"]
-  end
-
   def create
     if resource_attrs[:uuid] and (loc = Keep::Locator.parse(resource_attrs[:uuid]))
       resource_attrs[:portable_data_hash] = loc.to_s
diff --git a/services/api/app/controllers/arvados/v1/groups_controller.rb b/services/api/app/controllers/arvados/v1/groups_controller.rb
index ded28c7..7b0c0f0 100644
--- a/services/api/app/controllers/arvados/v1/groups_controller.rb
+++ b/services/api/app/controllers/arvados/v1/groups_controller.rb
@@ -138,13 +138,20 @@ class Arvados::V1::GroupsController < ApplicationController
 
       @objects = klass.readable_by(*@read_users).
         order(request_order).where(where_conds)
-      @limit = limit_all - all_objects.count
+      klass_limit = limit_all - all_objects.count
+      @limit = klass_limit
       apply_where_limit_order_params klass
-      klass_object_list = object_list
+      klass_object_list = object_list(model_class: klass)
       klass_items_available = klass_object_list[:items_available] || 0
       @items_available += klass_items_available
       @offset = [@offset - klass_items_available, 0].max
       all_objects += klass_object_list[:items]
+
+      if klass_object_list[:limit] < klass_limit
+        # object_list() had to reduce @limit to comply with
+        # max_index_database_read. We have to stop now.
+        break
+      end
     end
 
     @objects = all_objects
diff --git a/services/api/app/models/arvados_model.rb b/services/api/app/models/arvados_model.rb
index 84897d0..89f9a88 100644
--- a/services/api/app/models/arvados_model.rb
+++ b/services/api/app/models/arvados_model.rb
@@ -184,6 +184,14 @@ class ArvadosModel < ActiveRecord::Base
     ["id", "uuid"]
   end
 
+  def self.limit_index_columns_read
+    # This method returns a list of column names.
+    # If an index request reads that column from the database,
+    # APIs that return lists will only fetch objects until reaching
+    # max_index_database_read bytes of data from those columns.
+    []
+  end
+
   # If current user can manage the object, return an array of uuids of
   # users and groups that have permission to write the object. The
   # first two elements are always [self.owner_uuid, current user's
diff --git a/services/api/app/models/collection.rb b/services/api/app/models/collection.rb
index 33f6bc2..ebaaecf 100644
--- a/services/api/app/models/collection.rb
+++ b/services/api/app/models/collection.rb
@@ -65,6 +65,10 @@ class Collection < ArvadosModel
     super + ["updated_at", "file_names"]
   end
 
+  def self.limit_index_columns_read
+    ["manifest_text"]
+  end
+
   FILE_TOKEN = /^[[:digit:]]+:[[:digit:]]+:/
   def check_signatures
     return false if self.manifest_text.nil?
diff --git a/services/api/app/models/container.rb b/services/api/app/models/container.rb
index 9420ef3..15a9c50 100644
--- a/services/api/app/models/container.rb
+++ b/services/api/app/models/container.rb
@@ -66,6 +66,10 @@ class Container < ArvadosModel
     Running => [Complete, Cancelled]
   }
 
+  def self.limit_index_columns_read
+    ["mounts"]
+  end
+
   def state_transitions
     State_transitions
   end
diff --git a/services/api/app/models/container_request.rb b/services/api/app/models/container_request.rb
index 628ef88..4387dfa 100644
--- a/services/api/app/models/container_request.rb
+++ b/services/api/app/models/container_request.rb
@@ -73,6 +73,10 @@ class ContainerRequest < ArvadosModel
   :runtime_constraints, :state, :container_uuid, :use_existing,
   :scheduling_parameters, :output_name, :output_ttl]
 
+  def self.limit_index_columns_read
+    ["mounts"]
+  end
+
   def state_transitions
     State_transitions
   end
diff --git a/services/api/app/models/workflow.rb b/services/api/app/models/workflow.rb
index 54fcf9b..91040e3 100644
--- a/services/api/app/models/workflow.rb
+++ b/services/api/app/models/workflow.rb
@@ -43,4 +43,8 @@ class Workflow < ArvadosModel
   def self.full_text_searchable_columns
     super - ["definition"]
   end
+
+  def self.limit_index_columns_read
+    ["definition"]
+  end
 end
diff --git a/services/api/config/application.default.yml b/services/api/config/application.default.yml
index 8118914..32c7b24 100644
--- a/services/api/config/application.default.yml
+++ b/services/api/config/application.default.yml
@@ -195,12 +195,10 @@ common:
   # normally be returned in a single response).
   # Note 1: This setting never reduces the number of returned rows to
   # zero, no matter how big the first data row is.
-  # Note 2: Currently, this only limits the
-  # arvados.v1.collections.list API (GET /arvados/v1/collections), and
-  # only takes the size of manifest_text into account. Other fields
-  # (e.g., "properties" hashes) are not counted against this limit
-  # when returning collections, and the limit is not applied at all
-  # for other data types.
+  # Note 2: Currently, this is only checked against a specific set of
+  # columns that tend to get large (collections.manifest_text,
+  # containers.mounts, workflows.definition). Other fields (e.g.,
+  # "properties" hashes) are not counted against this limit.
   max_index_database_read: 134217728
 
   # Maximum number of items to return when responding to a APIs that
diff --git a/services/api/test/functional/arvados/v1/groups_controller_test.rb b/services/api/test/functional/arvados/v1/groups_controller_test.rb
index 646089d..4c5810d 100644
--- a/services/api/test/functional/arvados/v1/groups_controller_test.rb
+++ b/services/api/test/functional/arvados/v1/groups_controller_test.rb
@@ -462,4 +462,23 @@ class Arvados::V1::GroupsControllerTest < ActionController::TestCase
     }
     check_project_contents_response %w'arvados#pipelineInstance arvados#job'
   end
+
+  test 'get contents with low max_index_database_read' do
+    Rails.configuration.max_index_database_read = 12
+    authorize_with :active
+    get :contents, {
+          id: groups(:aproject).uuid,
+          format: :json,
+        }
+    assert_response :success
+    assert_operator(json_response['items'].count,
+                    :<, json_response['items_available'])
+    collections = 0
+    json_response['items'].each do |item|
+      if item['kind'] == 'arvados#collection'
+        collections += 1
+      end
+    end
+    assert_equal 1, collections
+  end
 end

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list