[ARVADOS] updated: 602706ef5510b3f07fc5fa988019952d2133320c
git at public.curoverse.com
git at public.curoverse.com
Thu Jun 4 01:03:47 EDT 2015
Summary of changes:
.../api/app/controllers/application_controller.rb | 2 +-
services/api/config/application.default.yml | 28 ++++++++++++----------
.../arvados/v1/collections_controller_test.rb | 26 ++++++++++++++++----
3 files changed, 39 insertions(+), 17 deletions(-)
via 602706ef5510b3f07fc5fa988019952d2133320c (commit)
via 39a1340d56f7acbddb771f6bef36b68ee9076885 (commit)
from 22c8b6367a9cd79b17240b7dca1ac8f7d8e7ee77 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
commit 602706ef5510b3f07fc5fa988019952d2133320c
Author: Tom Clegg <tom at curoverse.com>
Date: Thu Jun 4 00:52:48 2015 -0400
6074: Use each instead of find_each, so our order() and limit() constraints are respected.
According to http://apidock.com/rails/ActiveRecord/Batches/find_each,
both order and limit are ignored.
The existing test "max_index_database_read does not interfere with
order" had two fatal bugs that prevented it from catching the
find_each problem:
1. It didn't select the 'name' column, so the 'name' order was
ignored. But the test passed because the name wasn't returned,
item['name'] was nil, and... nil !~ /pattern/ is true. Both
problems are fixed here.
(This explains why it seemed to find 15 names starting with
Collection_9, rather than just the 11 that exist (_9 and _90..99).)
2. It tested only that the returned results followed the requested
order, not that the order was followed when deciding what the limit
should be. All of the items_available were the same size, so _any_
order would have set the limit at 15 and passed the test.
The second problem is fixed by adding a separate test.
diff --git a/services/api/app/controllers/application_controller.rb b/services/api/app/controllers/application_controller.rb
index 1c8450b..e91e3ce 100644
--- a/services/api/app/controllers/application_controller.rb
+++ b/services/api/app/controllers/application_controller.rb
@@ -306,7 +306,7 @@ class ApplicationController < ActionController::Base
limit_columns.map { |s| "octet_length(#{s})" }.join(" + "))
new_limit = 0
read_total = 0
- limit_query.find_each do |record|
+ limit_query.each do |record|
new_limit += 1
read_total += record.read_length.to_i
if read_total >= Rails.configuration.max_index_database_read
diff --git a/services/api/test/functional/arvados/v1/collections_controller_test.rb b/services/api/test/functional/arvados/v1/collections_controller_test.rb
index 28de3f8..d2901a0 100644
--- a/services/api/test/functional/arvados/v1/collections_controller_test.rb
+++ b/services/api/test/functional/arvados/v1/collections_controller_test.rb
@@ -111,6 +111,23 @@ class Arvados::V1::CollectionsControllerTest < ActionController::TestCase
assert_equal(201, json_response["items_available"])
end
+ test "max_index_database_read size check follows same order as real query" do
+ authorize_with :user1_with_load
+ txt = '.' + ' d41d8cd98f00b204e9800998ecf8427e+0'*1000 + " 0:0:empty.txt\n"
+ c = Collection.create! manifest_text: txt, name: '0000000000000000000'
+ request_capped_index(select: %w(uuid manifest_text name),
+ order: ['name asc'],
+ filters: [['name','>=',c.name]]) do |_|
+ txt.length - 1
+ end
+ assert_response :success
+ assert_equal(1, json_response["items"].size)
+ assert_equal(1, json_response["limit"])
+ assert_equal(c.uuid, json_response["items"][0]["uuid"])
+ # The effectiveness of the test depends on >1 item matching the filters.
+ assert_operator(1, :<, json_response["items_available"])
+ end
+
test "index with manifest_text limited by max_index_database_read" do
request_capped_index() { |size| (size * 3) + 1 }
assert_response :success
@@ -128,13 +145,14 @@ class Arvados::V1::CollectionsControllerTest < ActionController::TestCase
end
test "max_index_database_read does not interfere with order" do
- request_capped_index(order: "name DESC") { |size| (size * 15) + 1 }
+ request_capped_index(select: %w(uuid manifest_text name),
+ order: "name DESC") { |size| (size * 11) + 1 }
assert_response :success
- assert_equal(15, json_response["items"].size)
+ assert_equal(11, json_response["items"].size)
assert_empty(json_response["items"].reject do |coll|
- coll["name"] !~ /^Collection_9/
+ coll["name"] =~ /^Collection_9/
end)
- assert_equal(15, json_response["limit"])
+ assert_equal(11, json_response["limit"])
assert_equal(201, json_response["items_available"])
end
commit 39a1340d56f7acbddb771f6bef36b68ee9076885
Author: Tom Clegg <tom at curoverse.com>
Date: Wed Jun 3 23:41:06 2015 -0400
6074: Update config docs to match new max_index_database_read behavior.
diff --git a/services/api/config/application.default.yml b/services/api/config/application.default.yml
index 994748b..5488003 100644
--- a/services/api/config/application.default.yml
+++ b/services/api/config/application.default.yml
@@ -295,20 +295,24 @@ common:
# collection's replication_desired attribute is nil.
default_collection_replication: 2
- # Maximum size (in bytes) allowed for a single API request that will be
- # published in the discovery document for use by clients.
- # Note you must separately configure the upstream web server or proxy to
- # actually enforce the desired maximum request size on the server side.
+ # Maximum size (in bytes) allowed for a single API request. This
+ # limit is published in the discovery document for use by clients.
+ # Note: You must separately configure the upstream web server or
+ # proxy to actually enforce the desired maximum request size on the
+ # server side.
max_request_size: 134217728
- # Stop collecting records for an index request after we read this much
- # data (in bytes) from large database columns.
- # Currently only `GET /collections` respects this parameter, when the
- # user requests an index that includes manifest_text. Once the API
- # server collects records with a total manifest_text size at or above
- # this amount, it returns those results immediately.
- # Note this is a threshold, not a limit. Record collection stops
- # *after* reading this much data.
+ # Limit the number of bytes read from the database during an index
+ # request (by retrieving and returning fewer rows than would
+ # normally be returned in a single response).
+ # Note 1: This setting never reduces the number of returned rows to
+ # zero, no matter how big the first data row is.
+ # Note 2: Currently, this only limits the
+ # arvados.v1.collections.list API (GET /arvados/v1/collections), and
+ # only takes the size of manifest_text into account. Other fields
+ # (e.g., "properties" hashes) are not counted against this limit
+ # when returning collections, and the limit is not applied at all
+ # for other data types.
max_index_database_read: 134217728
# When you run the db:delete_old_job_logs task, it will find jobs that
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list