[ARVADOS] updated: 1.3.0-606-g670987617
Git user
git at public.curoverse.com
Wed Mar 27 21:28:39 UTC 2019
Summary of changes:
services/api/app/models/collection.rb | 2 +-
services/api/app/models/container.rb | 58 ++++++----------------
.../20190322174136_add_file_info_to_collection.rb | 22 +++++++-
services/api/test/unit/container_test.rb | 8 ++-
4 files changed, 45 insertions(+), 45 deletions(-)
via 6709876170511ade8e24fe60bf77da24bc4a03d4 (commit)
via fe08de0ddf1d3e536cb3518dcb9c82ca62197273 (commit)
via d6fb38cac6a7f9f98f534b4638ce5918ba94c135 (commit)
from a737669021ac34683deecda8130e21b243e14174 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
commit 6709876170511ade8e24fe60bf77da24bc4a03d4
Author: Eric Biagiotti <ebiagiotti at veritasgenetics.com>
Date: Wed Mar 27 17:28:31 2019 -0400
14484: Fixes tests that intentionally pass in invalid manifests
Arvados-DCO-1.1-Signed-off-by: Eric Biagiotti <ebiagiotti at veritasgenetics.com>
diff --git a/services/api/app/models/collection.rb b/services/api/app/models/collection.rb
index 2cebd5438..73a8df11f 100644
--- a/services/api/app/models/collection.rb
+++ b/services/api/app/models/collection.rb
@@ -199,7 +199,7 @@ class Collection < ArvadosModel
end
def set_file_count_and_total_size
- if self.manifest_text_changed?
+ if self.manifest_text_changed? && self.valid?
m = Keep::Manifest.new(self.manifest_text)
self.file_size_total = m.files_size
self.file_count = m.files_count
commit fe08de0ddf1d3e536cb3518dcb9c82ca62197273
Author: Eric Biagiotti <ebiagiotti at veritasgenetics.com>
Date: Wed Mar 27 16:56:47 2019 -0400
14484: Adds test for pdh grouping functionality in the container model
Arvados-DCO-1.1-Signed-off-by: Eric Biagiotti <ebiagiotti at veritasgenetics.com>
diff --git a/services/api/app/models/container.rb b/services/api/app/models/container.rb
index f3da80082..0f48a7501 100644
--- a/services/api/app/models/container.rb
+++ b/services/api/app/models/container.rb
@@ -411,8 +411,7 @@ class Container < ArvadosModel
#
# Correctly groups pdhs to use for batch database updates. Helps avoid
# updating too many database rows in a single transaction.
- def self.group_pdhs_for_multiple_transactions(distinct_ordered_pdhs, distinct_pdh_count, log_prefix)
- batch_size_max = 1 << 28 # 256 MiB
+ def self.group_pdhs_for_multiple_transactions(distinct_ordered_pdhs, distinct_pdh_count, batch_size_max, log_prefix)
batch_size = 0
batch_pdhs = {}
last_pdh = '0'
diff --git a/services/api/db/migrate/20190322174136_add_file_info_to_collection.rb b/services/api/db/migrate/20190322174136_add_file_info_to_collection.rb
index 99db8133d..3e87b0c8b 100755
--- a/services/api/db/migrate/20190322174136_add_file_info_to_collection.rb
+++ b/services/api/db/migrate/20190322174136_add_file_info_to_collection.rb
@@ -46,7 +46,11 @@ class AddFileInfoToCollection < ActiveRecord::Migration
end
}
- Container.group_pdhs_for_multiple_transactions(ordered_pdh_query, distinct_pdh_count, "AddFileInfoToCollection") do |pdhs|
+ batch_size_max = 1 << 28 # 256 MiB
+ Container.group_pdhs_for_multiple_transactions(ordered_pdh_query,
+ distinct_pdh_count,
+ batch_size_max,
+ "AddFileInfoToCollection") do |pdhs|
do_batch(pdhs)
end
end
diff --git a/services/api/test/unit/container_test.rb b/services/api/test/unit/container_test.rb
index 2b7fda8d7..783d2a985 100644
--- a/services/api/test/unit/container_test.rb
+++ b/services/api/test/unit/container_test.rb
@@ -962,8 +962,14 @@ class ContainerTest < ActiveSupport::TestCase
test "pdh_grouping_by_manifest_size" do
batch_size_max = 200
pdhs_in = ['x1+30', 'x2+30', 'x3+201', 'x4+100', 'x5+100']
+ pdh_lambda = lambda { |last_pdh, &block|
+ pdhs = pdhs_in.select{|pdh| pdh > last_pdh}
+ pdhs.each do |p|
+ block.call(p)
+ end
+ }
batched_pdhs = []
- Container.group_pdhs_by_manifest_size(pdhs_in, batch_size_max) do |pdhs|
+ Container.group_pdhs_for_multiple_transactions(pdh_lambda, pdhs_in.size, batch_size_max, "") do |pdhs|
batched_pdhs << pdhs
end
expected = [['x1+30', 'x2+30'], ['x3+201'], ['x4+100', 'x5+100']]
commit d6fb38cac6a7f9f98f534b4638ce5918ba94c135
Author: Eric Biagiotti <ebiagiotti at veritasgenetics.com>
Date: Wed Mar 27 16:35:14 2019 -0400
14484: Simplifies pdh transaction grouping, keeps SQL in the migration
Arvados-DCO-1.1-Signed-off-by: Eric Biagiotti <ebiagiotti at veritasgenetics.com>
diff --git a/services/api/app/models/container.rb b/services/api/app/models/container.rb
index 694aa5a0d..f3da80082 100644
--- a/services/api/app/models/container.rb
+++ b/services/api/app/models/container.rb
@@ -411,58 +411,33 @@ class Container < ArvadosModel
#
# Correctly groups pdhs to use for batch database updates. Helps avoid
# updating too many database rows in a single transaction.
- def self.group_pdhs_for_multiple_transactions(log_prefix)
+ def self.group_pdhs_for_multiple_transactions(distinct_ordered_pdhs, distinct_pdh_count, log_prefix)
batch_size_max = 1 << 28 # 256 MiB
+ batch_size = 0
+ batch_pdhs = {}
last_pdh = '0'
done = 0
any = true
- total = ActiveRecord::Base.connection.exec_query(
- "SELECT DISTINCT portable_data_hash FROM collections"
- ).rows.count
-
while any
any = false
- pdhs_res = ActiveRecord::Base.connection.exec_query(
- "SELECT DISTINCT portable_data_hash FROM collections "\
- "WHERE portable_data_hash > '#{last_pdh}' "\
- "GROUP BY portable_data_hash LIMIT 1000"
- )
- break if pdhs_res.rows.count.zero?
-
- pdhs = pdhs_res.rows.collect { |r| r[0] }
- Container.group_pdhs_by_manifest_size(pdhs, batch_size_max) do |grouped_pdhs|
+ distinct_ordered_pdhs.call(last_pdh) do |pdh|
any = true
- yield grouped_pdhs
- done += grouped_pdhs.size
- last_pdh = pdhs[-1]
- Rails.logger.info(log_prefix + ": #{done}/#{total}")
- end
- end
- Rails.logger.info(log_prefix + ": finished")
- end
-
- # NOTE: Migration 20190322174136_add_file_info_to_collection.rb relies on this function.
- #
- # Change with caution!
- #
- # Given an array of pdhs, yield a subset array of pdhs when the total
- # size of all manifest_texts is no more than batch_size_max. Pdhs whose manifest_text
- # is bigger than batch_size_max are yielded by themselves
- def self.group_pdhs_by_manifest_size(pdhs, batch_size_max)
- batch_size = 0
- batch_pdhs = {}
- pdhs.each do |pdh|
- manifest_size = pdh.split('+')[1].to_i
- if batch_size > 0 && batch_size + manifest_size > batch_size_max
- yield batch_pdhs.keys
- batch_pdhs = {}
- batch_size = 0
+ last_pdh = pdh
+ manifest_size = pdh.split('+')[1].to_i
+ if batch_size > 0 && batch_size + manifest_size > batch_size_max
+ yield batch_pdhs.keys
+ done += batch_pdhs.size
+ Rails.logger.info(log_prefix + ": #{done}/#{distinct_pdh_count}")
+ batch_pdhs = {}
+ batch_size = 0
+ end
+ batch_pdhs[pdh] = true
+ batch_size += manifest_size
end
- batch_pdhs[pdh] = true
- batch_size += manifest_size
end
yield batch_pdhs.keys
+ Rails.logger.info(log_prefix + ": finished")
end
protected
diff --git a/services/api/db/migrate/20190322174136_add_file_info_to_collection.rb b/services/api/db/migrate/20190322174136_add_file_info_to_collection.rb
index c1c336247..99db8133d 100755
--- a/services/api/db/migrate/20190322174136_add_file_info_to_collection.rb
+++ b/services/api/db/migrate/20190322174136_add_file_info_to_collection.rb
@@ -30,7 +30,23 @@ class AddFileInfoToCollection < ActiveRecord::Migration
add_column :collections, :file_count, :integer, default: 0, null: false
add_column :collections, :file_size_total, :integer, default: 0, null: false
- Container.group_pdhs_for_multiple_transactions("AddFileInfoToCollection") do |pdhs|
+ distinct_pdh_count = ActiveRecord::Base.connection.exec_query(
+ "SELECT DISTINCT portable_data_hash FROM collections"
+ ).rows.count
+
+ # Generator that queries for all the distinct pdhs greater than last_pdh
+ ordered_pdh_query = lambda { |last_pdh, &block|
+ pdhs = ActiveRecord::Base.connection.exec_query(
+ "SELECT DISTINCT portable_data_hash FROM collections "\
+ "WHERE portable_data_hash > '#{last_pdh}' "\
+ "ORDER BY portable_data_hash LIMIT 1000"
+ )
+ pdhs.rows.each do |row|
+ block.call(row[0])
+ end
+ }
+
+ Container.group_pdhs_for_multiple_transactions(ordered_pdh_query, distinct_pdh_count, "AddFileInfoToCollection") do |pdhs|
do_batch(pdhs)
end
end
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list