[ARVADOS] updated: 1.3.0-610-g59a1fc872
Git user
git at public.curoverse.com
Fri Mar 29 18:46:35 UTC 2019
Summary of changes:
services/api/app/models/container.rb | 34 -------------------
.../20190322174136_add_file_info_to_collection.rb | 5 +--
services/api/lib/group_pdhs.rb | 39 ++++++++++++++++++++++
3 files changed, 42 insertions(+), 36 deletions(-)
create mode 100644 services/api/lib/group_pdhs.rb
via 59a1fc872723c0bafa9764b95756723f54419631 (commit)
from ce0caee0ecb9c8f6c6cbefc1a12a37560d0f7554 (commit)
Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.
commit 59a1fc872723c0bafa9764b95756723f54419631
Author: Eric Biagiotti <ebiagiotti at veritasgenetics.com>
Date: Fri Mar 29 14:46:30 2019 -0400
14484: Moves pdh grouping into a lib module
Arvados-DCO-1.1-Signed-off-by: Eric Biagiotti <ebiagiotti at veritasgenetics.com>
diff --git a/services/api/app/models/container.rb b/services/api/app/models/container.rb
index 0f48a7501..abcfdbd29 100644
--- a/services/api/app/models/container.rb
+++ b/services/api/app/models/container.rb
@@ -405,40 +405,6 @@ class Container < ArvadosModel
end
end
- # NOTE: Migration 20190322174136_add_file_info_to_collection.rb relies on this function.
- #
- # Change with caution!
- #
- # Correctly groups pdhs to use for batch database updates. Helps avoid
- # updating too many database rows in a single transaction.
- def self.group_pdhs_for_multiple_transactions(distinct_ordered_pdhs, distinct_pdh_count, batch_size_max, log_prefix)
- batch_size = 0
- batch_pdhs = {}
- last_pdh = '0'
- done = 0
- any = true
-
- while any
- any = false
- distinct_ordered_pdhs.call(last_pdh) do |pdh|
- any = true
- last_pdh = pdh
- manifest_size = pdh.split('+')[1].to_i
- if batch_size > 0 && batch_size + manifest_size > batch_size_max
- yield batch_pdhs.keys
- done += batch_pdhs.size
- Rails.logger.info(log_prefix + ": #{done}/#{distinct_pdh_count}")
- batch_pdhs = {}
- batch_size = 0
- end
- batch_pdhs[pdh] = true
- batch_size += manifest_size
- end
- end
- yield batch_pdhs.keys
- Rails.logger.info(log_prefix + ": finished")
- end
-
protected
def fill_field_defaults
diff --git a/services/api/db/migrate/20190322174136_add_file_info_to_collection.rb b/services/api/db/migrate/20190322174136_add_file_info_to_collection.rb
index 47f539826..146e105af 100755
--- a/services/api/db/migrate/20190322174136_add_file_info_to_collection.rb
+++ b/services/api/db/migrate/20190322174136_add_file_info_to_collection.rb
@@ -3,6 +3,7 @@
# SPDX-License-Identifier: AGPL-3.0
require "arvados/keep"
+require "group_pdhs"
class AddFileInfoToCollection < ActiveRecord::Migration
def do_batch(pdhs)
@@ -34,7 +35,7 @@ class AddFileInfoToCollection < ActiveRecord::Migration
"SELECT DISTINCT portable_data_hash FROM collections"
).rows.count
- # Generator that queries for all the distince pdhs greater than last_pdh
+ # Generator that queries for all the distinct pdhs greater than last_pdh
ordered_pdh_query = lambda { |last_pdh, &block|
pdhs = ActiveRecord::Base.connection.exec_query(
"SELECT DISTINCT portable_data_hash FROM collections "\
@@ -47,7 +48,7 @@ class AddFileInfoToCollection < ActiveRecord::Migration
}
batch_size_max = 1 << 28 # 256 MiB
- Container.group_pdhs_for_multiple_transactions(ordered_pdh_query,
+ GroupPdhs.group_pdhs_for_multiple_transactions(ordered_pdh_query,
distinct_pdh_count,
batch_size_max,
"AddFileInfoToCollection") do |pdhs|
diff --git a/services/api/lib/group_pdhs.rb b/services/api/lib/group_pdhs.rb
new file mode 100644
index 000000000..0630ef8b5
--- /dev/null
+++ b/services/api/lib/group_pdhs.rb
@@ -0,0 +1,39 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+module GroupPdhs
+ # NOTE: Migration 20190322174136_add_file_info_to_collection.rb relies on this function.
+ #
+ # Change with caution!
+ #
+ # Correctly groups pdhs to use for batch database updates. Helps avoid
+ # updating too many database rows in a single transaction.
+ def self.group_pdhs_for_multiple_transactions(distinct_ordered_pdhs, distinct_pdh_count, batch_size_max, log_prefix)
+ batch_size = 0
+ batch_pdhs = {}
+ last_pdh = '0'
+ done = 0
+ any = true
+
+ while any
+ any = false
+ distinct_ordered_pdhs.call(last_pdh) do |pdh|
+ any = true
+ last_pdh = pdh
+ manifest_size = pdh.split('+')[1].to_i
+ if batch_size > 0 && batch_size + manifest_size > batch_size_max
+ yield batch_pdhs.keys
+ done += batch_pdhs.size
+ Rails.logger.info(log_prefix + ": #{done}/#{distinct_pdh_count}")
+ batch_pdhs = {}
+ batch_size = 0
+ end
+ batch_pdhs[pdh] = true
+ batch_size += manifest_size
+ end
+ end
+ yield batch_pdhs.keys
+ Rails.logger.info(log_prefix + ": finished")
+ end
+end
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list