[ARVADOS] updated: 1.3.0-610-g59a1fc872

Git user git at public.curoverse.com
Fri Mar 29 18:46:35 UTC 2019


Summary of changes:
 services/api/app/models/container.rb               | 34 -------------------
 .../20190322174136_add_file_info_to_collection.rb  |  5 +--
 services/api/lib/group_pdhs.rb                     | 39 ++++++++++++++++++++++
 3 files changed, 42 insertions(+), 36 deletions(-)
 create mode 100644 services/api/lib/group_pdhs.rb

       via  59a1fc872723c0bafa9764b95756723f54419631 (commit)
      from  ce0caee0ecb9c8f6c6cbefc1a12a37560d0f7554 (commit)

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.


commit 59a1fc872723c0bafa9764b95756723f54419631
Author: Eric Biagiotti <ebiagiotti at veritasgenetics.com>
Date:   Fri Mar 29 14:46:30 2019 -0400

    14484: Moves pdh grouping into a lib module
    
    Arvados-DCO-1.1-Signed-off-by: Eric Biagiotti <ebiagiotti at veritasgenetics.com>

diff --git a/services/api/app/models/container.rb b/services/api/app/models/container.rb
index 0f48a7501..abcfdbd29 100644
--- a/services/api/app/models/container.rb
+++ b/services/api/app/models/container.rb
@@ -405,40 +405,6 @@ class Container < ArvadosModel
     end
   end
 
-  # NOTE: Migration 20190322174136_add_file_info_to_collection.rb relies on this function.
-  #
-  # Change with caution!
-  #
-  # Correctly groups pdhs to use for batch database updates. Helps avoid
-  # updating too many database rows in a single transaction.
-  def self.group_pdhs_for_multiple_transactions(distinct_ordered_pdhs, distinct_pdh_count, batch_size_max, log_prefix)
-    batch_size = 0
-    batch_pdhs = {}
-    last_pdh = '0'
-    done = 0
-    any = true
-
-    while any
-      any = false
-      distinct_ordered_pdhs.call(last_pdh) do |pdh|
-        any = true
-        last_pdh = pdh
-        manifest_size = pdh.split('+')[1].to_i
-        if batch_size > 0 && batch_size + manifest_size > batch_size_max
-          yield batch_pdhs.keys
-          done += batch_pdhs.size
-          Rails.logger.info(log_prefix + ": #{done}/#{distinct_pdh_count}")
-          batch_pdhs = {}
-          batch_size = 0
-        end
-        batch_pdhs[pdh] = true
-        batch_size += manifest_size
-      end
-    end
-    yield batch_pdhs.keys
-    Rails.logger.info(log_prefix + ": finished")
-  end
-
   protected
 
   def fill_field_defaults
diff --git a/services/api/db/migrate/20190322174136_add_file_info_to_collection.rb b/services/api/db/migrate/20190322174136_add_file_info_to_collection.rb
index 47f539826..146e105af 100755
--- a/services/api/db/migrate/20190322174136_add_file_info_to_collection.rb
+++ b/services/api/db/migrate/20190322174136_add_file_info_to_collection.rb
@@ -3,6 +3,7 @@
 # SPDX-License-Identifier: AGPL-3.0
 
 require "arvados/keep"
+require "group_pdhs"
 
 class AddFileInfoToCollection < ActiveRecord::Migration
   def do_batch(pdhs)
@@ -34,7 +35,7 @@ class AddFileInfoToCollection < ActiveRecord::Migration
       "SELECT DISTINCT portable_data_hash FROM collections"
     ).rows.count
 
-    # Generator that queries for all the distince pdhs greater than last_pdh
+    # Generator that queries for all the distinct pdhs greater than last_pdh
     ordered_pdh_query = lambda { |last_pdh, &block|
       pdhs = ActiveRecord::Base.connection.exec_query(
         "SELECT DISTINCT portable_data_hash FROM collections "\
@@ -47,7 +48,7 @@ class AddFileInfoToCollection < ActiveRecord::Migration
     }
 
     batch_size_max = 1 << 28 # 256 MiB
-    Container.group_pdhs_for_multiple_transactions(ordered_pdh_query,
+    GroupPdhs.group_pdhs_for_multiple_transactions(ordered_pdh_query,
                                                    distinct_pdh_count,
                                                    batch_size_max,
                                                    "AddFileInfoToCollection") do |pdhs|
diff --git a/services/api/lib/group_pdhs.rb b/services/api/lib/group_pdhs.rb
new file mode 100644
index 000000000..0630ef8b5
--- /dev/null
+++ b/services/api/lib/group_pdhs.rb
@@ -0,0 +1,39 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+module GroupPdhs
+  # NOTE: Migration 20190322174136_add_file_info_to_collection.rb relies on this function.
+  #
+  # Change with caution!
+  #
+  # Correctly groups pdhs to use for batch database updates. Helps avoid
+  # updating too many database rows in a single transaction.
+  def self.group_pdhs_for_multiple_transactions(distinct_ordered_pdhs, distinct_pdh_count, batch_size_max, log_prefix)
+    batch_size = 0
+    batch_pdhs = {}
+    last_pdh = '0'
+    done = 0
+    any = true
+
+    while any
+      any = false
+      distinct_ordered_pdhs.call(last_pdh) do |pdh|
+        any = true
+        last_pdh = pdh
+        manifest_size = pdh.split('+')[1].to_i
+        if batch_size > 0 && batch_size + manifest_size > batch_size_max
+          yield batch_pdhs.keys
+          done += batch_pdhs.size
+          Rails.logger.info(log_prefix + ": #{done}/#{distinct_pdh_count}")
+          batch_pdhs = {}
+          batch_size = 0
+        end
+        batch_pdhs[pdh] = true
+        batch_size += manifest_size
+      end
+    end
+    yield batch_pdhs.keys
+    Rails.logger.info(log_prefix + ": finished")
+  end
+end

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list