[ARVADOS] updated: 1.3.0-598-g4e03c7e92

Git user git at public.curoverse.com
Tue Mar 26 16:52:19 UTC 2019


Summary of changes:
 services/api/app/models/container.rb               |  6 +--
 .../20190322174136_add_file_info_to_collection.rb  | 45 +++++++++++++++++++++-
 2 files changed, 46 insertions(+), 5 deletions(-)

       via  4e03c7e92230d5ceb5adf09844f514eacbfc3a41 (commit)
       via  7253776cc43c48cbb383f90aa582be2aa73cf09b (commit)
      from  8c82f404b48a159797bd0e96e3d0098f0cf3ba16 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit 4e03c7e92230d5ceb5adf09844f514eacbfc3a41
Author: Eric Biagiotti <ebiagiotti at veritasgenetics.com>
Date:   Tue Mar 26 12:47:06 2019 -0400

    14484: Adds migration for collection file count and total size
    
    Arvados-DCO-1.1-Signed-off-by: Eric Biagiotti <ebiagiotti at veritasgenetics.com>

diff --git a/services/api/db/migrate/20190322174136_add_file_info_to_collection.rb b/services/api/db/migrate/20190322174136_add_file_info_to_collection.rb
index 816514e67..97bab1e56 100755
--- a/services/api/db/migrate/20190322174136_add_file_info_to_collection.rb
+++ b/services/api/db/migrate/20190322174136_add_file_info_to_collection.rb
@@ -3,8 +3,51 @@
 # SPDX-License-Identifier: AGPL-3.0
 
 class AddFileInfoToCollection < ActiveRecord::Migration
-  def change
+  def do_batch(pdhs)
+    pdhs_str = ''
+    pdhs.each do |pdh|
+      pdhs_str << "'" << pdh[0] << "'" << ','
+    end
+
+    collections = ActiveRecord::Base.connection.exec_query(
+      'SELECT DISTINCT portable_data_hash, manifest_text FROM collections '\
+      "WHERE portable_data_hash IN (#{pdhs_str[0..-2]}) "
+    )
+
+    collections.rows.each do |row|
+      file_count = 0
+      file_size_total = 0
+      row[1].scan(/\S+/) do |token|
+        is_file = token.match(/^[[:digit:]]+:[[:digit:]]+:([^\000-\040\\]|\\[0-3][0-7][0-7])+$/)
+        if is_file
+          _, filesize, filename = token.split(':', 3)
+
+          # Avoid counting empty dir placeholders
+          break if filename == '.' && filesize.zero?
+
+          file_count += 1
+          file_size_total += filesize.to_i
+        end
+      end
+      ActiveRecord::Base.connection.exec_query('BEGIN')
+      ActiveRecord::Base.connection.exec_query("UPDATE collections SET file_count=#{file_count}, "\
+                                               "file_size_total=#{file_size_total} "\
+                                               "WHERE portable_data_hash='#{row[0]}'")
+      ActiveRecord::Base.connection.exec_query('COMMIT')
+    end
+  end
+
+  def up
     add_column :collections, :file_count, :integer, default: 0, null: false
     add_column :collections, :file_size_total, :integer, default: 0, null: false
+
+    Container.group_pdhs_for_multiple_transactions('AddFileInfoToCollection') do |pdhs|
+      do_batch(pdhs)
+    end
+  end
+
+  def down
+    remove_column :collections, :file_count
+    remove_column :collections, :file_size_total
   end
 end

commit 7253776cc43c48cbb383f90aa582be2aa73cf09b
Author: Eric Biagiotti <ebiagiotti at veritasgenetics.com>
Date:   Tue Mar 26 12:46:15 2019 -0400

    14484: Fixes bug in pdh transaction grouping
    
    Arvados-DCO-1.1-Signed-off-by: Eric Biagiotti <ebiagiotti at veritasgenetics.com>

diff --git a/services/api/app/models/container.rb b/services/api/app/models/container.rb
index b3328d9c7..e46ef6fd3 100644
--- a/services/api/app/models/container.rb
+++ b/services/api/app/models/container.rb
@@ -428,11 +428,9 @@ class Container < ArvadosModel
         "WHERE portable_data_hash > '#{last_pdh}' "\
         'GROUP BY portable_data_hash LIMIT 1000'
       )
-      if pdhs.rows.count.zero?
-        break
-      end
+      break if pdhs.rows.count.zero?
 
-      Container.group_pdhs_by_manifest_size(pdhs, batch_size_max) do |grouped_pdhs|
+      Container.group_pdhs_by_manifest_size(pdhs.rows, batch_size_max) do |grouped_pdhs|
         any = true
         yield grouped_pdhs
         done += grouped_pdhs.size

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list