[ARVADOS] updated: 1.3.0-596-g8c82f404b

Git user git at public.curoverse.com
Mon Mar 25 22:31:00 UTC 2019


Summary of changes:
 services/api/app/models/container.rb     | 61 ++++++++++++++++++++++++++++++++
 services/api/db/structure.sql            |  6 +++-
 services/api/test/unit/container_test.rb | 13 +++++++
 3 files changed, 79 insertions(+), 1 deletion(-)

       via  8c82f404b48a159797bd0e96e3d0098f0cf3ba16 (commit)
       via  b0dbec2e1e496e29551dcb01a85328f8982f026f (commit)
      from  19ea2726815a0d74f718339b1e42f76cc4bb463c (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit 8c82f404b48a159797bd0e96e3d0098f0cf3ba16
Author: Eric Biagiotti <ebiagiotti at veritasgenetics.com>
Date:   Mon Mar 25 18:30:40 2019 -0400

    14484: Adds functionality and test for pdh grouping in the container model
    
    Arvados-DCO-1.1-Signed-off-by: Eric Biagiotti <ebiagiotti at veritasgenetics.com>

diff --git a/services/api/app/models/container.rb b/services/api/app/models/container.rb
index abcfdbd29..b3328d9c7 100644
--- a/services/api/app/models/container.rb
+++ b/services/api/app/models/container.rb
@@ -405,6 +405,67 @@ class Container < ArvadosModel
     end
   end
 
+  # NOTE: Migration 20190322174136_add_file_info_to_collection.rb relies on this function.
+  #
+  # Change with caution!
+  #
+  # Correctly groups pdhs to use for batch database updates. Helps avoid
+  # updating too many database rows in a single transaction.
+  def self.group_pdhs_for_multiple_transactions(log_prefix)
+    batch_size_max = 1 << 28 # 256 MiB
+    last_pdh = '0'
+    done = 0
+    any = true
+
+    total = ActiveRecord::Base.connection.exec_query(
+      'SELECT DISTINCT portable_data_hash FROM collections'
+    ).rows.count
+
+    while any
+      any = false
+      pdhs = ActiveRecord::Base.connection.exec_query(
+        'SELECT DISTINCT portable_data_hash FROM collections '\
+        "WHERE portable_data_hash > '#{last_pdh}' "\
+        'GROUP BY portable_data_hash LIMIT 1000'
+      )
+      if pdhs.rows.count.zero?
+        break
+      end
+
+      Container.group_pdhs_by_manifest_size(pdhs, batch_size_max) do |grouped_pdhs|
+        any = true
+        yield grouped_pdhs
+        done += grouped_pdhs.size
+        last_pdh = pdhs[-1]
+        Rails.logger.info(log_prefix + ": #{done}/#{total}")
+      end
+    end
+    Rails.logger.info(log_prefix + ': finished')
+  end
+
+  # NOTE: Migration 20190322174136_add_file_info_to_collection.rb relies on this function.
+  # Change with caution!
+  #
+  # Splits pdhs (strings whose integer after the first '+' is read as the manifest
+  # size) into consecutive batches and yields each batch as an array. A batch is closed
+  # before a pdh would push its summed sizes above batch_size_max, so a pdh larger than
+  # batch_size_max is yielded by itself. An empty input yields a single empty array.
+  def self.group_pdhs_by_manifest_size(pdhs, batch_size_max)
+    batch_size = 0
+    batch_pdhs = {}
+    pdhs.each do |pdh|
+      manifest_size = pdh.split('+')[1].to_i # size is the integer after the first '+'
+      if batch_size > 0 && batch_size + manifest_size > batch_size_max # never flush an empty batch
+        yield batch_pdhs.keys
+        batch_pdhs = {}
+        batch_size = 0
+      end
+      batch_pdhs[pdh] = true # hash keys de-duplicate pdhs within a batch, in insertion order
+      batch_size += manifest_size
+    end
+    yield batch_pdhs.keys # flush the last batch (an empty array when pdhs was empty)
+  end
+
   protected
 
   def fill_field_defaults
diff --git a/services/api/test/unit/container_test.rb b/services/api/test/unit/container_test.rb
index 1a53df7da..2b7fda8d7 100644
--- a/services/api/test/unit/container_test.rb
+++ b/services/api/test/unit/container_test.rb
@@ -956,4 +956,17 @@ class ContainerTest < ActiveSupport::TestCase
       assert_no_secrets_logged
     end
   end
+
+  # NOTE: Migration 20190322174136_add_file_info_to_collection.rb 
+  # relies on this test. Change with caution!
+  test "pdh_grouping_by_manifest_size" do
+    batch_size_max = 200
+    pdhs_in = ['x1+30', 'x2+30', 'x3+201', 'x4+100', 'x5+100']
+    batched_pdhs = []
+    Container.group_pdhs_by_manifest_size(pdhs_in, batch_size_max) do |pdhs|
+      batched_pdhs << pdhs
+    end
+    expected = [['x1+30', 'x2+30'], ['x3+201'], ['x4+100', 'x5+100']]
+    assert_equal(batched_pdhs, expected)
+  end
 end

commit b0dbec2e1e496e29551dcb01a85328f8982f026f
Author: Eric Biagiotti <ebiagiotti at veritasgenetics.com>
Date:   Mon Mar 25 16:17:37 2019 -0400

    14484: Updates the db structure after db:migrate
    
    Arvados-DCO-1.1-Signed-off-by: Eric Biagiotti <ebiagiotti at veritasgenetics.com>

diff --git a/services/api/db/structure.sql b/services/api/db/structure.sql
index f766f33e1..1eefc1300 100644
--- a/services/api/db/structure.sql
+++ b/services/api/db/structure.sql
@@ -175,7 +175,9 @@ CREATE TABLE public.collections (
     storage_classes_confirmed_at timestamp without time zone,
     current_version_uuid character varying,
     version integer DEFAULT 1 NOT NULL,
-    preserve_version boolean DEFAULT false
+    preserve_version boolean DEFAULT false,
+    file_count integer DEFAULT 0 NOT NULL,
+    file_size_total integer DEFAULT 0 NOT NULL
 );
 
 
@@ -3220,3 +3222,5 @@ INSERT INTO schema_migrations (version) VALUES ('20181213183234');
 
 INSERT INTO schema_migrations (version) VALUES ('20190214214814');
 
+INSERT INTO schema_migrations (version) VALUES ('20190322174136');
+

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list