[ARVADOS] updated: db6c09cc6dfa9d50a18261332deb59dc6f7c0421

git at public.curoverse.com git at public.curoverse.com
Wed Jun 10 10:15:16 EDT 2015


Summary of changes:
 services/api/app/models/blob.rb                    |  2 +-
 services/api/app/models/collection.rb              | 88 ++++++++++------------
 .../api/test/unit/collection_performance_test.rb   |  1 +
 services/api/test/unit/collection_test.rb          | 20 +++++
 4 files changed, 62 insertions(+), 49 deletions(-)

       via  db6c09cc6dfa9d50a18261332deb59dc6f7c0421 (commit)
       via  02156d71070b21872d2dac3c5912c9a1ccd8d684 (commit)
       via  a46c4dd42c265a8bbbb25ea258f683571f82c0ca (commit)
       via  7d838bbf3337b2600fecb93322e6466b9a690a42 (commit)
       via  1cd7fd3867acabeb29196da4cf505a0eb703b287 (commit)
       via  ecd7fa019eb947129a574b45359907ec4044b985 (commit)
       via  b61f983768aab3d4debe493c10f3e5633eece3bd (commit)
       via  a39f70770e58e95a143d143a640870a6e06627f6 (commit)
       via  cee6438c3ab1ed1bf66fffeb4b1b34282ca7ec7f (commit)
       via  48b208a03d4ef74c47858beb0fe2a0ad84ee0428 (commit)
       via  a42a2a77fe6e8dfd9068506748f6c65609f57c71 (commit)
      from  f560e8aeb2a61cd4941f84e4fe119d27eefe6d8f (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit db6c09cc6dfa9d50a18261332deb59dc6f7c0421
Merge: 7d838bb 02156d7
Author: radhika <radhika at curoverse.com>
Date:   Wed Jun 10 10:06:35 2015 -0400

    Merge branch '6203-collection-perf-api-TC' of git.curoverse.com:arvados into 6203-collection-perf-api-TC
    
    Conflicts:
    	services/api/app/models/collection.rb


commit 02156d71070b21872d2dac3c5912c9a1ccd8d684
Author: Tom Clegg <tom at curoverse.com>
Date:   Mon Jun 8 10:10:26 2015 -0400

    6203: Accept (and discard) hints in client-provided portable_data_hash.

diff --git a/services/api/app/models/collection.rb b/services/api/app/models/collection.rb
index 9407186..bbced41 100644
--- a/services/api/app/models/collection.rb
+++ b/services/api/app/models/collection.rb
@@ -128,8 +128,8 @@ class Collection < ArvadosModel
       true
     elsif portable_data_hash.nil? or not portable_data_hash_changed?
       self.portable_data_hash = computed_pdh
-    elsif portable_data_hash !~ /^[0-9a-f]{32}(\+\d+)?$/
-      errors.add(:portable_data_hash, "is not a valid hash or hash+size")
+    elsif portable_data_hash !~ Keep::Locator::LOCATOR_REGEXP
+      errors.add(:portable_data_hash, "is not a valid locator")
       false
     elsif portable_data_hash[0..31] != computed_pdh[0..31]
       errors.add(:portable_data_hash,

commit a46c4dd42c265a8bbbb25ea258f683571f82c0ca
Author: Tom Clegg <tom at curoverse.com>
Date:   Mon Jun 8 03:11:22 2015 -0400

    6203: Remove unused vars. Remove unnecessary newline manipulation.

diff --git a/services/api/app/models/collection.rb b/services/api/app/models/collection.rb
index 4b9d568..9407186 100644
--- a/services/api/app/models/collection.rb
+++ b/services/api/app/models/collection.rb
@@ -218,9 +218,8 @@ class Collection < ArvadosModel
     new_lines = []
     manifest.each_line do |line|
       line.rstrip!
-      words = line.split(' ')
       new_words = []
-      words.each do |word|
+      line.split(' ').each do |word|
         if match = Keep::Locator::LOCATOR_REGEXP.match(word)
           new_words << yield(match)
         else
@@ -229,17 +228,16 @@ class Collection < ArvadosModel
       end
       new_lines << new_words.join(' ')
     end
-
-    manifest = new_lines.join("\n") + "\n"
+    new_lines.join("\n") + "\n"
   end
 
   def self.each_manifest_locator manifest
     # Given a manifest text and a block, yield the regexp match object
     # for each locator.
     manifest.each_line do |line|
-      line.rstrip!
-      words = line.split(' ')
-      words.each do |word|
+      # line will have a trailing newline, but the last token is never
+      # a locator, so it's harmless here.
+      line.split(' ').each do |word|
         if match = Keep::Locator::LOCATOR_REGEXP.match(word)
           yield(match)
         end
diff --git a/services/api/test/unit/collection_test.rb b/services/api/test/unit/collection_test.rb
index ca19da6..42748ac 100644
--- a/services/api/test/unit/collection_test.rb
+++ b/services/api/test/unit/collection_test.rb
@@ -140,7 +140,8 @@ class CollectionTest < ActiveSupport::TestCase
    [true, pdhmd5],
    [true, pdhmd5+'+12345'],
    [true, pdhmd5+'+'+pdhmanifest.length.to_s],
-   [false, pdhmd5+'+Foo'],
+   [true, pdhmd5+'+12345+Foo'],
+   [true, pdhmd5+'+Foo'],
    [false, Digest::MD5.hexdigest(pdhmanifest.strip)],
    [false, Digest::MD5.hexdigest(pdhmanifest.strip)+'+'+pdhmanifest.length.to_s],
    [false, pdhmd5[0..30]],

commit 7d838bbf3337b2600fecb93322e6466b9a690a42
Author: Tom Clegg <tom at curoverse.com>
Date:   Mon Jun 8 03:11:22 2015 -0400

    6203: Remove unused vars. Remove unnecessary newline manipulation.

diff --git a/services/api/app/models/collection.rb b/services/api/app/models/collection.rb
index 4b9d568..2b47968 100644
--- a/services/api/app/models/collection.rb
+++ b/services/api/app/models/collection.rb
@@ -218,9 +218,8 @@ class Collection < ArvadosModel
     new_lines = []
     manifest.each_line do |line|
       line.rstrip!
-      words = line.split(' ')
       new_words = []
-      words.each do |word|
+      line.split(' ').each do |word|
         if match = Keep::Locator::LOCATOR_REGEXP.match(word)
           new_words << yield(match)
         else
@@ -229,17 +228,16 @@ class Collection < ArvadosModel
       end
       new_lines << new_words.join(' ')
     end
-
-    manifest = new_lines.join("\n") + "\n"
+    new_lines.join("\n") + "\n"
   end
 
   def self.each_manifest_locator manifest
     # Given a manifest text and a block, yield the regexp match object
     # for each locator.
     manifest.each_line do |line|
-      line.rstrip!
-      words = line.split(' ')
-      words.each do |word|
+      # line will have a trailing newline, but the last token is never
+      # a locator, so it's harmless here.
+      line.each_line(' ') do |word|
         if match = Keep::Locator::LOCATOR_REGEXP.match(word)
           yield(match)
         end

commit 1cd7fd3867acabeb29196da4cf505a0eb703b287
Author: Tom Clegg <tom at curoverse.com>
Date:   Mon Jun 8 02:23:57 2015 -0400

    6203: Use faster =~ instead of match.

diff --git a/services/api/app/models/blob.rb b/services/api/app/models/blob.rb
index 56cdfb8..34600d7 100644
--- a/services/api/app/models/blob.rb
+++ b/services/api/app/models/blob.rb
@@ -90,7 +90,7 @@ class Blob
     if !timestamp
       raise Blob::InvalidSignatureError.new 'No signature provided.'
     end
-    if !timestamp.match /^[\da-f]+$/
+    unless timestamp =~ /^[\da-f]+$/
       raise Blob::InvalidSignatureError.new 'Timestamp is not a base16 number.'
     end
     if timestamp.to_i(16) < (opts[:now] or db_current_time.to_i)

commit ecd7fa019eb947129a574b45359907ec4044b985
Author: Tom Clegg <tom at curoverse.com>
Date:   Mon Jun 8 02:23:37 2015 -0400

    6203: Fix cheating test.

diff --git a/services/api/test/unit/collection_performance_test.rb b/services/api/test/unit/collection_performance_test.rb
index 075b4c5..37da5fd 100644
--- a/services/api/test/unit/collection_performance_test.rb
+++ b/services/api/test/unit/collection_performance_test.rb
@@ -33,6 +33,7 @@ class CollectionModelPerformanceTest < ActiveSupport::TestCase
         c.check_signatures
       end
       time_block 'check signatures + save' do
+        c.instance_eval do @signatures_checked = false end
         c.save!
       end
       c = time_block 'read' do

commit b61f983768aab3d4debe493c10f3e5633eece3bd
Author: Tom Clegg <tom at curoverse.com>
Date:   Mon Jun 8 02:23:20 2015 -0400

    6203: Use each_line instead of split.each.

diff --git a/services/api/app/models/collection.rb b/services/api/app/models/collection.rb
index 81da63d..4b9d568 100644
--- a/services/api/app/models/collection.rb
+++ b/services/api/app/models/collection.rb
@@ -61,7 +61,7 @@ class Collection < ArvadosModel
         api_token: api_token,
         now: db_current_time.to_i,
       }
-      self.manifest_text.lines.each do |entry|
+      self.manifest_text.each_line do |entry|
         entry.split[1..-1].each do |tok|
           if tok =~ FILE_TOKEN
             # This is a filename token, not a blob locator. Note that we

commit a39f70770e58e95a143d143a640870a6e06627f6
Author: Tom Clegg <tom at curoverse.com>
Date:   Mon Jun 8 02:22:33 2015 -0400

    6203: Remove redundant split before each_line.

diff --git a/services/api/app/models/collection.rb b/services/api/app/models/collection.rb
index c4740f9..81da63d 100644
--- a/services/api/app/models/collection.rb
+++ b/services/api/app/models/collection.rb
@@ -216,7 +216,6 @@ class Collection < ArvadosModel
     return '' if manifest == ''
 
     new_lines = []
-    lines = manifest.split("\n")
     manifest.each_line do |line|
       line.rstrip!
       words = line.split(' ')

commit cee6438c3ab1ed1bf66fffeb4b1b34282ca7ec7f
Author: Tom Clegg <tom at curoverse.com>
Date:   Mon Jun 8 01:50:30 2015 -0400

    6203: Apply special case only to a 0-byte manifest: don't ignore white space.

diff --git a/services/api/app/models/collection.rb b/services/api/app/models/collection.rb
index 95154c7..c4740f9 100644
--- a/services/api/app/models/collection.rb
+++ b/services/api/app/models/collection.rb
@@ -213,7 +213,7 @@ class Collection < ArvadosModel
     # for each locator. Return a new manifest in which each locator
     # has been replaced by the block's return value.
     return nil if !manifest
-    return '' if !manifest.present?
+    return '' if manifest == ''
 
     new_lines = []
     lines = manifest.split("\n")

commit 48b208a03d4ef74c47858beb0fe2a0ad84ee0428
Author: Tom Clegg <tom at curoverse.com>
Date:   Mon Jun 8 01:49:06 2015 -0400

    6203: Eliminate unneeded variable.

diff --git a/services/api/app/models/collection.rb b/services/api/app/models/collection.rb
index ca97161..95154c7 100644
--- a/services/api/app/models/collection.rb
+++ b/services/api/app/models/collection.rb
@@ -331,14 +331,13 @@ class Collection < ArvadosModel
 
   protected
   def portable_manifest_text
-    portable_manifest = self.class.munge_manifest_locators(manifest_text) do |match|
+    self.class.munge_manifest_locators(manifest_text) do |match|
       if match[2] # size
         match[1] + match[2]
       else
         match[1]
       end
     end
-    portable_manifest
   end
 
   def compute_pdh

commit a42a2a77fe6e8dfd9068506748f6c65609f57c71
Author: Tom Clegg <tom at curoverse.com>
Date:   Mon Jun 8 01:48:45 2015 -0400

    6203: Merge pdh validations into one method. Update comments. Add tests.

diff --git a/services/api/app/models/collection.rb b/services/api/app/models/collection.rb
index 16e32d9..ca97161 100644
--- a/services/api/app/models/collection.rb
+++ b/services/api/app/models/collection.rb
@@ -11,7 +11,6 @@ class Collection < ArvadosModel
   before_validation :check_encoding
   before_validation :check_signatures
   before_validation :strip_signatures_and_update_replication_confirmed
-  before_validation :set_portable_data_hash
   validate :ensure_pdh_matches_manifest_text
   before_save :set_file_names
 
@@ -92,9 +91,6 @@ class Collection < ArvadosModel
 
   def strip_signatures_and_update_replication_confirmed
     if self.manifest_text_changed?
-      # If the new manifest_text contains locators whose hashes
-      # weren't in the old manifest_text, storage replication is no
-      # longer confirmed.
       in_old_manifest = {}
       if not self.replication_confirmed.nil?
         self.class.each_manifest_locator(manifest_text_was) do |match|
@@ -102,49 +98,47 @@ class Collection < ArvadosModel
         end
       end
 
-      # Remove any permission signatures from the manifest.
-      self[:manifest_text] = self.class.munge_manifest_locators(self[:manifest_text]) do |match|
+      stripped_manifest = self.class.munge_manifest_locators(manifest_text) do |match|
         if not self.replication_confirmed.nil? and not in_old_manifest[match[1]]
+          # If the new manifest_text contains locators whose hashes
+          # weren't in the old manifest_text, storage replication is no
+          # longer confirmed.
           self.replication_confirmed_at = nil
           self.replication_confirmed = nil
         end
+
+        # Return the locator with all permission signatures removed,
+        # but otherwise intact.
         match[0].gsub(/\+A[^+]*/, '')
       end
-    end
-    @computed_pdh_for_manifest_text = self[:manifest_text] if @computed_pdh_for_manifest_text
-    true
-  end
 
-  def set_portable_data_hash
-    if (portable_data_hash.nil? or
-        portable_data_hash == "" or
-        (manifest_text_changed? and !portable_data_hash_changed?))
-      self.portable_data_hash = computed_pdh
-    elsif portable_data_hash_changed?
-      begin
-        loc = Keep::Locator.parse!(self.portable_data_hash)
-        loc.strip_hints!
-        if loc.size
-          self.portable_data_hash = loc.to_s
-        else
-          self.portable_data_hash = "#{loc.hash}+#{portable_manifest_text.bytesize}"
-        end
-      rescue ArgumentError => e
-        errors.add(:portable_data_hash, "#{e}")
-        return false
+      if @computed_pdh_for_manifest_text == manifest_text
+        # If the cached PDH was valid before stripping, it is still
+        # valid after stripping.
+        @computed_pdh_for_manifest_text = stripped_manifest.dup
       end
+
+      self[:manifest_text] = stripped_manifest
     end
     true
   end
 
   def ensure_pdh_matches_manifest_text
-    return true unless manifest_text_changed? or portable_data_hash_changed?
-    # No need verify it if :set_portable_data_hash just computed it!
-    expect_pdh = computed_pdh
-    if expect_pdh != portable_data_hash
+    if not manifest_text_changed? and not portable_data_hash_changed?
+      true
+    elsif portable_data_hash.nil? or not portable_data_hash_changed?
+      self.portable_data_hash = computed_pdh
+    elsif portable_data_hash !~ /^[0-9a-f]{32}(\+\d+)?$/
+      errors.add(:portable_data_hash, "is not a valid hash or hash+size")
+      false
+    elsif portable_data_hash[0..31] != computed_pdh[0..31]
       errors.add(:portable_data_hash,
-                 "does not match computed hash #{expect_pdh}")
-      return false
+                 "does not match computed hash #{computed_pdh}")
+      false
+    else
+      # Ignore the client-provided size part: always store
+      # computed_pdh in the database.
+      self.portable_data_hash = computed_pdh
     end
   end
 
@@ -215,8 +209,9 @@ class Collection < ArvadosModel
   end
 
   def self.munge_manifest_locators manifest
-    # Given a manifest text and a block, yield each locator,
-    # and replace it with whatever the block returns.
+    # Given a manifest text and a block, yield the regexp MatchData
+    # for each locator. Return a new manifest in which each locator
+    # has been replaced by the block's return value.
     return nil if !manifest
     return '' if !manifest.present?
 
@@ -240,7 +235,8 @@ class Collection < ArvadosModel
   end
 
   def self.each_manifest_locator manifest
-    # Given a manifest text and a block, yield each locator.
+    # Given a manifest text and a block, yield the regexp match object
+    # for each locator.
     manifest.each_line do |line|
       line.rstrip!
       words = line.split(' ')
diff --git a/services/api/test/unit/collection_test.rb b/services/api/test/unit/collection_test.rb
index f479c6a..ca19da6 100644
--- a/services/api/test/unit/collection_test.rb
+++ b/services/api/test/unit/collection_test.rb
@@ -134,6 +134,25 @@ class CollectionTest < ActiveSupport::TestCase
     end
   end
 
+  pdhmanifest = ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:x\n"
+  pdhmd5 = Digest::MD5.hexdigest pdhmanifest
+  [[true, nil],
+   [true, pdhmd5],
+   [true, pdhmd5+'+12345'],
+   [true, pdhmd5+'+'+pdhmanifest.length.to_s],
+   [false, pdhmd5+'+Foo'],
+   [false, Digest::MD5.hexdigest(pdhmanifest.strip)],
+   [false, Digest::MD5.hexdigest(pdhmanifest.strip)+'+'+pdhmanifest.length.to_s],
+   [false, pdhmd5[0..30]],
+   [false, pdhmd5[0..30]+'z'],
+   [false, pdhmd5[0..24]+'000000000'],
+   [false, pdhmd5[0..24]+'000000000+0']].each do |isvalid, pdh|
+    test "portable_data_hash #{pdh.inspect} valid? == #{isvalid}" do
+      c = Collection.new manifest_text: pdhmanifest, portable_data_hash: pdh
+      assert_equal isvalid, c.valid?, c.errors.full_messages.to_s
+    end
+  end
+
   [0, 2, 4, nil].each do |ask|
     test "set replication_desired to #{ask.inspect}" do
       Rails.configuration.default_collection_replication = 2

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list