[ARVADOS] created: d59fa34b9c12853c4a8d2b8d91844476ccc705c7

Git user git at public.curoverse.com
Fri Apr 29 09:13:06 EDT 2016


        at  d59fa34b9c12853c4a8d2b8d91844476ccc705c7 (commit)


commit d59fa34b9c12853c4a8d2b8d91844476ccc705c7
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Fri Apr 29 09:11:15 2016 -0400

    8998: Monkey patch URI.decode_www_form_component to validate efficiently.
    
    Rack uses the standard library method URI.decode_www_form_component to process
    parameters.  This method first validates the string with a regular expression,
    and then decodes it using another regular expression.  The bug is in the
    validation; the regular expression that is used generates many backtracking
    points, which results in exponential memory growth when matching large strings.
    The fix is to tweak the validation regex to use "possessive" matching (?>)
    and (.*+) which eliminates backtracking.  The optimized regex requires minimal
    memory and is around 50% faster.

diff --git a/services/api/app/middlewares/arvados_api_token.rb b/services/api/app/middlewares/arvados_api_token.rb
index 57d3ad0..b5c9745 100644
--- a/services/api/app/middlewares/arvados_api_token.rb
+++ b/services/api/app/middlewares/arvados_api_token.rb
@@ -1,3 +1,20 @@
+
+module URI
+  # Rack uses the standard library method URI.decode_www_form_component to
+  # process parameters.  This method first validates the string with a regular
+  # expression, and then decodes it using another regular expression.  The bug
+  # is in the validation; the regular expression that is used generates many
+  # backtracking points, which results in exponential memory growth when
+  # matching large strings.  The fix is to tweak the validation regex to use
+  # "possessive" matching (?>) and (.*+) which eliminates backtracking.  The
+  # optimized regex requires minimal memory and is around 50% faster.
+  def self.decode_www_form_component(str, enc=Encoding::UTF_8)
+    raise ArgumentError, "invalid %-encoding (#{str})" unless /\A[^%]*+(?>%\h\h[^%]*+)*\z/ =~ str
+    str.b.gsub(/\+|%\h\h/, TBLDECWWWCOMP_).force_encoding(enc)
+  end
+end
+
+
 # Perform api_token checking very early in the request process.  We want to do
 # this in the Rack stack instead of in ApplicationController because
 # websockets needs access to authentication but doesn't use any of the rails
diff --git a/services/api/test/helpers/time_block.rb b/services/api/test/helpers/time_block.rb
index a3b03ff..c126b88 100644
--- a/services/api/test/helpers/time_block.rb
+++ b/services/api/test/helpers/time_block.rb
@@ -8,4 +8,16 @@ class ActiveSupport::TestCase
       $stderr.puts "#{t1 - t0}s #{label}"
     end
   end
+
+  def vmpeak c
+     open("/proc/self/status").each_line do |line|
+       print "Begin #{c} #{line}" if (line =~ /^VmHWM:/)
+     end
+     n = yield
+     open("/proc/self/status").each_line do |line|
+       print "End #{c} #{line}" if (line =~ /^VmHWM:/)
+     end
+     n
+  end
+
 end
diff --git a/services/api/test/integration/collections_performance_test.rb b/services/api/test/integration/collections_performance_test.rb
index 892060a..7f9f841 100644
--- a/services/api/test/integration/collections_performance_test.rb
+++ b/services/api/test/integration/collections_performance_test.rb
@@ -37,4 +37,18 @@ class CollectionsApiPerformanceTest < ActionDispatch::IntegrationTest
       delete '/arvados/v1/collections/' + uuid, {}, auth(:active)
     end
   end
+
+  test "test memory usage" do
+     hugemanifest = make_manifest(streams: 1,
+                                  files_per_stream: 2000,
+                                  blocks_per_file: 200,
+                                  bytes_per_block: 2**26,
+                                  api_token: api_token(:active))
+    json = time_block "JSON encode #{hugemanifest.length>>20}MiB manifest" do
+      Oj.dump({manifest_text: hugemanifest})
+    end
+     vmpeak "post" do
+       post '/arvados/v1/collections', {collection: json}, auth(:active)
+     end
+  end
 end

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list