[ARVADOS] created: d59fa34b9c12853c4a8d2b8d91844476ccc705c7
Git user
git at public.curoverse.com
Fri Apr 29 09:13:06 EDT 2016
at d59fa34b9c12853c4a8d2b8d91844476ccc705c7 (commit)
commit d59fa34b9c12853c4a8d2b8d91844476ccc705c7
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Fri Apr 29 09:11:15 2016 -0400
8998: Monkey patch URI.decode_www_form_component to validate efficiently.
Rack uses the standard library method URI.decode_www_form_component to process
parameters. This method first validates the string with a regular expression,
and then decodes it using another regular expression. The bug is in the
validation; the regular expression that is used generates many backtracking
points, which results in exponential memory growth when matching large strings.
The fix is to tweak the validation regex to use "possessive" matching (?>)
and (.*+) which eliminates backtracking. The optimized regex requires minimal
memory and is around 50% faster.
diff --git a/services/api/app/middlewares/arvados_api_token.rb b/services/api/app/middlewares/arvados_api_token.rb
index 57d3ad0..b5c9745 100644
--- a/services/api/app/middlewares/arvados_api_token.rb
+++ b/services/api/app/middlewares/arvados_api_token.rb
@@ -1,3 +1,20 @@
+
+module URI
+ # Rack uses the standard library method URI.decode_www_form_component to
+ # process parameters. This method first validates the string with a regular
+ # expression, and then decodes it using another regular expression. The bug
+ # is in the validation; the regular expression that is used generates many
+ # backtracking points, which results in exponential memory growth when
+ # matching large strings. The fix is to tweak the validation regex to use
+ # "possessive" matching, (?>...) and [^%]*+, which eliminates backtracking.
+ # The optimized regex requires minimal memory and is around 50% faster.
+ def self.decode_www_form_component(str, enc=Encoding::UTF_8)
+ raise ArgumentError, "invalid %-encoding (#{str})" unless /\A[^%]*+(?>%\h\h[^%]*+)*\z/ =~ str
+ str.b.gsub(/\+|%\h\h/, TBLDECWWWCOMP_).force_encoding(enc)
+ end
+end
+
+
# Perform api_token checking very early in the request process. We want to do
# this in the Rack stack instead of in ApplicationController because
# websockets needs access to authentication but doesn't use any of the rails
diff --git a/services/api/test/helpers/time_block.rb b/services/api/test/helpers/time_block.rb
index a3b03ff..c126b88 100644
--- a/services/api/test/helpers/time_block.rb
+++ b/services/api/test/helpers/time_block.rb
@@ -8,4 +8,16 @@ class ActiveSupport::TestCase
$stderr.puts "#{t1 - t0}s #{label}"
end
end
+
+ def vmpeak c # Prints the VmHWM line of /proc/self/status before and after the block, labelled with c.
+ open("/proc/self/status").each_line do |line| # NOTE(review): this handle is never explicitly closed
+ print "Begin #{c} #{line}" if (line =~ /^VmHWM:/)
+ end
+ n = yield # run the caller's block and keep its result
+ open("/proc/self/status").each_line do |line| # re-read the status file after the block has run
+ print "End #{c} #{line}" if (line =~ /^VmHWM:/)
+ end
+ n # return the block's value to the caller
+ end
+
end
diff --git a/services/api/test/integration/collections_performance_test.rb b/services/api/test/integration/collections_performance_test.rb
index 892060a..7f9f841 100644
--- a/services/api/test/integration/collections_performance_test.rb
+++ b/services/api/test/integration/collections_performance_test.rb
@@ -37,4 +37,18 @@ class CollectionsApiPerformanceTest < ActionDispatch::IntegrationTest
delete '/arvados/v1/collections/' + uuid, {}, auth(:active)
end
end
+
+ test "test memory usage" do # POSTs a large JSON-encoded manifest inside vmpeak
+ hugemanifest = make_manifest(streams: 1,
+ files_per_stream: 2000,
+ blocks_per_file: 200,
+ bytes_per_block: 2**26, # 2**26 bytes = 64 MiB
+ api_token: api_token(:active))
+ json = time_block "JSON encode #{hugemanifest.length>>20}MiB manifest" do # length>>20 scales the length to MiB for the label
+ Oj.dump({manifest_text: hugemanifest})
+ end
+ vmpeak "post" do # "post" is the label passed to vmpeak
+ post '/arvados/v1/collections', {collection: json}, auth(:active)
+ end
+ end
end
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list