[arvados] created: 2.5.0-229-ge1c7395cf
git repository hosting
git at public.arvados.org
Sun Mar 5 02:40:31 UTC 2023
at e1c7395cf6e649876132030c2011434581d3a66a (commit)
commit e1c7395cf6e649876132030c2011434581d3a66a
Author: Brett Smith <brett.smith at curii.com>
Date: Sat Mar 4 21:31:06 2023 -0500
19981: Reuse containers with various keep_cache constraints
See the comments for rationale and discussion.
Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith at curii.com>
diff --git a/services/api/app/models/container.rb b/services/api/app/models/container.rb
index fa6b6f296..e6643d4c7 100644
--- a/services/api/app/models/container.rb
+++ b/services/api/app/models/container.rb
@@ -228,10 +228,7 @@ class Container < ArvadosModel
rc['keep_cache_ram'] = Rails.configuration.Containers.DefaultKeepCacheRAM
end
if rc['keep_cache_disk'] == 0 and rc['keep_cache_ram'] == 0
- # If neither ram nor disk cache was specified and
- # DefaultKeepCacheRAM==0, default to disk cache with size equal
- # to RAM constraint (but at least 2 GiB and at most 32 GiB).
- rc['keep_cache_disk'] = [[rc['ram'] || 0, 2 << 30].max, 32 << 30].min
+ rc['keep_cache_disk'] = bound_keep_cache_disk(rc['ram'])
end
rc
end
@@ -299,30 +296,55 @@ class Container < ArvadosModel
candidates = candidates.where('secret_mounts_md5 = ?', secret_mounts_md5)
log_reuse_info(candidates) { "after filtering on secret_mounts_md5 #{secret_mounts_md5.inspect}" }
- if attrs[:runtime_constraints]['cuda'].nil?
- attrs[:runtime_constraints]['cuda'] = {
- 'device_count' => 0,
- 'driver_version' => '',
- 'hardware_capability' => '',
- }
- end
- resolved_runtime_constraints = [resolve_runtime_constraints(attrs[:runtime_constraints])]
- if resolved_runtime_constraints[0]['cuda']['device_count'] == 0
- # If no CUDA requested, extend search to include older container
- # records that don't have a 'cuda' section in runtime_constraints
- resolved_runtime_constraints << resolved_runtime_constraints[0].except('cuda')
- end
- if resolved_runtime_constraints[0]['keep_cache_disk'] == 0
- # If no disk cache requested, extend search to include older container
- # records that don't have a 'keep_cache_disk' field in runtime_constraints
- if resolved_runtime_constraints.length == 2
- # exclude the one that also excludes CUDA
- resolved_runtime_constraints << resolved_runtime_constraints[1].except('keep_cache_disk')
- end
- resolved_runtime_constraints << resolved_runtime_constraints[0].except('keep_cache_disk')
- end
-
- candidates = candidates.where_serialized(:runtime_constraints, resolved_runtime_constraints, md5: true, multivalue: true)
+ resolved_runtime_constraints = resolve_runtime_constraints(attrs[:runtime_constraints])
+ # Ideally we would completely ignore Keep cache constraints when making
+ # reuse considerations, but our database structure makes that impractical.
+ # The best we can do is generate a search that matches on all likely values.
+ runtime_constraint_variations = {
+ keep_cache_disk: [
+ # Check for constraints without keep_cache_disk
+ # (containers that predate the constraint)
+ nil,
+ # Containers that use keep_cache_ram instead
+ 0,
+ # The default value
+ bound_keep_cache_disk(resolved_runtime_constraints['ram']),
+ # The minimum default bound
+ bound_keep_cache_disk(0),
+ # The maximum default bound (presumably)
+ bound_keep_cache_disk(1 << 60),
+ # The requested value
+ resolved_runtime_constraints.delete('keep_cache_disk'),
+ ].uniq,
+ keep_cache_ram: [
+ # Containers that use keep_cache_disk instead
+ 0,
+ # The default value
+ Rails.configuration.Containers.DefaultKeepCacheRAM,
+ # The requested value
+ resolved_runtime_constraints.delete('keep_cache_ram'),
+ ].uniq,
+ }
+ resolved_cuda = resolved_runtime_constraints['cuda']
+ if resolved_cuda.nil? or resolved_cuda['device_count'] == 0
+ runtime_constraint_variations[:cuda] = [
+ # Check for constraints without cuda
+ # (containers that predate the constraint)
+ nil,
+ # The default "don't need CUDA" value
+ {
+ 'device_count' => 0,
+ 'driver_version' => '',
+ 'hardware_capability' => '',
+ },
+ # The requested value
+ resolved_runtime_constraints.delete('cuda')
+ ].uniq
+ end
+ reusable_runtime_constraints = hash_product(runtime_constraint_variations)
+ .map { |v| resolved_runtime_constraints.merge(v) }
+
+ candidates = candidates.where_serialized(:runtime_constraints, reusable_runtime_constraints, md5: true, multivalue: true)
log_reuse_info(candidates) { "after filtering on runtime_constraints #{attrs[:runtime_constraints].inspect}" }
log_reuse_info { "checking for state=Complete with readable output and log..." }
@@ -447,6 +469,31 @@ class Container < ArvadosModel
protected
+ def self.bound_keep_cache_disk(value)
+ value ||= 0
+ min_value = 2 << 30
+ max_value = 32 << 30
+ if value < min_value
+ min_value
+ elsif value > max_value
+ max_value
+ else
+ value
+ end
+ end
+
+ def self.hash_product(**kwargs)
+ # kwargs is a hash that maps parameters to an array of values.
+ # This function enumerates every possible hash where each key has one of
+ # the values from its array.
+ # The output keys are strings since that's what container hash attributes
+ # want.
+ # A nil value yields a hash without that key.
+ [[:_, nil]].product(
+ *kwargs.map { |(key, values)| [key.to_s].product(values) },
+ ).map { |param_pairs| Hash[param_pairs].compact }
+ end
+
def fill_field_defaults
self.state ||= Queued
self.environment ||= {}
diff --git a/services/api/test/unit/container_test.rb b/services/api/test/unit/container_test.rb
index 5da89c7ea..286aa32ae 100644
--- a/services/api/test/unit/container_test.rb
+++ b/services/api/test/unit/container_test.rb
@@ -40,6 +40,25 @@ class ContainerTest < ActiveSupport::TestCase
runtime_auth_scopes: ["all"]
}
+ REUSABLE_ATTRS_SLIM = {
+ command: ["echo", "slim"],
+ container_image: "9ae44d5792468c58bcf85ce7353c7027+124",
+ cwd: "test",
+ environment: {},
+ mounts: {},
+ output_path: "test",
+ runtime_auth_scopes: ["all"],
+ runtime_constraints: {
+ "API" => false,
+ "keep_cache_disk" => 0,
+ "keep_cache_ram" => 0,
+ "ram" => 8 << 30,
+ "vcpus" => 4
+ },
+ runtime_user_uuid: "zzzzz-tpzed-xurymjxw79nv3jz",
+ secret_mounts: {},
+ }
+
def request_only attrs
attrs.reject {|k| [:runtime_user_uuid, :runtime_auth_scopes].include? k}
end
@@ -1212,4 +1231,111 @@ class ContainerTest < ActiveSupport::TestCase
assert_equal(false, actual["preemptible"] || false)
assert_equal(0, actual["max_run_time"] || 0)
end
+
+ def run_container(request_params, final_attrs)
+ final_attrs[:state] ||= Container::Complete
+ if final_attrs[:state] == Container::Complete
+ final_attrs[:exit_code] ||= 0
+ final_attrs[:log] ||= collections(:log_collection).portable_data_hash
+ final_attrs[:output] ||= collections(:multilevel_collection_1).portable_data_hash
+ end
+ container, request = minimal_new(request_params)
+ container.lock
+ container.update_attributes!(state: Container::Running)
+ container.update_attributes!(final_attrs)
+ return container, request
+ end
+
+ def check_reuse_with_variations(default_keep_cache_ram, vary_attr, start_value, variations)
+ container_params = REUSABLE_ATTRS_SLIM.merge(vary_attr => start_value)
+ orig_default = Rails.configuration.Containers.DefaultKeepCacheRAM
+ begin
+ Rails.configuration.Containers.DefaultKeepCacheRAM = default_keep_cache_ram
+ set_user_from_auth :admin
+ expected, _ = run_container(container_params, {})
+ variations.each do |variation|
+ full_variation = REUSABLE_ATTRS_SLIM[vary_attr].merge(variation)
+ parameters = REUSABLE_ATTRS_SLIM.merge(vary_attr => full_variation)
+ actual = Container.find_reusable(parameters)
+ assert_equal(expected.uuid, actual&.uuid,
+ "request with #{vary_attr}=#{variation} did not reuse container")
+ end
+ ensure
+ Rails.configuration.Containers.DefaultKeepCacheRAM = orig_default
+ end
+ end
+
+ # Test that we can reuse a container with a known keep_cache_ram constraint,
+ # no matter what keep_cache_* constraints the new request uses.
+ [0, 2 << 30, 4 << 30].product(
+ [0, 1],
+ [true, false],
+ ).each do |(default_keep_cache_ram, multiplier, keep_disk_constraint)|
+ test "reuse request with DefaultKeepCacheRAM=#{default_keep_cache_ram}, keep_cache_ram*=#{multiplier}, keep_cache_disk=#{keep_disk_constraint}" do
+ runtime_constraints = REUSABLE_ATTRS_SLIM[:runtime_constraints].merge(
+ "keep_cache_ram" => default_keep_cache_ram * multiplier,
+ )
+ if not keep_disk_constraint
+ # Simulate a container that predates keep_cache_disk by deleting
+ # the constraint entirely.
+ runtime_constraints.delete("keep_cache_disk")
+ end
+ # Important values are:
+ # * 0
+ # * 2GiB, the minimum default keep_cache_disk
+ # * 8GiB, the default keep_cache_disk based on container ram
+ # * 32GiB, the maximum default keep_cache_disk
+ # Check these values and values in between.
+ vary_values = [0, 1, 2, 6, 8, 10, 32, 33].map { |v| v << 30 }.to_a
+ variations = vary_parameters(keep_cache_ram: vary_values)
+ .chain(vary_parameters(keep_cache_disk: vary_values))
+ check_reuse_with_variations(
+ default_keep_cache_ram,
+ :runtime_constraints,
+ runtime_constraints,
+ variations,
+ )
+ end
+ end
+
+ # Test that we can reuse a container with a known keep_cache_disk constraint,
+ # no matter what keep_cache_* constraints the new request uses.
+ # keep_cache_disk values are the important values discussed in the test above.
+ [0, 2 << 30, 4 << 30]
+ .product([0, 2 << 30, 8 << 30, 32 << 30])
+ .each do |(default_keep_cache_ram, keep_cache_disk)|
+ test "reuse request with DefaultKeepCacheRAM=#{default_keep_cache_ram} and keep_cache_disk=#{keep_cache_disk}" do
+ runtime_constraints = REUSABLE_ATTRS_SLIM[:runtime_constraints].merge(
+ "keep_cache_disk" => keep_cache_disk,
+ )
+ vary_values = [0, 1, 2, 6, 8, 10, 32, 33].map { |v| v << 30 }.to_a
+ variations = vary_parameters(keep_cache_ram: vary_values)
+ .chain(vary_parameters(keep_cache_disk: vary_values))
+ check_reuse_with_variations(
+ default_keep_cache_ram,
+ :runtime_constraints,
+ runtime_constraints,
+ variations,
+ )
+ end
+ end
+
+ # Test that a container request can reuse a container with an exactly
+ # matching keep_cache_* constraint, no matter what the defaults.
+ [0, 2 << 30, 4 << 30].product(
+ ["keep_cache_disk", "keep_cache_ram"],
+ [135790, 13 << 30, 135 << 30],
+ ).each do |(default_keep_cache_ram, constraint_key, constraint_value)|
+ test "reuse request with #{constraint_key}=#{constraint_value} and DefaultKeepCacheRAM=#{default_keep_cache_ram}" do
+ runtime_constraints = REUSABLE_ATTRS_SLIM[:runtime_constraints].merge(
+ constraint_key => constraint_value,
+ )
+ check_reuse_with_variations(
+ default_keep_cache_ram,
+ :runtime_constraints,
+ runtime_constraints,
+ [runtime_constraints],
+ )
+ end
+ end
end
commit 901f7a4f3723b982fd45562f74fb92c7900e0d47
Author: Brett Smith <brett.smith at curii.com>
Date: Sat Mar 4 10:54:21 2023 -0500
19981: Generalize vary_parameters test method
This is a pure reorganization commit intended to make this method
usable for tests that vary runtime constraints as well.
Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith at curii.com>
diff --git a/services/api/test/unit/container_test.rb b/services/api/test/unit/container_test.rb
index 289f6d7a6..5da89c7ea 100644
--- a/services/api/test/unit/container_test.rb
+++ b/services/api/test/unit/container_test.rb
@@ -1076,10 +1076,13 @@ class ContainerTest < ActiveSupport::TestCase
})
end
- def vary_scheduling_parameters(**kwargs)
- # kwargs is a hash that maps scheduling parameters to an array of values.
- # This function enumerates every possible combination of
- # scheduling parameters from those keys and their associated values.
+ def vary_parameters(**kwargs)
+ # kwargs is a hash that maps parameters to an array of values.
+ # This function enumerates every possible hash where each key has one of
+ # the values from its array.
+ # The output keys are strings since that's what container hash attributes
+ # want.
+ # A nil value yields a hash without that key.
[[:_, nil]].product(
*kwargs.map { |(key, values)| [key.to_s].product(values) },
).map { |param_pairs| Hash[param_pairs].compact }
@@ -1119,7 +1122,7 @@ class ContainerTest < ActiveSupport::TestCase
).each do |preemptible_a|
test "retry requests scheduled with preemptible=#{preemptible_a}" do
configure_preemptible_instance_type
- param_hashes = vary_scheduling_parameters(preemptible: preemptible_a)
+ param_hashes = vary_parameters(preemptible: preemptible_a)
container = retry_with_scheduling_parameters(param_hashes)
assert_equal(preemptible_a.all?,
container.scheduling_parameters["preemptible"] || false)
@@ -1131,7 +1134,7 @@ class ContainerTest < ActiveSupport::TestCase
partition_values.permutation(2),
).each do |partitions_a|
test "retry requests scheduled with partitions=#{partitions_a}" do
- param_hashes = vary_scheduling_parameters(partitions: partitions_a)
+ param_hashes = vary_parameters(partitions: partitions_a)
container = retry_with_scheduling_parameters(param_hashes)
expected = if partitions_a.any? { |value| value.nil? or value.empty? }
[]
@@ -1149,7 +1152,7 @@ class ContainerTest < ActiveSupport::TestCase
runtime_values.permutation(3),
).each do |max_run_time_a|
test "retry requests scheduled with max_run_time=#{max_run_time_a}" do
- param_hashes = vary_scheduling_parameters(max_run_time: max_run_time_a)
+ param_hashes = vary_parameters(max_run_time: max_run_time_a)
container = retry_with_scheduling_parameters(param_hashes)
expected = if max_run_time_a.any? { |value| value.nil? or value == 0 }
0
@@ -1184,7 +1187,7 @@ class ContainerTest < ActiveSupport::TestCase
test "retry requests with unset scheduling parameters" do
configure_preemptible_instance_type
- param_hashes = vary_scheduling_parameters(
+ param_hashes = vary_parameters(
preemptible: [nil, true],
partitions: [nil, ["alpha"]],
max_run_time: [nil, 5],
@@ -1198,7 +1201,7 @@ class ContainerTest < ActiveSupport::TestCase
test "retry requests with default scheduling parameters" do
configure_preemptible_instance_type
- param_hashes = vary_scheduling_parameters(
+ param_hashes = vary_parameters(
preemptible: [false, true],
partitions: [[], ["bravo"]],
max_run_time: [0, 1],
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list