[arvados] created: 2.5.0-25-g1daba259f
git repository hosting
git at public.arvados.org
Thu Jan 19 21:13:29 UTC 2023
at 1daba259fa9aec331819a4f29500e6dfa7a58d6a (commit)
commit 1daba259fa9aec331819a4f29500e6dfa7a58d6a
Author: Brett Smith <brett.smith at curii.com>
Date: Thu Jan 19 15:39:29 2023 -0500
19917: Synthesize scheduling parameters when retrying a container
When we retry a cancelled container, there may be any number of
container requests that want it, each with their own scheduling
parameters. Create a new set of scheduling parameters for the new
container where each parameter has the most lax setting from all the
outstanding container requests.
Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith at curii.com>
diff --git a/services/api/app/models/container.rb b/services/api/app/models/container.rb
index 21e8fcf50..42d0ed49b 100644
--- a/services/api/app/models/container.rb
+++ b/services/api/app/models/container.rb
@@ -718,6 +718,31 @@ class Container < ArvadosModel
end
if retryable_requests.any?
+ scheduling_parameters = {
+ # partitions: empty if any are empty, else the union of all parameters
+ "partitions": retryable_requests
+ .map { |req| req.scheduling_parameters["partitions"] || [] }
+ .reduce { |cur, new| (cur.empty? or new.empty?) ? [] : (cur | new) },
+
+ # preemptible: true if all are true, else false
+ "preemptible": retryable_requests
+ .map { |req| req.scheduling_parameters["preemptible"] }
+ .all?,
+
+ # max_run_time: 0 if any are 0 (unlimited), else the maximum
+ "max_run_time": retryable_requests
+ .map { |req| req.scheduling_parameters["max_run_time"] || 0 }
+ .reduce do |cur, new|
+ if cur == 0 or new == 0
+ 0
+ elsif new > cur
+ new
+ else
+ cur
+ end
+ end,
+ }
+
c_attrs = {
command: self.command,
cwd: self.cwd,
@@ -726,7 +751,7 @@ class Container < ArvadosModel
container_image: self.container_image,
mounts: self.mounts,
runtime_constraints: self.runtime_constraints,
- scheduling_parameters: self.scheduling_parameters,
+ scheduling_parameters: scheduling_parameters,
secret_mounts: prev_secret_mounts,
runtime_token: prev_runtime_token,
runtime_user_uuid: self.runtime_user_uuid,
diff --git a/services/api/test/unit/container_test.rb b/services/api/test/unit/container_test.rb
index f804aca2d..396fba6ae 100644
--- a/services/api/test/unit/container_test.rb
+++ b/services/api/test/unit/container_test.rb
@@ -1063,4 +1063,133 @@ class ContainerTest < ActiveSupport::TestCase
assert_no_secrets_logged
end
end
+
+ def configure_preemptible_instance_type
+ Rails.configuration.InstanceTypes = ConfigLoader.to_OrderedOptions({
+ "a1.small.pre" => {
+ "Preemptible" => true,
+ "Price" => 0.1,
+ "ProviderType" => "a1.small",
+ "VCPUs" => 1,
+ "RAM" => 1000000000,
+ },
+ })
+ end
+
+ def retry_with_scheduling_parameter(scheduling_key, schedule_values)
+ set_user_from_auth :admin
+ containers = {}
+ requests = []
+ schedule_values.each do |schedule_value|
+ scheduling_params = {}
+ if not schedule_value.nil?
+ scheduling_params[scheduling_key] = schedule_value
+ end
+ container, request = minimal_new(scheduling_parameters: scheduling_params)
+ containers[container.uuid] = container
+ requests << request
+ end
+ assert_equal(1, containers.length)
+ _, container1 = containers.shift
+ container1.lock
+ container1.update_attributes!(state: Container::Cancelled)
+ container1.reload
+ request1 = requests.shift
+ request1.reload
+ assert_not_equal(container1.uuid, request1.container_uuid)
+ requests.each do |request|
+ request.reload
+ assert_equal(request1.container_uuid, request.container_uuid)
+ end
+ container2 = Container.find_by_uuid(request1.container_uuid)
+ assert_not_nil(container2)
+ return container2
+ end
+
+ preemptible_values = [true, false, nil]
+ preemptible_values.permutation(1).chain(
+ preemptible_values.product(preemptible_values),
+ preemptible_values.product(preemptible_values, preemptible_values),
+ ).each do |preemptible_a|
+ test "retry requests scheduled with preemptible=#{preemptible_a}" do
+ configure_preemptible_instance_type
+ container = retry_with_scheduling_parameter("preemptible", preemptible_a)
+ assert_equal(preemptible_a.all?,
+ container.scheduling_parameters["preemptible"] || false)
+ end
+ end
+
+ partition_values = [nil, [], ["alpha"], ["alpha", "bravo"], ["bravo", "charlie"]]
+ partition_values.permutation(1).chain(
+ partition_values.permutation(2),
+ ).each do |partitions_a|
+ test "retry requests scheduled with partitions=#{partitions_a}" do
+ container = retry_with_scheduling_parameter("partitions", partitions_a)
+ expected = if partitions_a.any? { |value| value.nil? or value.empty? }
+ []
+ else
+ partitions_a.flatten.uniq
+ end
+ actual = container.scheduling_parameters["partitions"] || []
+ assert_equal(expected.sort, actual.sort)
+ end
+ end
+
+ runtime_values = [nil, 0, 1, 2, 3]
+ runtime_values.permutation(1).chain(
+ runtime_values.permutation(2),
+ runtime_values.permutation(3),
+ ).each do |max_run_time_a|
+ test "retry requests scheduled with max_run_time=#{max_run_time_a}" do
+ container = retry_with_scheduling_parameter("max_run_time", max_run_time_a)
+ expected = if max_run_time_a.any? { |value| value.nil? or value == 0 }
+ 0
+ else
+ max_run_time_a.max
+ end
+ actual = container.scheduling_parameters["max_run_time"] || 0
+ assert_equal(expected, actual)
+ end
+ end
+
+ test "retry requests with multi-varied scheduling parameters" do
+ configure_preemptible_instance_type
+ set_user_from_auth :admin
+ containers = {}
+ requests = []
+ [{
+ "partitions": ["alpha", "bravo"],
+ "preemptible": true,
+ "max_run_time": 10,
+ }, {
+ "partitions": ["alpha", "charlie"],
+ "max_run_time": 20,
+ }, {
+ "partitions": ["bravo", "charlie"],
+ "preemptible": false,
+ "max_run_time": 30,
+ }].each do |scheduling_params|
+ container, request = minimal_new(scheduling_parameters: scheduling_params)
+ containers[container.uuid] = container
+ requests << request
+ end
+ assert_equal(1, containers.length)
+ _, container1 = containers.shift
+ container1.lock
+ container1.update_attributes!(state: Container::Cancelled)
+ container1.reload
+ request1 = requests.shift
+ request1.reload
+ assert_not_equal(container1.uuid, request1.container_uuid)
+ requests.each do |request|
+ request.reload
+ assert_equal(request1.container_uuid, request.container_uuid)
+ end
+ container2 = Container.find_by_uuid(request1.container_uuid)
+ assert_not_nil(container2)
+ actual = container2.scheduling_parameters
+ assert_equal(["alpha", "bravo", "charlie"], actual["partitions"]&.sort)
+ assert_equal(false, actual["preemptible"] || false)
+ assert_equal(30, actual["max_run_time"])
+ end
end
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list