[arvados] created: 2.5.0-25-ga62b2bbb0

git repository hosting git at public.arvados.org
Thu Jan 19 20:43:25 UTC 2023


        at  a62b2bbb0cc62d8527d0e4fb1d78915cf2e6a4d5 (commit)


commit a62b2bbb0cc62d8527d0e4fb1d78915cf2e6a4d5
Author: Brett Smith <brett.smith at curii.com>
Date:   Thu Jan 19 15:39:29 2023 -0500

    19917: Synthesize scheduling parameters when retrying a container
    
    When we retry a cancelled container, there may be any number of
    container requests that want it, each with their own scheduling
    parameters. Create a new set of scheduling parameters for the new
    container where each parameter has the most lax setting from all the
    outstanding container requests.
    
    Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith at curii.com>

diff --git a/services/api/app/models/container.rb b/services/api/app/models/container.rb
index 21e8fcf50..42d0ed49b 100644
--- a/services/api/app/models/container.rb
+++ b/services/api/app/models/container.rb
@@ -718,6 +718,31 @@ class Container < ArvadosModel
           end
 
           if retryable_requests.any?
+            scheduling_parameters = {
+              # partitions: empty if any are empty, else the union of all parameters
+              "partitions": retryable_requests
+                              .map { |req| req.scheduling_parameters["partitions"] || [] }
+                              .reduce { |cur, new| (cur.empty? or new.empty?) ? [] : (cur | new) },
+
+              # preemptible: true if all are true, else false
+              "preemptible": retryable_requests
+                               .map { |req| req.scheduling_parameters["preemptible"] }
+                               .all?,
+
+              # max_run_time: 0 if any are 0 (unlimited), else the maximum
+              "max_run_time": retryable_requests
+                                .map { |req| req.scheduling_parameters["max_run_time"] || 0 }
+                                .reduce do |cur, new|
+                if cur == 0 or new == 0
+                  0
+                elsif new > cur
+                  new
+                else
+                  cur
+                end
+              end,
+            }
+
             c_attrs = {
               command: self.command,
               cwd: self.cwd,
@@ -726,7 +751,7 @@ class Container < ArvadosModel
               container_image: self.container_image,
               mounts: self.mounts,
               runtime_constraints: self.runtime_constraints,
-              scheduling_parameters: self.scheduling_parameters,
+              scheduling_parameters: scheduling_parameters,
               secret_mounts: prev_secret_mounts,
               runtime_token: prev_runtime_token,
               runtime_user_uuid: self.runtime_user_uuid,
diff --git a/services/api/test/unit/container_test.rb b/services/api/test/unit/container_test.rb
index f804aca2d..aafb643b4 100644
--- a/services/api/test/unit/container_test.rb
+++ b/services/api/test/unit/container_test.rb
@@ -1063,4 +1063,133 @@ class ContainerTest < ActiveSupport::TestCase
       assert_no_secrets_logged
     end
   end
+
+  # Replace the configured InstanceTypes with a single entry,
+  # "a1.small.pre", which is marked as preemptible.
+  def configure_preemptible_instance_type
+    preemptible_small = {
+      "Preemptible" => true,
+      "Price" => 0.1,
+      "ProviderType" => "a1.small",
+      "VCPUs" => 1,
+      "RAM" => 1000000000,
+    }
+    instance_types = {"a1.small.pre" => preemptible_small}
+    Rails.configuration.InstanceTypes = ConfigLoader.to_OrderedOptions(instance_types)
+  end
+
+  # Build one request per entry in schedule_values via minimal_new, setting
+  # scheduling_parameters[scheduling_key] to that entry (or leaving the
+  # parameters empty when the entry is nil). Asserts that all the requests
+  # share a single container, cancels that container, then asserts that
+  # every request has moved to one and the same different container.
+  # Returns that new container.
+  def retry_with_scheduling_parameter(scheduling_key, schedule_values)
+    set_user_from_auth :admin
+    containers_by_uuid = {}
+    pending_requests = []
+    schedule_values.each do |value|
+      # Only nil means "unset"; false is a real value and must be passed on.
+      params = value.nil? ? {} : {scheduling_key => value}
+      ctr, req = minimal_new(scheduling_parameters: params)
+      containers_by_uuid[ctr.uuid] = ctr
+      pending_requests << req
+    end
+    assert_equal(1, containers_by_uuid.length)
+    cancelled = containers_by_uuid.values.first
+    cancelled.lock
+    cancelled.update_attributes!(state: Container::Cancelled)
+    cancelled.reload
+    first_request, *other_requests = pending_requests
+    first_request.reload
+    assert_not_equal(cancelled.uuid, first_request.container_uuid)
+    other_requests.each do |req|
+      req.reload
+      assert_equal(first_request.container_uuid, req.container_uuid)
+    end
+    replacement = Container.find_by_uuid(first_request.container_uuid)
+    assert_not_nil(replacement)
+    replacement
+  end
+
+  # Define one test for every way one, two, or three requests can set
+  # "preemptible" to true, false, or leave it unset (nil). The retried
+  # container is expected to be preemptible only when every request asked
+  # for it.
+  preemptible_values = [true, false, nil]
+  preemptible_cases = preemptible_values.map { |value| [value] } +
+                      preemptible_values.product(preemptible_values) +
+                      preemptible_values.product(preemptible_values, preemptible_values)
+  preemptible_cases.each do |requested|
+    test "retry requests scheduled with preemptible=#{requested}" do
+      configure_preemptible_instance_type
+      retried = retry_with_scheduling_parameter("preemptible", requested)
+      expected = requested.all?
+      actual = retried.scheduling_parameters["preemptible"] || false
+      assert_equal(expected, actual)
+    end
+  end
+
+  # Define one test for every ordered selection of one or two distinct
+  # entries from partition_values. If any request leaves partitions unset
+  # or empty, the retried container is expected to have no partitions;
+  # otherwise it should have every partition named by any request.
+  partition_values = [nil, [], ["alpha"], ["alpha", "bravo"], ["bravo", "charlie"]]
+  (1..2).flat_map { |count| partition_values.permutation(count).to_a }.each do |requested|
+    test "retry requests scheduled with partitions=#{requested}" do
+      retried = retry_with_scheduling_parameter("partitions", requested)
+      unrestricted = requested.any? { |names| names.nil? || names.empty? }
+      expected = unrestricted ? [] : requested.flatten.uniq
+      actual = retried.scheduling_parameters["partitions"] || []
+      # Compare sorted so the check does not depend on element order.
+      assert_equal(expected.sort, actual.sort)
+    end
+  end
+
+  # Define one test for every ordered selection of one, two, or three
+  # distinct entries from runtime_values. If any request leaves
+  # max_run_time unset or 0, the retried container is expected to have 0;
+  # otherwise it should have the largest requested value.
+  runtime_values = [nil, 0, 1, 2, 3]
+  (1..3).flat_map { |count| runtime_values.permutation(count).to_a }.each do |requested|
+    test "retry requests scheduled with max_run_time=#{requested}" do
+      retried = retry_with_scheduling_parameter("max_run_time", requested)
+      all_limited = requested.none? { |seconds| seconds.nil? || seconds == 0 }
+      expected = all_limited ? requested.max : 0
+      actual = retried.scheduling_parameters["max_run_time"] || 0
+      assert_equal(expected, actual)
+    end
+  end
+
+  # Three requests that differ in every scheduling parameter share one
+  # container. After that container is cancelled, the replacement container
+  # is expected to carry every partition named by any request,
+  # preemptible=false (not all requests asked for it), and the largest
+  # max_run_time.
+  test "retry requests with multi-varied scheduling parameters" do
+    configure_preemptible_instance_type
+    set_user_from_auth :admin
+    containers = {}
+    requests = []
+    [{
+       "partitions": ["alpha", "bravo"],
+       "preemptible": true,
+       "max_run_time": 10,
+     }, {
+       "partitions": ["alpha", "charlie"],
+       "max_run_time": 20,
+     }, {
+       "partitions": ["bravo", "charlie"],
+       "preemptible": false,
+       "max_run_time": 30,
+     }].each do |scheduling_params|
+      container, request = minimal_new(scheduling_parameters: scheduling_params)
+      containers[container.uuid] = container
+      requests << request
+    end
+    # All three requests must have been satisfied by the same container.
+    assert_equal(1, containers.length)
+    _, container1 = containers.shift
+    container1.lock
+    container1.update_attributes!(state: Container::Cancelled)
+    container1.reload
+    request1 = requests.shift
+    request1.reload
+    assert_not_equal(container1.uuid, request1.container_uuid)
+    requests.each do |request|
+      request.reload
+      assert_equal(request1.container_uuid, request.container_uuid)
+    end
+    container2 = Container.find_by_uuid(request1.container_uuid)
+    assert_not_nil(container2)
+    actual = container2.scheduling_parameters
+    # Compare partitions sorted: the element order of a union depends on the
+    # order in which the requests were combined, which this test does not
+    # control.
+    assert_equal(["alpha", "bravo", "charlie"], (actual["partitions"] || []).sort)
+    assert_equal(false, actual["preemptible"] || false)
+    assert_equal(30, actual["max_run_time"])
+  end
 end

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list