[arvados] updated: 2.5.0-212-g4eb591b3d
git repository hosting
git at public.arvados.org
Fri Mar 3 15:48:56 UTC 2023
Summary of changes:
lib/config/config.default.yml | 25 +++++++++++++---------
lib/config/export.go | 1 -
lib/dispatchcloud/dispatcher.go | 6 +++++-
lib/dispatchcloud/scheduler/run_queue.go | 6 +-----
lib/dispatchcloud/scheduler/run_queue_test.go | 11 ++++++----
sdk/go/arvados/config.go | 2 +-
services/api/app/models/container.rb | 5 +++++
services/api/app/models/container_request.rb | 4 ++++
.../v1/container_requests_controller_test.rb | 19 ++++++++++++++--
9 files changed, 55 insertions(+), 24 deletions(-)
via 4eb591b3d541cc5ac035fb0eed7c39dc8e81dd02 (commit)
from 53a0cb93d935f02248f4e347ea0689fccf5e818e (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
commit 4eb591b3d541cc5ac035fb0eed7c39dc8e81dd02
Author: Peter Amstutz <peter.amstutz at curii.com>
Date: Fri Mar 3 10:44:30 2023 -0500
20182: API server sets "supervisor" flag now
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>
diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml
index 882ee62c3..527439571 100644
--- a/lib/config/config.default.yml
+++ b/lib/config/config.default.yml
@@ -1055,16 +1055,6 @@ Clusters:
# Container runtime: "docker" (default) or "singularity"
RuntimeEngine: docker
- # Number of "supervisor" containers eligible to run at any given
- # time expressed as a fraction of CloudVMs.MaxInstances. A
- # supervisor is a container who's purpose is to submit other
- # containers, such as a container running arvados-cwl-runner.
- # If there is a hard limit on the amount of concurrent
- # containers that the cluster can run, it is important to avoid
- # crowding out the containers doing useful work with containers
- # who just create more work.
- SupervisorFraction: 0.3
-
# When running a container, run a dedicated keepstore process,
# using the specified number of 64 MiB memory buffers per
# allocated CPU core (VCPUs in the container's runtime
@@ -1339,6 +1329,21 @@ Clusters:
# down.
MaxInstances: 64
+ # Maximum fraction of CloudVMs.MaxInstances allowed to run
+ # "supervisor" containers at any given time. A supervisor is a
+ # container whose purpose is mainly to submit and manage other
+ # containers, such as arvados-cwl-runner workflow runner.
+ #
+ # If there is a hard limit on the amount of concurrent
+ # containers that the cluster can run, it is important to
+ # avoid crowding out the containers doing useful work with
+ # containers who just create more work.
+ #
+ # For example, with the default MaxInstances of 64, it will
+ # schedule at most floor(64*0.30) = 19 concurrent workflows,
+ # ensuring 45 slots are available for work.
+ SupervisorFraction: 0.30
+
# Interval between cloud provider syncs/updates ("list all
# instances").
SyncInterval: 1m
diff --git a/lib/config/export.go b/lib/config/export.go
index fc688d68d..a8a535eeb 100644
--- a/lib/config/export.go
+++ b/lib/config/export.go
@@ -142,7 +142,6 @@ var whitelist = map[string]bool{
"Containers.ShellAccess.User": true,
"Containers.SLURM": false,
"Containers.StaleLockTimeout": false,
- "Containers.SupervisorFraction": false,
"Containers.SupportedDockerImageFormats": true,
"Containers.SupportedDockerImageFormats.*": true,
"Git": false,
diff --git a/lib/dispatchcloud/dispatcher.go b/lib/dispatchcloud/dispatcher.go
index 1f8272afd..06a558d5f 100644
--- a/lib/dispatchcloud/dispatcher.go
+++ b/lib/dispatchcloud/dispatcher.go
@@ -193,7 +193,11 @@ func (disp *dispatcher) run() {
if pollInterval <= 0 {
pollInterval = defaultPollInterval
}
- sched := scheduler.New(disp.Context, disp.ArvClient, disp.queue, disp.pool, disp.Registry, staleLockTimeout, pollInterval, int(float64(disp.Cluster.Containers.CloudVMs.MaxInstances)*disp.Cluster.Containers.SupervisorFraction))
+ maxSupervisors := int(float64(disp.Cluster.Containers.CloudVMs.MaxInstances) * disp.Cluster.Containers.CloudVMs.SupervisorFraction)
+ if maxSupervisors == 0 && disp.Cluster.Containers.CloudVMs.SupervisorFraction > 0 {
+ maxSupervisors = 1
+ }
+ sched := scheduler.New(disp.Context, disp.ArvClient, disp.queue, disp.pool, disp.Registry, staleLockTimeout, pollInterval, maxSupervisors)
sched.Start()
defer sched.Stop()
diff --git a/lib/dispatchcloud/scheduler/run_queue.go b/lib/dispatchcloud/scheduler/run_queue.go
index 65ca904be..b8158579a 100644
--- a/lib/dispatchcloud/scheduler/run_queue.go
+++ b/lib/dispatchcloud/scheduler/run_queue.go
@@ -15,10 +15,6 @@ import (
var quietAfter503 = time.Minute
-func isSupervisor(ctr arvados.Container) bool {
- return (len(ctr.Command) > 0 && ctr.Command[0] == "arvados-cwl-runner") || ctr.SchedulingParameters.Supervisor
-}
-
func (sch *Scheduler) runQueue() {
unsorted, _ := sch.queue.Entries()
sorted := make([]container.QueueEnt, 0, len(unsorted))
@@ -97,7 +93,7 @@ tryrun:
"ContainerUUID": ctr.UUID,
"InstanceType": it.Name,
})
- if isSupervisor(ctr) {
+ if ctr.SchedulingParameters.Supervisor {
supervisors += 1
if sch.maxSupervisors > 0 && supervisors > sch.maxSupervisors {
continue
diff --git a/lib/dispatchcloud/scheduler/run_queue_test.go b/lib/dispatchcloud/scheduler/run_queue_test.go
index db161b7d7..3278c7de6 100644
--- a/lib/dispatchcloud/scheduler/run_queue_test.go
+++ b/lib/dispatchcloud/scheduler/run_queue_test.go
@@ -549,8 +549,7 @@ func (*SchedulerSuite) TestContainersMetrics(c *check.C) {
c.Check(int(testutil.ToFloat64(sch.mLongestWaitTimeSinceQueue)), check.Equals, 0)
}
-// Assign priority=4 container to idle node. Create new instances for
-// the priority=3 and 1 containers. Ignore the supervisor at priority 2.
+// Assign priority=4, 3 and 1 containers to idle nodes. Ignore the supervisor at priority 2.
func (*SchedulerSuite) TestSkipSupervisors(c *check.C) {
ctx := ctxlog.Context(context.Background(), ctxlog.TestLogger(c))
queue := test.Queue{
@@ -573,7 +572,9 @@ func (*SchedulerSuite) TestSkipSupervisors(c *check.C) {
VCPUs: 1,
RAM: 1 << 30,
},
- Command: []string{"arvados-cwl-runner"},
+ SchedulingParameters: arvados.SchedulingParameters{
+ Supervisor: true,
+ },
},
{
UUID: test.ContainerUUID(3),
@@ -595,7 +596,9 @@ func (*SchedulerSuite) TestSkipSupervisors(c *check.C) {
VCPUs: 1,
RAM: 1 << 30,
},
- Command: []string{"arvados-cwl-runner"},
+ SchedulingParameters: arvados.SchedulingParameters{
+ Supervisor: true,
+ },
},
},
}
diff --git a/sdk/go/arvados/config.go b/sdk/go/arvados/config.go
index d167addd3..bf70a7603 100644
--- a/sdk/go/arvados/config.go
+++ b/sdk/go/arvados/config.go
@@ -506,7 +506,6 @@ type ContainersConfig struct {
SupportedDockerImageFormats StringSet
AlwaysUsePreemptibleInstances bool
PreemptiblePriceFactor float64
- SupervisorFraction float64
RuntimeEngine string
LocalKeepBlobBuffersPerVCPU int
LocalKeepLogsToContainerLog string
@@ -563,6 +562,7 @@ type CloudVMsConfig struct {
MaxProbesPerSecond int
MaxConcurrentInstanceCreateOps int
MaxInstances int
+ SupervisorFraction float64
PollInterval Duration
ProbeInterval Duration
SSHPort string
diff --git a/services/api/app/models/container.rb b/services/api/app/models/container.rb
index 42d0ed49b..fa6b6f296 100644
--- a/services/api/app/models/container.rb
+++ b/services/api/app/models/container.rb
@@ -729,6 +729,11 @@ class Container < ArvadosModel
.map { |req| req.scheduling_parameters["preemptible"] }
.all?,
+ # supervisor: true if all any true, else false
+ "supervisor": retryable_requests
+ .map { |req| req.scheduling_parameters["supervisor"] }
+ .any?,
+
# max_run_time: 0 if any are 0 (unlimited), else the maximum
"max_run_time": retryable_requests
.map { |req| req.scheduling_parameters["max_run_time"] || 0 }
diff --git a/services/api/app/models/container_request.rb b/services/api/app/models/container_request.rb
index 47e2769a8..e4da8399f 100644
--- a/services/api/app/models/container_request.rb
+++ b/services/api/app/models/container_request.rb
@@ -320,6 +320,10 @@ class ContainerRequest < ArvadosModel
return false
end
if state_changed? and state == Committed and container_uuid.nil?
+ if self.command.length > 0 and self.command[0] == "arvados-cwl-runner"
+ # Special case, arvados-cwl-runner processes are always considered "supervisors"
+ self.scheduling_parameters['supervisor'] = true
+ end
while true
c = Container.resolve(self)
c.lock!
diff --git a/services/api/test/functional/arvados/v1/container_requests_controller_test.rb b/services/api/test/functional/arvados/v1/container_requests_controller_test.rb
index e99af39c9..f287a11fa 100644
--- a/services/api/test/functional/arvados/v1/container_requests_controller_test.rb
+++ b/services/api/test/functional/arvados/v1/container_requests_controller_test.rb
@@ -8,8 +8,9 @@ class Arvados::V1::ContainerRequestsControllerTest < ActionController::TestCase
def minimal_cr
{
command: ['echo', 'hello'],
- container_image: 'test',
+ container_image: 'arvados/apitestfixture:latest',
output_path: 'test',
+ runtime_constraints: {vcpus: 1, ram: 1}
}
end
@@ -18,7 +19,7 @@ class Arvados::V1::ContainerRequestsControllerTest < ActionController::TestCase
sp = {'partitions' => ['test1', 'test2']}
post :create, params: {
- container_request: minimal_cr.merge(scheduling_parameters: sp.dup)
+ container_request: minimal_cr.merge(scheduling_parameters: sp.dup, state: "Committed")
}
assert_response :success
@@ -26,6 +27,20 @@ class Arvados::V1::ContainerRequestsControllerTest < ActionController::TestCase
assert_not_nil cr, 'Expected container request'
assert_equal sp['partitions'], cr['scheduling_parameters']['partitions']
assert_equal false, cr['scheduling_parameters']['preemptible']
+ assert_equal false, cr['scheduling_parameters']['supervisor']
+ end
+
+ test 'create a-c-r should be supervisor' do
+ authorize_with :active
+
+ post :create, params: {
+ container_request: minimal_cr.merge(command: ["arvados-cwl-runner", "my-workflow.cwl"], state: "Committed")
+ }
+ assert_response :success
+
+ cr = JSON.parse(@response.body)
+ assert_not_nil cr, 'Expected container request'
+ assert_equal true, cr['scheduling_parameters']['supervisor']
end
test "secret_mounts not in #create responses" do
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list