[arvados] updated: 2.7.0-6106-g18795339bd
git repository hosting
git at public.arvados.org
Tue Mar 12 22:30:46 UTC 2024
Summary of changes:
doc/api/dispatch.html.textile.liquid | 2 +-
.../methods/container_requests.html.textile.liquid | 2 +-
lib/dispatchcloud/dispatcher.go | 36 +++++++++++-----------
lib/dispatchcloud/dispatcher_test.go | 2 +-
lib/dispatchcloud/scheduler/run_queue.go | 31 +++++++++++++------
.../arvados/v1/container_requests_controller.rb | 4 +++
6 files changed, 46 insertions(+), 31 deletions(-)
via 18795339bddd7aede141f142f90c9e9e05ec597a (commit)
via 57bece692f5e5d9f66ca4dfc77035867b1b32289 (commit)
via c91f68273656821bfa4a2315866794b5ede88b00 (commit)
via 52059aab9c65a9cbdff0b8d379ab8b0a25f62880 (commit)
from 35d4eea994c98b4bb65111c9da6e57abbc7e014f (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
commit 18795339bddd7aede141f142f90c9e9e05ec597a
Author: Tom Clegg <tom at curii.com>
Date: Tue Mar 12 18:30:25 2024 -0400
21123: Add container_status to discovery doc.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/services/api/app/controllers/arvados/v1/container_requests_controller.rb b/services/api/app/controllers/arvados/v1/container_requests_controller.rb
index 6b6e96a1f7..8800f58f49 100644
--- a/services/api/app/controllers/arvados/v1/container_requests_controller.rb
+++ b/services/api/app/controllers/arvados/v1/container_requests_controller.rb
@@ -31,6 +31,10 @@ class Arvados::V1::ContainerRequestsController < ApplicationController
})
end
+ def container_status
+ send_json({"errors" => "controller-only API, not handled by rails"}, status: 400)
+ end
+
def update
if (resource_attrs.keys.map(&:to_sym) - [:owner_uuid, :name, :description, :properties]).empty? or @object.container_uuid.nil?
# If no attributes are being updated besides these, there are no
commit 57bece692f5e5d9f66ca4dfc77035867b1b32289
Author: Tom Clegg <tom at curii.com>
Date: Tue Mar 12 17:49:32 2024 -0400
21123: Define scheduling status values as consts.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/lib/dispatchcloud/scheduler/run_queue.go b/lib/dispatchcloud/scheduler/run_queue.go
index 2f4bce8987..d270972295 100644
--- a/lib/dispatchcloud/scheduler/run_queue.go
+++ b/lib/dispatchcloud/scheduler/run_queue.go
@@ -24,6 +24,17 @@ type QueueEnt struct {
SchedulingStatus string `json:"scheduling_status"`
}
+const (
+ schedStatusPreparingRuntimeEnvironment = "preparing runtime environment"
+ schedStatusPriorityZero = "not scheduling: priority 0" // ", state X" appended at runtime
+ schedStatusContainerLimitReached = "not starting: supervisor container limit has been reached"
+ schedStatusWaitingForPreviousAttempt = "waiting for previous attempt to exit"
+ schedStatusWaitingNewInstance = "waiting for new instance to be ready"
+ schedStatusWaitingInstanceType = "waiting for suitable instance type to become available" // ": queue position X" appended at runtime
+ schedStatusWaitingCloudResources = "waiting for cloud resources"
+ schedStatusWaitingClusterCapacity = "waiting while cluster is running at capacity" // ": queue position X" appended at runtime
+)
+
// Queue returns the sorted queue from the last scheduling iteration.
func (sch *Scheduler) Queue() []QueueEnt {
ents, _ := sch.lastQueue.Load().([]QueueEnt)
@@ -188,17 +199,17 @@ tryrun:
}
if _, running := running[ctr.UUID]; running {
if ctr.State == arvados.ContainerStateQueued || ctr.State == arvados.ContainerStateLocked {
- sorted[i].SchedulingStatus = "preparing runtime environment"
+ sorted[i].SchedulingStatus = schedStatusPreparingRuntimeEnvironment
}
continue
}
if ctr.Priority < 1 {
- sorted[i].SchedulingStatus = "not scheduling: priority 0, state " + string(ctr.State)
+ sorted[i].SchedulingStatus = schedStatusPriorityZero + ", state " + string(ctr.State)
continue
}
if ctr.SchedulingParameters.Supervisor && maxSupervisors > 0 && supervisors > maxSupervisors {
overmaxsuper = append(overmaxsuper, sorted[i])
- sorted[i].SchedulingStatus = "not starting: supervisor container limit has been reached"
+ sorted[i].SchedulingStatus = schedStatusContainerLimitReached
continue
}
// If we have unalloc instances of any of the eligible
@@ -270,13 +281,13 @@ tryrun:
// same instance type. Don't let this
// one sneak in ahead of it.
} else if sch.pool.KillContainer(ctr.UUID, "about to start") {
- sorted[i].SchedulingStatus = "waiting for previous attempt to exit"
+ sorted[i].SchedulingStatus = schedStatusWaitingForPreviousAttempt
logger.Info("not restarting yet: crunch-run process from previous attempt has not exited")
} else if sch.pool.StartContainer(unallocType, ctr) {
- sorted[i].SchedulingStatus = "preparing runtime environment"
+ sorted[i].SchedulingStatus = schedStatusPreparingRuntimeEnvironment
logger.Trace("StartContainer => true")
} else {
- sorted[i].SchedulingStatus = "waiting for new instance to be ready"
+ sorted[i].SchedulingStatus = schedStatusWaitingNewInstance
logger.Trace("StartContainer => false")
containerAllocatedWorkerBootingCount += 1
dontstart[unallocType] = true
@@ -307,7 +318,7 @@ tryrun:
// runQueue(), rather than run
// container B now.
qpos++
- sorted[i].SchedulingStatus = fmt.Sprintf("waiting for suitable instance type to become available: queue position %d", qpos)
+ sorted[i].SchedulingStatus = schedStatusWaitingInstanceType + fmt.Sprintf(": queue position %d", qpos)
logger.Trace("all eligible types at capacity")
continue
}
@@ -322,7 +333,7 @@ tryrun:
// asynchronously and does its own logging
// about the eventual outcome, so we don't
// need to.)
- sorted[i].SchedulingStatus = "waiting for new instance to be ready"
+ sorted[i].SchedulingStatus = schedStatusWaitingNewInstance
logger.Info("creating new instance")
// Don't bother trying to start the container
// yet -- obviously the instance will take
@@ -337,9 +348,9 @@ tryrun:
var qreason string
if sch.pool.AtQuota() {
- qreason = "waiting for cloud resources"
+ qreason = schedStatusWaitingCloudResources
} else {
- qreason = "waiting while cluster is running at capacity"
+ qreason = schedStatusWaitingClusterCapacity
}
for i, ent := range sorted {
if ent.SchedulingStatus == "" && (ent.Container.State == arvados.ContainerStateQueued || ent.Container.State == arvados.ContainerStateLocked) {
commit c91f68273656821bfa4a2315866794b5ede88b00
Author: Tom Clegg <tom at curii.com>
Date: Tue Mar 12 17:49:12 2024 -0400
21123: Rename sQueue* to schedQueue*.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/lib/dispatchcloud/dispatcher.go b/lib/dispatchcloud/dispatcher.go
index 611d13306f..04283df48f 100644
--- a/lib/dispatchcloud/dispatcher.go
+++ b/lib/dispatchcloud/dispatcher.go
@@ -69,13 +69,13 @@ type dispatcher struct {
stop chan struct{}
stopped chan struct{}
- sQueueMtx sync.Mutex
- sQueueRefreshed time.Time
- sQueue []scheduler.QueueEnt
- sQueueMap map[string]scheduler.QueueEnt
+ schedQueueMtx sync.Mutex
+ schedQueueRefreshed time.Time
+ schedQueue []scheduler.QueueEnt
+ schedQueueMap map[string]scheduler.QueueEnt
}
-var sQueueRefresh = time.Second
+var schedQueueRefresh = time.Second
// Start starts the dispatcher. Start can be called multiple times
// with no ill effect.
@@ -221,23 +221,23 @@ func (disp *dispatcher) run() {
}
// Get a snapshot of the scheduler's queue, no older than
-// sQueueRefresh.
+// schedQueueRefresh.
//
// First return value is in the sorted order used by the scheduler.
// Second return value is a map of the same entries, for efficiently
// looking up a single container.
-func (disp *dispatcher) sQueueCurrent() ([]scheduler.QueueEnt, map[string]scheduler.QueueEnt) {
- disp.sQueueMtx.Lock()
- defer disp.sQueueMtx.Unlock()
- if time.Since(disp.sQueueRefreshed) > sQueueRefresh {
- disp.sQueue = disp.sched.Queue()
- disp.sQueueMap = make(map[string]scheduler.QueueEnt)
- for _, ent := range disp.sQueue {
- disp.sQueueMap[ent.Container.UUID] = ent
+func (disp *dispatcher) schedQueueCurrent() ([]scheduler.QueueEnt, map[string]scheduler.QueueEnt) {
+ disp.schedQueueMtx.Lock()
+ defer disp.schedQueueMtx.Unlock()
+ if time.Since(disp.schedQueueRefreshed) > schedQueueRefresh {
+ disp.schedQueue = disp.sched.Queue()
+ disp.schedQueueMap = make(map[string]scheduler.QueueEnt)
+ for _, ent := range disp.schedQueue {
+ disp.schedQueueMap[ent.Container.UUID] = ent
}
- disp.sQueueRefreshed = time.Now()
+ disp.schedQueueRefreshed = time.Now()
}
- return disp.sQueue, disp.sQueueMap
+ return disp.schedQueue, disp.schedQueueMap
}
// Management API: scheduling queue entries for all active and queued
@@ -246,13 +246,13 @@ func (disp *dispatcher) apiContainers(w http.ResponseWriter, r *http.Request) {
var resp struct {
Items []scheduler.QueueEnt `json:"items"`
}
- resp.Items, _ = disp.sQueueCurrent()
+ resp.Items, _ = disp.schedQueueCurrent()
json.NewEncoder(w).Encode(resp)
}
// Management API: scheduling queue entry for a specified container.
func (disp *dispatcher) apiContainer(w http.ResponseWriter, r *http.Request) {
- _, sq := disp.sQueueCurrent()
+ _, sq := disp.schedQueueCurrent()
ent, ok := sq[r.FormValue("container_uuid")]
if !ok {
httpserver.Error(w, "container not found", http.StatusNotFound)
diff --git a/lib/dispatchcloud/dispatcher_test.go b/lib/dispatchcloud/dispatcher_test.go
index 3c23d81d58..d651e73a67 100644
--- a/lib/dispatchcloud/dispatcher_test.go
+++ b/lib/dispatchcloud/dispatcher_test.go
@@ -404,7 +404,7 @@ func (s *DispatcherSuite) TestManagementAPI_Containers(c *check.C) {
Items []queueEnt
}
getContainers := func() containersResponse {
- sQueueRefresh = time.Millisecond
+ schedQueueRefresh = time.Millisecond
req := httptest.NewRequest("GET", "/arvados/v1/dispatch/containers", nil)
req.Header.Set("Authorization", "Bearer abcdefgh")
resp := httptest.NewRecorder()
commit 52059aab9c65a9cbdff0b8d379ab8b0a25f62880
Author: Tom Clegg <tom at curii.com>
Date: Tue Mar 12 17:48:18 2024 -0400
21123: Clarify empty means empty string.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/doc/api/dispatch.html.textile.liquid b/doc/api/dispatch.html.textile.liquid
index 488545c7d4..7a916f0646 100644
--- a/doc/api/dispatch.html.textile.liquid
+++ b/doc/api/dispatch.html.textile.liquid
@@ -32,7 +32,7 @@ Return a list of containers that are either ready to dispatch, or being started/
Each entry in the returned list of @items@ includes:
* an @instance_type@ entry with the name and attributes of the instance type that will be used to schedule the container (chosen from the @InstanceTypes@ section of your cluster config file); and
* a @container@ entry with selected attributes of the container itself, including @uuid@, @priority@, @runtime_constraints@, and @state@. Other fields of the container records are not loaded by the dispatcher, and will have empty/zero values here (e.g., @{...,"created_at":"0001-01-01T00:00:00Z","command":[],...}@).
-* a @scheduling_status@ entry: a brief explanation of the container's status in the dispatch queue, or empty if scheduling is not applicable, e.g., the container has already started running.
+* a @scheduling_status@ field with a brief explanation of the container's status in the dispatch queue, or the empty string if scheduling is not applicable, e.g., the container has already started running.
Example response:
diff --git a/doc/api/methods/container_requests.html.textile.liquid b/doc/api/methods/container_requests.html.textile.liquid
index 770b56b697..130aa73c05 100644
--- a/doc/api/methods/container_requests.html.textile.liquid
+++ b/doc/api/methods/container_requests.html.textile.liquid
@@ -240,7 +240,7 @@ table(table table-bordered table-condensed).
|_. Attribute|_. Type|_. Description|_. Examples|
|uuid|string|The UUID of the container assigned to this request.||
|state|string|The state of the container assigned to this request (see "container resource attributes":containers.html).||
-|scheduling_status|string|A brief explanation of the container's status in the dispatch queue. Empty if scheduling is not applicable, e.g., the container is running or finished.|@waiting for cloud resources: queue position 3@
+|scheduling_status|string|A brief explanation of the container's status in the dispatch queue, or the empty string if scheduling is not applicable, e.g., the container is running or finished.|@waiting for cloud resources: queue position 3@
@creating new instance@
@preparing runtime environment@|
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list