[arvados] updated: 2.7.0-6106-g18795339bd

git repository hosting git at public.arvados.org
Tue Mar 12 22:30:46 UTC 2024


Summary of changes:
 doc/api/dispatch.html.textile.liquid               |  2 +-
 .../methods/container_requests.html.textile.liquid |  2 +-
 lib/dispatchcloud/dispatcher.go                    | 36 +++++++++++-----------
 lib/dispatchcloud/dispatcher_test.go               |  2 +-
 lib/dispatchcloud/scheduler/run_queue.go           | 31 +++++++++++++------
 .../arvados/v1/container_requests_controller.rb    |  4 +++
 6 files changed, 46 insertions(+), 31 deletions(-)

       via  18795339bddd7aede141f142f90c9e9e05ec597a (commit)
       via  57bece692f5e5d9f66ca4dfc77035867b1b32289 (commit)
       via  c91f68273656821bfa4a2315866794b5ede88b00 (commit)
       via  52059aab9c65a9cbdff0b8d379ab8b0a25f62880 (commit)
      from  35d4eea994c98b4bb65111c9da6e57abbc7e014f (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit 18795339bddd7aede141f142f90c9e9e05ec597a
Author: Tom Clegg <tom at curii.com>
Date:   Tue Mar 12 18:30:25 2024 -0400

    21123: Add container_status to discovery doc.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/services/api/app/controllers/arvados/v1/container_requests_controller.rb b/services/api/app/controllers/arvados/v1/container_requests_controller.rb
index 6b6e96a1f7..8800f58f49 100644
--- a/services/api/app/controllers/arvados/v1/container_requests_controller.rb
+++ b/services/api/app/controllers/arvados/v1/container_requests_controller.rb
@@ -31,6 +31,10 @@ class Arvados::V1::ContainerRequestsController < ApplicationController
       })
   end
 
+  def container_status
+    send_json({"errors" => "controller-only API, not handled by rails"}, status: 400)
+  end
+
   def update
     if (resource_attrs.keys.map(&:to_sym) - [:owner_uuid, :name, :description, :properties]).empty? or @object.container_uuid.nil?
       # If no attributes are being updated besides these, there are no

commit 57bece692f5e5d9f66ca4dfc77035867b1b32289
Author: Tom Clegg <tom at curii.com>
Date:   Tue Mar 12 17:49:32 2024 -0400

    21123: Define scheduling status values as consts.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/lib/dispatchcloud/scheduler/run_queue.go b/lib/dispatchcloud/scheduler/run_queue.go
index 2f4bce8987..d270972295 100644
--- a/lib/dispatchcloud/scheduler/run_queue.go
+++ b/lib/dispatchcloud/scheduler/run_queue.go
@@ -24,6 +24,17 @@ type QueueEnt struct {
 	SchedulingStatus string `json:"scheduling_status"`
 }
 
+const (
+	schedStatusPreparingRuntimeEnvironment = "preparing runtime environment"
+	schedStatusPriorityZero                = "not scheduling: priority 0" // ", state X" appended at runtime
+	schedStatusContainerLimitReached       = "not starting: supervisor container limit has been reached"
+	schedStatusWaitingForPreviousAttempt   = "waiting for previous attempt to exit"
+	schedStatusWaitingNewInstance          = "waiting for new instance to be ready"
+	schedStatusWaitingInstanceType         = "waiting for suitable instance type to become available" // ": queue position X" appended at runtime
+	schedStatusWaitingCloudResources       = "waiting for cloud resources"
+	schedStatusWaitingClusterCapacity      = "waiting while cluster is running at capacity" // ": queue position X" appended at runtime
+)
+
 // Queue returns the sorted queue from the last scheduling iteration.
 func (sch *Scheduler) Queue() []QueueEnt {
 	ents, _ := sch.lastQueue.Load().([]QueueEnt)
@@ -188,17 +199,17 @@ tryrun:
 		}
 		if _, running := running[ctr.UUID]; running {
 			if ctr.State == arvados.ContainerStateQueued || ctr.State == arvados.ContainerStateLocked {
-				sorted[i].SchedulingStatus = "preparing runtime environment"
+				sorted[i].SchedulingStatus = schedStatusPreparingRuntimeEnvironment
 			}
 			continue
 		}
 		if ctr.Priority < 1 {
-			sorted[i].SchedulingStatus = "not scheduling: priority 0, state " + string(ctr.State)
+			sorted[i].SchedulingStatus = schedStatusPriorityZero + ", state " + string(ctr.State)
 			continue
 		}
 		if ctr.SchedulingParameters.Supervisor && maxSupervisors > 0 && supervisors > maxSupervisors {
 			overmaxsuper = append(overmaxsuper, sorted[i])
-			sorted[i].SchedulingStatus = "not starting: supervisor container limit has been reached"
+			sorted[i].SchedulingStatus = schedStatusContainerLimitReached
 			continue
 		}
 		// If we have unalloc instances of any of the eligible
@@ -270,13 +281,13 @@ tryrun:
 					// same instance type. Don't let this
 					// one sneak in ahead of it.
 				} else if sch.pool.KillContainer(ctr.UUID, "about to start") {
-					sorted[i].SchedulingStatus = "waiting for previous attempt to exit"
+					sorted[i].SchedulingStatus = schedStatusWaitingForPreviousAttempt
 					logger.Info("not restarting yet: crunch-run process from previous attempt has not exited")
 				} else if sch.pool.StartContainer(unallocType, ctr) {
-					sorted[i].SchedulingStatus = "preparing runtime environment"
+					sorted[i].SchedulingStatus = schedStatusPreparingRuntimeEnvironment
 					logger.Trace("StartContainer => true")
 				} else {
-					sorted[i].SchedulingStatus = "waiting for new instance to be ready"
+					sorted[i].SchedulingStatus = schedStatusWaitingNewInstance
 					logger.Trace("StartContainer => false")
 					containerAllocatedWorkerBootingCount += 1
 					dontstart[unallocType] = true
@@ -307,7 +318,7 @@ tryrun:
 				// runQueue(), rather than run
 				// container B now.
 				qpos++
-				sorted[i].SchedulingStatus = fmt.Sprintf("waiting for suitable instance type to become available: queue position %d", qpos)
+				sorted[i].SchedulingStatus = schedStatusWaitingInstanceType + fmt.Sprintf(": queue position %d", qpos)
 				logger.Trace("all eligible types at capacity")
 				continue
 			}
@@ -322,7 +333,7 @@ tryrun:
 			// asynchronously and does its own logging
 			// about the eventual outcome, so we don't
 			// need to.)
-			sorted[i].SchedulingStatus = "waiting for new instance to be ready"
+			sorted[i].SchedulingStatus = schedStatusWaitingNewInstance
 			logger.Info("creating new instance")
 			// Don't bother trying to start the container
 			// yet -- obviously the instance will take
@@ -337,9 +348,9 @@ tryrun:
 
 	var qreason string
 	if sch.pool.AtQuota() {
-		qreason = "waiting for cloud resources"
+		qreason = schedStatusWaitingCloudResources
 	} else {
-		qreason = "waiting while cluster is running at capacity"
+		qreason = schedStatusWaitingClusterCapacity
 	}
 	for i, ent := range sorted {
 		if ent.SchedulingStatus == "" && (ent.Container.State == arvados.ContainerStateQueued || ent.Container.State == arvados.ContainerStateLocked) {

commit c91f68273656821bfa4a2315866794b5ede88b00
Author: Tom Clegg <tom at curii.com>
Date:   Tue Mar 12 17:49:12 2024 -0400

    21123: Rename sQueue* to schedQueue*.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/lib/dispatchcloud/dispatcher.go b/lib/dispatchcloud/dispatcher.go
index 611d13306f..04283df48f 100644
--- a/lib/dispatchcloud/dispatcher.go
+++ b/lib/dispatchcloud/dispatcher.go
@@ -69,13 +69,13 @@ type dispatcher struct {
 	stop      chan struct{}
 	stopped   chan struct{}
 
-	sQueueMtx       sync.Mutex
-	sQueueRefreshed time.Time
-	sQueue          []scheduler.QueueEnt
-	sQueueMap       map[string]scheduler.QueueEnt
+	schedQueueMtx       sync.Mutex
+	schedQueueRefreshed time.Time
+	schedQueue          []scheduler.QueueEnt
+	schedQueueMap       map[string]scheduler.QueueEnt
 }
 
-var sQueueRefresh = time.Second
+var schedQueueRefresh = time.Second
 
 // Start starts the dispatcher. Start can be called multiple times
 // with no ill effect.
@@ -221,23 +221,23 @@ func (disp *dispatcher) run() {
 }
 
 // Get a snapshot of the scheduler's queue, no older than
-// sQueueRefresh.
+// schedQueueRefresh.
 //
 // First return value is in the sorted order used by the scheduler.
 // Second return value is a map of the same entries, for efficiently
 // looking up a single container.
-func (disp *dispatcher) sQueueCurrent() ([]scheduler.QueueEnt, map[string]scheduler.QueueEnt) {
-	disp.sQueueMtx.Lock()
-	defer disp.sQueueMtx.Unlock()
-	if time.Since(disp.sQueueRefreshed) > sQueueRefresh {
-		disp.sQueue = disp.sched.Queue()
-		disp.sQueueMap = make(map[string]scheduler.QueueEnt)
-		for _, ent := range disp.sQueue {
-			disp.sQueueMap[ent.Container.UUID] = ent
+func (disp *dispatcher) schedQueueCurrent() ([]scheduler.QueueEnt, map[string]scheduler.QueueEnt) {
+	disp.schedQueueMtx.Lock()
+	defer disp.schedQueueMtx.Unlock()
+	if time.Since(disp.schedQueueRefreshed) > schedQueueRefresh {
+		disp.schedQueue = disp.sched.Queue()
+		disp.schedQueueMap = make(map[string]scheduler.QueueEnt)
+		for _, ent := range disp.schedQueue {
+			disp.schedQueueMap[ent.Container.UUID] = ent
 		}
-		disp.sQueueRefreshed = time.Now()
+		disp.schedQueueRefreshed = time.Now()
 	}
-	return disp.sQueue, disp.sQueueMap
+	return disp.schedQueue, disp.schedQueueMap
 }
 
 // Management API: scheduling queue entries for all active and queued
@@ -246,13 +246,13 @@ func (disp *dispatcher) apiContainers(w http.ResponseWriter, r *http.Request) {
 	var resp struct {
 		Items []scheduler.QueueEnt `json:"items"`
 	}
-	resp.Items, _ = disp.sQueueCurrent()
+	resp.Items, _ = disp.schedQueueCurrent()
 	json.NewEncoder(w).Encode(resp)
 }
 
 // Management API: scheduling queue entry for a specified container.
 func (disp *dispatcher) apiContainer(w http.ResponseWriter, r *http.Request) {
-	_, sq := disp.sQueueCurrent()
+	_, sq := disp.schedQueueCurrent()
 	ent, ok := sq[r.FormValue("container_uuid")]
 	if !ok {
 		httpserver.Error(w, "container not found", http.StatusNotFound)
diff --git a/lib/dispatchcloud/dispatcher_test.go b/lib/dispatchcloud/dispatcher_test.go
index 3c23d81d58..d651e73a67 100644
--- a/lib/dispatchcloud/dispatcher_test.go
+++ b/lib/dispatchcloud/dispatcher_test.go
@@ -404,7 +404,7 @@ func (s *DispatcherSuite) TestManagementAPI_Containers(c *check.C) {
 		Items []queueEnt
 	}
 	getContainers := func() containersResponse {
-		sQueueRefresh = time.Millisecond
+		schedQueueRefresh = time.Millisecond
 		req := httptest.NewRequest("GET", "/arvados/v1/dispatch/containers", nil)
 		req.Header.Set("Authorization", "Bearer abcdefgh")
 		resp := httptest.NewRecorder()

commit 52059aab9c65a9cbdff0b8d379ab8b0a25f62880
Author: Tom Clegg <tom at curii.com>
Date:   Tue Mar 12 17:48:18 2024 -0400

    21123: Clarify empty means empty string.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/doc/api/dispatch.html.textile.liquid b/doc/api/dispatch.html.textile.liquid
index 488545c7d4..7a916f0646 100644
--- a/doc/api/dispatch.html.textile.liquid
+++ b/doc/api/dispatch.html.textile.liquid
@@ -32,7 +32,7 @@ Return a list of containers that are either ready to dispatch, or being started/
 Each entry in the returned list of @items@ includes:
 * an @instance_type@ entry with the name and attributes of the instance type that will be used to schedule the container (chosen from the @InstanceTypes@ section of your cluster config file); and
 * a @container@ entry with selected attributes of the container itself, including @uuid@, @priority@, @runtime_constraints@, and @state@. Other fields of the container records are not loaded by the dispatcher, and will have empty/zero values here (e.g., @{...,"created_at":"0001-01-01T00:00:00Z","command":[],...}@).
-* a @scheduling_status@ entry: a brief explanation of the container's status in the dispatch queue, or empty if scheduling is not applicable, e.g., the container has already started running.
+* a @scheduling_status@ field with a brief explanation of the container's status in the dispatch queue, or the empty string if scheduling is not applicable, e.g., the container has already started running.
 
 Example response:
 
diff --git a/doc/api/methods/container_requests.html.textile.liquid b/doc/api/methods/container_requests.html.textile.liquid
index 770b56b697..130aa73c05 100644
--- a/doc/api/methods/container_requests.html.textile.liquid
+++ b/doc/api/methods/container_requests.html.textile.liquid
@@ -240,7 +240,7 @@ table(table table-bordered table-condensed).
 |_. Attribute|_. Type|_. Description|_. Examples|
 |uuid|string|The UUID of the container assigned to this request.||
 |state|string|The state of the container assigned to this request (see "container resource attributes":containers.html).||
-|scheduling_status|string|A brief explanation of the container's status in the dispatch queue. Empty if scheduling is not applicable, e.g., the container is running or finished.|@waiting for cloud resources: queue position 3@
+|scheduling_status|string|A brief explanation of the container's status in the dispatch queue, or the empty string if scheduling is not applicable, e.g., the container is running or finished.|@waiting for cloud resources: queue position 3@
 @creating new instance@
 @preparing runtime environment@|
 

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list