[ARVADOS] updated: 1.3.0-2834-g3e269516a
Git user
git at public.arvados.org
Mon Aug 3 18:38:19 UTC 2020
Summary of changes:
lib/dispatchcloud/worker/pool.go | 4 ++-
lib/dispatchcloud/worker/worker.go | 57 +++++++++++++++++++-------------------
2 files changed, 32 insertions(+), 29 deletions(-)
via 3e269516a7500d49ad90f8590d7dd9e81f135ad6 (commit)
from 6036c55e1239281746152e85dfabbc9ed3cb6864 (commit)
Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.
commit 3e269516a7500d49ad90f8590d7dd9e81f135ad6
Author: Ward Vandewege <ward at curii.com>
Date: Mon Aug 3 14:37:22 2020 -0400
16636: implement review comments.
Arvados-DCO-1.1-Signed-off-by: Ward Vandewege <ward at curii.com>
diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index bcf35e285..efcc102e8 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -437,6 +437,7 @@ func (wp *Pool) Shutdown(it arvados.InstanceType) bool {
for _, wkr := range wp.workers {
if wkr.idleBehavior != IdleBehaviorHold && wkr.state == tryState && wkr.instType == it {
logger.WithField("Instance", wkr.instance.ID()).Info("shutting down")
+ wkr.reportBootOutcome(BootOutcomeAborted)
wkr.shutdown()
return true
}
@@ -609,7 +610,7 @@ func (wp *Pool) registerMetrics(reg *prometheus.Registry) {
Subsystem: "dispatchcloud",
Name: "instances_disappeared",
Help: "Number of occurrences of an instance disappearing from the cloud provider's list of instances.",
- }, []string{"state"})
+ }, []string{"outcome"})
for _, v := range stateString {
wp.mDisappearances.WithLabelValues(v).Add(0)
}
@@ -776,6 +777,7 @@ func (wp *Pool) KillInstance(id cloud.InstanceID, reason string) error {
return errors.New("instance not found")
}
wkr.logger.WithField("Reason", reason).Info("shutting down")
+ wkr.reportBootOutcome(BootOutcomeAborted)
wkr.shutdown()
return nil
}
diff --git a/lib/dispatchcloud/worker/worker.go b/lib/dispatchcloud/worker/worker.go
index 6878bb065..5d2360f3c 100644
--- a/lib/dispatchcloud/worker/worker.go
+++ b/lib/dispatchcloud/worker/worker.go
@@ -43,33 +43,6 @@ var stateString = map[State]string{
StateShutdown: "shutdown",
}
-// BootOutcome is the result of a worker boot. It is used as a label in a metric.
-type BootOutcome string
-
-const (
- BootOutcomeFailed BootOutcome = "failure"
- BootOutcomeSucceeded BootOutcome = "success"
- BootOutcomeIdleShutdown BootOutcome = "idle shutdown"
- BootOutcomeDisappeared BootOutcome = "disappeared"
-)
-
-var validBootOutcomes = map[BootOutcome]bool{
- BootOutcomeFailed: true,
- BootOutcomeSucceeded: true,
- BootOutcomeIdleShutdown: true,
- BootOutcomeDisappeared: true,
-}
-
-func (wkr *worker) reportBootOutcome(outcome BootOutcome) {
- if wkr.bootOutcomeReported {
- return
- }
- if wkr.wp.mBootOutcomes != nil {
- wkr.wp.mBootOutcomes.WithLabelValues(string(outcome)).Inc()
- }
- wkr.bootOutcomeReported = true
-}
-
// String implements fmt.Stringer.
func (s State) String() string {
return stateString[s]
@@ -81,6 +54,23 @@ func (s State) MarshalText() ([]byte, error) {
return []byte(stateString[s]), nil
}
+// BootOutcome is the result of a worker boot. It is used as a label in a metric.
+type BootOutcome string
+
+const (
+ BootOutcomeFailed BootOutcome = "failure"
+ BootOutcomeSucceeded BootOutcome = "success"
+ BootOutcomeAborted BootOutcome = "aborted"
+ BootOutcomeDisappeared BootOutcome = "disappeared"
+)
+
+var validBootOutcomes = map[BootOutcome]bool{
+ BootOutcomeFailed: true,
+ BootOutcomeSucceeded: true,
+ BootOutcomeAborted: true,
+ BootOutcomeDisappeared: true,
+}
+
// IdleBehavior indicates the behavior desired when a node becomes idle.
type IdleBehavior string
@@ -139,6 +129,17 @@ func (wkr *worker) onKilled(uuid string) {
go wkr.wp.notify()
}
+// caller must have lock.
+func (wkr *worker) reportBootOutcome(outcome BootOutcome) {
+ if wkr.bootOutcomeReported {
+ return
+ }
+ if wkr.wp.mBootOutcomes != nil {
+ wkr.wp.mBootOutcomes.WithLabelValues(string(outcome)).Inc()
+ }
+ wkr.bootOutcomeReported = true
+}
+
// caller must have lock.
func (wkr *worker) setIdleBehavior(idleBehavior IdleBehavior) {
wkr.logger.WithField("IdleBehavior", idleBehavior).Info("set idle behavior")
@@ -499,7 +500,7 @@ func (wkr *worker) shutdownIfIdle() bool {
"IdleDuration": stats.Duration(time.Since(wkr.busy)),
"IdleBehavior": wkr.idleBehavior,
}).Info("shutdown worker")
- wkr.reportBootOutcome(BootOutcomeIdleShutdown)
+ wkr.reportBootOutcome(BootOutcomeAborted)
wkr.shutdown()
return true
}
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list