[ARVADOS] created: 1.3.0-3079-g59c6fadc7

Git user git at public.arvados.org
Thu Sep 3 17:11:10 UTC 2020


        at  59c6fadc7aa48cf1c7a68a8fd6fa9ab420eef7ba (commit)


commit 59c6fadc7aa48cf1c7a68a8fd6fa9ab420eef7ba
Author: Ward Vandewege <ward at curii.com>
Date:   Thu Sep 3 13:10:42 2020 -0400

    16636: a-d-c: add a time-to-ssh metric
    
    Arvados-DCO-1.1-Signed-off-by: Ward Vandewege <ward at curii.com>

diff --git a/lib/dispatchcloud/dispatcher_test.go b/lib/dispatchcloud/dispatcher_test.go
index 42decff31..3d602c08d 100644
--- a/lib/dispatchcloud/dispatcher_test.go
+++ b/lib/dispatchcloud/dispatcher_test.go
@@ -215,6 +215,9 @@ func (s *DispatcherSuite) TestDispatchToStubDriver(c *check.C) {
 	c.Check(resp.Body.String(), check.Matches, `(?ms).*boot_outcomes{outcome="success"} [^0].*`)
 	c.Check(resp.Body.String(), check.Matches, `(?ms).*instances_disappeared{state="shutdown"} [^0].*`)
 	c.Check(resp.Body.String(), check.Matches, `(?ms).*instances_disappeared{state="unknown"} 0\n.*`)
+	c.Check(resp.Body.String(), check.Matches, `(?ms).*time_to_ssh_seconds{quantile="0.95"} [0-9.]*`)
+	c.Check(resp.Body.String(), check.Matches, `(?ms).*time_to_ssh_seconds_count [0-9]*`)
+	c.Check(resp.Body.String(), check.Matches, `(?ms).*time_to_ssh_seconds_sum [0-9.]*`)
 }
 
 func (s *DispatcherSuite) TestAPIPermissions(c *check.C) {
diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index 435b6e43a..b4d75478b 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -177,6 +177,7 @@ type Pool struct {
 	mMemory            *prometheus.GaugeVec
 	mBootOutcomes      *prometheus.CounterVec
 	mDisappearances    *prometheus.CounterVec
+	mTimeToSSH         prometheus.Summary
 }
 
 type createCall struct {
@@ -324,6 +325,7 @@ func (wp *Pool) Create(it arvados.InstanceType) bool {
 			wp.tagKeyPrefix + tagKeyInstanceSecret: secret,
 		}
 		initCmd := TagVerifier{nil, secret}.InitCommand()
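+		// FIXME(ward): unfinished note -- presumably the instance creation time should be captured here for the time-to-ssh metric; confirm.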
 		inst, err := wp.instanceSet.Create(it, wp.imageID, tags, initCmd, wp.installPublicKey)
 		wp.mtx.Lock()
 		defer wp.mtx.Unlock()
@@ -626,6 +628,14 @@ func (wp *Pool) registerMetrics(reg *prometheus.Registry) {
 		wp.mDisappearances.WithLabelValues(v).Add(0)
 	}
 	reg.MustRegister(wp.mDisappearances)
+	wp.mTimeToSSH = prometheus.NewSummary(prometheus.SummaryOpts{
+		Namespace:  "arvados",
+		Subsystem:  "dispatchcloud",
+		Name:       "instances_time_to_ssh_seconds",
+		Help:       "Number of seconds between instance creation and the first successful SSH connection.",
+		Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.95: 0.005, 0.99: 0.001},
+	})
+	reg.MustRegister(wp.mTimeToSSH)
 }
 
 func (wp *Pool) runMetrics() {
diff --git a/lib/dispatchcloud/worker/worker.go b/lib/dispatchcloud/worker/worker.go
index 5d2360f3c..14f649334 100644
--- a/lib/dispatchcloud/worker/worker.go
+++ b/lib/dispatchcloud/worker/worker.go
@@ -108,6 +108,7 @@ type worker struct {
 	starting            map[string]*remoteRunner // remember to update state idle<->running when this changes
 	probing             chan struct{}
 	bootOutcomeReported bool
+	timeToSSHReported   bool
 }
 
 func (wkr *worker) onUnkillable(uuid string) {
@@ -140,6 +141,17 @@ func (wkr *worker) reportBootOutcome(outcome BootOutcome) {
 	wkr.bootOutcomeReported = true
 }
 
+// caller must have lock.
+func (wkr *worker) reportTimeToSSH() {
+	if wkr.timeToSSHReported {
+		return
+	}
+	if wkr.wp.mTimeToSSH != nil {
+		wkr.wp.mTimeToSSH.Observe(time.Since(wkr.appeared).Seconds())
+	}
+	wkr.timeToSSHReported = true
+}
+
 // caller must have lock.
 func (wkr *worker) setIdleBehavior(idleBehavior IdleBehavior) {
 	wkr.logger.WithField("IdleBehavior", idleBehavior).Info("set idle behavior")
@@ -365,6 +377,9 @@ func (wkr *worker) probeRunning() (running []string, reportsBroken, ok bool) {
 		}).WithError(err).Warn("probe failed")
 		return
 	}
+	wkr.mtx.Lock()
+	wkr.reportTimeToSSH()
+	wkr.mtx.Unlock()
 	ok = true
 	for _, s := range strings.Split(string(stdout), "\n") {
 		if s == "broken" {

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list