[ARVADOS] updated: 1.2.0-300-g3877bdae2
Git user
git at public.curoverse.com
Thu Nov 15 11:00:27 EST 2018
Summary of changes:
lib/dispatchcloud/cmd.go | 4 +-
lib/dispatchcloud/container/queue.go | 89 +++-
lib/dispatchcloud/dispatcher.go | 80 ++--
lib/dispatchcloud/dispatcher_test.go | 31 +-
lib/dispatchcloud/node_size.go | 62 ---
lib/dispatchcloud/readme.go | 71 ++-
lib/dispatchcloud/scheduler/fix_stale_locks.go | 24 +-
lib/dispatchcloud/scheduler/interfaces.go | 3 +
lib/dispatchcloud/scheduler/map.go | 144 ------
lib/dispatchcloud/scheduler/run_queue.go | 153 +++++++
.../scheduler/{map_test.go => run_queue_test.go} | 240 +++++-----
lib/dispatchcloud/scheduler/scheduler.go | 111 +++++
lib/dispatchcloud/scheduler/sync.go | 49 +-
lib/dispatchcloud/ssh_executor/executor.go | 18 +
lib/dispatchcloud/test/fixtures.go | 1 +
lib/dispatchcloud/test/queue.go | 55 ++-
lib/dispatchcloud/test/stub_driver.go | 8 +-
lib/dispatchcloud/worker/pool.go | 504 +++++++--------------
lib/dispatchcloud/worker/pool_test.go | 21 +-
lib/dispatchcloud/worker/worker.go | 272 ++++++++++-
.../crunch-dispatch-slurm/crunch-dispatch-slurm.go | 2 +-
.../crunch-dispatch-slurm/node_type.go | 68 +--
services/crunch-run/background.go | 9 +-
23 files changed, 1129 insertions(+), 890 deletions(-)
delete mode 100644 lib/dispatchcloud/scheduler/map.go
create mode 100644 lib/dispatchcloud/scheduler/run_queue.go
rename lib/dispatchcloud/scheduler/{map_test.go => run_queue_test.go} (52%)
create mode 100644 lib/dispatchcloud/scheduler/scheduler.go
copy lib/dispatchcloud/node_size.go => services/crunch-dispatch-slurm/node_type.go (53%)
via 3877bdae211807bd4c8cc5d824ffbede288dd708 (commit)
via 0f65ce49f183f7936d694961ce0f5980a8abffcc (commit)
via ff8c3cfde1e73c1c0adccb670040e98d82120e99 (commit)
via 15b7d1d4fa484641d846c1750487e640bfe4be09 (commit)
via 93671366e7633cbf0ca3cab68395e211e3afc31c (commit)
via d6bf3b66c2c938ca2ce2bda3276c0c0f4e54f1b6 (commit)
via 49f46db107c8226d70ed6d489cd000bfba82ca2f (commit)
via 3cb857be0aad14d0ebd631305a7a5c0f847811d4 (commit)
via 3654f222637b00728158300e700d615c4c913b29 (commit)
via 1c1e245bc3163a7841859d5ff75d0cff59dfc90c (commit)
via f33f8084159f701b8a1435e8cf77b7ab7f3417ae (commit)
via e5e2dc5e6bf1bce9893517470632295d42bb5f1c (commit)
via 359dc0dba7e93744c583b431d428de62fb8e0c90 (commit)
via 17479bd75a29c52470abe0049cb447e114eb39e9 (commit)
via 67464732a18e0aac11d89a75987151543b704cf6 (commit)
via a5ef124795af5b28a97c98e69a89c4c6e99cf0b7 (commit)
via a5fe34a8cd05c4e55deaec599347f65e6d662d22 (commit)
via 8ef9e211fa1eb91deb1b6aa7245cd7e5b8dd4c33 (commit)
via 468a33e9931c3dbadc83332a3b85d7677c9e4dc3 (commit)
via 0f159e5eb304e08a0704156046762709bd6e1eba (commit)
via 3ebb991101ef62387268d594e3e4307a0f5c4e9e (commit)
via 968519edf4c9150ac65991aea5f977356551b4e4 (commit)
via 71460cf96c9b43c8ab0a38118c3745a4c0e6d7e9 (commit)
via 438ce6f86261376880b186775d7ba5fbade560a6 (commit)
via c11e1e9b7e04b819a8a1794acc3ffb0a816715b7 (commit)
via b10284a496cd2d953b865144a5f28265fc277938 (commit)
via 8dff5fe14f23f3db9efaf5d71a21668738a9f164 (commit)
via c81a653c1e800c40a0c6e1a5d94cddd6620b5e52 (commit)
via 8162f74b2611b686cc8bcec225c6724c71b8926d (commit)
via 1b47c040b802c09d8652b952704e2ad09f31aa27 (commit)
via 8c44ed01463d5e0f0d167b37474b0e78fe11bd8d (commit)
from c1b959750a78509a2f5182dbd17685d010826b64 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
commit 3877bdae211807bd4c8cc5d824ffbede288dd708
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Thu Nov 15 10:59:28 2018 -0500
14360: Retry shutdown after configured timeout.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/lib/dispatchcloud/dispatcher_test.go b/lib/dispatchcloud/dispatcher_test.go
index 3ac9370a3..83797e984 100644
--- a/lib/dispatchcloud/dispatcher_test.go
+++ b/lib/dispatchcloud/dispatcher_test.go
@@ -188,6 +188,8 @@ func (s *DispatcherSuite) SetUpTest(c *check.C) {
},
HostKey: hostpriv,
AuthorizedKeys: []ssh.PublicKey{dispatchpub},
+
+ ErrorRateDestroy: 0.1,
}
s.cluster = &arvados.Cluster{
diff --git a/lib/dispatchcloud/test/stub_driver.go b/lib/dispatchcloud/test/stub_driver.go
index 53e312ea2..bfe0f8f26 100644
--- a/lib/dispatchcloud/test/stub_driver.go
+++ b/lib/dispatchcloud/test/stub_driver.go
@@ -27,7 +27,10 @@ type StubDriver struct {
Exec StubExecFunc
HostKey ssh.Signer
AuthorizedKeys []ssh.PublicKey
- instanceSets []*StubInstanceSet
+
+ ErrorRateDestroy float64
+
+ instanceSets []*StubInstanceSet
}
// InstanceSet returns a new *StubInstanceSet.
@@ -142,6 +145,9 @@ func (si stubInstance) Address() string {
}
func (si stubInstance) Destroy() error {
+ if math_rand.Float64() < si.ss.sis.driver.ErrorRateDestroy {
+ return errors.New("instance could not be destroyed")
+ }
si.ss.SSHService.Close()
sis := si.ss.sis
sis.mtx.Lock()
diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index be66895a9..37add6d3d 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -60,6 +60,7 @@ const (
defaultTimeoutIdle = time.Minute
defaultTimeoutBooting = time.Minute * 10
defaultTimeoutProbe = time.Minute * 10
+ defaultTimeoutShutdown = time.Second * 10
)
func duration(conf arvados.Duration, def time.Duration) time.Duration {
@@ -88,6 +89,7 @@ func NewPool(logger logrus.FieldLogger, reg *prometheus.Registry, instanceSet cl
timeoutIdle: duration(cluster.CloudVMs.TimeoutIdle, defaultTimeoutIdle),
timeoutBooting: duration(cluster.CloudVMs.TimeoutBooting, defaultTimeoutBooting),
timeoutProbe: duration(cluster.CloudVMs.TimeoutProbe, defaultTimeoutProbe),
+ timeoutShutdown: duration(cluster.CloudVMs.TimeoutShutdown, defaultTimeoutShutdown),
}
wp.registerMetrics(reg)
go func() {
@@ -115,6 +117,7 @@ type Pool struct {
timeoutIdle time.Duration
timeoutBooting time.Duration
timeoutProbe time.Duration
+ timeoutShutdown time.Duration
// private state
subscribers map[<-chan struct{}]chan<- struct{}
@@ -230,19 +233,21 @@ func (wp *Pool) AtQuota() bool {
}
// Add or update worker attached to the given instance. Use
-// initialState if a new worker is created. Caller must have lock.
+// initialState if a new worker is created.
//
-// Returns true when a new worker is created.
-func (wp *Pool) updateWorker(inst cloud.Instance, it arvados.InstanceType, initialState State) bool {
+// The second return value is true if a new worker is created.
+//
+// Caller must have lock.
+func (wp *Pool) updateWorker(inst cloud.Instance, it arvados.InstanceType, initialState State) (*worker, bool) {
id := inst.ID()
- if wp.workers[id] != nil {
- wp.workers[id].executor.SetTarget(inst)
- wp.workers[id].instance = inst
- wp.workers[id].updated = time.Now()
- if initialState == StateBooting && wp.workers[id].state == StateUnknown {
- wp.workers[id].state = StateBooting
+ if wkr := wp.workers[id]; wkr != nil {
+ wkr.executor.SetTarget(inst)
+ wkr.instance = inst
+ wkr.updated = time.Now()
+ if initialState == StateBooting && wkr.state == StateUnknown {
+ wkr.state = StateBooting
}
- return false
+ return wkr, false
}
if initialState == StateUnknown && inst.Tags()[tagKeyHold] != "" {
initialState = StateHold
@@ -253,7 +258,7 @@ func (wp *Pool) updateWorker(inst cloud.Instance, it arvados.InstanceType, initi
})
logger.WithField("State", initialState).Infof("instance appeared in cloud")
now := time.Now()
- wp.workers[id] = &worker{
+ wkr := &worker{
mtx: &wp.mtx,
wp: wp,
logger: logger,
@@ -268,7 +273,8 @@ func (wp *Pool) updateWorker(inst cloud.Instance, it arvados.InstanceType, initi
starting: make(map[string]struct{}),
probing: make(chan struct{}, 1),
}
- return true
+ wp.workers[id] = wkr
+ return wkr, true
}
// caller must have lock.
@@ -619,8 +625,11 @@ func (wp *Pool) sync(threshold time.Time, instances []cloud.Instance) {
wp.logger.WithField("Instance", inst).Errorf("unknown InstanceType tag %q --- ignoring", itTag)
continue
}
- if wp.updateWorker(inst, it, StateUnknown) {
+ if wkr, isNew := wp.updateWorker(inst, it, StateUnknown); isNew {
notify = true
+ } else if wkr.state == StateShutdown && time.Since(wkr.destroyed) > wp.timeoutShutdown {
+ wp.logger.WithField("Instance", inst).Info("worker still listed after shutdown; retrying")
+ wkr.shutdown()
}
}
diff --git a/lib/dispatchcloud/worker/worker.go b/lib/dispatchcloud/worker/worker.go
index c3825db52..c8e6e86f8 100644
--- a/lib/dispatchcloud/worker/worker.go
+++ b/lib/dispatchcloud/worker/worker.go
@@ -58,19 +58,20 @@ type worker struct {
executor Executor
wp *Pool
- mtx sync.Locker // must be wp's Locker.
- state State
- instance cloud.Instance
- instType arvados.InstanceType
- vcpus int64
- memory int64
- probed time.Time
- updated time.Time
- busy time.Time
- lastUUID string
- running map[string]struct{} // remember to update state idle<->running when this changes
- starting map[string]struct{} // remember to update state idle<->running when this changes
- probing chan struct{}
+ mtx sync.Locker // must be wp's Locker.
+ state State
+ instance cloud.Instance
+ instType arvados.InstanceType
+ vcpus int64
+ memory int64
+ probed time.Time
+ updated time.Time
+ busy time.Time
+ destroyed time.Time
+ lastUUID string
+ running map[string]struct{} // remember to update state idle<->running when this changes
+ starting map[string]struct{} // remember to update state idle<->running when this changes
+ probing chan struct{}
}
// caller must have lock.
@@ -300,7 +301,9 @@ func (wkr *worker) shutdownIfIdle() bool {
// caller must have lock
func (wkr *worker) shutdown() {
- wkr.updated = time.Now()
+ now := time.Now()
+ wkr.updated = now
+ wkr.destroyed = now
wkr.state = StateShutdown
go func() {
err := wkr.instance.Destroy()
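
A rough illustration of the retry logic this commit adds -- not the actual Pool/worker code -- modeling only the decision made in sync(): once an instance has been in StateShutdown longer than the configured timeout and still appears in the cloud provider's instance list, Destroy() is attempted again. The worker struct and retryShutdown helper below are hypothetical stand-ins.

    package main

    import (
        "fmt"
        "time"
    )

    // worker is a toy stand-in for the dispatcher's worker record; it only keeps
    // the timestamp of the last shutdown attempt (wkr.destroyed above).
    type worker struct {
        destroyed time.Time
    }

    // retryShutdown reports whether another Destroy() call should be issued for an
    // instance that is still listed by the cloud provider: true once the configured
    // shutdown timeout has elapsed since the previous attempt.
    func retryShutdown(wkr *worker, timeoutShutdown time.Duration) bool {
        return time.Since(wkr.destroyed) > timeoutShutdown
    }

    func main() {
        wkr := &worker{destroyed: time.Now().Add(-15 * time.Second)}
        fmt.Println(retryShutdown(wkr, 10*time.Second)) // true: still listed, retry
    }
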
commit 0f65ce49f183f7936d694961ce0f5980a8abffcc
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Thu Nov 8 16:48:19 2018 -0500
14360: Fix false negative in test suite.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/lib/dispatchcloud/dispatcher_test.go b/lib/dispatchcloud/dispatcher_test.go
index 248986bf3..3ac9370a3 100644
--- a/lib/dispatchcloud/dispatcher_test.go
+++ b/lib/dispatchcloud/dispatcher_test.go
@@ -113,6 +113,10 @@ func (fvm *fakeVM) exec(queue *test.Queue, onComplete, onCancel func(uuid string
logger.Print("[test] container was killed")
return
}
+ // TODO: Check whether the stub instance has
+ // been destroyed, and if so, don't call
+ // onComplete. Then "container finished twice"
+ // can be classified as a bug.
if crashluck < fvm.crunchRunCrashRate {
logger.Print("[test] crashing crunch-run stub")
if onCancel != nil && ctr.State == arvados.ContainerStateRunning {
@@ -259,7 +263,7 @@ func (s *DispatcherSuite) TestDispatchToStubDriver(c *check.C) {
mtx.Lock()
defer mtx.Unlock()
if _, ok := waiting[uuid]; !ok {
- c.Errorf("container completed twice: %s", uuid)
+ c.Logf("container completed twice: %s -- perhaps completed after stub instance was killed?", uuid)
}
delete(waiting, uuid)
if len(waiting) == 0 {
commit ff8c3cfde1e73c1c0adccb670040e98d82120e99
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Thu Nov 8 16:48:17 2018 -0500
14360: Don't block scheduling while locking containers.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/lib/dispatchcloud/scheduler/run_queue.go b/lib/dispatchcloud/scheduler/run_queue.go
index 9fc1a1658..7b432adc6 100644
--- a/lib/dispatchcloud/scheduler/run_queue.go
+++ b/lib/dispatchcloud/scheduler/run_queue.go
@@ -34,6 +34,7 @@ func (sch *Scheduler) runQueue() {
dontstart := map[arvados.InstanceType]bool{}
var overquota []container.QueueEnt // entries that are unmappable because of worker pool quota
+tryrun:
for i, ctr := range sorted {
ctr, it := ctr.Container, ctr.InstanceType
logger := sch.logger.WithFields(logrus.Fields{
@@ -43,60 +44,53 @@ func (sch *Scheduler) runQueue() {
if _, running := running[ctr.UUID]; running || ctr.Priority < 1 {
continue
}
- if ctr.State == arvados.ContainerStateQueued {
+ switch ctr.State {
+ case arvados.ContainerStateQueued:
if unalloc[it] < 1 && sch.pool.AtQuota() {
- logger.Debugf("not locking: AtQuota and no unalloc workers")
+ logger.Debug("not locking: AtQuota and no unalloc workers")
overquota = sorted[i:]
- break
+ break tryrun
}
- logger.Debugf("locking")
- err := sch.queue.Lock(ctr.UUID)
- if err != nil {
- logger.WithError(err).Warnf("lock error")
+ sch.bgLock(logger, ctr.UUID)
+ unalloc[it]--
+ case arvados.ContainerStateLocked:
+ if unalloc[it] < 1 {
+ if sch.pool.AtQuota() {
+ logger.Debug("not starting: AtQuota and no unalloc workers")
+ overquota = sorted[i:]
+ break tryrun
+ }
+ logger.Info("creating new instance")
+ err := sch.pool.Create(it)
+ if err != nil {
+ if _, ok := err.(cloud.QuotaError); !ok {
+ logger.WithError(err).Warn("error creating worker")
+ }
+ sch.queue.Unlock(ctr.UUID)
+ // Don't let lower-priority
+ // containers starve this one
+ // by keeping idle
+ // workers alive on different
+ // instance types. TODO:
+ // avoid getting starved here
+ // if instances of a specific
+ // type always fail.
+ overquota = sorted[i:]
+ break tryrun
+ }
unalloc[it]++
- continue
}
- var ok bool
- ctr, ok = sch.queue.Get(ctr.UUID)
- if !ok {
- logger.Error("(BUG?) container disappeared from queue after Lock succeeded")
- continue
- }
- if ctr.State != arvados.ContainerStateLocked {
- logger.Warnf("(race?) container has state=%q after Lock succeeded", ctr.State)
- }
- }
- if ctr.State != arvados.ContainerStateLocked {
- continue
- }
- if unalloc[it] < 1 {
- logger.Info("creating new instance")
- err := sch.pool.Create(it)
- if err != nil {
- if _, ok := err.(cloud.QuotaError); !ok {
- logger.WithError(err).Warn("error creating worker")
- }
- sch.queue.Unlock(ctr.UUID)
- // Don't let lower-priority containers
- // starve this one by using keeping
- // idle workers alive on different
- // instance types. TODO: avoid
- // getting starved here if instances
- // of a specific type always fail.
- overquota = sorted[i:]
- break
+
+ if dontstart[it] {
+ // We already tried & failed to start
+ // a higher-priority container on the
+ // same instance type. Don't let this
+ // one sneak in ahead of it.
+ } else if sch.pool.StartContainer(it, ctr) {
+ unalloc[it]--
+ } else {
+ dontstart[it] = true
}
- unalloc[it]++
- }
- if dontstart[it] {
- // We already tried & failed to start a
- // higher-priority container on the same
- // instance type. Don't let this one sneak in
- // ahead of it.
- } else if sch.pool.StartContainer(it, ctr) {
- unalloc[it]--
- } else {
- dontstart[it] = true
}
}
@@ -124,3 +118,36 @@ func (sch *Scheduler) runQueue() {
}
}
}
+
+// Start an API call to lock the given container, and return
+// immediately while waiting for the response in a new goroutine. Do
+// nothing if a lock request is already in progress for this
+// container.
+func (sch *Scheduler) bgLock(logger logrus.FieldLogger, uuid string) {
+ logger.Debug("locking")
+ sch.mtx.Lock()
+ defer sch.mtx.Unlock()
+ if sch.locking[uuid] {
+ logger.Debug("locking in progress, doing nothing")
+ return
+ }
+ sch.locking[uuid] = true
+ go func() {
+ defer func() {
+ sch.mtx.Lock()
+ defer sch.mtx.Unlock()
+ delete(sch.locking, uuid)
+ }()
+ err := sch.queue.Lock(uuid)
+ if err != nil {
+ logger.WithError(err).Warn("error locking container")
+ return
+ }
+ ctr, ok := sch.queue.Get(uuid)
+ if !ok {
+ logger.Error("(BUG?) container disappeared from queue after Lock succeeded")
+ } else if ctr.State != arvados.ContainerStateLocked {
+ logger.Warnf("(race?) container has state=%q after Lock succeeded", ctr.State)
+ }
+ }()
+}
diff --git a/lib/dispatchcloud/scheduler/run_queue_test.go b/lib/dispatchcloud/scheduler/run_queue_test.go
index b8a03275f..35db70522 100644
--- a/lib/dispatchcloud/scheduler/run_queue_test.go
+++ b/lib/dispatchcloud/scheduler/run_queue_test.go
@@ -108,7 +108,7 @@ func (*SchedulerSuite) TestUseIdleWorkers(c *check.C) {
{
UUID: test.ContainerUUID(1),
Priority: 1,
- State: arvados.ContainerStateQueued,
+ State: arvados.ContainerStateLocked,
RuntimeConstraints: arvados.RuntimeConstraints{
VCPUs: 1,
RAM: 1 << 30,
@@ -117,7 +117,7 @@ func (*SchedulerSuite) TestUseIdleWorkers(c *check.C) {
{
UUID: test.ContainerUUID(2),
Priority: 2,
- State: arvados.ContainerStateQueued,
+ State: arvados.ContainerStateLocked,
RuntimeConstraints: arvados.RuntimeConstraints{
VCPUs: 1,
RAM: 1 << 30,
@@ -126,7 +126,7 @@ func (*SchedulerSuite) TestUseIdleWorkers(c *check.C) {
{
UUID: test.ContainerUUID(3),
Priority: 3,
- State: arvados.ContainerStateQueued,
+ State: arvados.ContainerStateLocked,
RuntimeConstraints: arvados.RuntimeConstraints{
VCPUs: 1,
RAM: 1 << 30,
@@ -135,7 +135,7 @@ func (*SchedulerSuite) TestUseIdleWorkers(c *check.C) {
{
UUID: test.ContainerUUID(4),
Priority: 4,
- State: arvados.ContainerStateQueued,
+ State: arvados.ContainerStateLocked,
RuntimeConstraints: arvados.RuntimeConstraints{
VCPUs: 1,
RAM: 1 << 30,
@@ -154,11 +154,11 @@ func (*SchedulerSuite) TestUseIdleWorkers(c *check.C) {
test.InstanceType(2): 2,
},
running: map[string]time.Time{},
- canCreate: 1,
+ canCreate: 0,
}
New(logger, &queue, &pool, time.Millisecond, time.Millisecond).runQueue()
c.Check(pool.creates, check.DeepEquals, []arvados.InstanceType{test.InstanceType(1)})
- c.Check(pool.starts, check.DeepEquals, []string{test.ContainerUUID(4), test.ContainerUUID(3)})
+ c.Check(pool.starts, check.DeepEquals, []string{test.ContainerUUID(4)})
c.Check(pool.running, check.HasLen, 1)
for uuid := range pool.running {
c.Check(uuid, check.Equals, uuids[4])
@@ -169,9 +169,10 @@ func (*SchedulerSuite) TestUseIdleWorkers(c *check.C) {
// Create(), if AtQuota() is true.
func (*SchedulerSuite) TestShutdownAtQuota(c *check.C) {
for quota := 0; quota < 2; quota++ {
+ c.Logf("quota=%d", quota)
shouldCreate := []arvados.InstanceType{}
- for i := 1; i < 1+quota; i++ {
- shouldCreate = append(shouldCreate, test.InstanceType(i))
+ for i := 0; i < quota; i++ {
+ shouldCreate = append(shouldCreate, test.InstanceType(1))
}
queue := test.Queue{
ChooseType: func(ctr *arvados.Container) (arvados.InstanceType, error) {
@@ -181,7 +182,7 @@ func (*SchedulerSuite) TestShutdownAtQuota(c *check.C) {
{
UUID: test.ContainerUUID(1),
Priority: 1,
- State: arvados.ContainerStateQueued,
+ State: arvados.ContainerStateLocked,
RuntimeConstraints: arvados.RuntimeConstraints{
VCPUs: 1,
RAM: 1 << 30,
@@ -223,7 +224,7 @@ func (*SchedulerSuite) TestStartWhileCreating(c *check.C) {
test.InstanceType(2): 1,
},
running: map[string]time.Time{},
- canCreate: 2,
+ canCreate: 4,
}
queue := test.Queue{
ChooseType: func(ctr *arvados.Container) (arvados.InstanceType, error) {
@@ -234,7 +235,7 @@ func (*SchedulerSuite) TestStartWhileCreating(c *check.C) {
// create a new worker
UUID: test.ContainerUUID(1),
Priority: 1,
- State: arvados.ContainerStateQueued,
+ State: arvados.ContainerStateLocked,
RuntimeConstraints: arvados.RuntimeConstraints{
VCPUs: 1,
RAM: 1 << 30,
@@ -244,7 +245,7 @@ func (*SchedulerSuite) TestStartWhileCreating(c *check.C) {
// tentatively map to unalloc worker
UUID: test.ContainerUUID(2),
Priority: 2,
- State: arvados.ContainerStateQueued,
+ State: arvados.ContainerStateLocked,
RuntimeConstraints: arvados.RuntimeConstraints{
VCPUs: 1,
RAM: 1 << 30,
@@ -254,7 +255,7 @@ func (*SchedulerSuite) TestStartWhileCreating(c *check.C) {
// start now on idle worker
UUID: test.ContainerUUID(3),
Priority: 3,
- State: arvados.ContainerStateQueued,
+ State: arvados.ContainerStateLocked,
RuntimeConstraints: arvados.RuntimeConstraints{
VCPUs: 1,
RAM: 1 << 30,
@@ -264,7 +265,7 @@ func (*SchedulerSuite) TestStartWhileCreating(c *check.C) {
// create a new worker
UUID: test.ContainerUUID(4),
Priority: 4,
- State: arvados.ContainerStateQueued,
+ State: arvados.ContainerStateLocked,
RuntimeConstraints: arvados.RuntimeConstraints{
VCPUs: 2,
RAM: 2 << 30,
@@ -274,7 +275,7 @@ func (*SchedulerSuite) TestStartWhileCreating(c *check.C) {
// tentatively map to unalloc worker
UUID: test.ContainerUUID(5),
Priority: 5,
- State: arvados.ContainerStateQueued,
+ State: arvados.ContainerStateLocked,
RuntimeConstraints: arvados.RuntimeConstraints{
VCPUs: 2,
RAM: 2 << 30,
@@ -284,7 +285,7 @@ func (*SchedulerSuite) TestStartWhileCreating(c *check.C) {
// start now on idle worker
UUID: test.ContainerUUID(6),
Priority: 6,
- State: arvados.ContainerStateQueued,
+ State: arvados.ContainerStateLocked,
RuntimeConstraints: arvados.RuntimeConstraints{
VCPUs: 2,
RAM: 2 << 30,
diff --git a/lib/dispatchcloud/scheduler/scheduler.go b/lib/dispatchcloud/scheduler/scheduler.go
index 0be8edb7b..9a5fb10d5 100644
--- a/lib/dispatchcloud/scheduler/scheduler.go
+++ b/lib/dispatchcloud/scheduler/scheduler.go
@@ -32,6 +32,9 @@ type Scheduler struct {
staleLockTimeout time.Duration
queueUpdateInterval time.Duration
+ locking map[string]bool
+ mtx sync.Mutex
+
runOnce sync.Once
stop chan struct{}
}
@@ -48,6 +51,7 @@ func New(logger logrus.FieldLogger, queue ContainerQueue, pool WorkerPool, stale
staleLockTimeout: staleLockTimeout,
queueUpdateInterval: queueUpdateInterval,
stop: make(chan struct{}),
+ locking: map[string]bool{},
}
}
diff --git a/lib/dispatchcloud/test/queue.go b/lib/dispatchcloud/test/queue.go
index 152094f52..fda04d52b 100644
--- a/lib/dispatchcloud/test/queue.go
+++ b/lib/dispatchcloud/test/queue.go
@@ -108,7 +108,7 @@ func (q *Queue) notify() {
func (q *Queue) changeState(uuid string, from, to arvados.ContainerState) error {
ent := q.entries[uuid]
if ent.Container.State != from {
- return fmt.Errorf("lock failed: state=%q", ent.Container.State)
+ return fmt.Errorf("changeState failed: state=%q", ent.Container.State)
}
ent.Container.State = to
q.entries[uuid] = ent
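
A minimal sketch of the non-blocking lock pattern introduced by bgLock above, using hypothetical names (lockTracker, doLock) in place of the real Scheduler and queue types: a mutex-guarded "in progress" set ensures at most one background Lock API call per container, and the scheduling loop never waits for the response.

    package main

    import (
        "fmt"
        "sync"
        "time"
    )

    // lockTracker is a stand-in for the scheduler's locking map and mutex.
    type lockTracker struct {
        mtx     sync.Mutex
        locking map[string]bool
    }

    // bgLock starts doLock(uuid) in a new goroutine and returns immediately; if a
    // lock request is already in progress for uuid, it does nothing.
    func (lt *lockTracker) bgLock(uuid string, doLock func(string) error) {
        lt.mtx.Lock()
        defer lt.mtx.Unlock()
        if lt.locking[uuid] {
            return // locking in progress, doing nothing
        }
        lt.locking[uuid] = true
        go func() {
            defer func() {
                lt.mtx.Lock()
                defer lt.mtx.Unlock()
                delete(lt.locking, uuid)
            }()
            if err := doLock(uuid); err != nil {
                fmt.Println("error locking container:", err)
            }
        }()
    }

    func main() {
        lt := &lockTracker{locking: map[string]bool{}}
        slowLock := func(string) error { time.Sleep(50 * time.Millisecond); return nil }
        lt.bgLock("ctr-1", slowLock)
        lt.bgLock("ctr-1", slowLock) // deduplicated: first request still in flight
        time.Sleep(100 * time.Millisecond)
    }
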
commit 15b7d1d4fa484641d846c1750487e640bfe4be09
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Wed Nov 7 14:10:01 2018 -0500
14360: Move single-worker Pool methods to worker type.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index 45e9da37a..be66895a9 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -5,7 +5,6 @@
package worker
import (
- "bytes"
"io"
"sort"
"strings"
@@ -137,22 +136,6 @@ type Pool struct {
mMemoryInuse prometheus.Gauge
}
-type worker struct {
- state State
- instance cloud.Instance
- executor Executor
- instType arvados.InstanceType
- vcpus int64
- memory int64
- probed time.Time
- updated time.Time
- busy time.Time
- lastUUID string
- running map[string]struct{} // remember to update state idle<->running when this changes
- starting map[string]struct{} // remember to update state idle<->running when this changes
- probing chan struct{}
-}
-
// Subscribe returns a channel that becomes ready whenever a worker's
// state changes.
//
@@ -264,13 +247,16 @@ func (wp *Pool) updateWorker(inst cloud.Instance, it arvados.InstanceType, initi
if initialState == StateUnknown && inst.Tags()[tagKeyHold] != "" {
initialState = StateHold
}
- wp.logger.WithFields(logrus.Fields{
+ logger := wp.logger.WithFields(logrus.Fields{
"InstanceType": it.Name,
"Instance": inst,
- "State": initialState,
- }).Infof("instance appeared in cloud")
+ })
+ logger.WithField("State", initialState).Infof("instance appeared in cloud")
now := time.Now()
wp.workers[id] = &worker{
+ mtx: &wp.mtx,
+ wp: wp,
+ logger: logger,
executor: wp.newExecutor(inst),
state: initialState,
instance: inst,
@@ -285,6 +271,11 @@ func (wp *Pool) updateWorker(inst cloud.Instance, it arvados.InstanceType, initi
return true
}
+// caller must have lock.
+func (wp *Pool) notifyExited(uuid string, t time.Time) {
+ wp.exited[uuid] = t
+}
+
// Shutdown shuts down a worker with the given type, or returns false
// if all workers with the given type are busy.
func (wp *Pool) Shutdown(it arvados.InstanceType) bool {
@@ -298,9 +289,8 @@ func (wp *Pool) Shutdown(it arvados.InstanceType) bool {
// time (Idle) or the earliest create time (Booting)
for _, wkr := range wp.workers {
if wkr.state == tryState && wkr.instType == it {
- logger = logger.WithField("Instance", wkr.instance)
- logger.Info("shutting down")
- wp.shutdown(wkr, logger)
+ logger.WithField("Instance", wkr.instance).Info("shutting down")
+ wkr.shutdown()
return true
}
}
@@ -308,23 +298,6 @@ func (wp *Pool) Shutdown(it arvados.InstanceType) bool {
return false
}
-// caller must have lock
-func (wp *Pool) shutdown(wkr *worker, logger logrus.FieldLogger) {
- wkr.updated = time.Now()
- wkr.state = StateShutdown
- go func() {
- err := wkr.instance.Destroy()
- if err != nil {
- logger.WithError(err).WithField("Instance", wkr.instance).Warn("shutdown failed")
- return
- }
- wp.mtx.Lock()
- wp.atQuotaUntil = time.Now()
- wp.mtx.Unlock()
- wp.notify()
- }()
-}
-
// Workers returns the current number of workers in each state.
func (wp *Pool) Workers() map[State]int {
wp.setupOnce.Do(wp.setup)
@@ -360,11 +333,6 @@ func (wp *Pool) Running() map[string]time.Time {
// StartContainer starts a container on an idle worker immediately if
// possible, otherwise returns false.
func (wp *Pool) StartContainer(it arvados.InstanceType, ctr arvados.Container) bool {
- logger := wp.logger.WithFields(logrus.Fields{
- "InstanceType": it.Name,
- "ContainerUUID": ctr.UUID,
- "Priority": ctr.Priority,
- })
wp.setupOnce.Do(wp.setup)
wp.mtx.Lock()
defer wp.mtx.Unlock()
@@ -379,34 +347,7 @@ func (wp *Pool) StartContainer(it arvados.InstanceType, ctr arvados.Container) b
if wkr == nil {
return false
}
- logger = logger.WithField("Instance", wkr.instance)
- logger.Debug("starting container")
- wkr.starting[ctr.UUID] = struct{}{}
- wkr.state = StateRunning
- go func() {
- stdout, stderr, err := wkr.executor.Execute("crunch-run --detach '"+ctr.UUID+"'", nil)
- wp.mtx.Lock()
- defer wp.mtx.Unlock()
- now := time.Now()
- wkr.updated = now
- wkr.busy = now
- delete(wkr.starting, ctr.UUID)
- wkr.running[ctr.UUID] = struct{}{}
- wkr.lastUUID = ctr.UUID
- if err != nil {
- logger.WithField("stdout", string(stdout)).
- WithField("stderr", string(stderr)).
- WithError(err).
- Error("error starting crunch-run process")
- // Leave uuid in wkr.running, though: it's
- // possible the error was just a communication
- // failure and the process was in fact
- // started. Wait for next probe to find out.
- return
- }
- logger.Info("crunch-run process started")
- wkr.lastUUID = ctr.UUID
- }()
+ wkr.startContainer(ctr)
return true
}
@@ -556,7 +497,7 @@ func (wp *Pool) runProbes() {
workers = workers[:0]
wp.mtx.Lock()
for id, wkr := range wp.workers {
- if wkr.state == StateShutdown || wp.shutdownIfIdle(wkr) {
+ if wkr.state == StateShutdown || wkr.shutdownIfIdle() {
continue
}
workers = append(workers, id)
@@ -567,20 +508,12 @@ func (wp *Pool) runProbes() {
wp.mtx.Lock()
wkr, ok := wp.workers[id]
wp.mtx.Unlock()
- if !ok || wkr.state == StateShutdown {
- // Deleted/shutdown while we
- // were probing others
+ if !ok {
+ // Deleted while we were probing
+ // others
continue
}
- select {
- case wkr.probing <- struct{}{}:
- go func() {
- wp.probeAndUpdate(wkr)
- <-wkr.probing
- }()
- default:
- wp.logger.WithField("Instance", wkr.instance).Debug("still waiting for last probe to finish")
- }
+ go wkr.ProbeAndUpdate()
select {
case <-wp.stop:
return
@@ -609,45 +542,6 @@ func (wp *Pool) runSync() {
}
}
-// caller must have lock.
-func (wp *Pool) shutdownIfBroken(wkr *worker, dur time.Duration) {
- if wkr.state == StateHold {
- return
- }
- label, threshold := "", wp.timeoutProbe
- if wkr.state == StateBooting {
- label, threshold = "new ", wp.timeoutBooting
- }
- if dur < threshold {
- return
- }
- wp.logger.WithFields(logrus.Fields{
- "Instance": wkr.instance,
- "Duration": dur,
- "Since": wkr.probed,
- "State": wkr.state,
- }).Warnf("%sinstance unresponsive, shutting down", label)
- wp.shutdown(wkr, wp.logger)
-}
-
-// caller must have lock.
-func (wp *Pool) shutdownIfIdle(wkr *worker) bool {
- if wkr.state != StateIdle {
- return false
- }
- age := time.Since(wkr.busy)
- if age < wp.timeoutIdle {
- return false
- }
- logger := wp.logger.WithFields(logrus.Fields{
- "Age": age,
- "Instance": wkr.instance,
- })
- logger.Info("shutdown idle worker")
- wp.shutdown(wkr, logger)
- return true
-}
-
// Stop synchronizing with the InstanceSet.
func (wp *Pool) Stop() {
wp.setupOnce.Do(wp.setup)
@@ -753,146 +647,3 @@ func (wp *Pool) sync(threshold time.Time, instances []cloud.Instance) {
go wp.notify()
}
}
-
-// should be called in a new goroutine
-func (wp *Pool) probeAndUpdate(wkr *worker) {
- logger := wp.logger.WithField("Instance", wkr.instance)
- wp.mtx.Lock()
- updated := wkr.updated
- needProbeRunning := wkr.state == StateRunning || wkr.state == StateIdle
- needProbeBooted := wkr.state == StateUnknown || wkr.state == StateBooting
- wp.mtx.Unlock()
- if !needProbeBooted && !needProbeRunning {
- return
- }
-
- var (
- ctrUUIDs []string
- ok bool
- stderr []byte
- )
- if needProbeBooted {
- ok, stderr = wp.probeBooted(wkr)
- wp.mtx.Lock()
- if ok || wkr.state == StateRunning || wkr.state == StateIdle {
- logger.Info("instance booted; will try probeRunning")
- needProbeRunning = true
- }
- wp.mtx.Unlock()
- }
- if needProbeRunning {
- ctrUUIDs, ok, stderr = wp.probeRunning(wkr)
- }
- logger = logger.WithField("stderr", string(stderr))
- wp.mtx.Lock()
- defer wp.mtx.Unlock()
- if !ok {
- if wkr.state == StateShutdown && wkr.updated.After(updated) {
- // Skip the logging noise if shutdown was
- // initiated during probe.
- return
- }
- dur := time.Since(wkr.probed)
- logger := logger.WithFields(logrus.Fields{
- "Duration": dur,
- "State": wkr.state,
- })
- if wkr.state == StateBooting {
- logger.Debug("new instance not responding")
- } else {
- logger.Info("instance not responding")
- }
- wp.shutdownIfBroken(wkr, dur)
- return
- }
-
- updateTime := time.Now()
- wkr.probed = updateTime
-
- if updated != wkr.updated {
- // Worker was updated after the probe began, so
- // wkr.running might have a container UUID that was
- // not yet running when ctrUUIDs was generated. Leave
- // wkr.running alone and wait for the next probe to
- // catch up on any changes.
- return
- }
-
- if len(ctrUUIDs) > 0 {
- wkr.busy = updateTime
- wkr.lastUUID = ctrUUIDs[0]
- } else if len(wkr.running) > 0 {
- // Actual last-busy time was sometime between wkr.busy
- // and now. Now is the earliest opportunity to take
- // advantage of the non-busy state, though.
- wkr.busy = updateTime
- }
- running := map[string]struct{}{}
- changed := false
- for _, uuid := range ctrUUIDs {
- running[uuid] = struct{}{}
- if _, ok := wkr.running[uuid]; !ok {
- changed = true
- }
- }
- for uuid := range wkr.running {
- if _, ok := running[uuid]; !ok {
- logger.WithField("ContainerUUID", uuid).Info("crunch-run process ended")
- wp.exited[uuid] = updateTime
- changed = true
- }
- }
- if wkr.state == StateUnknown || wkr.state == StateBooting {
- wkr.state = StateIdle
- changed = true
- }
- if changed {
- wkr.running = running
- if wkr.state == StateIdle && len(wkr.starting)+len(wkr.running) > 0 {
- wkr.state = StateRunning
- } else if wkr.state == StateRunning && len(wkr.starting)+len(wkr.running) == 0 {
- wkr.state = StateIdle
- }
- wkr.updated = updateTime
- go wp.notify()
- }
-}
-
-func (wp *Pool) probeRunning(wkr *worker) (running []string, ok bool, stderr []byte) {
- cmd := "crunch-run --list"
- stdout, stderr, err := wkr.executor.Execute(cmd, nil)
- if err != nil {
- wp.logger.WithFields(logrus.Fields{
- "Instance": wkr.instance,
- "Command": cmd,
- "stdout": string(stdout),
- "stderr": string(stderr),
- }).WithError(err).Warn("probe failed")
- return nil, false, stderr
- }
- stdout = bytes.TrimRight(stdout, "\n")
- if len(stdout) == 0 {
- return nil, true, stderr
- }
- return strings.Split(string(stdout), "\n"), true, stderr
-}
-
-func (wp *Pool) probeBooted(wkr *worker) (ok bool, stderr []byte) {
- cmd := wp.bootProbeCommand
- if cmd == "" {
- cmd = "true"
- }
- stdout, stderr, err := wkr.executor.Execute(cmd, nil)
- logger := wp.logger.WithFields(logrus.Fields{
- "Instance": wkr.instance,
- "Command": cmd,
- "stdout": string(stdout),
- "stderr": string(stderr),
- })
- if err != nil {
- logger.WithError(err).Debug("boot probe failed")
- return false, stderr
- }
- logger.Info("boot probe succeeded")
- return true, stderr
-}
diff --git a/lib/dispatchcloud/worker/worker.go b/lib/dispatchcloud/worker/worker.go
index c24efeb3c..c3825db52 100644
--- a/lib/dispatchcloud/worker/worker.go
+++ b/lib/dispatchcloud/worker/worker.go
@@ -5,7 +5,14 @@
package worker
import (
+ "bytes"
+ "strings"
+ "sync"
"time"
+
+ "git.curoverse.com/arvados.git/lib/cloud"
+ "git.curoverse.com/arvados.git/sdk/go/arvados"
+ "github.com/Sirupsen/logrus"
)
// State indicates whether a worker is available to do work, and (if
@@ -45,3 +52,261 @@ func (s State) String() string {
func (s State) MarshalText() ([]byte, error) {
return []byte(stateString[s]), nil
}
+
+type worker struct {
+ logger logrus.FieldLogger
+ executor Executor
+ wp *Pool
+
+ mtx sync.Locker // must be wp's Locker.
+ state State
+ instance cloud.Instance
+ instType arvados.InstanceType
+ vcpus int64
+ memory int64
+ probed time.Time
+ updated time.Time
+ busy time.Time
+ lastUUID string
+ running map[string]struct{} // remember to update state idle<->running when this changes
+ starting map[string]struct{} // remember to update state idle<->running when this changes
+ probing chan struct{}
+}
+
+// caller must have lock.
+func (wkr *worker) startContainer(ctr arvados.Container) {
+ logger := wkr.logger.WithFields(logrus.Fields{
+ "ContainerUUID": ctr.UUID,
+ "Priority": ctr.Priority,
+ })
+ logger = logger.WithField("Instance", wkr.instance)
+ logger.Debug("starting container")
+ wkr.starting[ctr.UUID] = struct{}{}
+ wkr.state = StateRunning
+ go func() {
+ stdout, stderr, err := wkr.executor.Execute("crunch-run --detach '"+ctr.UUID+"'", nil)
+ wkr.mtx.Lock()
+ defer wkr.mtx.Unlock()
+ now := time.Now()
+ wkr.updated = now
+ wkr.busy = now
+ delete(wkr.starting, ctr.UUID)
+ wkr.running[ctr.UUID] = struct{}{}
+ wkr.lastUUID = ctr.UUID
+ if err != nil {
+ logger.WithField("stdout", string(stdout)).
+ WithField("stderr", string(stderr)).
+ WithError(err).
+ Error("error starting crunch-run process")
+ // Leave uuid in wkr.running, though: it's
+ // possible the error was just a communication
+ // failure and the process was in fact
+ // started. Wait for next probe to find out.
+ return
+ }
+ logger.Info("crunch-run process started")
+ wkr.lastUUID = ctr.UUID
+ }()
+}
+
+// ProbeAndUpdate conducts appropriate boot/running probes (if any)
+// for the worker's current state. If a previous probe is still
+// running, it does nothing.
+//
+// It should be called in a new goroutine.
+func (wkr *worker) ProbeAndUpdate() {
+ select {
+ case wkr.probing <- struct{}{}:
+ wkr.probeAndUpdate()
+ <-wkr.probing
+ default:
+ wkr.logger.Debug("still waiting for last probe to finish")
+ }
+}
+
+// should be called in a new goroutine
+func (wkr *worker) probeAndUpdate() {
+ wkr.mtx.Lock()
+ updated := wkr.updated
+ needProbeRunning := wkr.state == StateRunning || wkr.state == StateIdle
+ needProbeBooted := wkr.state == StateUnknown || wkr.state == StateBooting
+ wkr.mtx.Unlock()
+ if !needProbeBooted && !needProbeRunning {
+ return
+ }
+
+ var (
+ ctrUUIDs []string
+ ok bool
+ stderr []byte
+ )
+ if needProbeBooted {
+ ok, stderr = wkr.probeBooted()
+ wkr.mtx.Lock()
+ if ok || wkr.state == StateRunning || wkr.state == StateIdle {
+ wkr.logger.Info("instance booted; will try probeRunning")
+ needProbeRunning = true
+ }
+ wkr.mtx.Unlock()
+ }
+ if needProbeRunning {
+ ctrUUIDs, ok, stderr = wkr.probeRunning()
+ }
+ logger := wkr.logger.WithField("stderr", string(stderr))
+ wkr.mtx.Lock()
+ defer wkr.mtx.Unlock()
+ if !ok {
+ if wkr.state == StateShutdown && wkr.updated.After(updated) {
+ // Skip the logging noise if shutdown was
+ // initiated during probe.
+ return
+ }
+ dur := time.Since(wkr.probed)
+ logger := logger.WithFields(logrus.Fields{
+ "Duration": dur,
+ "State": wkr.state,
+ })
+ if wkr.state == StateBooting {
+ logger.Debug("new instance not responding")
+ } else {
+ logger.Info("instance not responding")
+ }
+ wkr.shutdownIfBroken(dur)
+ return
+ }
+
+ updateTime := time.Now()
+ wkr.probed = updateTime
+
+ if updated != wkr.updated {
+ // Worker was updated after the probe began, so
+ // wkr.running might have a container UUID that was
+ // not yet running when ctrUUIDs was generated. Leave
+ // wkr.running alone and wait for the next probe to
+ // catch up on any changes.
+ return
+ }
+
+ if len(ctrUUIDs) > 0 {
+ wkr.busy = updateTime
+ wkr.lastUUID = ctrUUIDs[0]
+ } else if len(wkr.running) > 0 {
+ // Actual last-busy time was sometime between wkr.busy
+ // and now. Now is the earliest opportunity to take
+ // advantage of the non-busy state, though.
+ wkr.busy = updateTime
+ }
+ running := map[string]struct{}{}
+ changed := false
+ for _, uuid := range ctrUUIDs {
+ running[uuid] = struct{}{}
+ if _, ok := wkr.running[uuid]; !ok {
+ changed = true
+ }
+ }
+ for uuid := range wkr.running {
+ if _, ok := running[uuid]; !ok {
+ logger.WithField("ContainerUUID", uuid).Info("crunch-run process ended")
+ wkr.wp.notifyExited(uuid, updateTime)
+ changed = true
+ }
+ }
+ if wkr.state == StateUnknown || wkr.state == StateBooting {
+ wkr.state = StateIdle
+ changed = true
+ }
+ if changed {
+ wkr.running = running
+ if wkr.state == StateIdle && len(wkr.starting)+len(wkr.running) > 0 {
+ wkr.state = StateRunning
+ } else if wkr.state == StateRunning && len(wkr.starting)+len(wkr.running) == 0 {
+ wkr.state = StateIdle
+ }
+ wkr.updated = updateTime
+ go wkr.wp.notify()
+ }
+}
+
+func (wkr *worker) probeRunning() (running []string, ok bool, stderr []byte) {
+ cmd := "crunch-run --list"
+ stdout, stderr, err := wkr.executor.Execute(cmd, nil)
+ if err != nil {
+ wkr.logger.WithFields(logrus.Fields{
+ "Command": cmd,
+ "stdout": string(stdout),
+ "stderr": string(stderr),
+ }).WithError(err).Warn("probe failed")
+ return nil, false, stderr
+ }
+ stdout = bytes.TrimRight(stdout, "\n")
+ if len(stdout) == 0 {
+ return nil, true, stderr
+ }
+ return strings.Split(string(stdout), "\n"), true, stderr
+}
+
+func (wkr *worker) probeBooted() (ok bool, stderr []byte) {
+ cmd := wkr.wp.bootProbeCommand
+ if cmd == "" {
+ cmd = "true"
+ }
+ stdout, stderr, err := wkr.executor.Execute(cmd, nil)
+ logger := wkr.logger.WithFields(logrus.Fields{
+ "Command": cmd,
+ "stdout": string(stdout),
+ "stderr": string(stderr),
+ })
+ if err != nil {
+ logger.WithError(err).Debug("boot probe failed")
+ return false, stderr
+ }
+ logger.Info("boot probe succeeded")
+ return true, stderr
+}
+
+// caller must have lock.
+func (wkr *worker) shutdownIfBroken(dur time.Duration) {
+ if wkr.state == StateHold {
+ return
+ }
+ label, threshold := "", wkr.wp.timeoutProbe
+ if wkr.state == StateBooting {
+ label, threshold = "new ", wkr.wp.timeoutBooting
+ }
+ if dur < threshold {
+ return
+ }
+ wkr.logger.WithFields(logrus.Fields{
+ "Duration": dur,
+ "Since": wkr.probed,
+ "State": wkr.state,
+ }).Warnf("%sinstance unresponsive, shutting down", label)
+ wkr.shutdown()
+}
+
+// caller must have lock.
+func (wkr *worker) shutdownIfIdle() bool {
+ if wkr.state != StateIdle {
+ return false
+ }
+ age := time.Since(wkr.busy)
+ if age < wkr.wp.timeoutIdle {
+ return false
+ }
+ wkr.logger.WithField("Age", age).Info("shutdown idle worker")
+ wkr.shutdown()
+ return true
+}
+
+// caller must have lock
+func (wkr *worker) shutdown() {
+ wkr.updated = time.Now()
+ wkr.state = StateShutdown
+ go func() {
+ err := wkr.instance.Destroy()
+ if err != nil {
+ wkr.logger.WithError(err).Warn("shutdown failed")
+ return
+ }
+ }()
+}
commit 93671366e7633cbf0ca3cab68395e211e3afc31c
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Tue Nov 6 17:01:06 2018 -0500
14360: Note race in docs, fix flaky test.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index 4e331256f..45e9da37a 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -185,6 +185,11 @@ func (wp *Pool) Unsubscribe(ch <-chan struct{}) {
// Unallocated returns the number of unallocated (creating + booting +
// idle + unknown) workers for each instance type.
+//
+// The returned counts should be interpreted as upper bounds, rather
+// than exact counts: they are sometimes artificially high when a
+// newly created instance appears in the driver's Instances() list
+// before the Create() call returns.
func (wp *Pool) Unallocated() map[arvados.InstanceType]int {
wp.setupOnce.Do(wp.setup)
wp.mtx.RLock()
diff --git a/lib/dispatchcloud/worker/pool_test.go b/lib/dispatchcloud/worker/pool_test.go
index 790ee851e..f8667e06c 100644
--- a/lib/dispatchcloud/worker/pool_test.go
+++ b/lib/dispatchcloud/worker/pool_test.go
@@ -19,6 +19,16 @@ const GiB arvados.ByteSize = 1 << 30
var _ = check.Suite(&PoolSuite{})
+type lessChecker struct {
+ *check.CheckerInfo
+}
+
+func (*lessChecker) Check(params []interface{}, names []string) (result bool, error string) {
+ return params[0].(int) < params[1].(int), ""
+}
+
+var less = &lessChecker{&check.CheckerInfo{Name: "less", Params: []string{"obtained", "expected"}}}
+
type PoolSuite struct{}
func (suite *PoolSuite) SetUpSuite(c *check.C) {
@@ -69,8 +79,13 @@ func (suite *PoolSuite) TestCreateUnallocShutdown(c *check.C) {
c.Check(pool.Unallocated()[type1], check.Equals, 1)
c.Check(pool.Unallocated()[type2], check.Equals, 2)
pool.getInstancesAndSync()
- c.Check(pool.Unallocated()[type1], check.Equals, 1)
- c.Check(pool.Unallocated()[type2], check.Equals, 2)
+ // Returned counts can be temporarily higher than 1 and 2 if
+ // poll ran before Create() returned.
+ c.Check(pool.Unallocated()[type1], check.Not(less), 1)
+ c.Check(pool.Unallocated()[type2], check.Not(less), 2)
+ suite.wait(c, pool, notify, func() bool {
+ return pool.Unallocated()[type1] == 1 && pool.Unallocated()[type2] == 2
+ })
c.Check(pool.Shutdown(type2), check.Equals, true)
suite.wait(c, pool, notify, func() bool {
commit d6bf3b66c2c938ca2ce2bda3276c0c0f4e54f1b6
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Tue Nov 6 16:17:58 2018 -0500
14360: Add "idle" state.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/lib/dispatchcloud/scheduler/run_queue_test.go b/lib/dispatchcloud/scheduler/run_queue_test.go
index e5e8c7ecf..b8a03275f 100644
--- a/lib/dispatchcloud/scheduler/run_queue_test.go
+++ b/lib/dispatchcloud/scheduler/run_queue_test.go
@@ -75,7 +75,8 @@ func (p *stubPool) Shutdown(arvados.InstanceType) bool {
func (p *stubPool) Workers() map[worker.State]int {
return map[worker.State]int{
worker.StateBooting: len(p.unalloc) - len(p.idle),
- worker.StateRunning: len(p.idle) - len(p.running),
+ worker.StateIdle: len(p.idle),
+ worker.StateRunning: len(p.running),
}
}
func (p *stubPool) StartContainer(it arvados.InstanceType, ctr arvados.Container) bool {
diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index b1b570bd9..4e331256f 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -148,8 +148,8 @@ type worker struct {
updated time.Time
busy time.Time
lastUUID string
- running map[string]struct{}
- starting map[string]struct{}
+ running map[string]struct{} // remember to update state idle<->running when this changes
+ starting map[string]struct{} // remember to update state idle<->running when this changes
probing chan struct{}
}
@@ -194,7 +194,7 @@ func (wp *Pool) Unallocated() map[arvados.InstanceType]int {
u[it] = c
}
for _, wkr := range wp.workers {
- if len(wkr.running)+len(wkr.starting) == 0 && (wkr.state == StateRunning || wkr.state == StateBooting || wkr.state == StateUnknown) {
+ if wkr.state == StateIdle || wkr.state == StateBooting || wkr.state == StateUnknown {
u[wkr.instType]++
}
}
@@ -288,21 +288,16 @@ func (wp *Pool) Shutdown(it arvados.InstanceType) bool {
defer wp.mtx.Unlock()
logger := wp.logger.WithField("InstanceType", it.Name)
logger.Info("shutdown requested")
- for _, tryState := range []State{StateBooting, StateRunning} {
+ for _, tryState := range []State{StateBooting, StateIdle} {
// TODO: shutdown the worker with the longest idle
- // time (Running) or the earliest create time
- // (Booting)
+ // time (Idle) or the earliest create time (Booting)
for _, wkr := range wp.workers {
- if wkr.state != tryState || len(wkr.running)+len(wkr.starting) > 0 {
- continue
- }
- if wkr.instType != it {
- continue
+ if wkr.state == tryState && wkr.instType == it {
+ logger = logger.WithField("Instance", wkr.instance)
+ logger.Info("shutting down")
+ wp.shutdown(wkr, logger)
+ return true
}
- logger = logger.WithField("Instance", wkr.instance)
- logger.Info("shutting down")
- wp.shutdown(wkr, logger)
- return true
}
}
return false
@@ -370,7 +365,7 @@ func (wp *Pool) StartContainer(it arvados.InstanceType, ctr arvados.Container) b
defer wp.mtx.Unlock()
var wkr *worker
for _, w := range wp.workers {
- if w.instType == it && w.state == StateRunning && len(w.running)+len(w.starting) == 0 {
+ if w.instType == it && w.state == StateIdle {
if wkr == nil || w.busy.After(wkr.busy) {
wkr = w
}
@@ -382,6 +377,7 @@ func (wp *Pool) StartContainer(it arvados.InstanceType, ctr arvados.Container) b
logger = logger.WithField("Instance", wkr.instance)
logger.Debug("starting container")
wkr.starting[ctr.UUID] = struct{}{}
+ wkr.state = StateRunning
go func() {
stdout, stderr, err := wkr.executor.Execute("crunch-run --detach '"+ctr.UUID+"'", nil)
wp.mtx.Lock()
@@ -451,6 +447,9 @@ func (wp *Pool) kill(wkr *worker, uuid string) {
defer wp.mtx.Unlock()
if _, ok := wkr.running[uuid]; ok {
delete(wkr.running, uuid)
+ if wkr.state == StateRunning && len(wkr.running)+len(wkr.starting) == 0 {
+ wkr.state = StateIdle
+ }
wkr.updated = time.Now()
go wp.notify()
}
@@ -628,7 +627,7 @@ func (wp *Pool) shutdownIfBroken(wkr *worker, dur time.Duration) {
// caller must have lock.
func (wp *Pool) shutdownIfIdle(wkr *worker) bool {
- if len(wkr.running)+len(wkr.starting) > 0 || wkr.state != StateRunning {
+ if wkr.state != StateIdle {
return false
}
age := time.Since(wkr.busy)
@@ -755,7 +754,7 @@ func (wp *Pool) probeAndUpdate(wkr *worker) {
logger := wp.logger.WithField("Instance", wkr.instance)
wp.mtx.Lock()
updated := wkr.updated
- needProbeRunning := wkr.state == StateRunning
+ needProbeRunning := wkr.state == StateRunning || wkr.state == StateIdle
needProbeBooted := wkr.state == StateUnknown || wkr.state == StateBooting
wp.mtx.Unlock()
if !needProbeBooted && !needProbeRunning {
@@ -770,13 +769,10 @@ func (wp *Pool) probeAndUpdate(wkr *worker) {
if needProbeBooted {
ok, stderr = wp.probeBooted(wkr)
wp.mtx.Lock()
- if ok && (wkr.state == StateUnknown || wkr.state == StateBooting) {
- wkr.state = StateRunning
- wkr.probed = time.Now()
- logger.Info("instance booted")
- go wp.notify()
+ if ok || wkr.state == StateRunning || wkr.state == StateIdle {
+ logger.Info("instance booted; will try probeRunning")
+ needProbeRunning = true
}
- needProbeRunning = wkr.state == StateRunning
wp.mtx.Unlock()
}
if needProbeRunning {
@@ -786,7 +782,7 @@ func (wp *Pool) probeAndUpdate(wkr *worker) {
wp.mtx.Lock()
defer wp.mtx.Unlock()
if !ok {
- if wkr.state == StateShutdown {
+ if wkr.state == StateShutdown && wkr.updated.After(updated) {
// Skip the logging noise if shutdown was
// initiated during probe.
return
@@ -841,8 +837,17 @@ func (wp *Pool) probeAndUpdate(wkr *worker) {
changed = true
}
}
+ if wkr.state == StateUnknown || wkr.state == StateBooting {
+ wkr.state = StateIdle
+ changed = true
+ }
if changed {
wkr.running = running
+ if wkr.state == StateIdle && len(wkr.starting)+len(wkr.running) > 0 {
+ wkr.state = StateRunning
+ } else if wkr.state == StateRunning && len(wkr.starting)+len(wkr.running) == 0 {
+ wkr.state = StateIdle
+ }
wkr.updated = updateTime
go wp.notify()
}
diff --git a/lib/dispatchcloud/worker/worker.go b/lib/dispatchcloud/worker/worker.go
index 7828d4f69..c24efeb3c 100644
--- a/lib/dispatchcloud/worker/worker.go
+++ b/lib/dispatchcloud/worker/worker.go
@@ -15,7 +15,8 @@ type State int
const (
StateUnknown State = iota // might be running a container already
StateBooting // instance is booting
- StateRunning // instance is running
+ StateIdle // instance booted, no containers are running
+ StateRunning // instance is running one or more containers
StateShutdown // worker has stopped monitoring the instance
StateHold // running, but not available to run new containers
)
@@ -28,6 +29,7 @@ const (
var stateString = map[State]string{
StateUnknown: "unknown",
StateBooting: "booting",
+ StateIdle: "idle",
StateRunning: "running",
StateShutdown: "shutdown",
StateHold: "hold",
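
The new "idle" state splits what was previously a single running state. The rule applied after each probe (and when containers start or are killed) is small enough to show in isolation; this sketch uses its own State constants rather than the real worker package.

    package main

    import "fmt"

    type State int

    const (
        StateIdle State = iota
        StateRunning
    )

    // nextState applies the idle<->running rule: a worker is Running while it has
    // any starting or running containers, and Idle otherwise. Other states
    // (booting, shutdown, hold) are left alone.
    func nextState(cur State, starting, running int) State {
        switch {
        case cur == StateIdle && starting+running > 0:
            return StateRunning
        case cur == StateRunning && starting+running == 0:
            return StateIdle
        default:
            return cur
        }
    }

    func main() {
        fmt.Println(nextState(StateIdle, 1, 0) == StateRunning) // true
        fmt.Println(nextState(StateRunning, 0, 0) == StateIdle) // true
    }
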
commit 49f46db107c8226d70ed6d489cd000bfba82ca2f
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Tue Oct 30 16:37:31 2018 -0400
14360: Fix concurrent map access in tests.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/lib/dispatchcloud/test/queue.go b/lib/dispatchcloud/test/queue.go
index 0bad5a391..152094f52 100644
--- a/lib/dispatchcloud/test/queue.go
+++ b/lib/dispatchcloud/test/queue.go
@@ -61,14 +61,20 @@ func (q *Queue) Forget(uuid string) {
}
func (q *Queue) Lock(uuid string) error {
+ q.mtx.Lock()
+ defer q.mtx.Unlock()
return q.changeState(uuid, arvados.ContainerStateQueued, arvados.ContainerStateLocked)
}
func (q *Queue) Unlock(uuid string) error {
+ q.mtx.Lock()
+ defer q.mtx.Unlock()
return q.changeState(uuid, arvados.ContainerStateLocked, arvados.ContainerStateQueued)
}
func (q *Queue) Cancel(uuid string) error {
+ q.mtx.Lock()
+ defer q.mtx.Unlock()
return q.changeState(uuid, q.entries[uuid].Container.State, arvados.ContainerStateCancelled)
}
@@ -98,9 +104,8 @@ func (q *Queue) notify() {
}
}
+// caller must have lock.
func (q *Queue) changeState(uuid string, from, to arvados.ContainerState) error {
- q.mtx.Lock()
- defer q.mtx.Unlock()
ent := q.entries[uuid]
if ent.Container.State != from {
return fmt.Errorf("lock failed: state=%q", ent.Container.State)
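
The fix above moves the mutex from the internal state-change helper to the exported methods, following the "caller must have lock" convention used elsewhere in this series. A condensed sketch of the pattern, with a hypothetical queue type and plain string states standing in for arvados.ContainerState:

    package main

    import (
        "fmt"
        "sync"
    )

    type queue struct {
        mtx     sync.Mutex
        entries map[string]string
    }

    // Exported methods take the lock once...
    func (q *queue) Lock(uuid string) error {
        q.mtx.Lock()
        defer q.mtx.Unlock()
        return q.changeState(uuid, "Queued", "Locked")
    }

    func (q *queue) Unlock(uuid string) error {
        q.mtx.Lock()
        defer q.mtx.Unlock()
        return q.changeState(uuid, "Locked", "Queued")
    }

    // ...and the helper documents that the caller must already hold it, so it can
    // be shared by several locked call sites without double-locking or racing.
    func (q *queue) changeState(uuid, from, to string) error {
        if q.entries[uuid] != from {
            return fmt.Errorf("changeState failed: state=%q", q.entries[uuid])
        }
        q.entries[uuid] = to
        return nil
    }

    func main() {
        q := &queue{entries: map[string]string{"ctr-1": "Queued"}}
        fmt.Println(q.Lock("ctr-1"))   // <nil>
        fmt.Println(q.Unlock("ctr-1")) // <nil>
    }
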
commit 3cb857be0aad14d0ebd631305a7a5c0f847811d4
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Tue Oct 30 16:35:34 2018 -0400
14360: Move slurm-only code to crunch-dispatch-slurm.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/lib/dispatchcloud/node_size.go b/lib/dispatchcloud/node_size.go
index 3bada3baf..e77c862b3 100644
--- a/lib/dispatchcloud/node_size.go
+++ b/lib/dispatchcloud/node_size.go
@@ -6,11 +6,7 @@ package dispatchcloud
import (
"errors"
- "log"
- "os/exec"
"sort"
- "strings"
- "time"
"git.curoverse.com/arvados.git/sdk/go/arvados"
)
@@ -78,61 +74,3 @@ func ChooseInstanceType(cc *arvados.Cluster, ctr *arvados.Container) (best arvad
}
return
}
-
-// SlurmNodeTypeFeatureKludge ensures SLURM accepts every instance
-// type name as a valid feature name, even if no instances of that
-// type have appeared yet.
-//
-// It takes advantage of some SLURM peculiarities:
-//
-// (1) A feature is valid after it has been offered by a node, even if
-// it is no longer offered by any node. So, to make a feature name
-// valid, we can add it to a dummy node ("compute0"), then remove it.
-//
-// (2) To test whether a set of feature names are valid without
-// actually submitting a job, we can call srun --test-only with the
-// desired features.
-//
-// SlurmNodeTypeFeatureKludge does a test-and-fix operation
-// immediately, and then periodically, in case slurm restarts and
-// forgets the list of valid features. It never returns (unless there
-// are no node types configured, in which case it returns
-// immediately), so it should generally be invoked with "go".
-func SlurmNodeTypeFeatureKludge(cc *arvados.Cluster) {
- if len(cc.InstanceTypes) == 0 {
- return
- }
- var features []string
- for _, it := range cc.InstanceTypes {
- features = append(features, "instancetype="+it.Name)
- }
- for {
- slurmKludge(features)
- time.Sleep(2 * time.Second)
- }
-}
-
-const slurmDummyNode = "compute0"
-
-func slurmKludge(features []string) {
- allFeatures := strings.Join(features, ",")
-
- cmd := exec.Command("sinfo", "--nodes="+slurmDummyNode, "--format=%f", "--noheader")
- out, err := cmd.CombinedOutput()
- if err != nil {
- log.Printf("running %q %q: %s (output was %q)", cmd.Path, cmd.Args, err, out)
- return
- }
- if string(out) == allFeatures+"\n" {
- // Already configured correctly, nothing to do.
- return
- }
-
- log.Printf("configuring node %q with all node type features", slurmDummyNode)
- cmd = exec.Command("scontrol", "update", "NodeName="+slurmDummyNode, "Features="+allFeatures)
- log.Printf("running: %q %q", cmd.Path, cmd.Args)
- out, err = cmd.CombinedOutput()
- if err != nil {
- log.Printf("error: scontrol: %s (output was %q)", err, out)
- }
-}
diff --git a/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go b/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go
index 084700d39..30e88bf92 100644
--- a/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go
+++ b/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go
@@ -197,7 +197,7 @@ func (disp *Dispatcher) run() error {
defer disp.sqCheck.Stop()
if disp.cluster != nil && len(disp.cluster.InstanceTypes) > 0 {
- go dispatchcloud.SlurmNodeTypeFeatureKludge(disp.cluster)
+ go SlurmNodeTypeFeatureKludge(disp.cluster)
}
if _, err := daemon.SdNotify(false, "READY=1"); err != nil {
diff --git a/lib/dispatchcloud/node_size.go b/services/crunch-dispatch-slurm/node_type.go
similarity index 53%
copy from lib/dispatchcloud/node_size.go
copy to services/crunch-dispatch-slurm/node_type.go
index 3bada3baf..62a96932f 100644
--- a/lib/dispatchcloud/node_size.go
+++ b/services/crunch-dispatch-slurm/node_type.go
@@ -2,83 +2,17 @@
//
// SPDX-License-Identifier: AGPL-3.0
-package dispatchcloud
+package main
import (
- "errors"
"log"
"os/exec"
- "sort"
"strings"
"time"
"git.curoverse.com/arvados.git/sdk/go/arvados"
)
-var ErrInstanceTypesNotConfigured = errors.New("site configuration does not list any instance types")
-
-var discountConfiguredRAMPercent = 5
-
-// ConstraintsNotSatisfiableError includes a list of available instance types
-// to be reported back to the user.
-type ConstraintsNotSatisfiableError struct {
- error
- AvailableTypes []arvados.InstanceType
-}
-
-// ChooseInstanceType returns the cheapest available
-// arvados.InstanceType big enough to run ctr.
-func ChooseInstanceType(cc *arvados.Cluster, ctr *arvados.Container) (best arvados.InstanceType, err error) {
- if len(cc.InstanceTypes) == 0 {
- err = ErrInstanceTypesNotConfigured
- return
- }
-
- needScratch := int64(0)
- for _, m := range ctr.Mounts {
- if m.Kind == "tmp" {
- needScratch += m.Capacity
- }
- }
-
- needVCPUs := ctr.RuntimeConstraints.VCPUs
-
- needRAM := ctr.RuntimeConstraints.RAM + ctr.RuntimeConstraints.KeepCacheRAM
- needRAM = (needRAM * 100) / int64(100-discountConfiguredRAMPercent)
-
- ok := false
- for _, it := range cc.InstanceTypes {
- switch {
- case ok && it.Price > best.Price:
- case int64(it.Scratch) < needScratch:
- case int64(it.RAM) < needRAM:
- case it.VCPUs < needVCPUs:
- case it.Preemptible != ctr.SchedulingParameters.Preemptible:
- case it.Price == best.Price && (it.RAM < best.RAM || it.VCPUs < best.VCPUs):
- // Equal price, but worse specs
- default:
- // Lower price || (same price && better specs)
- best = it
- ok = true
- }
- }
- if !ok {
- availableTypes := make([]arvados.InstanceType, 0, len(cc.InstanceTypes))
- for _, t := range cc.InstanceTypes {
- availableTypes = append(availableTypes, t)
- }
- sort.Slice(availableTypes, func(a, b int) bool {
- return availableTypes[a].Price < availableTypes[b].Price
- })
- err = ConstraintsNotSatisfiableError{
- errors.New("constraints not satisfiable by any configured instance type"),
- availableTypes,
- }
- return
- }
- return
-}
-
// SlurmNodeTypeFeatureKludge ensures SLURM accepts every instance
// type name as a valid feature name, even if no instances of that
// type have appeared yet.
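
The ChooseInstanceType code being copied here (visible in the removed lines above) pads the requested RAM by discountConfiguredRAMPercent before comparing against each instance type. A quick worked example of that formula, using made-up container numbers:

    package main

    import "fmt"

    func main() {
        const discountConfiguredRAMPercent = 5
        // Hypothetical container: 4 GiB of RAM plus 1 GiB of Keep cache.
        needRAM := int64(4<<30) + int64(1<<30)
        // Pad the request so an instance type still fits after its advertised
        // RAM is discounted by discountConfiguredRAMPercent.
        needRAM = (needRAM * 100) / int64(100-discountConfiguredRAMPercent)
        fmt.Printf("need %.2f GiB of advertised RAM\n", float64(needRAM)/(1<<30))
        // Output: need 5.26 GiB of advertised RAM
    }
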
commit 3654f222637b00728158300e700d615c4c913b29
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Tue Oct 30 16:12:11 2018 -0400
14360: Fail Create calls fast during quota quarantine.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index 68174e6b7..b1b570bd9 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -124,6 +124,7 @@ type Pool struct {
loaded bool // loaded list of instances from InstanceSet at least once
exited map[string]time.Time // containers whose crunch-run proc has exited, but KillContainer has not been called
atQuotaUntil time.Time
+ atQuotaErr cloud.QuotaError
stop chan bool
mtx sync.RWMutex
setupOnce sync.Once
@@ -208,6 +209,9 @@ func (wp *Pool) Create(it arvados.InstanceType) error {
wp.setupOnce.Do(wp.setup)
wp.mtx.Lock()
defer wp.mtx.Unlock()
+ if time.Now().Before(wp.atQuotaUntil) {
+ return wp.atQuotaErr
+ }
tags := cloud.InstanceTags{tagKeyInstanceType: it.Name}
wp.creating[it]++
go func() {
@@ -217,6 +221,7 @@ func (wp *Pool) Create(it arvados.InstanceType) error {
defer wp.mtx.Unlock()
wp.creating[it]--
if err, ok := err.(cloud.QuotaError); ok && err.IsQuotaError() {
+ wp.atQuotaErr = err
wp.atQuotaUntil = time.Now().Add(time.Minute)
}
if err != nil {
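The fast-fail path above leans on the cloud.QuotaError interface: Create remembers the last quota error and returns it immediately until atQuotaUntil passes. A minimal sketch of how a caller can tell quota errors apart from other create failures (QuotaError here is restated locally and quotaError is a hypothetical driver error type, not the real cloud package):

    package main

    import (
            "errors"
            "fmt"
    )

    // QuotaError mirrors the cloud.QuotaError idea: an error that also
    // reports whether it was caused by hitting a provider quota.
    type QuotaError interface {
            error
            IsQuotaError() bool
    }

    // quotaError is a hypothetical concrete type a driver might return.
    type quotaError struct{ error }

    func (quotaError) IsQuotaError() bool { return true }

    // handleCreateError treats quota errors as expected back-off events
    // and everything else as a real failure, echoing the Create() path.
    func handleCreateError(err error) {
            if err == nil {
                    return
            }
            if qe, ok := err.(QuotaError); ok && qe.IsQuotaError() {
                    fmt.Println("at quota, retry later:", qe)
                    return
            }
            fmt.Println("create failed:", err)
    }

    func main() {
            handleCreateError(quotaError{errors.New("InstanceLimitExceeded")})
            handleCreateError(errors.New("network timeout"))
    }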
commit 1c1e245bc3163a7841859d5ff75d0cff59dfc90c
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Tue Oct 30 16:08:47 2018 -0400
14360: Remove redundant wp.unallocated field.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index 4aa4f089a..68174e6b7 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -30,7 +30,7 @@ type InstanceView struct {
ArvadosInstanceType string
ProviderInstanceType string
LastContainerUUID string
- Unallocated time.Time
+ LastBusy time.Time
WorkerState string
}
@@ -137,20 +137,19 @@ type Pool struct {
}
type worker struct {
- state State
- instance cloud.Instance
- executor Executor
- instType arvados.InstanceType
- vcpus int64
- memory int64
- probed time.Time
- updated time.Time
- busy time.Time
- unallocated time.Time
- lastUUID string
- running map[string]struct{}
- starting map[string]struct{}
- probing chan struct{}
+ state State
+ instance cloud.Instance
+ executor Executor
+ instType arvados.InstanceType
+ vcpus int64
+ memory int64
+ probed time.Time
+ updated time.Time
+ busy time.Time
+ lastUUID string
+ running map[string]struct{}
+ starting map[string]struct{}
+ probing chan struct{}
}
// Subscribe returns a channel that becomes ready whenever a worker's
@@ -262,17 +261,16 @@ func (wp *Pool) updateWorker(inst cloud.Instance, it arvados.InstanceType, initi
}).Infof("instance appeared in cloud")
now := time.Now()
wp.workers[id] = &worker{
- executor: wp.newExecutor(inst),
- state: initialState,
- instance: inst,
- instType: it,
- probed: now,
- busy: now,
- updated: now,
- unallocated: now,
- running: make(map[string]struct{}),
- starting: make(map[string]struct{}),
- probing: make(chan struct{}, 1),
+ executor: wp.newExecutor(inst),
+ state: initialState,
+ instance: inst,
+ instType: it,
+ probed: now,
+ busy: now,
+ updated: now,
+ running: make(map[string]struct{}),
+ starting: make(map[string]struct{}),
+ probing: make(chan struct{}, 1),
}
return true
}
@@ -628,7 +626,7 @@ func (wp *Pool) shutdownIfIdle(wkr *worker) bool {
if len(wkr.running)+len(wkr.starting) > 0 || wkr.state != StateRunning {
return false
}
- age := time.Since(wkr.unallocated)
+ age := time.Since(wkr.busy)
if age < wp.timeoutIdle {
return false
}
@@ -660,7 +658,7 @@ func (wp *Pool) Instances() []InstanceView {
ArvadosInstanceType: w.instType.Name,
ProviderInstanceType: w.instType.ProviderType,
LastContainerUUID: w.lastUUID,
- Unallocated: w.unallocated,
+ LastBusy: w.busy,
WorkerState: w.state.String(),
})
}
@@ -818,7 +816,10 @@ func (wp *Pool) probeAndUpdate(wkr *worker) {
wkr.busy = updateTime
wkr.lastUUID = ctrUUIDs[0]
} else if len(wkr.running) > 0 {
- wkr.unallocated = updateTime
+ // Actual last-busy time was sometime between wkr.busy
+ // and now. Now is the earliest opportunity to take
+ // advantage of the non-busy state, though.
+ wkr.busy = updateTime
}
running := map[string]struct{}{}
changed := false
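With the unallocated field gone, idle shutdown is driven entirely by the worker's last-busy timestamp. A stripped-down version of that check (field names simplified; this is not the real Pool type):

    package main

    import (
            "fmt"
            "time"
    )

    type worker struct {
            running  map[string]struct{}
            starting map[string]struct{}
            busy     time.Time // last time a container was seen running here
    }

    // idleTooLong reports whether the worker has been idle longer than
    // timeoutIdle and is therefore a shutdown candidate.
    func idleTooLong(wkr *worker, timeoutIdle time.Duration) bool {
            if len(wkr.running)+len(wkr.starting) > 0 {
                    return false // still doing work
            }
            return time.Since(wkr.busy) >= timeoutIdle
    }

    func main() {
            w := &worker{busy: time.Now().Add(-10 * time.Minute)}
            fmt.Println(idleTooLong(w, 5*time.Minute)) // true
    }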
commit f33f8084159f701b8a1435e8cf77b7ab7f3417ae
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Tue Oct 30 15:35:07 2018 -0400
14360: Remove redundant wkr.booted flag.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index 1e759e38f..4aa4f089a 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -143,7 +143,6 @@ type worker struct {
instType arvados.InstanceType
vcpus int64
memory int64
- booted bool
probed time.Time
updated time.Time
busy time.Time
@@ -753,26 +752,31 @@ func (wp *Pool) probeAndUpdate(wkr *worker) {
logger := wp.logger.WithField("Instance", wkr.instance)
wp.mtx.Lock()
updated := wkr.updated
- booted := wkr.booted
+ needProbeRunning := wkr.state == StateRunning
+ needProbeBooted := wkr.state == StateUnknown || wkr.state == StateBooting
wp.mtx.Unlock()
+ if !needProbeBooted && !needProbeRunning {
+ return
+ }
var (
ctrUUIDs []string
ok bool
stderr []byte
)
- if !booted {
- booted, stderr = wp.probeBooted(wkr)
+ if needProbeBooted {
+ ok, stderr = wp.probeBooted(wkr)
wp.mtx.Lock()
- if booted && !wkr.booted {
- wkr.booted = booted
+ if ok && (wkr.state == StateUnknown || wkr.state == StateBooting) {
+ wkr.state = StateRunning
+ wkr.probed = time.Now()
logger.Info("instance booted")
- } else {
- booted = wkr.booted
+ go wp.notify()
}
+ needProbeRunning = wkr.state == StateRunning
wp.mtx.Unlock()
}
- if booted {
+ if needProbeRunning {
ctrUUIDs, ok, stderr = wp.probeRunning(wkr)
}
logger = logger.WithField("stderr", string(stderr))
@@ -780,6 +784,8 @@ func (wp *Pool) probeAndUpdate(wkr *worker) {
defer wp.mtx.Unlock()
if !ok {
if wkr.state == StateShutdown {
+ // Skip the logging noise if shutdown was
+ // initiated during probe.
return
}
dur := time.Since(wkr.probed)
@@ -798,15 +804,6 @@ func (wp *Pool) probeAndUpdate(wkr *worker) {
updateTime := time.Now()
wkr.probed = updateTime
- if wkr.state == StateShutdown || wkr.state == StateHold {
- } else if booted {
- if wkr.state != StateRunning {
- wkr.state = StateRunning
- go wp.notify()
- }
- } else {
- wkr.state = StateBooting
- }
if updated != wkr.updated {
// Worker was updated after the probe began, so
commit e5e2dc5e6bf1bce9893517470632295d42bb5f1c
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Tue Oct 30 14:32:24 2018 -0400
14360: Ensure signals don't pass to child in startup race.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/services/crunch-run/background.go b/services/crunch-run/background.go
index 57f770ae1..bd02db430 100644
--- a/services/crunch-run/background.go
+++ b/services/crunch-run/background.go
@@ -65,7 +65,12 @@ func detach(uuid string, args []string, stdout, stderr io.Writer) error {
cmd := exec.Command(args[0], append([]string{"-detached"}, args[1:]...)...)
cmd.Stdout = outfile
cmd.Stderr = errfile
+ // Child inherits lockfile.
cmd.ExtraFiles = []*os.File{lockfile}
+ // Ensure child isn't interrupted even if we receive signals
+ // from parent (sshd) while sending lockfile content to
+ // caller.
+ cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
err = cmd.Start()
if err != nil {
os.Remove(outfile.Name())
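Putting the child in its own process group is what keeps a signal aimed at the parent (e.g. SIGINT delivered by sshd during the handoff) from also reaching the detached child. A self-contained illustration of the same syscall.SysProcAttr usage; this sketch runs /bin/sleep rather than crunch-run and assumes a Unix-like OS:

    package main

    import (
            "fmt"
            "os/exec"
            "syscall"
    )

    func main() {
            cmd := exec.Command("/bin/sleep", "60")
            // Start the child in a new process group so signals sent to
            // the parent's group (e.g. SIGINT from the controlling
            // terminal or sshd) are not delivered to the child.
            cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
            if err := cmd.Start(); err != nil {
                    fmt.Println("start failed:", err)
                    return
            }
            fmt.Println("child pid:", cmd.Process.Pid, "running in its own process group")
    }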
commit 359dc0dba7e93744c583b431d428de62fb8e0c90
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Tue Oct 30 14:18:35 2018 -0400
14360: Fix mismatched flag name, -nodetach -> -detached.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/services/crunch-run/background.go b/services/crunch-run/background.go
index 1f98d3791..57f770ae1 100644
--- a/services/crunch-run/background.go
+++ b/services/crunch-run/background.go
@@ -32,7 +32,7 @@ type procinfo struct {
}
// Detach acquires a lock for the given uuid, and starts the current
-// program as a child process (with -nodetach prepended to the given
+// program as a child process (with -detached prepended to the given
// arguments so the child knows not to detach again). The lock is
// passed along to the child process.
func Detach(uuid string, args []string, stdout, stderr io.Writer) int {
@@ -62,7 +62,7 @@ func detach(uuid string, args []string, stdout, stderr io.Writer) error {
}
defer errfile.Close()
- cmd := exec.Command(args[0], append([]string{"-nodetach"}, args[1:]...)...)
+ cmd := exec.Command(args[0], append([]string{"-detached"}, args[1:]...)...)
cmd.Stdout = outfile
cmd.Stderr = errfile
cmd.ExtraFiles = []*os.File{lockfile}
commit 17479bd75a29c52470abe0049cb447e114eb39e9
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Tue Oct 30 13:50:30 2018 -0400
14360: Encapsulate scheduler object.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/lib/dispatchcloud/container/queue.go b/lib/dispatchcloud/container/queue.go
index e0e1cd0cd..432f4d488 100644
--- a/lib/dispatchcloud/container/queue.go
+++ b/lib/dispatchcloud/container/queue.go
@@ -69,6 +69,9 @@ type Queue struct {
// When no network update is in progress, this protection is
// not needed, and dontupdate is nil.
dontupdate map[string]struct{}
+
+ // active notification subscribers (see Subscribe)
+ subscribers map[<-chan struct{}]chan struct{}
}
// NewQueue returns a new Queue. When a new container appears in the
@@ -76,11 +79,46 @@ type Queue struct {
// assign an appropriate arvados.InstanceType for the queue entry.
func NewQueue(logger logrus.FieldLogger, reg *prometheus.Registry, chooseType typeChooser, client APIClient) *Queue {
return &Queue{
- logger: logger,
- reg: reg,
- chooseType: chooseType,
- client: client,
- current: map[string]QueueEnt{},
+ logger: logger,
+ reg: reg,
+ chooseType: chooseType,
+ client: client,
+ current: map[string]QueueEnt{},
+ subscribers: map[<-chan struct{}]chan struct{}{},
+ }
+}
+
+// Subscribe returns a channel that becomes ready to receive when an
+// entry in the Queue is updated.
+//
+// ch := q.Subscribe()
+// defer q.Unsubscribe(ch)
+// for range ch {
+// // ...
+// }
+func (cq *Queue) Subscribe() <-chan struct{} {
+ cq.mtx.Lock()
+ defer cq.mtx.Unlock()
+ ch := make(chan struct{}, 1)
+ cq.subscribers[ch] = ch
+ return ch
+}
+
+// Unsubscribe stops sending updates to the given channel. See
+// Subscribe.
+func (cq *Queue) Unsubscribe(ch <-chan struct{}) {
+ cq.mtx.Lock()
+ defer cq.mtx.Unlock()
+ delete(cq.subscribers, ch)
+}
+
+// Caller must have lock.
+func (cq *Queue) notify() {
+ for _, ch := range cq.subscribers {
+ select {
+ case ch <- struct{}{}:
+ default:
+ }
}
}
@@ -167,6 +205,7 @@ func (cq *Queue) Update() error {
}
cq.dontupdate = nil
cq.updated = updateStarted
+ cq.notify()
return nil
}
@@ -192,9 +231,16 @@ func (cq *Queue) Unlock(uuid string) error {
// Cancel cancels the given container.
func (cq *Queue) Cancel(uuid string) error {
- return cq.client.RequestAndDecode(nil, "PUT", "arvados/v1/containers/"+uuid, nil, map[string]map[string]interface{}{
+ err := cq.client.RequestAndDecode(nil, "PUT", "arvados/v1/containers/"+uuid, nil, map[string]map[string]interface{}{
"container": {"state": arvados.ContainerStateCancelled},
})
+ if err != nil {
+ return err
+ }
+ cq.mtx.Lock()
+ defer cq.mtx.Unlock()
+ cq.notify()
+ return nil
}
func (cq *Queue) apiUpdate(uuid, action string) error {
@@ -215,6 +261,7 @@ func (cq *Queue) apiUpdate(uuid, action string) error {
ent.Container.State, ent.Container.Priority, ent.Container.LockedByUUID = resp.State, resp.Priority, resp.LockedByUUID
cq.current[uuid] = ent
}
+ cq.notify()
return nil
}
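The Subscribe/Unsubscribe/notify trio added above is a small publish-subscribe pattern: each subscriber gets a buffered channel of capacity 1, and notify does a non-blocking send so a slow consumer can never stall the queue and repeated updates coalesce into one wakeup. The same pattern as a standalone sketch (a generic event bus, not the Queue type itself, and with locking inside notify rather than in the caller):

    package main

    import (
            "fmt"
            "sync"
    )

    type bus struct {
            mtx         sync.Mutex
            subscribers map[<-chan struct{}]chan struct{}
    }

    func (b *bus) Subscribe() <-chan struct{} {
            b.mtx.Lock()
            defer b.mtx.Unlock()
            if b.subscribers == nil {
                    b.subscribers = map[<-chan struct{}]chan struct{}{}
            }
            ch := make(chan struct{}, 1) // capacity 1: coalesces bursts of updates
            b.subscribers[ch] = ch
            return ch
    }

    func (b *bus) Unsubscribe(ch <-chan struct{}) {
            b.mtx.Lock()
            defer b.mtx.Unlock()
            delete(b.subscribers, ch)
    }

    func (b *bus) notify() {
            b.mtx.Lock()
            defer b.mtx.Unlock()
            for _, ch := range b.subscribers {
                    select {
                    case ch <- struct{}{}: // wake the subscriber
                    default: // a wakeup is already pending; don't block
                    }
            }
    }

    func main() {
            var b bus
            ch := b.Subscribe()
            defer b.Unsubscribe(ch)
            b.notify()
            b.notify() // coalesced with the first
            <-ch
            fmt.Println("woken once, no backlog:", len(ch))
    }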
diff --git a/lib/dispatchcloud/dispatcher.go b/lib/dispatchcloud/dispatcher.go
index c441dc6fa..97aacf604 100644
--- a/lib/dispatchcloud/dispatcher.go
+++ b/lib/dispatchcloud/dispatcher.go
@@ -28,14 +28,10 @@ import (
)
const (
- defaultPollInterval = time.Second
+ defaultPollInterval = time.Second
+ defaultStaleLockTimeout = time.Minute
)
-type containerQueue interface {
- scheduler.ContainerQueue
- Update() error
-}
-
type pool interface {
scheduler.WorkerPool
Instances() []worker.InstanceView
@@ -45,14 +41,13 @@ type dispatcher struct {
Cluster *arvados.Cluster
InstanceSetID cloud.InstanceSetID
- logger logrus.FieldLogger
- reg *prometheus.Registry
- instanceSet cloud.InstanceSet
- pool pool
- queue containerQueue
- httpHandler http.Handler
- pollInterval time.Duration
- sshKey ssh.Signer
+ logger logrus.FieldLogger
+ reg *prometheus.Registry
+ instanceSet cloud.InstanceSet
+ pool pool
+ queue scheduler.ContainerQueue
+ httpHandler http.Handler
+ sshKey ssh.Signer
setupOnce sync.Once
stop chan struct{}
@@ -140,39 +135,24 @@ func (disp *dispatcher) initialize() {
mux.Handle("/metrics", metricsH)
mux.Handle("/metrics.json", metricsH)
disp.httpHandler = auth.RequireLiteralToken(disp.Cluster.ManagementToken, mux)
-
- if d := disp.Cluster.Dispatch.PollInterval; d > 0 {
- disp.pollInterval = time.Duration(d)
- } else {
- disp.pollInterval = defaultPollInterval
- }
}
func (disp *dispatcher) run() {
defer disp.instanceSet.Stop()
- t0 := time.Now()
- disp.logger.Infof("FixStaleLocks starting.")
- scheduler.FixStaleLocks(disp.logger, disp.queue, disp.pool, time.Duration(disp.Cluster.Dispatch.StaleLockTimeout))
- disp.logger.Infof("FixStaleLocks finished (%s), starting scheduling.", time.Since(t0))
-
- wp := disp.pool.Subscribe()
- defer disp.pool.Unsubscribe(wp)
- poll := time.NewTicker(disp.pollInterval)
- for {
- scheduler.Map(disp.logger, disp.queue, disp.pool)
- scheduler.Sync(disp.logger, disp.queue, disp.pool)
- select {
- case <-disp.stop:
- return
- case <-wp:
- case <-poll.C:
- err := disp.queue.Update()
- if err != nil {
- disp.logger.Errorf("error updating queue: %s", err)
- }
- }
+ staleLockTimeout := time.Duration(disp.Cluster.Dispatch.StaleLockTimeout)
+ if staleLockTimeout == 0 {
+ staleLockTimeout = defaultStaleLockTimeout
}
+ pollInterval := time.Duration(disp.Cluster.Dispatch.PollInterval)
+ if pollInterval <= 0 {
+ pollInterval = defaultPollInterval
+ }
+ sched := scheduler.New(disp.logger, disp.queue, disp.pool, staleLockTimeout, pollInterval)
+ sched.Start()
+ defer sched.Stop()
+
+ <-disp.stop
}
// Management API: all active and queued containers.
diff --git a/lib/dispatchcloud/dispatcher_test.go b/lib/dispatchcloud/dispatcher_test.go
index 362fb4942..248986bf3 100644
--- a/lib/dispatchcloud/dispatcher_test.go
+++ b/lib/dispatchcloud/dispatcher_test.go
@@ -199,6 +199,7 @@ func (s *DispatcherSuite) SetUpTest(c *check.C) {
PrivateKey: dispatchprivraw,
PollInterval: arvados.Duration(5 * time.Millisecond),
ProbeInterval: arvados.Duration(5 * time.Millisecond),
+ StaleLockTimeout: arvados.Duration(5 * time.Millisecond),
MaxProbesPerSecond: 1000,
},
InstanceTypes: arvados.InstanceTypeMap{
diff --git a/lib/dispatchcloud/scheduler/fix_stale_locks.go b/lib/dispatchcloud/scheduler/fix_stale_locks.go
index e9644aed2..985941090 100644
--- a/lib/dispatchcloud/scheduler/fix_stale_locks.go
+++ b/lib/dispatchcloud/scheduler/fix_stale_locks.go
@@ -9,17 +9,17 @@ import (
"git.curoverse.com/arvados.git/lib/dispatchcloud/worker"
"git.curoverse.com/arvados.git/sdk/go/arvados"
- "github.com/Sirupsen/logrus"
)
-// FixStaleLocks waits for any already-locked containers (i.e., locked
+// fixStaleLocks waits for any already-locked containers (i.e., locked
// by a prior dispatcher process) to appear on workers as the worker
// pool recovers its state. It unlocks any that still remain when all
-// workers are recovered or shutdown, or its timer expires.
-func FixStaleLocks(logger logrus.FieldLogger, queue ContainerQueue, pool WorkerPool, limit time.Duration) {
- wp := pool.Subscribe()
- defer pool.Unsubscribe(wp)
- timeout := time.NewTimer(limit)
+// workers are recovered or shutdown, or its timer
+// (sch.staleLockTimeout) expires.
+func (sch *Scheduler) fixStaleLocks() {
+ wp := sch.pool.Subscribe()
+ defer sch.pool.Unsubscribe(wp)
+ timeout := time.NewTimer(sch.staleLockTimeout)
waiting:
for {
unlock := false
@@ -28,15 +28,15 @@ waiting:
// If all workers have been contacted, unlock
// containers that aren't claimed by any
// worker.
- unlock = pool.Workers()[worker.StateUnknown] == 0
+ unlock = sch.pool.Workers()[worker.StateUnknown] == 0
case <-timeout.C:
// Give up and unlock the containers, even
// though they might be working.
unlock = true
}
- running := pool.Running()
- qEntries, _ := queue.Entries()
+ running := sch.pool.Running()
+ qEntries, _ := sch.queue.Entries()
for uuid, ent := range qEntries {
if ent.Container.State != arvados.ContainerStateLocked {
continue
@@ -47,9 +47,9 @@ waiting:
if !unlock {
continue waiting
}
- err := queue.Unlock(uuid)
+ err := sch.queue.Unlock(uuid)
if err != nil {
- logger.Warnf("Unlock %s: %s", uuid, err)
+ sch.logger.Warnf("Unlock %s: %s", uuid, err)
}
}
return
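At its core, fixStaleLocks is a wait loop that races the pool's notification channel against a one-shot timer, re-checking after every pool update and giving up when the timer fires. The select skeleton on its own, with no Arvados types and a counter standing in for "workers still in StateUnknown":

    package main

    import (
            "fmt"
            "time"
    )

    func main() {
            notify := make(chan struct{})
            timeout := time.NewTimer(100 * time.Millisecond)
            defer timeout.Stop()

            // Stand-in for pool.Subscribe(): pretend two pool updates arrive.
            go func() {
                    for i := 0; i < 2; i++ {
                            time.Sleep(10 * time.Millisecond)
                            notify <- struct{}{}
                    }
            }()

            workersPending := 2 // workers whose state we haven't confirmed yet
            for {
                    unlock := false
                    select {
                    case <-notify:
                            workersPending--
                            unlock = workersPending == 0 // all workers accounted for
                    case <-timeout.C:
                            unlock = true // give up and unlock anyway
                            fmt.Println("timed out")
                    }
                    if !unlock {
                            continue
                    }
                    fmt.Println("safe to unlock stale containers now")
                    return
            }
    }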
diff --git a/lib/dispatchcloud/scheduler/interfaces.go b/lib/dispatchcloud/scheduler/interfaces.go
index bdb8678e9..467247ad9 100644
--- a/lib/dispatchcloud/scheduler/interfaces.go
+++ b/lib/dispatchcloud/scheduler/interfaces.go
@@ -21,6 +21,9 @@ type ContainerQueue interface {
Cancel(uuid string) error
Forget(uuid string)
Get(uuid string) (arvados.Container, bool)
+ Subscribe() <-chan struct{}
+ Unsubscribe(<-chan struct{})
+ Update() error
}
// A WorkerPool asynchronously starts and stops worker VMs, and starts
diff --git a/lib/dispatchcloud/scheduler/map.go b/lib/dispatchcloud/scheduler/run_queue.go
similarity index 65%
rename from lib/dispatchcloud/scheduler/map.go
rename to lib/dispatchcloud/scheduler/run_queue.go
index aa92d0433..9fc1a1658 100644
--- a/lib/dispatchcloud/scheduler/map.go
+++ b/lib/dispatchcloud/scheduler/run_queue.go
@@ -2,11 +2,6 @@
//
// SPDX-License-Identifier: AGPL-3.0
-// Package scheduler uses a resizable worker pool to execute
-// containers in priority order.
-//
-// Scheduler functions must not be called concurrently using the same
-// queue or pool.
package scheduler
import (
@@ -18,28 +13,8 @@ import (
"github.com/Sirupsen/logrus"
)
-// Map maps queued containers onto unallocated workers in priority
-// order, creating new workers if needed. It locks containers that can
-// be mapped onto existing/pending workers, and starts them if
-// possible.
-//
-// Map unlocks any containers that are locked but can't be
-// mapped. (For example, this happens when the cloud provider reaches
-// quota/capacity and a previously mappable container's priority is
-// surpassed by a newer container.)
-//
-// If it encounters errors while creating new workers, Map shuts down
-// idle workers, in case they are consuming quota.
-//
-// Map should not be called without first calling FixStaleLocks.
-//
-// FixStaleLocks()
-// for {
-// Map()
-// Sync()
-// }
-func Map(logger logrus.FieldLogger, queue ContainerQueue, pool WorkerPool) {
- unsorted, _ := queue.Entries()
+func (sch *Scheduler) runQueue() {
+ unsorted, _ := sch.queue.Entries()
sorted := make([]container.QueueEnt, 0, len(unsorted))
for _, ent := range unsorted {
sorted = append(sorted, ent)
@@ -48,20 +23,20 @@ func Map(logger logrus.FieldLogger, queue ContainerQueue, pool WorkerPool) {
return sorted[i].Container.Priority > sorted[j].Container.Priority
})
- running := pool.Running()
- unalloc := pool.Unallocated()
+ running := sch.pool.Running()
+ unalloc := sch.pool.Unallocated()
- logger.WithFields(logrus.Fields{
+ sch.logger.WithFields(logrus.Fields{
"Containers": len(sorted),
"Processes": len(running),
- }).Debug("mapping")
+ }).Debug("runQueue")
dontstart := map[arvados.InstanceType]bool{}
var overquota []container.QueueEnt // entries that are unmappable because of worker pool quota
for i, ctr := range sorted {
ctr, it := ctr.Container, ctr.InstanceType
- logger := logger.WithFields(logrus.Fields{
+ logger := sch.logger.WithFields(logrus.Fields{
"ContainerUUID": ctr.UUID,
"InstanceType": it.Name,
})
@@ -69,19 +44,20 @@ func Map(logger logrus.FieldLogger, queue ContainerQueue, pool WorkerPool) {
continue
}
if ctr.State == arvados.ContainerStateQueued {
- logger.Debugf("locking")
- if unalloc[it] < 1 && pool.AtQuota() {
+ if unalloc[it] < 1 && sch.pool.AtQuota() {
+ logger.Debugf("not locking: AtQuota and no unalloc workers")
overquota = sorted[i:]
break
}
- err := queue.Lock(ctr.UUID)
+ logger.Debugf("locking")
+ err := sch.queue.Lock(ctr.UUID)
if err != nil {
logger.WithError(err).Warnf("lock error")
unalloc[it]++
continue
}
var ok bool
- ctr, ok = queue.Get(ctr.UUID)
+ ctr, ok = sch.queue.Get(ctr.UUID)
if !ok {
logger.Error("(BUG?) container disappeared from queue after Lock succeeded")
continue
@@ -95,12 +71,12 @@ func Map(logger logrus.FieldLogger, queue ContainerQueue, pool WorkerPool) {
}
if unalloc[it] < 1 {
logger.Info("creating new instance")
- err := pool.Create(it)
+ err := sch.pool.Create(it)
if err != nil {
if _, ok := err.(cloud.QuotaError); !ok {
logger.WithError(err).Warn("error creating worker")
}
- queue.Unlock(ctr.UUID)
+ sch.queue.Unlock(ctr.UUID)
// Don't let lower-priority containers
// starve this one by keeping
// idle workers alive on different
@@ -117,7 +93,7 @@ func Map(logger logrus.FieldLogger, queue ContainerQueue, pool WorkerPool) {
// higher-priority container on the same
// instance type. Don't let this one sneak in
// ahead of it.
- } else if pool.StartContainer(it, ctr) {
+ } else if sch.pool.StartContainer(it, ctr) {
unalloc[it]--
} else {
dontstart[it] = true
@@ -130,9 +106,9 @@ func Map(logger logrus.FieldLogger, queue ContainerQueue, pool WorkerPool) {
for _, ctr := range overquota {
ctr := ctr.Container
if ctr.State == arvados.ContainerStateLocked {
- logger := logger.WithField("ContainerUUID", ctr.UUID)
+ logger := sch.logger.WithField("ContainerUUID", ctr.UUID)
logger.Debug("unlock because pool capacity is used by higher priority containers")
- err := queue.Unlock(ctr.UUID)
+ err := sch.queue.Unlock(ctr.UUID)
if err != nil {
logger.WithError(err).Warn("error unlocking")
}
@@ -144,7 +120,7 @@ func Map(logger logrus.FieldLogger, queue ContainerQueue, pool WorkerPool) {
if n < 1 {
continue
}
- pool.Shutdown(it)
+ sch.pool.Shutdown(it)
}
}
}
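runQueue's first step is to flatten the queue map into a slice and sort it so higher-priority containers are considered first; everything else (locking, creating, starting) happens in that order. The same sort.Slice idiom in isolation, with the container reduced to the two fields that matter here:

    package main

    import (
            "fmt"
            "sort"
    )

    type queueEnt struct {
            UUID     string
            Priority int
    }

    func main() {
            unsorted := map[string]queueEnt{
                    "a": {"a", 1}, "b": {"b", 4}, "c": {"c", 2},
            }
            sorted := make([]queueEnt, 0, len(unsorted))
            for _, ent := range unsorted {
                    sorted = append(sorted, ent)
            }
            // Highest priority first, so the scheduler locks/starts the
            // most important work before spending quota on anything else.
            sort.Slice(sorted, func(i, j int) bool {
                    return sorted[i].Priority > sorted[j].Priority
            })
            for _, ent := range sorted {
                    fmt.Println(ent.UUID, ent.Priority) // b 4, c 2, a 1
            }
    }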
diff --git a/lib/dispatchcloud/scheduler/map_test.go b/lib/dispatchcloud/scheduler/run_queue_test.go
similarity index 53%
rename from lib/dispatchcloud/scheduler/map_test.go
rename to lib/dispatchcloud/scheduler/run_queue_test.go
index f30520053..e5e8c7ecf 100644
--- a/lib/dispatchcloud/scheduler/map_test.go
+++ b/lib/dispatchcloud/scheduler/run_queue_test.go
@@ -6,10 +6,8 @@ package scheduler
import (
"errors"
- "fmt"
"time"
- "git.curoverse.com/arvados.git/lib/dispatchcloud/container"
"git.curoverse.com/arvados.git/lib/dispatchcloud/test"
"git.curoverse.com/arvados.git/lib/dispatchcloud/worker"
"git.curoverse.com/arvados.git/sdk/go/arvados"
@@ -20,14 +18,6 @@ import (
var (
logger = logrus.StandardLogger()
- // arbitrary example instance types
- types = func() (r []arvados.InstanceType) {
- for i := 0; i < 16; i++ {
- r = append(r, test.InstanceType(i))
- }
- return
- }()
-
// arbitrary example container UUIDs
uuids = func() (r []string) {
for i := 0; i < 16; i++ {
@@ -37,38 +27,6 @@ var (
}()
)
-type stubQueue struct {
- ents map[string]container.QueueEnt
-}
-
-func (q *stubQueue) Entries() (map[string]container.QueueEnt, time.Time) {
- return q.ents, time.Now()
-}
-func (q *stubQueue) Lock(uuid string) error {
- return q.setState(uuid, arvados.ContainerStateLocked)
-}
-func (q *stubQueue) Unlock(uuid string) error {
- return q.setState(uuid, arvados.ContainerStateQueued)
-}
-func (q *stubQueue) Cancel(uuid string) error {
- return q.setState(uuid, arvados.ContainerStateCancelled)
-}
-func (q *stubQueue) Forget(uuid string) {
-}
-func (q *stubQueue) Get(uuid string) (arvados.Container, bool) {
- ent, ok := q.ents[uuid]
- return ent.Container, ok
-}
-func (q *stubQueue) setState(uuid string, state arvados.ContainerState) error {
- ent, ok := q.ents[uuid]
- if !ok {
- return fmt.Errorf("no such ent: %q", uuid)
- }
- ent.Container.State = state
- q.ents[uuid] = ent
- return nil
-}
-
type stubQuotaError struct {
error
}
@@ -135,47 +93,71 @@ var _ = check.Suite(&SchedulerSuite{})
type SchedulerSuite struct{}
-// Map priority=4 container to idle node. Create a new instance for
+// Assign priority=4 container to idle node. Create a new instance for
// the priority=3 container. Don't try to start any priority<3
// containers because priority=3 container didn't start
// immediately. Don't try to create any other nodes after the failed
// create.
-func (*SchedulerSuite) TestMapIdle(c *check.C) {
- queue := stubQueue{
- ents: map[string]container.QueueEnt{
- uuids[1]: {
- Container: arvados.Container{UUID: uuids[1], Priority: 1, State: arvados.ContainerStateQueued},
- InstanceType: types[1],
+func (*SchedulerSuite) TestUseIdleWorkers(c *check.C) {
+ queue := test.Queue{
+ ChooseType: func(ctr *arvados.Container) (arvados.InstanceType, error) {
+ return test.InstanceType(ctr.RuntimeConstraints.VCPUs), nil
+ },
+ Containers: []arvados.Container{
+ {
+ UUID: test.ContainerUUID(1),
+ Priority: 1,
+ State: arvados.ContainerStateQueued,
+ RuntimeConstraints: arvados.RuntimeConstraints{
+ VCPUs: 1,
+ RAM: 1 << 30,
+ },
},
- uuids[2]: {
- Container: arvados.Container{UUID: uuids[2], Priority: 2, State: arvados.ContainerStateQueued},
- InstanceType: types[1],
+ {
+ UUID: test.ContainerUUID(2),
+ Priority: 2,
+ State: arvados.ContainerStateQueued,
+ RuntimeConstraints: arvados.RuntimeConstraints{
+ VCPUs: 1,
+ RAM: 1 << 30,
+ },
},
- uuids[3]: {
- Container: arvados.Container{UUID: uuids[3], Priority: 3, State: arvados.ContainerStateQueued},
- InstanceType: types[1],
+ {
+ UUID: test.ContainerUUID(3),
+ Priority: 3,
+ State: arvados.ContainerStateQueued,
+ RuntimeConstraints: arvados.RuntimeConstraints{
+ VCPUs: 1,
+ RAM: 1 << 30,
+ },
},
- uuids[4]: {
- Container: arvados.Container{UUID: uuids[4], Priority: 4, State: arvados.ContainerStateQueued},
- InstanceType: types[1],
+ {
+ UUID: test.ContainerUUID(4),
+ Priority: 4,
+ State: arvados.ContainerStateQueued,
+ RuntimeConstraints: arvados.RuntimeConstraints{
+ VCPUs: 1,
+ RAM: 1 << 30,
+ },
},
},
}
+ queue.Update()
pool := stubPool{
unalloc: map[arvados.InstanceType]int{
- types[1]: 1,
- types[2]: 2,
+ test.InstanceType(1): 1,
+ test.InstanceType(2): 2,
},
idle: map[arvados.InstanceType]int{
- types[1]: 1,
- types[2]: 2,
+ test.InstanceType(1): 1,
+ test.InstanceType(2): 2,
},
running: map[string]time.Time{},
canCreate: 1,
}
- Map(logger, &queue, &pool)
- c.Check(pool.creates, check.DeepEquals, []arvados.InstanceType{types[1]})
- c.Check(pool.starts, check.DeepEquals, []string{uuids[4], uuids[3]})
+ New(logger, &queue, &pool, time.Millisecond, time.Millisecond).runQueue()
+ c.Check(pool.creates, check.DeepEquals, []arvados.InstanceType{test.InstanceType(1)})
+ c.Check(pool.starts, check.DeepEquals, []string{test.ContainerUUID(4), test.ContainerUUID(3)})
c.Check(pool.running, check.HasLen, 1)
for uuid := range pool.running {
c.Check(uuid, check.Equals, uuids[4])
@@ -184,31 +166,43 @@ func (*SchedulerSuite) TestMapIdle(c *check.C) {
// Shutdown some nodes if Create() fails -- and without even calling
// Create(), if AtQuota() is true.
-func (*SchedulerSuite) TestMapShutdownAtQuota(c *check.C) {
+func (*SchedulerSuite) TestShutdownAtQuota(c *check.C) {
for quota := 0; quota < 2; quota++ {
- shouldCreate := types[1 : 1+quota]
- queue := stubQueue{
- ents: map[string]container.QueueEnt{
- uuids[1]: {
- Container: arvados.Container{UUID: uuids[1], Priority: 1, State: arvados.ContainerStateQueued},
- InstanceType: types[1],
+ shouldCreate := []arvados.InstanceType{}
+ for i := 1; i < 1+quota; i++ {
+ shouldCreate = append(shouldCreate, test.InstanceType(i))
+ }
+ queue := test.Queue{
+ ChooseType: func(ctr *arvados.Container) (arvados.InstanceType, error) {
+ return test.InstanceType(ctr.RuntimeConstraints.VCPUs), nil
+ },
+ Containers: []arvados.Container{
+ {
+ UUID: test.ContainerUUID(1),
+ Priority: 1,
+ State: arvados.ContainerStateQueued,
+ RuntimeConstraints: arvados.RuntimeConstraints{
+ VCPUs: 1,
+ RAM: 1 << 30,
+ },
},
},
}
+ queue.Update()
pool := stubPool{
atQuota: quota == 0,
unalloc: map[arvados.InstanceType]int{
- types[2]: 2,
+ test.InstanceType(2): 2,
},
idle: map[arvados.InstanceType]int{
- types[2]: 2,
+ test.InstanceType(2): 2,
},
running: map[string]time.Time{},
creates: []arvados.InstanceType{},
starts: []string{},
canCreate: 0,
}
- Map(logger, &queue, &pool)
+ New(logger, &queue, &pool, time.Millisecond, time.Millisecond).runQueue()
c.Check(pool.creates, check.DeepEquals, shouldCreate)
c.Check(pool.starts, check.DeepEquals, []string{})
c.Check(pool.shutdowns, check.Not(check.Equals), 0)
@@ -217,55 +211,89 @@ func (*SchedulerSuite) TestMapShutdownAtQuota(c *check.C) {
// Start lower-priority containers while waiting for new/existing
// workers to come up for higher-priority containers.
-func (*SchedulerSuite) TestMapStartWhileCreating(c *check.C) {
+func (*SchedulerSuite) TestStartWhileCreating(c *check.C) {
pool := stubPool{
unalloc: map[arvados.InstanceType]int{
- types[1]: 1,
- types[2]: 1,
+ test.InstanceType(1): 1,
+ test.InstanceType(2): 1,
},
idle: map[arvados.InstanceType]int{
- types[1]: 1,
- types[2]: 1,
+ test.InstanceType(1): 1,
+ test.InstanceType(2): 1,
},
running: map[string]time.Time{},
canCreate: 2,
}
- queue := stubQueue{
- ents: map[string]container.QueueEnt{
- uuids[1]: {
+ queue := test.Queue{
+ ChooseType: func(ctr *arvados.Container) (arvados.InstanceType, error) {
+ return test.InstanceType(ctr.RuntimeConstraints.VCPUs), nil
+ },
+ Containers: []arvados.Container{
+ {
// create a new worker
- Container: arvados.Container{UUID: uuids[1], Priority: 1, State: arvados.ContainerStateQueued},
- InstanceType: types[1],
+ UUID: test.ContainerUUID(1),
+ Priority: 1,
+ State: arvados.ContainerStateQueued,
+ RuntimeConstraints: arvados.RuntimeConstraints{
+ VCPUs: 1,
+ RAM: 1 << 30,
+ },
},
- uuids[2]: {
+ {
// tentatively map to unalloc worker
- Container: arvados.Container{UUID: uuids[2], Priority: 2, State: arvados.ContainerStateQueued},
- InstanceType: types[1],
+ UUID: test.ContainerUUID(2),
+ Priority: 2,
+ State: arvados.ContainerStateQueued,
+ RuntimeConstraints: arvados.RuntimeConstraints{
+ VCPUs: 1,
+ RAM: 1 << 30,
+ },
},
- uuids[3]: {
+ {
// start now on idle worker
- Container: arvados.Container{UUID: uuids[3], Priority: 3, State: arvados.ContainerStateQueued},
- InstanceType: types[1],
+ UUID: test.ContainerUUID(3),
+ Priority: 3,
+ State: arvados.ContainerStateQueued,
+ RuntimeConstraints: arvados.RuntimeConstraints{
+ VCPUs: 1,
+ RAM: 1 << 30,
+ },
},
- uuids[4]: {
+ {
// create a new worker
- Container: arvados.Container{UUID: uuids[4], Priority: 4, State: arvados.ContainerStateQueued},
- InstanceType: types[2],
+ UUID: test.ContainerUUID(4),
+ Priority: 4,
+ State: arvados.ContainerStateQueued,
+ RuntimeConstraints: arvados.RuntimeConstraints{
+ VCPUs: 2,
+ RAM: 2 << 30,
+ },
},
- uuids[5]: {
+ {
// tentatively map to unalloc worker
- Container: arvados.Container{UUID: uuids[5], Priority: 5, State: arvados.ContainerStateQueued},
- InstanceType: types[2],
+ UUID: test.ContainerUUID(5),
+ Priority: 5,
+ State: arvados.ContainerStateQueued,
+ RuntimeConstraints: arvados.RuntimeConstraints{
+ VCPUs: 2,
+ RAM: 2 << 30,
+ },
},
- uuids[6]: {
+ {
// start now on idle worker
- Container: arvados.Container{UUID: uuids[6], Priority: 6, State: arvados.ContainerStateQueued},
- InstanceType: types[2],
+ UUID: test.ContainerUUID(6),
+ Priority: 6,
+ State: arvados.ContainerStateQueued,
+ RuntimeConstraints: arvados.RuntimeConstraints{
+ VCPUs: 2,
+ RAM: 2 << 30,
+ },
},
},
}
- Map(logger, &queue, &pool)
- c.Check(pool.creates, check.DeepEquals, []arvados.InstanceType{types[2], types[1]})
+ queue.Update()
+ New(logger, &queue, &pool, time.Millisecond, time.Millisecond).runQueue()
+ c.Check(pool.creates, check.DeepEquals, []arvados.InstanceType{test.InstanceType(2), test.InstanceType(1)})
c.Check(pool.starts, check.DeepEquals, []string{uuids[6], uuids[5], uuids[3], uuids[2]})
running := map[string]bool{}
for uuid, t := range pool.running {
diff --git a/lib/dispatchcloud/scheduler/scheduler.go b/lib/dispatchcloud/scheduler/scheduler.go
new file mode 100644
index 000000000..0be8edb7b
--- /dev/null
+++ b/lib/dispatchcloud/scheduler/scheduler.go
@@ -0,0 +1,107 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+// Package scheduler uses a resizable worker pool to execute
+// containers in priority order.
+package scheduler
+
+import (
+ "sync"
+ "time"
+
+ "github.com/Sirupsen/logrus"
+)
+
+// A Scheduler maps queued containers onto unallocated workers in
+// priority order, creating new workers if needed. It locks containers
+// that can be mapped onto existing/pending workers, and starts them
+// if possible.
+//
+// A Scheduler unlocks any containers that are locked but can't be
+// mapped. (For example, this happens when the cloud provider reaches
+// quota/capacity and a previously mappable container's priority is
+// surpassed by a newer container.)
+//
+// If it encounters errors while creating new workers, a Scheduler
+// shuts down idle workers, in case they are consuming quota.
+type Scheduler struct {
+ logger logrus.FieldLogger
+ queue ContainerQueue
+ pool WorkerPool
+ staleLockTimeout time.Duration
+ queueUpdateInterval time.Duration
+
+ runOnce sync.Once
+ stop chan struct{}
+}
+
+// New returns a new unstarted Scheduler.
+//
+// Any given queue and pool should not be used by more than one
+// scheduler at a time.
+func New(logger logrus.FieldLogger, queue ContainerQueue, pool WorkerPool, staleLockTimeout, queueUpdateInterval time.Duration) *Scheduler {
+ return &Scheduler{
+ logger: logger,
+ queue: queue,
+ pool: pool,
+ staleLockTimeout: staleLockTimeout,
+ queueUpdateInterval: queueUpdateInterval,
+ stop: make(chan struct{}),
+ }
+}
+
+// Start starts the scheduler.
+func (sch *Scheduler) Start() {
+ go sch.runOnce.Do(sch.run)
+}
+
+// Stop stops the scheduler. No other method should be called after
+// Stop.
+func (sch *Scheduler) Stop() {
+ close(sch.stop)
+}
+
+func (sch *Scheduler) run() {
+ // Ensure the queue is fetched once before attempting anything.
+ for err := sch.queue.Update(); err != nil; err = sch.queue.Update() {
+ sch.logger.Errorf("error updating queue: %s", err)
+ d := sch.queueUpdateInterval / 60
+ sch.logger.Infof("waiting %s before retry", d)
+ time.Sleep(d)
+ }
+
+ // Keep the queue up to date.
+ poll := time.NewTicker(sch.queueUpdateInterval)
+ defer poll.Stop()
+ go func() {
+ for range poll.C {
+ err := sch.queue.Update()
+ if err != nil {
+ sch.logger.Errorf("error updating queue: %s", err)
+ }
+ }
+ }()
+
+ t0 := time.Now()
+ sch.logger.Infof("FixStaleLocks starting.")
+ sch.fixStaleLocks()
+ sch.logger.Infof("FixStaleLocks finished (%s), starting scheduling.", time.Since(t0))
+
+ poolNotify := sch.pool.Subscribe()
+ defer sch.pool.Unsubscribe(poolNotify)
+
+ queueNotify := sch.queue.Subscribe()
+ defer sch.queue.Unsubscribe(queueNotify)
+
+ for {
+ sch.runQueue()
+ sch.sync()
+ select {
+ case <-sch.stop:
+ return
+ case <-queueNotify:
+ case <-poolNotify:
+ }
+ }
+}
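From the caller's point of view the new Scheduler lifecycle is just New, Start, Stop, which is what dispatcher.run() now does. A minimal wiring sketch, assuming the import paths shown in this diff and leaving the ContainerQueue/WorkerPool implementations to the caller:

    package example

    import (
            "time"

            "git.curoverse.com/arvados.git/lib/dispatchcloud/scheduler"
            "github.com/Sirupsen/logrus"
    )

    // runScheduler wires a queue and pool into a Scheduler and blocks
    // until stop is closed, roughly what dispatcher.run() does above.
    func runScheduler(queue scheduler.ContainerQueue, pool scheduler.WorkerPool, stop <-chan struct{}) {
            sched := scheduler.New(logrus.StandardLogger(), queue, pool,
                    time.Minute, // staleLockTimeout: how long to wait for old locks to resolve
                    time.Second) // queueUpdateInterval: how often to refresh the container queue
            sched.Start()
            defer sched.Stop()
            <-stop
    }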
diff --git a/lib/dispatchcloud/scheduler/sync.go b/lib/dispatchcloud/scheduler/sync.go
index bd0e9b309..a85162deb 100644
--- a/lib/dispatchcloud/scheduler/sync.go
+++ b/lib/dispatchcloud/scheduler/sync.go
@@ -12,7 +12,7 @@ import (
"github.com/Sirupsen/logrus"
)
-// Sync resolves discrepancies between the queue and the pool:
+// sync resolves discrepancies between the queue and the pool:
//
// Lingering crunch-run processes for finalized and unlocked/requeued
// containers are killed.
@@ -22,27 +22,24 @@ import (
//
// Running containers whose crunch-run processes have exited are
// cancelled.
-//
-// Sync must not be called concurrently with other calls to Map or
-// Sync using the same queue or pool.
-func Sync(logger logrus.FieldLogger, queue ContainerQueue, pool WorkerPool) {
- running := pool.Running()
+func (sch *Scheduler) sync() {
+ running := sch.pool.Running()
cancel := func(ent container.QueueEnt, reason string) {
uuid := ent.Container.UUID
- logger := logger.WithField("ContainerUUID", uuid)
+ logger := sch.logger.WithField("ContainerUUID", uuid)
logger.Infof("cancelling container because %s", reason)
- err := queue.Cancel(uuid)
+ err := sch.queue.Cancel(uuid)
if err != nil {
logger.WithError(err).Print("error cancelling container")
}
}
kill := func(ent container.QueueEnt) {
uuid := ent.Container.UUID
- logger := logger.WithField("ContainerUUID", uuid)
+ logger := sch.logger.WithField("ContainerUUID", uuid)
logger.Debugf("killing crunch-run process because state=%q", ent.Container.State)
- pool.KillContainer(uuid)
+ sch.pool.KillContainer(uuid)
}
- qEntries, qUpdated := queue.Entries()
+ qEntries, qUpdated := sch.queue.Entries()
for uuid, ent := range qEntries {
exited, running := running[uuid]
switch ent.Container.State {
@@ -64,11 +61,11 @@ func Sync(logger logrus.FieldLogger, queue ContainerQueue, pool WorkerPool) {
// container.
go kill(ent)
} else {
- logger.WithFields(logrus.Fields{
+ sch.logger.WithFields(logrus.Fields{
"ContainerUUID": uuid,
"State": ent.Container.State,
}).Info("container finished")
- queue.Forget(uuid)
+ sch.queue.Forget(uuid)
}
case arvados.ContainerStateQueued:
if running {
@@ -80,18 +77,18 @@ func Sync(logger logrus.FieldLogger, queue ContainerQueue, pool WorkerPool) {
}
case arvados.ContainerStateLocked:
if running && !exited.IsZero() && qUpdated.After(exited) {
- logger = logger.WithFields(logrus.Fields{
+ logger := sch.logger.WithFields(logrus.Fields{
"ContainerUUID": uuid,
"Exited": time.Since(exited).Seconds(),
})
logger.Infof("requeueing container because state=%q after crunch-run exited", ent.Container.State)
- err := queue.Unlock(uuid)
+ err := sch.queue.Unlock(uuid)
if err != nil {
logger.WithError(err).Info("error requeueing container")
}
}
default:
- logger.WithField("ContainerUUID", uuid).Errorf("BUG: unexpected state %q", ent.Container.State)
+ sch.logger.WithField("ContainerUUID", uuid).Errorf("BUG: unexpected state %q", ent.Container.State)
}
}
}
diff --git a/lib/dispatchcloud/test/queue.go b/lib/dispatchcloud/test/queue.go
index 909f56114..0bad5a391 100644
--- a/lib/dispatchcloud/test/queue.go
+++ b/lib/dispatchcloud/test/queue.go
@@ -6,6 +6,7 @@ package test
import (
"fmt"
+ "sync"
"time"
"git.curoverse.com/arvados.git/lib/dispatchcloud/container"
@@ -22,13 +23,18 @@ type Queue struct {
// must not be nil.
ChooseType func(*arvados.Container) (arvados.InstanceType, error)
- entries map[string]container.QueueEnt
- updTime time.Time
+ entries map[string]container.QueueEnt
+ updTime time.Time
+ subscribers map[<-chan struct{}]chan struct{}
+
+ mtx sync.Mutex
}
// Entries returns the containers that were queued when Update was
// last called.
func (q *Queue) Entries() (map[string]container.QueueEnt, time.Time) {
+ q.mtx.Lock()
+ defer q.mtx.Unlock()
updTime := q.updTime
r := map[string]container.QueueEnt{}
for uuid, ent := range q.entries {
@@ -42,11 +48,15 @@ func (q *Queue) Entries() (map[string]container.QueueEnt, time.Time) {
// the state has been changed (via Lock, Unlock, or Cancel) since the
// last Update, the updated state is returned.
func (q *Queue) Get(uuid string) (arvados.Container, bool) {
+ q.mtx.Lock()
+ defer q.mtx.Unlock()
ent, ok := q.entries[uuid]
return ent.Container, ok
}
func (q *Queue) Forget(uuid string) {
+ q.mtx.Lock()
+ defer q.mtx.Unlock()
delete(q.entries, uuid)
}
@@ -62,7 +72,35 @@ func (q *Queue) Cancel(uuid string) error {
return q.changeState(uuid, q.entries[uuid].Container.State, arvados.ContainerStateCancelled)
}
+func (q *Queue) Subscribe() <-chan struct{} {
+ q.mtx.Lock()
+ defer q.mtx.Unlock()
+ if q.subscribers == nil {
+ q.subscribers = map[<-chan struct{}]chan struct{}{}
+ }
+ ch := make(chan struct{}, 1)
+ q.subscribers[ch] = ch
+ return ch
+}
+
+func (q *Queue) Unsubscribe(ch <-chan struct{}) {
+ q.mtx.Lock()
+ defer q.mtx.Unlock()
+ delete(q.subscribers, ch)
+}
+
+func (q *Queue) notify() {
+ for _, ch := range q.subscribers {
+ select {
+ case ch <- struct{}{}:
+ default:
+ }
+ }
+}
+
func (q *Queue) changeState(uuid string, from, to arvados.ContainerState) error {
+ q.mtx.Lock()
+ defer q.mtx.Unlock()
ent := q.entries[uuid]
if ent.Container.State != from {
return fmt.Errorf("lock failed: state=%q", ent.Container.State)
@@ -75,11 +113,14 @@ func (q *Queue) changeState(uuid string, from, to arvados.ContainerState) error
break
}
}
+ q.notify()
return nil
}
// Update rebuilds the current entries from the Containers slice.
func (q *Queue) Update() error {
+ q.mtx.Lock()
+ defer q.mtx.Unlock()
updTime := time.Now()
upd := map[string]container.QueueEnt{}
for _, ctr := range q.Containers {
@@ -95,6 +136,7 @@ func (q *Queue) Update() error {
}
q.entries = upd
q.updTime = updTime
+ q.notify()
return nil
}
@@ -106,6 +148,8 @@ func (q *Queue) Update() error {
// The resulting changes are not exposed through Get() or Entries()
// until the next call to Update().
func (q *Queue) Notify(upd arvados.Container) {
+ q.mtx.Lock()
+ defer q.mtx.Unlock()
for i, ctr := range q.Containers {
if ctr.UUID == upd.UUID {
q.Containers[i] = upd
commit 67464732a18e0aac11d89a75987151543b704cf6
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Mon Oct 29 13:13:37 2018 -0400
14360: Cancel containers asynchronously in Sync cleanup.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/lib/dispatchcloud/scheduler/sync.go b/lib/dispatchcloud/scheduler/sync.go
index c9273b4be..bd0e9b309 100644
--- a/lib/dispatchcloud/scheduler/sync.go
+++ b/lib/dispatchcloud/scheduler/sync.go
@@ -48,9 +48,9 @@ func Sync(logger logrus.FieldLogger, queue ContainerQueue, pool WorkerPool) {
switch ent.Container.State {
case arvados.ContainerStateRunning:
if !running {
- cancel(ent, "not running on any worker")
+ go cancel(ent, "not running on any worker")
} else if !exited.IsZero() && qUpdated.After(exited) {
- cancel(ent, "state=\"Running\" after crunch-run exited")
+ go cancel(ent, "state=\"Running\" after crunch-run exited")
}
case arvados.ContainerStateComplete, arvados.ContainerStateCancelled:
if running {
@@ -62,7 +62,7 @@ func Sync(logger logrus.FieldLogger, queue ContainerQueue, pool WorkerPool) {
// of kill() will be to make the
// worker available for the next
// container.
- kill(ent)
+ go kill(ent)
} else {
logger.WithFields(logrus.Fields{
"ContainerUUID": uuid,
@@ -76,7 +76,7 @@ func Sync(logger logrus.FieldLogger, queue ContainerQueue, pool WorkerPool) {
// a network outage and is still
// preparing to run a container that
// has already been unlocked/requeued.
- kill(ent)
+ go kill(ent)
}
case arvados.ContainerStateLocked:
if running && !exited.IsZero() && qUpdated.After(exited) {
commit a5ef124795af5b28a97c98e69a89c4c6e99cf0b7
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Mon Oct 29 13:12:48 2018 -0400
14360: Log level debug->warning for unexpected condition.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/lib/dispatchcloud/scheduler/map.go b/lib/dispatchcloud/scheduler/map.go
index 75d8eeefb..aa92d0433 100644
--- a/lib/dispatchcloud/scheduler/map.go
+++ b/lib/dispatchcloud/scheduler/map.go
@@ -87,7 +87,7 @@ func Map(logger logrus.FieldLogger, queue ContainerQueue, pool WorkerPool) {
continue
}
if ctr.State != arvados.ContainerStateLocked {
- logger.Debugf("(race?) container has state=%q after Lock succeeded", ctr.State)
+ logger.Warnf("(race?) container has state=%q after Lock succeeded", ctr.State)
}
}
if ctr.State != arvados.ContainerStateLocked {
commit a5fe34a8cd05c4e55deaec599347f65e6d662d22
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Mon Oct 29 11:28:42 2018 -0400
14360: Improve comments.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/lib/dispatchcloud/scheduler/map.go b/lib/dispatchcloud/scheduler/map.go
index 5742c1f47..75d8eeefb 100644
--- a/lib/dispatchcloud/scheduler/map.go
+++ b/lib/dispatchcloud/scheduler/map.go
@@ -32,6 +32,12 @@ import (
// idle workers, in case they are consuming quota.
//
// Map should not be called without first calling FixStaleLocks.
+//
+// FixStaleLocks()
+// for {
+// Map()
+// Sync()
+// }
func Map(logger logrus.FieldLogger, queue ContainerQueue, pool WorkerPool) {
unsorted, _ := queue.Entries()
sorted := make([]container.QueueEnt, 0, len(unsorted))
diff --git a/lib/dispatchcloud/scheduler/sync.go b/lib/dispatchcloud/scheduler/sync.go
index 00e2a30f7..c9273b4be 100644
--- a/lib/dispatchcloud/scheduler/sync.go
+++ b/lib/dispatchcloud/scheduler/sync.go
@@ -54,6 +54,14 @@ func Sync(logger logrus.FieldLogger, queue ContainerQueue, pool WorkerPool) {
}
case arvados.ContainerStateComplete, arvados.ContainerStateCancelled:
if running {
+ // Kill crunch-run in case it's stuck;
+ // nothing it does now will matter
+ // anyway. If crunch-run has already
+ // exited and we just haven't found
+ // out about it yet, the only effect
+ // of kill() will be to make the
+ // worker available for the next
+ // container.
kill(ent)
} else {
logger.WithFields(logrus.Fields{
@@ -64,6 +72,10 @@ func Sync(logger logrus.FieldLogger, queue ContainerQueue, pool WorkerPool) {
}
case arvados.ContainerStateQueued:
if running {
+ // Can happen if a worker returns from
+ // a network outage and is still
+ // preparing to run a container that
+ // has already been unlocked/requeued.
kill(ent)
}
case arvados.ContainerStateLocked:
diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index d1e23eb6b..1e759e38f 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -409,6 +409,9 @@ func (wp *Pool) StartContainer(it arvados.InstanceType, ctr arvados.Container) b
// KillContainer kills the crunch-run process for the given container
// UUID, if it's running on any worker.
+//
+// KillContainer returns immediately; the act of killing the container
+// takes some time, and runs in the background.
func (wp *Pool) KillContainer(uuid string) {
wp.mtx.Lock()
defer wp.mtx.Unlock()
commit 8ef9e211fa1eb91deb1b6aa7245cd7e5b8dd4c33
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Mon Oct 29 11:17:05 2018 -0400
14360: Add missing mutex lock.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index 28feb3ded..d1e23eb6b 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -233,6 +233,8 @@ func (wp *Pool) Create(it arvados.InstanceType) error {
// AtQuota returns true if Create is not expected to work at the
// moment.
func (wp *Pool) AtQuota() bool {
+ wp.mtx.Lock()
+ defer wp.mtx.Unlock()
return time.Now().Before(wp.atQuotaUntil)
}
commit 468a33e9931c3dbadc83332a3b85d7677c9e4dc3
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Mon Oct 29 11:08:45 2018 -0400
14360: Rename View to InstanceView.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/lib/dispatchcloud/dispatcher.go b/lib/dispatchcloud/dispatcher.go
index 707c31f24..c441dc6fa 100644
--- a/lib/dispatchcloud/dispatcher.go
+++ b/lib/dispatchcloud/dispatcher.go
@@ -38,7 +38,7 @@ type containerQueue interface {
type pool interface {
scheduler.WorkerPool
- View() []worker.View
+ Instances() []worker.InstanceView
}
type dispatcher struct {
@@ -198,8 +198,8 @@ func (disp *dispatcher) apiInstances(w http.ResponseWriter, r *http.Request) {
return
}
var resp struct {
- Items []worker.View
+ Items []worker.InstanceView
}
- resp.Items = disp.pool.View()
+ resp.Items = disp.pool.Instances()
json.NewEncoder(w).Encode(resp)
}
diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index a9531987c..28feb3ded 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -23,8 +23,8 @@ const (
tagKeyHold = "Hold"
)
-// A View shows a worker's current state and recent activity.
-type View struct {
+// An InstanceView shows a worker's current state and recent activity.
+type InstanceView struct {
Instance string
Price float64
ArvadosInstanceType string
@@ -643,13 +643,14 @@ func (wp *Pool) Stop() {
close(wp.stop)
}
-// View reports status information for every worker in the pool.
-func (wp *Pool) View() []View {
- var r []View
+// Instances returns an InstanceView for each worker in the pool,
+// summarizing its current state and recent activity.
+func (wp *Pool) Instances() []InstanceView {
+ var r []InstanceView
wp.setupOnce.Do(wp.setup)
wp.mtx.Lock()
for _, w := range wp.workers {
- r = append(r, View{
+ r = append(r, InstanceView{
Instance: w.instance.String(),
Price: w.instType.Price,
ArvadosInstanceType: w.instType.Name,
commit 0f159e5eb304e08a0704156046762709bd6e1eba
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Sat Oct 27 01:58:29 2018 -0400
14360: Rename keeplocal -> dontupdate, add explanation.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/lib/dispatchcloud/container/queue.go b/lib/dispatchcloud/container/queue.go
index 17a38259d..e0e1cd0cd 100644
--- a/lib/dispatchcloud/container/queue.go
+++ b/lib/dispatchcloud/container/queue.go
@@ -56,11 +56,19 @@ type Queue struct {
chooseType typeChooser
client APIClient
- auth *arvados.APIClientAuthorization
- current map[string]QueueEnt
- updated time.Time
- mtx sync.Mutex
- keeplocal map[string]struct{}
+ auth *arvados.APIClientAuthorization
+ current map[string]QueueEnt
+ updated time.Time
+ mtx sync.Mutex
+
+ // Methods that modify the Queue (like Lock) add the affected
+ // container UUIDs to dontupdate. When applying a batch of
+ // updates received from the network, anything appearing in
+ // dontupdate is skipped, in case the received update has
+ // already been superseded by the locally initiated change.
+ // When no network update is in progress, this protection is
+ // not needed, and dontupdate is nil.
+ dontupdate map[string]struct{}
}
// NewQueue returns a new Queue. When a new container appears in the
@@ -126,7 +134,7 @@ func (cq *Queue) Entries() (entries map[string]QueueEnt, threshold time.Time) {
// containers.
func (cq *Queue) Update() error {
cq.mtx.Lock()
- cq.keeplocal = map[string]struct{}{}
+ cq.dontupdate = map[string]struct{}{}
updateStarted := time.Now()
cq.mtx.Unlock()
@@ -138,7 +146,7 @@ func (cq *Queue) Update() error {
cq.mtx.Lock()
defer cq.mtx.Unlock()
for uuid, ctr := range next {
- if _, keep := cq.keeplocal[uuid]; keep {
+ if _, keep := cq.dontupdate[uuid]; keep {
continue
}
if cur, ok := cq.current[uuid]; !ok {
@@ -149,7 +157,7 @@ func (cq *Queue) Update() error {
}
}
for uuid := range cq.current {
- if _, keep := cq.keeplocal[uuid]; keep {
+ if _, keep := cq.dontupdate[uuid]; keep {
continue
} else if _, keep = next[uuid]; keep {
continue
@@ -157,7 +165,7 @@ func (cq *Queue) Update() error {
delete(cq.current, uuid)
}
}
- cq.keeplocal = nil
+ cq.dontupdate = nil
cq.updated = updateStarted
return nil
}
@@ -198,8 +206,8 @@ func (cq *Queue) apiUpdate(uuid, action string) error {
cq.mtx.Lock()
defer cq.mtx.Unlock()
- if cq.keeplocal != nil {
- cq.keeplocal[uuid] = struct{}{}
+ if cq.dontupdate != nil {
+ cq.dontupdate[uuid] = struct{}{}
}
if ent, ok := cq.current[uuid]; !ok {
cq.addEnt(uuid, resp)
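The dontupdate set exists because a Lock/Unlock/Cancel issued while an Update is in flight can race with the snapshot Update fetched before the local change; anything touched locally is skipped when the fetched batch is merged. A compressed illustration of the same skip logic (standalone types with states as strings, not the real Queue):

    package main

    import (
            "fmt"
            "sync"
    )

    type queue struct {
            mtx        sync.Mutex
            current    map[string]string   // uuid -> state
            dontupdate map[string]struct{} // non-nil only while an Update is being applied
    }

    // lock is a locally initiated state change; while an Update is in
    // progress, record the uuid so the (possibly stale) fetched copy
    // doesn't clobber it.
    func (q *queue) lock(uuid string) {
            q.mtx.Lock()
            defer q.mtx.Unlock()
            q.current[uuid] = "Locked"
            if q.dontupdate != nil {
                    q.dontupdate[uuid] = struct{}{}
            }
    }

    // applyUpdate merges a batch fetched from the network, skipping
    // anything changed locally since the fetch started.
    func (q *queue) applyUpdate(fetched map[string]string) {
            q.mtx.Lock()
            defer q.mtx.Unlock()
            for uuid, state := range fetched {
                    if _, skip := q.dontupdate[uuid]; skip {
                            continue
                    }
                    q.current[uuid] = state
            }
            q.dontupdate = nil
    }

    func main() {
            q := &queue{
                    current:    map[string]string{"ctr1": "Queued"},
                    dontupdate: map[string]struct{}{}, // an Update has started
            }
            q.lock("ctr1")                                      // local change during the update
            q.applyUpdate(map[string]string{"ctr1": "Queued"}) // stale snapshot
            fmt.Println(q.current["ctr1"])                      // Locked, not clobbered
    }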
commit 3ebb991101ef62387268d594e3e4307a0f5c4e9e
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Sat Oct 27 01:41:11 2018 -0400
14360: Update busy/lastUUID/running together. Clarify race comment.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index 4ddd3745e..a9531987c 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -382,9 +382,12 @@ func (wp *Pool) StartContainer(it arvados.InstanceType, ctr arvados.Container) b
stdout, stderr, err := wkr.executor.Execute("crunch-run --detach '"+ctr.UUID+"'", nil)
wp.mtx.Lock()
defer wp.mtx.Unlock()
- wkr.updated = time.Now()
+ now := time.Now()
+ wkr.updated = now
+ wkr.busy = now
delete(wkr.starting, ctr.UUID)
wkr.running[ctr.UUID] = struct{}{}
+ wkr.lastUUID = ctr.UUID
if err != nil {
logger.WithField("stdout", string(stdout)).
WithField("stderr", string(stderr)).
@@ -789,10 +792,6 @@ func (wp *Pool) probeAndUpdate(wkr *worker) {
updateTime := time.Now()
wkr.probed = updateTime
- if len(ctrUUIDs) > 0 {
- wkr.busy = updateTime
- wkr.lastUUID = ctrUUIDs[0]
- }
if wkr.state == StateShutdown || wkr.state == StateHold {
} else if booted {
if wkr.state != StateRunning {
@@ -804,13 +803,18 @@ func (wp *Pool) probeAndUpdate(wkr *worker) {
}
if updated != wkr.updated {
- // Worker was updated (e.g., by starting a new
- // container) after the probe began. Avoid clobbering
- // those changes with the probe results.
+ // Worker was updated after the probe began, so
+ // wkr.running might have a container UUID that was
+ // not yet running when ctrUUIDs was generated. Leave
+ // wkr.running alone and wait for the next probe to
+ // catch up on any changes.
return
}
- if len(ctrUUIDs) == 0 && len(wkr.running) > 0 {
+ if len(ctrUUIDs) > 0 {
+ wkr.busy = updateTime
+ wkr.lastUUID = ctrUUIDs[0]
+ } else if len(wkr.running) > 0 {
wkr.unallocated = updateTime
}
running := map[string]struct{}{}
commit 968519edf4c9150ac65991aea5f977356551b4e4
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Sat Oct 27 01:19:05 2018 -0400
14360: Move monitoring API test from lame driver to stub driver.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/lib/dispatchcloud/dispatcher_test.go b/lib/dispatchcloud/dispatcher_test.go
index e6f536d24..362fb4942 100644
--- a/lib/dispatchcloud/dispatcher_test.go
+++ b/lib/dispatchcloud/dispatcher_test.go
@@ -318,10 +318,8 @@ func (s *DispatcherSuite) TestDispatchToStubDriver(c *check.C) {
}
func (s *DispatcherSuite) TestInstancesAPI(c *check.C) {
- var lameSet test.LameInstanceSet
- drivers["test"] = cloud.DriverFunc(func(params map[string]interface{}, id cloud.InstanceSetID) (cloud.InstanceSet, error) {
- return &lameSet, nil
- })
+ s.cluster.CloudVMs.TimeoutBooting = arvados.Duration(time.Second)
+ drivers["test"] = s.stubDriver
type instance struct {
Instance string
@@ -349,22 +347,16 @@ func (s *DispatcherSuite) TestInstancesAPI(c *check.C) {
ch := s.disp.pool.Subscribe()
defer s.disp.pool.Unsubscribe(ch)
- err := s.disp.pool.Create(arvados.InstanceType{
- Name: "a1.small-1",
- ProviderType: "a1.small",
- VCPUs: 1,
- RAM: 1 << 30,
- Price: 0.12,
- })
+ err := s.disp.pool.Create(test.InstanceType(1))
c.Check(err, check.IsNil)
<-ch
sr = getInstances()
c.Assert(len(sr.Items), check.Equals, 1)
- c.Check(sr.Items[0].Instance, check.Matches, "lame-.*")
+ c.Check(sr.Items[0].Instance, check.Matches, "stub.*")
c.Check(sr.Items[0].WorkerState, check.Equals, "booting")
- c.Check(sr.Items[0].Price, check.Equals, 0.12)
+ c.Check(sr.Items[0].Price, check.Equals, 0.123)
c.Check(sr.Items[0].LastContainerUUID, check.Equals, "")
- c.Check(sr.Items[0].ProviderInstanceType, check.Equals, "a1.small")
- c.Check(sr.Items[0].ArvadosInstanceType, check.Equals, "a1.small-1")
+ c.Check(sr.Items[0].ProviderInstanceType, check.Equals, test.InstanceType(1).ProviderType)
+ c.Check(sr.Items[0].ArvadosInstanceType, check.Equals, test.InstanceType(1).Name)
}
diff --git a/lib/dispatchcloud/test/fixtures.go b/lib/dispatchcloud/test/fixtures.go
index 7d65ca057..68bdb3db4 100644
--- a/lib/dispatchcloud/test/fixtures.go
+++ b/lib/dispatchcloud/test/fixtures.go
@@ -23,5 +23,6 @@ func InstanceType(i int) arvados.InstanceType {
ProviderType: fmt.Sprintf("providertype%d", i),
VCPUs: i,
RAM: arvados.ByteSize(i) << 30,
+ Price: float64(i) * 0.123,
}
}
commit 71460cf96c9b43c8ab0a38118c3745a4c0e6d7e9
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Sat Oct 27 01:18:36 2018 -0400
14360: Move shutdown-if-broken check to its own func.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index a7b5132a5..4ddd3745e 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -311,7 +311,7 @@ func (wp *Pool) shutdown(wkr *worker, logger logrus.FieldLogger) {
go func() {
err := wkr.instance.Destroy()
if err != nil {
- logger.WithError(err).Warn("shutdown failed")
+ logger.WithError(err).WithField("Instance", wkr.instance).Warn("shutdown failed")
return
}
wp.mtx.Lock()
@@ -542,7 +542,7 @@ func (wp *Pool) runProbes() {
workers = workers[:0]
wp.mtx.Lock()
for id, wkr := range wp.workers {
- if wkr.state == StateShutdown || wp.autoShutdown(wkr) {
+ if wkr.state == StateShutdown || wp.shutdownIfIdle(wkr) {
continue
}
workers = append(workers, id)
@@ -596,7 +596,28 @@ func (wp *Pool) runSync() {
}
// caller must have lock.
-func (wp *Pool) autoShutdown(wkr *worker) bool {
+func (wp *Pool) shutdownIfBroken(wkr *worker, dur time.Duration) {
+ if wkr.state == StateHold {
+ return
+ }
+ label, threshold := "", wp.timeoutProbe
+ if wkr.state == StateBooting {
+ label, threshold = "new ", wp.timeoutBooting
+ }
+ if dur < threshold {
+ return
+ }
+ wp.logger.WithFields(logrus.Fields{
+ "Instance": wkr.instance,
+ "Duration": dur,
+ "Since": wkr.probed,
+ "State": wkr.state,
+ }).Warnf("%sinstance unresponsive, shutting down", label)
+ wp.shutdown(wkr, wp.logger)
+}
+
+// caller must have lock.
+func (wp *Pool) shutdownIfIdle(wkr *worker) bool {
if len(wkr.running)+len(wkr.starting) > 0 || wkr.state != StateRunning {
return false
}
@@ -762,19 +783,7 @@ func (wp *Pool) probeAndUpdate(wkr *worker) {
} else {
logger.Info("instance not responding")
}
-
- if wkr.state == StateHold {
- return
- }
-
- label, threshold := "", wp.timeoutProbe
- if wkr.state == StateBooting {
- label, threshold = "new ", wp.timeoutBooting
- }
- if dur > threshold {
- logger.WithField("Since", wkr.probed).Warnf("%sinstance unresponsive, shutting down", label)
- wp.shutdown(wkr, logger)
- }
+ wp.shutdownIfBroken(wkr, dur)
return
}
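
The refactor keeps the same decision but gives it a name: an unresponsive instance is shut down once it has been silent longer than the probe timeout, or longer than the (typically larger) boot timeout while it is still booting, and held instances are exempt. A compact sketch of that decision, with illustrative state names and timeout values:

    package main

    import (
        "fmt"
        "time"
    )

    type state int

    const (
        stateBooting state = iota
        stateRunning
        stateHold
    )

    // brokenFor reports whether an instance that has been unresponsive for
    // dur should be shut down, mirroring shutdownIfBroken's checks: held
    // instances are never shut down, and booting instances are judged
    // against the boot timeout instead of the probe timeout.
    func brokenFor(s state, dur, timeoutProbe, timeoutBooting time.Duration) bool {
        if s == stateHold {
            return false
        }
        threshold := timeoutProbe
        if s == stateBooting {
            threshold = timeoutBooting
        }
        return dur >= threshold
    }

    func main() {
        fmt.Println(brokenFor(stateRunning, 90*time.Second, time.Minute, 10*time.Minute)) // true
        fmt.Println(brokenFor(stateBooting, 90*time.Second, time.Minute, 10*time.Minute)) // false
    }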
commit 438ce6f86261376880b186775d7ba5fbade560a6
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Sat Oct 27 01:17:39 2018 -0400
14360: Just notify once per sync.
Sync holds the mutex so none of the notifications are delivered until
sync is done anyway.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index 74d3d5172..a7b5132a5 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -213,6 +213,7 @@ func (wp *Pool) Create(it arvados.InstanceType) error {
tags := cloud.InstanceTags{tagKeyInstanceType: it.Name}
wp.creating[it]++
go func() {
+ defer wp.notify()
inst, err := wp.instanceSet.Create(it, wp.imageID, tags, nil)
wp.mtx.Lock()
defer wp.mtx.Unlock()
@@ -222,7 +223,6 @@ func (wp *Pool) Create(it arvados.InstanceType) error {
}
if err != nil {
logger.WithError(err).Error("create failed")
- go wp.notify()
return
}
wp.updateWorker(inst, it, StateBooting)
@@ -238,7 +238,9 @@ func (wp *Pool) AtQuota() bool {
// Add or update worker attached to the given instance. Use
// initialState if a new worker is created. Caller must have lock.
-func (wp *Pool) updateWorker(inst cloud.Instance, it arvados.InstanceType, initialState State) {
+//
+// Returns true when a new worker is created.
+func (wp *Pool) updateWorker(inst cloud.Instance, it arvados.InstanceType, initialState State) bool {
id := inst.ID()
if wp.workers[id] != nil {
wp.workers[id].executor.SetTarget(inst)
@@ -247,7 +249,7 @@ func (wp *Pool) updateWorker(inst cloud.Instance, it arvados.InstanceType, initi
if initialState == StateBooting && wp.workers[id].state == StateUnknown {
wp.workers[id].state = StateBooting
}
- return
+ return false
}
if initialState == StateUnknown && inst.Tags()[tagKeyHold] != "" {
initialState = StateHold
@@ -271,7 +273,7 @@ func (wp *Pool) updateWorker(inst cloud.Instance, it arvados.InstanceType, initi
starting: make(map[string]struct{}),
probing: make(chan struct{}, 1),
}
- go wp.notify()
+ return true
}
// Shutdown shuts down a worker with the given type, or returns false
@@ -678,6 +680,7 @@ func (wp *Pool) sync(threshold time.Time, instances []cloud.Instance) {
wp.mtx.Lock()
defer wp.mtx.Unlock()
wp.logger.WithField("Instances", len(instances)).Debug("sync instances")
+ notify := false
for _, inst := range instances {
itTag := inst.Tags()[tagKeyInstanceType]
@@ -686,7 +689,9 @@ func (wp *Pool) sync(threshold time.Time, instances []cloud.Instance) {
wp.logger.WithField("Instance", inst).Errorf("unknown InstanceType tag %q --- ignoring", itTag)
continue
}
- wp.updateWorker(inst, it, StateUnknown)
+ if wp.updateWorker(inst, it, StateUnknown) {
+ notify = true
+ }
}
for id, wkr := range wp.workers {
@@ -700,13 +705,17 @@ func (wp *Pool) sync(threshold time.Time, instances []cloud.Instance) {
logger.Info("instance disappeared in cloud")
delete(wp.workers, id)
go wkr.executor.Close()
- go wp.notify()
+ notify = true
}
if !wp.loaded {
wp.loaded = true
wp.logger.WithField("N", len(wp.workers)).Info("loaded initial instance list")
}
+
+ if notify {
+ go wp.notify()
+ }
}
// should be called in a new goroutine
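
The change above collapses several go wp.notify() calls into a single flag checked once at the end of sync; since sync holds the pool mutex for its whole pass, batching the notifications loses nothing. A minimal sketch of the same accumulate-then-notify-once idiom, with illustrative types in place of the real pool and instance list:

    package main

    import (
        "fmt"
        "sync"
        "time"
    )

    type pool struct {
        mtx     sync.Mutex
        workers map[string]bool
    }

    func (p *pool) notify() { fmt.Println("notify subscribers") }

    // sync reconciles the worker map against a cloud instance listing and
    // notifies subscribers at most once, no matter how many workers were
    // added or removed during this pass.
    func (p *pool) sync(instances []string) {
        p.mtx.Lock()
        defer p.mtx.Unlock()

        notify := false
        seen := map[string]bool{}
        for _, id := range instances {
            seen[id] = true
            if !p.workers[id] {
                p.workers[id] = true // new worker appeared
                notify = true
            }
        }
        for id := range p.workers {
            if !seen[id] {
                delete(p.workers, id) // instance disappeared in cloud
                notify = true
            }
        }
        if notify {
            go p.notify() // delivered after the mutex is released
        }
    }

    func main() {
        p := &pool{workers: map[string]bool{}}
        p.sync([]string{"a", "b"}) // two workers appear -> one notify
        p.sync([]string{"a"})      // one disappears     -> one notify
        time.Sleep(10 * time.Millisecond)
        fmt.Println(len(p.workers)) // 1
    }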
commit c11e1e9b7e04b819a8a1794acc3ffb0a816715b7
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Sat Oct 27 00:31:43 2018 -0400
14360: Cut excess time.Now() calls.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index fc87ea421..74d3d5172 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -257,15 +257,16 @@ func (wp *Pool) updateWorker(inst cloud.Instance, it arvados.InstanceType, initi
"Instance": inst,
"State": initialState,
}).Infof("instance appeared in cloud")
+ now := time.Now()
wp.workers[id] = &worker{
executor: wp.newExecutor(inst),
state: initialState,
instance: inst,
instType: it,
- probed: time.Now(),
- busy: time.Now(),
- updated: time.Now(),
- unallocated: time.Now(),
+ probed: now,
+ busy: now,
+ updated: now,
+ unallocated: now,
running: make(map[string]struct{}),
starting: make(map[string]struct{}),
probing: make(chan struct{}, 1),
commit b10284a496cd2d953b865144a5f28265fc277938
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Sat Oct 27 00:30:16 2018 -0400
14360: Split up long run func.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index f2a53acba..fc87ea421 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -91,7 +91,12 @@ func NewPool(logger logrus.FieldLogger, reg *prometheus.Registry, instanceSet cl
timeoutProbe: duration(cluster.CloudVMs.TimeoutProbe, defaultTimeoutProbe),
}
wp.registerMetrics(reg)
- go wp.run()
+ go func() {
+ wp.setupOnce.Do(wp.setup)
+ go wp.runMetrics()
+ go wp.runProbes()
+ go wp.runSync()
+ }()
return wp
}
@@ -487,6 +492,14 @@ func (wp *Pool) registerMetrics(reg *prometheus.Registry) {
reg.MustRegister(wp.mMemoryInuse)
}
+func (wp *Pool) runMetrics() {
+ ch := wp.Subscribe()
+ defer wp.Unsubscribe(ch)
+ for range ch {
+ wp.updateMetrics()
+ }
+}
+
func (wp *Pool) updateMetrics() {
wp.mtx.RLock()
defer wp.mtx.RUnlock()
@@ -510,67 +523,57 @@ func (wp *Pool) updateMetrics() {
wp.mMemoryInuse.Set(float64(memInuse))
}
-func (wp *Pool) run() {
- wp.setupOnce.Do(wp.setup)
+func (wp *Pool) runProbes() {
+ maxPPS := wp.maxProbesPerSecond
+ if maxPPS < 1 {
+ maxPPS = defaultMaxProbesPerSecond
+ }
+ limitticker := time.NewTicker(time.Second / time.Duration(maxPPS))
+ defer limitticker.Stop()
- go func() {
- ch := wp.Subscribe()
- defer wp.Unsubscribe(ch)
- for range ch {
- wp.updateMetrics()
- }
- }()
+ probeticker := time.NewTicker(wp.probeInterval)
+ defer probeticker.Stop()
- go func() {
- maxPPS := wp.maxProbesPerSecond
- if maxPPS < 1 {
- maxPPS = defaultMaxProbesPerSecond
+ workers := []cloud.InstanceID{}
+ for range probeticker.C {
+ workers = workers[:0]
+ wp.mtx.Lock()
+ for id, wkr := range wp.workers {
+ if wkr.state == StateShutdown || wp.autoShutdown(wkr) {
+ continue
+ }
+ workers = append(workers, id)
}
- limitticker := time.NewTicker(time.Second / time.Duration(maxPPS))
- defer limitticker.Stop()
-
- probeticker := time.NewTicker(wp.probeInterval)
- defer probeticker.Stop()
+ wp.mtx.Unlock()
- workers := []cloud.InstanceID{}
- for range probeticker.C {
- workers = workers[:0]
+ for _, id := range workers {
wp.mtx.Lock()
- for id, wkr := range wp.workers {
- if wkr.state == StateShutdown || wp.autoShutdown(wkr) {
- continue
- }
- workers = append(workers, id)
- }
+ wkr, ok := wp.workers[id]
wp.mtx.Unlock()
-
- for _, id := range workers {
- wp.mtx.Lock()
- wkr, ok := wp.workers[id]
- wp.mtx.Unlock()
- if !ok || wkr.state == StateShutdown {
- // Deleted/shutdown while we
- // were probing others
- continue
- }
- select {
- case wkr.probing <- struct{}{}:
- go func() {
- wp.probeAndUpdate(wkr)
- <-wkr.probing
- }()
- default:
- wp.logger.WithField("Instance", wkr.instance).Debug("still waiting for last probe to finish")
- }
- select {
- case <-wp.stop:
- return
- case <-limitticker.C:
- }
+ if !ok || wkr.state == StateShutdown {
+ // Deleted/shutdown while we
+ // were probing others
+ continue
+ }
+ select {
+ case wkr.probing <- struct{}{}:
+ go func() {
+ wp.probeAndUpdate(wkr)
+ <-wkr.probing
+ }()
+ default:
+ wp.logger.WithField("Instance", wkr.instance).Debug("still waiting for last probe to finish")
+ }
+ select {
+ case <-wp.stop:
+ return
+ case <-limitticker.C:
}
}
- }()
+ }
+}
+func (wp *Pool) runSync() {
// sync once immediately, then wait syncInterval, sync again,
// etc.
timer := time.NewTimer(1)
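
After the split, NewPool starts three independent loops (metrics, probes, sync) from one goroutine guarded by setupOnce, instead of a single long run function. A bare-bones sketch of that startup shape, with placeholder loop bodies standing in for the real ones:

    package main

    import (
        "sync"
        "time"
    )

    type pool struct {
        setupOnce sync.Once
        stop      chan struct{}
    }

    func (p *pool) setup() { p.stop = make(chan struct{}) }

    // Each loop runs until the pool is stopped; the bodies are placeholders.
    func (p *pool) runMetrics() { <-p.stop }
    func (p *pool) runProbes()  { <-p.stop }
    func (p *pool) runSync()    { <-p.stop }

    // newPool mirrors the startup shape above: do one-time setup, then
    // launch the background loops in their own goroutines.
    func newPool() *pool {
        p := &pool{}
        go func() {
            p.setupOnce.Do(p.setup)
            go p.runMetrics()
            go p.runProbes()
            go p.runSync()
        }()
        return p
    }

    func main() {
        p := newPool()
        p.setupOnce.Do(p.setup) // safe: setup runs at most once
        close(p.stop)
        time.Sleep(10 * time.Millisecond)
    }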
commit 8dff5fe14f23f3db9efaf5d71a21668738a9f164
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Sat Oct 27 00:29:26 2018 -0400
14360: Use consts for tag keys.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index 364670544..f2a53acba 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -18,6 +18,11 @@ import (
"github.com/prometheus/client_golang/prometheus"
)
+const (
+ tagKeyInstanceType = "InstanceType"
+ tagKeyHold = "Hold"
+)
+
// A View shows a worker's current state and recent activity.
type View struct {
Instance string
@@ -200,7 +205,7 @@ func (wp *Pool) Create(it arvados.InstanceType) error {
wp.setupOnce.Do(wp.setup)
wp.mtx.Lock()
defer wp.mtx.Unlock()
- tags := cloud.InstanceTags{"InstanceType": it.Name}
+ tags := cloud.InstanceTags{tagKeyInstanceType: it.Name}
wp.creating[it]++
go func() {
inst, err := wp.instanceSet.Create(it, wp.imageID, tags, nil)
@@ -239,7 +244,7 @@ func (wp *Pool) updateWorker(inst cloud.Instance, it arvados.InstanceType, initi
}
return
}
- if initialState == StateUnknown && inst.Tags()["hold"] != "" {
+ if initialState == StateUnknown && inst.Tags()[tagKeyHold] != "" {
initialState = StateHold
}
wp.logger.WithFields(logrus.Fields{
@@ -671,7 +676,7 @@ func (wp *Pool) sync(threshold time.Time, instances []cloud.Instance) {
wp.logger.WithField("Instances", len(instances)).Debug("sync instances")
for _, inst := range instances {
- itTag := inst.Tags()["InstanceType"]
+ itTag := inst.Tags()[tagKeyInstanceType]
it, ok := wp.instanceTypes[itTag]
if !ok {
wp.logger.WithField("Instance", inst).Errorf("unknown InstanceType tag %q --- ignoring", itTag)
commit c81a653c1e800c40a0c6e1a5d94cddd6620b5e52
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Sat Oct 27 00:28:17 2018 -0400
14360: Close SSH connections when no longer needed.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/lib/dispatchcloud/ssh_executor/executor.go b/lib/dispatchcloud/ssh_executor/executor.go
index 804ae6f15..b5dba9870 100644
--- a/lib/dispatchcloud/ssh_executor/executor.go
+++ b/lib/dispatchcloud/ssh_executor/executor.go
@@ -90,6 +90,19 @@ func (exr *Executor) Execute(cmd string, stdin io.Reader) ([]byte, []byte, error
return stdout.Bytes(), stderr.Bytes(), err
}
+// Close shuts down any active connections.
+func (exr *Executor) Close() {
+ // Ensure exr is initialized
+ exr.sshClient(false)
+
+ exr.clientSetup <- true
+ if exr.client != nil {
+ defer exr.client.Close()
+ }
+ exr.client, exr.clientErr = nil, errors.New("closed")
+ <-exr.clientSetup
+}
+
// Create a new SSH session. If session setup fails or the SSH client
// hasn't been setup yet, setup a new SSH client and try again.
func (exr *Executor) newSession() (*ssh.Session, error) {
@@ -121,6 +134,11 @@ func (exr *Executor) sshClient(create bool) (*ssh.Client, error) {
if create {
client, err := exr.setupSSHClient()
if err == nil || exr.client == nil {
+ if exr.client != nil {
+ // Hang up the previous
+ // (non-working) client
+ go exr.client.Close()
+ }
exr.client, exr.clientErr = client, err
}
if err != nil {
diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index 85e080142..364670544 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -45,6 +45,8 @@ type Executor interface {
//
// SetTarget must not block on concurrent Execute calls.
SetTarget(cloud.ExecutorTarget)
+
+ Close()
}
const (
@@ -688,6 +690,7 @@ func (wp *Pool) sync(threshold time.Time, instances []cloud.Instance) {
})
logger.Info("instance disappeared in cloud")
delete(wp.workers, id)
+ go wkr.executor.Close()
go wp.notify()
}
diff --git a/lib/dispatchcloud/worker/pool_test.go b/lib/dispatchcloud/worker/pool_test.go
index 7ab4e1b6c..790ee851e 100644
--- a/lib/dispatchcloud/worker/pool_test.go
+++ b/lib/dispatchcloud/worker/pool_test.go
@@ -123,3 +123,5 @@ func (*stubExecutor) SetTarget(cloud.ExecutorTarget) {}
func (*stubExecutor) Execute(cmd string, stdin io.Reader) ([]byte, []byte, error) {
return nil, nil, nil
}
+
+func (*stubExecutor) Close() {}
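
Close above reuses the executor's clientSetup channel as a one-slot lock: it pushes a token to take ownership of the client fields, arranges to hang up any existing connection, marks the executor as closed, and releases the token. A standalone sketch of that channel-as-mutex shutdown idiom, with an illustrative client type in place of the real ssh.Client:

    package main

    import (
        "errors"
        "fmt"
    )

    // client is an illustrative stand-in for an SSH client connection.
    type client struct{ addr string }

    func (c *client) Close() error {
        fmt.Println("hanging up", c.addr)
        return nil
    }

    type executor struct {
        clientSetup chan bool // 1-slot channel used as a mutex for the fields below
        client      *client
        clientErr   error
    }

    func newExecutor() *executor {
        return &executor{
            clientSetup: make(chan bool, 1),
            client:      &client{addr: "10.0.0.1:22"},
        }
    }

    // Close takes the setup lock, hangs up any live connection (deferred, so
    // it happens after the lock is released), and leaves the executor in a
    // state where later callers see a "closed" error.
    func (e *executor) Close() {
        e.clientSetup <- true // acquire
        if e.client != nil {
            defer e.client.Close() // old pointer captured here, before it is cleared
        }
        e.client, e.clientErr = nil, errors.New("closed")
        <-e.clientSetup // release
    }

    func main() {
        e := newExecutor()
        e.Close()
        fmt.Println(e.clientErr)
    }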
commit 8162f74b2611b686cc8bcec225c6724c71b8926d
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Fri Oct 26 20:27:20 2018 -0400
14360: Clarify sync loop timing.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index cf8fac380..85e080142 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -564,24 +564,17 @@ func (wp *Pool) run() {
}
}()
- timer := time.NewTimer(time.Nanosecond)
+ // sync once immediately, then wait syncInterval, sync again,
+ // etc.
+ timer := time.NewTimer(1)
for {
- err := wp.getInstancesAndSync()
- if err != nil {
- wp.logger.WithError(err).Warn("sync failed")
- }
-
- // Reset timer to desired interval, and ignore the
- // tick that might have already arrived.
- timer.Stop()
- select {
- case <-timer.C:
- default:
- }
- timer.Reset(wp.syncInterval)
-
select {
case <-timer.C:
+ err := wp.getInstancesAndSync()
+ if err != nil {
+ wp.logger.WithError(err).Warn("sync failed")
+ }
+ timer.Reset(wp.syncInterval)
case <-wp.stop:
wp.logger.Debug("worker.Pool stopped")
return
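
The rewritten loop makes the timing explicit: the timer fires almost immediately for the first sync, and is reset to syncInterval only after each pass completes, so a slow sync can never trigger back-to-back runs. A reduced sketch of that loop with a stop channel:

    package main

    import (
        "fmt"
        "time"
    )

    // runSync fires once almost immediately, then once per interval,
    // resetting the timer only after each pass finishes.
    func runSync(interval time.Duration, stop <-chan struct{}, syncOnce func() error) {
        timer := time.NewTimer(1) // 1ns: effectively "now"
        defer timer.Stop()
        for {
            select {
            case <-timer.C:
                if err := syncOnce(); err != nil {
                    fmt.Println("sync failed:", err)
                }
                timer.Reset(interval)
            case <-stop:
                fmt.Println("sync loop stopped")
                return
            }
        }
    }

    func main() {
        stop := make(chan struct{})
        go runSync(50*time.Millisecond, stop, func() error {
            fmt.Println("sync")
            return nil
        })
        time.Sleep(120 * time.Millisecond) // expect roughly three syncs
        close(stop)
        time.Sleep(10 * time.Millisecond)
    }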
commit 1b47c040b802c09d8652b952704e2ad09f31aa27
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Fri Oct 26 20:18:27 2018 -0400
14360: Update readme.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/lib/dispatchcloud/readme.go b/lib/dispatchcloud/readme.go
index a4b005eb8..c8491fb1d 100644
--- a/lib/dispatchcloud/readme.go
+++ b/lib/dispatchcloud/readme.go
@@ -5,33 +5,35 @@
package dispatchcloud
// A dispatcher comprises a container queue, a scheduler, a worker
-// pool, a cloud provider, a stale-lock fixer, and a syncer.
+// pool, a remote command executor, and a cloud driver.
// 1. Choose a provider.
// 2. Start a worker pool.
// 3. Start a container queue.
-// 4. Run a stale-lock fixer.
-// 5. Start a scheduler.
-// 6. Start a syncer.
+// 4. Run the scheduler's stale-lock fixer.
+// 5. Run the scheduler's mapper.
+// 6. Run the scheduler's syncer.
+// 7. Wait for updates to the container queue or worker pool.
+// 8. Repeat from 5.
//
//
-// A provider (cloud driver) creates new cloud VM instances and gets
-// the latest list of instances. The returned instances implement
-// proxies to the provider's metadata and control interfaces (get IP
-// address, update tags, shutdown).
+// A cloud driver creates new cloud VM instances and gets the latest
+// list of instances. The returned instances are caches/proxies for
+// the provider's metadata and control interfaces (get IP address,
+// update tags, shutdown).
//
//
-// A workerPool tracks workers' instance types and readiness states
+// A worker pool tracks workers' instance types and readiness states
// (available to do work now, booting, suffering a temporary network
// outage, shutting down). It loads internal state from the cloud
// provider's list of instances at startup, and syncs periodically
// after that.
//
//
-// A worker maintains a multiplexed SSH connection to a cloud
-// instance, retrying/reconnecting as needed, so the workerPool can
-// execute commands. It asks the provider's instance to verify its SSH
-// public key once when first connecting, and again later if the key
-// changes.
+// An executor maintains a multiplexed SSH connection to a cloud
+// instance, retrying/reconnecting as needed, so the worker pool can
+// execute commands. It asks the cloud driver's instance to verify its
+// SSH public key once when first connecting, and again later if the
+// key changes.
//
//
// A container queue tracks the known state (according to
@@ -44,36 +46,25 @@ package dispatchcloud
// lock/unlock/cancel operation.)
//
//
-// A stale-lock fixer waits for any already-locked containers (i.e.,
-// locked by a prior server process) to appear on workers as the
-// worker pool recovers its state. It unlocks/requeues any that still
-// remain when all workers are recovered or shutdown, or its timer
-// expires.
+// The scheduler's stale-lock fixer waits for any already-locked
+// containers (i.e., locked by a prior dispatcher process) to appear
+// on workers as the worker pool recovers its state. It
+// unlocks/requeues any that still remain when all workers are
+// recovered or shutdown, or its timer expires.
//
//
-// A scheduler chooses which containers to assign to which idle
-// workers, and decides what to do when there are not enough idle
+// The scheduler's mapper chooses which containers to assign to which
+// idle workers, and decides what to do when there are not enough idle
// workers (including shutting down some idle nodes).
//
//
-// A syncer updates state to Cancelled when a running container
-// process dies without finalizing its entry in the controller
-// database. It also calls the worker pool to kill containers that
-// have priority=0 while locked or running.
+// The scheduler's syncer updates state to Cancelled when a running
+// container process dies without finalizing its entry in the
+// controller database. It also calls the worker pool to kill
+// containers that have priority=0 while locked or running.
//
//
-// A provider proxy wraps a provider with rate-limiting logic. After
-// the wrapped provider receives a cloud.RateLimitError, the proxy
-// starts returning errors to callers immediately without calling
-// through to the wrapped provider.
-//
-//
-// TBD: Bootstrapping script via SSH, too? Future version.
-//
-// TBD: drain instance, keep instance alive
-// TBD: metrics, diagnostics
-// TBD: why dispatch token currently passed to worker?
-//
-// Metrics: queue size, time job has been in queued, #idle/busy/booting nodes
-// Timing in each step, and end-to-end
-// Metrics: boot/idle/alloc time and cost
+// An instance set proxy wraps a driver's instance set with
+// rate-limiting logic. After the wrapped instance set receives a
+// cloud.RateLimitError, the proxy starts returning errors to callers
+// immediately without calling through to the wrapped instance set.
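
The last paragraph describes the instance set proxy: after the wrapped driver reports a rate-limit error, the proxy fails fast for a while instead of hammering the provider. A minimal sketch of that wrapper, using an illustrative interface and error type rather than the real cloud.InstanceSet / cloud.RateLimitError (and omitting the locking a concurrent implementation would need):

    package main

    import (
        "errors"
        "fmt"
        "time"
    )

    // instanceSet is an illustrative slice of a cloud driver's API; the
    // real interface is richer than this.
    type instanceSet interface {
        Create(providerType string) (string, error)
    }

    // rateLimitError stands in for the driver's rate-limit error; it
    // carries the earliest time at which the provider may be called again.
    type rateLimitError struct{ earliestRetry time.Time }

    func (e rateLimitError) Error() string { return "rate limited" }

    // limitedSet wraps an instanceSet and short-circuits calls until the
    // provider's requested backoff has elapsed. Not goroutine-safe.
    type limitedSet struct {
        wrapped       instanceSet
        dontCallUntil time.Time
    }

    func (ls *limitedSet) Create(providerType string) (string, error) {
        if time.Now().Before(ls.dontCallUntil) {
            return "", errors.New("rate limited: not calling provider yet")
        }
        id, err := ls.wrapped.Create(providerType)
        var rle rateLimitError
        if errors.As(err, &rle) {
            ls.dontCallUntil = rle.earliestRetry
        }
        return id, err
    }

    // flakyDriver always reports a rate limit, for demonstration.
    type flakyDriver struct{}

    func (flakyDriver) Create(string) (string, error) {
        return "", rateLimitError{earliestRetry: time.Now().Add(time.Minute)}
    }

    func main() {
        ls := &limitedSet{wrapped: flakyDriver{}}
        _, err := ls.Create("a1.small") // hits the driver, which rate-limits us
        fmt.Println(err)
        _, err = ls.Create("a1.small") // fails fast without calling the driver
        fmt.Println(err)
    }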
commit 8c44ed01463d5e0f0d167b37474b0e78fe11bd8d
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Fri Oct 26 20:09:09 2018 -0400
14360: Add explicit Start method to dispatcher.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/lib/dispatchcloud/cmd.go b/lib/dispatchcloud/cmd.go
index a5a11d2fa..92948fb30 100644
--- a/lib/dispatchcloud/cmd.go
+++ b/lib/dispatchcloud/cmd.go
@@ -13,5 +13,7 @@ import (
var Command cmd.Handler = service.Command(arvados.ServiceNameDispatchCloud, newHandler)
func newHandler(cluster *arvados.Cluster, _ *arvados.NodeProfile) service.Handler {
- return &dispatcher{Cluster: cluster}
+ d := &dispatcher{Cluster: cluster}
+ go d.Start()
+ return d
}
diff --git a/lib/dispatchcloud/dispatcher.go b/lib/dispatchcloud/dispatcher.go
index e422b3963..707c31f24 100644
--- a/lib/dispatchcloud/dispatcher.go
+++ b/lib/dispatchcloud/dispatcher.go
@@ -58,22 +58,28 @@ type dispatcher struct {
stop chan struct{}
}
+// Start starts the dispatcher. Start can be called multiple times
+// with no ill effect.
+func (disp *dispatcher) Start() {
+ disp.setupOnce.Do(disp.setup)
+}
+
// ServeHTTP implements service.Handler.
func (disp *dispatcher) ServeHTTP(w http.ResponseWriter, r *http.Request) {
- disp.setupOnce.Do(disp.setup)
+ disp.Start()
disp.httpHandler.ServeHTTP(w, r)
}
// CheckHealth implements service.Handler.
func (disp *dispatcher) CheckHealth() error {
- disp.setupOnce.Do(disp.setup)
+ disp.Start()
return nil
}
// Stop dispatching containers and release resources. Typically used
// in tests.
func (disp *dispatcher) Close() {
- disp.setupOnce.Do(disp.setup)
+ disp.Start()
select {
case disp.stop <- struct{}{}:
default:
-----------------------------------------------------------------------
hooks/post-receive