[ARVADOS] updated: 1.2.0-268-gc3c538444

Git user git at public.curoverse.com
Fri Oct 26 00:09:50 EDT 2018


Summary of changes:
 apps/workbench/Gemfile                             |   4 +-
 apps/workbench/Gemfile.lock                        |  16 ++-
 apps/workbench/test/integration_helper.rb          |  18 +++-
 build/run-tests.sh                                 |   2 +
 doc/_config.yml                                    |   3 +
 .../collection-versioning.html.textile.liquid      |  32 ++++++
 doc/api/methods/collections.html.textile.liquid    |  12 ++-
 .../collection-versioning.html.textile.liquid      | 107 +++++++++++++++++++++
 sdk/go/arvados/resource_list.go                    |  17 ++--
 services/keep-balance/balance.go                   |  29 ++++--
 services/keep-balance/balance_run_test.go          |   2 +-
 services/keep-balance/balance_test.go              |   9 +-
 services/keep-balance/collection.go                |  17 ++--
 services/keep-balance/integration_test.go          |   1 -
 tools/arvbox/lib/arvbox/docker/Dockerfile.base     |   4 +
 15 files changed, 229 insertions(+), 44 deletions(-)
 create mode 100644 doc/admin/collection-versioning.html.textile.liquid
 create mode 100644 doc/user/topics/collection-versioning.html.textile.liquid

  discards  2202b7952c7246d65c6e9a5ce99a1a81e87dbc75 (commit)
       via  c3c538444c15e68e96780f157935f2baa4ba0bc5 (commit)
       via  ba4464e2f05b5d9fc0e2445382330255fe857524 (commit)
       via  cb4e95b683eda5c503956818ce8bbc2af7d8414b (commit)
       via  dd5efa11702934a9e00ae05b76f3f0279e1c36df (commit)
       via  84dbb98962c7f4c9efd9b21db7cb55a9484aec01 (commit)
       via  f5887e3fd402a445c946d9320e1bd8197f7ed252 (commit)
       via  732327f837bd01864d6a56d1a29b3f459b547f9f (commit)
       via  62f6c1f0be5ebd4911ca0891e2b68db4c1e07a9a (commit)
       via  8aeabdc8bbcc545e07e1a0569f1c26583d89d68d (commit)
       via  906551dd3a4823c03290d6abc37a3232dfa957bb (commit)
       via  dd1157d0a3492621fcc8f2c7696bbc166c604677 (commit)
       via  f15a39075b9cea6649060178ba3d62afe51c1a32 (commit)
       via  15623db9b12d8ac1b0b44a3eb53358896254b28a (commit)
       via  e38d51d61fb25e5d5c7eb682f3750d44456b0620 (commit)
       via  33af4962460b99718e26e9c22b6e8cae1a19d253 (commit)
       via  59ec2a36cd613ff9274da288c04a39dd019659eb (commit)
       via  0adb096afc227db376823f84956de6d7ea30dc10 (commit)
       via  f3bb7d250a75a8d9da890296aeccb43a0027dfd0 (commit)
       via  e5dc0c9de3a098b2f3b5f2d29d5dab1c9b234eef (commit)
       via  177388574a96b3e9770dc3731262502638e9b786 (commit)
       via  3318fb4db96c25d87fd5a4037ff2c2aa55bc01ed (commit)

This update added new revisions after undoing existing revisions.  That is
to say, the old revision is not a strict subset of the new revision.  This
situation occurs when you --force push a change and generate a repository
containing something like this:

 * -- * -- B -- O -- O -- O (2202b7952c7246d65c6e9a5ce99a1a81e87dbc75)
            \
             N -- N -- N (c3c538444c15e68e96780f157935f2baa4ba0bc5)

When this happens we assume that you've already had alert emails for all
of the O revisions, and so we here report only the revisions in the N
branch from the common base, B.

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit c3c538444c15e68e96780f157935f2baa4ba0bc5
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Thu Oct 25 01:34:35 2018 -0400

    14360: Initial version of dispatch-cloud.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/.licenseignore b/.licenseignore
index 51a1e7cbd..a0127cfa3 100644
--- a/.licenseignore
+++ b/.licenseignore
@@ -69,3 +69,4 @@ sdk/R/NAMESPACE
 sdk/R/.Rbuildignore
 sdk/R/ArvadosR.Rproj
 *.Rd
+lib/dispatchcloud/test/sshkey_*
diff --git a/build/run-build-packages.sh b/build/run-build-packages.sh
index fe01e4b2e..1b486b40b 100755
--- a/build/run-build-packages.sh
+++ b/build/run-build-packages.sh
@@ -294,6 +294,9 @@ package_go_binary cmd/arvados-server arvados-server \
     "Arvados server daemons"
 package_go_binary cmd/arvados-server arvados-controller \
     "Arvados cluster controller daemon"
+# No package until #14325
+#package_go_binary cmd/arvados-server crunch-dispatch-cloud \
+#    "Arvados cluster cloud dispatch"
 package_go_binary sdk/go/crunchrunner crunchrunner \
     "Crunchrunner executes a command inside a container and uploads the output"
 package_go_binary services/arv-git-httpd arvados-git-httpd \
diff --git a/build/run-tests.sh b/build/run-tests.sh
index 9674ad5d4..1b42ea21c 100755
--- a/build/run-tests.sh
+++ b/build/run-tests.sh
@@ -77,6 +77,10 @@ lib/cmd
 lib/controller
 lib/crunchstat
 lib/dispatchcloud
+lib/dispatchcloud/container
+lib/dispatchcloud/scheduler
+lib/dispatchcloud/ssh_executor
+lib/dispatchcloud/worker
 services/api
 services/arv-git-httpd
 services/crunchstat
@@ -926,6 +930,10 @@ gostuff=(
     lib/controller
     lib/crunchstat
     lib/dispatchcloud
+    lib/dispatchcloud/container
+    lib/dispatchcloud/scheduler
+    lib/dispatchcloud/ssh_executor
+    lib/dispatchcloud/worker
     sdk/go/arvados
     sdk/go/arvadosclient
     sdk/go/auth
diff --git a/cmd/arvados-server/cmd.go b/cmd/arvados-server/cmd.go
index 1af3745df..cd15d25dd 100644
--- a/cmd/arvados-server/cmd.go
+++ b/cmd/arvados-server/cmd.go
@@ -9,6 +9,7 @@ import (
 
 	"git.curoverse.com/arvados.git/lib/cmd"
 	"git.curoverse.com/arvados.git/lib/controller"
+	"git.curoverse.com/arvados.git/lib/dispatchcloud"
 )
 
 var (
@@ -18,7 +19,8 @@ var (
 		"-version":  cmd.Version(version),
 		"--version": cmd.Version(version),
 
-		"controller": controller.Command,
+		"controller":     controller.Command,
+		"dispatch-cloud": dispatchcloud.Command,
 	})
 )
 
diff --git a/cmd/arvados-server/crunch-dispatch-cloud.service b/cmd/arvados-server/crunch-dispatch-cloud.service
new file mode 100644
index 000000000..f8d71c975
--- /dev/null
+++ b/cmd/arvados-server/crunch-dispatch-cloud.service
@@ -0,0 +1,28 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+[Unit]
+Description=Arvados cloud dispatch
+Documentation=https://doc.arvados.org/
+After=network.target
+AssertPathExists=/etc/arvados/config.yml
+
+# systemd==229 (ubuntu:xenial) obeys StartLimitInterval in the [Unit] section
+StartLimitInterval=0
+
+# systemd>=230 (debian:9) obeys StartLimitIntervalSec in the [Unit] section
+StartLimitIntervalSec=0
+
+[Service]
+Type=notify
+EnvironmentFile=-/etc/arvados/environment
+ExecStart=/usr/bin/crunch-dispatch-cloud
+Restart=always
+RestartSec=1
+
+# systemd<=219 (centos:7, debian:8, ubuntu:trusty) obeys StartLimitInterval in the [Service] section
+StartLimitInterval=0
+
+[Install]
+WantedBy=multi-user.target
diff --git a/lib/cloud/interfaces.go b/lib/cloud/interfaces.go
new file mode 100644
index 000000000..e3a072582
--- /dev/null
+++ b/lib/cloud/interfaces.go
@@ -0,0 +1,179 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package cloud
+
+import (
+	"io"
+	"time"
+
+	"git.curoverse.com/arvados.git/sdk/go/arvados"
+	"golang.org/x/crypto/ssh"
+)
+
+// A RateLimitError should be returned by an InstanceSet when the
+// cloud service indicates it is rejecting all API calls for some time
+// interval.
+type RateLimitError interface {
+	// Time before which the caller should expect requests to
+	// fail.
+	EarliestRetry() time.Time
+	error
+}
+
+// A QuotaError should be returned by an InstanceSet when the cloud
+// service indicates the account cannot create more VMs than already
+// exist.
+type QuotaError interface {
+	// If true, don't create more instances until some existing
+	// instances are destroyed. If false, don't handle the error
+	// as a quota error.
+	IsQuotaError() bool
+	error
+}
+
+type InstanceSetID string
+type InstanceTags map[string]string
+type InstanceID string
+type ImageID string
+
+// An Executor executes commands on an ExecutorTarget.
+type Executor interface {
+	// Update the set of private keys used to authenticate to
+	// targets.
+	SetSigners(...ssh.Signer)
+
+	// Set the target used for subsequent command executions.
+	SetTarget(ExecutorTarget)
+
+	// Return the current target.
+	Target() ExecutorTarget
+
+	// Execute a shell command and return the resulting stdout and
+	// stderr. stdin can be nil.
+	Execute(cmd string, stdin io.Reader) (stdout, stderr []byte, err error)
+}
+
+// An ExecutorTarget is a remote command execution service.
+type ExecutorTarget interface {
+	// SSH server hostname or IP address, or empty string if
+	// unknown while instance is booting.
+	Address() string
+
+	// Return nil if the given public key matches the instance's
+	// SSH server key. If the provided *ssh.Client is not nil,
+	// VerifyHostKey can use it to make outgoing network
+	// connections from the instance -- e.g., to use the cloud's
+	// "this instance's metadata" API.
+	VerifyHostKey(ssh.PublicKey, *ssh.Client) error
+}
+
+// Instance is implemented by the provider-specific instance types.
+type Instance interface {
+	ExecutorTarget
+
+	// ID returns the provider's instance ID. It must be stable
+	// for the life of the instance.
+	ID() InstanceID
+
+	// String typically returns the cloud-provided instance ID.
+	String() string
+
+	// Cloud provider's "instance type" ID. Matches a ProviderType
+	// in the cluster's InstanceTypes configuration.
+	ProviderType() string
+
+	// Get current tags
+	Tags() InstanceTags
+
+	// Replace tags with the given tags
+	SetTags(InstanceTags) error
+
+	// Shut down the node
+	Destroy() error
+}
+
+// An InstanceSet manages a set of VM instances created by an elastic
+// cloud provider like AWS, GCE, or Azure.
+//
+// All public methods of an InstanceSet, and all public methods of the
+// instances it returns, are goroutine safe.
+type InstanceSet interface {
+	// Create a new instance. If supported by the driver, add the
+	// provided public key to /root/.ssh/authorized_keys.
+	//
+	// The returned error should implement RateLimitError and
+	// QuotaError where applicable.
+	Create(arvados.InstanceType, ImageID, InstanceTags, ssh.PublicKey) (Instance, error)
+
+	// Return all instances, including ones that are booting or
+	// shutting down. Optionally, filter out nodes that don't have
+	// all of the given InstanceTags (the caller will ignore these
+	// anyway).
+	//
+	// An instance returned by successive calls to Instances() may
+	// -- but does not need to -- be represented by the same
+	// Instance object each time. Thus, the caller is responsible
+	// for de-duplicating the returned instances by comparing the
+	// InstanceIDs returned by the instances' ID() methods.
+	Instances(InstanceTags) ([]Instance, error)
+
+	// Stop any background tasks and release other resources.
+	Stop()
+}
+
+// A Driver returns an InstanceSet that uses the given InstanceSetID
+// and driver-dependent configuration parameters.
+//
+// The supplied id will be of the form "zzzzz-zzzzz-zzzzzzzzzzzzzzz"
+// where each z can be any alphanum. The returned InstanceSet must use
+// this id to tag long-lived cloud resources that it creates, and must
+// assume control of any existing resources that are tagged with the
+// same id. Tagging can be accomplished by including the ID in
+// resource names, using the cloud provider's tagging feature, or any
+// other mechanism. The tags must be visible to another instance of
+// the same driver running on a different host.
+//
+// The returned InstanceSet must ignore existing resources that are
+// visible but not tagged with the given id, except that it should log
+// a summary of such resources -- only once -- when it starts
+// up. Thus, two identically configured InstanceSets running on
+// different hosts with different ids should log about the existence
+// of each other's resources at startup, but will not interfere with
+// each other.
+//
+// Example:
+//
+//	type exampleInstanceSet struct {
+//		ownID     string
+//		AccessKey string
+//	}
+//
+//	type exampleDriver struct {}
+//
+//	func (*exampleDriver) InstanceSet(config map[string]interface{}, id InstanceSetID) (InstanceSet, error) {
+//		var is exampleInstanceSet
+//		if err := mapstructure.Decode(config, &is); err != nil {
+//			return nil, err
+//		}
+//		is.ownID = string(id)
+//		return &is, nil
+//	}
+//
+//	var _ = registerCloudDriver("example", &exampleDriver{})
+type Driver interface {
+	InstanceSet(config map[string]interface{}, id InstanceSetID) (InstanceSet, error)
+}
+
+// DriverFunc makes a Driver using the provided function as its
+// InstanceSet method. This is similar to http.HandlerFunc.
+func DriverFunc(fn func(config map[string]interface{}, id InstanceSetID) (InstanceSet, error)) Driver {
+	return driverFunc(fn)
+}
+
+type driverFunc func(config map[string]interface{}, id InstanceSetID) (InstanceSet, error)
+
+func (df driverFunc) InstanceSet(config map[string]interface{}, id InstanceSetID) (InstanceSet, error) {
+	return df(config, id)
+}
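
The RateLimitError and QuotaError interfaces above only ask for one extra method on an ordinary error value. For reference, a minimal sketch (not part of this commit; the type names are illustrative) of how a driver could satisfy them:

    // Sketch only: error types a driver could define to satisfy
    // cloud.RateLimitError and cloud.QuotaError.
    package exampledriver

    import "time"

    type rateLimitError struct {
        error
        earliestRetry time.Time
    }

    // EarliestRetry reports when the caller may try again.
    func (e rateLimitError) EarliestRetry() time.Time { return e.earliestRetry }

    type quotaError struct {
        error
    }

    // IsQuotaError marks this as a quota condition rather than a
    // transient failure.
    func (e quotaError) IsQuotaError() bool { return true }

A driver's Create could then return rateLimitError{err, t}, with t taken from the provider's retry-after hint, so the dispatcher can back off until EarliestRetry().
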
diff --git a/lib/cmd/cmd.go b/lib/cmd/cmd.go
index 8c65cf7ac..9292ef7e5 100644
--- a/lib/cmd/cmd.go
+++ b/lib/cmd/cmd.go
@@ -36,8 +36,9 @@ func (v Version) RunCommand(prog string, args []string, stdin io.Reader, stdout,
 	return 0
 }
 
-// Multi is a Handler that looks up its first argument in a map, and
-// invokes the resulting Handler with the remaining args.
+// Multi is a Handler that looks up its first argument in a map (after
+// stripping any "arvados-" or "crunch-" prefix), and invokes the
+// resulting Handler with the remaining args.
 //
 // Example:
 //
diff --git a/lib/dispatchcloud/cmd.go b/lib/dispatchcloud/cmd.go
new file mode 100644
index 000000000..a5a11d2fa
--- /dev/null
+++ b/lib/dispatchcloud/cmd.go
@@ -0,0 +1,17 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package dispatchcloud
+
+import (
+	"git.curoverse.com/arvados.git/lib/cmd"
+	"git.curoverse.com/arvados.git/lib/service"
+	"git.curoverse.com/arvados.git/sdk/go/arvados"
+)
+
+var Command cmd.Handler = service.Command(arvados.ServiceNameDispatchCloud, newHandler)
+
+func newHandler(cluster *arvados.Cluster, _ *arvados.NodeProfile) service.Handler {
+	return &dispatcher{Cluster: cluster}
+}
diff --git a/lib/dispatchcloud/container/queue.go b/lib/dispatchcloud/container/queue.go
new file mode 100644
index 000000000..17a38259d
--- /dev/null
+++ b/lib/dispatchcloud/container/queue.go
@@ -0,0 +1,323 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package container
+
+import (
+	"io"
+	"sync"
+	"time"
+
+	"git.curoverse.com/arvados.git/sdk/go/arvados"
+	"github.com/Sirupsen/logrus"
+	"github.com/prometheus/client_golang/prometheus"
+)
+
+type typeChooser func(*arvados.Container) (arvados.InstanceType, error)
+
+// An APIClient performs Arvados API requests. It is typically an
+// *arvados.Client.
+type APIClient interface {
+	RequestAndDecode(dst interface{}, method, path string, body io.Reader, params interface{}) error
+}
+
+// A QueueEnt is an entry in the queue, consisting of a container
+// record and the instance type that should be used to run it.
+type QueueEnt struct {
+	// The container to run. Only the UUID, State, Priority, and
+	// RuntimeConstraints fields are populated.
+	Container    arvados.Container
+	InstanceType arvados.InstanceType
+}
+
+// String implements fmt.Stringer by returning the queued container's
+// UUID.
+func (c *QueueEnt) String() string {
+	return c.Container.UUID
+}
+
+// A Queue is an interface to an Arvados cluster's container
+// database. It presents only the containers that are eligible to be
+// run by, are already being run by, or have recently been run by the
+// present dispatcher.
+//
+// The Entries, Get, and Forget methods do not block: they return
+// immediately, using cached data.
+//
+// The updating methods (Cancel, Lock, Unlock, Update) do block: they
+// return only after the operation has completed.
+//
+// A Queue's Update method should be called periodically to keep the
+// cache up to date.
+type Queue struct {
+	logger     logrus.FieldLogger
+	reg        *prometheus.Registry
+	chooseType typeChooser
+	client     APIClient
+
+	auth      *arvados.APIClientAuthorization
+	current   map[string]QueueEnt
+	updated   time.Time
+	mtx       sync.Mutex
+	keeplocal map[string]struct{}
+}
+
+// NewQueue returns a new Queue. When a new container appears in the
+// Arvados cluster's queue during Update, chooseType will be called to
+// assign an appropriate arvados.InstanceType for the queue entry.
+func NewQueue(logger logrus.FieldLogger, reg *prometheus.Registry, chooseType typeChooser, client APIClient) *Queue {
+	return &Queue{
+		logger:     logger,
+		reg:        reg,
+		chooseType: chooseType,
+		client:     client,
+		current:    map[string]QueueEnt{},
+	}
+}
+
+// Forget drops the specified container from the cache. It should be
+// called on finalized containers to avoid leaking memory over
+// time. It is a no-op if the indicated container is not in a
+// finalized state.
+func (cq *Queue) Forget(uuid string) {
+	cq.mtx.Lock()
+	defer cq.mtx.Unlock()
+	ctr := cq.current[uuid].Container
+	if ctr.State == arvados.ContainerStateComplete || ctr.State == arvados.ContainerStateCancelled {
+		delete(cq.current, uuid)
+	}
+}
+
+// Get returns the (partial) Container record for the specified
+// container. Like a map lookup, its second return value is false if
+// the specified container is not in the Queue.
+func (cq *Queue) Get(uuid string) (arvados.Container, bool) {
+	cq.mtx.Lock()
+	defer cq.mtx.Unlock()
+	if ctr, ok := cq.current[uuid]; !ok {
+		return arvados.Container{}, false
+	} else {
+		return ctr.Container, true
+	}
+}
+
+// Entries returns all cache entries, keyed by container UUID.
+//
+// The returned threshold indicates the maximum age of any cached data
+// returned in the map. This makes it possible for a scheduler to
+// determine correctly the outcome of a remote process that updates
+// container state. It must first wait for the remote process to exit,
+// then wait for the Queue to start and finish its next Update --
+// i.e., it must wait until threshold > timeProcessExited.
+func (cq *Queue) Entries() (entries map[string]QueueEnt, threshold time.Time) {
+	cq.mtx.Lock()
+	defer cq.mtx.Unlock()
+	entries = make(map[string]QueueEnt, len(cq.current))
+	for uuid, ctr := range cq.current {
+		entries[uuid] = ctr
+	}
+	threshold = cq.updated
+	return
+}
+
+// Update refreshes the cache from the Arvados API. It adds newly
+// queued containers, and updates the state of previously queued
+// containers.
+func (cq *Queue) Update() error {
+	cq.mtx.Lock()
+	cq.keeplocal = map[string]struct{}{}
+	updateStarted := time.Now()
+	cq.mtx.Unlock()
+
+	next, err := cq.poll()
+	if err != nil {
+		return err
+	}
+
+	cq.mtx.Lock()
+	defer cq.mtx.Unlock()
+	for uuid, ctr := range next {
+		if _, keep := cq.keeplocal[uuid]; keep {
+			continue
+		}
+		if cur, ok := cq.current[uuid]; !ok {
+			cq.addEnt(uuid, *ctr)
+		} else {
+			cur.Container = *ctr
+			cq.current[uuid] = cur
+		}
+	}
+	for uuid := range cq.current {
+		if _, keep := cq.keeplocal[uuid]; keep {
+			continue
+		} else if _, keep = next[uuid]; keep {
+			continue
+		} else {
+			delete(cq.current, uuid)
+		}
+	}
+	cq.keeplocal = nil
+	cq.updated = updateStarted
+	return nil
+}
+
+func (cq *Queue) addEnt(uuid string, ctr arvados.Container) {
+	it, err := cq.chooseType(&ctr)
+	if err != nil {
+		// FIXME: throttle warnings, cancel after timeout
+		cq.logger.Warnf("cannot run %s: %s", ctr.UUID, err)
+		return
+	}
+	cq.current[uuid] = QueueEnt{Container: ctr, InstanceType: it}
+}
+
+// Lock acquires the dispatch lock for the given container.
+func (cq *Queue) Lock(uuid string) error {
+	return cq.apiUpdate(uuid, "lock")
+}
+
+// Unlock releases the dispatch lock for the given container.
+func (cq *Queue) Unlock(uuid string) error {
+	return cq.apiUpdate(uuid, "unlock")
+}
+
+// Cancel cancels the given container.
+func (cq *Queue) Cancel(uuid string) error {
+	return cq.client.RequestAndDecode(nil, "PUT", "arvados/v1/containers/"+uuid, nil, map[string]map[string]interface{}{
+		"container": {"state": arvados.ContainerStateCancelled},
+	})
+}
+
+func (cq *Queue) apiUpdate(uuid, action string) error {
+	var resp arvados.Container
+	err := cq.client.RequestAndDecode(&resp, "POST", "arvados/v1/containers/"+uuid+"/"+action, nil, nil)
+	if err != nil {
+		return err
+	}
+
+	cq.mtx.Lock()
+	defer cq.mtx.Unlock()
+	if cq.keeplocal != nil {
+		cq.keeplocal[uuid] = struct{}{}
+	}
+	if ent, ok := cq.current[uuid]; !ok {
+		cq.addEnt(uuid, resp)
+	} else {
+		ent.Container.State, ent.Container.Priority, ent.Container.LockedByUUID = resp.State, resp.Priority, resp.LockedByUUID
+		cq.current[uuid] = ent
+	}
+	return nil
+}
+
+func (cq *Queue) poll() (map[string]*arvados.Container, error) {
+	cq.mtx.Lock()
+	size := len(cq.current)
+	auth := cq.auth
+	cq.mtx.Unlock()
+
+	if auth == nil {
+		auth = &arvados.APIClientAuthorization{}
+		err := cq.client.RequestAndDecode(auth, "GET", "arvados/v1/api_client_authorizations/current", nil, nil)
+		if err != nil {
+			return nil, err
+		}
+		cq.mtx.Lock()
+		cq.auth = auth
+		cq.mtx.Unlock()
+	}
+
+	next := make(map[string]*arvados.Container, size)
+	apply := func(updates []arvados.Container) {
+		for _, upd := range updates {
+			if next[upd.UUID] == nil {
+				next[upd.UUID] = &arvados.Container{}
+			}
+			*next[upd.UUID] = upd
+		}
+	}
+	selectParam := []string{"uuid", "state", "priority", "runtime_constraints"}
+	limitParam := 1000
+
+	mine, err := cq.fetchAll(arvados.ResourceListParams{
+		Select:  selectParam,
+		Order:   "uuid",
+		Limit:   &limitParam,
+		Count:   "none",
+		Filters: []arvados.Filter{{"locked_by_uuid", "=", auth.UUID}},
+	})
+	if err != nil {
+		return nil, err
+	}
+	apply(mine)
+
+	avail, err := cq.fetchAll(arvados.ResourceListParams{
+		Select:  selectParam,
+		Order:   "uuid",
+		Limit:   &limitParam,
+		Count:   "none",
+		Filters: []arvados.Filter{{"state", "=", arvados.ContainerStateQueued}, {"priority", ">", "0"}},
+	})
+	if err != nil {
+		return nil, err
+	}
+	apply(avail)
+
+	var missing []string
+	cq.mtx.Lock()
+	for uuid, ent := range cq.current {
+		if next[uuid] == nil &&
+			ent.Container.State != arvados.ContainerStateCancelled &&
+			ent.Container.State != arvados.ContainerStateComplete {
+			missing = append(missing, uuid)
+		}
+	}
+	cq.mtx.Unlock()
+
+	for i, page := 0, 20; i < len(missing); i += page {
+		batch := missing[i:]
+		if len(batch) > page {
+			batch = batch[:page]
+		}
+		ended, err := cq.fetchAll(arvados.ResourceListParams{
+			Select:  selectParam,
+			Order:   "uuid",
+			Count:   "none",
+			Filters: []arvados.Filter{{"uuid", "in", batch}},
+		})
+		if err != nil {
+			return nil, err
+		}
+		apply(ended)
+	}
+	return next, nil
+}
+
+func (cq *Queue) fetchAll(initialParams arvados.ResourceListParams) ([]arvados.Container, error) {
+	var results []arvados.Container
+	params := initialParams
+	params.Offset = 0
+	for {
+		// This list variable must be a new one declared
+		// inside the loop: otherwise, items in the API
+		// response would get deep-merged into the items
+		// loaded in previous iterations.
+		var list arvados.ContainerList
+
+		err := cq.client.RequestAndDecode(&list, "GET", "arvados/v1/containers", nil, params)
+		if err != nil {
+			return nil, err
+		}
+		if len(list.Items) == 0 {
+			break
+		}
+
+		results = append(results, list.Items...)
+		if params.Order == "uuid" {
+			params.Filters = append(initialParams.Filters, arvados.Filter{"uuid", ">", list.Items[len(list.Items)-1].UUID})
+		} else {
+			params.Offset += len(list.Items)
+		}
+	}
+	return results, nil
+}
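
To make the Queue's contract concrete, here is a hedged sketch of a consumer loop that uses only the methods defined above (Update, Entries, Lock, Forget). The poll interval, package name, and error handling are illustrative and not part of this commit:

    package example

    import (
        "time"

        "git.curoverse.com/arvados.git/lib/dispatchcloud/container"
        "git.curoverse.com/arvados.git/sdk/go/arvados"
    )

    func runQueue(cq *container.Queue, pollEvery time.Duration) {
        for range time.Tick(pollEvery) {
            if err := cq.Update(); err != nil {
                continue // transient API error; retry on next tick
            }
            ents, _ := cq.Entries()
            for uuid, ent := range ents {
                switch ent.Container.State {
                case arvados.ContainerStateQueued:
                    cq.Lock(uuid) // take the dispatch lock
                case arvados.ContainerStateComplete,
                    arvados.ContainerStateCancelled:
                    cq.Forget(uuid) // drop finalized entries from the cache
                }
            }
        }
    }
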
diff --git a/lib/dispatchcloud/dispatcher.go b/lib/dispatchcloud/dispatcher.go
new file mode 100644
index 000000000..e422b3963
--- /dev/null
+++ b/lib/dispatchcloud/dispatcher.go
@@ -0,0 +1,199 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package dispatchcloud
+
+import (
+	"crypto/md5"
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"strings"
+	"sync"
+	"time"
+
+	"git.curoverse.com/arvados.git/lib/cloud"
+	"git.curoverse.com/arvados.git/lib/dispatchcloud/container"
+	"git.curoverse.com/arvados.git/lib/dispatchcloud/scheduler"
+	"git.curoverse.com/arvados.git/lib/dispatchcloud/ssh_executor"
+	"git.curoverse.com/arvados.git/lib/dispatchcloud/worker"
+	"git.curoverse.com/arvados.git/sdk/go/arvados"
+	"git.curoverse.com/arvados.git/sdk/go/auth"
+	"git.curoverse.com/arvados.git/sdk/go/httpserver"
+	"github.com/Sirupsen/logrus"
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/client_golang/prometheus/promhttp"
+	"golang.org/x/crypto/ssh"
+)
+
+const (
+	defaultPollInterval = time.Second
+)
+
+type containerQueue interface {
+	scheduler.ContainerQueue
+	Update() error
+}
+
+type pool interface {
+	scheduler.WorkerPool
+	View() []worker.View
+}
+
+type dispatcher struct {
+	Cluster       *arvados.Cluster
+	InstanceSetID cloud.InstanceSetID
+
+	logger       logrus.FieldLogger
+	reg          *prometheus.Registry
+	instanceSet  cloud.InstanceSet
+	pool         pool
+	queue        containerQueue
+	httpHandler  http.Handler
+	pollInterval time.Duration
+	sshKey       ssh.Signer
+
+	setupOnce sync.Once
+	stop      chan struct{}
+}
+
+// ServeHTTP implements service.Handler.
+func (disp *dispatcher) ServeHTTP(w http.ResponseWriter, r *http.Request) {
+	disp.setupOnce.Do(disp.setup)
+	disp.httpHandler.ServeHTTP(w, r)
+}
+
+// CheckHealth implements service.Handler.
+func (disp *dispatcher) CheckHealth() error {
+	disp.setupOnce.Do(disp.setup)
+	return nil
+}
+
+// Stop dispatching containers and release resources. Typically used
+// in tests.
+func (disp *dispatcher) Close() {
+	disp.setupOnce.Do(disp.setup)
+	select {
+	case disp.stop <- struct{}{}:
+	default:
+	}
+}
+
+// Make a worker.Executor for the given instance.
+func (disp *dispatcher) newExecutor(inst cloud.Instance) worker.Executor {
+	exr := ssh_executor.New(inst)
+	exr.SetSigners(disp.sshKey)
+	return exr
+}
+
+func (disp *dispatcher) typeChooser(ctr *arvados.Container) (arvados.InstanceType, error) {
+	return ChooseInstanceType(disp.Cluster, ctr)
+}
+
+func (disp *dispatcher) setup() {
+	disp.initialize()
+	go disp.run()
+}
+
+func (disp *dispatcher) initialize() {
+	arvClient := arvados.NewClientFromEnv()
+	if disp.InstanceSetID == "" {
+		if strings.HasPrefix(arvClient.AuthToken, "v2/") {
+			disp.InstanceSetID = cloud.InstanceSetID(strings.Split(arvClient.AuthToken, "/")[1])
+		} else {
+			// Use some other string unique to this token
+			// that doesn't reveal the token itself.
+			disp.InstanceSetID = cloud.InstanceSetID(fmt.Sprintf("%x", md5.Sum([]byte(arvClient.AuthToken))))
+		}
+	}
+	disp.stop = make(chan struct{}, 1)
+	disp.logger = logrus.StandardLogger()
+
+	if key, err := ssh.ParsePrivateKey(disp.Cluster.Dispatch.PrivateKey); err != nil {
+		disp.logger.Fatalf("error parsing configured Dispatch.PrivateKey: %s", err)
+	} else {
+		disp.sshKey = key
+	}
+
+	instanceSet, err := newInstanceSet(disp.Cluster, disp.InstanceSetID)
+	if err != nil {
+		disp.logger.Fatalf("error initializing driver: %s", err)
+	}
+	disp.instanceSet = &instanceSetProxy{instanceSet}
+	disp.reg = prometheus.NewRegistry()
+	disp.pool = worker.NewPool(disp.logger, disp.reg, disp.instanceSet, disp.newExecutor, disp.Cluster)
+	disp.queue = container.NewQueue(disp.logger, disp.reg, disp.typeChooser, arvClient)
+
+	mux := http.NewServeMux()
+	mux.HandleFunc("/arvados/v1/dispatch/containers", disp.apiContainers)
+	mux.HandleFunc("/arvados/v1/dispatch/instances", disp.apiInstances)
+	metricsH := promhttp.HandlerFor(disp.reg, promhttp.HandlerOpts{
+		ErrorLog: disp.logger,
+	})
+	mux.Handle("/metrics", metricsH)
+	mux.Handle("/metrics.json", metricsH)
+	disp.httpHandler = auth.RequireLiteralToken(disp.Cluster.ManagementToken, mux)
+
+	if d := disp.Cluster.Dispatch.PollInterval; d > 0 {
+		disp.pollInterval = time.Duration(d)
+	} else {
+		disp.pollInterval = defaultPollInterval
+	}
+}
+
+func (disp *dispatcher) run() {
+	defer disp.instanceSet.Stop()
+
+	t0 := time.Now()
+	disp.logger.Infof("FixStaleLocks starting.")
+	scheduler.FixStaleLocks(disp.logger, disp.queue, disp.pool, time.Duration(disp.Cluster.Dispatch.StaleLockTimeout))
+	disp.logger.Infof("FixStaleLocks finished (%s), starting scheduling.", time.Since(t0))
+
+	wp := disp.pool.Subscribe()
+	defer disp.pool.Unsubscribe(wp)
+	poll := time.NewTicker(disp.pollInterval)
+	for {
+		scheduler.Map(disp.logger, disp.queue, disp.pool)
+		scheduler.Sync(disp.logger, disp.queue, disp.pool)
+		select {
+		case <-disp.stop:
+			return
+		case <-wp:
+		case <-poll.C:
+			err := disp.queue.Update()
+			if err != nil {
+				disp.logger.Errorf("error updating queue: %s", err)
+			}
+		}
+	}
+}
+
+// Management API: all active and queued containers.
+func (disp *dispatcher) apiContainers(w http.ResponseWriter, r *http.Request) {
+	if r.Method != "GET" {
+		httpserver.Error(w, "method not allowed", http.StatusMethodNotAllowed)
+		return
+	}
+	var resp struct {
+		Items []container.QueueEnt
+	}
+	qEntries, _ := disp.queue.Entries()
+	for _, ent := range qEntries {
+		resp.Items = append(resp.Items, ent)
+	}
+	json.NewEncoder(w).Encode(resp)
+}
+
+// Management API: all active instances (cloud VMs).
+func (disp *dispatcher) apiInstances(w http.ResponseWriter, r *http.Request) {
+	if r.Method != "GET" {
+		httpserver.Error(w, "method not allowed", http.StatusMethodNotAllowed)
+		return
+	}
+	var resp struct {
+		Items []worker.View
+	}
+	resp.Items = disp.pool.View()
+	json.NewEncoder(w).Encode(resp)
+}
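
The two management endpoints registered above return JSON lists guarded by the cluster's ManagementToken. A sketch of a client, assuming the token is presented as a Bearer token (as with other Arvados management endpoints; older releases may expect an OAuth2 prefix instead). The field names match the worker.View fields exercised in the tests below:

    package example

    import (
        "encoding/json"
        "net/http"
    )

    type instanceView struct {
        Instance    string
        WorkerState string
        Price       float64
    }

    func listInstances(baseURL, mgmtToken string) ([]instanceView, error) {
        req, err := http.NewRequest("GET", baseURL+"/arvados/v1/dispatch/instances", nil)
        if err != nil {
            return nil, err
        }
        // Assumption: Bearer auth is accepted for the management token.
        req.Header.Set("Authorization", "Bearer "+mgmtToken)
        resp, err := http.DefaultClient.Do(req)
        if err != nil {
            return nil, err
        }
        defer resp.Body.Close()
        var body struct{ Items []instanceView }
        err = json.NewDecoder(resp.Body).Decode(&body)
        return body.Items, err
    }
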
diff --git a/lib/dispatchcloud/dispatcher_test.go b/lib/dispatchcloud/dispatcher_test.go
new file mode 100644
index 000000000..e6f536d24
--- /dev/null
+++ b/lib/dispatchcloud/dispatcher_test.go
@@ -0,0 +1,370 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package dispatchcloud
+
+import (
+	"encoding/json"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"math/rand"
+	"net/http/httptest"
+	"os"
+	"regexp"
+	"strings"
+	"sync"
+	"time"
+
+	"git.curoverse.com/arvados.git/lib/cloud"
+	"git.curoverse.com/arvados.git/lib/dispatchcloud/test"
+	"git.curoverse.com/arvados.git/sdk/go/arvados"
+	"github.com/Sirupsen/logrus"
+	"golang.org/x/crypto/ssh"
+	check "gopkg.in/check.v1"
+)
+
+var _ = check.Suite(&DispatcherSuite{})
+
+// fakeCloud provides an exec method that can be used as a
+// test.StubExecFunc. It calls the provided makeVM func when called
+// with a previously unseen instance ID. Calls to exec are passed on
+// to the *fakeVM for the appropriate instance ID.
+type fakeCloud struct {
+	queue      *test.Queue
+	makeVM     func(cloud.Instance) *fakeVM
+	onComplete func(string)
+	onCancel   func(string)
+	vms        map[cloud.InstanceID]*fakeVM
+	sync.Mutex
+}
+
+func (fc *fakeCloud) exec(inst cloud.Instance, command string, stdin io.Reader, stdout, stderr io.Writer) uint32 {
+	fc.Lock()
+	fvm, ok := fc.vms[inst.ID()]
+	if !ok {
+		if fc.vms == nil {
+			fc.vms = make(map[cloud.InstanceID]*fakeVM)
+		}
+		fvm = fc.makeVM(inst)
+		fc.vms[inst.ID()] = fvm
+	}
+	fc.Unlock()
+	return fvm.exec(fc.queue, fc.onComplete, fc.onCancel, command, stdin, stdout, stderr)
+}
+
+// fakeVM is a fake VM with configurable delays and failure modes.
+type fakeVM struct {
+	boot                 time.Time
+	broken               time.Time
+	crunchRunMissing     bool
+	crunchRunCrashRate   float64
+	crunchRunDetachDelay time.Duration
+	ctrExit              int
+	running              map[string]bool
+	completed            []string
+	sync.Mutex
+}
+
+func (fvm *fakeVM) exec(queue *test.Queue, onComplete, onCancel func(uuid string), command string, stdin io.Reader, stdout, stderr io.Writer) uint32 {
+	uuid := regexp.MustCompile(`.{5}-dz642-.{15}`).FindString(command)
+	if eta := fvm.boot.Sub(time.Now()); eta > 0 {
+		fmt.Fprintf(stderr, "stub is booting, ETA %s\n", eta)
+		return 1
+	}
+	if !fvm.broken.IsZero() && fvm.broken.Before(time.Now()) {
+		fmt.Fprintf(stderr, "cannot fork\n")
+		return 2
+	}
+	if fvm.crunchRunMissing && strings.Contains(command, "crunch-run") {
+		fmt.Fprint(stderr, "crunch-run: command not found\n")
+		return 1
+	}
+	if strings.HasPrefix(command, "crunch-run --detach ") {
+		fvm.Lock()
+		if fvm.running == nil {
+			fvm.running = map[string]bool{}
+		}
+		fvm.running[uuid] = true
+		fvm.Unlock()
+		time.Sleep(fvm.crunchRunDetachDelay)
+		fmt.Fprintf(stderr, "starting %s\n", uuid)
+		logger := logrus.WithField("ContainerUUID", uuid)
+		logger.Printf("[test] starting crunch-run stub")
+		go func() {
+			crashluck := rand.Float64()
+			ctr, ok := queue.Get(uuid)
+			if !ok {
+				logger.Print("[test] container not in queue")
+				return
+			}
+			if crashluck > fvm.crunchRunCrashRate/2 {
+				time.Sleep(time.Duration(rand.Float64()*20) * time.Millisecond)
+				ctr.State = arvados.ContainerStateRunning
+				queue.Notify(ctr)
+			}
+
+			time.Sleep(time.Duration(rand.Float64()*20) * time.Millisecond)
+			fvm.Lock()
+			_, running := fvm.running[uuid]
+			fvm.Unlock()
+			if !running {
+				logger.Print("[test] container was killed")
+				return
+			}
+			if crashluck < fvm.crunchRunCrashRate {
+				logger.Print("[test] crashing crunch-run stub")
+				if onCancel != nil && ctr.State == arvados.ContainerStateRunning {
+					onCancel(uuid)
+				}
+			} else {
+				ctr.State = arvados.ContainerStateComplete
+				ctr.ExitCode = fvm.ctrExit
+				queue.Notify(ctr)
+				if onComplete != nil {
+					onComplete(uuid)
+				}
+			}
+			logger.Print("[test] exiting crunch-run stub")
+			fvm.Lock()
+			defer fvm.Unlock()
+			delete(fvm.running, uuid)
+		}()
+		return 0
+	}
+	if command == "crunch-run --list" {
+		fvm.Lock()
+		defer fvm.Unlock()
+		for uuid := range fvm.running {
+			fmt.Fprintf(stdout, "%s\n", uuid)
+		}
+		return 0
+	}
+	if strings.HasPrefix(command, "crunch-run --kill ") {
+		fvm.Lock()
+		defer fvm.Unlock()
+		if fvm.running[uuid] {
+			delete(fvm.running, uuid)
+		} else {
+			fmt.Fprintf(stderr, "%s: container is not running\n", uuid)
+		}
+		return 0
+	}
+	if command == "true" {
+		return 0
+	}
+	fmt.Fprintf(stderr, "%q: command not found", command)
+	return 1
+}
+
+type DispatcherSuite struct {
+	cluster     *arvados.Cluster
+	instanceSet *test.LameInstanceSet
+	stubDriver  *test.StubDriver
+	disp        *dispatcher
+}
+
+func (s *DispatcherSuite) SetUpSuite(c *check.C) {
+	if os.Getenv("ARVADOS_DEBUG") != "" {
+		logrus.StandardLogger().SetLevel(logrus.DebugLevel)
+	}
+}
+
+func (s *DispatcherSuite) SetUpTest(c *check.C) {
+	dispatchpub, _ := test.LoadTestKey(c, "test/sshkey_dispatch")
+	dispatchprivraw, err := ioutil.ReadFile("test/sshkey_dispatch")
+	c.Assert(err, check.IsNil)
+
+	_, hostpriv := test.LoadTestKey(c, "test/sshkey_vm")
+	s.stubDriver = &test.StubDriver{
+		Exec: func(inst cloud.Instance, command string, _ io.Reader, _, _ io.Writer) uint32 {
+			c.Logf("stubDriver SSHExecFunc(%s, %q, ...)", inst, command)
+			return 1
+		},
+		HostKey:        hostpriv,
+		AuthorizedKeys: []ssh.PublicKey{dispatchpub},
+	}
+
+	s.cluster = &arvados.Cluster{
+		CloudVMs: arvados.CloudVMs{
+			Driver:          "test",
+			SyncInterval:    arvados.Duration(10 * time.Millisecond),
+			TimeoutIdle:     arvados.Duration(30 * time.Millisecond),
+			TimeoutBooting:  arvados.Duration(30 * time.Millisecond),
+			TimeoutProbe:    arvados.Duration(15 * time.Millisecond),
+			TimeoutShutdown: arvados.Duration(5 * time.Millisecond),
+		},
+		Dispatch: arvados.Dispatch{
+			PrivateKey:         dispatchprivraw,
+			PollInterval:       arvados.Duration(5 * time.Millisecond),
+			ProbeInterval:      arvados.Duration(5 * time.Millisecond),
+			MaxProbesPerSecond: 1000,
+		},
+		InstanceTypes: arvados.InstanceTypeMap{
+			test.InstanceType(1).Name:  test.InstanceType(1),
+			test.InstanceType(2).Name:  test.InstanceType(2),
+			test.InstanceType(3).Name:  test.InstanceType(3),
+			test.InstanceType(4).Name:  test.InstanceType(4),
+			test.InstanceType(6).Name:  test.InstanceType(6),
+			test.InstanceType(8).Name:  test.InstanceType(8),
+			test.InstanceType(16).Name: test.InstanceType(16),
+		},
+		NodeProfiles: map[string]arvados.NodeProfile{
+			"*": {
+				Controller:    arvados.SystemServiceInstance{Listen: os.Getenv("ARVADOS_API_HOST")},
+				DispatchCloud: arvados.SystemServiceInstance{Listen: ":"},
+			},
+		},
+	}
+	s.disp = &dispatcher{Cluster: s.cluster}
+	// Test cases can modify s.cluster before calling
+	// initialize(), and then modify private state before calling
+	// go run().
+}
+
+func (s *DispatcherSuite) TearDownTest(c *check.C) {
+	s.disp.Close()
+}
+
+func (s *DispatcherSuite) TestDispatchToStubDriver(c *check.C) {
+	drivers["test"] = s.stubDriver
+	s.disp.setupOnce.Do(s.disp.initialize)
+	queue := &test.Queue{
+		ChooseType: func(ctr *arvados.Container) (arvados.InstanceType, error) {
+			return ChooseInstanceType(s.cluster, ctr)
+		},
+	}
+	for i := 0; i < 200; i++ {
+		queue.Containers = append(queue.Containers, arvados.Container{
+			UUID:     test.ContainerUUID(i + 1),
+			State:    arvados.ContainerStateQueued,
+			Priority: int64(i%20 + 1),
+			RuntimeConstraints: arvados.RuntimeConstraints{
+				RAM:   int64(i%3+1) << 30,
+				VCPUs: i%8 + 1,
+			},
+		})
+	}
+	s.disp.queue = queue
+
+	var mtx sync.Mutex
+	done := make(chan struct{})
+	waiting := map[string]struct{}{}
+	for _, ctr := range queue.Containers {
+		waiting[ctr.UUID] = struct{}{}
+	}
+	onComplete := func(uuid string) {
+		mtx.Lock()
+		defer mtx.Unlock()
+		if _, ok := waiting[uuid]; !ok {
+			c.Errorf("container completed twice: %s", uuid)
+		}
+		delete(waiting, uuid)
+		if len(waiting) == 0 {
+			close(done)
+		}
+	}
+	n := 0
+	fc := &fakeCloud{
+		queue: queue,
+		makeVM: func(inst cloud.Instance) *fakeVM {
+			n++
+			fvm := &fakeVM{
+				boot:                 time.Now().Add(time.Duration(rand.Int63n(int64(5 * time.Millisecond)))),
+				crunchRunDetachDelay: time.Duration(rand.Int63n(int64(10 * time.Millisecond))),
+				ctrExit:              int(rand.Uint32() & 0x3),
+			}
+			switch n % 7 {
+			case 0:
+				fvm.broken = time.Now().Add(time.Duration(rand.Int63n(90)) * time.Millisecond)
+			case 1:
+				fvm.crunchRunMissing = true
+			default:
+				fvm.crunchRunCrashRate = 0.1
+			}
+			return fvm
+		},
+		onComplete: onComplete,
+		onCancel:   onComplete,
+	}
+	s.stubDriver.Exec = fc.exec
+
+	start := time.Now()
+	go s.disp.run()
+	err := s.disp.CheckHealth()
+	c.Check(err, check.IsNil)
+
+	select {
+	case <-done:
+		c.Logf("containers finished (%s), waiting for instances to shutdown and queue to clear", time.Since(start))
+	case <-time.After(10 * time.Second):
+		c.Fatalf("timed out; still waiting for %d containers: %q", len(waiting), waiting)
+	}
+
+	deadline := time.Now().Add(time.Second)
+	for range time.NewTicker(10 * time.Millisecond).C {
+		insts, err := s.stubDriver.InstanceSets()[0].Instances(nil)
+		c.Check(err, check.IsNil)
+		queue.Update()
+		ents, _ := queue.Entries()
+		if len(ents) == 0 && len(insts) == 0 {
+			break
+		}
+		if time.Now().After(deadline) {
+			c.Fatalf("timed out with %d containers (%v), %d instances (%+v)", len(ents), ents, len(insts), insts)
+		}
+	}
+}
+
+func (s *DispatcherSuite) TestInstancesAPI(c *check.C) {
+	var lameSet test.LameInstanceSet
+	drivers["test"] = cloud.DriverFunc(func(params map[string]interface{}, id cloud.InstanceSetID) (cloud.InstanceSet, error) {
+		return &lameSet, nil
+	})
+
+	type instance struct {
+		Instance             string
+		WorkerState          string
+		Price                float64
+		LastContainerUUID    string
+		ArvadosInstanceType  string
+		ProviderInstanceType string
+	}
+	type instancesResponse struct {
+		Items []instance
+	}
+	getInstances := func() instancesResponse {
+		req := httptest.NewRequest("GET", "/arvados/v1/dispatch/instances", nil)
+		resp := httptest.NewRecorder()
+		s.disp.ServeHTTP(resp, req)
+		var sr instancesResponse
+		err := json.Unmarshal(resp.Body.Bytes(), &sr)
+		c.Check(err, check.IsNil)
+		return sr
+	}
+
+	sr := getInstances()
+	c.Check(len(sr.Items), check.Equals, 0)
+
+	ch := s.disp.pool.Subscribe()
+	defer s.disp.pool.Unsubscribe(ch)
+	err := s.disp.pool.Create(arvados.InstanceType{
+		Name:         "a1.small-1",
+		ProviderType: "a1.small",
+		VCPUs:        1,
+		RAM:          1 << 30,
+		Price:        0.12,
+	})
+	c.Check(err, check.IsNil)
+	<-ch
+
+	sr = getInstances()
+	c.Assert(len(sr.Items), check.Equals, 1)
+	c.Check(sr.Items[0].Instance, check.Matches, "lame-.*")
+	c.Check(sr.Items[0].WorkerState, check.Equals, "booting")
+	c.Check(sr.Items[0].Price, check.Equals, 0.12)
+	c.Check(sr.Items[0].LastContainerUUID, check.Equals, "")
+	c.Check(sr.Items[0].ProviderInstanceType, check.Equals, "a1.small")
+	c.Check(sr.Items[0].ArvadosInstanceType, check.Equals, "a1.small-1")
+}
diff --git a/lib/dispatchcloud/driver.go b/lib/dispatchcloud/driver.go
new file mode 100644
index 000000000..295fd6105
--- /dev/null
+++ b/lib/dispatchcloud/driver.go
@@ -0,0 +1,22 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package dispatchcloud
+
+import (
+	"fmt"
+
+	"git.curoverse.com/arvados.git/lib/cloud"
+	"git.curoverse.com/arvados.git/sdk/go/arvados"
+)
+
+var drivers = map[string]cloud.Driver{}
+
+func newInstanceSet(cluster *arvados.Cluster, setID cloud.InstanceSetID) (cloud.InstanceSet, error) {
+	driver, ok := drivers[cluster.CloudVMs.Driver]
+	if !ok {
+		return nil, fmt.Errorf("unsupported cloud driver %q", cluster.CloudVMs.Driver)
+	}
+	return driver.InstanceSet(cluster.CloudVMs.DriverParameters, setID)
+}
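
Drivers become available to newInstanceSet by being added to the package-level drivers map (the test suite does this with drivers["test"]). A sketch of registering a driver via cloud.DriverFunc; the "example" key and the stubbed body are illustrative only, not part of this commit:

    package dispatchcloud

    import (
        "errors"

        "git.curoverse.com/arvados.git/lib/cloud"
    )

    func init() {
        drivers["example"] = cloud.DriverFunc(
            func(config map[string]interface{}, id cloud.InstanceSetID) (cloud.InstanceSet, error) {
                // A real driver would decode config and return an
                // InstanceSet that tags its cloud resources with id.
                return nil, errors.New("example driver not implemented")
            })
    }
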
diff --git a/lib/dispatchcloud/instance_set_proxy.go b/lib/dispatchcloud/instance_set_proxy.go
new file mode 100644
index 000000000..e728b67cd
--- /dev/null
+++ b/lib/dispatchcloud/instance_set_proxy.go
@@ -0,0 +1,25 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package dispatchcloud
+
+import (
+	"git.curoverse.com/arvados.git/lib/cloud"
+	"git.curoverse.com/arvados.git/sdk/go/arvados"
+	"golang.org/x/crypto/ssh"
+)
+
+type instanceSetProxy struct {
+	cloud.InstanceSet
+}
+
+func (is *instanceSetProxy) Create(it arvados.InstanceType, id cloud.ImageID, tags cloud.InstanceTags, pk ssh.PublicKey) (cloud.Instance, error) {
+	// TODO: return if Create failed recently with a RateLimitError or QuotaError
+	return is.InstanceSet.Create(it, id, tags, pk)
+}
+
+func (is *instanceSetProxy) Instances(tags cloud.InstanceTags) ([]cloud.Instance, error) {
+	// TODO: return if Instances failed recently with a RateLimitError
+	return is.InstanceSet.Instances(tags)
+}
diff --git a/lib/dispatchcloud/logger.go b/lib/dispatchcloud/logger.go
new file mode 100644
index 000000000..90bb6ca68
--- /dev/null
+++ b/lib/dispatchcloud/logger.go
@@ -0,0 +1,29 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package dispatchcloud
+
+import (
+	"sync"
+	"time"
+)
+
+type logger interface {
+	Printf(string, ...interface{})
+	Warnf(string, ...interface{})
+	Debugf(string, ...interface{})
+}
+
+var nextSpam = map[string]time.Time{}
+var nextSpamMtx sync.Mutex
+
+func unspam(msg string) bool {
+	nextSpamMtx.Lock()
+	defer nextSpamMtx.Unlock()
+	if nextSpam[msg].Before(time.Now()) {
+		nextSpam[msg] = time.Now().Add(time.Minute)
+		return true
+	}
+	return false
+}
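
unspam rate-limits identical log messages to one per minute per distinct string. A small sketch of the intended use, building on the logger interface defined above (the warnOnce helper is illustrative, not part of this commit):

    package dispatchcloud

    // warnOnce logs msg at most once per minute per distinct message,
    // so a condition checked on every poll cycle doesn't flood the log.
    func warnOnce(log logger, msg string) {
        if unspam(msg) {
            log.Warnf("%s", msg)
        }
    }
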
diff --git a/lib/dispatchcloud/node_size.go b/lib/dispatchcloud/node_size.go
index 1c36d6cf5..3bada3baf 100644
--- a/lib/dispatchcloud/node_size.go
+++ b/lib/dispatchcloud/node_size.go
@@ -15,10 +15,9 @@ import (
 	"git.curoverse.com/arvados.git/sdk/go/arvados"
 )
 
-var (
-	ErrInstanceTypesNotConfigured = errors.New("site configuration does not list any instance types")
-	discountConfiguredRAMPercent  = 5
-)
+var ErrInstanceTypesNotConfigured = errors.New("site configuration does not list any instance types")
+
+var discountConfiguredRAMPercent = 5
 
 // ConstraintsNotSatisfiableError includes a list of available instance types
 // to be reported back to the user.
diff --git a/lib/dispatchcloud/readme.go b/lib/dispatchcloud/readme.go
new file mode 100644
index 000000000..a4b005eb8
--- /dev/null
+++ b/lib/dispatchcloud/readme.go
@@ -0,0 +1,79 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package dispatchcloud
+
+// A dispatcher comprises a container queue, a scheduler, a worker
+// pool, a cloud provider, a stale-lock fixer, and a syncer.
+// 1. Choose a provider.
+// 2. Start a worker pool.
+// 3. Start a container queue.
+// 4. Run a stale-lock fixer.
+// 5. Start a scheduler.
+// 6. Start a syncer.
+//
+//
+// A provider (cloud driver) creates new cloud VM instances and gets
+// the latest list of instances. The returned instances implement
+// proxies to the provider's metadata and control interfaces (get IP
+// address, update tags, shutdown).
+//
+//
+// A workerPool tracks workers' instance types and readiness states
+// (available to do work now, booting, suffering a temporary network
+// outage, shutting down). It loads internal state from the cloud
+// provider's list of instances at startup, and syncs periodically
+// after that.
+//
+//
+// A worker maintains a multiplexed SSH connection to a cloud
+// instance, retrying/reconnecting as needed, so the workerPool can
+// execute commands. It asks the provider's instance to verify its SSH
+// public key once when first connecting, and again later if the key
+// changes.
+//
+//
+// A container queue tracks the known state (according to
+// arvados-controller) of each container of interest -- i.e., queued,
+// or locked/running using our own dispatch token. It also proxies the
+// dispatcher's lock/unlock/cancel requests to the controller. It
+// handles concurrent refresh and update operations without exposing
+// out-of-order updates to its callers. (It drops any new information
+// that might have originated before its own most recent
+// lock/unlock/cancel operation.)
+//
+//
+// A stale-lock fixer waits for any already-locked containers (i.e.,
+// locked by a prior server process) to appear on workers as the
+// worker pool recovers its state. It unlocks/requeues any that still
+// remain when all workers are recovered or shutdown, or its timer
+// expires.
+//
+//
+// A scheduler chooses which containers to assign to which idle
+// workers, and decides what to do when there are not enough idle
+// workers (including shutting down some idle nodes).
+//
+//
+// A syncer updates state to Cancelled when a running container
+// process dies without finalizing its entry in the controller
+// database. It also calls the worker pool to kill containers that
+// have priority=0 while locked or running.
+//
+//
+// A provider proxy wraps a provider with rate-limiting logic. After
+// the wrapped provider receives a cloud.RateLimitError, the proxy
+// starts returning errors to callers immediately without calling
+// through to the wrapped provider.
+//
+//
+// TBD: Bootstrapping script via SSH, too? Future version.
+//
+// TBD: drain instance, keep instance alive
+// TBD: metrics, diagnostics
+// TBD: why dispatch token currently passed to worker?
+//
+// Metrics: queue size, time job has been queued, #idle/busy/booting nodes
+// Timing in each step, and end-to-end
+// Metrics: boot/idle/alloc time and cost
diff --git a/lib/dispatchcloud/scheduler/fix_stale_locks.go b/lib/dispatchcloud/scheduler/fix_stale_locks.go
new file mode 100644
index 000000000..e9644aed2
--- /dev/null
+++ b/lib/dispatchcloud/scheduler/fix_stale_locks.go
@@ -0,0 +1,57 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package scheduler
+
+import (
+	"time"
+
+	"git.curoverse.com/arvados.git/lib/dispatchcloud/worker"
+	"git.curoverse.com/arvados.git/sdk/go/arvados"
+	"github.com/Sirupsen/logrus"
+)
+
+// FixStaleLocks waits for any already-locked containers (i.e., locked
+// by a prior dispatcher process) to appear on workers as the worker
+// pool recovers its state. It unlocks any that still remain when all
+// workers are recovered or shutdown, or its timer expires.
+func FixStaleLocks(logger logrus.FieldLogger, queue ContainerQueue, pool WorkerPool, limit time.Duration) {
+	wp := pool.Subscribe()
+	defer pool.Unsubscribe(wp)
+	timeout := time.NewTimer(limit)
+waiting:
+	for {
+		unlock := false
+		select {
+		case <-wp:
+			// If all workers have been contacted, unlock
+			// containers that aren't claimed by any
+			// worker.
+			unlock = pool.Workers()[worker.StateUnknown] == 0
+		case <-timeout.C:
+			// Give up and unlock the containers, even
+			// though they might be working.
+			unlock = true
+		}
+
+		running := pool.Running()
+		qEntries, _ := queue.Entries()
+		for uuid, ent := range qEntries {
+			if ent.Container.State != arvados.ContainerStateLocked {
+				continue
+			}
+			if _, running := running[uuid]; running {
+				continue
+			}
+			if !unlock {
+				continue waiting
+			}
+			err := queue.Unlock(uuid)
+			if err != nil {
+				logger.Warnf("Unlock %s: %s", uuid, err)
+			}
+		}
+		return
+	}
+}
diff --git a/lib/dispatchcloud/scheduler/gocheck_test.go b/lib/dispatchcloud/scheduler/gocheck_test.go
new file mode 100644
index 000000000..558c60f73
--- /dev/null
+++ b/lib/dispatchcloud/scheduler/gocheck_test.go
@@ -0,0 +1,16 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package scheduler
+
+import (
+	"testing"
+
+	check "gopkg.in/check.v1"
+)
+
+// Gocheck boilerplate
+func Test(t *testing.T) {
+	check.TestingT(t)
+}
diff --git a/lib/dispatchcloud/scheduler/interfaces.go b/lib/dispatchcloud/scheduler/interfaces.go
new file mode 100644
index 000000000..bdb8678e9
--- /dev/null
+++ b/lib/dispatchcloud/scheduler/interfaces.go
@@ -0,0 +1,40 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package scheduler
+
+import (
+	"time"
+
+	"git.curoverse.com/arvados.git/lib/dispatchcloud/container"
+	"git.curoverse.com/arvados.git/lib/dispatchcloud/worker"
+	"git.curoverse.com/arvados.git/sdk/go/arvados"
+)
+
+// A ContainerQueue is a set of containers that need to be started or
+// stopped. Implemented by container.Queue and test stubs.
+type ContainerQueue interface {
+	Entries() (entries map[string]container.QueueEnt, updated time.Time)
+	Lock(uuid string) error
+	Unlock(uuid string) error
+	Cancel(uuid string) error
+	Forget(uuid string)
+	Get(uuid string) (arvados.Container, bool)
+}
+
+// A WorkerPool asynchronously starts and stops worker VMs, and starts
+// and stops containers on them. Implemented by worker.Pool and test
+// stubs.
+type WorkerPool interface {
+	Running() map[string]time.Time
+	Unallocated() map[arvados.InstanceType]int
+	Workers() map[worker.State]int
+	AtQuota() bool
+	Create(arvados.InstanceType) error
+	Shutdown(arvados.InstanceType) bool
+	StartContainer(arvados.InstanceType, arvados.Container) bool
+	KillContainer(uuid string)
+	Subscribe() <-chan struct{}
+	Unsubscribe(<-chan struct{})
+}
diff --git a/lib/dispatchcloud/scheduler/map.go b/lib/dispatchcloud/scheduler/map.go
new file mode 100644
index 000000000..5742c1f47
--- /dev/null
+++ b/lib/dispatchcloud/scheduler/map.go
@@ -0,0 +1,144 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+// Package scheduler uses a resizable worker pool to execute
+// containers in priority order.
+//
+// Scheduler functions must not be called concurrently using the same
+// queue or pool.
+package scheduler
+
+import (
+	"sort"
+
+	"git.curoverse.com/arvados.git/lib/cloud"
+	"git.curoverse.com/arvados.git/lib/dispatchcloud/container"
+	"git.curoverse.com/arvados.git/sdk/go/arvados"
+	"github.com/Sirupsen/logrus"
+)
+
+// Map maps queued containers onto unallocated workers in priority
+// order, creating new workers if needed. It locks containers that can
+// be mapped onto existing/pending workers, and starts them if
+// possible.
+//
+// Map unlocks any containers that are locked but can't be
+// mapped. (For example, this happens when the cloud provider reaches
+// quota/capacity and a previously mappable container's priority is
+// surpassed by a newer container.)
+//
+// If it encounters errors while creating new workers, Map shuts down
+// idle workers, in case they are consuming quota.
+//
+// Map should not be called without first calling FixStaleLocks.
+func Map(logger logrus.FieldLogger, queue ContainerQueue, pool WorkerPool) {
+	unsorted, _ := queue.Entries()
+	sorted := make([]container.QueueEnt, 0, len(unsorted))
+	for _, ent := range unsorted {
+		sorted = append(sorted, ent)
+	}
+	sort.Slice(sorted, func(i, j int) bool {
+		return sorted[i].Container.Priority > sorted[j].Container.Priority
+	})
+
+	running := pool.Running()
+	unalloc := pool.Unallocated()
+
+	logger.WithFields(logrus.Fields{
+		"Containers": len(sorted),
+		"Processes":  len(running),
+	}).Debug("mapping")
+
+	dontstart := map[arvados.InstanceType]bool{}
+	var overquota []container.QueueEnt // entries that are unmappable because of worker pool quota
+
+	for i, ctr := range sorted {
+		ctr, it := ctr.Container, ctr.InstanceType
+		logger := logger.WithFields(logrus.Fields{
+			"ContainerUUID": ctr.UUID,
+			"InstanceType":  it.Name,
+		})
+		if _, running := running[ctr.UUID]; running || ctr.Priority < 1 {
+			continue
+		}
+		if ctr.State == arvados.ContainerStateQueued {
+			logger.Debugf("locking")
+			if unalloc[it] < 1 && pool.AtQuota() {
+				overquota = sorted[i:]
+				break
+			}
+			err := queue.Lock(ctr.UUID)
+			if err != nil {
+				logger.WithError(err).Warnf("lock error")
+				unalloc[it]++
+				continue
+			}
+			var ok bool
+			ctr, ok = queue.Get(ctr.UUID)
+			if !ok {
+				logger.Error("(BUG?) container disappeared from queue after Lock succeeded")
+				continue
+			}
+			if ctr.State != arvados.ContainerStateLocked {
+				logger.Debugf("(race?) container has state=%q after Lock succeeded", ctr.State)
+			}
+		}
+		if ctr.State != arvados.ContainerStateLocked {
+			continue
+		}
+		if unalloc[it] < 1 {
+			logger.Info("creating new instance")
+			err := pool.Create(it)
+			if err != nil {
+				if _, ok := err.(cloud.QuotaError); !ok {
+					logger.WithError(err).Warn("error creating worker")
+				}
+				queue.Unlock(ctr.UUID)
+				// Don't let lower-priority containers
+				// starve this one by keeping
+				// idle workers alive on different
+				// instance types.  TODO: avoid
+				// getting starved here if instances
+				// of a specific type always fail.
+				overquota = sorted[i:]
+				break
+			}
+			unalloc[it]++
+		}
+		if dontstart[it] {
+			// We already tried & failed to start a
+			// higher-priority container on the same
+			// instance type. Don't let this one sneak in
+			// ahead of it.
+		} else if pool.StartContainer(it, ctr) {
+			unalloc[it]--
+		} else {
+			dontstart[it] = true
+		}
+	}
+
+	if len(overquota) > 0 {
+		// Unlock any containers that are unmappable while
+		// we're at quota.
+		for _, ctr := range overquota {
+			ctr := ctr.Container
+			if ctr.State == arvados.ContainerStateLocked {
+				logger := logger.WithField("ContainerUUID", ctr.UUID)
+				logger.Debug("unlock because pool capacity is used by higher priority containers")
+				err := queue.Unlock(ctr.UUID)
+				if err != nil {
+					logger.WithError(err).Warn("error unlocking")
+				}
+			}
+		}
+		// Shut down idle workers that didn't get any
+		// containers mapped onto them before we hit quota.
+		for it, n := range unalloc {
+			if n < 1 {
+				continue
+			}
+			pool.Shutdown(it)
+		}
+	}
+}
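
For orientation, the expected calling pattern for this package mirrors dispatcher.run() earlier in this commit: FixStaleLocks once at startup, then Map and Sync on every wakeup. A hedged sketch follows (Sync's signature is inferred from its call site in dispatcher.go; the stop channel and poll interval are illustrative):

    package example

    import (
        "time"

        "git.curoverse.com/arvados.git/lib/dispatchcloud/scheduler"
        "github.com/Sirupsen/logrus"
    )

    func runScheduler(logger logrus.FieldLogger, queue scheduler.ContainerQueue, pool scheduler.WorkerPool, stop <-chan struct{}) {
        // Let containers locked by a previous dispatcher be accounted
        // for before making new scheduling decisions.
        scheduler.FixStaleLocks(logger, queue, pool, time.Minute)
        for {
            scheduler.Map(logger, queue, pool)  // lock, start, create workers as needed
            scheduler.Sync(logger, queue, pool) // reconcile finished/cancelled work
            select {
            case <-stop:
                return
            case <-time.After(time.Second):
            }
        }
    }
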
diff --git a/lib/dispatchcloud/scheduler/map_test.go b/lib/dispatchcloud/scheduler/map_test.go
new file mode 100644
index 000000000..c40b3041b
--- /dev/null
+++ b/lib/dispatchcloud/scheduler/map_test.go
@@ -0,0 +1,259 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package scheduler
+
+import (
+	"errors"
+	"fmt"
+	"time"
+
+	"git.curoverse.com/arvados.git/lib/dispatchcloud/container"
+	"git.curoverse.com/arvados.git/lib/dispatchcloud/test"
+	"git.curoverse.com/arvados.git/lib/dispatchcloud/worker"
+	"git.curoverse.com/arvados.git/sdk/go/arvados"
+	"github.com/Sirupsen/logrus"
+	check "gopkg.in/check.v1"
+)
+
+var (
+	logger = logrus.StandardLogger()
+
+	// arbitrary example instance types
+	types = func() (r []arvados.InstanceType) {
+		for i := 0; i < 16; i++ {
+			r = append(r, test.InstanceType(i))
+		}
+		return
+	}()
+
+	// arbitrary example container UUIDs
+	uuids = func() (r []string) {
+		for i := 0; i < 16; i++ {
+			r = append(r, test.ContainerUUID(i))
+		}
+		return
+	}()
+)
+
+type stubQueue struct {
+	ents map[string]container.QueueEnt
+}
+
+func (q *stubQueue) Entries() map[string]container.QueueEnt {
+	return q.ents
+}
+func (q *stubQueue) Lock(uuid string) error {
+	return q.setState(uuid, arvados.ContainerStateLocked)
+}
+func (q *stubQueue) Unlock(uuid string) error {
+	return q.setState(uuid, arvados.ContainerStateQueued)
+}
+func (q *stubQueue) Get(uuid string) (arvados.Container, bool) {
+	ent, ok := q.ents[uuid]
+	return ent.Container, ok
+}
+func (q *stubQueue) setState(uuid string, state arvados.ContainerState) error {
+	ent, ok := q.ents[uuid]
+	if !ok {
+		return fmt.Errorf("no such ent: %q", uuid)
+	}
+	ent.Container.State = state
+	q.ents[uuid] = ent
+	return nil
+}
+
+type stubQuotaError struct {
+	error
+}
+
+func (stubQuotaError) IsQuotaError() bool { return true }
+
+type stubPool struct {
+	notify    <-chan struct{}
+	unalloc   map[arvados.InstanceType]int // idle+booting+unknown
+	idle      map[arvados.InstanceType]int
+	running   map[string]bool
+	atQuota   bool
+	canCreate int
+	creates   []arvados.InstanceType
+	starts    []string
+	shutdowns int
+}
+
+func (p *stubPool) AtQuota() bool               { return p.atQuota }
+func (p *stubPool) Subscribe() <-chan struct{}  { return p.notify }
+func (p *stubPool) Unsubscribe(<-chan struct{}) {}
+func (p *stubPool) Running() map[string]bool    { return p.running }
+func (p *stubPool) Unallocated() map[arvados.InstanceType]int {
+	r := map[arvados.InstanceType]int{}
+	for it, n := range p.unalloc {
+		r[it] = n
+	}
+	return r
+}
+func (p *stubPool) Create(it arvados.InstanceType) error {
+	p.creates = append(p.creates, it)
+	if p.canCreate < 1 {
+		return stubQuotaError{errors.New("quota")}
+	}
+	p.canCreate--
+	p.unalloc[it]++
+	return nil
+}
+func (p *stubPool) Shutdown(arvados.InstanceType) bool {
+	p.shutdowns++
+	return false
+}
+func (p *stubPool) Workers() map[worker.State]int {
+	return map[worker.State]int{
+		worker.StateBooting: len(p.unalloc) - len(p.idle),
+		worker.StateRunning: len(p.idle) - len(p.running),
+	}
+}
+func (p *stubPool) StartContainer(it arvados.InstanceType, ctr arvados.Container) bool {
+	p.starts = append(p.starts, ctr.UUID)
+	if p.idle[it] == 0 {
+		return false
+	}
+	p.idle[it]--
+	p.unalloc[it]--
+	p.running[ctr.UUID] = true
+	return true
+}
+
+var _ = check.Suite(&SchedulerSuite{})
+
+type SchedulerSuite struct{}
+
+// Map the priority=4 container to the idle node. Create a new
+// instance for the priority=3 container. Don't try to start any
+// priority<3 containers because the priority=3 container didn't
+// start immediately. Don't try to create any other nodes after the
+// failed create.
+func (*SchedulerSuite) TestMapIdle(c *check.C) {
+	queue := stubQueue{
+		ents: map[string]container.QueueEnt{
+			uuids[1]: {
+				Container:    arvados.Container{UUID: uuids[1], Priority: 1, State: arvados.ContainerStateQueued},
+				InstanceType: types[1],
+			},
+			uuids[2]: {
+				Container:    arvados.Container{UUID: uuids[2], Priority: 2, State: arvados.ContainerStateQueued},
+				InstanceType: types[1],
+			},
+			uuids[3]: {
+				Container:    arvados.Container{UUID: uuids[3], Priority: 3, State: arvados.ContainerStateQueued},
+				InstanceType: types[1],
+			},
+			uuids[4]: {
+				Container:    arvados.Container{UUID: uuids[4], Priority: 4, State: arvados.ContainerStateQueued},
+				InstanceType: types[1],
+			},
+		},
+	}
+	pool := stubPool{
+		unalloc: map[arvados.InstanceType]int{
+			types[1]: 1,
+			types[2]: 2,
+		},
+		idle: map[arvados.InstanceType]int{
+			types[1]: 1,
+			types[2]: 2,
+		},
+		running:   map[string]bool{},
+		canCreate: 1,
+	}
+	Map(logger, &queue, &pool)
+	c.Check(pool.creates, check.DeepEquals, []arvados.InstanceType{types[1]})
+	c.Check(pool.starts, check.DeepEquals, []string{uuids[4], uuids[3]})
+	c.Check(pool.running, check.DeepEquals, map[string]bool{uuids[4]: true})
+}
+
+// Shutdown some nodes if Create() fails -- and without even calling
+// Create(), if AtQuota() is true.
+func (*SchedulerSuite) TestMapShutdownAtQuota(c *check.C) {
+	for quota := 0; quota < 2; quota++ {
+		shouldCreate := types[1 : 1+quota]
+		queue := stubQueue{
+			ents: map[string]container.QueueEnt{
+				uuids[1]: {
+					Container:    arvados.Container{UUID: uuids[1], Priority: 1, State: arvados.ContainerStateQueued},
+					InstanceType: types[1],
+				},
+			},
+		}
+		pool := stubPool{
+			atQuota: quota == 0,
+			unalloc: map[arvados.InstanceType]int{
+				types[2]: 2,
+			},
+			idle: map[arvados.InstanceType]int{
+				types[2]: 2,
+			},
+			running:   map[string]bool{},
+			creates:   []arvados.InstanceType{},
+			starts:    []string{},
+			canCreate: 0,
+		}
+		Map(logger, &queue, &pool)
+		c.Check(pool.creates, check.DeepEquals, shouldCreate)
+		c.Check(pool.starts, check.DeepEquals, []string{})
+		c.Check(pool.shutdowns, check.Not(check.Equals), 0)
+	}
+}
+
+// Start lower-priority containers while waiting for new/existing
+// workers to come up for higher-priority containers.
+func (*SchedulerSuite) TestMapStartWhileCreating(c *check.C) {
+	pool := stubPool{
+		unalloc: map[arvados.InstanceType]int{
+			types[1]: 1,
+			types[2]: 1,
+		},
+		idle: map[arvados.InstanceType]int{
+			types[1]: 1,
+			types[2]: 1,
+		},
+		running:   map[string]bool{},
+		canCreate: 2,
+	}
+	queue := stubQueue{
+		ents: map[string]container.QueueEnt{
+			uuids[1]: {
+				// create a new worker
+				Container:    arvados.Container{UUID: uuids[1], Priority: 1, State: arvados.ContainerStateQueued},
+				InstanceType: types[1],
+			},
+			uuids[2]: {
+				// tentatively map to unalloc worker
+				Container:    arvados.Container{UUID: uuids[2], Priority: 2, State: arvados.ContainerStateQueued},
+				InstanceType: types[1],
+			},
+			uuids[3]: {
+				// start now on idle worker
+				Container:    arvados.Container{UUID: uuids[3], Priority: 3, State: arvados.ContainerStateQueued},
+				InstanceType: types[1],
+			},
+			uuids[4]: {
+				// create a new worker
+				Container:    arvados.Container{UUID: uuids[4], Priority: 4, State: arvados.ContainerStateQueued},
+				InstanceType: types[2],
+			},
+			uuids[5]: {
+				// tentatively map to unalloc worker
+				Container:    arvados.Container{UUID: uuids[5], Priority: 5, State: arvados.ContainerStateQueued},
+				InstanceType: types[2],
+			},
+			uuids[6]: {
+				// start now on idle worker
+				Container:    arvados.Container{UUID: uuids[6], Priority: 6, State: arvados.ContainerStateQueued},
+				InstanceType: types[2],
+			},
+		},
+	}
+	Map(logger, &queue, &pool)
+	c.Check(pool.creates, check.DeepEquals, []arvados.InstanceType{types[2], types[1]})
+	c.Check(pool.starts, check.DeepEquals, []string{uuids[6], uuids[5], uuids[3], uuids[2]})
+	c.Check(pool.running, check.DeepEquals, map[string]bool{uuids[3]: true, uuids[6]: true})
+}
diff --git a/lib/dispatchcloud/scheduler/sync.go b/lib/dispatchcloud/scheduler/sync.go
new file mode 100644
index 000000000..00e2a30f7
--- /dev/null
+++ b/lib/dispatchcloud/scheduler/sync.go
@@ -0,0 +1,85 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package scheduler
+
+import (
+	"time"
+
+	"git.curoverse.com/arvados.git/lib/dispatchcloud/container"
+	"git.curoverse.com/arvados.git/sdk/go/arvados"
+	"github.com/Sirupsen/logrus"
+)
+
+// Sync resolves discrepancies between the queue and the pool:
+//
+// Lingering crunch-run processes for finalized and unlocked/requeued
+// containers are killed.
+//
+// Locked containers whose crunch-run processes have exited are
+// requeued.
+//
+// Running containers whose crunch-run processes have exited are
+// cancelled.
+//
+// Sync must not be called concurrently with other calls to Map or
+// Sync using the same queue or pool.
+func Sync(logger logrus.FieldLogger, queue ContainerQueue, pool WorkerPool) {
+	running := pool.Running()
+	cancel := func(ent container.QueueEnt, reason string) {
+		uuid := ent.Container.UUID
+		logger := logger.WithField("ContainerUUID", uuid)
+		logger.Infof("cancelling container because %s", reason)
+		err := queue.Cancel(uuid)
+		if err != nil {
+			logger.WithError(err).Print("error cancelling container")
+		}
+	}
+	kill := func(ent container.QueueEnt) {
+		uuid := ent.Container.UUID
+		logger := logger.WithField("ContainerUUID", uuid)
+		logger.Debugf("killing crunch-run process because state=%q", ent.Container.State)
+		pool.KillContainer(uuid)
+	}
+	qEntries, qUpdated := queue.Entries()
+	for uuid, ent := range qEntries {
+		exited, running := running[uuid]
+		switch ent.Container.State {
+		case arvados.ContainerStateRunning:
+			if !running {
+				cancel(ent, "not running on any worker")
+			} else if !exited.IsZero() && qUpdated.After(exited) {
+				cancel(ent, "state=\"Running\" after crunch-run exited")
+			}
+		case arvados.ContainerStateComplete, arvados.ContainerStateCancelled:
+			if running {
+				kill(ent)
+			} else {
+				logger.WithFields(logrus.Fields{
+					"ContainerUUID": uuid,
+					"State":         ent.Container.State,
+				}).Info("container finished")
+				queue.Forget(uuid)
+			}
+		case arvados.ContainerStateQueued:
+			if running {
+				kill(ent)
+			}
+		case arvados.ContainerStateLocked:
+			if running && !exited.IsZero() && qUpdated.After(exited) {
+				logger = logger.WithFields(logrus.Fields{
+					"ContainerUUID": uuid,
+					"Exited":        time.Since(exited).Seconds(),
+				})
+				logger.Infof("requeueing container because state=%q after crunch-run exited", ent.Container.State)
+				err := queue.Unlock(uuid)
+				if err != nil {
+					logger.WithError(err).Info("error requeueing container")
+				}
+			}
+		default:
+			logger.WithField("ContainerUUID", uuid).Errorf("BUG: unexpected state %q", ent.Container.State)
+		}
+	}
+}
diff --git a/lib/dispatchcloud/ssh_executor/executor.go b/lib/dispatchcloud/ssh_executor/executor.go
new file mode 100644
index 000000000..804ae6f15
--- /dev/null
+++ b/lib/dispatchcloud/ssh_executor/executor.go
@@ -0,0 +1,172 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+// Package ssh_executor provides an implementation of pool.Executor
+// using a long-lived multiplexed SSH session.
+package ssh_executor
+
+import (
+	"bytes"
+	"errors"
+	"io"
+	"net"
+	"sync"
+	"time"
+
+	"git.curoverse.com/arvados.git/lib/cloud"
+	"golang.org/x/crypto/ssh"
+)
+
+// New returns a new Executor, using the given target.
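+//
+// A minimal usage sketch (inst and key are hypothetical values
+// supplied by the caller):
+//
+//	exr := ssh_executor.New(inst)
+//	exr.SetSigners(key)
+//	stdout, stderr, err := exr.Execute("uname -a", nil)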
+func New(t cloud.ExecutorTarget) *Executor {
+	return &Executor{target: t}
+}
+
+// An Executor uses a multiplexed SSH connection to execute shell
+// commands on a remote target. It reconnects automatically after
+// errors.
+//
+// When setting up a connection, the Executor accepts whatever host
+// key is provided by the remote server, then passes the received key
+// and the SSH connection to the target's VerifyHostKey method before
+// executing commands on the connection.
+//
+// A zero Executor must not be used before calling SetTarget.
+//
+// An Executor must not be copied.
+type Executor struct {
+	target  cloud.ExecutorTarget
+	signers []ssh.Signer
+	mtx     sync.RWMutex // controls access to instance after creation
+
+	client      *ssh.Client
+	clientErr   error
+	clientOnce  sync.Once     // initializes private state
+	clientSetup chan bool     // len>0 while client setup is in progress
+	hostKey     ssh.PublicKey // most recent host key that passed verification, if any
+}
+
+// SetSigners updates the set of private keys that will be offered to
+// the target next time the Executor sets up a new connection.
+func (exr *Executor) SetSigners(signers ...ssh.Signer) {
+	exr.mtx.Lock()
+	defer exr.mtx.Unlock()
+	exr.signers = signers
+}
+
+// SetTarget sets the current target. The new target will be used next
+// time a new connection is set up; until then, the Executor will
+// continue to use the existing target.
+//
+// The new target is assumed to represent the same host as the
+// previous target, although its address and host key might differ.
+func (exr *Executor) SetTarget(t cloud.ExecutorTarget) {
+	exr.mtx.Lock()
+	defer exr.mtx.Unlock()
+	exr.target = t
+}
+
+// Target returns the current target.
+func (exr *Executor) Target() cloud.ExecutorTarget {
+	exr.mtx.RLock()
+	defer exr.mtx.RUnlock()
+	return exr.target
+}
+
+// Execute runs cmd on the target. If an existing connection is not
+// usable, it sets up a new connection to the current target.
+func (exr *Executor) Execute(cmd string, stdin io.Reader) ([]byte, []byte, error) {
+	session, err := exr.newSession()
+	if err != nil {
+		return nil, nil, err
+	}
+	defer session.Close()
+	var stdout, stderr bytes.Buffer
+	session.Stdin = stdin
+	session.Stdout = &stdout
+	session.Stderr = &stderr
+	err = session.Run(cmd)
+	return stdout.Bytes(), stderr.Bytes(), err
+}
+
+// Create a new SSH session. If session setup fails or the SSH client
+// hasn't been set up yet, set up a new SSH client and try again.
+func (exr *Executor) newSession() (*ssh.Session, error) {
+	try := func(create bool) (*ssh.Session, error) {
+		client, err := exr.sshClient(create)
+		if err != nil {
+			return nil, err
+		}
+		return client.NewSession()
+	}
+	session, err := try(false)
+	if err != nil {
+		session, err = try(true)
+	}
+	return session, err
+}
+
+// Get the latest SSH client. If another goroutine is in the process
+// of setting one up, wait for it to finish and return its result (or
+// the last client that was set up successfully, if it fails).
+func (exr *Executor) sshClient(create bool) (*ssh.Client, error) {
+	exr.clientOnce.Do(func() {
+		exr.clientSetup = make(chan bool, 1)
+		exr.clientErr = errors.New("client not yet created")
+	})
+	defer func() { <-exr.clientSetup }()
+	select {
+	case exr.clientSetup <- true:
+		if create {
+			client, err := exr.setupSSHClient()
+			if err == nil || exr.client == nil {
+				exr.client, exr.clientErr = client, err
+			}
+			if err != nil {
+				return nil, err
+			}
+		}
+	default:
+		// Another goroutine is doing the above case.  Wait
+		// for it to finish and return whatever it leaves in
+		// exr.client.
+		exr.clientSetup <- true
+	}
+	return exr.client, exr.clientErr
+}
+
+// Create a new SSH client.
+func (exr *Executor) setupSSHClient() (*ssh.Client, error) {
+	target := exr.Target()
+	addr := target.Address()
+	if addr == "" {
+		return nil, errors.New("instance has no address")
+	}
+	var receivedKey ssh.PublicKey
+	client, err := ssh.Dial("tcp", addr, &ssh.ClientConfig{
+		User: "root",
+		Auth: []ssh.AuthMethod{
+			ssh.PublicKeys(exr.signers...),
+		},
+		HostKeyCallback: func(hostname string, remote net.Addr, key ssh.PublicKey) error {
+			receivedKey = key
+			return nil
+		},
+		Timeout: time.Minute,
+	})
+	if err != nil {
+		return nil, err
+	} else if receivedKey == nil {
+		return nil, errors.New("BUG: key was never provided to HostKeyCallback")
+	}
+
+	if exr.hostKey == nil || !bytes.Equal(exr.hostKey.Marshal(), receivedKey.Marshal()) {
+		err = target.VerifyHostKey(receivedKey, client)
+		if err != nil {
+			return nil, err
+		}
+		exr.hostKey = receivedKey
+	}
+	return client, nil
+}
diff --git a/lib/dispatchcloud/ssh_executor/executor_test.go b/lib/dispatchcloud/ssh_executor/executor_test.go
new file mode 100644
index 000000000..8dabfecad
--- /dev/null
+++ b/lib/dispatchcloud/ssh_executor/executor_test.go
@@ -0,0 +1,102 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package ssh_executor
+
+import (
+	"bytes"
+	"io"
+	"io/ioutil"
+	"sync"
+	"testing"
+	"time"
+
+	"git.curoverse.com/arvados.git/lib/dispatchcloud/test"
+	"golang.org/x/crypto/ssh"
+	check "gopkg.in/check.v1"
+)
+
+// Gocheck boilerplate
+func Test(t *testing.T) {
+	check.TestingT(t)
+}
+
+var _ = check.Suite(&ExecutorSuite{})
+
+type testTarget struct {
+	test.SSHService
+}
+
+func (*testTarget) VerifyHostKey(ssh.PublicKey, *ssh.Client) error {
+	return nil
+}
+
+type ExecutorSuite struct{}
+
+func (s *ExecutorSuite) TestExecute(c *check.C) {
+	command := `foo 'bar' "baz"`
+	stdinData := "foobar\nbaz\n"
+	_, hostpriv := test.LoadTestKey(c, "../test/sshkey_vm")
+	clientpub, clientpriv := test.LoadTestKey(c, "../test/sshkey_dispatch")
+	for _, exitcode := range []int{0, 1, 2} {
+		srv := &testTarget{
+			SSHService: test.SSHService{
+				Exec: func(cmd string, stdin io.Reader, stdout, stderr io.Writer) uint32 {
+					c.Check(cmd, check.Equals, command)
+					var wg sync.WaitGroup
+					wg.Add(2)
+					go func() {
+						io.WriteString(stdout, "stdout\n")
+						wg.Done()
+					}()
+					go func() {
+						io.WriteString(stderr, "stderr\n")
+						wg.Done()
+					}()
+					buf, err := ioutil.ReadAll(stdin)
+					wg.Wait()
+					c.Check(err, check.IsNil)
+					if err != nil {
+						return 99
+					}
+					_, err = stdout.Write(buf)
+					c.Check(err, check.IsNil)
+					return uint32(exitcode)
+				},
+				HostKey:        hostpriv,
+				AuthorizedKeys: []ssh.PublicKey{clientpub},
+			},
+		}
+		err := srv.Start()
+		c.Check(err, check.IsNil)
+		c.Logf("srv address %q", srv.Address())
+		defer srv.Close()
+
+		exr := New(srv)
+		exr.SetSigners(clientpriv)
+
+		done := make(chan bool)
+		go func() {
+			stdout, stderr, err := exr.Execute(command, bytes.NewBufferString(stdinData))
+			if exitcode == 0 {
+				c.Check(err, check.IsNil)
+			} else {
+				c.Check(err, check.NotNil)
+				err, ok := err.(*ssh.ExitError)
+				c.Assert(ok, check.Equals, true)
+				c.Check(err.ExitStatus(), check.Equals, exitcode)
+			}
+			c.Check(stdout, check.DeepEquals, []byte("stdout\n"+stdinData))
+			c.Check(stderr, check.DeepEquals, []byte("stderr\n"))
+			close(done)
+		}()
+
+		timeout := time.NewTimer(time.Second)
+		select {
+		case <-done:
+		case <-timeout.C:
+			c.Fatal("timed out")
+		}
+	}
+}
diff --git a/lib/dispatchcloud/test/doc.go b/lib/dispatchcloud/test/doc.go
new file mode 100644
index 000000000..12f3b16b2
--- /dev/null
+++ b/lib/dispatchcloud/test/doc.go
@@ -0,0 +1,7 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+// Package test provides fakes and other tools for testing cloud
+// drivers and other dispatcher modules.
+package test
diff --git a/lib/dispatchcloud/test/fixtures.go b/lib/dispatchcloud/test/fixtures.go
new file mode 100644
index 000000000..7d65ca057
--- /dev/null
+++ b/lib/dispatchcloud/test/fixtures.go
@@ -0,0 +1,27 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package test
+
+import (
+	"fmt"
+
+	"git.curoverse.com/arvados.git/sdk/go/arvados"
+)
+
+// ContainerUUID returns a fake container UUID.
+func ContainerUUID(i int) string {
+	return fmt.Sprintf("zzzzz-dz642-%015d", i)
+}
+
+// InstanceType returns a fake arvados.InstanceType called "type{i}"
+// with i CPUs and i GiB of memory.
+func InstanceType(i int) arvados.InstanceType {
+	return arvados.InstanceType{
+		Name:         fmt.Sprintf("type%d", i),
+		ProviderType: fmt.Sprintf("providertype%d", i),
+		VCPUs:        i,
+		RAM:          arvados.ByteSize(i) << 30,
+	}
+}
diff --git a/lib/dispatchcloud/test/lame_instance_set.go b/lib/dispatchcloud/test/lame_instance_set.go
new file mode 100644
index 000000000..baab407a7
--- /dev/null
+++ b/lib/dispatchcloud/test/lame_instance_set.go
@@ -0,0 +1,118 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package test
+
+import (
+	"fmt"
+	"math/rand"
+	"sync"
+
+	"git.curoverse.com/arvados.git/lib/cloud"
+	"git.curoverse.com/arvados.git/sdk/go/arvados"
+	"golang.org/x/crypto/ssh"
+)
+
+// LameInstanceSet creates instances that boot but can't run
+// containers.
+type LameInstanceSet struct {
+	Hold chan bool // set to make(chan bool) to hold operations until Release is called
+
+	mtx       sync.Mutex
+	instances map[*lameInstance]bool
+}
+
+// Create returns a new instance.
+func (p *LameInstanceSet) Create(instType arvados.InstanceType, imageID cloud.ImageID, tags cloud.InstanceTags, pubkey ssh.PublicKey) (cloud.Instance, error) {
+	inst := &lameInstance{
+		p:            p,
+		id:           cloud.InstanceID(fmt.Sprintf("lame-%x", rand.Uint64())),
+		providerType: instType.ProviderType,
+	}
+	inst.SetTags(tags)
+	if p.Hold != nil {
+		p.Hold <- true
+	}
+	p.mtx.Lock()
+	defer p.mtx.Unlock()
+	if p.instances == nil {
+		p.instances = map[*lameInstance]bool{}
+	}
+	p.instances[inst] = true
+	return inst, nil
+}
+
+// Instances returns the instances that haven't been destroyed.
+func (p *LameInstanceSet) Instances(cloud.InstanceTags) ([]cloud.Instance, error) {
+	p.mtx.Lock()
+	defer p.mtx.Unlock()
+	var instances []cloud.Instance
+	for i := range p.instances {
+		instances = append(instances, i)
+	}
+	return instances, nil
+}
+
+// Stop is a no-op, but exists to satisfy cloud.InstanceSet.
+func (p *LameInstanceSet) Stop() {
+}
+
+// Release n held calls. Blocks if n calls aren't already
+// waiting. Blocks forever if Hold is nil.
+func (p *LameInstanceSet) Release(n int) {
+	for i := 0; i < n; i++ {
+		<-p.Hold
+	}
+}
+
+type lameInstance struct {
+	p            *LameInstanceSet
+	id           cloud.InstanceID
+	providerType string
+	tags         cloud.InstanceTags
+}
+
+func (inst *lameInstance) ID() cloud.InstanceID {
+	return inst.id
+}
+
+func (inst *lameInstance) String() string {
+	return fmt.Sprint(inst.id)
+}
+
+func (inst *lameInstance) ProviderType() string {
+	return inst.providerType
+}
+
+func (inst *lameInstance) Address() string {
+	return "0.0.0.0:1234"
+}
+
+func (inst *lameInstance) SetTags(tags cloud.InstanceTags) error {
+	inst.p.mtx.Lock()
+	defer inst.p.mtx.Unlock()
+	inst.tags = cloud.InstanceTags{}
+	for k, v := range tags {
+		inst.tags[k] = v
+	}
+	return nil
+}
+
+func (inst *lameInstance) Destroy() error {
+	if inst.p.Hold != nil {
+		inst.p.Hold <- true
+	}
+	inst.p.mtx.Lock()
+	defer inst.p.mtx.Unlock()
+	delete(inst.p.instances, inst)
+	return nil
+}
+
+func (inst *lameInstance) Tags() cloud.InstanceTags {
+	return inst.tags
+}
+
+func (inst *lameInstance) VerifyHostKey(ssh.PublicKey, *ssh.Client) error {
+	return nil
+}
diff --git a/lib/dispatchcloud/test/queue.go b/lib/dispatchcloud/test/queue.go
new file mode 100644
index 000000000..909f56114
--- /dev/null
+++ b/lib/dispatchcloud/test/queue.go
@@ -0,0 +1,116 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package test
+
+import (
+	"fmt"
+	"time"
+
+	"git.curoverse.com/arvados.git/lib/dispatchcloud/container"
+	"git.curoverse.com/arvados.git/sdk/go/arvados"
+)
+
+// Queue is a test stub for container.Queue. The caller specifies the
+// initial queue state.
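+//
+// A minimal usage sketch (ctr and chooseType are hypothetical values
+// provided by the test that uses the stub):
+//
+//	q := &test.Queue{
+//		Containers: []arvados.Container{ctr},
+//		ChooseType: chooseType,
+//	}
+//	q.Update()
+//	ents, _ := q.Entries()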
+type Queue struct {
+	// Containers represent the API server database contents.
+	Containers []arvados.Container
+
+	// ChooseType will be called for each entry in Containers. It
+	// must not be nil.
+	ChooseType func(*arvados.Container) (arvados.InstanceType, error)
+
+	entries map[string]container.QueueEnt
+	updTime time.Time
+}
+
+// Entries returns the containers that were queued when Update was
+// last called.
+func (q *Queue) Entries() (map[string]container.QueueEnt, time.Time) {
+	updTime := q.updTime
+	r := map[string]container.QueueEnt{}
+	for uuid, ent := range q.entries {
+		r[uuid] = ent
+	}
+	return r, updTime
+}
+
+// Get returns the container from the cached queue, i.e., as it was
+// when Update was last called -- just like a container.Queue does. If
+// the state has been changed (via Lock, Unlock, or Cancel) since the
+// last Update, the updated state is returned.
+func (q *Queue) Get(uuid string) (arvados.Container, bool) {
+	ent, ok := q.entries[uuid]
+	return ent.Container, ok
+}
+
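+// Forget removes the container from the queue's cached entries.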
+func (q *Queue) Forget(uuid string) {
+	delete(q.entries, uuid)
+}
+
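+// Lock changes the container's cached state from Queued to Locked,
+// returning an error if its current state is not Queued.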
+func (q *Queue) Lock(uuid string) error {
+	return q.changeState(uuid, arvados.ContainerStateQueued, arvados.ContainerStateLocked)
+}
+
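+// Unlock changes the container's cached state from Locked back to
+// Queued, returning an error if its current state is not Locked.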
+func (q *Queue) Unlock(uuid string) error {
+	return q.changeState(uuid, arvados.ContainerStateLocked, arvados.ContainerStateQueued)
+}
+
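+// Cancel sets the container's cached state to Cancelled regardless
+// of its current state.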
+func (q *Queue) Cancel(uuid string) error {
+	return q.changeState(uuid, q.entries[uuid].Container.State, arvados.ContainerStateCancelled)
+}
+
+func (q *Queue) changeState(uuid string, from, to arvados.ContainerState) error {
+	ent := q.entries[uuid]
+	if ent.Container.State != from {
+		return fmt.Errorf("lock failed: state=%q", ent.Container.State)
+	}
+	ent.Container.State = to
+	q.entries[uuid] = ent
+	for i, ctr := range q.Containers {
+		if ctr.UUID == uuid {
+			q.Containers[i].State = to
+			break
+		}
+	}
+	return nil
+}
+
+// Update rebuilds the current entries from the Containers slice.
+func (q *Queue) Update() error {
+	updTime := time.Now()
+	upd := map[string]container.QueueEnt{}
+	for _, ctr := range q.Containers {
+		_, exists := q.entries[ctr.UUID]
+		if !exists && (ctr.State == arvados.ContainerStateComplete || ctr.State == arvados.ContainerStateCancelled) {
+			continue
+		}
+		it, _ := q.ChooseType(&ctr)
+		upd[ctr.UUID] = container.QueueEnt{
+			Container:    ctr,
+			InstanceType: it,
+		}
+	}
+	q.entries = upd
+	q.updTime = updTime
+	return nil
+}
+
+// Notify adds/updates an entry in the Containers slice.  This
+// simulates the effect of an API update from someone other than the
+// dispatcher -- e.g., crunch-run updating state to "Complete" when a
+// container exits.
+//
+// The resulting changes are not exposed through Get() or Entries()
+// until the next call to Update().
+func (q *Queue) Notify(upd arvados.Container) {
+	for i, ctr := range q.Containers {
+		if ctr.UUID == upd.UUID {
+			q.Containers[i] = upd
+			return
+		}
+	}
+	q.Containers = append(q.Containers, upd)
+}
diff --git a/lib/dispatchcloud/test/ssh_service.go b/lib/dispatchcloud/test/ssh_service.go
new file mode 100644
index 000000000..b1e4e03b1
--- /dev/null
+++ b/lib/dispatchcloud/test/ssh_service.go
@@ -0,0 +1,169 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package test
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"log"
+	"net"
+	"strings"
+	"sync"
+
+	"golang.org/x/crypto/ssh"
+	check "gopkg.in/check.v1"
+)
+
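+// LoadTestKey loads the SSH private key stored in the file fnm and
+// the corresponding public key from fnm+".pub", failing the test if
+// either cannot be read or parsed.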
+func LoadTestKey(c *check.C, fnm string) (ssh.PublicKey, ssh.Signer) {
+	rawpubkey, err := ioutil.ReadFile(fnm + ".pub")
+	c.Assert(err, check.IsNil)
+	pubkey, _, _, _, err := ssh.ParseAuthorizedKey(rawpubkey)
+	c.Assert(err, check.IsNil)
+	rawprivkey, err := ioutil.ReadFile(fnm)
+	c.Assert(err, check.IsNil)
+	privkey, err := ssh.ParsePrivateKey(rawprivkey)
+	c.Assert(err, check.IsNil)
+	return pubkey, privkey
+}
+
+// An SSHExecFunc handles an "exec" session on a multiplexed SSH
+// connection.
+type SSHExecFunc func(command string, stdin io.Reader, stdout, stderr io.Writer) uint32
+
+// An SSHService accepts SSH connections on an available TCP port and
+// passes clients' "exec" sessions to the provided SSHExecFunc.
+type SSHService struct {
+	Exec           SSHExecFunc
+	HostKey        ssh.Signer
+	AuthorizedKeys []ssh.PublicKey
+
+	listener net.Listener
+	conn     *ssh.ServerConn
+	setup    sync.Once
+	mtx      sync.Mutex
+	started  chan bool
+	closed   bool
+	err      error
+}
+
+// Address returns the host:port where the SSH server is listening. It
+// returns "" if called before the server is ready to accept
+// connections.
+func (ss *SSHService) Address() string {
+	ss.setup.Do(ss.start)
+	ss.mtx.Lock()
+	ln := ss.listener
+	ss.mtx.Unlock()
+	if ln == nil {
+		return ""
+	}
+	return ln.Addr().String()
+}
+
+// Close shuts down the server and releases resources. Established
+// connections are unaffected.
+func (ss *SSHService) Close() {
+	ss.Start()
+	ss.mtx.Lock()
+	ln := ss.listener
+	ss.closed = true
+	ss.mtx.Unlock()
+	if ln != nil {
+		ln.Close()
+	}
+}
+
+// Start returns when the server is ready to accept connections.
+func (ss *SSHService) Start() error {
+	ss.setup.Do(ss.start)
+	<-ss.started
+	return ss.err
+}
+
+func (ss *SSHService) start() {
+	ss.started = make(chan bool)
+	go ss.run()
+}
+
+func (ss *SSHService) run() {
+	defer close(ss.started)
+	config := &ssh.ServerConfig{
+		PublicKeyCallback: func(c ssh.ConnMetadata, pubKey ssh.PublicKey) (*ssh.Permissions, error) {
+			for _, ak := range ss.AuthorizedKeys {
+				if bytes.Equal(ak.Marshal(), pubKey.Marshal()) {
+					return &ssh.Permissions{}, nil
+				}
+			}
+			return nil, fmt.Errorf("unknown public key for %q", c.User())
+		},
+	}
+	config.AddHostKey(ss.HostKey)
+
+	listener, err := net.Listen("tcp", ":")
+	if err != nil {
+		ss.err = err
+		return
+	}
+
+	ss.mtx.Lock()
+	ss.listener = listener
+	ss.mtx.Unlock()
+
+	go func() {
+		for {
+			nConn, err := listener.Accept()
+			if err != nil && strings.Contains(err.Error(), "use of closed network connection") && ss.closed {
+				return
+			} else if err != nil {
+				log.Printf("accept: %s", err)
+				return
+			}
+			go ss.serveConn(nConn, config)
+		}
+	}()
+}
+
+func (ss *SSHService) serveConn(nConn net.Conn, config *ssh.ServerConfig) {
+	defer nConn.Close()
+	conn, newchans, reqs, err := ssh.NewServerConn(nConn, config)
+	if err != nil {
+		log.Printf("ssh.NewServerConn: %s", err)
+		return
+	}
+	defer conn.Close()
+	go ssh.DiscardRequests(reqs)
+	for newch := range newchans {
+		if newch.ChannelType() != "session" {
+			newch.Reject(ssh.UnknownChannelType, "unknown channel type")
+			continue
+		}
+		ch, reqs, err := newch.Accept()
+		if err != nil {
+			log.Printf("accept channel: %s", err)
+			return
+		}
+		var execReq struct {
+			Command string
+		}
+		go func() {
+			for req := range reqs {
+				if req.Type == "exec" && execReq.Command == "" {
+					req.Reply(true, nil)
+					ssh.Unmarshal(req.Payload, &execReq)
+					go func() {
+						var resp struct {
+							Status uint32
+						}
+						resp.Status = ss.Exec(execReq.Command, ch, ch, ch.Stderr())
+						ch.SendRequest("exit-status", false, ssh.Marshal(&resp))
+						ch.Close()
+					}()
+				}
+			}
+		}()
+	}
+}
diff --git a/lib/dispatchcloud/test/sshkey_dispatch b/lib/dispatchcloud/test/sshkey_dispatch
new file mode 100644
index 000000000..5584519c7
--- /dev/null
+++ b/lib/dispatchcloud/test/sshkey_dispatch
@@ -0,0 +1,27 @@
+-----BEGIN RSA PRIVATE KEY-----
+MIIEowIBAAKCAQEAqYm4XsQHm8sBSZFwUX5VeW1OkGsfoNzcGPG2nzzYRhNhClYZ
+0ABHhUk82HkaC/8l6d/jpYTf42HrK42nNQ0r0Yzs7qw8yZMQioK4Yk+kFyVLF78E
+GRG4pGAWXFs6pUchs/lm8fo9zcda4R3XeqgI+NO+nEERXmdRJa1FhI+Za3/S/+CV
+mg+6O00wZz2+vKmDPptGN4MCKmQOCKsMJts7wSZGyVcTtdNv7jjfr6yPAIOIL8X7
+LtarBCFaK/pD7uWll/Uj7h7D8K48nIZUrvBJJjXL8Sm4LxCNoz3Z83k8J5ZzuDRD
+gRiQe/C085mhO6VL+2fypDLwcKt1tOL8fI81MwIDAQABAoIBACR3tEnmHsDbNOav
+Oxq8cwRQh9K2yDHg8BMJgz/TZa4FIx2HEbxVIw0/iLADtJ+Z/XzGJQCIiWQuvtg6
+exoFQESt7JUWRWkSkj9JCQJUoTY9Vl7APtBpqG7rIEQzd3TvzQcagZNRQZQO6rR7
+p8sBdBSZ72lK8cJ9tM3G7Kor/VNK7KgRZFNhEWnmvEa3qMd4hzDcQ4faOn7C9NZK
+dwJAuJVVfwOLlOORYcyEkvksLaDOK2DsB/p0AaCpfSmThRbBKN5fPXYaKgUdfp3w
+70Hpp27WWymb1cgjyqSH3DY+V/kvid+5QxgxCBRq865jPLn3FFT9bWEVS/0wvJRj
+iMIRrjECgYEA4Ffv9rBJXqVXonNQbbstd2PaprJDXMUy9/UmfHL6pkq1xdBeuM7v
+yf2ocXheA8AahHtIOhtgKqwv/aRhVK0ErYtiSvIk+tXG+dAtj/1ZAKbKiFyxjkZV
+X72BH7cTlR6As5SRRfWM/HaBGEgED391gKsI5PyMdqWWdczT5KfxAksCgYEAwXYE
+ewPmV1GaR5fbh2RupoPnUJPMj36gJCnwls7sGaXDQIpdlq56zfKgrLocGXGgj+8f
+QH7FHTJQO15YCYebtsXWwB3++iG43gVlJlecPAydsap2CCshqNWC5JU5pan0QzsP
+exzNzWqfUPSbTkR2SRaN+MenZo2Y/WqScOAth7kCgYBgVoLujW9EXH5QfXJpXLq+
+jTvE38I7oVcs0bJwOLPYGzcJtlwmwn6IYAwohgbhV2pLv+EZSs42JPEK278MLKxY
+lgVkp60npgunFTWroqDIvdc1TZDVxvA8h9VeODEJlSqxczgbMcIUXBM9yRctTI+5
+7DiKlMUA4kTFW2sWwuOlFwKBgGXvrYS0FVbFJKm8lmvMu5D5x5RpjEu/yNnFT4Pn
+G/iXoz4Kqi2PWh3STl804UF24cd1k94D7hDoReZCW9kJnz67F+C67XMW+bXi2d1O
+JIBvlVfcHb1IHMA9YG7ZQjrMRmx2Xj3ce4RVPgUGHh8ra7gvLjd72/Tpf0doNClN
+ti/hAoGBAMW5D3LhU05LXWmOqpeT4VDgqk4MrTBcstVe7KdVjwzHrVHCAmI927vI
+pjpphWzpC9m3x4OsTNf8m+g6H7f3IiQS0aiFNtduXYlcuT5FHS2fSATTzg5PBon9
+1E6BudOve+WyFyBs7hFWAqWFBdWujAl4Qk5Ek09U2ilFEPE7RTgJ
+-----END RSA PRIVATE KEY-----
diff --git a/lib/dispatchcloud/test/sshkey_dispatch.pub b/lib/dispatchcloud/test/sshkey_dispatch.pub
new file mode 100644
index 000000000..1d5c1ea1b
--- /dev/null
+++ b/lib/dispatchcloud/test/sshkey_dispatch.pub
@@ -0,0 +1 @@
+ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCpibhexAebywFJkXBRflV5bU6Qax+g3NwY8bafPNhGE2EKVhnQAEeFSTzYeRoL/yXp3+OlhN/jYesrjac1DSvRjOzurDzJkxCKgrhiT6QXJUsXvwQZEbikYBZcWzqlRyGz+Wbx+j3Nx1rhHdd6qAj4076cQRFeZ1ElrUWEj5lrf9L/4JWaD7o7TTBnPb68qYM+m0Y3gwIqZA4Iqwwm2zvBJkbJVxO102/uON+vrI8Ag4gvxfsu1qsEIVor+kPu5aWX9SPuHsPwrjychlSu8EkmNcvxKbgvEI2jPdnzeTwnlnO4NEOBGJB78LTzmaE7pUv7Z/KkMvBwq3W04vx8jzUz tom at curve
diff --git a/lib/dispatchcloud/test/sshkey_vm b/lib/dispatchcloud/test/sshkey_vm
new file mode 100644
index 000000000..10b7ed1bc
--- /dev/null
+++ b/lib/dispatchcloud/test/sshkey_vm
@@ -0,0 +1,27 @@
+-----BEGIN RSA PRIVATE KEY-----
+MIIEpQIBAAKCAQEApIfWk2StZGDtmunumIeXLJ46AQrbHHvuxrSAkQf6+zUwjB2I
+rse7ezBRHWcge9U5EsigixmhUM4ozFLnUQNwC862jbmsjbyA97arG/REECNlUrEB
+HQPYHhai5yyJ89AfjWVxKyINfW0K2HX1R8nl4kdVraAgpohPLh0dGjfwzm/BcXDG
++TxW9zRz0KCs9ZRI6s2MNdv08ahKQ0azk8gRTqMADJmYNWIo3zPQ+fhlwyr6EZJ/
+HFbRtjpajEPMJPwoVPO+Wj6wztfHDYKkPIrIWbhMl6w+tEKdsmygd3Iq94ktLS3X
+AbRCfn4njS2QSlkKFEepkUJWCSSWZgFn6DLm2wIDAQABAoIBAQCb137LxcTnG1h0
+L7isCWKMBKN0cU/xvwIAfOB6f1CfuVXuodrhkpZmrPFoJFKEeQbCX/6RQwmlfGDw
+iGZKOjNbO8V2oLRs3GxcNk4FAG2ny58hoD8puIZwmYhb57gTlMMOL1PuQyb78tkf
+Bzv5b6ermV3yQ4Ypt1solrMGLo6NOZD0oDX9p0Zt9kueIhjzgP0v5//T1F4PGHZK
++sLSsMiu9u6F+PB+Oc6uv0Zee9Lnts/QiWH5f18oEculjwKWFx+JwJWiLffGg2Bl
+vbpmvHFRoRWkHTpgSiLwSUqs0ZUWU9R5h11ROg5L39MLsxQoBvHsPEnP5ssN8jGt
+aH86EZjBAoGBAM+A5B/UjhIn9m05EhDTDRzI92hGhM8f7uAwobbnjvIQyZbWlBwj
+2TmgbJdpTGVbD+iTBIwKQdcFBbWobTCZsNMpghqA/ir4YIAnZ5OX9VQ1Bc+bWE7V
+dPmMVpCgyg+ERAe+79FrYWcI3vhnBpHCsY/9p9pGQIKDzlGTWNF1HJGjAoGBAMr7
+2CTVnFImTgD3E+rH4AAAfkz+cyqfK6BUhli/NifFYZhWCs16r9QCGSORnp4gPhMY
+3mf7VBs9rk123zOMo89eJt3adTgbZ+QIxXeXilGXpbT3w1+CJMaZRrIy80E1tB5/
+KvDZcrZ78o8XWMNUa+9k55ukvgyC24ICAmOIWNlpAoGBALEFvphBF2r52MtZUsYz
+pw4VjKvS7V5eWcW891k4tsRf+frK2NQg6SK2b63EUT5ur2W0dr6ZyY2MZVCSfYRm
+uWmMEchWn389IeZyt3Q8wTize1+foXivtflm9jqwUXFnXzpUc/du6kuiT8YO7pXP
+SPgUZ+xY3pP5qjwBvlYC2PqNAoGAZ1CKMi1bdGC0wT8BLzXuqHGX136HhcEgRmnf
+O5qPaOzJAO2CcBWrGuC6hOUgc+F7VuMIiKpeo8LgTeNcNfO2iNymMbN4iEdCuMlS
+IM3MBD2IhTS6h4lJSKBJYHgYYi+AbylQ5Of4wDMUQYqjjkAQ8/dK/2h5pwqPyXtW
+VezXNEkCgYEAq4S0++y9tjlLn+w9BIkmx3bAVRDQZIzIEwxTh+jpqaUp1J0iyseJ
+71pwqQojGNF6x8GglVXa6bMrETae21WhEeHnWmzlpCWIODsYPUQ+erjDuAWi9eGk
+HLklqSEoLB8pzC6zDqjxDw+CnGERIDSaoaeoWiNKZ95IH1WiEwYjuxU=
+-----END RSA PRIVATE KEY-----
diff --git a/lib/dispatchcloud/test/sshkey_vm.pub b/lib/dispatchcloud/test/sshkey_vm.pub
new file mode 100644
index 000000000..b9d44c946
--- /dev/null
+++ b/lib/dispatchcloud/test/sshkey_vm.pub
@@ -0,0 +1 @@
+ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCkh9aTZK1kYO2a6e6Yh5csnjoBCtsce+7GtICRB/r7NTCMHYiux7t7MFEdZyB71TkSyKCLGaFQzijMUudRA3ALzraNuayNvID3tqsb9EQQI2VSsQEdA9geFqLnLInz0B+NZXErIg19bQrYdfVHyeXiR1WtoCCmiE8uHR0aN/DOb8FxcMb5PFb3NHPQoKz1lEjqzYw12/TxqEpDRrOTyBFOowAMmZg1YijfM9D5+GXDKvoRkn8cVtG2OlqMQ8wk/ChU875aPrDO18cNgqQ8ishZuEyXrD60Qp2ybKB3cir3iS0tLdcBtEJ+fieNLZBKWQoUR6mRQlYJJJZmAWfoMubb tom at curve
diff --git a/lib/dispatchcloud/test/stub_driver.go b/lib/dispatchcloud/test/stub_driver.go
new file mode 100644
index 000000000..53e312ea2
--- /dev/null
+++ b/lib/dispatchcloud/test/stub_driver.go
@@ -0,0 +1,195 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package test
+
+import (
+	"crypto/rand"
+	"errors"
+	"fmt"
+	"io"
+	math_rand "math/rand"
+	"sync"
+
+	"git.curoverse.com/arvados.git/lib/cloud"
+	"git.curoverse.com/arvados.git/sdk/go/arvados"
+	"github.com/mitchellh/mapstructure"
+	"golang.org/x/crypto/ssh"
+)
+
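+// A StubExecFunc handles an "exec" session on a stub instance's SSH
+// server, like an SSHExecFunc but with the target instance as an
+// additional argument.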
+type StubExecFunc func(instance cloud.Instance, command string, stdin io.Reader, stdout, stderr io.Writer) uint32
+
+// A StubDriver implements cloud.Driver by setting up local SSH
+// servers that pass their command execution requests to the provided
+// StubExecFunc.
+type StubDriver struct {
+	Exec           StubExecFunc
+	HostKey        ssh.Signer
+	AuthorizedKeys []ssh.PublicKey
+	instanceSets   []*StubInstanceSet
+}
+
+// InstanceSet returns a new *StubInstanceSet.
+func (sd *StubDriver) InstanceSet(params map[string]interface{}, id cloud.InstanceSetID) (cloud.InstanceSet, error) {
+	sis := StubInstanceSet{
+		driver:  sd,
+		servers: map[cloud.InstanceID]*stubServer{},
+	}
+	sd.instanceSets = append(sd.instanceSets, &sis)
+	return &sis, mapstructure.Decode(params, &sis)
+}
+
+// InstanceSets returns all instance sets that have been created by
+// the driver. This can be used to test a component that uses the
+// driver but doesn't expose the InstanceSets it has created.
+func (sd *StubDriver) InstanceSets() []*StubInstanceSet {
+	return sd.instanceSets
+}
+
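+// A StubInstanceSet manages the stub SSH servers created through a
+// StubDriver.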
+type StubInstanceSet struct {
+	driver  *StubDriver
+	servers map[cloud.InstanceID]*stubServer
+	mtx     sync.RWMutex
+	stopped bool
+}
+
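+// Create starts a new stub SSH server and returns a cloud.Instance
+// that connects to it. It returns an error if Stop has already been
+// called.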
+func (sis *StubInstanceSet) Create(it arvados.InstanceType, image cloud.ImageID, tags cloud.InstanceTags, authKey ssh.PublicKey) (cloud.Instance, error) {
+	sis.mtx.Lock()
+	defer sis.mtx.Unlock()
+	if sis.stopped {
+		return nil, errors.New("StubInstanceSet: Create called after Stop")
+	}
+	ak := sis.driver.AuthorizedKeys
+	if authKey != nil {
+		ak = append([]ssh.PublicKey{authKey}, ak...)
+	}
+	var ss *stubServer
+	ss = &stubServer{
+		sis:          sis,
+		id:           cloud.InstanceID(fmt.Sprintf("stub-%s-%x", it.ProviderType, math_rand.Int63())),
+		tags:         copyTags(tags),
+		providerType: it.ProviderType,
+		SSHService: SSHService{
+			HostKey:        sis.driver.HostKey,
+			AuthorizedKeys: ak,
+			Exec: func(command string, stdin io.Reader, stdout, stderr io.Writer) uint32 {
+				return sis.driver.Exec(ss.Instance(), command, stdin, stdout, stderr)
+			},
+		},
+	}
+
+	sis.servers[ss.id] = ss
+	return ss.Instance(), nil
+}
+
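+// Instances returns the stub instances that have been created and
+// not yet destroyed.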
+func (sis *StubInstanceSet) Instances(cloud.InstanceTags) ([]cloud.Instance, error) {
+	sis.mtx.RLock()
+	defer sis.mtx.RUnlock()
+	var r []cloud.Instance
+	for _, ss := range sis.servers {
+		r = append(r, ss.Instance())
+	}
+	return r, nil
+}
+
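+// Stop prevents any subsequent Create calls from succeeding. Calling
+// Stop twice panics.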
+func (sis *StubInstanceSet) Stop() {
+	sis.mtx.Lock()
+	defer sis.mtx.Unlock()
+	if sis.stopped {
+		panic("Stop called twice")
+	}
+	sis.stopped = true
+}
+
+type stubServer struct {
+	sis          *StubInstanceSet
+	id           cloud.InstanceID
+	tags         cloud.InstanceTags
+	providerType string
+	SSHService   SSHService
+	sync.Mutex
+}
+
+func (ss *stubServer) Instance() stubInstance {
+	ss.Lock()
+	defer ss.Unlock()
+	return stubInstance{
+		ss:   ss,
+		addr: ss.SSHService.Address(),
+		// We deliberately return a cached/stale copy of the
+		// real tags here, so that (Instance)Tags() sometimes
+		// returns old data after a call to
+		// (Instance)SetTags().  This is permitted by the
+		// driver interface, and this might help remind
+		// callers that they need to tolerate it.
+		tags: copyTags(ss.tags),
+	}
+}
+
+type stubInstance struct {
+	ss   *stubServer
+	addr string
+	tags cloud.InstanceTags
+}
+
+func (si stubInstance) ID() cloud.InstanceID {
+	return si.ss.id
+}
+
+func (si stubInstance) Address() string {
+	return si.addr
+}
+
+func (si stubInstance) Destroy() error {
+	si.ss.SSHService.Close()
+	sis := si.ss.sis
+	sis.mtx.Lock()
+	defer sis.mtx.Unlock()
+	delete(sis.servers, si.ss.id)
+	return nil
+}
+
+func (si stubInstance) ProviderType() string {
+	return si.ss.providerType
+}
+
+func (si stubInstance) SetTags(tags cloud.InstanceTags) error {
+	tags = copyTags(tags)
+	ss := si.ss
+	go func() {
+		ss.Lock()
+		defer ss.Unlock()
+		ss.tags = tags
+	}()
+	return nil
+}
+
+func (si stubInstance) Tags() cloud.InstanceTags {
+	return si.tags
+}
+
+func (si stubInstance) String() string {
+	return string(si.ss.id)
+}
+
+func (si stubInstance) VerifyHostKey(key ssh.PublicKey, client *ssh.Client) error {
+	buf := make([]byte, 512)
+	_, err := io.ReadFull(rand.Reader, buf)
+	if err != nil {
+		return err
+	}
+	sig, err := si.ss.sis.driver.HostKey.Sign(rand.Reader, buf)
+	if err != nil {
+		return err
+	}
+	return key.Verify(buf, sig)
+}
+
+func copyTags(src cloud.InstanceTags) cloud.InstanceTags {
+	dst := cloud.InstanceTags{}
+	for k, v := range src {
+		dst[k] = v
+	}
+	return dst
+}
diff --git a/lib/dispatchcloud/worker/gocheck_test.go b/lib/dispatchcloud/worker/gocheck_test.go
new file mode 100644
index 000000000..b4ca66c97
--- /dev/null
+++ b/lib/dispatchcloud/worker/gocheck_test.go
@@ -0,0 +1,16 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package worker
+
+import (
+	"testing"
+
+	check "gopkg.in/check.v1"
+)
+
+// Gocheck boilerplate
+func Test(t *testing.T) {
+	check.TestingT(t)
+}
diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
new file mode 100644
index 000000000..cf8fac380
--- /dev/null
+++ b/lib/dispatchcloud/worker/pool.go
@@ -0,0 +1,852 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package worker
+
+import (
+	"bytes"
+	"io"
+	"sort"
+	"strings"
+	"sync"
+	"time"
+
+	"git.curoverse.com/arvados.git/lib/cloud"
+	"git.curoverse.com/arvados.git/sdk/go/arvados"
+	"github.com/Sirupsen/logrus"
+	"github.com/prometheus/client_golang/prometheus"
+)
+
+// A View shows a worker's current state and recent activity.
+type View struct {
+	Instance             string
+	Price                float64
+	ArvadosInstanceType  string
+	ProviderInstanceType string
+	LastContainerUUID    string
+	Unallocated          time.Time
+	WorkerState          string
+}
+
+// An Executor executes shell commands on a remote host.
+type Executor interface {
+	// Run cmd on the current target.
+	Execute(cmd string, stdin io.Reader) (stdout, stderr []byte, err error)
+
+	// Use the given target for subsequent operations. The new
+	// target is the same host as the previous target, but it
+	// might return a different address and verify a different
+	// host key.
+	//
+	// SetTarget is called frequently, and in most cases the new
+	// target will behave exactly the same as the old one. An
+	// implementation should optimize accordingly.
+	//
+	// SetTarget must not block on concurrent Execute calls.
+	SetTarget(cloud.ExecutorTarget)
+}
+
+const (
+	defaultSyncInterval       = time.Minute
+	defaultProbeInterval      = time.Second * 10
+	defaultMaxProbesPerSecond = 10
+	defaultTimeoutIdle        = time.Minute
+	defaultTimeoutBooting     = time.Minute * 10
+	defaultTimeoutProbe       = time.Minute * 10
+)
+
+func duration(conf arvados.Duration, def time.Duration) time.Duration {
+	if conf > 0 {
+		return time.Duration(conf)
+	} else {
+		return def
+	}
+}
+
+// NewPool creates a Pool of workers backed by instanceSet.
+//
+// New instances are configured and set up according to the given
+// cluster configuration.
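+//
+// A minimal usage sketch (logger, reg, instanceSet, newExecutor, and
+// cluster are hypothetical values supplied by the dispatcher):
+//
+//	wp := worker.NewPool(logger, reg, instanceSet, newExecutor, cluster)
+//	defer wp.Stop()
+//	ch := wp.Subscribe()
+//	defer wp.Unsubscribe(ch)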
+func NewPool(logger logrus.FieldLogger, reg *prometheus.Registry, instanceSet cloud.InstanceSet, newExecutor func(cloud.Instance) Executor, cluster *arvados.Cluster) *Pool {
+	wp := &Pool{
+		logger:             logger,
+		instanceSet:        instanceSet,
+		newExecutor:        newExecutor,
+		bootProbeCommand:   cluster.CloudVMs.BootProbeCommand,
+		imageID:            cloud.ImageID(cluster.CloudVMs.ImageID),
+		instanceTypes:      cluster.InstanceTypes,
+		maxProbesPerSecond: cluster.Dispatch.MaxProbesPerSecond,
+		probeInterval:      duration(cluster.Dispatch.ProbeInterval, defaultProbeInterval),
+		syncInterval:       duration(cluster.CloudVMs.SyncInterval, defaultSyncInterval),
+		timeoutIdle:        duration(cluster.CloudVMs.TimeoutIdle, defaultTimeoutIdle),
+		timeoutBooting:     duration(cluster.CloudVMs.TimeoutBooting, defaultTimeoutBooting),
+		timeoutProbe:       duration(cluster.CloudVMs.TimeoutProbe, defaultTimeoutProbe),
+	}
+	wp.registerMetrics(reg)
+	go wp.run()
+	return wp
+}
+
+// Pool is a resizable worker pool backed by a cloud.InstanceSet. A
+// zero Pool should not be used. Call NewPool to create a new Pool.
+type Pool struct {
+	// configuration
+	logger             logrus.FieldLogger
+	instanceSet        cloud.InstanceSet
+	newExecutor        func(cloud.Instance) Executor
+	bootProbeCommand   string
+	imageID            cloud.ImageID
+	instanceTypes      map[string]arvados.InstanceType
+	syncInterval       time.Duration
+	probeInterval      time.Duration
+	maxProbesPerSecond int
+	timeoutIdle        time.Duration
+	timeoutBooting     time.Duration
+	timeoutProbe       time.Duration
+
+	// private state
+	subscribers  map[<-chan struct{}]chan<- struct{}
+	creating     map[arvados.InstanceType]int // goroutines waiting for (InstanceSet)Create to return
+	workers      map[cloud.InstanceID]*worker
+	loaded       bool                 // loaded list of instances from InstanceSet at least once
+	exited       map[string]time.Time // containers whose crunch-run proc has exited, but KillContainer has not been called
+	atQuotaUntil time.Time
+	stop         chan bool
+	mtx          sync.RWMutex
+	setupOnce    sync.Once
+
+	mInstances         prometheus.Gauge
+	mContainersRunning prometheus.Gauge
+	mVCPUs             prometheus.Gauge
+	mVCPUsInuse        prometheus.Gauge
+	mMemory            prometheus.Gauge
+	mMemoryInuse       prometheus.Gauge
+}
+
+type worker struct {
+	state       State
+	instance    cloud.Instance
+	executor    Executor
+	instType    arvados.InstanceType
+	vcpus       int64
+	memory      int64
+	booted      bool
+	probed      time.Time
+	updated     time.Time
+	busy        time.Time
+	unallocated time.Time
+	lastUUID    string
+	running     map[string]struct{}
+	starting    map[string]struct{}
+	probing     chan struct{}
+}
+
+// Subscribe returns a channel that becomes ready whenever a worker's
+// state changes.
+//
+// Example:
+//
+//	ch := wp.Subscribe()
+//	defer wp.Unsubscribe(ch)
+//	for range ch {
+//		// ...try scheduling some work...
+//		if done {
+//			break
+//		}
+//	}
+func (wp *Pool) Subscribe() <-chan struct{} {
+	wp.setupOnce.Do(wp.setup)
+	wp.mtx.Lock()
+	defer wp.mtx.Unlock()
+	ch := make(chan struct{}, 1)
+	wp.subscribers[ch] = ch
+	return ch
+}
+
+// Unsubscribe stops sending updates to the given channel.
+func (wp *Pool) Unsubscribe(ch <-chan struct{}) {
+	wp.setupOnce.Do(wp.setup)
+	wp.mtx.Lock()
+	defer wp.mtx.Unlock()
+	delete(wp.subscribers, ch)
+}
+
+// Unallocated returns the number of unallocated (creating + booting +
+// idle + unknown) workers for each instance type.
+func (wp *Pool) Unallocated() map[arvados.InstanceType]int {
+	wp.setupOnce.Do(wp.setup)
+	wp.mtx.RLock()
+	defer wp.mtx.RUnlock()
+	u := map[arvados.InstanceType]int{}
+	for it, c := range wp.creating {
+		u[it] = c
+	}
+	for _, wkr := range wp.workers {
+		if len(wkr.running)+len(wkr.starting) == 0 && (wkr.state == StateRunning || wkr.state == StateBooting || wkr.state == StateUnknown) {
+			u[wkr.instType]++
+		}
+	}
+	return u
+}
+
+// Create a new instance with the given type, and add it to the worker
+// pool. The worker is added immediately; instance creation runs in
+// the background.
+func (wp *Pool) Create(it arvados.InstanceType) error {
+	logger := wp.logger.WithField("InstanceType", it.Name)
+	wp.setupOnce.Do(wp.setup)
+	wp.mtx.Lock()
+	defer wp.mtx.Unlock()
+	tags := cloud.InstanceTags{"InstanceType": it.Name}
+	wp.creating[it]++
+	go func() {
+		inst, err := wp.instanceSet.Create(it, wp.imageID, tags, nil)
+		wp.mtx.Lock()
+		defer wp.mtx.Unlock()
+		wp.creating[it]--
+		if err, ok := err.(cloud.QuotaError); ok && err.IsQuotaError() {
+			wp.atQuotaUntil = time.Now().Add(time.Minute)
+		}
+		if err != nil {
+			logger.WithError(err).Error("create failed")
+			go wp.notify()
+			return
+		}
+		wp.updateWorker(inst, it, StateBooting)
+	}()
+	return nil
+}
+
+// AtQuota returns true if Create is not expected to work at the
+// moment.
+func (wp *Pool) AtQuota() bool {
+	return time.Now().Before(wp.atQuotaUntil)
+}
+
+// Add or update worker attached to the given instance. Use
+// initialState if a new worker is created. Caller must have lock.
+func (wp *Pool) updateWorker(inst cloud.Instance, it arvados.InstanceType, initialState State) {
+	id := inst.ID()
+	if wp.workers[id] != nil {
+		wp.workers[id].executor.SetTarget(inst)
+		wp.workers[id].instance = inst
+		wp.workers[id].updated = time.Now()
+		if initialState == StateBooting && wp.workers[id].state == StateUnknown {
+			wp.workers[id].state = StateBooting
+		}
+		return
+	}
+	if initialState == StateUnknown && inst.Tags()["hold"] != "" {
+		initialState = StateHold
+	}
+	wp.logger.WithFields(logrus.Fields{
+		"InstanceType": it.Name,
+		"Instance":     inst,
+		"State":        initialState,
+	}).Infof("instance appeared in cloud")
+	wp.workers[id] = &worker{
+		executor:    wp.newExecutor(inst),
+		state:       initialState,
+		instance:    inst,
+		instType:    it,
+		probed:      time.Now(),
+		busy:        time.Now(),
+		updated:     time.Now(),
+		unallocated: time.Now(),
+		running:     make(map[string]struct{}),
+		starting:    make(map[string]struct{}),
+		probing:     make(chan struct{}, 1),
+	}
+	go wp.notify()
+}
+
+// Shutdown shuts down a worker with the given type, or returns false
+// if all workers with the given type are busy.
+func (wp *Pool) Shutdown(it arvados.InstanceType) bool {
+	wp.setupOnce.Do(wp.setup)
+	wp.mtx.Lock()
+	defer wp.mtx.Unlock()
+	logger := wp.logger.WithField("InstanceType", it.Name)
+	logger.Info("shutdown requested")
+	for _, tryState := range []State{StateBooting, StateRunning} {
+		// TODO: shut down the worker with the longest idle
+		// time (Running) or the earliest create time
+		// (Booting)
+		for _, wkr := range wp.workers {
+			if wkr.state != tryState || len(wkr.running)+len(wkr.starting) > 0 {
+				continue
+			}
+			if wkr.instType != it {
+				continue
+			}
+			logger = logger.WithField("Instance", wkr.instance)
+			logger.Info("shutting down")
+			wp.shutdown(wkr, logger)
+			return true
+		}
+	}
+	return false
+}
+
+// caller must have lock
+func (wp *Pool) shutdown(wkr *worker, logger logrus.FieldLogger) {
+	wkr.updated = time.Now()
+	wkr.state = StateShutdown
+	go func() {
+		err := wkr.instance.Destroy()
+		if err != nil {
+			logger.WithError(err).Warn("shutdown failed")
+			return
+		}
+		wp.mtx.Lock()
+		wp.atQuotaUntil = time.Now()
+		wp.mtx.Unlock()
+		wp.notify()
+	}()
+}
+
+// Workers returns the current number of workers in each state.
+func (wp *Pool) Workers() map[State]int {
+	wp.setupOnce.Do(wp.setup)
+	wp.mtx.Lock()
+	defer wp.mtx.Unlock()
+	r := map[State]int{}
+	for _, w := range wp.workers {
+		r[w.state]++
+	}
+	return r
+}
+
+// Running returns the container UUIDs being prepared/run on workers.
+func (wp *Pool) Running() map[string]time.Time {
+	wp.setupOnce.Do(wp.setup)
+	wp.mtx.Lock()
+	defer wp.mtx.Unlock()
+	r := map[string]time.Time{}
+	for _, wkr := range wp.workers {
+		for uuid := range wkr.running {
+			r[uuid] = time.Time{}
+		}
+		for uuid := range wkr.starting {
+			r[uuid] = time.Time{}
+		}
+	}
+	for uuid, exited := range wp.exited {
+		r[uuid] = exited
+	}
+	return r
+}
+
+// StartContainer starts a container on an idle worker immediately if
+// possible, otherwise returns false.
+func (wp *Pool) StartContainer(it arvados.InstanceType, ctr arvados.Container) bool {
+	logger := wp.logger.WithFields(logrus.Fields{
+		"InstanceType":  it.Name,
+		"ContainerUUID": ctr.UUID,
+		"Priority":      ctr.Priority,
+	})
+	wp.setupOnce.Do(wp.setup)
+	wp.mtx.Lock()
+	defer wp.mtx.Unlock()
+	var wkr *worker
+	for _, w := range wp.workers {
+		if w.instType == it && w.state == StateRunning && len(w.running)+len(w.starting) == 0 {
+			if wkr == nil || w.busy.After(wkr.busy) {
+				wkr = w
+			}
+		}
+	}
+	if wkr == nil {
+		return false
+	}
+	logger = logger.WithField("Instance", wkr.instance)
+	logger.Debug("starting container")
+	wkr.starting[ctr.UUID] = struct{}{}
+	go func() {
+		stdout, stderr, err := wkr.executor.Execute("crunch-run --detach '"+ctr.UUID+"'", nil)
+		wp.mtx.Lock()
+		defer wp.mtx.Unlock()
+		wkr.updated = time.Now()
+		delete(wkr.starting, ctr.UUID)
+		wkr.running[ctr.UUID] = struct{}{}
+		if err != nil {
+			logger.WithField("stdout", string(stdout)).
+				WithField("stderr", string(stderr)).
+				WithError(err).
+				Error("error starting crunch-run process")
+			// Leave uuid in wkr.running, though: it's
+			// possible the error was just a communication
+			// failure and the process was in fact
+			// started.  Wait for next probe to find out.
+			return
+		}
+		logger.Info("crunch-run process started")
+		wkr.lastUUID = ctr.UUID
+	}()
+	return true
+}
+
+// KillContainer kills the crunch-run process for the given container
+// UUID, if it's running on any worker.
+func (wp *Pool) KillContainer(uuid string) {
+	wp.mtx.Lock()
+	defer wp.mtx.Unlock()
+	if _, ok := wp.exited[uuid]; ok {
+		wp.logger.WithField("ContainerUUID", uuid).Debug("clearing placeholder for exited crunch-run process")
+		delete(wp.exited, uuid)
+		return
+	}
+	for _, wkr := range wp.workers {
+		if _, ok := wkr.running[uuid]; ok {
+			go wp.kill(wkr, uuid)
+			return
+		}
+	}
+	wp.logger.WithField("ContainerUUID", uuid).Debug("cannot kill: already disappeared")
+}
+
+func (wp *Pool) kill(wkr *worker, uuid string) {
+	logger := wp.logger.WithFields(logrus.Fields{
+		"ContainerUUID": uuid,
+		"Instance":      wkr.instance,
+	})
+	logger.Debug("killing process")
+	stdout, stderr, err := wkr.executor.Execute("crunch-run --kill "+uuid, nil)
+	if err != nil {
+		logger.WithFields(logrus.Fields{
+			"stderr": string(stderr),
+			"stdout": string(stdout),
+			"error":  err,
+		}).Warn("kill failed")
+		return
+	}
+	logger.Debug("killing process succeeded")
+	wp.mtx.Lock()
+	defer wp.mtx.Unlock()
+	if _, ok := wkr.running[uuid]; ok {
+		delete(wkr.running, uuid)
+		wkr.updated = time.Now()
+		go wp.notify()
+	}
+}
+
+func (wp *Pool) registerMetrics(reg *prometheus.Registry) {
+	if reg == nil {
+		reg = prometheus.NewRegistry()
+	}
+	wp.mInstances = prometheus.NewGauge(prometheus.GaugeOpts{
+		Namespace: "arvados",
+		Subsystem: "dispatchcloud",
+		Name:      "instances_total",
+		Help:      "Number of cloud VMs including pending, booting, running, held, and shutting down.",
+	})
+	reg.MustRegister(wp.mInstances)
+	wp.mContainersRunning = prometheus.NewGauge(prometheus.GaugeOpts{
+		Namespace: "arvados",
+		Subsystem: "dispatchcloud",
+		Name:      "containers_running",
+		Help:      "Number of containers reported running by cloud VMs.",
+	})
+	reg.MustRegister(wp.mContainersRunning)
+
+	wp.mVCPUs = prometheus.NewGauge(prometheus.GaugeOpts{
+		Namespace: "arvados",
+		Subsystem: "dispatchcloud",
+		Name:      "vcpus_total",
+		Help:      "Total VCPUs on all cloud VMs.",
+	})
+	reg.MustRegister(wp.mVCPUs)
+	wp.mVCPUsInuse = prometheus.NewGauge(prometheus.GaugeOpts{
+		Namespace: "arvados",
+		Subsystem: "dispatchcloud",
+		Name:      "vcpus_inuse",
+		Help:      "VCPUs on cloud VMs that are running containers.",
+	})
+	reg.MustRegister(wp.mVCPUsInuse)
+	wp.mMemory = prometheus.NewGauge(prometheus.GaugeOpts{
+		Namespace: "arvados",
+		Subsystem: "dispatchcloud",
+		Name:      "memory_bytes_total",
+		Help:      "Total memory on all cloud VMs.",
+	})
+	reg.MustRegister(wp.mMemory)
+	wp.mMemoryInuse = prometheus.NewGauge(prometheus.GaugeOpts{
+		Namespace: "arvados",
+		Subsystem: "dispatchcloud",
+		Name:      "memory_bytes_inuse",
+		Help:      "Memory on cloud VMs that are running containers.",
+	})
+	reg.MustRegister(wp.mMemoryInuse)
+}
+
+func (wp *Pool) updateMetrics() {
+	wp.mtx.RLock()
+	defer wp.mtx.RUnlock()
+
+	var alloc, cpu, cpuInuse, mem, memInuse int64
+	for _, wkr := range wp.workers {
+		cpu += int64(wkr.instType.VCPUs)
+		mem += int64(wkr.instType.RAM)
+		if len(wkr.running)+len(wkr.starting) == 0 {
+			continue
+		}
+		alloc += int64(len(wkr.running) + len(wkr.starting))
+		cpuInuse += int64(wkr.instType.VCPUs)
+		memInuse += int64(wkr.instType.RAM)
+	}
+	wp.mInstances.Set(float64(len(wp.workers)))
+	wp.mContainersRunning.Set(float64(alloc))
+	wp.mVCPUs.Set(float64(cpu))
+	wp.mMemory.Set(float64(mem))
+	wp.mVCPUsInuse.Set(float64(cpuInuse))
+	wp.mMemoryInuse.Set(float64(memInuse))
+}
+
+func (wp *Pool) run() {
+	wp.setupOnce.Do(wp.setup)
+
+	go func() {
+		ch := wp.Subscribe()
+		defer wp.Unsubscribe(ch)
+		for range ch {
+			wp.updateMetrics()
+		}
+	}()
+
+	go func() {
+		maxPPS := wp.maxProbesPerSecond
+		if maxPPS < 1 {
+			maxPPS = defaultMaxProbesPerSecond
+		}
+		limitticker := time.NewTicker(time.Second / time.Duration(maxPPS))
+		defer limitticker.Stop()
+
+		probeticker := time.NewTicker(wp.probeInterval)
+		defer probeticker.Stop()
+
+		workers := []cloud.InstanceID{}
+		for range probeticker.C {
+			workers = workers[:0]
+			wp.mtx.Lock()
+			for id, wkr := range wp.workers {
+				if wkr.state == StateShutdown || wp.autoShutdown(wkr) {
+					continue
+				}
+				workers = append(workers, id)
+			}
+			wp.mtx.Unlock()
+
+			for _, id := range workers {
+				wp.mtx.Lock()
+				wkr, ok := wp.workers[id]
+				wp.mtx.Unlock()
+				if !ok || wkr.state == StateShutdown {
+					// Deleted/shutdown while we
+					// were probing others
+					continue
+				}
+				select {
+				case wkr.probing <- struct{}{}:
+					go func() {
+						wp.probeAndUpdate(wkr)
+						<-wkr.probing
+					}()
+				default:
+					wp.logger.WithField("Instance", wkr.instance).Debug("still waiting for last probe to finish")
+				}
+				select {
+				case <-wp.stop:
+					return
+				case <-limitticker.C:
+				}
+			}
+		}
+	}()
+
+	timer := time.NewTimer(time.Nanosecond)
+	for {
+		err := wp.getInstancesAndSync()
+		if err != nil {
+			wp.logger.WithError(err).Warn("sync failed")
+		}
+
+		// Reset timer to desired interval, and ignore the
+		// tick that might have already arrived.
+		timer.Stop()
+		select {
+		case <-timer.C:
+		default:
+		}
+		timer.Reset(wp.syncInterval)
+
+		select {
+		case <-timer.C:
+		case <-wp.stop:
+			wp.logger.Debug("worker.Pool stopped")
+			return
+		}
+	}
+}
+
+// caller must have lock.
+func (wp *Pool) autoShutdown(wkr *worker) bool {
+	if len(wkr.running)+len(wkr.starting) > 0 || wkr.state != StateRunning {
+		return false
+	}
+	age := time.Since(wkr.unallocated)
+	if age < wp.timeoutIdle {
+		return false
+	}
+	logger := wp.logger.WithFields(logrus.Fields{
+		"Age":      age,
+		"Instance": wkr.instance,
+	})
+	logger.Info("shutdown idle worker")
+	wp.shutdown(wkr, logger)
+	return true
+}
+
+// Stop synchronizing with the InstanceSet.
+func (wp *Pool) Stop() {
+	wp.setupOnce.Do(wp.setup)
+	close(wp.stop)
+}
+
+// View reports status information for every worker in the pool.
+func (wp *Pool) View() []View {
+	var r []View
+	wp.setupOnce.Do(wp.setup)
+	wp.mtx.Lock()
+	for _, w := range wp.workers {
+		r = append(r, View{
+			Instance:             w.instance.String(),
+			Price:                w.instType.Price,
+			ArvadosInstanceType:  w.instType.Name,
+			ProviderInstanceType: w.instType.ProviderType,
+			LastContainerUUID:    w.lastUUID,
+			Unallocated:          w.unallocated,
+			WorkerState:          w.state.String(),
+		})
+	}
+	wp.mtx.Unlock()
+	sort.Slice(r, func(i, j int) bool {
+		return strings.Compare(r[i].Instance, r[j].Instance) < 0
+	})
+	return r
+}
+
+func (wp *Pool) setup() {
+	wp.creating = map[arvados.InstanceType]int{}
+	wp.exited = map[string]time.Time{}
+	wp.workers = map[cloud.InstanceID]*worker{}
+	wp.subscribers = map[<-chan struct{}]chan<- struct{}{}
+}
+
+func (wp *Pool) notify() {
+	wp.mtx.RLock()
+	defer wp.mtx.RUnlock()
+	for _, send := range wp.subscribers {
+		select {
+		case send <- struct{}{}:
+		default:
+		}
+	}
+}
+
+func (wp *Pool) getInstancesAndSync() error {
+	wp.setupOnce.Do(wp.setup)
+	wp.logger.Debug("getting instance list")
+	threshold := time.Now()
+	instances, err := wp.instanceSet.Instances(cloud.InstanceTags{})
+	if err != nil {
+		return err
+	}
+	wp.sync(threshold, instances)
+	wp.logger.Debug("sync done")
+	return nil
+}
+
+// Add/remove/update workers based on instances, which were obtained
+// from the instanceSet. However, don't clobber any other updates that
+// already happened after threshold.
+func (wp *Pool) sync(threshold time.Time, instances []cloud.Instance) {
+	wp.mtx.Lock()
+	defer wp.mtx.Unlock()
+	wp.logger.WithField("Instances", len(instances)).Debug("sync instances")
+
+	for _, inst := range instances {
+		itTag := inst.Tags()["InstanceType"]
+		it, ok := wp.instanceTypes[itTag]
+		if !ok {
+			wp.logger.WithField("Instance", inst).Errorf("unknown InstanceType tag %q --- ignoring", itTag)
+			continue
+		}
+		wp.updateWorker(inst, it, StateUnknown)
+	}
+
+	for id, wkr := range wp.workers {
+		if wkr.updated.After(threshold) {
+			continue
+		}
+		logger := wp.logger.WithFields(logrus.Fields{
+			"Instance":    wkr.instance,
+			"WorkerState": wkr.state,
+		})
+		logger.Info("instance disappeared in cloud")
+		delete(wp.workers, id)
+		go wp.notify()
+	}
+
+	if !wp.loaded {
+		wp.loaded = true
+		wp.logger.WithField("N", len(wp.workers)).Info("loaded initial instance list")
+	}
+}
+
+// should be called in a new goroutine
+func (wp *Pool) probeAndUpdate(wkr *worker) {
+	logger := wp.logger.WithField("Instance", wkr.instance)
+	wp.mtx.Lock()
+	updated := wkr.updated
+	booted := wkr.booted
+	wp.mtx.Unlock()
+
+	var (
+		ctrUUIDs []string
+		ok       bool
+		stderr   []byte
+	)
+	if !booted {
+		booted, stderr = wp.probeBooted(wkr)
+		wp.mtx.Lock()
+		if booted && !wkr.booted {
+			wkr.booted = booted
+			logger.Info("instance booted")
+		} else {
+			booted = wkr.booted
+		}
+		wp.mtx.Unlock()
+	}
+	if booted {
+		ctrUUIDs, ok, stderr = wp.probeRunning(wkr)
+	}
+	logger = logger.WithField("stderr", string(stderr))
+	wp.mtx.Lock()
+	defer wp.mtx.Unlock()
+	if !ok {
+		if wkr.state == StateShutdown {
+			return
+		}
+		dur := time.Since(wkr.probed)
+		logger := logger.WithFields(logrus.Fields{
+			"Duration": dur,
+			"State":    wkr.state,
+		})
+		if wkr.state == StateBooting {
+			logger.Debug("new instance not responding")
+		} else {
+			logger.Info("instance not responding")
+		}
+
+		if wkr.state == StateHold {
+			return
+		}
+
+		label, threshold := "", wp.timeoutProbe
+		if wkr.state == StateBooting {
+			label, threshold = "new ", wp.timeoutBooting
+		}
+		if dur > threshold {
+			logger.WithField("Since", wkr.probed).Warnf("%sinstance unresponsive, shutting down", label)
+			wp.shutdown(wkr, logger)
+		}
+		return
+	}
+
+	updateTime := time.Now()
+	wkr.probed = updateTime
+	if len(ctrUUIDs) > 0 {
+		wkr.busy = updateTime
+		wkr.lastUUID = ctrUUIDs[0]
+	}
+	if wkr.state == StateShutdown || wkr.state == StateHold {
+		// Hold/shutdown: leave the state as it is.
+	} else if booted {
+		if wkr.state != StateRunning {
+			wkr.state = StateRunning
+			go wp.notify()
+		}
+	} else {
+		wkr.state = StateBooting
+	}
+
+	if updated != wkr.updated {
+		// Worker was updated (e.g., by starting a new
+		// container) after the probe began. Avoid clobbering
+		// those changes with the probe results.
+		return
+	}
+
+	if len(ctrUUIDs) == 0 && len(wkr.running) > 0 {
+		wkr.unallocated = updateTime
+	}
+	running := map[string]struct{}{}
+	changed := false
+	for _, uuid := range ctrUUIDs {
+		running[uuid] = struct{}{}
+		if _, ok := wkr.running[uuid]; !ok {
+			changed = true
+		}
+	}
+	for uuid := range wkr.running {
+		if _, ok := running[uuid]; !ok {
+			logger.WithField("ContainerUUID", uuid).Info("crunch-run process ended")
+			wp.exited[uuid] = updateTime
+			changed = true
+		}
+	}
+	if changed {
+		wkr.running = running
+		wkr.updated = updateTime
+		go wp.notify()
+	}
+}
+
+func (wp *Pool) probeRunning(wkr *worker) (running []string, ok bool, stderr []byte) {
+	cmd := "crunch-run --list"
+	stdout, stderr, err := wkr.executor.Execute(cmd, nil)
+	if err != nil {
+		wp.logger.WithFields(logrus.Fields{
+			"Instance": wkr.instance,
+			"Command":  cmd,
+			"stdout":   string(stdout),
+			"stderr":   string(stderr),
+		}).WithError(err).Warn("probe failed")
+		return nil, false, stderr
+	}
+	stdout = bytes.TrimRight(stdout, "\n")
+	if len(stdout) == 0 {
+		return nil, true, stderr
+	}
+	return strings.Split(string(stdout), "\n"), true, stderr
+}
+
+func (wp *Pool) probeBooted(wkr *worker) (ok bool, stderr []byte) {
+	cmd := wp.bootProbeCommand
+	if cmd == "" {
+		cmd = "true"
+	}
+	stdout, stderr, err := wkr.executor.Execute(cmd, nil)
+	logger := wp.logger.WithFields(logrus.Fields{
+		"Instance": wkr.instance,
+		"Command":  cmd,
+		"stdout":   string(stdout),
+		"stderr":   string(stderr),
+	})
+	if err != nil {
+		logger.WithError(err).Debug("boot probe failed")
+		return false, stderr
+	}
+	logger.Info("boot probe succeeded")
+	return true, stderr
+}
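
The Subscribe/Unsubscribe/notify mechanism used above (in run() and
notify()) is the pool's outward-facing change signal. A minimal sketch
of a consumer, assuming the lib/dispatchcloud/worker package is
imported as "worker"; watchPool and runQueuePass are illustrative
names, not part of this change:

// watchPool re-runs a scheduling pass whenever the pool reports a
// change (worker added/removed, state change, container started/ended).
func watchPool(pool *worker.Pool, runQueuePass func()) {
	ch := pool.Subscribe()
	defer pool.Unsubscribe(ch)
	for range ch {
		runQueuePass()
	}
}
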
diff --git a/lib/dispatchcloud/worker/pool_test.go b/lib/dispatchcloud/worker/pool_test.go
new file mode 100644
index 000000000..cf4bff12d
--- /dev/null
+++ b/lib/dispatchcloud/worker/pool_test.go
@@ -0,0 +1,124 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package worker
+
+import (
+	"time"
+
+	"git.curoverse.com/arvados.git/lib/cloud"
+	"git.curoverse.com/arvados.git/lib/dispatchcloud/test"
+	"git.curoverse.com/arvados.git/sdk/go/arvados"
+	"github.com/Sirupsen/logrus"
+	check "gopkg.in/check.v1"
+)
+
+const GiB arvados.ByteSize = 1 << 30
+
+var _ = check.Suite(&PoolSuite{})
+
+type PoolSuite struct{}
+
+func (suite *PoolSuite) SetUpSuite(c *check.C) {
+	logrus.StandardLogger().SetLevel(logrus.DebugLevel)
+}
+
+func (suite *PoolSuite) TestStartContainer(c *check.C) {
+	// TODO: use an instanceSet stub with an SSH server
+	c.Fail()
+}
+
+func (suite *PoolSuite) TestVerifyHostKey(c *check.C) {
+	// TODO: use an instanceSet stub with an SSH server
+	c.Fail()
+}
+
+func (suite *PoolSuite) TestCreateUnallocShutdown(c *check.C) {
+	lameInstanceSet := &test.LameInstanceSet{Hold: make(chan bool)}
+	type1 := arvados.InstanceType{Name: "a1s", ProviderType: "a1.small", VCPUs: 1, RAM: 1 * GiB, Price: .01}
+	type2 := arvados.InstanceType{Name: "a2m", ProviderType: "a2.medium", VCPUs: 2, RAM: 2 * GiB, Price: .02}
+	pool := &Pool{
+		logger:      logrus.StandardLogger(),
+		newExecutor: func(cloud.Instance) Executor { return &stubExecutor{} },
+		instanceSet: lameInstanceSet,
+		instanceTypes: arvados.InstanceTypeMap{
+			type1.Name: type1,
+			type2.Name: type2,
+		},
+	}
+	notify := pool.Subscribe()
+	defer pool.Unsubscribe(notify)
+	notify2 := pool.Subscribe()
+	defer pool.Unsubscribe(notify2)
+
+	c.Check(pool.Unallocated()[type1], check.Equals, 0)
+	c.Check(pool.Unallocated()[type2], check.Equals, 0)
+	pool.Create(type2)
+	pool.Create(type1)
+	pool.Create(type2)
+	c.Check(pool.Unallocated()[type1], check.Equals, 1)
+	c.Check(pool.Unallocated()[type2], check.Equals, 2)
+	// Unblock the pending Create calls and (before calling Sync!)
+	// wait for the pool to process the returned instances.
+	go lameInstanceSet.Release(3)
+	suite.wait(c, pool, notify, func() bool {
+		list, err := lameInstanceSet.Instances(nil)
+		return err == nil && len(list) == 3
+	})
+
+	c.Check(pool.Unallocated()[type1], check.Equals, 1)
+	c.Check(pool.Unallocated()[type2], check.Equals, 2)
+	pool.getInstancesAndSync()
+	c.Check(pool.Unallocated()[type1], check.Equals, 1)
+	c.Check(pool.Unallocated()[type2], check.Equals, 2)
+
+	c.Check(pool.Shutdown(type2), check.Equals, true)
+	suite.wait(c, pool, notify, func() bool {
+		return pool.Unallocated()[type1] == 1 && pool.Unallocated()[type2] == 1
+	})
+	c.Check(pool.Shutdown(type2), check.Equals, true)
+	suite.wait(c, pool, notify, func() bool {
+		return pool.Unallocated()[type1] == 1 && pool.Unallocated()[type2] == 0
+	})
+	c.Check(pool.Shutdown(type2), check.Equals, false)
+	for {
+		// Consume any waiting notifications to ensure the
+		// next one we get is from Shutdown.
+		select {
+		case <-notify:
+			continue
+		default:
+		}
+		break
+	}
+	c.Check(pool.Shutdown(type1), check.Equals, true)
+	suite.wait(c, pool, notify, func() bool {
+		return pool.Unallocated()[type1] == 0 && pool.Unallocated()[type2] == 0
+	})
+	select {
+	case <-notify2:
+	case <-time.After(time.Second):
+		c.Error("notify2 did not receive a notification")
+	}
+	go lameInstanceSet.Release(3) // unblock Destroy calls
+}
+
+func (suite *PoolSuite) wait(c *check.C, pool *Pool, notify <-chan struct{}, ready func() bool) {
+	timeout := time.NewTimer(time.Second).C
+	for !ready() {
+		select {
+		case <-notify:
+			continue
+		case <-timeout:
+		}
+		break
+	}
+	c.Check(ready(), check.Equals, true)
+}
+
+type stubExecutor struct{}
+
+func (*stubExecutor) SetInstance(cloud.Instance) {}
+
+func (*stubExecutor) Execute(cmd string, stdin []byte) ([]byte, []byte, error) { return nil, nil, nil }
diff --git a/lib/dispatchcloud/worker/worker.go b/lib/dispatchcloud/worker/worker.go
new file mode 100644
index 000000000..7828d4f69
--- /dev/null
+++ b/lib/dispatchcloud/worker/worker.go
@@ -0,0 +1,45 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package worker
+
+import (
+	"time"
+)
+
+// State indicates whether a worker is available to do work, and (if
+// not) whether/when it is expected to become ready.
+type State int
+
+const (
+	StateUnknown  State = iota // might be running a container already
+	StateBooting               // instance is booting
+	StateRunning               // instance is running
+	StateShutdown              // worker has stopped monitoring the instance
+	StateHold                  // running, but not available to run new containers
+)
+
+const (
+	// TODO: configurable
+	maxPingFailTime = 10 * time.Minute
+)
+
+var stateString = map[State]string{
+	StateUnknown:  "unknown",
+	StateBooting:  "booting",
+	StateRunning:  "running",
+	StateShutdown: "shutdown",
+	StateHold:     "hold",
+}
+
+// String implements fmt.Stringer.
+func (s State) String() string {
+	return stateString[s]
+}
+
+// MarshalText implements encoding.TextMarshaler so a JSON encoding of
+// map[State]anything uses the state's string representation.
+func (s State) MarshalText() ([]byte, error) {
+	return []byte(stateString[s]), nil
+}
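
A quick illustration of why State implements MarshalText: a map keyed
by State encodes to readable JSON. This snippet is not part of the
change; the import path matches the one used elsewhere in this patch.

package main

import (
	"encoding/json"
	"fmt"

	"git.curoverse.com/arvados.git/lib/dispatchcloud/worker"
)

func main() {
	counts := map[worker.State]int{worker.StateBooting: 2, worker.StateRunning: 5}
	buf, err := json.Marshal(counts)
	fmt.Println(string(buf), err) // e.g. {"booting":2,"running":5} <nil>
}
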
diff --git a/sdk/go/arvados/config.go b/sdk/go/arvados/config.go
index e2e9907d5..bfa86abf6 100644
--- a/sdk/go/arvados/config.go
+++ b/sdk/go/arvados/config.go
@@ -60,6 +60,8 @@ type Cluster struct {
 	ManagementToken    string
 	NodeProfiles       map[string]NodeProfile
 	InstanceTypes      InstanceTypeMap
+	CloudVMs           CloudVMs
+	Dispatch           Dispatch
 	HTTPRequestTimeout Duration
 	RemoteClusters     map[string]RemoteCluster
 	PostgreSQL         PostgreSQL
@@ -95,6 +97,50 @@ type InstanceType struct {
 	Preemptible  bool
 }
 
+type Dispatch struct {
+	// PEM encoded SSH key (RSA, DSA, or ECDSA) able to log in to
+	// cloud VMs.
+	PrivateKey []byte
+
+	// Max time for workers to come up before abandoning stale
+	// locks from previous run
+	StaleLockTimeout Duration
+
+	// Interval between queue polls
+	PollInterval Duration
+
+	// Interval between probes to each worker
+	ProbeInterval Duration
+
+	// Maximum total worker probes per second
+	MaxProbesPerSecond int
+}
+
+type CloudVMs struct {
+	// Shell command that exits zero IFF the VM is fully booted
+	// and ready to run containers, e.g., "mount | grep
+	// /encrypted-tmp"
+	BootProbeCommand string
+	SyncInterval     Duration
+
+	// Maximum idle time before automatic shutdown
+	TimeoutIdle Duration
+
+	// Maximum booting time before automatic shutdown
+	TimeoutBooting Duration
+
+	// Maximum time with no successful probes before automatic shutdown
+	TimeoutProbe Duration
+
+	// Time after shutdown to retry shutdown
+	TimeoutShutdown Duration
+
+	ImageID string
+
+	Driver           string
+	DriverParameters map[string]interface{}
+}
+
 type InstanceTypeMap map[string]InstanceType
 
 var errDuplicateInstanceTypeName = errors.New("duplicate instance type name")
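
A hedged sketch of how the new Dispatch and CloudVMs sections might be
populated in code; the field names come from the structs above, while
every value and the driver name are invented for illustration (assumes
"time" and the arvados SDK package are imported):

cluster := arvados.Cluster{
	CloudVMs: arvados.CloudVMs{
		BootProbeCommand: "mount | grep /encrypted-tmp",
		SyncInterval:     arvados.Duration(time.Minute),
		TimeoutIdle:      arvados.Duration(5 * time.Minute),
		TimeoutBooting:   arvados.Duration(10 * time.Minute),
		TimeoutProbe:     arvados.Duration(10 * time.Minute),
		TimeoutShutdown:  arvados.Duration(10 * time.Second),
		ImageID:          "example-image-id",
		Driver:           "ExampleDriver",
		DriverParameters: map[string]interface{}{"Region": "example-region"},
	},
	Dispatch: arvados.Dispatch{
		StaleLockTimeout:   arvados.Duration(time.Minute),
		PollInterval:       arvados.Duration(10 * time.Second),
		ProbeInterval:      arvados.Duration(10 * time.Second),
		MaxProbesPerSecond: 10,
	},
}
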
@@ -159,45 +205,48 @@ func (cc *Cluster) GetNodeProfile(node string) (*NodeProfile, error) {
 }
 
 type NodeProfile struct {
-	Controller  SystemServiceInstance `json:"arvados-controller"`
-	Health      SystemServiceInstance `json:"arvados-health"`
-	Keepbalance SystemServiceInstance `json:"keep-balance"`
-	Keepproxy   SystemServiceInstance `json:"keepproxy"`
-	Keepstore   SystemServiceInstance `json:"keepstore"`
-	Keepweb     SystemServiceInstance `json:"keep-web"`
-	Nodemanager SystemServiceInstance `json:"arvados-node-manager"`
-	RailsAPI    SystemServiceInstance `json:"arvados-api-server"`
-	Websocket   SystemServiceInstance `json:"arvados-ws"`
-	Workbench   SystemServiceInstance `json:"arvados-workbench"`
+	Controller    SystemServiceInstance `json:"arvados-controller"`
+	Health        SystemServiceInstance `json:"arvados-health"`
+	Keepbalance   SystemServiceInstance `json:"keep-balance"`
+	Keepproxy     SystemServiceInstance `json:"keepproxy"`
+	Keepstore     SystemServiceInstance `json:"keepstore"`
+	Keepweb       SystemServiceInstance `json:"keep-web"`
+	Nodemanager   SystemServiceInstance `json:"arvados-node-manager"`
+	DispatchCloud SystemServiceInstance `json:"arvados-dispatch-cloud"`
+	RailsAPI      SystemServiceInstance `json:"arvados-api-server"`
+	Websocket     SystemServiceInstance `json:"arvados-ws"`
+	Workbench     SystemServiceInstance `json:"arvados-workbench"`
 }
 
 type ServiceName string
 
 const (
-	ServiceNameRailsAPI    ServiceName = "arvados-api-server"
-	ServiceNameController  ServiceName = "arvados-controller"
-	ServiceNameNodemanager ServiceName = "arvados-node-manager"
-	ServiceNameWorkbench   ServiceName = "arvados-workbench"
-	ServiceNameWebsocket   ServiceName = "arvados-ws"
-	ServiceNameKeepbalance ServiceName = "keep-balance"
-	ServiceNameKeepweb     ServiceName = "keep-web"
-	ServiceNameKeepproxy   ServiceName = "keepproxy"
-	ServiceNameKeepstore   ServiceName = "keepstore"
+	ServiceNameRailsAPI      ServiceName = "arvados-api-server"
+	ServiceNameController    ServiceName = "arvados-controller"
+	ServiceNameDispatchCloud ServiceName = "arvados-dispatch-cloud"
+	ServiceNameNodemanager   ServiceName = "arvados-node-manager"
+	ServiceNameWorkbench     ServiceName = "arvados-workbench"
+	ServiceNameWebsocket     ServiceName = "arvados-ws"
+	ServiceNameKeepbalance   ServiceName = "keep-balance"
+	ServiceNameKeepweb       ServiceName = "keep-web"
+	ServiceNameKeepproxy     ServiceName = "keepproxy"
+	ServiceNameKeepstore     ServiceName = "keepstore"
 )
 
 // ServicePorts returns the configured listening address (or "" if
 // disabled) for each service on the node.
 func (np *NodeProfile) ServicePorts() map[ServiceName]string {
 	return map[ServiceName]string{
-		ServiceNameRailsAPI:    np.RailsAPI.Listen,
-		ServiceNameController:  np.Controller.Listen,
-		ServiceNameNodemanager: np.Nodemanager.Listen,
-		ServiceNameWorkbench:   np.Workbench.Listen,
-		ServiceNameWebsocket:   np.Websocket.Listen,
-		ServiceNameKeepbalance: np.Keepbalance.Listen,
-		ServiceNameKeepweb:     np.Keepweb.Listen,
-		ServiceNameKeepproxy:   np.Keepproxy.Listen,
-		ServiceNameKeepstore:   np.Keepstore.Listen,
+		ServiceNameRailsAPI:      np.RailsAPI.Listen,
+		ServiceNameController:    np.Controller.Listen,
+		ServiceNameDispatchCloud: np.DispatchCloud.Listen,
+		ServiceNameNodemanager:   np.Nodemanager.Listen,
+		ServiceNameWorkbench:     np.Workbench.Listen,
+		ServiceNameWebsocket:     np.Websocket.Listen,
+		ServiceNameKeepbalance:   np.Keepbalance.Listen,
+		ServiceNameKeepweb:       np.Keepweb.Listen,
+		ServiceNameKeepproxy:     np.Keepproxy.Listen,
+		ServiceNameKeepstore:     np.Keepstore.Listen,
 	}
 }
 
diff --git a/sdk/go/arvados/container.go b/sdk/go/arvados/container.go
index 2622c1370..def4e33cb 100644
--- a/sdk/go/arvados/container.go
+++ b/sdk/go/arvados/container.go
@@ -18,10 +18,11 @@ type Container struct {
 	Mounts               map[string]Mount     `json:"mounts"`
 	Output               string               `json:"output"`
 	OutputPath           string               `json:"output_path"`
-	Priority             int                  `json:"priority"`
+	Priority             int64                `json:"priority"`
 	RuntimeConstraints   RuntimeConstraints   `json:"runtime_constraints"`
 	State                ContainerState       `json:"state"`
 	SchedulingParameters SchedulingParameters `json:"scheduling_parameters"`
+	ExitCode             int                  `json:"exit_code"`
 }
 
 // Container is an arvados#container resource.
diff --git a/services/crunch-run/background.go b/services/crunch-run/background.go
new file mode 100644
index 000000000..3dbfcfcde
--- /dev/null
+++ b/services/crunch-run/background.go
@@ -0,0 +1,192 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package main
+
+import (
+	"encoding/json"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"strings"
+	"syscall"
+	"time"
+)
+
+var (
+	lockdir    = "/var/run"
+	lockprefix = "crunch-run-"
+	locksuffix = ".lock"
+)
+
+// procinfo is saved in each process's lockfile.
+type procinfo struct {
+	UUID   string
+	PID    int
+	Stdout string
+	Stderr string
+}
+
+// Detach acquires a lock for the given uuid, and starts the current
+// program as a child process (with -nodetach prepended to the given
+// arguments so the child knows not to detach again). The lock is
+// passed along to the child process.
+func Detach(uuid string, args []string, stdout, stderr io.Writer) int {
+	return exitcode(stderr, detach(uuid, args, stdout, stderr))
+}
+func detach(uuid string, args []string, stdout, stderr io.Writer) error {
+	lockfile, err := os.OpenFile(filepath.Join(lockdir, lockprefix+uuid+locksuffix), os.O_CREATE|os.O_RDWR, 0700)
+	if err != nil {
+		return err
+	}
+	defer lockfile.Close()
+	err = syscall.Flock(int(lockfile.Fd()), syscall.LOCK_EX|syscall.LOCK_NB)
+	if err != nil {
+		return err
+	}
+	lockfile.Truncate(0)
+
+	outfile, err := ioutil.TempFile("", "crunch-run-"+uuid+"-stdout-")
+	if err != nil {
+		return err
+	}
+	defer outfile.Close()
+	errfile, err := ioutil.TempFile("", "crunch-run-"+uuid+"-stderr-")
+	if err != nil {
+		os.Remove(outfile.Name())
+		return err
+	}
+	defer errfile.Close()
+
+	cmd := exec.Command(args[0], append([]string{"-detached"}, args[1:]...)...)
+	cmd.Stdout = outfile
+	cmd.Stderr = errfile
+	cmd.ExtraFiles = []*os.File{lockfile}
+	err = cmd.Start()
+	if err != nil {
+		os.Remove(outfile.Name())
+		os.Remove(errfile.Name())
+		return err
+	}
+
+	w := io.MultiWriter(stdout, lockfile)
+	err = json.NewEncoder(w).Encode(procinfo{
+		PID:    cmd.Process.Pid,
+		UUID:   uuid,
+		PID:    cmd.Process.Pid,
+		Stderr: errfile.Name(),
+	})
+	if err != nil {
+		os.Remove(outfile.Name())
+		os.Remove(errfile.Name())
+		return err
+	}
+	return nil
+}
+
+// Kill finds the crunch-run process corresponding to the given uuid,
+// and sends the given signal to it. It then waits up to 1 second for
+// the process to die. It returns 0 if the process is successfully
+// killed or didn't exist in the first place.
+func Kill(uuid string, signal syscall.Signal, stdout, stderr io.Writer) int {
+	return exitcode(stderr, kill(uuid, signal, stdout, stderr))
+}
+
+func kill(uuid string, signal syscall.Signal, stdout, stderr io.Writer) error {
+	path := filepath.Join(lockdir, lockprefix+uuid+locksuffix)
+	f, err := os.Open(path)
+	if os.IsNotExist(err) {
+		return nil
+	} else if err != nil {
+		return err
+	}
+	defer f.Close()
+
+	var pi procinfo
+	err = json.NewDecoder(f).Decode(&pi)
+	if err != nil {
+		return fmt.Errorf("%s: %s\n", path, err)
+	}
+
+	if pi.UUID != uuid || pi.PID == 0 {
+		return fmt.Errorf("%s: bogus procinfo: %+v", path, pi)
+	}
+
+	proc, err := os.FindProcess(pi.PID)
+	if err != nil {
+		return err
+	}
+
+	err = proc.Signal(signal)
+	for deadline := time.Now().Add(time.Second); err == nil && time.Now().Before(deadline); time.Sleep(time.Second / 100) {
+		err = proc.Signal(syscall.Signal(0))
+	}
+	if err == nil {
+		return fmt.Errorf("pid %d: sent signal %d (%s) but process is still alive", pi.PID, signal, signal)
+	}
+	fmt.Fprintf(stderr, "pid %d: %s\n", pi.PID, err)
+	return nil
+}
+
+// List UUIDs of active crunch-run processes.
+func List(stdout, stderr io.Writer) int {
+	return exitcode(stderr, filepath.Walk(lockdir, func(path string, info os.FileInfo, err error) error {
+		if info.IsDir() && path != lockdir {
+			return filepath.SkipDir
+		}
+		if name := info.Name(); !strings.HasPrefix(name, lockprefix) || !strings.HasSuffix(name, locksuffix) {
+			return nil
+		}
+		if info.Size() == 0 {
+			// race: process has opened/locked but hasn't yet written pid/uuid
+			return nil
+		}
+
+		f, err := os.Open(path)
+		if err != nil {
+			return nil
+		}
+		defer f.Close()
+
+		// TODO: Do this check without risk of disrupting lock
+		// acquisition during races, e.g., by connecting to a
+		// unix socket or checking /proc/$pid/fd/$n ->
+		// lockfile.
+		err = syscall.Flock(int(f.Fd()), syscall.LOCK_SH|syscall.LOCK_NB)
+		if err == nil {
+			// lockfile is stale
+			err := os.Remove(path)
+			if err != nil {
+				fmt.Fprintln(stderr, err)
+			}
+			return nil
+		}
+
+		var pi procinfo
+		err = json.NewDecoder(f).Decode(&pi)
+		if err != nil {
+			fmt.Fprintf(stderr, "%s: %s\n", path, err)
+			return nil
+		}
+		if pi.UUID == "" || pi.PID == 0 {
+			fmt.Fprintf(stderr, "%s: bogus procinfo: %+v\n", path, pi)
+			return nil
+		}
+
+		fmt.Fprintln(stdout, pi.UUID)
+		return nil
+	}))
+}
+
+// If err is nil, return 0 ("success"); otherwise, print err to stderr
+// and return 1.
+func exitcode(stderr io.Writer, err error) int {
+	if err != nil {
+		fmt.Fprintln(stderr, err)
+		return 1
+	}
+	return 0
+}
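
The staleness check in List relies on flock semantics: a detached
crunch-run process keeps an exclusive lock on its lockfile for as long
as it runs (the open, locked file descriptor is handed to the child via
cmd.ExtraFiles in detach()), so being able to take a shared,
non-blocking lock proves the process is gone. A standalone sketch of
that check; lockfileIsStale is an illustrative name, not part of this
change:

package main

import (
	"fmt"
	"os"
	"syscall"
)

// lockfileIsStale reports whether no process currently holds an
// exclusive flock on path.
func lockfileIsStale(path string) (bool, error) {
	f, err := os.Open(path)
	if err != nil {
		return false, err
	}
	defer f.Close()
	err = syscall.Flock(int(f.Fd()), syscall.LOCK_SH|syscall.LOCK_NB)
	if err == syscall.EWOULDBLOCK {
		// Exclusive lock held: the crunch-run process is alive.
		return false, nil
	}
	return err == nil, err
}

func main() {
	stale, err := lockfileIsStale("/var/run/crunch-run-example.lock")
	fmt.Println(stale, err)
}
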
diff --git a/services/crunch-run/crunchrun.go b/services/crunch-run/crunchrun.go
index 44560b80a..74ab77ab0 100644
--- a/services/crunch-run/crunchrun.go
+++ b/services/crunch-run/crunchrun.go
@@ -1716,6 +1716,10 @@ func main() {
 	cgroupParent := flag.String("cgroup-parent", "docker", "name of container's parent cgroup (ignored if -cgroup-parent-subsystem is used)")
 	cgroupParentSubsystem := flag.String("cgroup-parent-subsystem", "", "use current cgroup for given subsystem as parent cgroup for container")
 	caCertsPath := flag.String("ca-certs", "", "Path to TLS root certificates")
+	detach := flag.Bool("detach", false, "Detach from parent process and run in the background")
+	sleep := flag.Duration("sleep", 0, "Delay before starting (testing use only)")
+	kill := flag.Int("kill", -1, "Send signal to an existing crunch-run process for given UUID")
+	list := flag.Bool("list", false, "List UUIDs of existing crunch-run processes")
 	enableNetwork := flag.String("container-enable-networking", "default",
 		`Specify if networking should be enabled for container.  One of 'default', 'always':
     	default: only enable networking if container requests it.
@@ -1727,8 +1731,29 @@ func main() {
 	memprofile := flag.String("memprofile", "", "write memory profile to `file` after running container")
 	getVersion := flag.Bool("version", false, "Print version information and exit.")
 	checkContainerd := flag.Duration("check-containerd", 60*time.Second, "Periodic check if (docker-)containerd is running (use 0s to disable).")
+
+	detached := false
+	if len(os.Args) > 1 && os.Args[1] == "-detached" {
+		// This process was invoked by a parent process, which
+		// has passed along its own arguments, including
+		// -detach, after the leading -detached flag.  Strip
+		// the leading -detached flag (it's not recognized by
+		// flag.Parse()) ... and remember not to detach all
+		// over again in this process.
+		os.Args = append([]string{os.Args[0]}, os.Args[2:]...)
+		detached = true
+	}
 	flag.Parse()
 
+	switch {
+	case *detach && !detached:
+		os.Exit(Detach(flag.Arg(0), os.Args, os.Stdout, os.Stderr))
+	case *kill >= 0:
+		os.Exit(Kill(flag.Arg(0), syscall.Signal(*kill), os.Stdout, os.Stderr))
+	case *list:
+		os.Exit(List(os.Stdout, os.Stderr))
+	}
+
 	// Print version information if requested
 	if *getVersion {
 		fmt.Printf("crunch-run %s\n", version)
@@ -1736,6 +1761,7 @@ func main() {
 	}
 
 	log.Printf("crunch-run %s started", version)
+	time.Sleep(*sleep)
 
 	containerId := flag.Arg(0)
 
diff --git a/vendor/vendor.json b/vendor/vendor.json
index aa6b2d773..aee25beab 100644
--- a/vendor/vendor.json
+++ b/vendor/vendor.json
@@ -349,6 +349,12 @@
 			"revisionTime": "2016-12-03T19:45:07Z"
 		},
 		{
+			"checksumSHA1": "ewGq4nGalpCQOHcmBTdAEQx1wW0=",
+			"path": "github.com/mitchellh/mapstructure",
+			"revision": "bb74f1db0675b241733089d5a1faa5dd8b0ef57b",
+			"revisionTime": "2018-05-11T14:21:26Z"
+		},
+		{
 			"checksumSHA1": "OFNit1Qx2DdWhotfREKodDNUwCM=",
 			"path": "github.com/opencontainers/go-digest",
 			"revision": "279bed98673dd5bef374d3b6e4b09e2af76183bf",

-----------------------------------------------------------------------


hooks/post-receive
-- 



