[arvados] created: 2.7.0-6003-g84e7003ad2

git repository hosting git@public.arvados.org
Tue Feb 13 19:06:46 UTC 2024


        at  84e7003ad2adee4a580bf3e40eed122ddf85ea8f (commit)


commit 84e7003ad2adee4a580bf3e40eed122ddf85ea8f
Author: Tom Clegg <tom@curii.com>
Date:   Tue Feb 13 12:09:54 2024 -0500

    2960: Fix some needlessly exported identifiers.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@curii.com>

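For reference, Go exports an identifier exactly when its first character is upper case, so lower-casing names like AzureBlobVolume, UnixVolume, and IndexOptions removes them from the package's public API with no other behavior change. A two-line illustration (placeholder names, not from the patch):

    package keepstore

    type ExportedVolume struct{} // visible to packages that import keepstore
    type internalVolume struct{} // visible only inside package keepstore
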
diff --git a/services/keepstore/azure_blob_volume.go b/services/keepstore/azure_blob_volume.go
index bdd669bb46..31660614f3 100644
--- a/services/keepstore/azure_blob_volume.go
+++ b/services/keepstore/azure_blob_volume.go
@@ -31,7 +31,7 @@ func init() {
 }
 
 func newAzureBlobVolume(params newVolumeParams) (volume, error) {
-	v := &AzureBlobVolume{
+	v := &azureBlobVolume{
 		RequestTimeout:    azureDefaultRequestTimeout,
 		WriteRaceInterval: azureDefaultWriteRaceInterval,
 		WriteRacePollTime: azureDefaultWriteRacePollTime,
@@ -79,7 +79,7 @@ func newAzureBlobVolume(params newVolumeParams) (volume, error) {
 	return v, v.check()
 }
 
-func (v *AzureBlobVolume) check() error {
+func (v *azureBlobVolume) check() error {
 	lbls := prometheus.Labels{"device_id": v.DeviceID()}
 	v.container.stats.opsCounters, v.container.stats.errCounters, v.container.stats.ioBytes = v.metrics.getCounterVecsFor(lbls)
 	return nil
@@ -93,9 +93,9 @@ const (
 	azureDefaultWriteRacePollTime    = arvados.Duration(time.Second)
 )
 
-// An AzureBlobVolume stores and retrieves blocks in an Azure Blob
+// An azureBlobVolume stores and retrieves blocks in an Azure Blob
 // container.
-type AzureBlobVolume struct {
+type azureBlobVolume struct {
 	StorageAccountName   string
 	StorageAccountKey    string
 	StorageBaseURL       string // "" means default, "core.windows.net"
@@ -125,12 +125,12 @@ func (*singleSender) Send(c *storage.Client, req *http.Request) (resp *http.Resp
 }
 
 // DeviceID returns a globally unique ID for the storage container.
-func (v *AzureBlobVolume) DeviceID() string {
+func (v *azureBlobVolume) DeviceID() string {
 	return "azure://" + v.StorageBaseURL + "/" + v.StorageAccountName + "/" + v.ContainerName
 }
 
 // Return true if expires_at metadata attribute is found on the block
-func (v *AzureBlobVolume) checkTrashed(loc string) (bool, map[string]string, error) {
+func (v *azureBlobVolume) checkTrashed(loc string) (bool, map[string]string, error) {
 	metadata, err := v.container.GetBlobMetadata(loc)
 	if err != nil {
 		return false, metadata, v.translateError(err)
@@ -147,7 +147,7 @@ func (v *AzureBlobVolume) checkTrashed(loc string) (bool, map[string]string, err
 // If the block is younger than azureWriteRaceInterval and is
 // unexpectedly empty, assume a BlockWrite operation is in progress,
 // and wait for it to finish writing.
-func (v *AzureBlobVolume) BlockRead(ctx context.Context, hash string, writeTo io.Writer) (int, error) {
+func (v *azureBlobVolume) BlockRead(ctx context.Context, hash string, writeTo io.Writer) (int, error) {
 	trashed, _, err := v.checkTrashed(hash)
 	if err != nil {
 		return 0, err
@@ -201,7 +201,7 @@ func (v *AzureBlobVolume) BlockRead(ctx context.Context, hash string, writeTo io
 	return streamer.Wrote(), err
 }
 
-func (v *AzureBlobVolume) get(ctx context.Context, hash string, dst io.WriterAt) (int, error) {
+func (v *azureBlobVolume) get(ctx context.Context, hash string, dst io.WriterAt) (int, error) {
 	ctx, cancel := context.WithCancel(ctx)
 	defer cancel()
 
@@ -318,7 +318,7 @@ func (v *AzureBlobVolume) get(ctx context.Context, hash string, dst io.WriterAt)
 
 // BlockWrite stores a block on the volume. If it already exists, its
 // timestamp is updated.
-func (v *AzureBlobVolume) BlockWrite(ctx context.Context, hash string, data []byte) error {
+func (v *azureBlobVolume) BlockWrite(ctx context.Context, hash string, data []byte) error {
 	// Send the block data through a pipe, so that (if we need to)
 	// we can close the pipe early and abandon our
 	// CreateBlockBlobFromReader() goroutine, without worrying
@@ -359,7 +359,7 @@ func (v *AzureBlobVolume) BlockWrite(ctx context.Context, hash string, data []by
 }
 
 // BlockTouch updates the last-modified property of a block blob.
-func (v *AzureBlobVolume) BlockTouch(hash string) error {
+func (v *azureBlobVolume) BlockTouch(hash string) error {
 	trashed, metadata, err := v.checkTrashed(hash)
 	if err != nil {
 		return err
@@ -373,7 +373,7 @@ func (v *AzureBlobVolume) BlockTouch(hash string) error {
 }
 
 // Mtime returns the last-modified property of a block blob.
-func (v *AzureBlobVolume) Mtime(hash string) (time.Time, error) {
+func (v *azureBlobVolume) Mtime(hash string) (time.Time, error) {
 	trashed, _, err := v.checkTrashed(hash)
 	if err != nil {
 		return time.Time{}, err
@@ -391,7 +391,7 @@ func (v *AzureBlobVolume) Mtime(hash string) (time.Time, error) {
 
 // Index writes a list of Keep blocks that are stored in the
 // container.
-func (v *AzureBlobVolume) Index(ctx context.Context, prefix string, writer io.Writer) error {
+func (v *azureBlobVolume) Index(ctx context.Context, prefix string, writer io.Writer) error {
 	params := storage.ListBlobsParameters{
 		Prefix:  prefix,
 		Include: &storage.IncludeBlobDataset{Metadata: true},
@@ -432,7 +432,7 @@ func (v *AzureBlobVolume) Index(ctx context.Context, prefix string, writer io.Wr
 }
 
 // call v.container.ListBlobs, retrying if needed.
-func (v *AzureBlobVolume) listBlobs(page int, params storage.ListBlobsParameters) (resp storage.BlobListResponse, err error) {
+func (v *azureBlobVolume) listBlobs(page int, params storage.ListBlobsParameters) (resp storage.BlobListResponse, err error) {
 	for i := 0; i < v.ListBlobsMaxAttempts; i++ {
 		resp, err = v.container.ListBlobs(params)
 		err = v.translateError(err)
@@ -448,7 +448,7 @@ func (v *AzureBlobVolume) listBlobs(page int, params storage.ListBlobsParameters
 }
 
 // Trash a Keep block.
-func (v *AzureBlobVolume) BlockTrash(loc string) error {
+func (v *azureBlobVolume) BlockTrash(loc string) error {
 	// Ideally we would use If-Unmodified-Since, but that
 	// particular condition seems to be ignored by Azure. Instead,
 	// we get the Etag before checking Mtime, and use If-Match to
@@ -481,7 +481,7 @@ func (v *AzureBlobVolume) BlockTrash(loc string) error {
 
 // BlockUntrash deletes the expires_at metadata attribute for the
 // specified block blob.
-func (v *AzureBlobVolume) BlockUntrash(hash string) error {
+func (v *azureBlobVolume) BlockUntrash(hash string) error {
 	// if expires_at does not exist, return NotFoundError
 	metadata, err := v.container.GetBlobMetadata(hash)
 	if err != nil {
@@ -499,7 +499,7 @@ func (v *AzureBlobVolume) BlockUntrash(hash string) error {
 
 // If possible, translate an Azure SDK error to a recognizable error
 // like os.ErrNotExist.
-func (v *AzureBlobVolume) translateError(err error) error {
+func (v *azureBlobVolume) translateError(err error) error {
 	switch {
 	case err == nil:
 		return err
@@ -519,13 +519,13 @@ func (v *AzureBlobVolume) translateError(err error) error {
 
 var keepBlockRegexp = regexp.MustCompile(`^[0-9a-f]{32}$`)
 
-func (v *AzureBlobVolume) isKeepBlock(s string) bool {
+func (v *azureBlobVolume) isKeepBlock(s string) bool {
 	return keepBlockRegexp.MatchString(s)
 }
 
 // EmptyTrash looks for trashed blocks that exceeded BlobTrashLifetime
 // and deletes them from the volume.
-func (v *AzureBlobVolume) EmptyTrash() {
+func (v *azureBlobVolume) EmptyTrash() {
 	var bytesDeleted, bytesInTrash int64
 	var blocksDeleted, blocksInTrash int64
 
@@ -593,7 +593,7 @@ func (v *AzureBlobVolume) EmptyTrash() {
 }
 
 // InternalStats returns bucket I/O and API call counters.
-func (v *AzureBlobVolume) InternalStats() interface{} {
+func (v *azureBlobVolume) InternalStats() interface{} {
 	return &v.container.stats
 }
 
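The pipe technique described in BlockWrite above is worth seeing in isolation: the caller's data slice is only ever touched by the pipe's Write, so closing the pipe interrupts any pending Write, and the SDK goroutine can read no more of the buffer after BlockWrite returns. A minimal, self-contained sketch of the pattern, where slowUpload is a hypothetical stand-in for CreateBlockBlobFromReader:

    package main

    import (
    	"context"
    	"fmt"
    	"io"
    	"time"
    )

    // slowUpload stands in for an SDK call that consumes the reader at
    // its own pace.
    func slowUpload(r io.Reader) error {
    	time.Sleep(time.Second) // simulate a stalled upload
    	_, err := io.Copy(io.Discard, r)
    	return err
    }

    func writeWithAbandon(ctx context.Context, data []byte) error {
    	bufr, bufw := io.Pipe()
    	go func() {
    		// This Write blocks until the upload consumes the data,
    		// or returns early if the pipe is closed.
    		bufw.Write(data)
    		bufw.Close()
    	}()
    	errChan := make(chan error, 1)
    	go func() { errChan <- slowUpload(bufr) }()
    	select {
    	case <-ctx.Done():
    		// Interrupt the pending Write; once it returns, nothing
    		// can read data via bufr, so the caller may safely reuse
    		// the buffer.
    		bufw.CloseWithError(ctx.Err())
    		return ctx.Err()
    	case err := <-errChan:
    		return err
    	}
    }

    func main() {
    	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Millisecond)
    	defer cancel()
    	fmt.Println(writeWithAbandon(ctx, make([]byte, 1<<20)))
    }
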
diff --git a/services/keepstore/azure_blob_volume_test.go b/services/keepstore/azure_blob_volume_test.go
index a543dfc245..c629c9dc15 100644
--- a/services/keepstore/azure_blob_volume_test.go
+++ b/services/keepstore/azure_blob_volume_test.go
@@ -222,7 +222,7 @@ func (h *azStubHandler) ServeHTTP(rw http.ResponseWriter, r *http.Request) {
 		rw.WriteHeader(http.StatusCreated)
 	case r.Method == "PUT" && r.Form.Get("comp") == "metadata":
 		// "Set Metadata Headers" API. We don't bother
-		// stubbing "Get Metadata Headers": AzureBlobVolume
+		// stubbing "Get Metadata Headers": azureBlobVolume
 		// sets metadata headers only as a way to bump Etag
 		// and Last-Modified.
 		if !blobExists {
@@ -367,7 +367,7 @@ func (d *azStubDialer) Dial(network, address string) (net.Conn, error) {
 }
 
 type testableAzureBlobVolume struct {
-	*AzureBlobVolume
+	*azureBlobVolume
 	azHandler *azStubHandler
 	azStub    *httptest.Server
 	t         TB
@@ -397,7 +397,7 @@ func (s *stubbedAzureBlobSuite) newTestableAzureBlobVolume(t TB, params newVolum
 	azClient.Sender = &singleSender{}
 
 	bs := azClient.GetBlobService()
-	v := &AzureBlobVolume{
+	v := &azureBlobVolume{
 		ContainerName:        container,
 		WriteRaceInterval:    arvados.Duration(time.Millisecond),
 		WriteRacePollTime:    arvados.Duration(time.Nanosecond),
@@ -416,7 +416,7 @@ func (s *stubbedAzureBlobSuite) newTestableAzureBlobVolume(t TB, params newVolum
 	}
 
 	return &testableAzureBlobVolume{
-		AzureBlobVolume: v,
+		azureBlobVolume: v,
 		azHandler:       azHandler,
 		azStub:          azStub,
 		t:               t,
@@ -553,8 +553,8 @@ func (s *stubbedAzureBlobSuite) TestAzureBlobVolumeCreateBlobRaceDeadline(c *che
 		MetricsVecs:  newVolumeMetricsVecs(prometheus.NewRegistry()),
 		BufferPool:   newBufferPool(ctxlog.TestLogger(c), 8, prometheus.NewRegistry()),
 	})
-	v.AzureBlobVolume.WriteRaceInterval.Set("2s")
-	v.AzureBlobVolume.WriteRacePollTime.Set("5ms")
+	v.azureBlobVolume.WriteRaceInterval.Set("2s")
+	v.azureBlobVolume.WriteRacePollTime.Set("5ms")
 	defer v.Teardown()
 
 	v.BlockWriteRaw(TestHash, nil)
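
The stub handler above follows the standard httptest pattern: the code under test talks to a local HTTP server whose handler plays the Azure REST API. Stripped to its essence (the real azStubHandler routes many more request shapes):

    package main

    import (
    	"fmt"
    	"net/http"
    	"net/http/httptest"
    )

    func main() {
    	// Each handler invocation stands in for one REST call made by
    	// the code under test.
    	stub := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
    		w.WriteHeader(http.StatusCreated)
    	}))
    	defer stub.Close()

    	resp, err := http.Post(stub.URL+"/container/blob", "application/octet-stream", nil)
    	if err != nil {
    		panic(err)
    	}
    	resp.Body.Close()
    	fmt.Println(resp.StatusCode) // 201
    }
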
diff --git a/services/keepstore/keepstore.go b/services/keepstore/keepstore.go
index 89afa9089f..62b6d15e56 100644
--- a/services/keepstore/keepstore.go
+++ b/services/keepstore/keepstore.go
@@ -2,6 +2,12 @@
 //
 // SPDX-License-Identifier: AGPL-3.0
 
+// Package keepstore implements the keepstore service component and
+// back-end storage drivers.
+//
+// It is an internal module, only intended to be imported by
+// /cmd/arvados-server and other server-side components in this
+// repository.
 package keepstore
 
 import (
@@ -47,7 +53,7 @@ var (
 	driver               = make(map[string]volumeDriver)
 )
 
-type IndexOptions struct {
+type indexOptions struct {
 	MountUUID string
 	Prefix    string
 	WriteTo   io.Writer
@@ -653,7 +659,7 @@ func (ks *keepstore) Mounts() []*mount {
 	return ks.mountsR
 }
 
-func (ks *keepstore) Index(ctx context.Context, opts IndexOptions) error {
+func (ks *keepstore) Index(ctx context.Context, opts indexOptions) error {
 	mounts := ks.mountsR
 	if opts.MountUUID != "" {
 		mnt, ok := ks.mounts[opts.MountUUID]
diff --git a/services/keepstore/router.go b/services/keepstore/router.go
index 7ff82aa80f..256bc18c26 100644
--- a/services/keepstore/router.go
+++ b/services/keepstore/router.go
@@ -155,7 +155,7 @@ func (rtr *router) handleIndex(w http.ResponseWriter, req *http.Request) {
 		prefix = mux.Vars(req)["prefix"]
 	}
 	cw := &countingWriter{writer: w}
-	err := rtr.keepstore.Index(req.Context(), IndexOptions{
+	err := rtr.keepstore.Index(req.Context(), indexOptions{
 		MountUUID: mux.Vars(req)["uuid"],
 		Prefix:    prefix,
 		WriteTo:   cw,
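
handleIndex wraps the ResponseWriter in a countingWriter before streaming the index, so the handler knows how many bytes were sent even on a partial write. The wrapper itself is not shown in this hunk; a plausible minimal version (the fields here are assumptions, not the actual keepstore type) is just an io.Writer that tallies bytes:

    package main

    import (
    	"fmt"
    	"io"
    	"os"
    	"sync/atomic"
    )

    // countingWriter forwards writes and counts the bytes written.
    type countingWriter struct {
    	writer io.Writer
    	n      int64
    }

    func (cw *countingWriter) Write(p []byte) (int, error) {
    	n, err := cw.writer.Write(p)
    	atomic.AddInt64(&cw.n, int64(n))
    	return n, err
    }

    func main() {
    	cw := &countingWriter{writer: os.Stdout}
    	io.WriteString(cw, "one index line per block\n")
    	fmt.Fprintf(os.Stderr, "wrote %d bytes\n", atomic.LoadInt64(&cw.n))
    }
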
diff --git a/services/keepstore/router_test.go b/services/keepstore/router_test.go
index a729ee0df3..f4bcdd4ae4 100644
--- a/services/keepstore/router_test.go
+++ b/services/keepstore/router_test.go
@@ -302,8 +302,8 @@ func (s *routerSuite) TestBadRequest(c *C) {
 		"GET /mounts/blocks/123",
 		"GET /trash",
 		"GET /pull",
-		"GET /debug.json",
-		"GET /status.json",
+		"GET /debug.json",  // old endpoint, no longer exists
+		"GET /status.json", // old endpoint, no longer exists
 		"POST /",
 		"POST /aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
 		"POST /trash",
diff --git a/services/keepstore/s3aws_volume.go b/services/keepstore/s3aws_volume.go
index 2417bb8149..8e93eed12c 100644
--- a/services/keepstore/s3aws_volume.go
+++ b/services/keepstore/s3aws_volume.go
@@ -34,7 +34,7 @@ import (
 )
 
 func init() {
-	driver["S3"] = newS3AWSVolume
+	driver["S3"] = news3Volume
 }
 
 const (
@@ -49,11 +49,13 @@ const (
 )
 
 var (
-	ErrS3TrashDisabled = fmt.Errorf("trash function is disabled because Collections.BlobTrashLifetime=0 and DriverParameters.UnsafeDelete=false")
+	errS3TrashDisabled   = fmt.Errorf("trash function is disabled because Collections.BlobTrashLifetime=0 and DriverParameters.UnsafeDelete=false")
+	s3AWSKeepBlockRegexp = regexp.MustCompile(`^[0-9a-f]{32}$`)
+	s3AWSZeroTime        time.Time
 )
 
-// S3AWSVolume implements Volume using an S3 bucket.
-type S3AWSVolume struct {
+// s3Volume implements Volume using an S3 bucket.
+type s3Volume struct {
 	arvados.S3VolumeDriverParameters
 	AuthToken      string    // populated automatically when IAMRole is used
 	AuthExpiration time.Time // populated automatically when IAMRole is used
@@ -78,12 +80,7 @@ type s3AWSbucket struct {
 	mu     sync.Mutex
 }
 
-const ()
-
-var s3AWSKeepBlockRegexp = regexp.MustCompile(`^[0-9a-f]{32}$`)
-var s3AWSZeroTime time.Time
-
-func (v *S3AWSVolume) isKeepBlock(s string) (string, bool) {
+func (v *s3Volume) isKeepBlock(s string) (string, bool) {
 	if v.PrefixLength > 0 && len(s) == v.PrefixLength+33 && s[:v.PrefixLength] == s[v.PrefixLength+1:v.PrefixLength*2+1] {
 		s = s[v.PrefixLength+1:]
 	}
@@ -93,7 +90,7 @@ func (v *S3AWSVolume) isKeepBlock(s string) (string, bool) {
 // Return the key used for a given loc. If PrefixLength==0 then
 // key("abcdef0123") is "abcdef0123", if PrefixLength==3 then key is
 // "abc/abcdef0123", etc.
-func (v *S3AWSVolume) key(loc string) string {
+func (v *s3Volume) key(loc string) string {
 	if v.PrefixLength > 0 && v.PrefixLength < len(loc)-1 {
 		return loc[:v.PrefixLength] + "/" + loc
 	} else {
@@ -101,8 +98,8 @@ func (v *S3AWSVolume) key(loc string) string {
 	}
 }
 
-func newS3AWSVolume(params newVolumeParams) (volume, error) {
-	v := &S3AWSVolume{
+func news3Volume(params newVolumeParams) (volume, error) {
+	v := &s3Volume{
 		cluster:    params.Cluster,
 		volume:     params.ConfigVolume,
 		metrics:    params.MetricsVecs,
@@ -116,7 +113,7 @@ func newS3AWSVolume(params newVolumeParams) (volume, error) {
 	return v, v.check("")
 }
 
-func (v *S3AWSVolume) translateError(err error) error {
+func (v *s3Volume) translateError(err error) error {
 	if _, ok := err.(*aws.RequestCanceledError); ok {
 		return context.Canceled
 	} else if aerr, ok := err.(awserr.Error); ok {
@@ -135,7 +132,7 @@ func (v *S3AWSVolume) translateError(err error) error {
 //
 // (If something goes wrong during the copy, the error will be
 // embedded in the 200 OK response)
-func (v *S3AWSVolume) safeCopy(dst, src string) error {
+func (v *s3Volume) safeCopy(dst, src string) error {
 	input := &s3.CopyObjectInput{
 		Bucket:      aws.String(v.bucket.bucket),
 		ContentType: aws.String("application/octet-stream"),
@@ -161,7 +158,7 @@ func (v *S3AWSVolume) safeCopy(dst, src string) error {
 	return nil
 }
 
-func (v *S3AWSVolume) check(ec2metadataHostname string) error {
+func (v *s3Volume) check(ec2metadataHostname string) error {
 	if v.Bucket == "" {
 		return errors.New("DriverParameters: Bucket must be provided")
 	}
@@ -238,13 +235,13 @@ func (v *S3AWSVolume) check(ec2metadataHostname string) error {
 }
 
 // DeviceID returns a globally unique ID for the storage bucket.
-func (v *S3AWSVolume) DeviceID() string {
+func (v *s3Volume) DeviceID() string {
 	return "s3://" + v.Endpoint + "/" + v.Bucket
 }
 
 // EmptyTrash looks for trashed blocks that exceeded BlobTrashLifetime
 // and deletes them from the volume.
-func (v *S3AWSVolume) EmptyTrash() {
+func (v *s3Volume) EmptyTrash() {
 	var bytesInTrash, blocksInTrash, bytesDeleted, blocksDeleted int64
 
 	// Define "ready to delete" as "...when EmptyTrash started".
@@ -358,7 +355,7 @@ func (v *S3AWSVolume) EmptyTrash() {
 // exist. If the timestamps on "recent/X" and "trash/X" indicate there
 // was a race between Put and Trash, fixRace recovers from the race by
 // Untrashing the block.
-func (v *S3AWSVolume) fixRace(key string) bool {
+func (v *s3Volume) fixRace(key string) bool {
 	trash, err := v.head("trash/" + key)
 	if err != nil {
 		if !os.IsNotExist(v.translateError(err)) {
@@ -392,7 +389,7 @@ func (v *S3AWSVolume) fixRace(key string) bool {
 	return true
 }
 
-func (v *S3AWSVolume) head(key string) (result *s3.HeadObjectOutput, err error) {
+func (v *s3Volume) head(key string) (result *s3.HeadObjectOutput, err error) {
 	input := &s3.HeadObjectInput{
 		Bucket: aws.String(v.bucket.bucket),
 		Key:    aws.String(key),
@@ -414,7 +411,7 @@ func (v *S3AWSVolume) head(key string) (result *s3.HeadObjectOutput, err error)
 
 // BlockRead reads a Keep block that has been stored as a block blob
 // in the S3 bucket.
-func (v *S3AWSVolume) BlockRead(ctx context.Context, hash string, writeTo io.Writer) (int, error) {
+func (v *s3Volume) BlockRead(ctx context.Context, hash string, writeTo io.Writer) (int, error) {
 	key := v.key(hash)
 	buf, err := v.bufferPool.GetContext(ctx)
 	if err != nil {
@@ -460,7 +457,7 @@ func (v *S3AWSVolume) BlockRead(ctx context.Context, hash string, writeTo io.Wri
 	return streamer.Wrote(), nil
 }
 
-func (v *S3AWSVolume) readWorker(ctx context.Context, key string, dst io.WriterAt) error {
+func (v *s3Volume) readWorker(ctx context.Context, key string, dst io.WriterAt) error {
 	downloader := s3manager.NewDownloaderWithClient(v.bucket.svc, func(u *s3manager.Downloader) {
 		u.PartSize = s3downloaderPartSize
 		u.Concurrency = s3downloaderReadConcurrency
@@ -476,7 +473,7 @@ func (v *S3AWSVolume) readWorker(ctx context.Context, key string, dst io.WriterA
 	return v.translateError(err)
 }
 
-func (v *S3AWSVolume) writeObject(ctx context.Context, key string, r io.Reader) error {
+func (v *s3Volume) writeObject(ctx context.Context, key string, r io.Reader) error {
 	if r == nil {
 		// r == nil leads to a memory violation in func readFillBuf in
 		// aws-sdk-go-v2 at v0.23.0/service/s3/s3manager/upload.go
@@ -526,7 +523,7 @@ func (v *S3AWSVolume) writeObject(ctx context.Context, key string, r io.Reader)
 }
 
 // Put writes a block.
-func (v *S3AWSVolume) BlockWrite(ctx context.Context, hash string, data []byte) error {
+func (v *s3Volume) BlockWrite(ctx context.Context, hash string, data []byte) error {
 	// Do not use putWithPipe here; we want to pass an io.ReadSeeker to the S3
 	// sdk to avoid memory allocation there. See #17339 for more information.
 	rdr := bytes.NewReader(data)
@@ -628,7 +625,7 @@ func (lister *s3awsLister) pop() (k *s3.Object) {
 
 // Index writes a complete list of locators with the given prefix
 // for which Get() can retrieve data.
-func (v *S3AWSVolume) Index(ctx context.Context, prefix string, writer io.Writer) error {
+func (v *s3Volume) Index(ctx context.Context, prefix string, writer io.Writer) error {
 	prefix = v.key(prefix)
 	// Use a merge sort to find matching sets of X and recent/X.
 	dataL := s3awsLister{
@@ -695,7 +692,7 @@ func (v *S3AWSVolume) Index(ctx context.Context, prefix string, writer io.Writer
 }
 
 // Mtime returns the stored timestamp for the given locator.
-func (v *S3AWSVolume) Mtime(loc string) (time.Time, error) {
+func (v *s3Volume) Mtime(loc string) (time.Time, error) {
 	key := v.key(loc)
 	_, err := v.head(key)
 	if err != nil {
@@ -724,12 +721,12 @@ func (v *S3AWSVolume) Mtime(loc string) (time.Time, error) {
 }
 
 // InternalStats returns bucket I/O and API call counters.
-func (v *S3AWSVolume) InternalStats() interface{} {
+func (v *s3Volume) InternalStats() interface{} {
 	return &v.bucket.stats
 }
 
 // BlockTouch sets the timestamp for the given locator to the current time.
-func (v *S3AWSVolume) BlockTouch(hash string) error {
+func (v *s3Volume) BlockTouch(hash string) error {
 	key := v.key(hash)
 	_, err := v.head(key)
 	err = v.translateError(err)
@@ -745,7 +742,7 @@ func (v *S3AWSVolume) BlockTouch(hash string) error {
 
 // checkRaceWindow returns a non-nil error if trash/key is, or might
 // be, in the race window (i.e., it's not safe to trash key).
-func (v *S3AWSVolume) checkRaceWindow(key string) error {
+func (v *s3Volume) checkRaceWindow(key string) error {
 	resp, err := v.head("trash/" + key)
 	err = v.translateError(err)
 	if os.IsNotExist(err) {
@@ -785,7 +782,7 @@ func (b *s3AWSbucket) Del(path string) error {
 }
 
 // Trash a Keep block.
-func (v *S3AWSVolume) BlockTrash(loc string) error {
+func (v *s3Volume) BlockTrash(loc string) error {
 	if t, err := v.Mtime(loc); err != nil {
 		return err
 	} else if time.Since(t) < v.cluster.Collections.BlobSigningTTL.Duration() {
@@ -794,7 +791,7 @@ func (v *S3AWSVolume) BlockTrash(loc string) error {
 	key := v.key(loc)
 	if v.cluster.Collections.BlobTrashLifetime == 0 {
 		if !v.UnsafeDelete {
-			return ErrS3TrashDisabled
+			return errS3TrashDisabled
 		}
 		return v.translateError(v.bucket.Del(key))
 	}
@@ -810,7 +807,7 @@ func (v *S3AWSVolume) BlockTrash(loc string) error {
 }
 
 // BlockUntrash moves block from trash back into store
-func (v *S3AWSVolume) BlockUntrash(hash string) error {
+func (v *s3Volume) BlockUntrash(hash string) error {
 	key := v.key(hash)
 	err := v.safeCopy(key, "trash/"+key)
 	if err != nil {
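
The key() helper shown earlier in this file implements optional prefix sharding: with PrefixLength=3 a block named abcdef0123... is stored under abc/abcdef0123..., spreading keys across bucket partitions. Extracted as a standalone function for illustration:

    package main

    import "fmt"

    // key mirrors (*s3Volume)key from the patch: with prefixLength == 0
    // the key is the bare locator; with prefixLength == 3, "abcdef0123"
    // becomes "abc/abcdef0123".
    func key(prefixLength int, loc string) string {
    	if prefixLength > 0 && prefixLength < len(loc)-1 {
    		return loc[:prefixLength] + "/" + loc
    	}
    	return loc
    }

    func main() {
    	fmt.Println(key(0, "abcdef0123")) // abcdef0123
    	fmt.Println(key(3, "abcdef0123")) // abc/abcdef0123
    }
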
diff --git a/services/keepstore/s3aws_volume_test.go b/services/keepstore/s3aws_volume_test.go
index f05cbee848..d9dcbc52d6 100644
--- a/services/keepstore/s3aws_volume_test.go
+++ b/services/keepstore/s3aws_volume_test.go
@@ -58,7 +58,7 @@ type StubbedS3AWSSuite struct {
 	s3server *httptest.Server
 	metadata *httptest.Server
 	cluster  *arvados.Cluster
-	volumes  []*TestableS3AWSVolume
+	volumes  []*testableS3Volume
 }
 
 func (s *StubbedS3AWSSuite) SetUpTest(c *check.C) {
@@ -102,7 +102,8 @@ func (s *StubbedS3AWSSuite) TestIndex(c *check.C) {
 	}, 0)
 	v.IndexPageSize = 3
 	for i := 0; i < 256; i++ {
-		v.PutRaw(fmt.Sprintf("%02x%030x", i, i), []byte{102, 111, 111})
+		err := v.blockWriteWithoutMD5Check(fmt.Sprintf("%02x%030x", i, i), []byte{102, 111, 111})
+		c.Assert(err, check.IsNil)
 	}
 	for _, spec := range []struct {
 		prefix      string
@@ -132,7 +133,7 @@ func (s *StubbedS3AWSSuite) TestSignature(c *check.C) {
 
 	// The aws-sdk-go-v2 driver only supports S3 V4 signatures. S3 v2 signatures are being phased out
 	// as of June 24, 2020. Cf. https://forums.aws.amazon.com/ann.jspa?annID=5816
-	vol := S3AWSVolume{
+	vol := s3Volume{
 		S3VolumeDriverParameters: arvados.S3VolumeDriverParameters{
 			AccessKeyID:     "xxx",
 			SecretAccessKey: "xxx",
@@ -165,7 +166,7 @@ func (s *StubbedS3AWSSuite) TestIAMRoleCredentials(c *check.C) {
 	}))
 	defer s.metadata.Close()
 
-	v := &S3AWSVolume{
+	v := &s3Volume{
 		S3VolumeDriverParameters: arvados.S3VolumeDriverParameters{
 			IAMRole:  s.metadata.URL + "/latest/api/token",
 			Endpoint: "http://localhost:12345",
@@ -186,7 +187,7 @@ func (s *StubbedS3AWSSuite) TestIAMRoleCredentials(c *check.C) {
 	s.metadata = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		w.WriteHeader(http.StatusNotFound)
 	}))
-	deadv := &S3AWSVolume{
+	deadv := &s3Volume{
 		S3VolumeDriverParameters: arvados.S3VolumeDriverParameters{
 			IAMRole:  s.metadata.URL + "/fake-metadata/test-role",
 			Endpoint: "http://localhost:12345",
@@ -259,19 +260,19 @@ func (h *s3AWSBlockingHandler) ServeHTTP(w http.ResponseWriter, r *http.Request)
 }
 
 func (s *StubbedS3AWSSuite) TestGetContextCancel(c *check.C) {
-	s.testContextCancel(c, func(ctx context.Context, v *TestableS3AWSVolume) error {
+	s.testContextCancel(c, func(ctx context.Context, v *testableS3Volume) error {
 		_, err := v.BlockRead(ctx, fooHash, io.Discard)
 		return err
 	})
 }
 
 func (s *StubbedS3AWSSuite) TestPutContextCancel(c *check.C) {
-	s.testContextCancel(c, func(ctx context.Context, v *TestableS3AWSVolume) error {
+	s.testContextCancel(c, func(ctx context.Context, v *testableS3Volume) error {
 		return v.BlockWrite(ctx, fooHash, []byte("foo"))
 	})
 }
 
-func (s *StubbedS3AWSSuite) testContextCancel(c *check.C, testFunc func(context.Context, *TestableS3AWSVolume) error) {
+func (s *StubbedS3AWSSuite) testContextCancel(c *check.C, testFunc func(context.Context, *testableS3Volume) error) {
 	handler := &s3AWSBlockingHandler{}
 	s.s3server = httptest.NewServer(handler)
 	defer s.s3server.Close()
@@ -534,8 +535,8 @@ func (s *StubbedS3AWSSuite) TestBackendStates(c *check.C) {
 	}
 }
 
-type TestableS3AWSVolume struct {
-	*S3AWSVolume
+type testableS3Volume struct {
+	*s3Volume
 	server      *httptest.Server
 	c           *check.C
 	serverClock *s3AWSFakeClock
@@ -558,7 +559,7 @@ func (l LogrusLog) Print(level gofakes3.LogLevel, v ...interface{}) {
 	}
 }
 
-func (s *StubbedS3AWSSuite) newTestableVolume(c *check.C, params newVolumeParams, raceWindow time.Duration) *TestableS3AWSVolume {
+func (s *StubbedS3AWSSuite) newTestableVolume(c *check.C, params newVolumeParams, raceWindow time.Duration) *testableS3Volume {
 
 	clock := &s3AWSFakeClock{}
 	// fake s3
@@ -581,8 +582,8 @@ func (s *StubbedS3AWSSuite) newTestableVolume(c *check.C, params newVolumeParams
 		iamRole, accessKey, secretKey = s.metadata.URL+"/fake-metadata/test-role", "", ""
 	}
 
-	v := &TestableS3AWSVolume{
-		S3AWSVolume: &S3AWSVolume{
+	v := &testableS3Volume{
+		s3Volume: &s3Volume{
 			S3VolumeDriverParameters: arvados.S3VolumeDriverParameters{
 				IAMRole:            iamRole,
 				AccessKeyID:        accessKey,
@@ -604,24 +605,23 @@ func (s *StubbedS3AWSSuite) newTestableVolume(c *check.C, params newVolumeParams
 		server:      srv,
 		serverClock: clock,
 	}
-	c.Assert(v.S3AWSVolume.check(""), check.IsNil)
+	c.Assert(v.s3Volume.check(""), check.IsNil)
 	// Our test S3 server uses the older 'Path Style'
-	v.S3AWSVolume.bucket.svc.ForcePathStyle = true
+	v.s3Volume.bucket.svc.ForcePathStyle = true
 	// Create the testbucket
 	input := &s3.CreateBucketInput{
 		Bucket: aws.String(S3AWSTestBucketName),
 	}
-	req := v.S3AWSVolume.bucket.svc.CreateBucketRequest(input)
+	req := v.s3Volume.bucket.svc.CreateBucketRequest(input)
 	_, err := req.Send(context.Background())
 	c.Assert(err, check.IsNil)
 	// We couldn't set RaceWindow until now because check()
 	// rejects negative values.
-	v.S3AWSVolume.RaceWindow = arvados.Duration(raceWindow)
+	v.s3Volume.RaceWindow = arvados.Duration(raceWindow)
 	return v
 }
 
-// PutRaw skips the ContentMD5 test
-func (v *TestableS3AWSVolume) PutRaw(loc string, block []byte) {
+func (v *testableS3Volume) blockWriteWithoutMD5Check(loc string, block []byte) error {
 	key := v.key(loc)
 	r := newCountingReader(bytes.NewReader(block), v.bucket.stats.TickOutBytes)
 
@@ -636,7 +636,7 @@ func (v *TestableS3AWSVolume) PutRaw(loc string, block []byte) {
 		Body:   r,
 	})
 	if err != nil {
-		v.logger.Printf("PutRaw: %s: %+v", key, err)
+		return err
 	}
 
 	empty := bytes.NewReader([]byte{})
@@ -645,15 +645,13 @@ func (v *TestableS3AWSVolume) PutRaw(loc string, block []byte) {
 		Key:    aws.String("recent/" + key),
 		Body:   empty,
 	})
-	if err != nil {
-		v.logger.Printf("PutRaw: recent/%s: %+v", key, err)
-	}
+	return err
 }
 
 // TouchWithDate turns back the clock while doing a Touch(). We assume
 // there are no other operations happening on the same s3test server
 // while we do this.
-func (v *TestableS3AWSVolume) TouchWithDate(loc string, lastPut time.Time) {
+func (v *testableS3Volume) TouchWithDate(loc string, lastPut time.Time) {
 	v.serverClock.now = &lastPut
 
 	uploader := s3manager.NewUploaderWithClient(v.bucket.svc)
@@ -670,10 +668,10 @@ func (v *TestableS3AWSVolume) TouchWithDate(loc string, lastPut time.Time) {
 	v.serverClock.now = nil
 }
 
-func (v *TestableS3AWSVolume) Teardown() {
+func (v *testableS3Volume) Teardown() {
 	v.server.Close()
 }
 
-func (v *TestableS3AWSVolume) ReadWriteOperationLabelValues() (r, w string) {
+func (v *testableS3Volume) ReadWriteOperationLabelValues() (r, w string) {
 	return "get", "put"
 }
diff --git a/services/keepstore/unix_volume.go b/services/keepstore/unix_volume.go
index 98edfae14d..f01ad97553 100644
--- a/services/keepstore/unix_volume.go
+++ b/services/keepstore/unix_volume.go
@@ -32,7 +32,7 @@ func init() {
 }
 
 func newUnixVolume(params newVolumeParams) (volume, error) {
-	v := &UnixVolume{
+	v := &unixVolume{
 		uuid:    params.UUID,
 		cluster: params.Cluster,
 		volume:  params.ConfigVolume,
@@ -47,7 +47,7 @@ func newUnixVolume(params newVolumeParams) (volume, error) {
 	return v, v.check()
 }
 
-func (v *UnixVolume) check() error {
+func (v *unixVolume) check() error {
 	if v.Root == "" {
 		return errors.New("DriverParameters.Root was not provided")
 	}
@@ -66,8 +66,8 @@ func (v *UnixVolume) check() error {
 	return err
 }
 
-// A UnixVolume stores and retrieves blocks in a local directory.
-type UnixVolume struct {
+// A unixVolume stores and retrieves blocks in a local directory.
+type unixVolume struct {
 	Root      string // path to the volume's root directory
 	Serialize bool
 
@@ -89,7 +89,7 @@ type UnixVolume struct {
 // filesystem root to storage directory, joined by "/". For example,
 // the device ID for a local directory "/mnt/xvda1/keep" might be
 // "fa0b6166-3b55-4994-bd3f-92f4e00a1bb0/keep".
-func (v *UnixVolume) DeviceID() string {
+func (v *unixVolume) DeviceID() string {
 	giveup := func(f string, args ...interface{}) string {
 		v.logger.Infof(f+"; using hostname:path for volume %s", append(args, v.uuid)...)
 		host, _ := os.Hostname()
@@ -163,7 +163,7 @@ func (v *UnixVolume) DeviceID() string {
 }
 
 // BlockTouch sets the timestamp for the given locator to the current time
-func (v *UnixVolume) BlockTouch(hash string) error {
+func (v *unixVolume) BlockTouch(hash string) error {
 	p := v.blockPath(hash)
 	f, err := v.os.OpenFile(p, os.O_RDWR|os.O_APPEND, 0644)
 	if err != nil {
@@ -187,7 +187,7 @@ func (v *UnixVolume) BlockTouch(hash string) error {
 }
 
 // Mtime returns the stored timestamp for the given locator.
-func (v *UnixVolume) Mtime(loc string) (time.Time, error) {
+func (v *unixVolume) Mtime(loc string) (time.Time, error) {
 	p := v.blockPath(loc)
 	fi, err := v.os.Stat(p)
 	if err != nil {
@@ -198,7 +198,7 @@ func (v *UnixVolume) Mtime(loc string) (time.Time, error) {
 
 // Lock the locker (if one is in use), open the file for reading, and
 // call the given function if and when the file is ready to read.
-func (v *UnixVolume) getFunc(ctx context.Context, path string, fn func(io.Reader) error) error {
+func (v *unixVolume) getFunc(ctx context.Context, path string, fn func(io.Reader) error) error {
 	if err := v.lock(ctx); err != nil {
 		return err
 	}
@@ -212,7 +212,7 @@ func (v *UnixVolume) getFunc(ctx context.Context, path string, fn func(io.Reader
 }
 
 // stat is os.Stat() with some extra sanity checks.
-func (v *UnixVolume) stat(path string) (os.FileInfo, error) {
+func (v *unixVolume) stat(path string) (os.FileInfo, error) {
 	stat, err := v.os.Stat(path)
 	if err == nil {
 		if stat.Size() < 0 {
@@ -225,7 +225,7 @@ func (v *UnixVolume) stat(path string) (os.FileInfo, error) {
 }
 
 // BlockRead reads a block from the volume.
-func (v *UnixVolume) BlockRead(ctx context.Context, hash string, w io.Writer) (int, error) {
+func (v *unixVolume) BlockRead(ctx context.Context, hash string, w io.Writer) (int, error) {
 	path := v.blockPath(hash)
 	stat, err := v.stat(path)
 	if err != nil {
@@ -244,7 +244,7 @@ func (v *UnixVolume) BlockRead(ctx context.Context, hash string, w io.Writer) (i
 
 // BlockWrite stores a block on the volume. If it already exists, its
 // timestamp is updated.
-func (v *UnixVolume) BlockWrite(ctx context.Context, hash string, data []byte) error {
+func (v *unixVolume) BlockWrite(ctx context.Context, hash string, data []byte) error {
 	if v.isFull() {
 		return errFull
 	}
@@ -293,7 +293,7 @@ func (v *UnixVolume) BlockWrite(ctx context.Context, hash string, data []byte) e
 var blockDirRe = regexp.MustCompile(`^[0-9a-f]+$`)
 var blockFileRe = regexp.MustCompile(`^[0-9a-f]{32}$`)
 
-func (v *UnixVolume) Index(ctx context.Context, prefix string, w io.Writer) error {
+func (v *unixVolume) Index(ctx context.Context, prefix string, w io.Writer) error {
 	rootdir, err := v.os.Open(v.Root)
 	if err != nil {
 		return err
@@ -374,7 +374,7 @@ func (v *UnixVolume) Index(ctx context.Context, prefix string, w io.Writer) erro
 // BlobTrashLifetime == 0, the block is deleted; otherwise, the block
 // is renamed as path/{loc}.trash.{deadline}, where deadline = now +
 // BlobTrashLifetime.
-func (v *UnixVolume) BlockTrash(loc string) error {
+func (v *unixVolume) BlockTrash(loc string) error {
 	// Touch() must be called before calling Write() on a block.  Touch()
 	// also uses lockfile().  This avoids a race condition between Write()
 	// and Trash() because either (a) the file will be trashed and Touch()
@@ -417,7 +417,7 @@ func (v *UnixVolume) BlockTrash(loc string) error {
 // BlockUntrash moves block from trash back into store
 // Look for path/{loc}.trash.{deadline} in storage,
 // and rename the first such file as path/{loc}
-func (v *UnixVolume) BlockUntrash(hash string) error {
+func (v *unixVolume) BlockUntrash(hash string) error {
 	v.os.stats.TickOps("readdir")
 	v.os.stats.Tick(&v.os.stats.ReaddirOps)
 	files, err := ioutil.ReadDir(v.blockDir(hash))
@@ -450,19 +450,19 @@ func (v *UnixVolume) BlockUntrash(hash string) error {
 
 // blockDir returns the fully qualified directory name for the directory
 // where loc is (or would be) stored on this volume.
-func (v *UnixVolume) blockDir(loc string) string {
+func (v *unixVolume) blockDir(loc string) string {
 	return filepath.Join(v.Root, loc[0:3])
 }
 
 // blockPath returns the fully qualified pathname for the path to loc
 // on this volume.
-func (v *UnixVolume) blockPath(loc string) string {
+func (v *unixVolume) blockPath(loc string) string {
 	return filepath.Join(v.blockDir(loc), loc)
 }
 
 // isFull returns true if the free space on the volume is less than
 // MinFreeKilobytes.
-func (v *UnixVolume) isFull() (isFull bool) {
+func (v *unixVolume) isFull() (isFull bool) {
 	fullSymlink := v.Root + "/full"
 
 	// Check if the volume has been marked as full in the last hour.
@@ -492,7 +492,7 @@ func (v *UnixVolume) isFull() (isFull bool) {
 
 // FreeDiskSpace returns the number of unused 1k blocks available on
 // the volume.
-func (v *UnixVolume) FreeDiskSpace() (free uint64, err error) {
+func (v *unixVolume) FreeDiskSpace() (free uint64, err error) {
 	var fs syscall.Statfs_t
 	err = syscall.Statfs(v.Root, &fs)
 	if err == nil {
@@ -504,14 +504,14 @@ func (v *UnixVolume) FreeDiskSpace() (free uint64, err error) {
 }
 
 // InternalStats returns I/O and filesystem ops counters.
-func (v *UnixVolume) InternalStats() interface{} {
+func (v *unixVolume) InternalStats() interface{} {
 	return &v.os.stats
 }
 
 // lock acquires the serialize lock, if one is in use. If ctx is done
 // before the lock is acquired, lock returns ctx.Err() instead of
 // acquiring the lock.
-func (v *UnixVolume) lock(ctx context.Context) error {
+func (v *unixVolume) lock(ctx context.Context) error {
 	if v.locker == nil {
 		return nil
 	}
@@ -535,7 +535,7 @@ func (v *UnixVolume) lock(ctx context.Context) error {
 }
 
 // unlock releases the serialize lock, if one is in use.
-func (v *UnixVolume) unlock() {
+func (v *unixVolume) unlock() {
 	if v.locker == nil {
 		return
 	}
@@ -543,7 +543,7 @@ func (v *UnixVolume) unlock() {
 }
 
 // lockfile and unlockfile use flock(2) to manage kernel file locks.
-func (v *UnixVolume) lockfile(f *os.File) error {
+func (v *unixVolume) lockfile(f *os.File) error {
 	v.os.stats.TickOps("flock")
 	v.os.stats.Tick(&v.os.stats.FlockOps)
 	err := syscall.Flock(int(f.Fd()), syscall.LOCK_EX)
@@ -551,7 +551,7 @@ func (v *UnixVolume) lockfile(f *os.File) error {
 	return err
 }
 
-func (v *UnixVolume) unlockfile(f *os.File) error {
+func (v *unixVolume) unlockfile(f *os.File) error {
 	err := syscall.Flock(int(f.Fd()), syscall.LOCK_UN)
 	v.os.stats.TickErr(err)
 	return err
@@ -559,7 +559,7 @@ func (v *UnixVolume) unlockfile(f *os.File) error {
 
 // Where appropriate, translate a more specific filesystem error to an
 // error recognized by handlers, like os.ErrNotExist.
-func (v *UnixVolume) translateError(err error) error {
+func (v *unixVolume) translateError(err error) error {
 	switch err.(type) {
 	case *os.PathError:
 		// stat() returns a PathError if the parent directory
@@ -574,7 +574,7 @@ var unixTrashLocRegexp = regexp.MustCompile(`/([0-9a-f]{32})\.trash\.(\d+)$`)
 
 // EmptyTrash walks hierarchy looking for {hash}.trash.*
 // and deletes those with deadline < now.
-func (v *UnixVolume) EmptyTrash() {
+func (v *unixVolume) EmptyTrash() {
 	var bytesDeleted, bytesInTrash int64
 	var blocksDeleted, blocksInTrash int64
 
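BlockTrash renames a block file to {loc}.trash.{deadline}, and EmptyTrash later recovers the hash and deadline from the name using unixTrashLocRegexp. A sketch of both directions (the lifetime value is illustrative, not a real config default):

    package main

    import (
    	"fmt"
    	"regexp"
    	"time"
    )

    // Same pattern as unixTrashLocRegexp in the patch.
    var trashRe = regexp.MustCompile(`/([0-9a-f]{32})\.trash\.(\d+)$`)

    func main() {
    	loc := "acbd18db4cc2f85cedef654fccc4a4d8"
    	lifetime := 14 * 24 * time.Hour // illustrative BlobTrashLifetime
    	deadline := time.Now().Add(lifetime).Unix()

    	// Trash: rename the block to carry its deletion deadline.
    	trashPath := fmt.Sprintf("/keep/acb/%s.trash.%d", loc, deadline)
    	fmt.Println(trashPath)

    	// EmptyTrash: parse hash and deadline back out of the name.
    	if m := trashRe.FindStringSubmatch(trashPath); m != nil {
    		fmt.Println("hash:", m[1], "deadline:", m[2])
    	}
    }
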
diff --git a/services/keepstore/unix_volume_test.go b/services/keepstore/unix_volume_test.go
index a8dc4e809a..de8d3c42d8 100644
--- a/services/keepstore/unix_volume_test.go
+++ b/services/keepstore/unix_volume_test.go
@@ -23,7 +23,7 @@ import (
 )
 
 type testableUnixVolume struct {
-	UnixVolume
+	unixVolume
 	t TB
 }
 
@@ -77,7 +77,7 @@ func (s *unixVolumeSuite) newTestableUnixVolume(c *check.C, params newVolumePara
 		locker = &sync.Mutex{}
 	}
 	v := &testableUnixVolume{
-		UnixVolume: UnixVolume{
+		unixVolume: unixVolume{
 			Root:    d,
 			locker:  locker,
 			uuid:    params.UUID,
@@ -313,7 +313,7 @@ func (s *unixVolumeSuite) TestStats(c *check.C) {
 		return string(buf)
 	}
 
-	c.Check(stats(), check.Matches, `.*"StatOps":1,.*`) // (*UnixVolume)check() calls Stat() once
+	c.Check(stats(), check.Matches, `.*"StatOps":1,.*`) // (*unixVolume)check() calls Stat() once
 	c.Check(stats(), check.Matches, `.*"Errors":0,.*`)
 
 	_, err := vol.BlockRead(context.Background(), fooHash, io.Discard)
@@ -353,14 +353,14 @@ func (s *unixVolumeSuite) TestStats(c *check.C) {
 func (s *unixVolumeSuite) TestSkipUnusedDirs(c *check.C) {
 	vol := s.newTestableUnixVolume(c, s.params, false)
 
-	err := os.Mkdir(vol.UnixVolume.Root+"/aaa", 0777)
+	err := os.Mkdir(vol.unixVolume.Root+"/aaa", 0777)
 	c.Assert(err, check.IsNil)
-	err = os.Mkdir(vol.UnixVolume.Root+"/.aaa", 0777) // EmptyTrash should not look here
+	err = os.Mkdir(vol.unixVolume.Root+"/.aaa", 0777) // EmptyTrash should not look here
 	c.Assert(err, check.IsNil)
-	deleteme := vol.UnixVolume.Root + "/aaa/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.trash.1"
+	deleteme := vol.unixVolume.Root + "/aaa/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.trash.1"
 	err = ioutil.WriteFile(deleteme, []byte{1, 2, 3}, 0777)
 	c.Assert(err, check.IsNil)
-	skipme := vol.UnixVolume.Root + "/.aaa/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.trash.1"
+	skipme := vol.unixVolume.Root + "/.aaa/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.trash.1"
 	err = ioutil.WriteFile(skipme, []byte{1, 2, 3}, 0777)
 	c.Assert(err, check.IsNil)
 	vol.EmptyTrash()
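
The skip behavior this test pins down follows from blockDirRe in unix_volume.go: only hex-named directories are block directories, so ".aaa" is never scanned. A sketch of that filter (trashCandidates is a hypothetical helper, not keepstore code):

    package main

    import (
    	"fmt"
    	"os"
    	"path/filepath"
    	"regexp"
    )

    // Same pattern as blockDirRe in unix_volume.go.
    var blockDirRe = regexp.MustCompile(`^[0-9a-f]+$`)

    // trashCandidates lists only the directories EmptyTrash would scan.
    func trashCandidates(root string) ([]string, error) {
    	entries, err := os.ReadDir(root)
    	if err != nil {
    		return nil, err
    	}
    	var dirs []string
    	for _, e := range entries {
    		if !e.IsDir() || !blockDirRe.MatchString(e.Name()) {
    			continue // e.g. ".aaa" is skipped here
    		}
    		dirs = append(dirs, filepath.Join(root, e.Name()))
    	}
    	return dirs, nil
    }

    func main() {
    	fmt.Println(trashCandidates(os.TempDir()))
    }
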
diff --git a/services/keepstore/volume.go b/services/keepstore/volume.go
index 41a0eba86f..a0b6fda7d3 100644
--- a/services/keepstore/volume.go
+++ b/services/keepstore/volume.go
@@ -48,7 +48,3 @@ type ioStats struct {
 	InBytes    uint64
 	OutBytes   uint64
 }
-
-type InternalStatser interface {
-	InternalStats() interface{}
-}
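
The InternalStatser interface can go away because InternalStats() is now part of the unexported volume interface itself, and callers only ever pass the returned value straight to a JSON encoder. A sketch of that consumption path (fakeStats and fakeVolume are illustrative, not real keepstore types):

    package main

    import (
    	"encoding/json"
    	"fmt"
    )

    // fakeStats plays the role of a driver-specific counter struct.
    type fakeStats struct {
    	Ops    uint64
    	Errors uint64
    }

    type fakeVolume struct{ stats fakeStats }

    // InternalStats returns the counters as an opaque value, like the
    // volume implementations in this patch.
    func (v *fakeVolume) InternalStats() interface{} { return &v.stats }

    func main() {
    	v := &fakeVolume{stats: fakeStats{Ops: 3}}
    	buf, _ := json.Marshal(v.InternalStats())
    	fmt.Println(string(buf)) // {"Ops":3,"Errors":0}
    }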

commit e9604ac2d04b9ffe2872525241534d1869940d0b
Author: Tom Clegg <tom@curii.com>
Date:   Mon Feb 12 14:46:30 2024 -0500

    2960: Refactor keepstore into a streaming server.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@curii.com>

diff --git a/sdk/go/arvados/api.go b/sdk/go/arvados/api.go
index a6b240e214..e7310818f7 100644
--- a/sdk/go/arvados/api.go
+++ b/sdk/go/arvados/api.go
@@ -242,8 +242,9 @@ type LogoutOptions struct {
 }
 
 type BlockReadOptions struct {
-	Locator string
-	WriteTo io.Writer
+	Locator      string
+	WriteTo      io.Writer
+	LocalLocator func(string)
 }
 
 type BlockWriteOptions struct {
@@ -258,8 +259,9 @@ type BlockWriteOptions struct {
 }
 
 type BlockWriteResponse struct {
-	Locator  string
-	Replicas int
+	Locator        string
+	Replicas       int
+	StorageClasses map[string]int
 }
 
 type WebDAVOptions struct {
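
BlockWriteResponse now reports per-storage-class replica counts alongside the locator. A hedged usage sketch, with the struct copied from the hunk above and the write call itself elided:

    package main

    import "fmt"

    // Mirrors the updated sdk/go/arvados.BlockWriteResponse.
    type BlockWriteResponse struct {
    	Locator        string
    	Replicas       int
    	StorageClasses map[string]int
    }

    func main() {
    	resp := BlockWriteResponse{
    		Locator:        "acbd18db4cc2f85cedef654fccc4a4d8+3",
    		Replicas:       2,
    		StorageClasses: map[string]int{"default": 2},
    	}
    	for class, n := range resp.StorageClasses {
    		fmt.Printf("%s: %d replica(s) in class %q\n", resp.Locator, n, class)
    	}
    }
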
diff --git a/sdk/go/keepclient/keepclient.go b/sdk/go/keepclient/keepclient.go
index 2bd7996b59..64f7e47b7e 100644
--- a/sdk/go/keepclient/keepclient.go
+++ b/sdk/go/keepclient/keepclient.go
@@ -75,6 +75,8 @@ type ErrNotFound struct {
 	multipleResponseError
 }
 
+func (*ErrNotFound) HTTPStatus() int { return http.StatusNotFound }
+
 type InsufficientReplicasError struct{ error }
 
 type OversizeBlockError struct{ error }
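
Giving *ErrNotFound an HTTPStatus() int method lets HTTP handlers map the error to a status code without referencing the concrete type. The conventional consumer side looks like this (statusOf is an illustrative helper, not keepclient API):

    package main

    import (
    	"errors"
    	"fmt"
    	"net/http"
    )

    type errNotFound struct{ error }

    func (*errNotFound) HTTPStatus() int { return http.StatusNotFound }

    // statusOf returns the HTTP status implied by err, defaulting to 500.
    func statusOf(err error) int {
    	var hs interface{ HTTPStatus() int }
    	if errors.As(err, &hs) {
    		return hs.HTTPStatus()
    	}
    	return http.StatusInternalServerError
    }

    func main() {
    	err := fmt.Errorf("reading block: %w", &errNotFound{errors.New("no such block")})
    	fmt.Println(statusOf(err)) // 404
    }
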
diff --git a/sdk/python/tests/test_keep_client.py b/sdk/python/tests/test_keep_client.py
index f472c0830e..12f4e3171c 100644
--- a/sdk/python/tests/test_keep_client.py
+++ b/sdk/python/tests/test_keep_client.py
@@ -174,7 +174,7 @@ class KeepPermissionTestCase(run_test_server.TestCaseWithServers, DiskCacheBase)
             bar_locator,
             r'^37b51d194a7513e45b56f6524f2d51f2\+3\+A[a-f0-9]+@[a-f0-9]+$',
             'invalid locator from Keep.put("bar"): ' + bar_locator)
-        self.assertRaises(arvados.errors.NotFoundError,
+        self.assertRaises(arvados.errors.KeepReadError,
                           keep_client.get,
                           unsigned_bar_locator)
 
diff --git a/services/keep-balance/change_set.go b/services/keep-balance/change_set.go
index c3579556bb..771e277d60 100644
--- a/services/keep-balance/change_set.go
+++ b/services/keep-balance/change_set.go
@@ -10,6 +10,7 @@ import (
 	"sync"
 
 	"git.arvados.org/arvados.git/sdk/go/arvados"
+	"git.arvados.org/arvados.git/services/keepstore"
 )
 
 // Pull is a request to retrieve a block from a remote server, and
@@ -23,13 +24,8 @@ type Pull struct {
 // MarshalJSON formats a pull request the way keepstore wants to see
 // it.
 func (p Pull) MarshalJSON() ([]byte, error) {
-	type KeepstorePullRequest struct {
-		Locator   string   `json:"locator"`
-		Servers   []string `json:"servers"`
-		MountUUID string   `json:"mount_uuid"`
-	}
-	return json.Marshal(KeepstorePullRequest{
-		Locator:   string(p.SizedDigest[:32]),
+	return json.Marshal(keepstore.PullListItem{
+		Locator:   string(p.SizedDigest),
 		Servers:   []string{p.From.URLBase()},
 		MountUUID: p.To.KeepMount.UUID,
 	})
@@ -45,13 +41,8 @@ type Trash struct {
 // MarshalJSON formats a trash request the way keepstore wants to see
 // it, i.e., as a bare locator with no +size hint.
 func (t Trash) MarshalJSON() ([]byte, error) {
-	type KeepstoreTrashRequest struct {
-		Locator    string `json:"locator"`
-		BlockMtime int64  `json:"block_mtime"`
-		MountUUID  string `json:"mount_uuid"`
-	}
-	return json.Marshal(KeepstoreTrashRequest{
-		Locator:    string(t.SizedDigest[:32]),
+	return json.Marshal(keepstore.TrashListItem{
+		Locator:    string(t.SizedDigest),
 		BlockMtime: t.Mtime,
 		MountUUID:  t.From.KeepMount.UUID,
 	})
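
After this change, Pull and Trash marshal to the shared keepstore list-item types, and the locator keeps its +size hint instead of being truncated to the bare 32-character hash. A sketch of the resulting wire format, using a stand-in struct with the same json tags as the removed local type:

    package main

    import (
    	"encoding/json"
    	"fmt"
    )

    // pullListItem has the same shape and tags as keepstore.PullListItem.
    type pullListItem struct {
    	Locator   string   `json:"locator"`
    	Servers   []string `json:"servers"`
    	MountUUID string   `json:"mount_uuid"`
    }

    func main() {
    	buf, _ := json.Marshal([]pullListItem{{
    		Locator:   "acbd18db4cc2f85cedef654fccc4a4d8+3", // size hint retained
    		Servers:   []string{"http://keep1.zzzzz.arvadosapi.com:25107"},
    		MountUUID: "zzzzz-mount-abcdefghijklmno",
    	}})
    	fmt.Println(string(buf))
    }
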
diff --git a/services/keep-balance/change_set_test.go b/services/keep-balance/change_set_test.go
index 5474d29fb5..f2b9429017 100644
--- a/services/keep-balance/change_set_test.go
+++ b/services/keep-balance/change_set_test.go
@@ -33,12 +33,12 @@ func (s *changeSetSuite) TestJSONFormat(c *check.C) {
 		To:          mnt,
 		From:        srv}})
 	c.Check(err, check.IsNil)
-	c.Check(string(buf), check.Equals, `[{"locator":"acbd18db4cc2f85cedef654fccc4a4d8","servers":["http://keep1.zzzzz.arvadosapi.com:25107"],"mount_uuid":"zzzzz-mount-abcdefghijklmno"}]`)
+	c.Check(string(buf), check.Equals, `[{"locator":"acbd18db4cc2f85cedef654fccc4a4d8+3","servers":["http://keep1.zzzzz.arvadosapi.com:25107"],"mount_uuid":"zzzzz-mount-abcdefghijklmno"}]`)
 
 	buf, err = json.Marshal([]Trash{{
 		SizedDigest: arvados.SizedDigest("acbd18db4cc2f85cedef654fccc4a4d8+3"),
 		From:        mnt,
 		Mtime:       123456789}})
 	c.Check(err, check.IsNil)
-	c.Check(string(buf), check.Equals, `[{"locator":"acbd18db4cc2f85cedef654fccc4a4d8","block_mtime":123456789,"mount_uuid":"zzzzz-mount-abcdefghijklmno"}]`)
+	c.Check(string(buf), check.Equals, `[{"locator":"acbd18db4cc2f85cedef654fccc4a4d8+3","block_mtime":123456789,"mount_uuid":"zzzzz-mount-abcdefghijklmno"}]`)
 }
diff --git a/services/keepstore/azure_blob_volume.go b/services/keepstore/azure_blob_volume.go
index 56a52c913a..bdd669bb46 100644
--- a/services/keepstore/azure_blob_volume.go
+++ b/services/keepstore/azure_blob_volume.go
@@ -5,13 +5,11 @@
 package keepstore
 
 import (
-	"bytes"
 	"context"
 	"encoding/json"
 	"errors"
 	"fmt"
 	"io"
-	"io/ioutil"
 	"net/http"
 	"os"
 	"regexp"
@@ -32,17 +30,18 @@ func init() {
 	driver["Azure"] = newAzureBlobVolume
 }
 
-func newAzureBlobVolume(cluster *arvados.Cluster, volume arvados.Volume, logger logrus.FieldLogger, metrics *volumeMetricsVecs) (Volume, error) {
+func newAzureBlobVolume(params newVolumeParams) (volume, error) {
 	v := &AzureBlobVolume{
 		RequestTimeout:    azureDefaultRequestTimeout,
 		WriteRaceInterval: azureDefaultWriteRaceInterval,
 		WriteRacePollTime: azureDefaultWriteRacePollTime,
-		cluster:           cluster,
-		volume:            volume,
-		logger:            logger,
-		metrics:           metrics,
+		cluster:           params.Cluster,
+		volume:            params.ConfigVolume,
+		logger:            params.Logger,
+		metrics:           params.MetricsVecs,
+		bufferPool:        params.BufferPool,
 	}
-	err := json.Unmarshal(volume.DriverParameters, &v)
+	err := json.Unmarshal(params.ConfigVolume.DriverParameters, &v)
 	if err != nil {
 		return nil, err
 	}
@@ -81,7 +80,7 @@ func newAzureBlobVolume(cluster *arvados.Cluster, volume arvados.Volume, logger
 }
 
 func (v *AzureBlobVolume) check() error {
-	lbls := prometheus.Labels{"device_id": v.GetDeviceID()}
+	lbls := prometheus.Labels{"device_id": v.DeviceID()}
 	v.container.stats.opsCounters, v.container.stats.errCounters, v.container.stats.ioBytes = v.metrics.getCounterVecsFor(lbls)
 	return nil
 }
@@ -108,12 +107,13 @@ type AzureBlobVolume struct {
 	WriteRaceInterval    arvados.Duration
 	WriteRacePollTime    arvados.Duration
 
-	cluster   *arvados.Cluster
-	volume    arvados.Volume
-	logger    logrus.FieldLogger
-	metrics   *volumeMetricsVecs
-	azClient  storage.Client
-	container *azureContainer
+	cluster    *arvados.Cluster
+	volume     arvados.Volume
+	logger     logrus.FieldLogger
+	metrics    *volumeMetricsVecs
+	bufferPool *bufferPool
+	azClient   storage.Client
+	container  *azureContainer
 }
 
 // singleSender is a single-attempt storage.Sender.
@@ -124,13 +124,8 @@ func (*singleSender) Send(c *storage.Client, req *http.Request) (resp *http.Resp
 	return c.HTTPClient.Do(req)
 }
 
-// Type implements Volume.
-func (v *AzureBlobVolume) Type() string {
-	return "Azure"
-}
-
-// GetDeviceID returns a globally unique ID for the storage container.
-func (v *AzureBlobVolume) GetDeviceID() string {
+// DeviceID returns a globally unique ID for the storage container.
+func (v *AzureBlobVolume) DeviceID() string {
 	return "azure://" + v.StorageBaseURL + "/" + v.StorageAccountName + "/" + v.ContainerName
 }
 
@@ -146,30 +141,36 @@ func (v *AzureBlobVolume) checkTrashed(loc string) (bool, map[string]string, err
 	return false, metadata, nil
 }
 
-// Get reads a Keep block that has been stored as a block blob in the
-// container.
+// BlockRead reads a Keep block that has been stored as a block blob
+// in the container.
 //
 // If the block is younger than azureWriteRaceInterval and is
-// unexpectedly empty, assume a PutBlob operation is in progress, and
-// wait for it to finish writing.
-func (v *AzureBlobVolume) Get(ctx context.Context, loc string, buf []byte) (int, error) {
-	trashed, _, err := v.checkTrashed(loc)
+// unexpectedly empty, assume a BlockWrite operation is in progress,
+// and wait for it to finish writing.
+func (v *AzureBlobVolume) BlockRead(ctx context.Context, hash string, writeTo io.Writer) (int, error) {
+	trashed, _, err := v.checkTrashed(hash)
 	if err != nil {
 		return 0, err
 	}
 	if trashed {
 		return 0, os.ErrNotExist
 	}
+	buf, err := v.bufferPool.GetContext(ctx)
+	if err != nil {
+		return 0, err
+	}
+	defer v.bufferPool.Put(buf)
+	streamer := newStreamWriterAt(writeTo, 65536, buf)
+	defer streamer.Close()
 	var deadline time.Time
-	haveDeadline := false
-	size, err := v.get(ctx, loc, buf)
-	for err == nil && size == 0 && loc != "d41d8cd98f00b204e9800998ecf8427e" {
+	size, err := v.get(ctx, hash, streamer)
+	for err == nil && size == 0 && streamer.WroteAt() == 0 && hash != "d41d8cd98f00b204e9800998ecf8427e" {
 		// Seeing a brand new empty block probably means we're
 		// in a race with CreateBlob, which under the hood
 		// (apparently) does "CreateEmpty" and "CommitData"
 		// with no additional transaction locking.
-		if !haveDeadline {
-			t, err := v.Mtime(loc)
+		if deadline.IsZero() {
+			t, err := v.Mtime(hash)
 			if err != nil {
 				ctxlog.FromContext(ctx).Print("Got empty block (possible race) but Mtime failed: ", err)
 				break
@@ -178,8 +179,7 @@ func (v *AzureBlobVolume) Get(ctx context.Context, loc string, buf []byte) (int,
 			if time.Now().After(deadline) {
 				break
 			}
-			ctxlog.FromContext(ctx).Printf("Race? Block %s is 0 bytes, %s old. Polling until %s", loc, time.Since(t), deadline)
-			haveDeadline = true
+			ctxlog.FromContext(ctx).Printf("Race? Block %s is 0 bytes, %s old. Polling until %s", hash, time.Since(t), deadline)
 		} else if time.Now().After(deadline) {
 			break
 		}
@@ -188,15 +188,20 @@ func (v *AzureBlobVolume) Get(ctx context.Context, loc string, buf []byte) (int,
 			return 0, ctx.Err()
 		case <-time.After(v.WriteRacePollTime.Duration()):
 		}
-		size, err = v.get(ctx, loc, buf)
+		size, err = v.get(ctx, hash, streamer)
 	}
-	if haveDeadline {
+	if !deadline.IsZero() {
 		ctxlog.FromContext(ctx).Printf("Race ended with size==%d", size)
 	}
-	return size, err
+	if err != nil {
+		streamer.Close()
+		return streamer.Wrote(), err
+	}
+	err = streamer.Close()
+	return streamer.Wrote(), err
 }
 
-func (v *AzureBlobVolume) get(ctx context.Context, loc string, buf []byte) (int, error) {
+func (v *AzureBlobVolume) get(ctx context.Context, hash string, dst io.WriterAt) (int, error) {
 	ctx, cancel := context.WithCancel(ctx)
 	defer cancel()
 
@@ -206,16 +211,17 @@ func (v *AzureBlobVolume) get(ctx context.Context, loc string, buf []byte) (int,
 	}
 
 	pieces := 1
-	expectSize := len(buf)
+	expectSize := BlockSize
 	if pieceSize < BlockSize {
-		// Unfortunately the handler doesn't tell us how long the blob
-		// is expected to be, so we have to ask Azure.
-		props, err := v.container.GetBlobProperties(loc)
+		// Unfortunately the handler doesn't tell us how long
+		// the blob is expected to be, so we have to ask
+		// Azure.
+		props, err := v.container.GetBlobProperties(hash)
 		if err != nil {
 			return 0, v.translateError(err)
 		}
 		if props.ContentLength > int64(BlockSize) || props.ContentLength < 0 {
-			return 0, fmt.Errorf("block %s invalid size %d (max %d)", loc, props.ContentLength, BlockSize)
+			return 0, fmt.Errorf("block %s invalid size %d (max %d)", hash, props.ContentLength, BlockSize)
 		}
 		expectSize = int(props.ContentLength)
 		pieces = (expectSize + pieceSize - 1) / pieceSize
@@ -252,9 +258,9 @@ func (v *AzureBlobVolume) get(ctx context.Context, loc string, buf []byte) (int,
 			go func() {
 				defer close(gotRdr)
 				if startPos == 0 && endPos == expectSize {
-					rdr, err = v.container.GetBlob(loc)
+					rdr, err = v.container.GetBlob(hash)
 				} else {
-					rdr, err = v.container.GetBlobRange(loc, startPos, endPos-1, nil)
+					rdr, err = v.container.GetBlobRange(hash, startPos, endPos-1, nil)
 				}
 			}()
 			select {
@@ -282,7 +288,7 @@ func (v *AzureBlobVolume) get(ctx context.Context, loc string, buf []byte) (int,
 				<-ctx.Done()
 				rdr.Close()
 			}()
-			n, err := io.ReadFull(rdr, buf[startPos:endPos])
+			n, err := io.CopyN(io.NewOffsetWriter(dst, int64(startPos)), rdr, int64(endPos-startPos))
 			if pieces == 1 && (err == io.ErrUnexpectedEOF || err == io.EOF) {
 				// If we don't know the actual size,
 				// and just tried reading 64 MiB, it's
@@ -295,7 +301,7 @@ func (v *AzureBlobVolume) get(ctx context.Context, loc string, buf []byte) (int,
 				return
 			}
 			if p == pieces-1 {
-				actualSize = startPos + n
+				actualSize = startPos + int(n)
 			}
 		}(p)
 	}
@@ -310,58 +316,23 @@ func (v *AzureBlobVolume) get(ctx context.Context, loc string, buf []byte) (int,
 	return actualSize, nil
 }
 
-// Compare the given data with existing stored data.
-func (v *AzureBlobVolume) Compare(ctx context.Context, loc string, expect []byte) error {
-	trashed, _, err := v.checkTrashed(loc)
-	if err != nil {
-		return err
-	}
-	if trashed {
-		return os.ErrNotExist
-	}
-	var rdr io.ReadCloser
-	gotRdr := make(chan struct{})
-	go func() {
-		defer close(gotRdr)
-		rdr, err = v.container.GetBlob(loc)
-	}()
-	select {
-	case <-ctx.Done():
-		go func() {
-			<-gotRdr
-			if err == nil {
-				rdr.Close()
-			}
-		}()
-		return ctx.Err()
-	case <-gotRdr:
-	}
-	if err != nil {
-		return v.translateError(err)
-	}
-	defer rdr.Close()
-	return compareReaderWithBuf(ctx, rdr, expect, loc[:32])
-}
-
-// Put stores a Keep block as a block blob in the container.
-func (v *AzureBlobVolume) Put(ctx context.Context, loc string, block []byte) error {
-	if v.volume.ReadOnly {
-		return MethodDisabledError
-	}
+// BlockWrite stores a block on the volume. If it already exists, its
+// timestamp is updated.
+func (v *AzureBlobVolume) BlockWrite(ctx context.Context, hash string, data []byte) error {
 	// Send the block data through a pipe, so that (if we need to)
 	// we can close the pipe early and abandon our
 	// CreateBlockBlobFromReader() goroutine, without worrying
-	// about CreateBlockBlobFromReader() accessing our block
+	// about CreateBlockBlobFromReader() accessing our data
 	// buffer after we release it.
 	bufr, bufw := io.Pipe()
 	go func() {
-		io.Copy(bufw, bytes.NewReader(block))
+		bufw.Write(data)
 		bufw.Close()
 	}()
-	errChan := make(chan error)
+	errChan := make(chan error, 1)
 	go func() {
 		var body io.Reader = bufr
-		if len(block) == 0 {
+		if len(data) == 0 {
 			// We must send a "Content-Length: 0" header,
 			// but the http client interprets
 			// ContentLength==0 as "unknown" unless it can
@@ -370,18 +341,15 @@ func (v *AzureBlobVolume) Put(ctx context.Context, loc string, block []byte) err
 			body = http.NoBody
 			bufr.Close()
 		}
-		errChan <- v.container.CreateBlockBlobFromReader(loc, len(block), body, nil)
+		errChan <- v.container.CreateBlockBlobFromReader(hash, len(data), body, nil)
 	}()
 	select {
 	case <-ctx.Done():
 		ctxlog.FromContext(ctx).Debugf("%s: taking CreateBlockBlobFromReader's input away: %s", v, ctx.Err())
-		// Our pipe might be stuck in Write(), waiting for
-		// io.Copy() to read. If so, un-stick it. This means
-		// CreateBlockBlobFromReader will get corrupt data,
-		// but that's OK: the size won't match, so the write
-		// will fail.
-		go io.Copy(ioutil.Discard, bufr)
-		// CloseWithError() will return once pending I/O is done.
+		// bufw.CloseWithError() interrupts bufw.Write() if
+		// necessary, ensuring CreateBlockBlobFromReader can't
+		// read any more of our data slice via bufr after we
+		// return.
 		bufw.CloseWithError(ctx.Err())
 		ctxlog.FromContext(ctx).Debugf("%s: abandoning CreateBlockBlobFromReader goroutine", v)
 		return ctx.Err()
@@ -390,12 +358,9 @@ func (v *AzureBlobVolume) Put(ctx context.Context, loc string, block []byte) err
 	}
 }
 
-// Touch updates the last-modified property of a block blob.
-func (v *AzureBlobVolume) Touch(loc string) error {
-	if v.volume.ReadOnly {
-		return MethodDisabledError
-	}
-	trashed, metadata, err := v.checkTrashed(loc)
+// BlockTouch updates the last-modified property of a block blob.
+func (v *AzureBlobVolume) BlockTouch(hash string) error {
+	trashed, metadata, err := v.checkTrashed(hash)
 	if err != nil {
 		return err
 	}
@@ -404,12 +369,12 @@ func (v *AzureBlobVolume) Touch(loc string) error {
 	}
 
 	metadata["touch"] = fmt.Sprintf("%d", time.Now().Unix())
-	return v.container.SetBlobMetadata(loc, metadata, nil)
+	return v.container.SetBlobMetadata(hash, metadata, nil)
 }
 
 // Mtime returns the last-modified property of a block blob.
-func (v *AzureBlobVolume) Mtime(loc string) (time.Time, error) {
-	trashed, _, err := v.checkTrashed(loc)
+func (v *AzureBlobVolume) Mtime(hash string) (time.Time, error) {
+	trashed, _, err := v.checkTrashed(hash)
 	if err != nil {
 		return time.Time{}, err
 	}
@@ -417,21 +382,25 @@ func (v *AzureBlobVolume) Mtime(loc string) (time.Time, error) {
 		return time.Time{}, os.ErrNotExist
 	}
 
-	props, err := v.container.GetBlobProperties(loc)
+	props, err := v.container.GetBlobProperties(hash)
 	if err != nil {
 		return time.Time{}, err
 	}
 	return time.Time(props.LastModified), nil
 }
 
-// IndexTo writes a list of Keep blocks that are stored in the
+// Index writes a list of Keep blocks that are stored in the
 // container.
-func (v *AzureBlobVolume) IndexTo(prefix string, writer io.Writer) error {
+func (v *AzureBlobVolume) Index(ctx context.Context, prefix string, writer io.Writer) error {
 	params := storage.ListBlobsParameters{
 		Prefix:  prefix,
 		Include: &storage.IncludeBlobDataset{Metadata: true},
 	}
 	for page := 1; ; page++ {
+		err := ctx.Err()
+		if err != nil {
+			return err
+		}
 		resp, err := v.listBlobs(page, params)
 		if err != nil {
 			return err
@@ -467,7 +436,7 @@ func (v *AzureBlobVolume) listBlobs(page int, params storage.ListBlobsParameters
 	for i := 0; i < v.ListBlobsMaxAttempts; i++ {
 		resp, err = v.container.ListBlobs(params)
 		err = v.translateError(err)
-		if err == VolumeBusyError {
+		if err == errVolumeUnavailable {
 			v.logger.Printf("ListBlobs: will retry page %d in %s after error: %s", page, v.ListBlobsRetryDelay, err)
 			time.Sleep(time.Duration(v.ListBlobsRetryDelay))
 			continue
@@ -479,10 +448,7 @@ func (v *AzureBlobVolume) listBlobs(page int, params storage.ListBlobsParameters
 }
 
 // Trash a Keep block.
-func (v *AzureBlobVolume) Trash(loc string) error {
-	if v.volume.ReadOnly && !v.volume.AllowTrashWhenReadOnly {
-		return MethodDisabledError
-	}
+func (v *AzureBlobVolume) BlockTrash(loc string) error {
 	// Ideally we would use If-Unmodified-Since, but that
 	// particular condition seems to be ignored by Azure. Instead,
 	// we get the Etag before checking Mtime, and use If-Match to
@@ -513,11 +479,11 @@ func (v *AzureBlobVolume) Trash(loc string) error {
 	})
 }
 
-// Untrash a Keep block.
-// Delete the expires_at metadata attribute
-func (v *AzureBlobVolume) Untrash(loc string) error {
+// BlockUntrash deletes the expires_at metadata attribute for the
+// specified block blob.
+func (v *AzureBlobVolume) BlockUntrash(hash string) error {
 	// if expires_at does not exist, return NotFoundError
-	metadata, err := v.container.GetBlobMetadata(loc)
+	metadata, err := v.container.GetBlobMetadata(hash)
 	if err != nil {
 		return v.translateError(err)
 	}
@@ -527,24 +493,10 @@ func (v *AzureBlobVolume) Untrash(loc string) error {
 
 	// reset expires_at metadata attribute
 	metadata["expires_at"] = ""
-	err = v.container.SetBlobMetadata(loc, metadata, nil)
+	err = v.container.SetBlobMetadata(hash, metadata, nil)
 	return v.translateError(err)
 }
 
-// Status returns a VolumeStatus struct with placeholder data.
-func (v *AzureBlobVolume) Status() *VolumeStatus {
-	return &VolumeStatus{
-		DeviceNum: 1,
-		BytesFree: BlockSize * 1000,
-		BytesUsed: 1,
-	}
-}
-
-// String returns a volume label, including the container name.
-func (v *AzureBlobVolume) String() string {
-	return fmt.Sprintf("azure-storage-container:%+q", v.ContainerName)
-}
-
 // If possible, translate an Azure SDK error to a recognizable error
 // like os.ErrNotExist.
 func (v *AzureBlobVolume) translateError(err error) error {
@@ -553,7 +505,7 @@ func (v *AzureBlobVolume) translateError(err error) error {
 		return err
 	case strings.Contains(err.Error(), "StatusCode=503"):
 		// "storage: service returned error: StatusCode=503, ErrorCode=ServerBusy, ErrorMessage=The server is busy" (See #14804)
-		return VolumeBusyError
+		return errVolumeUnavailable
 	case strings.Contains(err.Error(), "Not Found"):
 		// "storage: service returned without a response body (404 Not Found)"
 		return os.ErrNotExist
@@ -637,7 +589,7 @@ func (v *AzureBlobVolume) EmptyTrash() {
 	close(todo)
 	wg.Wait()
 
-	v.logger.Printf("EmptyTrash stats for %v: Deleted %v bytes in %v blocks. Remaining in trash: %v bytes in %v blocks.", v.String(), bytesDeleted, blocksDeleted, bytesInTrash-bytesDeleted, blocksInTrash-blocksDeleted)
+	v.logger.Printf("EmptyTrash stats for %v: Deleted %v bytes in %v blocks. Remaining in trash: %v bytes in %v blocks.", v.DeviceID(), bytesDeleted, blocksDeleted, bytesInTrash-bytesDeleted, blocksInTrash-blocksDeleted)
 }
 
 // InternalStats returns bucket I/O and API call counters.
@@ -708,7 +660,7 @@ func (c *azureContainer) GetBlob(bname string) (io.ReadCloser, error) {
 	b := c.ctr.GetBlobReference(bname)
 	rdr, err := b.Get(nil)
 	c.stats.TickErr(err)
-	return NewCountingReader(rdr, c.stats.TickInBytes), err
+	return newCountingReader(rdr, c.stats.TickInBytes), err
 }
 
 func (c *azureContainer) GetBlobRange(bname string, start, end int, opts *storage.GetBlobOptions) (io.ReadCloser, error) {
@@ -723,7 +675,7 @@ func (c *azureContainer) GetBlobRange(bname string, start, end int, opts *storag
 		GetBlobOptions: opts,
 	})
 	c.stats.TickErr(err)
-	return NewCountingReader(rdr, c.stats.TickInBytes), err
+	return newCountingReader(rdr, c.stats.TickInBytes), err
 }
 
 // If we give it an io.Reader that doesn't also have a Len() int
@@ -744,7 +696,7 @@ func (c *azureContainer) CreateBlockBlobFromReader(bname string, size int, rdr i
 	c.stats.Tick(&c.stats.Ops, &c.stats.CreateOps)
 	if size != 0 {
 		rdr = &readerWithAzureLen{
-			Reader: NewCountingReader(rdr, c.stats.TickOutBytes),
+			Reader: newCountingReader(rdr, c.stats.TickOutBytes),
 			len:    size,
 		}
 	}
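
The BlockWrite hunk above leans on a reusable pattern: stream a
caller-owned buffer through an io.Pipe so that, on cancellation,
pw.CloseWithError unblocks the writer goroutine and guarantees the
consumer can never touch the buffer after the function returns. A
minimal, self-contained sketch of the pattern follows; the names are
illustrative and not taken from the diff.

    package main

    import (
        "context"
        "errors"
        "fmt"
        "io"
        "time"
    )

    // slowSink stands in for an upload API that consumes its input slowly.
    func slowSink(r io.Reader) error {
        buf := make([]byte, 1)
        for {
            _, err := r.Read(buf)
            if err == io.EOF {
                return nil // input finished normally
            }
            if err != nil {
                return err
            }
            time.Sleep(100 * time.Millisecond)
        }
    }

    // writeWithAbandon feeds data to slowSink but stops waiting when ctx ends.
    func writeWithAbandon(ctx context.Context, data []byte) error {
        pr, pw := io.Pipe()
        go func() {
            pw.Write(data) // blocks until fully read or pipe is closed
            pw.Close()
        }()
        done := make(chan error, 1) // buffered: sender never leaks
        go func() { done <- slowSink(pr) }()
        select {
        case err := <-done:
            return err
        case <-ctx.Done():
            // Unblock pw.Write so no goroutine can read from data
            // after we return; slowSink sees ctx.Err() and exits.
            pw.CloseWithError(ctx.Err())
            return ctx.Err()
        }
    }

    func main() {
        ctx, cancel := context.WithTimeout(context.Background(), 250*time.Millisecond)
        defer cancel()
        err := writeWithAbandon(ctx, []byte("hello"))
        fmt.Println(errors.Is(err, context.DeadlineExceeded)) // true
    }

The buffered errChan (capacity 1) introduced in the hunk serves the
same goal from the other side: the upload goroutine can always
deliver its result and exit, even after the caller has stopped
listening.
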
diff --git a/services/keepstore/azure_blob_volume_test.go b/services/keepstore/azure_blob_volume_test.go
index 48d58ee9bf..a543dfc245 100644
--- a/services/keepstore/azure_blob_volume_test.go
+++ b/services/keepstore/azure_blob_volume_test.go
@@ -13,6 +13,7 @@ import (
 	"encoding/xml"
 	"flag"
 	"fmt"
+	"io"
 	"io/ioutil"
 	"math/rand"
 	"net"
@@ -87,7 +88,7 @@ func (h *azStubHandler) TouchWithDate(container, hash string, t time.Time) {
 	blob.Mtime = t
 }
 
-func (h *azStubHandler) PutRaw(container, hash string, data []byte) {
+func (h *azStubHandler) BlockWriteRaw(container, hash string, data []byte) {
 	h.Lock()
 	defer h.Unlock()
 	h.blobs[container+"|"+hash] = &azBlob{
@@ -365,14 +366,14 @@ func (d *azStubDialer) Dial(network, address string) (net.Conn, error) {
 	return d.Dialer.Dial(network, address)
 }
 
-type TestableAzureBlobVolume struct {
+type testableAzureBlobVolume struct {
 	*AzureBlobVolume
 	azHandler *azStubHandler
 	azStub    *httptest.Server
 	t         TB
 }
 
-func (s *StubbedAzureBlobSuite) newTestableAzureBlobVolume(t TB, cluster *arvados.Cluster, volume arvados.Volume, metrics *volumeMetricsVecs) *TestableAzureBlobVolume {
+func (s *stubbedAzureBlobSuite) newTestableAzureBlobVolume(t TB, params newVolumeParams) *testableAzureBlobVolume {
 	azHandler := newAzStubHandler(t.(*check.C))
 	azStub := httptest.NewServer(azHandler)
 
@@ -404,16 +405,17 @@ func (s *StubbedAzureBlobSuite) newTestableAzureBlobVolume(t TB, cluster *arvado
 		ListBlobsRetryDelay:  arvados.Duration(time.Millisecond),
 		azClient:             azClient,
 		container:            &azureContainer{ctr: bs.GetContainerReference(container)},
-		cluster:              cluster,
-		volume:               volume,
+		cluster:              params.Cluster,
+		volume:               params.ConfigVolume,
 		logger:               ctxlog.TestLogger(t),
-		metrics:              metrics,
+		metrics:              params.MetricsVecs,
+		bufferPool:           params.BufferPool,
 	}
 	if err = v.check(); err != nil {
 		t.Fatal(err)
 	}
 
-	return &TestableAzureBlobVolume{
+	return &testableAzureBlobVolume{
 		AzureBlobVolume: v,
 		azHandler:       azHandler,
 		azStub:          azStub,
@@ -421,48 +423,54 @@ func (s *StubbedAzureBlobSuite) newTestableAzureBlobVolume(t TB, cluster *arvado
 	}
 }
 
-var _ = check.Suite(&StubbedAzureBlobSuite{})
+var _ = check.Suite(&stubbedAzureBlobSuite{})
 
-type StubbedAzureBlobSuite struct {
+type stubbedAzureBlobSuite struct {
 	origHTTPTransport http.RoundTripper
 }
 
-func (s *StubbedAzureBlobSuite) SetUpTest(c *check.C) {
+func (s *stubbedAzureBlobSuite) SetUpSuite(c *check.C) {
 	s.origHTTPTransport = http.DefaultTransport
 	http.DefaultTransport = &http.Transport{
 		Dial: (&azStubDialer{logger: ctxlog.TestLogger(c)}).Dial,
 	}
 }
 
-func (s *StubbedAzureBlobSuite) TearDownTest(c *check.C) {
+func (s *stubbedAzureBlobSuite) TearDownSuite(c *check.C) {
 	http.DefaultTransport = s.origHTTPTransport
 }
 
-func (s *StubbedAzureBlobSuite) TestAzureBlobVolumeWithGeneric(c *check.C) {
-	DoGenericVolumeTests(c, false, func(t TB, cluster *arvados.Cluster, volume arvados.Volume, logger logrus.FieldLogger, metrics *volumeMetricsVecs) TestableVolume {
-		return s.newTestableAzureBlobVolume(t, cluster, volume, metrics)
+func (s *stubbedAzureBlobSuite) TestAzureBlobVolumeWithGeneric(c *check.C) {
+	DoGenericVolumeTests(c, false, func(t TB, params newVolumeParams) TestableVolume {
+		return s.newTestableAzureBlobVolume(t, params)
 	})
 }
 
-func (s *StubbedAzureBlobSuite) TestAzureBlobVolumeConcurrentRanges(c *check.C) {
+func (s *stubbedAzureBlobSuite) TestAzureBlobVolumeConcurrentRanges(c *check.C) {
 	// Test (BlockSize mod azureMaxGetBytes)==0 and !=0 cases
-	for _, b := range []int{2 << 22, 2<<22 - 1} {
-		DoGenericVolumeTests(c, false, func(t TB, cluster *arvados.Cluster, volume arvados.Volume, logger logrus.FieldLogger, metrics *volumeMetricsVecs) TestableVolume {
-			v := s.newTestableAzureBlobVolume(t, cluster, volume, metrics)
+	for _, b := range []int{2 << 22, 2<<22 - 1} {
+		c.Logf("=== MaxGetBytes=%d", b)
+		DoGenericVolumeTests(c, false, func(t TB, params newVolumeParams) TestableVolume {
+			v := s.newTestableAzureBlobVolume(t, params)
 			v.MaxGetBytes = b
 			return v
 		})
 	}
 }
 
-func (s *StubbedAzureBlobSuite) TestReadonlyAzureBlobVolumeWithGeneric(c *check.C) {
-	DoGenericVolumeTests(c, false, func(c TB, cluster *arvados.Cluster, volume arvados.Volume, logger logrus.FieldLogger, metrics *volumeMetricsVecs) TestableVolume {
-		return s.newTestableAzureBlobVolume(c, cluster, volume, metrics)
+func (s *stubbedAzureBlobSuite) TestReadonlyAzureBlobVolumeWithGeneric(c *check.C) {
+	DoGenericVolumeTests(c, false, func(c TB, params newVolumeParams) TestableVolume {
+		return s.newTestableAzureBlobVolume(c, params)
 	})
 }
 
-func (s *StubbedAzureBlobSuite) TestAzureBlobVolumeRangeFenceposts(c *check.C) {
-	v := s.newTestableAzureBlobVolume(c, testCluster(c), arvados.Volume{Replication: 3}, newVolumeMetricsVecs(prometheus.NewRegistry()))
+func (s *stubbedAzureBlobSuite) TestAzureBlobVolumeRangeFenceposts(c *check.C) {
+	v := s.newTestableAzureBlobVolume(c, newVolumeParams{
+		Cluster:      testCluster(c),
+		ConfigVolume: arvados.Volume{Replication: 3},
+		MetricsVecs:  newVolumeMetricsVecs(prometheus.NewRegistry()),
+		BufferPool:   newBufferPool(ctxlog.TestLogger(c), 8, prometheus.NewRegistry()),
+	})
 	defer v.Teardown()
 
 	for _, size := range []int{
@@ -478,16 +486,16 @@ func (s *StubbedAzureBlobSuite) TestAzureBlobVolumeRangeFenceposts(c *check.C) {
 			data[i] = byte((i + 7) & 0xff)
 		}
 		hash := fmt.Sprintf("%x", md5.Sum(data))
-		err := v.Put(context.Background(), hash, data)
+		err := v.BlockWrite(context.Background(), hash, data)
 		if err != nil {
 			c.Error(err)
 		}
-		gotData := make([]byte, len(data))
-		gotLen, err := v.Get(context.Background(), hash, gotData)
+		gotData := bytes.NewBuffer(nil)
+		gotLen, err := v.BlockRead(context.Background(), hash, gotData)
 		if err != nil {
 			c.Error(err)
 		}
-		gotHash := fmt.Sprintf("%x", md5.Sum(gotData))
+		gotHash := fmt.Sprintf("%x", md5.Sum(gotData.Bytes()))
 		if gotLen != size {
 			c.Errorf("length mismatch: got %d != %d", gotLen, size)
 		}
@@ -497,8 +505,13 @@ func (s *StubbedAzureBlobSuite) TestAzureBlobVolumeRangeFenceposts(c *check.C) {
 	}
 }
 
-func (s *StubbedAzureBlobSuite) TestAzureBlobVolumeCreateBlobRace(c *check.C) {
-	v := s.newTestableAzureBlobVolume(c, testCluster(c), arvados.Volume{Replication: 3}, newVolumeMetricsVecs(prometheus.NewRegistry()))
+func (s *stubbedAzureBlobSuite) TestAzureBlobVolumeCreateBlobRace(c *check.C) {
+	v := s.newTestableAzureBlobVolume(c, newVolumeParams{
+		Cluster:      testCluster(c),
+		ConfigVolume: arvados.Volume{Replication: 3},
+		MetricsVecs:  newVolumeMetricsVecs(prometheus.NewRegistry()),
+		BufferPool:   newBufferPool(ctxlog.TestLogger(c), 8, prometheus.NewRegistry()),
+	})
 	defer v.Teardown()
 
 	var wg sync.WaitGroup
@@ -508,42 +521,46 @@ func (s *StubbedAzureBlobSuite) TestAzureBlobVolumeCreateBlobRace(c *check.C) {
 	wg.Add(1)
 	go func() {
 		defer wg.Done()
-		err := v.Put(context.Background(), TestHash, TestBlock)
+		err := v.BlockWrite(context.Background(), TestHash, TestBlock)
 		if err != nil {
 			c.Error(err)
 		}
 	}()
-	continuePut := make(chan struct{})
-	// Wait for the stub's Put to create the empty blob
-	v.azHandler.race <- continuePut
+	continueBlockWrite := make(chan struct{})
+	// Wait for the stub's BlockWrite to create the empty blob
+	v.azHandler.race <- continueBlockWrite
 	wg.Add(1)
 	go func() {
 		defer wg.Done()
-		buf := make([]byte, len(TestBlock))
-		_, err := v.Get(context.Background(), TestHash, buf)
+		_, err := v.BlockRead(context.Background(), TestHash, io.Discard)
 		if err != nil {
 			c.Error(err)
 		}
 	}()
-	// Wait for the stub's Get to get the empty blob
+	// Wait for the stub's BlockRead to get the empty blob
 	close(v.azHandler.race)
-	// Allow stub's Put to continue, so the real data is ready
-	// when the volume's Get retries
-	<-continuePut
-	// Wait for Get() and Put() to finish
+	// Allow stub's BlockWrite to continue, so the real data is ready
+	// when the volume's BlockRead retries
+	<-continueBlockWrite
+	// Wait for BlockRead() and BlockWrite() to finish
 	wg.Wait()
 }
 
-func (s *StubbedAzureBlobSuite) TestAzureBlobVolumeCreateBlobRaceDeadline(c *check.C) {
-	v := s.newTestableAzureBlobVolume(c, testCluster(c), arvados.Volume{Replication: 3}, newVolumeMetricsVecs(prometheus.NewRegistry()))
+func (s *stubbedAzureBlobSuite) TestAzureBlobVolumeCreateBlobRaceDeadline(c *check.C) {
+	v := s.newTestableAzureBlobVolume(c, newVolumeParams{
+		Cluster:      testCluster(c),
+		ConfigVolume: arvados.Volume{Replication: 3},
+		MetricsVecs:  newVolumeMetricsVecs(prometheus.NewRegistry()),
+		BufferPool:   newBufferPool(ctxlog.TestLogger(c), 8, prometheus.NewRegistry()),
+	})
 	v.AzureBlobVolume.WriteRaceInterval.Set("2s")
 	v.AzureBlobVolume.WriteRacePollTime.Set("5ms")
 	defer v.Teardown()
 
-	v.PutRaw(TestHash, nil)
+	v.BlockWriteRaw(TestHash, nil)
 
 	buf := new(bytes.Buffer)
-	v.IndexTo("", buf)
+	v.Index(context.Background(), "", buf)
 	if buf.Len() != 0 {
 		c.Errorf("Index %+q should be empty", buf.Bytes())
 	}
@@ -553,52 +570,50 @@ func (s *StubbedAzureBlobSuite) TestAzureBlobVolumeCreateBlobRaceDeadline(c *che
 	allDone := make(chan struct{})
 	go func() {
 		defer close(allDone)
-		buf := make([]byte, BlockSize)
-		n, err := v.Get(context.Background(), TestHash, buf)
+		buf := bytes.NewBuffer(nil)
+		n, err := v.BlockRead(context.Background(), TestHash, buf)
 		if err != nil {
 			c.Error(err)
 			return
 		}
 		if n != 0 {
-			c.Errorf("Got %+q, expected empty buf", buf[:n])
+			c.Errorf("Got %+q (n=%d), expected empty buf", buf.Bytes(), n)
 		}
 	}()
 	select {
 	case <-allDone:
 	case <-time.After(time.Second):
-		c.Error("Get should have stopped waiting for race when block was 2s old")
+		c.Error("BlockRead should have stopped waiting for race when block was 2s old")
 	}
 
 	buf.Reset()
-	v.IndexTo("", buf)
+	v.Index(context.Background(), "", buf)
 	if !bytes.HasPrefix(buf.Bytes(), []byte(TestHash+"+0")) {
 		c.Errorf("Index %+q should have %+q", buf.Bytes(), TestHash+"+0")
 	}
 }
 
-func (s *StubbedAzureBlobSuite) TestAzureBlobVolumeContextCancelGet(c *check.C) {
-	s.testAzureBlobVolumeContextCancel(c, func(ctx context.Context, v *TestableAzureBlobVolume) error {
-		v.PutRaw(TestHash, TestBlock)
-		_, err := v.Get(ctx, TestHash, make([]byte, BlockSize))
+func (s *stubbedAzureBlobSuite) TestAzureBlobVolumeContextCancelBlockRead(c *check.C) {
+	s.testAzureBlobVolumeContextCancel(c, func(ctx context.Context, v *testableAzureBlobVolume) error {
+		v.BlockWriteRaw(TestHash, TestBlock)
+		_, err := v.BlockRead(ctx, TestHash, io.Discard)
 		return err
 	})
 }
 
-func (s *StubbedAzureBlobSuite) TestAzureBlobVolumeContextCancelPut(c *check.C) {
-	s.testAzureBlobVolumeContextCancel(c, func(ctx context.Context, v *TestableAzureBlobVolume) error {
-		return v.Put(ctx, TestHash, make([]byte, BlockSize))
+func (s *stubbedAzureBlobSuite) TestAzureBlobVolumeContextCancelBlockWrite(c *check.C) {
+	s.testAzureBlobVolumeContextCancel(c, func(ctx context.Context, v *testableAzureBlobVolume) error {
+		return v.BlockWrite(ctx, TestHash, make([]byte, BlockSize))
 	})
 }
 
-func (s *StubbedAzureBlobSuite) TestAzureBlobVolumeContextCancelCompare(c *check.C) {
-	s.testAzureBlobVolumeContextCancel(c, func(ctx context.Context, v *TestableAzureBlobVolume) error {
-		v.PutRaw(TestHash, TestBlock)
-		return v.Compare(ctx, TestHash, TestBlock2)
+func (s *stubbedAzureBlobSuite) testAzureBlobVolumeContextCancel(c *check.C, testFunc func(context.Context, *testableAzureBlobVolume) error) {
+	v := s.newTestableAzureBlobVolume(c, newVolumeParams{
+		Cluster:      testCluster(c),
+		ConfigVolume: arvados.Volume{Replication: 3},
+		MetricsVecs:  newVolumeMetricsVecs(prometheus.NewRegistry()),
+		BufferPool:   newBufferPool(ctxlog.TestLogger(c), 8, prometheus.NewRegistry()),
 	})
-}
-
-func (s *StubbedAzureBlobSuite) testAzureBlobVolumeContextCancel(c *check.C, testFunc func(context.Context, *TestableAzureBlobVolume) error) {
-	v := s.newTestableAzureBlobVolume(c, testCluster(c), arvados.Volume{Replication: 3}, newVolumeMetricsVecs(prometheus.NewRegistry()))
 	defer v.Teardown()
 	v.azHandler.race = make(chan chan struct{})
 
@@ -633,8 +648,13 @@ func (s *StubbedAzureBlobSuite) testAzureBlobVolumeContextCancel(c *check.C, tes
 	}()
 }
 
-func (s *StubbedAzureBlobSuite) TestStats(c *check.C) {
-	volume := s.newTestableAzureBlobVolume(c, testCluster(c), arvados.Volume{Replication: 3}, newVolumeMetricsVecs(prometheus.NewRegistry()))
+func (s *stubbedAzureBlobSuite) TestStats(c *check.C) {
+	volume := s.newTestableAzureBlobVolume(c, newVolumeParams{
+		Cluster:      testCluster(c),
+		ConfigVolume: arvados.Volume{Replication: 3},
+		MetricsVecs:  newVolumeMetricsVecs(prometheus.NewRegistry()),
+		BufferPool:   newBufferPool(ctxlog.TestLogger(c), 8, prometheus.NewRegistry()),
+	})
 	defer volume.Teardown()
 
 	stats := func() string {
@@ -647,38 +667,38 @@ func (s *StubbedAzureBlobSuite) TestStats(c *check.C) {
 	c.Check(stats(), check.Matches, `.*"Errors":0,.*`)
 
 	loc := "acbd18db4cc2f85cedef654fccc4a4d8"
-	_, err := volume.Get(context.Background(), loc, make([]byte, 3))
+	_, err := volume.BlockRead(context.Background(), loc, io.Discard)
 	c.Check(err, check.NotNil)
 	c.Check(stats(), check.Matches, `.*"Ops":[^0],.*`)
 	c.Check(stats(), check.Matches, `.*"Errors":[^0],.*`)
 	c.Check(stats(), check.Matches, `.*"storage\.AzureStorageServiceError 404 \(404 Not Found\)":[^0].*`)
 	c.Check(stats(), check.Matches, `.*"InBytes":0,.*`)
 
-	err = volume.Put(context.Background(), loc, []byte("foo"))
+	err = volume.BlockWrite(context.Background(), loc, []byte("foo"))
 	c.Check(err, check.IsNil)
 	c.Check(stats(), check.Matches, `.*"OutBytes":3,.*`)
 	c.Check(stats(), check.Matches, `.*"CreateOps":1,.*`)
 
-	_, err = volume.Get(context.Background(), loc, make([]byte, 3))
+	_, err = volume.BlockRead(context.Background(), loc, io.Discard)
 	c.Check(err, check.IsNil)
-	_, err = volume.Get(context.Background(), loc, make([]byte, 3))
+	_, err = volume.BlockRead(context.Background(), loc, io.Discard)
 	c.Check(err, check.IsNil)
 	c.Check(stats(), check.Matches, `.*"InBytes":6,.*`)
 }
 
-func (v *TestableAzureBlobVolume) PutRaw(locator string, data []byte) {
-	v.azHandler.PutRaw(v.ContainerName, locator, data)
+func (v *testableAzureBlobVolume) BlockWriteRaw(locator string, data []byte) {
+	v.azHandler.BlockWriteRaw(v.ContainerName, locator, data)
 }
 
-func (v *TestableAzureBlobVolume) TouchWithDate(locator string, lastPut time.Time) {
-	v.azHandler.TouchWithDate(v.ContainerName, locator, lastPut)
+func (v *testableAzureBlobVolume) TouchWithDate(locator string, lastBlockWrite time.Time) {
+	v.azHandler.TouchWithDate(v.ContainerName, locator, lastBlockWrite)
 }
 
-func (v *TestableAzureBlobVolume) Teardown() {
+func (v *testableAzureBlobVolume) Teardown() {
 	v.azStub.Close()
 }
 
-func (v *TestableAzureBlobVolume) ReadWriteOperationLabelValues() (r, w string) {
+func (v *testableAzureBlobVolume) ReadWriteOperationLabelValues() (r, w string) {
 	return "get", "create"
 }
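
The CreateBlobRace tests above coordinate goroutines with a channel
of channels: one side parks at a known point by handing over a
private "continue" channel, and the other side decides when to
release it. A stripped-down sketch of the handshake, with
illustrative names:

    package main

    import "fmt"

    func main() {
        race := make(chan chan struct{})
        finished := make(chan struct{})

        // Worker stands in for the stub handler pausing mid-BlockWrite.
        go func() {
            cont := make(chan struct{})
            race <- cont // announce: reached the racy point, now paused
            <-cont       // block until the test says continue
            close(finished)
        }()

        cont := <-race // the test now knows the worker is paused
        fmt.Println("worker paused; run the racing operation here")
        close(cont) // release the worker
        <-finished
        fmt.Println("worker finished")
    }

In the real test the roles are mirrored (the test pushes
continueBlockWrite into v.azHandler.race), but the effect is the
same: a deterministic interleaving instead of sleeps.
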
 
diff --git a/services/keepstore/bufferpool.go b/services/keepstore/bufferpool.go
index b4cc5d38e1..811715b191 100644
--- a/services/keepstore/bufferpool.go
+++ b/services/keepstore/bufferpool.go
@@ -5,13 +5,17 @@
 package keepstore
 
 import (
+	"context"
 	"sync"
 	"sync/atomic"
 	"time"
 
+	"github.com/prometheus/client_golang/prometheus"
 	"github.com/sirupsen/logrus"
 )
 
+var bufferPoolBlockSize = BlockSize // modified by tests
+
 type bufferPool struct {
 	log logrus.FieldLogger
 	// limiter has a "true" placeholder for each in-use buffer.
@@ -22,17 +26,67 @@ type bufferPool struct {
 	sync.Pool
 }
 
-func newBufferPool(log logrus.FieldLogger, count int, bufSize int) *bufferPool {
+func newBufferPool(log logrus.FieldLogger, count int, reg *prometheus.Registry) *bufferPool {
 	p := bufferPool{log: log}
 	p.Pool.New = func() interface{} {
-		atomic.AddUint64(&p.allocated, uint64(bufSize))
-		return make([]byte, bufSize)
+		atomic.AddUint64(&p.allocated, uint64(bufferPoolBlockSize))
+		return make([]byte, bufferPoolBlockSize)
 	}
 	p.limiter = make(chan bool, count)
+	if reg != nil {
+		reg.MustRegister(prometheus.NewGaugeFunc(
+			prometheus.GaugeOpts{
+				Namespace: "arvados",
+				Subsystem: "keepstore",
+				Name:      "bufferpool_allocated_bytes",
+				Help:      "Number of bytes allocated to buffers",
+			},
+			func() float64 { return float64(p.Alloc()) },
+		))
+		reg.MustRegister(prometheus.NewGaugeFunc(
+			prometheus.GaugeOpts{
+				Namespace: "arvados",
+				Subsystem: "keepstore",
+				Name:      "bufferpool_max_buffers",
+				Help:      "Maximum number of buffers allowed",
+			},
+			func() float64 { return float64(p.Cap()) },
+		))
+		reg.MustRegister(prometheus.NewGaugeFunc(
+			prometheus.GaugeOpts{
+				Namespace: "arvados",
+				Subsystem: "keepstore",
+				Name:      "bufferpool_inuse_buffers",
+				Help:      "Number of buffers in use",
+			},
+			func() float64 { return float64(p.Len()) },
+		))
+	}
 	return &p
 }
 
-func (p *bufferPool) Get(size int) []byte {
+// GetContext gets a buffer from the pool -- but gives up and returns
+// ctx.Err() if ctx ends before a buffer is available.
+func (p *bufferPool) GetContext(ctx context.Context) ([]byte, error) {
+	bufReady := make(chan []byte)
+	go func() {
+		bufReady <- p.Get()
+	}()
+	select {
+	case buf := <-bufReady:
+		return buf, nil
+	case <-ctx.Done():
+		go func() {
+			// Even if ctx ended first, we need to keep
+			// waiting for our buf so we can return it to
+			// the pool.
+			p.Put(<-bufReady)
+		}()
+		return nil, ctx.Err()
+	}
+}
+
+func (p *bufferPool) Get() []byte {
 	select {
 	case p.limiter <- true:
 	default:
@@ -42,14 +96,14 @@ func (p *bufferPool) Get(size int) []byte {
 		p.log.Printf("waited %v for a buffer", time.Since(t0))
 	}
 	buf := p.Pool.Get().([]byte)
-	if cap(buf) < size {
-		p.log.Fatalf("bufferPool Get(size=%d) but max=%d", size, cap(buf))
+	if len(buf) < bufferPoolBlockSize {
+		p.log.Fatalf("bufferPoolBlockSize=%d but len(buf)=%d", bufferPoolBlockSize, len(buf))
 	}
-	return buf[:size]
+	return buf
 }
 
 func (p *bufferPool) Put(buf []byte) {
-	p.Pool.Put(buf)
+	p.Pool.Put(buf[:cap(buf)])
 	<-p.limiter
 }
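
GetContext above is an instance of a general trick for making an
uninterruptible blocking acquire cancelable: run the acquire in a
goroutine and, if the context wins the race, hand the late-arriving
resource straight back so it is not leaked. A standalone sketch
against a semaphore-style pool (hypothetical names, not the diff's
bufferPool):

    package main

    import (
        "context"
        "fmt"
        "time"
    )

    type pool struct{ slots chan []byte }

    func newPool(n, size int) *pool {
        p := &pool{slots: make(chan []byte, n)}
        for i := 0; i < n; i++ {
            p.slots <- make([]byte, size)
        }
        return p
    }

    func (p *pool) get() []byte  { return <-p.slots }
    func (p *pool) put(b []byte) { p.slots <- b }

    // getContext waits for a buffer but gives up when ctx ends.
    func (p *pool) getContext(ctx context.Context) ([]byte, error) {
        ready := make(chan []byte)
        go func() { ready <- p.get() }()
        select {
        case buf := <-ready:
            return buf, nil
        case <-ctx.Done():
            // The blocked get() may still succeed later; hand its
            // buffer straight back so the pool doesn't shrink.
            go func() { p.put(<-ready) }()
            return nil, ctx.Err()
        }
    }

    func main() {
        p := newPool(1, 8)
        held := p.get() // exhaust the pool
        ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
        defer cancel()
        if _, err := p.getContext(ctx); err != nil {
            fmt.Println(err) // context deadline exceeded
        }
        p.put(held) // the abandoned get() now completes and recycles it
    }

The hand-back in the ctx.Done() branch is the important part: without
it, every timed-out call would permanently shrink the pool by one
buffer.
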
 
diff --git a/services/keepstore/bufferpool_test.go b/services/keepstore/bufferpool_test.go
index 13e1cb4f33..8ecc833228 100644
--- a/services/keepstore/bufferpool_test.go
+++ b/services/keepstore/bufferpool_test.go
@@ -5,55 +5,54 @@
 package keepstore
 
 import (
-	"context"
 	"time"
 
 	"git.arvados.org/arvados.git/sdk/go/ctxlog"
+	"github.com/prometheus/client_golang/prometheus"
 	. "gopkg.in/check.v1"
 )
 
 var _ = Suite(&BufferPoolSuite{})
 
+var bufferPoolTestSize = 10
+
 type BufferPoolSuite struct{}
 
-// Initialize a default-sized buffer pool for the benefit of test
-// suites that don't run main().
-func init() {
-	bufs = newBufferPool(ctxlog.FromContext(context.Background()), 12, BlockSize)
+func (s *BufferPoolSuite) SetUpTest(c *C) {
+	bufferPoolBlockSize = bufferPoolTestSize
 }
 
-// Restore sane default after bufferpool's own tests
 func (s *BufferPoolSuite) TearDownTest(c *C) {
-	bufs = newBufferPool(ctxlog.FromContext(context.Background()), 12, BlockSize)
+	bufferPoolBlockSize = BlockSize
 }
 
 func (s *BufferPoolSuite) TestBufferPoolBufSize(c *C) {
-	bufs := newBufferPool(ctxlog.TestLogger(c), 2, 10)
-	b1 := bufs.Get(1)
-	bufs.Get(2)
+	bufs := newBufferPool(ctxlog.TestLogger(c), 2, prometheus.NewRegistry())
+	b1 := bufs.Get()
+	bufs.Get()
 	bufs.Put(b1)
-	b3 := bufs.Get(3)
-	c.Check(len(b3), Equals, 3)
+	b3 := bufs.Get()
+	c.Check(len(b3), Equals, bufferPoolTestSize)
 }
 
 func (s *BufferPoolSuite) TestBufferPoolUnderLimit(c *C) {
-	bufs := newBufferPool(ctxlog.TestLogger(c), 3, 10)
-	b1 := bufs.Get(10)
-	bufs.Get(10)
+	bufs := newBufferPool(ctxlog.TestLogger(c), 3, prometheus.NewRegistry())
+	b1 := bufs.Get()
+	bufs.Get()
 	testBufferPoolRace(c, bufs, b1, "Get")
 }
 
 func (s *BufferPoolSuite) TestBufferPoolAtLimit(c *C) {
-	bufs := newBufferPool(ctxlog.TestLogger(c), 2, 10)
-	b1 := bufs.Get(10)
-	bufs.Get(10)
+	bufs := newBufferPool(ctxlog.TestLogger(c), 2, prometheus.NewRegistry())
+	b1 := bufs.Get()
+	bufs.Get()
 	testBufferPoolRace(c, bufs, b1, "Put")
 }
 
 func testBufferPoolRace(c *C, bufs *bufferPool, unused []byte, expectWin string) {
 	race := make(chan string)
 	go func() {
-		bufs.Get(10)
+		bufs.Get()
 		time.Sleep(time.Millisecond)
 		race <- "Get"
 	}()
@@ -68,9 +67,9 @@ func testBufferPoolRace(c *C, bufs *bufferPool, unused []byte, expectWin string)
 }
 
 func (s *BufferPoolSuite) TestBufferPoolReuse(c *C) {
-	bufs := newBufferPool(ctxlog.TestLogger(c), 2, 10)
-	bufs.Get(10)
-	last := bufs.Get(10)
+	bufs := newBufferPool(ctxlog.TestLogger(c), 2, prometheus.NewRegistry())
+	bufs.Get()
+	last := bufs.Get()
 	// The buffer pool is allowed to throw away unused buffers
 	// (e.g., during sync.Pool's garbage collection hook, in the
 	// current implementation). However, if unused buffers are
@@ -81,7 +80,7 @@ func (s *BufferPoolSuite) TestBufferPoolReuse(c *C) {
 	reuses := 0
 	for i := 0; i < allocs; i++ {
 		bufs.Put(last)
-		next := bufs.Get(10)
+		next := bufs.Get()
 		copy(last, []byte("last"))
 		copy(next, []byte("next"))
 		if last[0] == 'n' {
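
The bufferpool.go hunks above also switch to pull-style metrics via
prometheus.NewGaugeFunc, whose callback is sampled at scrape time
rather than updated by the pool. A minimal sketch of that
registration style; the metric name follows the diff, while the
counter is a stand-in:

    package main

    import (
        "fmt"
        "sync/atomic"

        "github.com/prometheus/client_golang/prometheus"
        "github.com/prometheus/client_golang/prometheus/testutil"
    )

    func main() {
        var inUse int64
        g := prometheus.NewGaugeFunc(
            prometheus.GaugeOpts{
                Namespace: "arvados",
                Subsystem: "keepstore",
                Name:      "bufferpool_inuse_buffers",
                Help:      "Number of buffers in use",
            },
            // Evaluated on every scrape, so the value is always current.
            func() float64 { return float64(atomic.LoadInt64(&inUse)) },
        )
        prometheus.NewRegistry().MustRegister(g)

        atomic.AddInt64(&inUse, 3)
        fmt.Println(testutil.ToFloat64(g)) // 3
    }
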
diff --git a/services/keepstore/collision.go b/services/keepstore/collision.go
deleted file mode 100644
index 16f2d09232..0000000000
--- a/services/keepstore/collision.go
+++ /dev/null
@@ -1,100 +0,0 @@
-// Copyright (C) The Arvados Authors. All rights reserved.
-//
-// SPDX-License-Identifier: AGPL-3.0
-
-package keepstore
-
-import (
-	"bytes"
-	"context"
-	"crypto/md5"
-	"fmt"
-	"io"
-)
-
-// Compute the MD5 digest of a data block (consisting of buf1 + buf2 +
-// all bytes readable from rdr). If all data is read successfully,
-// return DiskHashError or CollisionError depending on whether it
-// matches expectMD5. If an error occurs while reading, return that
-// error.
-//
-// "content has expected MD5" is called a collision because this
-// function is used in cases where we have another block in hand with
-// the given MD5 but different content.
-func collisionOrCorrupt(expectMD5 string, buf1, buf2 []byte, rdr io.Reader) error {
-	outcome := make(chan error)
-	data := make(chan []byte, 1)
-	go func() {
-		h := md5.New()
-		for b := range data {
-			h.Write(b)
-		}
-		if fmt.Sprintf("%x", h.Sum(nil)) == expectMD5 {
-			outcome <- CollisionError
-		} else {
-			outcome <- DiskHashError
-		}
-	}()
-	data <- buf1
-	if buf2 != nil {
-		data <- buf2
-	}
-	var err error
-	for rdr != nil && err == nil {
-		buf := make([]byte, 1<<18)
-		var n int
-		n, err = rdr.Read(buf)
-		data <- buf[:n]
-	}
-	close(data)
-	if rdr != nil && err != io.EOF {
-		<-outcome
-		return err
-	}
-	return <-outcome
-}
-
-func compareReaderWithBuf(ctx context.Context, rdr io.Reader, expect []byte, hash string) error {
-	bufLen := 1 << 20
-	if bufLen > len(expect) && len(expect) > 0 {
-		// No need for bufLen to be longer than
-		// expect, except that len(buf)==0 would
-		// prevent us from handling empty readers the
-		// same way as non-empty readers: reading 0
-		// bytes at a time never reaches EOF.
-		bufLen = len(expect)
-	}
-	buf := make([]byte, bufLen)
-	cmp := expect
-
-	// Loop invariants: all data read so far matched what
-	// we expected, and the first N bytes of cmp are
-	// expected to equal the next N bytes read from
-	// rdr.
-	for {
-		ready := make(chan bool)
-		var n int
-		var err error
-		go func() {
-			n, err = rdr.Read(buf)
-			close(ready)
-		}()
-		select {
-		case <-ready:
-		case <-ctx.Done():
-			return ctx.Err()
-		}
-		if n > len(cmp) || bytes.Compare(cmp[:n], buf[:n]) != 0 {
-			return collisionOrCorrupt(hash, expect[:len(expect)-len(cmp)], buf[:n], rdr)
-		}
-		cmp = cmp[n:]
-		if err == io.EOF {
-			if len(cmp) != 0 {
-				return collisionOrCorrupt(hash, expect[:len(expect)-len(cmp)], nil, nil)
-			}
-			return nil
-		} else if err != nil {
-			return err
-		}
-	}
-}
diff --git a/services/keepstore/collision_test.go b/services/keepstore/collision_test.go
deleted file mode 100644
index aa8f0cbaa1..0000000000
--- a/services/keepstore/collision_test.go
+++ /dev/null
@@ -1,51 +0,0 @@
-// Copyright (C) The Arvados Authors. All rights reserved.
-//
-// SPDX-License-Identifier: AGPL-3.0
-
-package keepstore
-
-import (
-	"bytes"
-	"testing/iotest"
-
-	check "gopkg.in/check.v1"
-)
-
-var _ = check.Suite(&CollisionSuite{})
-
-type CollisionSuite struct{}
-
-func (s *CollisionSuite) TestCollisionOrCorrupt(c *check.C) {
-	fooMD5 := "acbd18db4cc2f85cedef654fccc4a4d8"
-
-	c.Check(collisionOrCorrupt(fooMD5, []byte{'f'}, []byte{'o'}, bytes.NewBufferString("o")),
-		check.Equals, CollisionError)
-	c.Check(collisionOrCorrupt(fooMD5, []byte{'f'}, nil, bytes.NewBufferString("oo")),
-		check.Equals, CollisionError)
-	c.Check(collisionOrCorrupt(fooMD5, []byte{'f'}, []byte{'o', 'o'}, nil),
-		check.Equals, CollisionError)
-	c.Check(collisionOrCorrupt(fooMD5, nil, []byte{}, bytes.NewBufferString("foo")),
-		check.Equals, CollisionError)
-	c.Check(collisionOrCorrupt(fooMD5, []byte{'f', 'o', 'o'}, nil, bytes.NewBufferString("")),
-		check.Equals, CollisionError)
-	c.Check(collisionOrCorrupt(fooMD5, nil, nil, iotest.NewReadLogger("foo: ", iotest.DataErrReader(iotest.OneByteReader(bytes.NewBufferString("foo"))))),
-		check.Equals, CollisionError)
-
-	c.Check(collisionOrCorrupt(fooMD5, []byte{'f', 'o', 'o'}, nil, bytes.NewBufferString("bar")),
-		check.Equals, DiskHashError)
-	c.Check(collisionOrCorrupt(fooMD5, []byte{'f', 'o'}, nil, nil),
-		check.Equals, DiskHashError)
-	c.Check(collisionOrCorrupt(fooMD5, []byte{}, nil, bytes.NewBufferString("")),
-		check.Equals, DiskHashError)
-	c.Check(collisionOrCorrupt(fooMD5, []byte{'f', 'O'}, nil, bytes.NewBufferString("o")),
-		check.Equals, DiskHashError)
-	c.Check(collisionOrCorrupt(fooMD5, []byte{'f', 'O', 'o'}, nil, nil),
-		check.Equals, DiskHashError)
-	c.Check(collisionOrCorrupt(fooMD5, []byte{'f', 'o'}, []byte{'O'}, nil),
-		check.Equals, DiskHashError)
-	c.Check(collisionOrCorrupt(fooMD5, []byte{'f', 'o'}, nil, bytes.NewBufferString("O")),
-		check.Equals, DiskHashError)
-
-	c.Check(collisionOrCorrupt(fooMD5, []byte{}, nil, iotest.TimeoutReader(iotest.OneByteReader(bytes.NewBufferString("foo")))),
-		check.Equals, iotest.ErrTimeout)
-}
diff --git a/services/keepstore/command.go b/services/keepstore/command.go
index 48c8256a3c..d01b30c907 100644
--- a/services/keepstore/command.go
+++ b/services/keepstore/command.go
@@ -8,20 +8,13 @@ import (
 	"context"
 	"errors"
 	"flag"
-	"fmt"
 	"io"
-	"math/rand"
-	"net/http"
-	"os"
-	"sync"
 
 	"git.arvados.org/arvados.git/lib/cmd"
 	"git.arvados.org/arvados.git/lib/config"
 	"git.arvados.org/arvados.git/lib/service"
 	"git.arvados.org/arvados.git/sdk/go/arvados"
-	"git.arvados.org/arvados.git/sdk/go/arvadosclient"
 	"git.arvados.org/arvados.git/sdk/go/ctxlog"
-	"git.arvados.org/arvados.git/sdk/go/keepclient"
 	"github.com/prometheus/client_golang/prometheus"
 	"github.com/sirupsen/logrus"
 )
@@ -108,112 +101,17 @@ func convertKeepstoreFlagsToServiceFlags(prog string, args []string, lgr logrus.
 	return loader.MungeLegacyConfigArgs(lgr, args, "-legacy-keepstore-config"), true, 0
 }
 
-type handler struct {
-	http.Handler
-	Cluster *arvados.Cluster
-	Logger  logrus.FieldLogger
-
-	pullq      *WorkQueue
-	trashq     *WorkQueue
-	volmgr     *RRVolumeManager
-	keepClient *keepclient.KeepClient
-
-	err       error
-	setupOnce sync.Once
-}
-
-func (h *handler) CheckHealth() error {
-	return h.err
-}
-
-func (h *handler) Done() <-chan struct{} {
-	return nil
-}
-
 func newHandlerOrErrorHandler(ctx context.Context, cluster *arvados.Cluster, token string, reg *prometheus.Registry) service.Handler {
-	var h handler
 	serviceURL, ok := service.URLFromContext(ctx)
 	if !ok {
 		return service.ErrorHandler(ctx, cluster, errors.New("BUG: no URL from service.URLFromContext"))
 	}
-	err := h.setup(ctx, cluster, token, reg, serviceURL)
+	ks, err := newKeepstore(ctx, cluster, token, reg, serviceURL)
 	if err != nil {
 		return service.ErrorHandler(ctx, cluster, err)
 	}
-	return &h
-}
-
-func (h *handler) setup(ctx context.Context, cluster *arvados.Cluster, token string, reg *prometheus.Registry, serviceURL arvados.URL) error {
-	h.Cluster = cluster
-	h.Logger = ctxlog.FromContext(ctx)
-	if h.Cluster.API.MaxKeepBlobBuffers <= 0 {
-		return fmt.Errorf("API.MaxKeepBlobBuffers must be greater than zero")
-	}
-	bufs = newBufferPool(h.Logger, h.Cluster.API.MaxKeepBlobBuffers, BlockSize)
-
-	if h.Cluster.API.MaxConcurrentRequests > 0 && h.Cluster.API.MaxConcurrentRequests < h.Cluster.API.MaxKeepBlobBuffers {
-		h.Logger.Warnf("Possible configuration mistake: not useful to set API.MaxKeepBlobBuffers (%d) higher than API.MaxConcurrentRequests (%d)", h.Cluster.API.MaxKeepBlobBuffers, h.Cluster.API.MaxConcurrentRequests)
-	}
-
-	if h.Cluster.Collections.BlobSigningKey != "" {
-	} else if h.Cluster.Collections.BlobSigning {
-		return errors.New("cannot enable Collections.BlobSigning with no Collections.BlobSigningKey")
-	} else {
-		h.Logger.Warn("Running without a blob signing key. Block locators returned by this server will not be signed, and will be rejected by a server that enforces permissions. To fix this, configure Collections.BlobSigning and Collections.BlobSigningKey.")
-	}
-
-	if len(h.Cluster.Volumes) == 0 {
-		return errors.New("no volumes configured")
-	}
-
-	h.Logger.Printf("keepstore %s starting, pid %d", cmd.Version.String(), os.Getpid())
-
-	// Start a round-robin VolumeManager with the configured volumes.
-	vm, err := makeRRVolumeManager(h.Logger, h.Cluster, serviceURL, newVolumeMetricsVecs(reg))
-	if err != nil {
-		return err
-	}
-	if len(vm.readables) == 0 {
-		return fmt.Errorf("no volumes configured for %s", serviceURL)
-	}
-	h.volmgr = vm
-
-	// Initialize the pullq and workers
-	h.pullq = NewWorkQueue()
-	for i := 0; i < 1 || i < h.Cluster.Collections.BlobReplicateConcurrency; i++ {
-		go h.runPullWorker(h.pullq)
-	}
-
-	// Initialize the trashq and workers
-	h.trashq = NewWorkQueue()
-	for i := 0; i < h.Cluster.Collections.BlobTrashConcurrency; i++ {
-		go RunTrashWorker(h.volmgr, h.Logger, h.Cluster, h.trashq)
-	}
-
-	// Set up routes and metrics
-	h.Handler = MakeRESTRouter(ctx, cluster, reg, vm, h.pullq, h.trashq)
-
-	// Initialize keepclient for pull workers
-	c, err := arvados.NewClientFromConfig(cluster)
-	if err != nil {
-		return err
-	}
-	ac, err := arvadosclient.New(c)
-	if err != nil {
-		return err
-	}
-	h.keepClient = &keepclient.KeepClient{
-		Arvados:       ac,
-		Want_replicas: 1,
-		DiskCacheSize: keepclient.DiskCacheDisabled,
-	}
-	h.keepClient.Arvados.ApiToken = fmt.Sprintf("%x", rand.Int63())
-
-	if d := h.Cluster.Collections.BlobTrashCheckInterval.Duration(); d > 0 &&
-		h.Cluster.Collections.BlobTrash &&
-		h.Cluster.Collections.BlobDeleteConcurrency > 0 {
-		go emptyTrash(h.volmgr.mounts, d)
-	}
-
-	return nil
+	puller := newPuller(ctx, ks, reg)
+	trasher := newTrasher(ctx, ks, reg)
+	_ = newTrashEmptier(ctx, ks, reg)
+	return newRouter(ks, puller, trasher)
 }
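
The command.go hunk above replaces the monolithic handler.setup with
small constructors composed at one call site. Reduced to a skeleton
(only the constructor names echo the diff; the types and bodies here
are hypothetical placeholders):

    package main

    import (
        "context"
        "fmt"
    )

    type keepstore struct{}
    type puller struct{ ks *keepstore }
    type trasher struct{ ks *keepstore }
    type router struct {
        ks *keepstore
        p  *puller
        t  *trasher
    }

    func newKeepstore(ctx context.Context) (*keepstore, error)   { return &keepstore{}, nil }
    func newPuller(ctx context.Context, ks *keepstore) *puller   { return &puller{ks} }
    func newTrasher(ctx context.Context, ks *keepstore) *trasher { return &trasher{ks} }
    func newRouter(ks *keepstore, p *puller, t *trasher) *router { return &router{ks, p, t} }

    func main() {
        ctx := context.Background()
        ks, err := newKeepstore(ctx)
        if err != nil {
            fmt.Println("startup failed:", err)
            return
        }
        h := newRouter(ks, newPuller(ctx, ks), newTrasher(ctx, ks))
        fmt.Printf("handler wired: %T\n", h)
    }

Each worker receives exactly the dependencies it needs, so startup
error handling stays in one place and each piece can be constructed
alone in tests.
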
diff --git a/services/keepstore/count.go b/services/keepstore/count.go
index 700ca19dec..51434a803e 100644
--- a/services/keepstore/count.go
+++ b/services/keepstore/count.go
@@ -8,21 +8,21 @@ import (
 	"io"
 )
 
-func NewCountingWriter(w io.Writer, f func(uint64)) io.WriteCloser {
+func newCountingWriter(w io.Writer, f func(uint64)) io.WriteCloser {
 	return &countingReadWriter{
 		writer:  w,
 		counter: f,
 	}
 }
 
-func NewCountingReader(r io.Reader, f func(uint64)) io.ReadCloser {
+func newCountingReader(r io.Reader, f func(uint64)) io.ReadCloser {
 	return &countingReadWriter{
 		reader:  r,
 		counter: f,
 	}
 }
 
-func NewCountingReaderAtSeeker(r readerAtSeeker, f func(uint64)) *countingReaderAtSeeker {
+func newCountingReaderAtSeeker(r readerAtSeeker, f func(uint64)) *countingReaderAtSeeker {
 	return &countingReaderAtSeeker{readerAtSeeker: r, counter: f}
 }
 
diff --git a/services/keepstore/gocheck_test.go b/services/keepstore/gocheck_test.go
deleted file mode 100644
index 90076db5b2..0000000000
--- a/services/keepstore/gocheck_test.go
+++ /dev/null
@@ -1,14 +0,0 @@
-// Copyright (C) The Arvados Authors. All rights reserved.
-//
-// SPDX-License-Identifier: AGPL-3.0
-
-package keepstore
-
-import (
-	"gopkg.in/check.v1"
-	"testing"
-)
-
-func TestGocheck(t *testing.T) {
-	check.TestingT(t)
-}
diff --git a/services/keepstore/handler_test.go b/services/keepstore/handler_test.go
deleted file mode 100644
index 5bdafb77c2..0000000000
--- a/services/keepstore/handler_test.go
+++ /dev/null
@@ -1,1405 +0,0 @@
-// Copyright (C) The Arvados Authors. All rights reserved.
-//
-// SPDX-License-Identifier: AGPL-3.0
-
-// Tests for Keep HTTP handlers:
-//
-// - GetBlockHandler
-// - PutBlockHandler
-// - IndexHandler
-//
-// The HTTP handlers are responsible for enforcing permission policy,
-// so these tests must exercise all possible permission permutations.
-
-package keepstore
-
-import (
-	"bytes"
-	"context"
-	"encoding/json"
-	"fmt"
-	"net/http"
-	"net/http/httptest"
-	"os"
-	"sort"
-	"strings"
-	"sync/atomic"
-	"time"
-
-	"git.arvados.org/arvados.git/lib/config"
-	"git.arvados.org/arvados.git/sdk/go/arvados"
-	"git.arvados.org/arvados.git/sdk/go/arvadostest"
-	"git.arvados.org/arvados.git/sdk/go/ctxlog"
-	"github.com/prometheus/client_golang/prometheus"
-	check "gopkg.in/check.v1"
-)
-
-var testServiceURL = func() arvados.URL {
-	return arvados.URL{Host: "localhost:12345", Scheme: "http"}
-}()
-
-func testCluster(t TB) *arvados.Cluster {
-	cfg, err := config.NewLoader(bytes.NewBufferString("Clusters: {zzzzz: {}}"), ctxlog.TestLogger(t)).Load()
-	if err != nil {
-		t.Fatal(err)
-	}
-	cluster, err := cfg.GetCluster("")
-	if err != nil {
-		t.Fatal(err)
-	}
-	cluster.SystemRootToken = arvadostest.SystemRootToken
-	cluster.ManagementToken = arvadostest.ManagementToken
-	cluster.Collections.BlobSigning = false
-	return cluster
-}
-
-var _ = check.Suite(&HandlerSuite{})
-
-type HandlerSuite struct {
-	cluster *arvados.Cluster
-	handler *handler
-}
-
-func (s *HandlerSuite) SetUpTest(c *check.C) {
-	s.cluster = testCluster(c)
-	s.cluster.Volumes = map[string]arvados.Volume{
-		"zzzzz-nyw5e-000000000000000": {Replication: 1, Driver: "mock"},
-		"zzzzz-nyw5e-111111111111111": {Replication: 1, Driver: "mock"},
-	}
-	s.handler = &handler{}
-}
-
-// A RequestTester represents the parameters for an HTTP request to
-// be issued on behalf of a unit test.
-type RequestTester struct {
-	uri            string
-	apiToken       string
-	method         string
-	requestBody    []byte
-	storageClasses string
-}
-
-// Test GetBlockHandler on the following situations:
-//   - permissions off, unauthenticated request, unsigned locator
-//   - permissions on, authenticated request, signed locator
-//   - permissions on, authenticated request, unsigned locator
-//   - permissions on, unauthenticated request, signed locator
-//   - permissions on, authenticated request, expired locator
-//   - permissions on, authenticated request, signed locator, transient error from backend
-func (s *HandlerSuite) TestGetHandler(c *check.C) {
-	c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
-
-	vols := s.handler.volmgr.AllWritable()
-	err := vols[0].Put(context.Background(), TestHash, TestBlock)
-	c.Check(err, check.IsNil)
-
-	// Create locators for testing.
-	// Turn on permission settings so we can generate signed locators.
-	s.cluster.Collections.BlobSigning = true
-	s.cluster.Collections.BlobSigningKey = knownKey
-	s.cluster.Collections.BlobSigningTTL.Set("5m")
-
-	var (
-		unsignedLocator  = "/" + TestHash
-		validTimestamp   = time.Now().Add(s.cluster.Collections.BlobSigningTTL.Duration())
-		expiredTimestamp = time.Now().Add(-time.Hour)
-		signedLocator    = "/" + SignLocator(s.cluster, TestHash, knownToken, validTimestamp)
-		expiredLocator   = "/" + SignLocator(s.cluster, TestHash, knownToken, expiredTimestamp)
-	)
-
-	// -----------------
-	// Test unauthenticated request with permissions off.
-	s.cluster.Collections.BlobSigning = false
-
-	// Unauthenticated request, unsigned locator
-	// => OK
-	response := IssueRequest(s.handler,
-		&RequestTester{
-			method: "GET",
-			uri:    unsignedLocator,
-		})
-	ExpectStatusCode(c,
-		"Unauthenticated request, unsigned locator", http.StatusOK, response)
-	ExpectBody(c,
-		"Unauthenticated request, unsigned locator",
-		string(TestBlock),
-		response)
-
-	receivedLen := response.Header().Get("Content-Length")
-	expectedLen := fmt.Sprintf("%d", len(TestBlock))
-	if receivedLen != expectedLen {
-		c.Errorf("expected Content-Length %s, got %s", expectedLen, receivedLen)
-	}
-
-	// ----------------
-	// Permissions: on.
-	s.cluster.Collections.BlobSigning = true
-
-	// Authenticated request, signed locator
-	// => OK
-	response = IssueRequest(s.handler, &RequestTester{
-		method:   "GET",
-		uri:      signedLocator,
-		apiToken: knownToken,
-	})
-	ExpectStatusCode(c,
-		"Authenticated request, signed locator", http.StatusOK, response)
-	ExpectBody(c,
-		"Authenticated request, signed locator", string(TestBlock), response)
-
-	receivedLen = response.Header().Get("Content-Length")
-	expectedLen = fmt.Sprintf("%d", len(TestBlock))
-	if receivedLen != expectedLen {
-		c.Errorf("expected Content-Length %s, got %s", expectedLen, receivedLen)
-	}
-
-	// Authenticated request, unsigned locator
-	// => PermissionError
-	response = IssueRequest(s.handler, &RequestTester{
-		method:   "GET",
-		uri:      unsignedLocator,
-		apiToken: knownToken,
-	})
-	ExpectStatusCode(c, "unsigned locator", PermissionError.HTTPCode, response)
-
-	// Unauthenticated request, signed locator
-	// => PermissionError
-	response = IssueRequest(s.handler, &RequestTester{
-		method: "GET",
-		uri:    signedLocator,
-	})
-	ExpectStatusCode(c,
-		"Unauthenticated request, signed locator",
-		PermissionError.HTTPCode, response)
-
-	// Authenticated request, expired locator
-	// => ExpiredError
-	response = IssueRequest(s.handler, &RequestTester{
-		method:   "GET",
-		uri:      expiredLocator,
-		apiToken: knownToken,
-	})
-	ExpectStatusCode(c,
-		"Authenticated request, expired locator",
-		ExpiredError.HTTPCode, response)
-
-	// Authenticated request, signed locator
-	// => 503 Server busy (transient error)
-
-	// Set up the block owning volume to respond with errors
-	vols[0].Volume.(*MockVolume).Bad = true
-	vols[0].Volume.(*MockVolume).BadVolumeError = VolumeBusyError
-	response = IssueRequest(s.handler, &RequestTester{
-		method:   "GET",
-		uri:      signedLocator,
-		apiToken: knownToken,
-	})
-	// A transient error from one volume while the other doesn't find the block
-	// should make the service return a 503 so that clients can retry.
-	ExpectStatusCode(c,
-		"Volume backend busy",
-		503, response)
-}
-
-// Test PutBlockHandler on the following situations:
-//   - no server key
-//   - with server key, authenticated request, unsigned locator
-//   - with server key, unauthenticated request, unsigned locator
-func (s *HandlerSuite) TestPutHandler(c *check.C) {
-	c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
-
-	// --------------
-	// No server key.
-
-	s.cluster.Collections.BlobSigningKey = ""
-
-	// Unauthenticated request, no server key
-	// => OK (unsigned response)
-	unsignedLocator := "/" + TestHash
-	response := IssueRequest(s.handler,
-		&RequestTester{
-			method:      "PUT",
-			uri:         unsignedLocator,
-			requestBody: TestBlock,
-		})
-
-	ExpectStatusCode(c,
-		"Unauthenticated request, no server key", http.StatusOK, response)
-	ExpectBody(c,
-		"Unauthenticated request, no server key",
-		TestHashPutResp, response)
-
-	// ------------------
-	// With a server key.
-
-	s.cluster.Collections.BlobSigningKey = knownKey
-	s.cluster.Collections.BlobSigningTTL.Set("5m")
-
-	// When a permission key is available, the locator returned
-	// from an authenticated PUT request will be signed.
-
-	// Authenticated PUT, signed locator
-	// => OK (signed response)
-	response = IssueRequest(s.handler,
-		&RequestTester{
-			method:      "PUT",
-			uri:         unsignedLocator,
-			requestBody: TestBlock,
-			apiToken:    knownToken,
-		})
-
-	ExpectStatusCode(c,
-		"Authenticated PUT, signed locator, with server key",
-		http.StatusOK, response)
-	responseLocator := strings.TrimSpace(response.Body.String())
-	if VerifySignature(s.cluster, responseLocator, knownToken) != nil {
-		c.Errorf("Authenticated PUT, signed locator, with server key:\n"+
-			"response '%s' does not contain a valid signature",
-			responseLocator)
-	}
-
-	// Unauthenticated PUT, unsigned locator
-	// => OK
-	response = IssueRequest(s.handler,
-		&RequestTester{
-			method:      "PUT",
-			uri:         unsignedLocator,
-			requestBody: TestBlock,
-		})
-
-	ExpectStatusCode(c,
-		"Unauthenticated PUT, unsigned locator, with server key",
-		http.StatusOK, response)
-	ExpectBody(c,
-		"Unauthenticated PUT, unsigned locator, with server key",
-		TestHashPutResp, response)
-}
-
-func (s *HandlerSuite) TestPutAndDeleteSkipReadonlyVolumes(c *check.C) {
-	s.cluster.Volumes["zzzzz-nyw5e-000000000000000"] = arvados.Volume{Driver: "mock", ReadOnly: true}
-	c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
-
-	s.cluster.SystemRootToken = "fake-data-manager-token"
-	IssueRequest(s.handler,
-		&RequestTester{
-			method:      "PUT",
-			uri:         "/" + TestHash,
-			requestBody: TestBlock,
-		})
-
-	s.cluster.Collections.BlobTrash = true
-	IssueRequest(s.handler,
-		&RequestTester{
-			method:      "DELETE",
-			uri:         "/" + TestHash,
-			requestBody: TestBlock,
-			apiToken:    s.cluster.SystemRootToken,
-		})
-	type expect struct {
-		volid     string
-		method    string
-		callcount int
-	}
-	for _, e := range []expect{
-		{"zzzzz-nyw5e-000000000000000", "Get", 0},
-		{"zzzzz-nyw5e-000000000000000", "Compare", 0},
-		{"zzzzz-nyw5e-000000000000000", "Touch", 0},
-		{"zzzzz-nyw5e-000000000000000", "Put", 0},
-		{"zzzzz-nyw5e-000000000000000", "Delete", 0},
-		{"zzzzz-nyw5e-111111111111111", "Get", 0},
-		{"zzzzz-nyw5e-111111111111111", "Compare", 1},
-		{"zzzzz-nyw5e-111111111111111", "Touch", 1},
-		{"zzzzz-nyw5e-111111111111111", "Put", 1},
-		{"zzzzz-nyw5e-111111111111111", "Delete", 1},
-	} {
-		if calls := s.handler.volmgr.mountMap[e.volid].Volume.(*MockVolume).CallCount(e.method); calls != e.callcount {
-			c.Errorf("Got %d %s() on vol %s, expect %d", calls, e.method, e.volid, e.callcount)
-		}
-	}
-}
-
-func (s *HandlerSuite) TestReadsOrderedByStorageClassPriority(c *check.C) {
-	s.cluster.Volumes = map[string]arvados.Volume{
-		"zzzzz-nyw5e-111111111111111": {
-			Driver:         "mock",
-			Replication:    1,
-			StorageClasses: map[string]bool{"class1": true}},
-		"zzzzz-nyw5e-222222222222222": {
-			Driver:         "mock",
-			Replication:    1,
-			StorageClasses: map[string]bool{"class2": true, "class3": true}},
-	}
-
-	for _, trial := range []struct {
-		priority1 int // priority of class1, thus vol1
-		priority2 int // priority of class2
-		priority3 int // priority of class3 (vol2 priority will be max(priority2, priority3))
-		get1      int // expected number of "get" ops on vol1
-		get2      int // expected number of "get" ops on vol2
-	}{
-		{100, 50, 50, 1, 0},   // class1 has higher priority => try vol1 first, no need to try vol2
-		{100, 100, 100, 1, 0}, // same priority, vol1 is first lexicographically => try vol1 first and succeed
-		{66, 99, 33, 1, 1},    // class2 has higher priority => try vol2 first, then try vol1
-		{66, 33, 99, 1, 1},    // class3 has highest priority => vol2 has highest => try vol2 first, then try vol1
-	} {
-		c.Logf("%+v", trial)
-		s.cluster.StorageClasses = map[string]arvados.StorageClassConfig{
-			"class1": {Priority: trial.priority1},
-			"class2": {Priority: trial.priority2},
-			"class3": {Priority: trial.priority3},
-		}
-		c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
-		IssueRequest(s.handler,
-			&RequestTester{
-				method:         "PUT",
-				uri:            "/" + TestHash,
-				requestBody:    TestBlock,
-				storageClasses: "class1",
-			})
-		IssueRequest(s.handler,
-			&RequestTester{
-				method: "GET",
-				uri:    "/" + TestHash,
-			})
-		c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-111111111111111"].Volume.(*MockVolume).CallCount("Get"), check.Equals, trial.get1)
-		c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-222222222222222"].Volume.(*MockVolume).CallCount("Get"), check.Equals, trial.get2)
-	}
-}
-
-func (s *HandlerSuite) TestPutWithNoWritableVolumes(c *check.C) {
-	s.cluster.Volumes = map[string]arvados.Volume{
-		"zzzzz-nyw5e-111111111111111": {
-			Driver:         "mock",
-			Replication:    1,
-			ReadOnly:       true,
-			StorageClasses: map[string]bool{"class1": true}},
-	}
-	c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
-	resp := IssueRequest(s.handler,
-		&RequestTester{
-			method:         "PUT",
-			uri:            "/" + TestHash,
-			requestBody:    TestBlock,
-			storageClasses: "class1",
-		})
-	c.Check(resp.Code, check.Equals, FullError.HTTPCode)
-	c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-111111111111111"].Volume.(*MockVolume).CallCount("Put"), check.Equals, 0)
-}
-
-func (s *HandlerSuite) TestConcurrentWritesToMultipleStorageClasses(c *check.C) {
-	s.cluster.Volumes = map[string]arvados.Volume{
-		"zzzzz-nyw5e-111111111111111": {
-			Driver:         "mock",
-			Replication:    1,
-			StorageClasses: map[string]bool{"class1": true}},
-		"zzzzz-nyw5e-121212121212121": {
-			Driver:         "mock",
-			Replication:    1,
-			StorageClasses: map[string]bool{"class1": true, "class2": true}},
-		"zzzzz-nyw5e-222222222222222": {
-			Driver:         "mock",
-			Replication:    1,
-			StorageClasses: map[string]bool{"class2": true}},
-	}
-
-	for _, trial := range []struct {
-		setCounter uint32 // value to stuff vm.counter, to control offset
-		classes    string // desired classes
-		put111     int    // expected number of "put" ops on 11111... after 2x put reqs
-		put121     int    // expected number of "put" ops on 12121...
-		put222     int    // expected number of "put" ops on 22222...
-		cmp111     int    // expected number of "compare" ops on 11111... after 2x put reqs
-		cmp121     int    // expected number of "compare" ops on 12121...
-		cmp222     int    // expected number of "compare" ops on 22222...
-	}{
-		{0, "class1",
-			1, 0, 0,
-			2, 1, 0}, // first put compares on all vols with class2; second put succeeds after checking 121
-		{0, "class2",
-			0, 1, 0,
-			0, 2, 1}, // first put compares on all vols with class2; second put succeeds after checking 121
-		{0, "class1,class2",
-			1, 1, 0,
-			2, 2, 1}, // first put compares on all vols; second put succeeds after checking 111 and 121
-		{1, "class1,class2",
-			0, 1, 0, // vm.counter offset is 1 so the first volume attempted is 121
-			2, 2, 1}, // first put compares on all vols; second put succeeds after checking 111 and 121
-		{0, "class1,class2,class404",
-			1, 1, 0,
-			2, 2, 1}, // first put compares on all vols; second put doesn't compare on 222 because it already satisfied class2 on 121
-	} {
-		c.Logf("%+v", trial)
-		s.cluster.StorageClasses = map[string]arvados.StorageClassConfig{
-			"class1": {},
-			"class2": {},
-			"class3": {},
-		}
-		c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
-		atomic.StoreUint32(&s.handler.volmgr.counter, trial.setCounter)
-		for i := 0; i < 2; i++ {
-			IssueRequest(s.handler,
-				&RequestTester{
-					method:         "PUT",
-					uri:            "/" + TestHash,
-					requestBody:    TestBlock,
-					storageClasses: trial.classes,
-				})
-		}
-		c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-111111111111111"].Volume.(*MockVolume).CallCount("Put"), check.Equals, trial.put111)
-		c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-121212121212121"].Volume.(*MockVolume).CallCount("Put"), check.Equals, trial.put121)
-		c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-222222222222222"].Volume.(*MockVolume).CallCount("Put"), check.Equals, trial.put222)
-		c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-111111111111111"].Volume.(*MockVolume).CallCount("Compare"), check.Equals, trial.cmp111)
-		c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-121212121212121"].Volume.(*MockVolume).CallCount("Compare"), check.Equals, trial.cmp121)
-		c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-222222222222222"].Volume.(*MockVolume).CallCount("Compare"), check.Equals, trial.cmp222)
-	}
-}
-
-// Test TOUCH requests.
-func (s *HandlerSuite) TestTouchHandler(c *check.C) {
-	c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
-	vols := s.handler.volmgr.AllWritable()
-	vols[0].Put(context.Background(), TestHash, TestBlock)
-	vols[0].Volume.(*MockVolume).TouchWithDate(TestHash, time.Now().Add(-time.Hour))
-	afterPut := time.Now()
-	t, err := vols[0].Mtime(TestHash)
-	c.Assert(err, check.IsNil)
-	c.Assert(t.Before(afterPut), check.Equals, true)
-
-	ExpectStatusCode(c,
-		"touch with no credentials",
-		http.StatusUnauthorized,
-		IssueRequest(s.handler, &RequestTester{
-			method: "TOUCH",
-			uri:    "/" + TestHash,
-		}))
-
-	ExpectStatusCode(c,
-		"touch with non-root credentials",
-		http.StatusUnauthorized,
-		IssueRequest(s.handler, &RequestTester{
-			method:   "TOUCH",
-			uri:      "/" + TestHash,
-			apiToken: arvadostest.ActiveTokenV2,
-		}))
-
-	ExpectStatusCode(c,
-		"touch non-existent block",
-		http.StatusNotFound,
-		IssueRequest(s.handler, &RequestTester{
-			method:   "TOUCH",
-			uri:      "/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
-			apiToken: s.cluster.SystemRootToken,
-		}))
-
-	beforeTouch := time.Now()
-	ExpectStatusCode(c,
-		"touch block",
-		http.StatusOK,
-		IssueRequest(s.handler, &RequestTester{
-			method:   "TOUCH",
-			uri:      "/" + TestHash,
-			apiToken: s.cluster.SystemRootToken,
-		}))
-	t, err = vols[0].Mtime(TestHash)
-	c.Assert(err, check.IsNil)
-	c.Assert(t.After(beforeTouch), check.Equals, true)
-}
-
-// Test /index requests:
-//   - unauthenticated /index request
-//   - unauthenticated /index/prefix request
-//   - authenticated   /index request        | non-superuser
-//   - authenticated   /index/prefix request | non-superuser
-//   - authenticated   /index request        | superuser
-//   - authenticated   /index/prefix request | superuser
-//
-// The only /index requests that should succeed are those issued by the
-// superuser. They should pass regardless of the value of BlobSigning.
-func (s *HandlerSuite) TestIndexHandler(c *check.C) {
-	c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
-
-	// Include multiple blocks on different volumes, and
-	// some metadata files (which should be omitted from index listings)
-	vols := s.handler.volmgr.AllWritable()
-	vols[0].Put(context.Background(), TestHash, TestBlock)
-	vols[1].Put(context.Background(), TestHash2, TestBlock2)
-	vols[0].Put(context.Background(), TestHash+".meta", []byte("metadata"))
-	vols[1].Put(context.Background(), TestHash2+".meta", []byte("metadata"))
-
-	s.cluster.SystemRootToken = "DATA MANAGER TOKEN"
-
-	unauthenticatedReq := &RequestTester{
-		method: "GET",
-		uri:    "/index",
-	}
-	authenticatedReq := &RequestTester{
-		method:   "GET",
-		uri:      "/index",
-		apiToken: knownToken,
-	}
-	superuserReq := &RequestTester{
-		method:   "GET",
-		uri:      "/index",
-		apiToken: s.cluster.SystemRootToken,
-	}
-	unauthPrefixReq := &RequestTester{
-		method: "GET",
-		uri:    "/index/" + TestHash[0:3],
-	}
-	authPrefixReq := &RequestTester{
-		method:   "GET",
-		uri:      "/index/" + TestHash[0:3],
-		apiToken: knownToken,
-	}
-	superuserPrefixReq := &RequestTester{
-		method:   "GET",
-		uri:      "/index/" + TestHash[0:3],
-		apiToken: s.cluster.SystemRootToken,
-	}
-	superuserNoSuchPrefixReq := &RequestTester{
-		method:   "GET",
-		uri:      "/index/abcd",
-		apiToken: s.cluster.SystemRootToken,
-	}
-	superuserInvalidPrefixReq := &RequestTester{
-		method:   "GET",
-		uri:      "/index/xyz",
-		apiToken: s.cluster.SystemRootToken,
-	}
-
-	// -------------------------------------------------------------
-	// Only the superuser should be allowed to issue /index requests.
-
-	// ---------------------------
-	// BlobSigning enabled
-	// This setting should not affect whether the tests pass.
-	s.cluster.Collections.BlobSigning = true
-
-	// unauthenticated /index request
-	// => UnauthorizedError
-	response := IssueRequest(s.handler, unauthenticatedReq)
-	ExpectStatusCode(c,
-		"permissions on, unauthenticated request",
-		UnauthorizedError.HTTPCode,
-		response)
-
-	// unauthenticated /index/prefix request
-	// => UnauthorizedError
-	response = IssueRequest(s.handler, unauthPrefixReq)
-	ExpectStatusCode(c,
-		"permissions on, unauthenticated /index/prefix request",
-		UnauthorizedError.HTTPCode,
-		response)
-
-	// authenticated /index request, non-superuser
-	// => UnauthorizedError
-	response = IssueRequest(s.handler, authenticatedReq)
-	ExpectStatusCode(c,
-		"permissions on, authenticated request, non-superuser",
-		UnauthorizedError.HTTPCode,
-		response)
-
-	// authenticated /index/prefix request, non-superuser
-	// => UnauthorizedError
-	response = IssueRequest(s.handler, authPrefixReq)
-	ExpectStatusCode(c,
-		"permissions on, authenticated /index/prefix request, non-superuser",
-		UnauthorizedError.HTTPCode,
-		response)
-
-	// superuser /index request
-	// => OK
-	response = IssueRequest(s.handler, superuserReq)
-	ExpectStatusCode(c,
-		"permissions on, superuser request",
-		http.StatusOK,
-		response)
-
-	// ----------------------------
-	// BlobSigning disabled
-	// A valid request should still pass.
-	s.cluster.Collections.BlobSigning = false
-
-	// superuser /index request
-	// => OK
-	response = IssueRequest(s.handler, superuserReq)
-	ExpectStatusCode(c,
-		"permissions on, superuser request",
-		http.StatusOK,
-		response)
-
-	expected := `^` + TestHash + `\+\d+ \d+\n` +
-		TestHash2 + `\+\d+ \d+\n\n$`
-	c.Check(response.Body.String(), check.Matches, expected, check.Commentf(
-		"permissions on, superuser request"))
-
-	// superuser /index/prefix request
-	// => OK
-	response = IssueRequest(s.handler, superuserPrefixReq)
-	ExpectStatusCode(c,
-		"permissions on, superuser request",
-		http.StatusOK,
-		response)
-
-	expected = `^` + TestHash + `\+\d+ \d+\n\n$`
-	c.Check(response.Body.String(), check.Matches, expected, check.Commentf(
-		"permissions on, superuser /index/prefix request"))
-
-	// superuser /index/{no-such-prefix} request
-	// => OK
-	response = IssueRequest(s.handler, superuserNoSuchPrefixReq)
-	ExpectStatusCode(c,
-		"permissions on, superuser request",
-		http.StatusOK,
-		response)
-
-	if response.Body.String() != "\n" {
-		c.Errorf("Expected empty response for %s. Found %s", superuserNoSuchPrefixReq.uri, response.Body.String())
-	}
-
-	// superuser /index/{invalid-prefix} request
-	// => StatusBadRequest
-	response = IssueRequest(s.handler, superuserInvalidPrefixReq)
-	ExpectStatusCode(c,
-		"permissions on, superuser request",
-		http.StatusBadRequest,
-		response)
-}
-
-// TestDeleteHandler
-//
-// Cases tested:
-//
-//	With no token and with a non-data-manager token:
-//	* Delete existing block
-//	  (test for 403 Forbidden, confirm block not deleted)
-//
-//	With data manager token:
-//
-//	* Delete existing block
-//	  (test for 200 OK, response counts, confirm block deleted)
-//
-//	* Delete nonexistent block
-//	  (test for 200 OK, response counts)
-//
-//	TODO(twp):
-//
-//	* Delete block on read-only and read-write volume
-//	  (test for 200 OK, response with copies_deleted=1,
-//	  copies_failed=1, confirm block deleted only on r/w volume)
-//
-//	* Delete block on read-only volume only
-//	  (test for 200 OK, response with copies_deleted=0, copies_failed=1,
-//	  confirm block not deleted)
-func (s *HandlerSuite) TestDeleteHandler(c *check.C) {
-	c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
-
-	vols := s.handler.volmgr.AllWritable()
-	vols[0].Put(context.Background(), TestHash, TestBlock)
-
-	// Explicitly set the BlobSigningTTL to 0 for these
-	// tests, to ensure the MockVolume deletes the blocks
-	// even though they have just been created.
-	s.cluster.Collections.BlobSigningTTL = arvados.Duration(0)
-
-	var userToken = "NOT DATA MANAGER TOKEN"
-	s.cluster.SystemRootToken = "DATA MANAGER TOKEN"
-
-	s.cluster.Collections.BlobTrash = true
-
-	unauthReq := &RequestTester{
-		method: "DELETE",
-		uri:    "/" + TestHash,
-	}
-
-	userReq := &RequestTester{
-		method:   "DELETE",
-		uri:      "/" + TestHash,
-		apiToken: userToken,
-	}
-
-	superuserExistingBlockReq := &RequestTester{
-		method:   "DELETE",
-		uri:      "/" + TestHash,
-		apiToken: s.cluster.SystemRootToken,
-	}
-
-	superuserNonexistentBlockReq := &RequestTester{
-		method:   "DELETE",
-		uri:      "/" + TestHash2,
-		apiToken: s.cluster.SystemRootToken,
-	}
-
-	// Unauthenticated request returns PermissionError.
-	var response *httptest.ResponseRecorder
-	response = IssueRequest(s.handler, unauthReq)
-	ExpectStatusCode(c,
-		"unauthenticated request",
-		PermissionError.HTTPCode,
-		response)
-
-	// Authenticated non-admin request returns PermissionError.
-	response = IssueRequest(s.handler, userReq)
-	ExpectStatusCode(c,
-		"authenticated non-admin request",
-		PermissionError.HTTPCode,
-		response)
-
-	// Authenticated admin request for nonexistent block.
-	type deletecounter struct {
-		Deleted int `json:"copies_deleted"`
-		Failed  int `json:"copies_failed"`
-	}
-	var responseDc, expectedDc deletecounter
-
-	response = IssueRequest(s.handler, superuserNonexistentBlockReq)
-	ExpectStatusCode(c,
-		"data manager request, nonexistent block",
-		http.StatusNotFound,
-		response)
-
-	// Authenticated admin request for existing block while BlobTrash is false.
-	s.cluster.Collections.BlobTrash = false
-	response = IssueRequest(s.handler, superuserExistingBlockReq)
-	ExpectStatusCode(c,
-		"authenticated request, existing block, method disabled",
-		MethodDisabledError.HTTPCode,
-		response)
-	s.cluster.Collections.BlobTrash = true
-
-	// Authenticated admin request for existing block.
-	response = IssueRequest(s.handler, superuserExistingBlockReq)
-	ExpectStatusCode(c,
-		"data manager request, existing block",
-		http.StatusOK,
-		response)
-	// Expect response {"copies_deleted":1,"copies_failed":0}
-	expectedDc = deletecounter{1, 0}
-	json.NewDecoder(response.Body).Decode(&responseDc)
-	if responseDc != expectedDc {
-		c.Errorf("superuserExistingBlockReq\nexpected: %+v\nreceived: %+v",
-			expectedDc, responseDc)
-	}
-	// Confirm the block has been deleted
-	buf := make([]byte, BlockSize)
-	_, err := vols[0].Get(context.Background(), TestHash, buf)
-	var blockDeleted = os.IsNotExist(err)
-	if !blockDeleted {
-		c.Error("superuserExistingBlockReq: block not deleted")
-	}
-
-	// A DELETE request on a block newer than BlobSigningTTL
-	// should return success but leave the block on the volume.
-	vols[0].Put(context.Background(), TestHash, TestBlock)
-	s.cluster.Collections.BlobSigningTTL = arvados.Duration(time.Hour)
-
-	response = IssueRequest(s.handler, superuserExistingBlockReq)
-	ExpectStatusCode(c,
-		"data manager request, existing block",
-		http.StatusOK,
-		response)
-	// Expect response {"copies_deleted":1,"copies_failed":0}
-	expectedDc = deletecounter{1, 0}
-	json.NewDecoder(response.Body).Decode(&responseDc)
-	if responseDc != expectedDc {
-		c.Errorf("superuserExistingBlockReq\nexpected: %+v\nreceived: %+v",
-			expectedDc, responseDc)
-	}
-	// Confirm the block has NOT been deleted.
-	_, err = vols[0].Get(context.Background(), TestHash, buf)
-	if err != nil {
-		c.Errorf("testing delete on new block: %s\n", err)
-	}
-}
-
-// TestPullHandler
-//
-// Test handling of the PUT /pull request.
-//
-// Cases tested: syntactically valid and invalid pull lists, from the
-// data manager and from unprivileged users:
-//
-//  1. Valid pull list from an ordinary user
-//     (expected result: 401 Unauthorized)
-//
-//  2. Invalid pull request from an ordinary user
-//     (expected result: 401 Unauthorized)
-//
-//  3. Valid pull request from the data manager
-//     (expected result: 200 OK with request body "Received 3 pull
-//     requests")
-//
-//  4. Invalid pull request from the data manager
-//     (expected result: 400 Bad Request)
-//
-// Test that in the end, the pull manager received a good pull list with
-// the expected number of requests.
-//
-// TODO(twp): test concurrency: launch 100 goroutines to update the
-// pull list simultaneously.  Make sure that none of them return 400
-// Bad Request and that pullq.GetList() returns a valid list.
-func (s *HandlerSuite) TestPullHandler(c *check.C) {
-	c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
-
-	// Replace the router's pullq -- which the worker goroutines
-	// started by setup() are now receiving from -- with a new
-	// one, so we can see what the handler sends to it.
-	pullq := NewWorkQueue()
-	s.handler.Handler.(*router).pullq = pullq
-
-	var userToken = "USER TOKEN"
-	s.cluster.SystemRootToken = "DATA MANAGER TOKEN"
-
-	goodJSON := []byte(`[
-		{
-			"locator":"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa+12345",
-			"servers":[
-				"http://server1",
-				"http://server2"
-		 	]
-		},
-		{
-			"locator":"bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb+12345",
-			"servers":[]
-		},
-		{
-			"locator":"cccccccccccccccccccccccccccccccc+12345",
-			"servers":["http://server1"]
-		}
-	]`)
-
-	badJSON := []byte(`{ "key":"I'm a little teapot" }`)
-
-	type pullTest struct {
-		name         string
-		req          RequestTester
-		responseCode int
-		responseBody string
-	}
-	var testcases = []pullTest{
-		{
-			"Valid pull list from an ordinary user",
-			RequestTester{"/pull", userToken, "PUT", goodJSON, ""},
-			http.StatusUnauthorized,
-			"Unauthorized\n",
-		},
-		{
-			"Invalid pull request from an ordinary user",
-			RequestTester{"/pull", userToken, "PUT", badJSON, ""},
-			http.StatusUnauthorized,
-			"Unauthorized\n",
-		},
-		{
-			"Valid pull request from the data manager",
-			RequestTester{"/pull", s.cluster.SystemRootToken, "PUT", goodJSON, ""},
-			http.StatusOK,
-			"Received 3 pull requests\n",
-		},
-		{
-			"Invalid pull request from the data manager",
-			RequestTester{"/pull", s.cluster.SystemRootToken, "PUT", badJSON, ""},
-			http.StatusBadRequest,
-			"",
-		},
-	}
-
-	for _, tst := range testcases {
-		response := IssueRequest(s.handler, &tst.req)
-		ExpectStatusCode(c, tst.name, tst.responseCode, response)
-		ExpectBody(c, tst.name, tst.responseBody, response)
-	}
-
-	// The Keep pull manager should have received one good list with 3
-	// requests on it.
-	for i := 0; i < 3; i++ {
-		var item interface{}
-		select {
-		case item = <-pullq.NextItem:
-		case <-time.After(time.Second):
-			c.Error("timed out")
-		}
-		if _, ok := item.(PullRequest); !ok {
-			c.Errorf("item %v could not be parsed as a PullRequest", item)
-		}
-	}
-
-	expectChannelEmpty(c, pullq.NextItem)
-}
-
-// TestTrashHandler
-//
-// Cases tested: syntactically valid and invalid trash lists, from the
-// data manager and from unprivileged users:
-//
-//  1. Valid trash list from an ordinary user
-//     (expected result: 401 Unauthorized)
-//
-//  2. Invalid trash list from an ordinary user
-//     (expected result: 401 Unauthorized)
-//
-//  3. Valid trash list from the data manager
-//     (expected result: 200 OK with request body "Received 3 trash
-//     requests")
-//
-//  4. Invalid trash list from the data manager
-//     (expected result: 400 Bad Request)
-//
-// Test that in the end, the trash collector received a good trash
-// list with the expected number of requests.
-//
-// TODO(twp): test concurrency: launch 100 goroutines to update the
-// trash list simultaneously.  Make sure that none of them return 400
-// Bad Request and that replica.Dump() returns a valid list.
-func (s *HandlerSuite) TestTrashHandler(c *check.C) {
-	c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
-	// Replace the router's trashq -- which the worker goroutines
-	// started by setup() are now receiving from -- with a new
-	// one, so we can see what the handler sends to it.
-	trashq := NewWorkQueue()
-	s.handler.Handler.(*router).trashq = trashq
-
-	var userToken = "USER TOKEN"
-	s.cluster.SystemRootToken = "DATA MANAGER TOKEN"
-
-	goodJSON := []byte(`[
-		{
-			"locator":"block1",
-			"block_mtime":1409082153
-		},
-		{
-			"locator":"block2",
-			"block_mtime":1409082153
-		},
-		{
-			"locator":"block3",
-			"block_mtime":1409082153
-		}
-	]`)
-
-	badJSON := []byte(`I am not a valid JSON string`)
-
-	type trashTest struct {
-		name         string
-		req          RequestTester
-		responseCode int
-		responseBody string
-	}
-
-	var testcases = []trashTest{
-		{
-			"Valid trash list from an ordinary user",
-			RequestTester{"/trash", userToken, "PUT", goodJSON, ""},
-			http.StatusUnauthorized,
-			"Unauthorized\n",
-		},
-		{
-			"Invalid trash list from an ordinary user",
-			RequestTester{"/trash", userToken, "PUT", badJSON, ""},
-			http.StatusUnauthorized,
-			"Unauthorized\n",
-		},
-		{
-			"Valid trash list from the data manager",
-			RequestTester{"/trash", s.cluster.SystemRootToken, "PUT", goodJSON, ""},
-			http.StatusOK,
-			"Received 3 trash requests\n",
-		},
-		{
-			"Invalid trash list from the data manager",
-			RequestTester{"/trash", s.cluster.SystemRootToken, "PUT", badJSON, ""},
-			http.StatusBadRequest,
-			"",
-		},
-	}
-
-	for _, tst := range testcases {
-		response := IssueRequest(s.handler, &tst.req)
-		ExpectStatusCode(c, tst.name, tst.responseCode, response)
-		ExpectBody(c, tst.name, tst.responseBody, response)
-	}
-
-	// The trash collector should have received one good list with 3
-	// requests on it.
-	for i := 0; i < 3; i++ {
-		item := <-trashq.NextItem
-		if _, ok := item.(TrashRequest); !ok {
-			c.Errorf("item %v could not be parsed as a TrashRequest", item)
-		}
-	}
-
-	expectChannelEmpty(c, trashq.NextItem)
-}
-
-// ====================
-// Helper functions
-// ====================
-
-// IssueRequest executes the HTTP request described by rt against a
-// REST router, and returns the HTTP response to the request.
-func IssueRequest(handler http.Handler, rt *RequestTester) *httptest.ResponseRecorder {
-	response := httptest.NewRecorder()
-	body := bytes.NewReader(rt.requestBody)
-	req, _ := http.NewRequest(rt.method, rt.uri, body)
-	if rt.apiToken != "" {
-		req.Header.Set("Authorization", "OAuth2 "+rt.apiToken)
-	}
-	if rt.storageClasses != "" {
-		req.Header.Set("X-Keep-Storage-Classes", rt.storageClasses)
-	}
-	handler.ServeHTTP(response, req)
-	return response
-}
-
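-// IssueHealthCheckRequest is like IssueRequest, but sends the API
-// token in a "Bearer" Authorization header instead of "OAuth2".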
-func IssueHealthCheckRequest(handler http.Handler, rt *RequestTester) *httptest.ResponseRecorder {
-	response := httptest.NewRecorder()
-	body := bytes.NewReader(rt.requestBody)
-	req, _ := http.NewRequest(rt.method, rt.uri, body)
-	if rt.apiToken != "" {
-		req.Header.Set("Authorization", "Bearer "+rt.apiToken)
-	}
-	handler.ServeHTTP(response, req)
-	return response
-}
-
-// ExpectStatusCode checks whether a response has the specified status code,
-// and reports a test failure if not.
-func ExpectStatusCode(
-	c *check.C,
-	testname string,
-	expectedStatus int,
-	response *httptest.ResponseRecorder) {
-	c.Check(response.Code, check.Equals, expectedStatus, check.Commentf("%s", testname))
-}
-
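-// ExpectBody checks whether the response body matches expectedBody,
-// and reports a test failure if not. An empty expectedBody is ignored.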
-func ExpectBody(
-	c *check.C,
-	testname string,
-	expectedBody string,
-	response *httptest.ResponseRecorder) {
-	if expectedBody != "" && response.Body.String() != expectedBody {
-		c.Errorf("%s: expected response body '%s', got %+v",
-			testname, expectedBody, response)
-	}
-}
-
-// See #7121
-func (s *HandlerSuite) TestPutNeedsOnlyOneBuffer(c *check.C) {
-	c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
-
-	defer func(orig *bufferPool) {
-		bufs = orig
-	}(bufs)
-	bufs = newBufferPool(ctxlog.TestLogger(c), 1, BlockSize)
-
-	ok := make(chan struct{})
-	go func() {
-		for i := 0; i < 2; i++ {
-			response := IssueRequest(s.handler,
-				&RequestTester{
-					method:      "PUT",
-					uri:         "/" + TestHash,
-					requestBody: TestBlock,
-				})
-			ExpectStatusCode(c,
-				"TestPutNeedsOnlyOneBuffer", http.StatusOK, response)
-		}
-		ok <- struct{}{}
-	}()
-
-	select {
-	case <-ok:
-	case <-time.After(time.Second):
-		c.Fatal("PUT deadlocks with MaxKeepBlobBuffers==1")
-	}
-}
-
-// Invoke the PutBlockHandler a bunch of times to test for bufferpool resource
-// leak.
-func (s *HandlerSuite) TestPutHandlerNoBufferleak(c *check.C) {
-	c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
-
-	ok := make(chan bool)
-	go func() {
-		for i := 0; i < s.cluster.API.MaxKeepBlobBuffers+1; i++ {
-			// Unauthenticated request, no server key
-			// => OK (unsigned response)
-			unsignedLocator := "/" + TestHash
-			response := IssueRequest(s.handler,
-				&RequestTester{
-					method:      "PUT",
-					uri:         unsignedLocator,
-					requestBody: TestBlock,
-				})
-			ExpectStatusCode(c,
-				"TestPutHandlerNoBufferleak", http.StatusOK, response)
-			ExpectBody(c,
-				"TestPutHandlerNoBufferleak",
-				TestHashPutResp, response)
-		}
-		ok <- true
-	}()
-	select {
-	case <-time.After(20 * time.Second):
-		// If the buffer pool leaks, the test goroutine hangs.
-		c.Fatal("test did not finish, assuming pool leaked")
-	case <-ok:
-	}
-}
-
-func (s *HandlerSuite) TestGetHandlerClientDisconnect(c *check.C) {
-	s.cluster.Collections.BlobSigning = false
-	c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
-
-	defer func(orig *bufferPool) {
-		bufs = orig
-	}(bufs)
-	bufs = newBufferPool(ctxlog.TestLogger(c), 1, BlockSize)
-	defer bufs.Put(bufs.Get(BlockSize))
-
-	err := s.handler.volmgr.AllWritable()[0].Put(context.Background(), TestHash, TestBlock)
-	c.Assert(err, check.IsNil)
-
-	resp := httptest.NewRecorder()
-	ok := make(chan struct{})
-	go func() {
-		ctx, cancel := context.WithCancel(context.Background())
-		req, _ := http.NewRequestWithContext(ctx, "GET", fmt.Sprintf("/%s+%d", TestHash, len(TestBlock)), nil)
-		cancel()
-		s.handler.ServeHTTP(resp, req)
-		ok <- struct{}{}
-	}()
-
-	select {
-	case <-time.After(20 * time.Second):
-		c.Fatal("request took >20s, close notifier must be broken")
-	case <-ok:
-	}
-
-	ExpectStatusCode(c, "client disconnect", http.StatusServiceUnavailable, resp)
-	for i, v := range s.handler.volmgr.AllWritable() {
-		if calls := v.Volume.(*MockVolume).called["GET"]; calls != 0 {
-			c.Errorf("volume %d got %d calls, expected 0", i, calls)
-		}
-	}
-}
-
-// Invoke the GetBlockHandler a bunch of times to test for bufferpool resource
-// leak.
-func (s *HandlerSuite) TestGetHandlerNoBufferLeak(c *check.C) {
-	c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
-
-	vols := s.handler.volmgr.AllWritable()
-	if err := vols[0].Put(context.Background(), TestHash, TestBlock); err != nil {
-		c.Error(err)
-	}
-
-	ok := make(chan bool)
-	go func() {
-		for i := 0; i < s.cluster.API.MaxKeepBlobBuffers+1; i++ {
-			// Unauthenticated request, unsigned locator
-			// => OK
-			unsignedLocator := "/" + TestHash
-			response := IssueRequest(s.handler,
-				&RequestTester{
-					method: "GET",
-					uri:    unsignedLocator,
-				})
-			ExpectStatusCode(c,
-				"Unauthenticated request, unsigned locator", http.StatusOK, response)
-			ExpectBody(c,
-				"Unauthenticated request, unsigned locator",
-				string(TestBlock),
-				response)
-		}
-		ok <- true
-	}()
-	select {
-	case <-time.After(20 * time.Second):
-		// If the buffer pool leaks, the test goroutine hangs.
-		c.Fatal("test did not finish, assuming pool leaked")
-	case <-ok:
-	}
-}
-
-func (s *HandlerSuite) TestPutStorageClasses(c *check.C) {
-	s.cluster.Volumes = map[string]arvados.Volume{
-		"zzzzz-nyw5e-000000000000000": {Replication: 1, Driver: "mock"}, // "default" is implicit
-		"zzzzz-nyw5e-111111111111111": {Replication: 1, Driver: "mock", StorageClasses: map[string]bool{"special": true, "extra": true}},
-		"zzzzz-nyw5e-222222222222222": {Replication: 1, Driver: "mock", StorageClasses: map[string]bool{"readonly": true}, ReadOnly: true},
-	}
-	c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
-	rt := RequestTester{
-		method:      "PUT",
-		uri:         "/" + TestHash,
-		requestBody: TestBlock,
-	}
-
-	for _, trial := range []struct {
-		ask    string
-		expect string
-	}{
-		{"", ""},
-		{"default", "default=1"},
-		{" , default , default , ", "default=1"},
-		{"special", "extra=1, special=1"},
-		{"special, readonly", "extra=1, special=1"},
-		{"special, nonexistent", "extra=1, special=1"},
-		{"extra, special", "extra=1, special=1"},
-		{"default, special", "default=1, extra=1, special=1"},
-	} {
-		c.Logf("success case %#v", trial)
-		rt.storageClasses = trial.ask
-		resp := IssueRequest(s.handler, &rt)
-		if trial.expect == "" {
-			// any non-empty value is correct
-			c.Check(resp.Header().Get("X-Keep-Storage-Classes-Confirmed"), check.Not(check.Equals), "")
-		} else {
-			c.Check(sortCommaSeparated(resp.Header().Get("X-Keep-Storage-Classes-Confirmed")), check.Equals, trial.expect)
-		}
-	}
-
-	for _, trial := range []struct {
-		ask string
-	}{
-		{"doesnotexist"},
-		{"doesnotexist, readonly"},
-		{"readonly"},
-	} {
-		c.Logf("failure case %#v", trial)
-		rt.storageClasses = trial.ask
-		resp := IssueRequest(s.handler, &rt)
-		c.Check(resp.Code, check.Equals, http.StatusServiceUnavailable)
-	}
-}
-
-func sortCommaSeparated(s string) string {
-	slice := strings.Split(s, ", ")
-	sort.Strings(slice)
-	return strings.Join(slice, ", ")
-}
-
-func (s *HandlerSuite) TestPutResponseHeader(c *check.C) {
-	c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
-
-	resp := IssueRequest(s.handler, &RequestTester{
-		method:      "PUT",
-		uri:         "/" + TestHash,
-		requestBody: TestBlock,
-	})
-	c.Logf("%#v", resp)
-	c.Check(resp.Header().Get("X-Keep-Replicas-Stored"), check.Equals, "1")
-	c.Check(resp.Header().Get("X-Keep-Storage-Classes-Confirmed"), check.Equals, "default=1")
-}
-
-func (s *HandlerSuite) TestUntrashHandler(c *check.C) {
-	c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
-
-	// Set up Keep volumes
-	vols := s.handler.volmgr.AllWritable()
-	vols[0].Put(context.Background(), TestHash, TestBlock)
-
-	s.cluster.SystemRootToken = "DATA MANAGER TOKEN"
-
-	// unauthenticatedReq => UnauthorizedError
-	unauthenticatedReq := &RequestTester{
-		method: "PUT",
-		uri:    "/untrash/" + TestHash,
-	}
-	response := IssueRequest(s.handler, unauthenticatedReq)
-	ExpectStatusCode(c,
-		"Unauthenticated request",
-		UnauthorizedError.HTTPCode,
-		response)
-
-	// notDataManagerReq => UnauthorizedError
-	notDataManagerReq := &RequestTester{
-		method:   "PUT",
-		uri:      "/untrash/" + TestHash,
-		apiToken: knownToken,
-	}
-
-	response = IssueRequest(s.handler, notDataManagerReq)
-	ExpectStatusCode(c,
-		"Non-datamanager token",
-		UnauthorizedError.HTTPCode,
-		response)
-
-	// datamanagerWithBadHashReq => StatusBadRequest
-	datamanagerWithBadHashReq := &RequestTester{
-		method:   "PUT",
-		uri:      "/untrash/thisisnotalocator",
-		apiToken: s.cluster.SystemRootToken,
-	}
-	response = IssueRequest(s.handler, datamanagerWithBadHashReq)
-	ExpectStatusCode(c,
-		"Bad locator in untrash request",
-		http.StatusBadRequest,
-		response)
-
-	// datamanagerWrongMethodReq => StatusBadRequest
-	datamanagerWrongMethodReq := &RequestTester{
-		method:   "GET",
-		uri:      "/untrash/" + TestHash,
-		apiToken: s.cluster.SystemRootToken,
-	}
-	response = IssueRequest(s.handler, datamanagerWrongMethodReq)
-	ExpectStatusCode(c,
-		"Only PUT method is supported for untrash",
-		http.StatusMethodNotAllowed,
-		response)
-
-	// datamanagerReq => StatusOK
-	datamanagerReq := &RequestTester{
-		method:   "PUT",
-		uri:      "/untrash/" + TestHash,
-		apiToken: s.cluster.SystemRootToken,
-	}
-	response = IssueRequest(s.handler, datamanagerReq)
-	ExpectStatusCode(c,
-		"",
-		http.StatusOK,
-		response)
-	c.Check(response.Body.String(), check.Equals, "Successfully untrashed on: [MockVolume], [MockVolume]\n")
-}
-
-func (s *HandlerSuite) TestUntrashHandlerWithNoWritableVolumes(c *check.C) {
-	// Change all volumes to read-only
-	for uuid, v := range s.cluster.Volumes {
-		v.ReadOnly = true
-		s.cluster.Volumes[uuid] = v
-	}
-	c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
-
-	// datamanagerReq => StatusOK
-	datamanagerReq := &RequestTester{
-		method:   "PUT",
-		uri:      "/untrash/" + TestHash,
-		apiToken: s.cluster.SystemRootToken,
-	}
-	response := IssueRequest(s.handler, datamanagerReq)
-	ExpectStatusCode(c,
-		"No writable volumes",
-		http.StatusNotFound,
-		response)
-}
-
-func (s *HandlerSuite) TestHealthCheckPing(c *check.C) {
-	s.cluster.ManagementToken = arvadostest.ManagementToken
-	c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
-	pingReq := &RequestTester{
-		method:   "GET",
-		uri:      "/_health/ping",
-		apiToken: arvadostest.ManagementToken,
-	}
-	response := IssueHealthCheckRequest(s.handler, pingReq)
-	ExpectStatusCode(c,
-		"",
-		http.StatusOK,
-		response)
-	want := `{"health":"OK"}`
-	if !strings.Contains(response.Body.String(), want) {
-		c.Errorf("expected response to include %s: got %s", want, response.Body.String())
-	}
-}
diff --git a/services/keepstore/handlers.go b/services/keepstore/handlers.go
deleted file mode 100644
index abeb20fe86..0000000000
--- a/services/keepstore/handlers.go
+++ /dev/null
@@ -1,1056 +0,0 @@
-// Copyright (C) The Arvados Authors. All rights reserved.
-//
-// SPDX-License-Identifier: AGPL-3.0
-
-package keepstore
-
-import (
-	"container/list"
-	"context"
-	"crypto/md5"
-	"encoding/json"
-	"fmt"
-	"io"
-	"net/http"
-	"os"
-	"regexp"
-	"runtime"
-	"strconv"
-	"strings"
-	"sync"
-	"sync/atomic"
-	"time"
-
-	"git.arvados.org/arvados.git/lib/cmd"
-	"git.arvados.org/arvados.git/sdk/go/arvados"
-	"git.arvados.org/arvados.git/sdk/go/ctxlog"
-	"git.arvados.org/arvados.git/sdk/go/health"
-	"git.arvados.org/arvados.git/sdk/go/httpserver"
-	"github.com/gorilla/mux"
-	"github.com/prometheus/client_golang/prometheus"
-	"github.com/sirupsen/logrus"
-)
-
-type router struct {
-	*mux.Router
-	cluster     *arvados.Cluster
-	logger      logrus.FieldLogger
-	remoteProxy remoteProxy
-	metrics     *nodeMetrics
-	volmgr      *RRVolumeManager
-	pullq       *WorkQueue
-	trashq      *WorkQueue
-}
-
-// MakeRESTRouter returns a new router that forwards all Keep requests
-// to the appropriate handlers.
-func MakeRESTRouter(ctx context.Context, cluster *arvados.Cluster, reg *prometheus.Registry, volmgr *RRVolumeManager, pullq, trashq *WorkQueue) http.Handler {
-	rtr := &router{
-		Router:  mux.NewRouter(),
-		cluster: cluster,
-		logger:  ctxlog.FromContext(ctx),
-		metrics: &nodeMetrics{reg: reg},
-		volmgr:  volmgr,
-		pullq:   pullq,
-		trashq:  trashq,
-	}
-
-	rtr.HandleFunc(
-		`/{hash:[0-9a-f]{32}}`, rtr.handleGET).Methods("GET", "HEAD")
-	rtr.HandleFunc(
-		`/{hash:[0-9a-f]{32}}+{hints}`,
-		rtr.handleGET).Methods("GET", "HEAD")
-
-	rtr.HandleFunc(`/{hash:[0-9a-f]{32}}`, rtr.handlePUT).Methods("PUT")
-	rtr.HandleFunc(`/{hash:[0-9a-f]{32}}`, rtr.handleDELETE).Methods("DELETE")
-	// List all blocks stored here. Privileged client only.
-	rtr.HandleFunc(`/index`, rtr.handleIndex).Methods("GET", "HEAD")
-	// List blocks stored here whose hash has the given prefix.
-	// Privileged client only.
-	rtr.HandleFunc(`/index/{prefix:[0-9a-f]{0,32}}`, rtr.handleIndex).Methods("GET", "HEAD")
-	// Update timestamp on existing block. Privileged client only.
-	rtr.HandleFunc(`/{hash:[0-9a-f]{32}}`, rtr.handleTOUCH).Methods("TOUCH")
-
-	// Internals/debugging info (runtime.MemStats)
-	rtr.HandleFunc(`/debug.json`, rtr.DebugHandler).Methods("GET", "HEAD")
-
-	// List volumes: path, device number, bytes used/avail.
-	rtr.HandleFunc(`/status.json`, rtr.StatusHandler).Methods("GET", "HEAD")
-
-	// List mounts: UUID, readonly, tier, device ID, ...
-	rtr.HandleFunc(`/mounts`, rtr.MountsHandler).Methods("GET")
-	rtr.HandleFunc(`/mounts/{uuid}/blocks`, rtr.handleIndex).Methods("GET")
-	rtr.HandleFunc(`/mounts/{uuid}/blocks/`, rtr.handleIndex).Methods("GET")
-
-	// Replace the current pull queue.
-	rtr.HandleFunc(`/pull`, rtr.handlePull).Methods("PUT")
-
-	// Replace the current trash queue.
-	rtr.HandleFunc(`/trash`, rtr.handleTrash).Methods("PUT")
-
-	// Untrash moves blocks from trash back into store
-	rtr.HandleFunc(`/untrash/{hash:[0-9a-f]{32}}`, rtr.handleUntrash).Methods("PUT")
-
-	rtr.Handle("/_health/{check}", &health.Handler{
-		Token:  cluster.ManagementToken,
-		Prefix: "/_health/",
-	}).Methods("GET")
-
-	// Any request which does not match any of these routes gets
-	// 400 Bad Request.
-	rtr.NotFoundHandler = http.HandlerFunc(BadRequestHandler)
-
-	rtr.metrics.setupBufferPoolMetrics(bufs)
-	rtr.metrics.setupWorkQueueMetrics(rtr.pullq, "pull")
-	rtr.metrics.setupWorkQueueMetrics(rtr.trashq, "trash")
-
-	return rtr
-}
-
-// BadRequestHandler is an http.HandlerFunc that responds with 400 Bad Request.
-func BadRequestHandler(w http.ResponseWriter, r *http.Request) {
-	http.Error(w, BadRequestError.Error(), BadRequestError.HTTPCode)
-}
-
-func (rtr *router) handleGET(resp http.ResponseWriter, req *http.Request) {
-	locator := req.URL.Path[1:]
-	if strings.Contains(locator, "+R") && !strings.Contains(locator, "+A") {
-		rtr.remoteProxy.Get(req.Context(), resp, req, rtr.cluster, rtr.volmgr)
-		return
-	}
-
-	if rtr.cluster.Collections.BlobSigning {
-		locator := req.URL.Path[1:] // strip leading slash
-		if err := VerifySignature(rtr.cluster, locator, GetAPIToken(req)); err != nil {
-			http.Error(resp, err.Error(), err.(*KeepError).HTTPCode)
-			return
-		}
-	}
-
-	// TODO: Probe volumes to check whether the block _might_
-	// exist. Some volumes/types could support a quick existence
-	// check without causing other operations to suffer. If all
-	// volumes support that, and assure us the block definitely
-	// isn't here, we can return 404 now instead of waiting for a
-	// buffer.
-
-	buf, err := getBufferWithContext(req.Context(), bufs, BlockSize)
-	if err != nil {
-		http.Error(resp, err.Error(), http.StatusServiceUnavailable)
-		return
-	}
-	defer bufs.Put(buf)
-
-	size, err := GetBlock(req.Context(), rtr.volmgr, mux.Vars(req)["hash"], buf, resp)
-	if err != nil {
-		code := http.StatusInternalServerError
-		if err, ok := err.(*KeepError); ok {
-			code = err.HTTPCode
-		}
-		http.Error(resp, err.Error(), code)
-		return
-	}
-
-	resp.Header().Set("Content-Length", strconv.Itoa(size))
-	resp.Header().Set("Content-Type", "application/octet-stream")
-	resp.Write(buf[:size])
-}
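As an aside, here is a minimal client-side sketch of the GET flow handled above. It assumes a hypothetical keepstore base URL and API token (neither is defined in this diff); the signed-locator format with the "+A<signature>@<expiry>" hint is the one produced by SignLocator.

// Sketch only -- assumes imports "fmt", "io", "net/http";
// baseURL, signedLocator, and apiToken are hypothetical inputs.
func fetchBlock(baseURL, signedLocator, apiToken string) ([]byte, error) {
	req, err := http.NewRequest("GET", baseURL+"/"+signedLocator, nil)
	if err != nil {
		return nil, err
	}
	// Same header form the test helpers above use.
	req.Header.Set("Authorization", "OAuth2 "+apiToken)
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("GET %s: %s", signedLocator, resp.Status)
	}
	return io.ReadAll(resp.Body)
}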
-
-// Get a buffer from the pool -- but give up and return a non-nil
-// error if ctx ends before we get a buffer.
-func getBufferWithContext(ctx context.Context, bufs *bufferPool, bufSize int) ([]byte, error) {
-	bufReady := make(chan []byte)
-	go func() {
-		bufReady <- bufs.Get(bufSize)
-	}()
-	select {
-	case buf := <-bufReady:
-		return buf, nil
-	case <-ctx.Done():
-		go func() {
-			// Even if the client disconnected first, we
-			// need to keep waiting for our buf so we can
-			// return it to the pool.
-			bufs.Put(<-bufReady)
-		}()
-		return nil, ErrClientDisconnect
-	}
-}
-
-func (rtr *router) handleTOUCH(resp http.ResponseWriter, req *http.Request) {
-	if !rtr.isSystemAuth(GetAPIToken(req)) {
-		http.Error(resp, UnauthorizedError.Error(), UnauthorizedError.HTTPCode)
-		return
-	}
-	hash := mux.Vars(req)["hash"]
-	vols := rtr.volmgr.AllWritable()
-	if len(vols) == 0 {
-		http.Error(resp, "no volumes", http.StatusNotFound)
-		return
-	}
-	var err error
-	for _, mnt := range vols {
-		err = mnt.Touch(hash)
-		if err == nil {
-			break
-		}
-	}
-	switch {
-	case err == nil:
-		return
-	case os.IsNotExist(err):
-		http.Error(resp, err.Error(), http.StatusNotFound)
-	default:
-		http.Error(resp, err.Error(), http.StatusInternalServerError)
-	}
-}
-
-func (rtr *router) handlePUT(resp http.ResponseWriter, req *http.Request) {
-	hash := mux.Vars(req)["hash"]
-
-	// Detect as many error conditions as possible before reading
-	// the body: avoid transmitting data that will not end up
-	// being written anyway.
-
-	if req.ContentLength == -1 {
-		http.Error(resp, SizeRequiredError.Error(), SizeRequiredError.HTTPCode)
-		return
-	}
-
-	if req.ContentLength > BlockSize {
-		http.Error(resp, TooLongError.Error(), TooLongError.HTTPCode)
-		return
-	}
-
-	if len(rtr.volmgr.AllWritable()) == 0 {
-		http.Error(resp, FullError.Error(), FullError.HTTPCode)
-		return
-	}
-
-	var wantStorageClasses []string
-	if hdr := req.Header.Get("X-Keep-Storage-Classes"); hdr != "" {
-		wantStorageClasses = strings.Split(hdr, ",")
-		for i, sc := range wantStorageClasses {
-			wantStorageClasses[i] = strings.TrimSpace(sc)
-		}
-	} else {
-		// none specified -- use configured default
-		for class, cfg := range rtr.cluster.StorageClasses {
-			if cfg.Default {
-				wantStorageClasses = append(wantStorageClasses, class)
-			}
-		}
-	}
-
-	buf, err := getBufferWithContext(req.Context(), bufs, int(req.ContentLength))
-	if err != nil {
-		http.Error(resp, err.Error(), http.StatusServiceUnavailable)
-		return
-	}
-
-	_, err = io.ReadFull(req.Body, buf)
-	if err != nil {
-		http.Error(resp, err.Error(), http.StatusInternalServerError)
-		bufs.Put(buf)
-		return
-	}
-
-	result, err := PutBlock(req.Context(), rtr.volmgr, buf, hash, wantStorageClasses)
-	bufs.Put(buf)
-
-	if err != nil {
-		code := http.StatusInternalServerError
-		if err, ok := err.(*KeepError); ok {
-			code = err.HTTPCode
-		}
-		http.Error(resp, err.Error(), code)
-		return
-	}
-
-	// Success; add a size hint, sign the locator if possible, and
-	// return it to the client.
-	returnHash := fmt.Sprintf("%s+%d", hash, req.ContentLength)
-	apiToken := GetAPIToken(req)
-	if rtr.cluster.Collections.BlobSigningKey != "" && apiToken != "" {
-		expiry := time.Now().Add(rtr.cluster.Collections.BlobSigningTTL.Duration())
-		returnHash = SignLocator(rtr.cluster, returnHash, apiToken, expiry)
-	}
-	resp.Header().Set("X-Keep-Replicas-Stored", result.TotalReplication())
-	resp.Header().Set("X-Keep-Storage-Classes-Confirmed", result.ClassReplication())
-	resp.Write([]byte(returnHash + "\n"))
-}
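A matching client-side sketch for the PUT flow, again with hypothetical baseURL/apiToken inputs: it shows the X-Keep-Storage-Classes request header parsed above, and reads back the signed locator and the confirmation headers that handlePUT sets.

// Sketch only -- assumes imports "bytes", "crypto/md5", "fmt", "io",
// "net/http", "strings"; baseURL and apiToken are hypothetical inputs.
func putBlock(baseURL string, data []byte, apiToken string) (string, error) {
	hash := fmt.Sprintf("%x", md5.Sum(data))
	req, err := http.NewRequest("PUT", baseURL+"/"+hash, bytes.NewReader(data))
	if err != nil {
		return "", err
	}
	req.Header.Set("Authorization", "OAuth2 "+apiToken)
	req.Header.Set("X-Keep-Storage-Classes", "default")
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", err
	}
	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("PUT %s: %s", hash, resp.Status)
	}
	// e.g. "1" and "default=1", per the headers set in handlePUT.
	_ = resp.Header.Get("X-Keep-Replicas-Stored")
	_ = resp.Header.Get("X-Keep-Storage-Classes-Confirmed")
	// The body is the size-hinted (and possibly signed) locator plus "\n".
	return strings.TrimSuffix(string(body), "\n"), nil
}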
-
-// handleIndex responds to "/index", "/index/{prefix}", and
-// "/mounts/{uuid}/blocks" requests.
-func (rtr *router) handleIndex(resp http.ResponseWriter, req *http.Request) {
-	if !rtr.isSystemAuth(GetAPIToken(req)) {
-		http.Error(resp, UnauthorizedError.Error(), UnauthorizedError.HTTPCode)
-		return
-	}
-
-	prefix := mux.Vars(req)["prefix"]
-	if prefix == "" {
-		req.ParseForm()
-		prefix = req.Form.Get("prefix")
-	}
-
-	uuid := mux.Vars(req)["uuid"]
-
-	var vols []*VolumeMount
-	if uuid == "" {
-		vols = rtr.volmgr.AllReadable()
-	} else if mnt := rtr.volmgr.Lookup(uuid, false); mnt == nil {
-		http.Error(resp, "mount not found", http.StatusNotFound)
-		return
-	} else {
-		vols = []*VolumeMount{mnt}
-	}
-
-	for _, v := range vols {
-		if err := v.IndexTo(prefix, resp); err != nil {
-			// We can't send an error status/message to
-			// the client because IndexTo() might have
-			// already written body content. All we can do
-			// is log the error in our own logs.
-			//
-			// The client must notice the lack of trailing
-			// newline as an indication that the response
-			// is incomplete.
-			ctxlog.FromContext(req.Context()).WithError(err).Errorf("truncating index response after error from volume %s", v)
-			return
-		}
-	}
-	// An empty line at EOF is the only way the client can be
-	// assured the entire index was received.
-	resp.Write([]byte{'\n'})
-}
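Since the trailing empty line is the client's only completeness signal, a careful index consumer should verify it before trusting the listing. A minimal sketch of that check, assuming the whole response body has already been read into a string:

// Sketch only: a complete index body ends with an empty line, i.e.
// the raw body ends in "\n\n" ("\n" alone means an empty index).
// Each entry is one "<locator> <mtime>" line.
func parseIndex(body string) ([]string, error) {
	if body != "\n" && !strings.HasSuffix(body, "\n\n") {
		return nil, fmt.Errorf("index response truncated")
	}
	var entries []string
	for _, line := range strings.Split(strings.TrimSuffix(body, "\n"), "\n") {
		if line != "" {
			entries = append(entries, line)
		}
	}
	return entries, nil
}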
-
-// MountsHandler responds to "GET /mounts" requests.
-func (rtr *router) MountsHandler(resp http.ResponseWriter, req *http.Request) {
-	err := json.NewEncoder(resp).Encode(rtr.volmgr.Mounts())
-	if err != nil {
-		httpserver.Error(resp, err.Error(), http.StatusInternalServerError)
-	}
-}
-
-// PoolStatus reports buffer pool usage statistics.
-type PoolStatus struct {
-	Alloc uint64 `json:"BytesAllocatedCumulative"`
-	Cap   int    `json:"BuffersMax"`
-	Len   int    `json:"BuffersInUse"`
-}
-
-type volumeStatusEnt struct {
-	Label         string
-	Status        *VolumeStatus `json:",omitempty"`
-	VolumeStats   *ioStats      `json:",omitempty"`
-	InternalStats interface{}   `json:",omitempty"`
-}
-
-// NodeStatus reports the current status of this keepstore node.
-type NodeStatus struct {
-	Volumes         []*volumeStatusEnt
-	BufferPool      PoolStatus
-	PullQueue       WorkQueueStatus
-	TrashQueue      WorkQueueStatus
-	RequestsCurrent int
-	RequestsMax     int
-	Version         string
-}
-
-var st NodeStatus
-var stLock sync.Mutex
-
-// DebugHandler addresses /debug.json requests.
-func (rtr *router) DebugHandler(resp http.ResponseWriter, req *http.Request) {
-	type debugStats struct {
-		MemStats runtime.MemStats
-	}
-	var ds debugStats
-	runtime.ReadMemStats(&ds.MemStats)
-	data, err := json.Marshal(&ds)
-	if err != nil {
-		http.Error(resp, err.Error(), http.StatusInternalServerError)
-		return
-	}
-	resp.Write(data)
-}
-
-// StatusHandler addresses /status.json requests.
-func (rtr *router) StatusHandler(resp http.ResponseWriter, req *http.Request) {
-	stLock.Lock()
-	rtr.readNodeStatus(&st)
-	data, err := json.Marshal(&st)
-	stLock.Unlock()
-	if err != nil {
-		http.Error(resp, err.Error(), http.StatusInternalServerError)
-		return
-	}
-	resp.Write(data)
-}
-
-// readNodeStatus populates the given NodeStatus struct with current values.
-func (rtr *router) readNodeStatus(st *NodeStatus) {
-	st.Version = strings.SplitN(cmd.Version.String(), " ", 2)[0]
-	vols := rtr.volmgr.AllReadable()
-	if cap(st.Volumes) < len(vols) {
-		st.Volumes = make([]*volumeStatusEnt, len(vols))
-	}
-	st.Volumes = st.Volumes[:0]
-	for _, vol := range vols {
-		var internalStats interface{}
-		if vol, ok := vol.Volume.(InternalStatser); ok {
-			internalStats = vol.InternalStats()
-		}
-		st.Volumes = append(st.Volumes, &volumeStatusEnt{
-			Label:         vol.String(),
-			Status:        vol.Status(),
-			InternalStats: internalStats,
-			//VolumeStats: rtr.volmgr.VolumeStats(vol),
-		})
-	}
-	st.BufferPool.Alloc = bufs.Alloc()
-	st.BufferPool.Cap = bufs.Cap()
-	st.BufferPool.Len = bufs.Len()
-	st.PullQueue = getWorkQueueStatus(rtr.pullq)
-	st.TrashQueue = getWorkQueueStatus(rtr.trashq)
-}
-
-// getWorkQueueStatus returns a WorkQueueStatus for the given queue. If
-// q is nil (which should never happen except in test suites), it
-// returns a zero status value instead of crashing.
-func getWorkQueueStatus(q *WorkQueue) WorkQueueStatus {
-	if q == nil {
-		// This should only happen during tests.
-		return WorkQueueStatus{}
-	}
-	return q.Status()
-}
-
-// handleDELETE processes DELETE requests.
-//
-// DELETE /{hash:[0-9a-f]{32}} will delete the block with the specified hash
-// from all connected volumes.
-//
-// Only the Data Manager, or an Arvados admin with scope "all", is
-// allowed to issue DELETE requests.  If a DELETE request is not
-// authenticated or is issued by a non-admin user, the server returns
-// a PermissionError.
-//
-// Upon receiving a valid request from an authorized user,
-// handleDELETE deletes all copies of the specified block on local
-// writable volumes.
-//
-// Response format:
-//
-// If the requested block was not found on any volume, the response
-// code is HTTP 404 Not Found.
-//
-// Otherwise, the response code is 200 OK, with a response body
-// consisting of the JSON message
-//
-//	{"copies_deleted":d,"copies_failed":f}
-//
-// where d and f are integers representing the number of copies that
-// were successfully and unsuccessfully deleted.
-func (rtr *router) handleDELETE(resp http.ResponseWriter, req *http.Request) {
-	hash := mux.Vars(req)["hash"]
-
-	// Confirm that this user is an admin and has a token with unlimited scope.
-	var tok = GetAPIToken(req)
-	if tok == "" || !rtr.canDelete(tok) {
-		http.Error(resp, PermissionError.Error(), PermissionError.HTTPCode)
-		return
-	}
-
-	if !rtr.cluster.Collections.BlobTrash {
-		http.Error(resp, MethodDisabledError.Error(), MethodDisabledError.HTTPCode)
-		return
-	}
-
-	// Delete copies of this block from all available volumes.
-	// Report how many blocks were successfully deleted, and how
-	// many were found on writable volumes but not deleted.
-	var result struct {
-		Deleted int `json:"copies_deleted"`
-		Failed  int `json:"copies_failed"`
-	}
-	for _, vol := range rtr.volmgr.Mounts() {
-		if !vol.KeepMount.AllowTrash {
-			continue
-		} else if err := vol.Trash(hash); err == nil {
-			result.Deleted++
-		} else if os.IsNotExist(err) {
-			continue
-		} else {
-			result.Failed++
-			ctxlog.FromContext(req.Context()).WithError(err).Errorf("Trash(%s) failed on volume %s", hash, vol)
-		}
-	}
-	if result.Deleted == 0 && result.Failed == 0 {
-		resp.WriteHeader(http.StatusNotFound)
-		return
-	}
-	body, err := json.Marshal(result)
-	if err != nil {
-		http.Error(resp, err.Error(), http.StatusInternalServerError)
-		return
-	}
-	resp.Write(body)
-}
-
-/* handlePull processes "PUT /pull" requests for the data manager.
-   The request body is a JSON message containing a list of pull
-   requests in the following format:
-
-   [
-      {
-         "locator":"e4d909c290d0fb1ca068ffaddf22cbd0+4985",
-         "servers":[
-			"keep0.qr1hi.arvadosapi.com:25107",
-			"keep1.qr1hi.arvadosapi.com:25108"
-		 ]
-	  },
-	  {
-		 "locator":"55ae4d45d2db0793d53f03e805f656e5+658395",
-		 "servers":[
-			"10.0.1.5:25107",
-			"10.0.1.6:25107",
-			"10.0.1.7:25108"
-		 ]
-	  },
-	  ...
-   ]
-
-   Each pull request in the list consists of a block locator string
-   and an ordered list of servers.  Keepstore should try to fetch the
-   block from each server in turn.
-
-   If the request has not been sent by the Data Manager, return 401
-   Unauthorized.
-
-   If the JSON unmarshalling fails, return 400 Bad Request.
-*/
-
-// PullRequest consists of a block locator and an ordered list of servers
-type PullRequest struct {
-	Locator string   `json:"locator"`
-	Servers []string `json:"servers"`
-
-	// Destination mount, or "" for "anywhere"
-	MountUUID string `json:"mount_uuid"`
-}
-
-// handlePull processes "PUT /pull" requests for the data manager.
-func (rtr *router) handlePull(resp http.ResponseWriter, req *http.Request) {
-	// Reject unauthorized requests.
-	if !rtr.isSystemAuth(GetAPIToken(req)) {
-		http.Error(resp, UnauthorizedError.Error(), UnauthorizedError.HTTPCode)
-		return
-	}
-
-	// Parse the request body.
-	var pr []PullRequest
-	r := json.NewDecoder(req.Body)
-	if err := r.Decode(&pr); err != nil {
-		http.Error(resp, err.Error(), BadRequestError.HTTPCode)
-		return
-	}
-
-	// We have a properly formatted pull list sent from the data
-	// manager.  Report success and send the list to the pull list
-	// manager for further handling.
-	resp.WriteHeader(http.StatusOK)
-	fmt.Fprintf(resp, "Received %d pull requests\n", len(pr))
-
-	plist := list.New()
-	for _, p := range pr {
-		plist.PushBack(p)
-	}
-	rtr.pullq.ReplaceQueue(plist)
-}
-
-// TrashRequest consists of a block locator and its Mtime
-type TrashRequest struct {
-	Locator    string `json:"locator"`
-	BlockMtime int64  `json:"block_mtime"`
-
-	// Target mount, or "" for "everywhere"
-	MountUUID string `json:"mount_uuid"`
-}
-
-// handleTrash processes "PUT /trash" requests for the data manager.
-func (rtr *router) handleTrash(resp http.ResponseWriter, req *http.Request) {
-	// Reject unauthorized requests.
-	if !rtr.isSystemAuth(GetAPIToken(req)) {
-		http.Error(resp, UnauthorizedError.Error(), UnauthorizedError.HTTPCode)
-		return
-	}
-
-	// Parse the request body.
-	var trash []TrashRequest
-	r := json.NewDecoder(req.Body)
-	if err := r.Decode(&trash); err != nil {
-		http.Error(resp, err.Error(), BadRequestError.HTTPCode)
-		return
-	}
-
-	// We have a properly formatted trash list sent from the data
-	// manager.  Report success and send the list to the trash work
-	// queue for further handling.
-	resp.WriteHeader(http.StatusOK)
-	fmt.Fprintf(resp, "Received %d trash requests\n", len(trash))
-
-	tlist := list.New()
-	for _, t := range trash {
-		tlist.PushBack(t)
-	}
-	rtr.trashq.ReplaceQueue(tlist)
-}
-
-// handleUntrash processes "PUT /untrash/{hash:[0-9a-f]{32}}" requests for the data manager.
-func (rtr *router) handleUntrash(resp http.ResponseWriter, req *http.Request) {
-	// Reject unauthorized requests.
-	if !rtr.isSystemAuth(GetAPIToken(req)) {
-		http.Error(resp, UnauthorizedError.Error(), UnauthorizedError.HTTPCode)
-		return
-	}
-
-	log := ctxlog.FromContext(req.Context())
-	hash := mux.Vars(req)["hash"]
-
-	if len(rtr.volmgr.AllWritable()) == 0 {
-		http.Error(resp, "No writable volumes", http.StatusNotFound)
-		return
-	}
-
-	var untrashedOn, failedOn []string
-	var numNotFound int
-	for _, vol := range rtr.volmgr.AllWritable() {
-		err := vol.Untrash(hash)
-
-		if os.IsNotExist(err) {
-			numNotFound++
-		} else if err != nil {
-			log.WithError(err).Errorf("Error untrashing %v on volume %s", hash, vol)
-			failedOn = append(failedOn, vol.String())
-		} else {
-			log.Infof("Untrashed %v on volume %v", hash, vol.String())
-			untrashedOn = append(untrashedOn, vol.String())
-		}
-	}
-
-	if numNotFound == len(rtr.volmgr.AllWritable()) {
-		http.Error(resp, "Block not found on any of the writable volumes", http.StatusNotFound)
-	} else if len(failedOn) == len(rtr.volmgr.AllWritable()) {
-		http.Error(resp, "Failed to untrash on all writable volumes", http.StatusInternalServerError)
-	} else {
-		respBody := "Successfully untrashed on: " + strings.Join(untrashedOn, ", ")
-		if len(failedOn) > 0 {
-			respBody += "; Failed to untrash on: " + strings.Join(failedOn, ", ")
-			http.Error(resp, respBody, http.StatusInternalServerError)
-		} else {
-			fmt.Fprintln(resp, respBody)
-		}
-	}
-}
-
-// GetBlock and PutBlock implement lower-level code for handling
-// blocks by rooting through volumes connected to the local machine.
-// Once the handler has determined that system policy permits the
-// request, it calls these methods to perform the actual operation.
-//
-// TODO(twp): this code would probably be better located in the
-// VolumeManager interface. As an abstraction, the VolumeManager
-// should be the only part of the code that cares about which volume a
-// block is stored on, so it should be responsible for figuring out
-// which volume to check for fetching blocks, storing blocks, etc.
-
-// GetBlock fetches the block identified by "hash" into the provided
-// buf, and returns the data size.
-//
-// If the block cannot be found on any volume, returns NotFoundError.
-//
-// If the block found does not have the correct MD5 hash, returns
-// DiskHashError.
-func GetBlock(ctx context.Context, volmgr *RRVolumeManager, hash string, buf []byte, resp http.ResponseWriter) (int, error) {
-	log := ctxlog.FromContext(ctx)
-
-	// Attempt to read the requested hash from a keep volume.
-	errorToCaller := NotFoundError
-
-	for _, vol := range volmgr.AllReadable() {
-		size, err := vol.Get(ctx, hash, buf)
-		select {
-		case <-ctx.Done():
-			return 0, ErrClientDisconnect
-		default:
-		}
-		if err != nil {
-			// IsNotExist is an expected error and may be
-			// ignored. All other errors are logged. In
-			// any case we continue trying to read other
-			// volumes. If all volumes report IsNotExist,
-			// we return a NotFoundError.
-			if !os.IsNotExist(err) {
-				log.WithError(err).Errorf("Get(%s) failed on %s", hash, vol)
-			}
-			// If some volume returns a transient error, return it to the caller
-			// instead of "Not found" so it can retry.
-			if err == VolumeBusyError {
-				errorToCaller = err.(*KeepError)
-			}
-			continue
-		}
-		// Check the file checksum.
-		filehash := fmt.Sprintf("%x", md5.Sum(buf[:size]))
-		if filehash != hash {
-			// TODO: Try harder to tell a sysadmin about
-			// this.
-			log.Errorf("checksum mismatch for block %s (actual %s), size %d on %s", hash, filehash, size, vol)
-			errorToCaller = DiskHashError
-			continue
-		}
-		if errorToCaller == DiskHashError {
-			log.Warnf("after checksum mismatch for block %s on a different volume, a good copy was found on volume %s and returned", hash, vol)
-		}
-		return size, nil
-	}
-	return 0, errorToCaller
-}
-
-type putProgress struct {
-	classNeeded      map[string]bool
-	classTodo        map[string]bool
-	mountUsed        map[*VolumeMount]bool
-	totalReplication int
-	classDone        map[string]int
-}
-
-// Number of distinct replicas stored. "2" can mean the block was
-// stored on 2 different volumes with replication 1, or on 1 volume
-// with replication 2.
-func (pr putProgress) TotalReplication() string {
-	return strconv.Itoa(pr.totalReplication)
-}
-
-// Number of replicas satisfying each storage class, formatted like
-// "default=2, special=1".
-func (pr putProgress) ClassReplication() string {
-	s := ""
-	for k, v := range pr.classDone {
-		if len(s) > 0 {
-			s += ", "
-		}
-		s += k + "=" + strconv.Itoa(v)
-	}
-	return s
-}
-
-func (pr *putProgress) Add(mnt *VolumeMount) {
-	if pr.mountUsed[mnt] {
-		logrus.Warnf("BUG? superfluous extra write to mount %s", mnt.UUID)
-		return
-	}
-	pr.mountUsed[mnt] = true
-	pr.totalReplication += mnt.Replication
-	for class := range mnt.StorageClasses {
-		pr.classDone[class] += mnt.Replication
-		delete(pr.classTodo, class)
-	}
-}
-
-func (pr *putProgress) Sub(mnt *VolumeMount) {
-	if !pr.mountUsed[mnt] {
-		logrus.Warnf("BUG? Sub called with no prior matching Add: %s", mnt.UUID)
-		return
-	}
-	pr.mountUsed[mnt] = false
-	pr.totalReplication -= mnt.Replication
-	for class := range mnt.StorageClasses {
-		pr.classDone[class] -= mnt.Replication
-		if pr.classNeeded[class] {
-			pr.classTodo[class] = true
-		}
-	}
-}
-
-func (pr *putProgress) Done() bool {
-	return len(pr.classTodo) == 0 && pr.totalReplication > 0
-}
-
-func (pr *putProgress) Want(mnt *VolumeMount) bool {
-	if pr.Done() || pr.mountUsed[mnt] {
-		return false
-	}
-	if len(pr.classTodo) == 0 {
-		// none specified == "any"
-		return true
-	}
-	for class := range mnt.StorageClasses {
-		if pr.classTodo[class] {
-			return true
-		}
-	}
-	return false
-}
-
-func (pr *putProgress) Copy() *putProgress {
-	cp := putProgress{
-		classNeeded:      pr.classNeeded,
-		classTodo:        make(map[string]bool, len(pr.classTodo)),
-		classDone:        make(map[string]int, len(pr.classDone)),
-		mountUsed:        make(map[*VolumeMount]bool, len(pr.mountUsed)),
-		totalReplication: pr.totalReplication,
-	}
-	for k, v := range pr.classTodo {
-		cp.classTodo[k] = v
-	}
-	for k, v := range pr.classDone {
-		cp.classDone[k] = v
-	}
-	for k, v := range pr.mountUsed {
-		cp.mountUsed[k] = v
-	}
-	return &cp
-}
-
-func newPutProgress(classes []string) putProgress {
-	pr := putProgress{
-		classNeeded: make(map[string]bool, len(classes)),
-		classTodo:   make(map[string]bool, len(classes)),
-		classDone:   map[string]int{},
-		mountUsed:   map[*VolumeMount]bool{},
-	}
-	for _, c := range classes {
-		if c != "" {
-			pr.classNeeded[c] = true
-			pr.classTodo[c] = true
-		}
-	}
-	return pr
-}
-
-// PutBlock stores the given block on one or more volumes.
-//
-// The MD5 checksum of the block must match the given hash.
-//
-// The block is written to each writable volume (ordered by priority
-// and then UUID, see volume.go) until at least one replica has been
-// stored in each of the requested storage classes.
-//
-// The returned error, if any, is a KeepError with one of the
-// following codes:
-//
-// 500 Collision
-//
-//	A different block with the same hash already exists on this
-//	Keep server.
-//
-// 422 MD5Fail
-//
-//	The MD5 hash of the BLOCK does not match the argument HASH.
-//
-// 503 Full
-//
-//	There was not enough space left in any Keep volume to store
-//	the object.
-//
-// 500 Fail
-//
-//	The object could not be stored for some other reason (e.g.
-//	all writes failed). The text of the error message should
-//	provide as much detail as possible.
-func PutBlock(ctx context.Context, volmgr *RRVolumeManager, block []byte, hash string, wantStorageClasses []string) (putProgress, error) {
-	log := ctxlog.FromContext(ctx)
-
-	// Check that BLOCK's checksum matches HASH.
-	blockhash := fmt.Sprintf("%x", md5.Sum(block))
-	if blockhash != hash {
-		log.Printf("%s: MD5 checksum %s did not match request", hash, blockhash)
-		return putProgress{}, RequestHashError
-	}
-
-	result := newPutProgress(wantStorageClasses)
-
-	// If we already have this data, it's intact on disk, and we
-	// can update its timestamp, return success. If we have
-	// different data with the same hash, return failure.
-	if err := CompareAndTouch(ctx, volmgr, hash, block, &result); err != nil || result.Done() {
-		return result, err
-	}
-	if ctx.Err() != nil {
-		return result, ErrClientDisconnect
-	}
-
-	writables := volmgr.NextWritable()
-	if len(writables) == 0 {
-		log.Error("no writable volumes")
-		return result, FullError
-	}
-
-	var wg sync.WaitGroup
-	var mtx sync.Mutex
-	cond := sync.Cond{L: &mtx}
-	// pending predicts what result will be if all pending writes
-	// succeed.
-	pending := result.Copy()
-	var allFull atomic.Value
-	allFull.Store(true)
-
-	// We hold the lock for the duration of the "each volume" loop
-	// below, except when it is released during cond.Wait().
-	mtx.Lock()
-
-	for _, mnt := range writables {
-		// Wait until our decision to use this mount does not
-		// depend on the outcome of pending writes.
-		for result.Want(mnt) && !pending.Want(mnt) {
-			cond.Wait()
-		}
-		if !result.Want(mnt) {
-			continue
-		}
-		mnt := mnt
-		pending.Add(mnt)
-		wg.Add(1)
-		go func() {
-			log.Debugf("PutBlock: start write to %s", mnt.UUID)
-			defer wg.Done()
-			err := mnt.Put(ctx, hash, block)
-
-			mtx.Lock()
-			if err != nil {
-				log.Debugf("PutBlock: write to %s failed", mnt.UUID)
-				pending.Sub(mnt)
-			} else {
-				log.Debugf("PutBlock: write to %s succeeded", mnt.UUID)
-				result.Add(mnt)
-			}
-			cond.Broadcast()
-			mtx.Unlock()
-
-			if err != nil && err != FullError && ctx.Err() == nil {
-				// The volume is not full but the
-				// write did not succeed.  Report the
-				// error and continue trying.
-				allFull.Store(false)
-				log.WithError(err).Errorf("%s: Put(%s) failed", mnt.Volume, hash)
-			}
-		}()
-	}
-	mtx.Unlock()
-	wg.Wait()
-	if ctx.Err() != nil {
-		return result, ErrClientDisconnect
-	}
-	if result.Done() {
-		return result, nil
-	}
-
-	if result.totalReplication > 0 {
-		// Some, but not all, of the storage classes were
-		// satisfied. This qualifies as success.
-		return result, nil
-	} else if allFull.Load().(bool) {
-		log.Error("all volumes with qualifying storage classes are full")
-		return putProgress{}, FullError
-	} else {
-		// Already logged the non-full errors.
-		return putProgress{}, GenericError
-	}
-}
-
-// CompareAndTouch looks for volumes where the given content already
-// exists and its modification time can be updated (i.e., it is
-// protected from garbage collection), and updates result accordingly.
-// It returns when the result is Done() or all volumes have been
-// checked.
-func CompareAndTouch(ctx context.Context, volmgr *RRVolumeManager, hash string, buf []byte, result *putProgress) error {
-	log := ctxlog.FromContext(ctx)
-	for _, mnt := range volmgr.AllWritable() {
-		if !result.Want(mnt) {
-			continue
-		}
-		err := mnt.Compare(ctx, hash, buf)
-		if ctx.Err() != nil {
-			return nil
-		} else if err == CollisionError {
-			// Stop if we have a block with same hash but
-			// different content. (It will be impossible
-			// to tell which one is wanted if we have
-			// both, so there's no point writing it even
-			// on a different volume.)
-			log.Errorf("collision in Compare(%s) on volume %s", hash, mnt.Volume)
-			return CollisionError
-		} else if os.IsNotExist(err) {
-			// Block does not exist. This is the only
-			// "normal" error: we don't log anything.
-			continue
-		} else if err != nil {
-			// Couldn't open file, data is corrupt on
-			// disk, etc.: log this abnormal condition,
-			// and try the next volume.
-			log.WithError(err).Warnf("error in Compare(%s) on volume %s", hash, mnt.Volume)
-			continue
-		}
-		if err := mnt.Touch(hash); err != nil {
-			log.WithError(err).Errorf("error in Touch(%s) on volume %s", hash, mnt.Volume)
-			continue
-		}
-		// Compare and Touch both worked --> done.
-		result.Add(mnt)
-		if result.Done() {
-			return nil
-		}
-	}
-	return nil
-}
-
-var validLocatorRe = regexp.MustCompile(`^[0-9a-f]{32}$`)
-
-// IsValidLocator returns true if the specified string is a valid Keep
-// locator.  When Keep is extended to support hash types other than
-// MD5, this should be updated to cover those as well.
-func IsValidLocator(loc string) bool {
-	return validLocatorRe.MatchString(loc)
-}
-
-var authRe = regexp.MustCompile(`^(OAuth2|Bearer)\s+(.*)`)
-
-// GetAPIToken returns the OAuth2 token from the Authorization
-// header of a HTTP request, or an empty string if no matching
-// token is found.
-func GetAPIToken(req *http.Request) string {
-	if auth, ok := req.Header["Authorization"]; ok {
-		if match := authRe.FindStringSubmatch(auth[0]); match != nil {
-			return match[2]
-		}
-	}
-	return ""
-}
-
-// canDelete returns true if the user identified by apiToken is
-// allowed to delete blocks.
-func (rtr *router) canDelete(apiToken string) bool {
-	if apiToken == "" {
-		return false
-	}
-	// Blocks may be deleted only when Keep has been configured with a
-	// data manager.
-	if rtr.isSystemAuth(apiToken) {
-		return true
-	}
-	// TODO(twp): look up apiToken with the API server
-	// return true if is_admin is true and if the token
-	// has unlimited scope
-	return false
-}
-
-// isSystemAuth returns true if the given token is allowed to perform
-// system level actions like deleting data.
-func (rtr *router) isSystemAuth(token string) bool {
-	return token != "" && token == rtr.cluster.SystemRootToken
-}
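
The PutBlock removed above coordinates parallel volume writes with a
condition variable: "pending" predicts what the result will be if every
in-flight write succeeds, so the loop starts a write on a mount only when
that mount's contribution is still needed. The new keepstore.BlockWrite
later in this patch keeps the same pattern. Below is a minimal,
self-contained sketch of the idea, using plain counters in place of
putProgress; every name in it is illustrative, not part of the keepstore
API.

    package main

    import (
        "fmt"
        "sync"
    )

    // writeFanOut starts writes until pending (confirmed plus
    // in-flight) covers the needed replication, waits whenever the
    // decision depends on in-flight writes, and falls back to other
    // volumes when a write fails.
    func writeFanOut(writes []func() error, needed int) int {
        var wg sync.WaitGroup
        cond := sync.NewCond(new(sync.Mutex))
        done, pending := 0, 0
        cond.L.Lock()
        for _, write := range writes {
            // Wait until starting (or skipping) this write no longer
            // depends on the outcome of in-flight writes.
            for done < needed && pending >= needed {
                cond.Wait()
            }
            if done >= needed {
                break
            }
            write := write
            pending++
            wg.Add(1)
            go func() {
                defer wg.Done()
                err := write()
                cond.L.Lock()
                defer cond.L.Unlock()
                defer cond.Broadcast()
                if err != nil {
                    pending-- // free the slot so the loop tries another volume
                } else {
                    done++
                }
            }()
        }
        cond.L.Unlock()
        wg.Wait()
        return done
    }

    func main() {
        ok := func() error { return nil }
        fmt.Println(writeFanOut([]func() error{ok, ok, ok}, 2)) // prints 2
    }
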
diff --git a/services/keepstore/hashcheckwriter.go b/services/keepstore/hashcheckwriter.go
new file mode 100644
index 0000000000..f191c98e4b
--- /dev/null
+++ b/services/keepstore/hashcheckwriter.go
@@ -0,0 +1,68 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package keepstore
+
+import (
+	"fmt"
+	"hash"
+	"io"
+)
+
+type hashCheckWriter struct {
+	writer       io.Writer
+	hash         hash.Hash
+	expectSize   int64
+	expectDigest string
+
+	offset int64
+}
+
+// newHashCheckWriter returns a writer that writes through to writer,
+// but stops short if the written content reaches expectSize bytes
+// and does not match expectDigest according to the given hash
+// function.
+//
+// It returns a write error if more than expectSize bytes are written.
+//
+// Thus, in case of a hash mismatch, fewer than expectSize bytes will
+// be written through.
+func newHashCheckWriter(writer io.Writer, hash hash.Hash, expectSize int64, expectDigest string) io.Writer {
+	return &hashCheckWriter{
+		writer:       writer,
+		hash:         hash,
+		expectSize:   expectSize,
+		expectDigest: expectDigest,
+	}
+}
+
+func (hcw *hashCheckWriter) Write(p []byte) (int, error) {
+	if todo := hcw.expectSize - hcw.offset - int64(len(p)); todo < 0 {
+		// Writing beyond the expected size returns a checksum
+		// error without even checking the hash.
+		return 0, errChecksum
+	} else if todo > 0 {
+		// This isn't the last write, so we pass it through.
+		_, err := hcw.hash.Write(p)
+		if err != nil {
+			return 0, err
+		}
+		n, err := hcw.writer.Write(p)
+		hcw.offset += int64(n)
+		return n, err
+	} else {
+		// This is the last write, so we check the hash before
+		// writing through.
+		_, err := hcw.hash.Write(p)
+		if err != nil {
+			return 0, err
+		}
+		if digest := fmt.Sprintf("%x", hcw.hash.Sum(nil)); digest != hcw.expectDigest {
+			return 0, errChecksum
+		}
+		// Ensure subsequent write will fail
+		hcw.offset = hcw.expectSize + 1
+		return hcw.writer.Write(p)
+	}
+}
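
A quick usage sketch of the writer added above. This is hedged:
newHashCheckWriter and errChecksum are unexported, so a real example
would have to live inside package keepstore, and the function name here
is hypothetical.

    package keepstore

    import (
        "bytes"
        "crypto/md5"
        "fmt"
    )

    func exampleHashCheckWriter() {
        data := []byte("foo")
        digest := fmt.Sprintf("%x", md5.Sum(data))

        // Matching content passes through in full.
        var good bytes.Buffer
        w := newHashCheckWriter(&good, md5.New(), int64(len(data)), digest)
        if _, err := w.Write(data); err != nil {
            panic(err)
        }
        fmt.Println(good.String()) // foo

        // A same-size write with the wrong digest is withheld: the
        // final Write returns errChecksum and, because the mismatch
        // arrived in a single write, nothing reaches the destination.
        var bad bytes.Buffer
        w = newHashCheckWriter(&bad, md5.New(), int64(len(data)), digest)
        _, err := w.Write([]byte("bar"))
        fmt.Println(err, bad.Len()) // checksum mismatch in stored data 0
    }
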
diff --git a/services/keepstore/keepstore.go b/services/keepstore/keepstore.go
index 953aa047cb..89afa9089f 100644
--- a/services/keepstore/keepstore.go
+++ b/services/keepstore/keepstore.go
@@ -5,53 +5,716 @@
 package keepstore
 
 import (
+	"bytes"
+	"context"
+	"crypto/md5"
+	"errors"
+	"fmt"
+	"io"
+	"net/http"
+	"os"
+	"sort"
+	"strconv"
+	"strings"
+	"sync"
+	"sync/atomic"
 	"time"
+
+	"git.arvados.org/arvados.git/sdk/go/arvados"
+	"git.arvados.org/arvados.git/sdk/go/arvadosclient"
+	"git.arvados.org/arvados.git/sdk/go/auth"
+	"git.arvados.org/arvados.git/sdk/go/ctxlog"
+	"git.arvados.org/arvados.git/sdk/go/httpserver"
+	"git.arvados.org/arvados.git/sdk/go/keepclient"
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/sirupsen/logrus"
 )
 
-// BlockSize for a Keep "block" is 64MB.
-const BlockSize = 64 * 1024 * 1024
+// BlockSize is the maximum size of a Keep block: 64 MiB.
+const BlockSize = 1 << 26
 
-// MinFreeKilobytes is the amount of space a Keep volume must have available
-// in order to permit writes.
-const MinFreeKilobytes = BlockSize / 1024
+var (
+	errChecksum          = httpserver.ErrorWithStatus(errors.New("checksum mismatch in stored data"), http.StatusBadGateway)
+	errNoTokenProvided   = httpserver.ErrorWithStatus(errors.New("no token provided in Authorization header"), http.StatusUnauthorized)
+	errMethodNotAllowed  = httpserver.ErrorWithStatus(errors.New("method not allowed"), http.StatusMethodNotAllowed)
+	errVolumeUnavailable = httpserver.ErrorWithStatus(errors.New("volume unavailable"), http.StatusServiceUnavailable)
+	errCollision         = httpserver.ErrorWithStatus(errors.New("hash collision"), http.StatusInternalServerError)
+	errExpiredSignature  = httpserver.ErrorWithStatus(errors.New("expired signature"), http.StatusUnauthorized)
+	errInvalidSignature  = httpserver.ErrorWithStatus(errors.New("invalid signature"), http.StatusBadRequest)
+	errInvalidLocator    = httpserver.ErrorWithStatus(errors.New("invalid locator"), http.StatusBadRequest)
+	errFull              = httpserver.ErrorWithStatus(errors.New("insufficient storage"), http.StatusInsufficientStorage)
+	errTooLarge          = httpserver.ErrorWithStatus(errors.New("request entity too large"), http.StatusRequestEntityTooLarge)
+	driver               = make(map[string]volumeDriver)
+)
 
-var bufs *bufferPool
+type IndexOptions struct {
+	MountUUID string
+	Prefix    string
+	WriteTo   io.Writer
+}
 
-type KeepError struct {
-	HTTPCode int
-	ErrMsg   string
+type mount struct {
+	arvados.KeepMount
+	volume
+	priority int
 }
 
-var (
-	BadRequestError     = &KeepError{400, "Bad Request"}
-	UnauthorizedError   = &KeepError{401, "Unauthorized"}
-	CollisionError      = &KeepError{500, "Collision"}
-	RequestHashError    = &KeepError{422, "Hash mismatch in request"}
-	PermissionError     = &KeepError{403, "Forbidden"}
-	DiskHashError       = &KeepError{500, "Hash mismatch in stored data"}
-	ExpiredError        = &KeepError{401, "Expired permission signature"}
-	NotFoundError       = &KeepError{404, "Not Found"}
-	VolumeBusyError     = &KeepError{503, "Volume backend busy"}
-	GenericError        = &KeepError{500, "Fail"}
-	FullError           = &KeepError{503, "Full"}
-	SizeRequiredError   = &KeepError{411, "Missing Content-Length"}
-	TooLongError        = &KeepError{413, "Block is too large"}
-	MethodDisabledError = &KeepError{405, "Method disabled"}
-	ErrNotImplemented   = &KeepError{500, "Unsupported configuration"}
-	ErrClientDisconnect = &KeepError{503, "Client disconnected"}
-)
+type keepstore struct {
+	cluster    *arvados.Cluster
+	logger     logrus.FieldLogger
+	serviceURL arvados.URL
+	mounts     map[string]*mount
+	mountsR    []*mount
+	mountsW    []*mount
+	bufferPool *bufferPool
 
-func (e *KeepError) Error() string {
-	return e.ErrMsg
+	iostats map[volume]*ioStats
+
+	remoteClients    map[string]*keepclient.KeepClient
+	remoteClientsMtx sync.Mutex
 }
 
-// Periodically (once per interval) invoke EmptyTrash on all volumes.
-func emptyTrash(mounts []*VolumeMount, interval time.Duration) {
-	for range time.NewTicker(interval).C {
-		for _, v := range mounts {
-			if v.KeepMount.AllowTrash {
-				v.EmptyTrash()
+func newKeepstore(ctx context.Context, cluster *arvados.Cluster, token string, reg *prometheus.Registry, serviceURL arvados.URL) (*keepstore, error) {
+	logger := ctxlog.FromContext(ctx)
+
+	if cluster.API.MaxConcurrentRequests > 0 && cluster.API.MaxConcurrentRequests < cluster.API.MaxKeepBlobBuffers {
+		logger.Warnf("Possible configuration mistake: not useful to set API.MaxKeepBlobBuffers (%d) higher than API.MaxConcurrentRequests (%d)", cluster.API.MaxKeepBlobBuffers, cluster.API.MaxConcurrentRequests)
+	}
+
+	if cluster.Collections.BlobSigningKey != "" {
+	} else if cluster.Collections.BlobSigning {
+		return nil, errors.New("cannot enable Collections.BlobSigning with no Collections.BlobSigningKey")
+	} else {
+		logger.Warn("Running without a blob signing key. Block locators returned by this server will not be signed, and will be rejected by a server that enforces permissions. To fix this, configure Collections.BlobSigning and Collections.BlobSigningKey.")
+	}
+
+	if cluster.API.MaxKeepBlobBuffers <= 0 {
+		return nil, fmt.Errorf("API.MaxKeepBlobBuffers must be greater than zero")
+	}
+	bufferPool := newBufferPool(logger, cluster.API.MaxKeepBlobBuffers, reg)
+
+	ks := &keepstore{
+		cluster:       cluster,
+		logger:        logger,
+		serviceURL:    serviceURL,
+		bufferPool:    bufferPool,
+		remoteClients: make(map[string]*keepclient.KeepClient),
+	}
+
+	err := ks.setupMounts(newVolumeMetricsVecs(reg))
+	if err != nil {
+		return nil, err
+	}
+
+	return ks, nil
+}
+
+func (ks *keepstore) setupMounts(metrics *volumeMetricsVecs) error {
+	ks.mounts = make(map[string]*mount)
+	if len(ks.cluster.Volumes) == 0 {
+		return errors.New("no volumes configured")
+	}
+	for uuid, cfgvol := range ks.cluster.Volumes {
+		va, ok := cfgvol.AccessViaHosts[ks.serviceURL]
+		if !ok && len(cfgvol.AccessViaHosts) > 0 {
+			continue
+		}
+		dri, ok := driver[cfgvol.Driver]
+		if !ok {
+			return fmt.Errorf("volume %s: invalid driver %q", uuid, cfgvol.Driver)
+		}
+		vol, err := dri(newVolumeParams{
+			UUID:         uuid,
+			Cluster:      ks.cluster,
+			ConfigVolume: cfgvol,
+			Logger:       ks.logger,
+			MetricsVecs:  metrics,
+			BufferPool:   ks.bufferPool,
+		})
+		if err != nil {
+			return fmt.Errorf("error initializing volume %s: %s", uuid, err)
+		}
+		sc := cfgvol.StorageClasses
+		if len(sc) == 0 {
+			sc = map[string]bool{"default": true}
+		}
+		repl := cfgvol.Replication
+		if repl < 1 {
+			repl = 1
+		}
+		pri := 0
+		for class, in := range cfgvol.StorageClasses {
+			p := ks.cluster.StorageClasses[class].Priority
+			if in && p > pri {
+				pri = p
 			}
 		}
+		mnt := &mount{
+			volume:   vol,
+			priority: pri,
+			KeepMount: arvados.KeepMount{
+				UUID:           uuid,
+				DeviceID:       vol.DeviceID(),
+				AllowWrite:     !va.ReadOnly && !cfgvol.ReadOnly,
+				AllowTrash:     !va.ReadOnly && (!cfgvol.ReadOnly || cfgvol.AllowTrashWhenReadOnly),
+				Replication:    repl,
+				StorageClasses: sc,
+			},
+		}
+		ks.mounts[uuid] = mnt
+		ks.logger.Printf("started volume %s (%s), AllowWrite=%v, AllowTrash=%v", uuid, vol.DeviceID(), mnt.AllowWrite, mnt.AllowTrash)
+	}
+	if len(ks.mounts) == 0 {
+		return fmt.Errorf("no volumes configured for %s", ks.serviceURL)
+	}
+
+	ks.mountsR = nil
+	ks.mountsW = nil
+	for _, mnt := range ks.mounts {
+		ks.mountsR = append(ks.mountsR, mnt)
+		if mnt.AllowWrite {
+			ks.mountsW = append(ks.mountsW, mnt)
+		}
+	}
+	// Sorting mounts by UUID makes behavior more predictable, and
+	// is convenient for testing -- for example, "index all
+	// volumes" and "trash block on all volumes" will visit
+	// volumes in predictable order.
+	sort.Slice(ks.mountsR, func(i, j int) bool { return ks.mountsR[i].UUID < ks.mountsR[j].UUID })
+	sort.Slice(ks.mountsW, func(i, j int) bool { return ks.mountsW[i].UUID < ks.mountsW[j].UUID })
+	return nil
+}
+
+// checkLocatorSignature checks that locator has a valid signature.
+// If the BlobSigning config is false, it returns nil even if the
+// signature is invalid or missing.
+func (ks *keepstore) checkLocatorSignature(ctx context.Context, locator string) error {
+	if !ks.cluster.Collections.BlobSigning {
+		return nil
+	}
+	token := ctxToken(ctx)
+	if token == "" {
+		return errNoTokenProvided
+	}
+	err := arvados.VerifySignature(locator, token, ks.cluster.Collections.BlobSigningTTL.Duration(), []byte(ks.cluster.Collections.BlobSigningKey))
+	if err == arvados.ErrSignatureExpired {
+		return errExpiredSignature
+	} else if err != nil {
+		return errInvalidSignature
+	}
+	return nil
+}
+
+// signLocator signs the locator for the given token, if possible.
+// Note this signs if the BlobSigningKey config is available, even if
+// the BlobSigning config is false.
+func (ks *keepstore) signLocator(token, locator string) string {
+	if token == "" || len(ks.cluster.Collections.BlobSigningKey) == 0 {
+		return locator
+	}
+	ttl := ks.cluster.Collections.BlobSigningTTL.Duration()
+	return arvados.SignLocator(locator, token, time.Now().Add(ttl), ttl, []byte(ks.cluster.Collections.BlobSigningKey))
+}
+
+func (ks *keepstore) BlockRead(ctx context.Context, opts arvados.BlockReadOptions) (n int, err error) {
+	li, err := parseLocator(opts.Locator)
+	if err != nil {
+		return 0, err
+	}
+	out := opts.WriteTo
+	if rw, ok := out.(http.ResponseWriter); ok && li.size > 0 {
+		out = &setSizeOnWrite{ResponseWriter: rw, size: li.size}
+	}
+	if li.remote && !li.signed {
+		return ks.blockReadRemote(ctx, opts)
+	}
+	if err := ks.checkLocatorSignature(ctx, opts.Locator); err != nil {
+		return 0, err
+	}
+	hashcheck := md5.New()
+	if li.size > 0 {
+		out = newHashCheckWriter(out, hashcheck, int64(li.size), li.hash)
+	} else {
+		out = io.MultiWriter(out, hashcheck)
+	}
+	var errToCaller error = os.ErrNotExist
+	for _, mnt := range ks.rendezvous(li.hash, ks.mountsR) {
+		if ctx.Err() != nil {
+			return 0, ctx.Err()
+		}
+		n, err = mnt.BlockRead(ctx, li.hash, out)
+		if err == nil && li.size > 0 && n != li.size {
+			// If the backend read fewer bytes than
+			// expected but returns no error, we can
+			// classify this as a checksum error (even
+			// though hashCheckWriter doesn't know that
+			// yet; it's just waiting for the next
+			// write). If our caller is serving a GET
+			// request it's too late to do anything about
+			// it anyway, but if it's a HEAD request the
+			// caller can still change the response status
+			// code.
+			return n, errChecksum
+		}
+		if err == nil && li.size == 0 {
+			// hashCheckWriter isn't in use because we
+			// don't know the expected size. All we can do
+			// is check after writing all the data, and
+			// trust the caller is doing a HEAD request so
+			// it's not too late to set an error code in
+			// the response header.
+			if hash := fmt.Sprintf("%x", hashcheck.Sum(nil)); hash != li.hash {
+				return n, errChecksum
+			}
+		}
+		if rw, ok := opts.WriteTo.(http.ResponseWriter); ok && li.size == 0 && err == nil {
+			// We didn't set the content-length header
+			// above because we didn't know the block size
+			// until now.
+			rw.Header().Set("Content-Length", fmt.Sprintf("%d", n))
+		}
+		if n > 0 || err == nil {
+			// success, or there's an error but we can't
+			// retry because we've already sent some data.
+			return n, err
+		}
+		if !os.IsNotExist(err) {
+			// If some volume returns a transient error,
+			// return it to the caller instead of "Not
+			// found" so it can retry.
+			errToCaller = err
+		}
+	}
+	return 0, errToCaller
+}
+
+func (ks *keepstore) blockReadRemote(ctx context.Context, opts arvados.BlockReadOptions) (int, error) {
+	ks.logger.Infof("blockReadRemote(%s)", opts.Locator)
+	token := ctxToken(ctx)
+	if token == "" {
+		return 0, errNoTokenProvided
+	}
+	var remoteClient *keepclient.KeepClient
+	var parts []string
+	var size int
+	for i, part := range strings.Split(opts.Locator, "+") {
+		switch {
+		case i == 0:
+			// don't try to parse hash part as hint
+		case strings.HasPrefix(part, "A"):
+			// drop local permission hint
+			continue
+		case len(part) > 7 && part[0] == 'R' && part[6] == '-':
+			remoteID := part[1:6]
+			remote, ok := ks.cluster.RemoteClusters[remoteID]
+			if !ok {
+				return 0, httpserver.ErrorWithStatus(errors.New("remote cluster not configured"), http.StatusBadRequest)
+			}
+			kc, err := ks.remoteClient(remoteID, remote, token)
+			if err == auth.ErrObsoleteToken {
+				return 0, httpserver.ErrorWithStatus(err, http.StatusBadRequest)
+			} else if err != nil {
+				return 0, err
+			}
+			remoteClient = kc
+			part = "A" + part[7:]
+		case len(part) > 0 && part[0] >= '0' && part[0] <= '9':
+			size, _ = strconv.Atoi(part)
+		}
+		parts = append(parts, part)
+	}
+	if remoteClient == nil {
+		return 0, httpserver.ErrorWithStatus(errors.New("invalid remote hint"), http.StatusBadRequest)
+	}
+	locator := strings.Join(parts, "+")
+	if opts.LocalLocator == nil {
+		// Read from remote cluster and stream response back
+		// to caller
+		if rw, ok := opts.WriteTo.(http.ResponseWriter); ok && size > 0 {
+			rw.Header().Set("Content-Length", fmt.Sprintf("%d", size))
+		}
+		return remoteClient.BlockRead(ctx, arvados.BlockReadOptions{
+			Locator: locator,
+			WriteTo: opts.WriteTo,
+		})
+	}
+	// We must call LocalLocator before writing any data to
+	// opts.WriteTo, otherwise the caller can't put the local
+	// locator in a response header.  So we copy into memory,
+	// generate the local signature, then copy from memory to
+	// opts.WriteTo.
+	buf, err := ks.bufferPool.GetContext(ctx)
+	if err != nil {
+		return 0, err
+	}
+	defer ks.bufferPool.Put(buf)
+	writebuf := bytes.NewBuffer(buf[:0])
+	ks.logger.Infof("blockReadRemote(%s): remote read(%s)", opts.Locator, locator)
+	_, err = remoteClient.BlockRead(ctx, arvados.BlockReadOptions{
+		Locator: locator,
+		WriteTo: writebuf,
+	})
+	if err != nil {
+		return 0, err
+	}
+	resp, err := ks.BlockWrite(ctx, arvados.BlockWriteOptions{
+		Hash: locator,
+		Data: writebuf.Bytes(),
+	})
+	if err != nil {
+		return 0, err
+	}
+	opts.LocalLocator(resp.Locator)
+	if rw, ok := opts.WriteTo.(http.ResponseWriter); ok {
+		rw.Header().Set("Content-Length", fmt.Sprintf("%d", writebuf.Len()))
+	}
+	n, err := io.Copy(opts.WriteTo, bytes.NewReader(writebuf.Bytes()))
+	return int(n), err
+}
+
+func (ks *keepstore) remoteClient(remoteID string, remoteCluster arvados.RemoteCluster, token string) (*keepclient.KeepClient, error) {
+	ks.remoteClientsMtx.Lock()
+	kc, ok := ks.remoteClients[remoteID]
+	ks.remoteClientsMtx.Unlock()
+	if !ok {
+		c := &arvados.Client{
+			APIHost:   remoteCluster.Host,
+			AuthToken: "xxx",
+			Insecure:  remoteCluster.Insecure,
+		}
+		ac, err := arvadosclient.New(c)
+		if err != nil {
+			return nil, err
+		}
+		kc, err = keepclient.MakeKeepClient(ac)
+		if err != nil {
+			return nil, err
+		}
+		kc.DiskCacheSize = keepclient.DiskCacheDisabled
+
+		ks.remoteClientsMtx.Lock()
+		ks.remoteClients[remoteID] = kc
+		ks.remoteClientsMtx.Unlock()
+	}
+	accopy := *kc.Arvados
+	accopy.ApiToken = token
+	kccopy := kc.Clone()
+	kccopy.Arvados = &accopy
+	token, err := auth.SaltToken(token, remoteID)
+	if err != nil {
+		return nil, err
+	}
+	kccopy.Arvados.ApiToken = token
+	return kccopy, nil
+}
+
+// BlockWrite writes a block to one or more volumes.
+func (ks *keepstore) BlockWrite(ctx context.Context, opts arvados.BlockWriteOptions) (arvados.BlockWriteResponse, error) {
+	var resp arvados.BlockWriteResponse
+	var hash string
+	if opts.Data == nil {
+		buf, err := ks.bufferPool.GetContext(ctx)
+		if err != nil {
+			return resp, err
+		}
+		defer ks.bufferPool.Put(buf)
+		w := bytes.NewBuffer(buf[:0])
+		h := md5.New()
+		limitedReader := &io.LimitedReader{R: opts.Reader, N: BlockSize}
+		n, err := io.Copy(io.MultiWriter(w, h), limitedReader)
+		if err != nil {
+			return resp, err
+		}
+		if limitedReader.N == 0 {
+			// Data size is either exactly BlockSize, or too big.
+			n, err := opts.Reader.Read(make([]byte, 1))
+			if n > 0 {
+				return resp, errTooLarge
+			}
+			if err != io.EOF {
+				return resp, err
+			}
+		}
+		opts.Data = buf[:n]
+		if opts.DataSize != 0 && int(n) != opts.DataSize {
+			return resp, httpserver.ErrorWithStatus(fmt.Errorf("content length %d did not match specified data size %d", n, opts.DataSize), http.StatusBadRequest)
+		}
+		hash = fmt.Sprintf("%x", h.Sum(nil))
+	} else {
+		hash = fmt.Sprintf("%x", md5.Sum(opts.Data))
+	}
+	if opts.Hash != "" && !strings.HasPrefix(opts.Hash, hash) {
+		return resp, httpserver.ErrorWithStatus(fmt.Errorf("content hash %s did not match specified locator %s", hash, opts.Hash), http.StatusBadRequest)
+	}
+	rvzmounts := ks.rendezvous(hash, ks.mountsW)
+	result := newPutProgress(opts.StorageClasses)
+	for _, mnt := range rvzmounts {
+		if !result.Want(mnt) {
+			continue
+		}
+		cmp := &checkEqual{Expect: opts.Data}
+		if _, err := mnt.BlockRead(ctx, hash, cmp); err == nil {
+			if !cmp.Equal() {
+				return resp, errCollision
+			}
+			err := mnt.BlockTouch(hash)
+			if err == nil {
+				result.Add(mnt)
+			}
+		}
+	}
+	var allFull atomic.Bool
+	allFull.Store(true)
+	// pending tracks what result will be if all outstanding
+	// writes succeed.
+	pending := result.Copy()
+	cond := sync.NewCond(new(sync.Mutex))
+	cond.L.Lock()
+	var wg sync.WaitGroup
+nextmnt:
+	for _, mnt := range rvzmounts {
+		for {
+			if result.Done() || ctx.Err() != nil {
+				break nextmnt
+			}
+			if !result.Want(mnt) {
+				continue nextmnt
+			}
+			if pending.Want(mnt) {
+				break
+			}
+			// This mount might not be needed, depending
+			// on the outcome of pending writes. Wait for
+			// a pending write to finish, then check
+			// again.
+			cond.Wait()
+		}
+		mnt := mnt
+		logger := ks.logger.WithField("mount", mnt.UUID)
+		pending.Add(mnt)
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			logger.Debug("start write")
+			err := mnt.BlockWrite(ctx, hash, opts.Data)
+			cond.L.Lock()
+			defer cond.L.Unlock()
+			defer cond.Broadcast()
+			if err != nil {
+				logger.Debug("write failed")
+				pending.Sub(mnt)
+				if err != errFull {
+					allFull.Store(false)
+				}
+			} else {
+				result.Add(mnt)
+				pending.Sub(mnt)
+			}
+		}()
+	}
+	cond.L.Unlock()
+	wg.Wait()
+	if ctx.Err() != nil {
+		return resp, ctx.Err()
+	}
+	if result.Done() || result.totalReplication > 0 {
+		resp = arvados.BlockWriteResponse{
+			Locator:        ks.signLocator(ctxToken(ctx), fmt.Sprintf("%s+%d", hash, len(opts.Data))),
+			Replicas:       result.totalReplication,
+			StorageClasses: result.classDone,
+		}
+		return resp, nil
+	}
+	if allFull.Load() {
+		return resp, errFull
+	}
+	return resp, errVolumeUnavailable
+}
+
+// rendezvous sorts the given mounts by descending priority, then by
+// rendezvous order for the given locator.
+func (*keepstore) rendezvous(locator string, mnts []*mount) []*mount {
+	hash := locator
+	if len(hash) > 32 {
+		hash = hash[:32]
+	}
+	// copy the provided []*mount before doing an in-place sort
+	mnts = append([]*mount(nil), mnts...)
+	weight := make(map[*mount]string)
+	for _, mnt := range mnts {
+		uuidpart := mnt.UUID
+		if len(uuidpart) == 27 {
+			// strip zzzzz-yyyyy- prefixes
+			uuidpart = uuidpart[12:]
+		}
+		weight[mnt] = fmt.Sprintf("%x", md5.Sum([]byte(hash+uuidpart)))
+	}
+	sort.Slice(mnts, func(i, j int) bool {
+		if p := mnts[i].priority - mnts[j].priority; p != 0 {
+			return p > 0
+		}
+		return weight[mnts[i]] < weight[mnts[j]]
+	})
+	return mnts
+}
+
+// checkEqual reports whether the data written to it (via io.Writer
+// interface) is equal to the expected data.
+//
+// Expect should not be changed after the first Write.
+type checkEqual struct {
+	Expect     []byte
+	equalUntil int
+}
+
+func (ce *checkEqual) Equal() bool {
+	return ce.equalUntil == len(ce.Expect)
+}
+
+func (ce *checkEqual) Write(p []byte) (int, error) {
+	endpos := ce.equalUntil + len(p)
+	if ce.equalUntil >= 0 && endpos <= len(ce.Expect) && bytes.Equal(p, ce.Expect[ce.equalUntil:endpos]) {
+		ce.equalUntil = endpos
+	} else {
+		ce.equalUntil = -1
+	}
+	return len(p), nil
+}
+
+func (ks *keepstore) BlockUntrash(ctx context.Context, locator string) error {
+	li, err := parseLocator(locator)
+	if err != nil {
+		return err
+	}
+	var errToCaller error = os.ErrNotExist
+	for _, mnt := range ks.mountsW {
+		if ctx.Err() != nil {
+			return ctx.Err()
+		}
+		err := mnt.BlockUntrash(li.hash)
+		if err == nil {
+			errToCaller = nil
+		} else if !os.IsNotExist(err) && errToCaller != nil {
+			errToCaller = err
+		}
+	}
+	return errToCaller
+}
+
+func (ks *keepstore) BlockTouch(ctx context.Context, locator string) error {
+	li, err := parseLocator(locator)
+	if err != nil {
+		return err
+	}
+	var errToCaller error = os.ErrNotExist
+	for _, mnt := range ks.mountsW {
+		if ctx.Err() != nil {
+			return ctx.Err()
+		}
+		err := mnt.BlockTouch(li.hash)
+		if err == nil {
+			return nil
+		}
+		if !os.IsNotExist(err) {
+			errToCaller = err
+		}
+	}
+	return errToCaller
+}
+
+func (ks *keepstore) BlockTrash(ctx context.Context, locator string) error {
+	if !ks.cluster.Collections.BlobTrash {
+		return errMethodNotAllowed
+	}
+	li, err := parseLocator(locator)
+	if err != nil {
+		return err
+	}
+	var errToCaller error = os.ErrNotExist
+	for _, mnt := range ks.mounts {
+		if !mnt.AllowTrash {
+			continue
+		}
+		if ctx.Err() != nil {
+			return ctx.Err()
+		}
+		t, err := mnt.Mtime(li.hash)
+		if err == nil && time.Now().Sub(t) > ks.cluster.Collections.BlobSigningTTL.Duration() {
+			err = mnt.BlockTrash(li.hash)
+		}
+		if os.IsNotExist(errToCaller) || (errToCaller == nil && !os.IsNotExist(err)) {
+			errToCaller = err
+		}
+	}
+	return errToCaller
+}
+
+func (ks *keepstore) Mounts() []*mount {
+	return ks.mountsR
+}
+
+func (ks *keepstore) Index(ctx context.Context, opts IndexOptions) error {
+	mounts := ks.mountsR
+	if opts.MountUUID != "" {
+		mnt, ok := ks.mounts[opts.MountUUID]
+		if !ok {
+			return os.ErrNotExist
+		}
+		mounts = []*mount{mnt}
+	}
+	for _, mnt := range mounts {
+		err := mnt.Index(ctx, opts.Prefix, opts.WriteTo)
+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func ctxToken(ctx context.Context) string {
+	if c, ok := auth.FromContext(ctx); ok && len(c.Tokens) > 0 {
+		return c.Tokens[0]
+	} else {
+		return ""
+	}
+}
+
+type locatorInfo struct {
+	hash   string
+	size   int
+	remote bool
+	signed bool
+}
+
+func parseLocator(loc string) (locatorInfo, error) {
+	var li locatorInfo
+	for i, part := range strings.Split(loc, "+") {
+		if i == 0 {
+			if len(part) != 32 {
+				return li, errInvalidLocator
+			}
+			li.hash = part
+			continue
+		}
+		if i == 1 {
+			if size, err := strconv.Atoi(part); err == nil {
+				li.size = size
+				continue
+			}
+		}
+		if len(part) == 0 {
+			return li, errInvalidLocator
+		}
+		if part[0] == 'A' {
+			li.signed = true
+		}
+		if part[0] == 'R' {
+			li.remote = true
+		}
+		if part[0] >= '0' && part[0] <= '9' {
+			// size, if present at all, must come first
+			return li, errInvalidLocator
+		}
 	}
+	return li, nil
 }
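
The rendezvous method above is ordinary highest-random-weight
(rendezvous) hashing with a priority tier layered on top: within a
tier, each mount is weighted by md5(blockHash + uuidSuffix) and visited
in ascending weight order, so every server that knows the volume UUIDs
probes them in the same order for a given block. A standalone sketch of
the weighting, minus the priority tiers; the names here are
hypothetical.

    package main

    import (
        "crypto/md5"
        "fmt"
        "sort"
    )

    // rendezvousOrder ranks volume UUIDs for one block the same way
    // keepstore's rendezvous method does within a single priority tier.
    func rendezvousOrder(blockHash string, uuids []string) []string {
        out := append([]string(nil), uuids...)
        weight := func(uuid string) string {
            suffix := uuid
            if len(suffix) == 27 {
                suffix = suffix[12:] // strip the "zzzzz-yyyyy-" prefix
            }
            return fmt.Sprintf("%x", md5.Sum([]byte(blockHash+suffix)))
        }
        sort.Slice(out, func(i, j int) bool {
            return weight(out[i]) < weight(out[j])
        })
        return out
    }

    func main() {
        // Per the test below, "foobar" happens to order ...111
        // ahead of ...222.
        hash := fmt.Sprintf("%x", md5.Sum([]byte("foobar")))
        fmt.Println(rendezvousOrder(hash, []string{
            "zzzzz-nyw5e-222222222222222",
            "zzzzz-nyw5e-111111111111111",
        }))
    }
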
diff --git a/services/keepstore/keepstore_test.go b/services/keepstore/keepstore_test.go
new file mode 100644
index 0000000000..3a01476096
--- /dev/null
+++ b/services/keepstore/keepstore_test.go
@@ -0,0 +1,874 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package keepstore
+
+import (
+	"bytes"
+	"context"
+	"crypto/md5"
+	"errors"
+	"fmt"
+	"io"
+	"net/http"
+	"os"
+	"sort"
+	"strings"
+	"sync"
+	"testing"
+	"time"
+
+	"git.arvados.org/arvados.git/lib/config"
+	"git.arvados.org/arvados.git/sdk/go/arvados"
+	"git.arvados.org/arvados.git/sdk/go/arvadostest"
+	"git.arvados.org/arvados.git/sdk/go/auth"
+	"git.arvados.org/arvados.git/sdk/go/ctxlog"
+	"github.com/prometheus/client_golang/prometheus"
+	. "gopkg.in/check.v1"
+)
+
+func TestGocheck(t *testing.T) {
+	TestingT(t)
+}
+
+const (
+	fooHash = "acbd18db4cc2f85cedef654fccc4a4d8"
+	barHash = "37b51d194a7513e45b56f6524f2d51f2"
+)
+
+var testServiceURL = func() arvados.URL {
+	return arvados.URL{Host: "localhost:12345", Scheme: "http"}
+}()
+
+func authContext(token string) context.Context {
+	return auth.NewContext(context.TODO(), &auth.Credentials{Tokens: []string{token}})
+}
+
+func testCluster(t TB) *arvados.Cluster {
+	cfg, err := config.NewLoader(bytes.NewBufferString("Clusters: {zzzzz: {}}"), ctxlog.TestLogger(t)).Load()
+	if err != nil {
+		t.Fatal(err)
+	}
+	cluster, err := cfg.GetCluster("")
+	if err != nil {
+		t.Fatal(err)
+	}
+	cluster.SystemRootToken = arvadostest.SystemRootToken
+	cluster.ManagementToken = arvadostest.ManagementToken
+	return cluster
+}
+
+func testKeepstore(t TB, cluster *arvados.Cluster, reg *prometheus.Registry) (*keepstore, context.CancelFunc) {
+	if reg == nil {
+		reg = prometheus.NewRegistry()
+	}
+	ctx, cancel := context.WithCancel(context.Background())
+	ctx = ctxlog.Context(ctx, ctxlog.TestLogger(t))
+	ks, err := newKeepstore(ctx, cluster, cluster.SystemRootToken, reg, testServiceURL)
+	if err != nil {
+		t.Fatal(err)
+	}
+	return ks, cancel
+}
+
+var _ = Suite(&keepstoreSuite{})
+
+type keepstoreSuite struct {
+	cluster *arvados.Cluster
+}
+
+func (s *keepstoreSuite) SetUpTest(c *C) {
+	s.cluster = testCluster(c)
+	s.cluster.Volumes = map[string]arvados.Volume{
+		"zzzzz-nyw5e-000000000000000": {Replication: 1, Driver: "stub"},
+		"zzzzz-nyw5e-111111111111111": {Replication: 1, Driver: "stub"},
+	}
+}
+
+func (s *keepstoreSuite) TestBlockRead_ChecksumMismatch(c *C) {
+	ks, cancel := testKeepstore(c, s.cluster, nil)
+	defer cancel()
+
+	ctx := authContext(arvadostest.ActiveTokenV2)
+
+	fooHash := fmt.Sprintf("%x", md5.Sum([]byte("foo")))
+	err := ks.mountsW[0].BlockWrite(ctx, fooHash, []byte("bar"))
+	c.Assert(err, IsNil)
+
+	_, err = ks.BlockWrite(ctx, arvados.BlockWriteOptions{
+		Hash: fooHash,
+		Data: []byte("foo"),
+	})
+	c.Check(err, ErrorMatches, "hash collision")
+
+	buf := bytes.NewBuffer(nil)
+	_, err = ks.BlockRead(ctx, arvados.BlockReadOptions{
+		Locator: ks.signLocator(arvadostest.ActiveTokenV2, fooHash+"+3"),
+		WriteTo: buf,
+	})
+	c.Check(err, ErrorMatches, "checksum mismatch in stored data")
+	c.Check(buf.String(), Not(Equals), "foo")
+	c.Check(buf.Len() < 3, Equals, true)
+
+	err = ks.mountsW[1].BlockWrite(ctx, fooHash, []byte("foo"))
+	c.Assert(err, IsNil)
+
+	buf = bytes.NewBuffer(nil)
+	_, err = ks.BlockRead(ctx, arvados.BlockReadOptions{
+		Locator: ks.signLocator(arvadostest.ActiveTokenV2, fooHash+"+3"),
+		WriteTo: buf,
+	})
+	c.Check(err, ErrorMatches, "checksum mismatch in stored data")
+	c.Check(buf.Len() < 3, Equals, true)
+}
+
+func (s *keepstoreSuite) TestBlockReadWrite_SigningDisabled(c *C) {
+	origKey := s.cluster.Collections.BlobSigningKey
+	s.cluster.Collections.BlobSigning = false
+	s.cluster.Collections.BlobSigningKey = ""
+	ks, cancel := testKeepstore(c, s.cluster, nil)
+	defer cancel()
+
+	resp, err := ks.BlockWrite(authContext("abcde"), arvados.BlockWriteOptions{
+		Hash: fooHash,
+		Data: []byte("foo"),
+	})
+	c.Assert(err, IsNil)
+	c.Check(resp.Locator, Equals, fooHash+"+3")
+	locUnsigned := resp.Locator
+	ttl := time.Hour
+	locSigned := arvados.SignLocator(locUnsigned, arvadostest.ActiveTokenV2, time.Now().Add(ttl), ttl, []byte(origKey))
+	c.Assert(locSigned, Not(Equals), locUnsigned)
+
+	for _, locator := range []string{locUnsigned, locSigned} {
+		for _, token := range []string{"", "xyzzy", arvadostest.ActiveTokenV2} {
+			c.Logf("=== locator %q token %q", locator, token)
+			ctx := authContext(token)
+			buf := bytes.NewBuffer(nil)
+			_, err := ks.BlockRead(ctx, arvados.BlockReadOptions{
+				Locator: locator,
+				WriteTo: buf,
+			})
+			c.Check(err, IsNil)
+			c.Check(buf.String(), Equals, "foo")
+		}
+	}
+}
+
+func (s *keepstoreSuite) TestBlockRead_OrderedByStorageClassPriority(c *C) {
+	s.cluster.Volumes = map[string]arvados.Volume{
+		"zzzzz-nyw5e-111111111111111": {
+			Driver:         "stub",
+			Replication:    1,
+			StorageClasses: map[string]bool{"class1": true}},
+		"zzzzz-nyw5e-222222222222222": {
+			Driver:         "stub",
+			Replication:    1,
+			StorageClasses: map[string]bool{"class2": true, "class3": true}},
+	}
+
+	// "foobar" is just some data that happens to result in
+	// rendezvous order {111, 222}
+	data := []byte("foobar")
+	hash := fmt.Sprintf("%x", md5.Sum(data))
+
+	for _, trial := range []struct {
+		priority1 int // priority of class1, thus vol1
+		priority2 int // priority of class2
+		priority3 int // priority of class3 (vol2 priority will be max(priority2, priority3))
+		expectLog string
+	}{
+		{100, 50, 50, "111 read 385\n"},              // class1 has higher priority => try vol1 first, no need to try vol2
+		{100, 100, 100, "111 read 385\n"},            // same priority, vol2 is first in rendezvous order => try vol1 first and succeed
+		{66, 99, 33, "222 read 385\n111 read 385\n"}, // class2 has higher priority => try vol2 first, then try vol1
+		{66, 33, 99, "222 read 385\n111 read 385\n"}, // class3 has highest priority => vol2 has highest => try vol2 first, then try vol1
+	} {
+		c.Logf("=== %+v", trial)
+
+		s.cluster.StorageClasses = map[string]arvados.StorageClassConfig{
+			"class1": {Priority: trial.priority1},
+			"class2": {Priority: trial.priority2},
+			"class3": {Priority: trial.priority3},
+		}
+		ks, cancel := testKeepstore(c, s.cluster, nil)
+		defer cancel()
+
+		ctx := authContext(arvadostest.ActiveTokenV2)
+		resp, err := ks.BlockWrite(ctx, arvados.BlockWriteOptions{
+			Hash:           hash,
+			Data:           data,
+			StorageClasses: []string{"class1"},
+		})
+		c.Assert(err, IsNil)
+
+		// Combine logs into one. (We only want the logs from
+		// the BlockRead below, not from BlockWrite above.)
+		stubLog := &stubLog{}
+		for _, mnt := range ks.mounts {
+			mnt.volume.(*stubVolume).stubLog = stubLog
+		}
+
+		n, err := ks.BlockRead(ctx, arvados.BlockReadOptions{
+			Locator: resp.Locator,
+			WriteTo: io.Discard,
+		})
+		c.Assert(n, Equals, len(data))
+		c.Assert(err, IsNil)
+		c.Check(stubLog.String(), Equals, trial.expectLog)
+	}
+}
+
+func (s *keepstoreSuite) TestBlockWrite_NoWritableVolumes(c *C) {
+	for uuid, v := range s.cluster.Volumes {
+		v.ReadOnly = true
+		s.cluster.Volumes[uuid] = v
+	}
+	ks, cancel := testKeepstore(c, s.cluster, nil)
+	defer cancel()
+	for _, mnt := range ks.mounts {
+		mnt.volume.(*stubVolume).blockWrite = func(context.Context, string, []byte) error {
+			c.Error("volume BlockWrite called")
+			return errors.New("fail")
+		}
+	}
+	ctx := authContext(arvadostest.ActiveTokenV2)
+
+	_, err := ks.BlockWrite(ctx, arvados.BlockWriteOptions{
+		Hash: fooHash,
+		Data: []byte("foo")})
+	c.Check(err, NotNil)
+	c.Check(err.(interface{ HTTPStatus() int }).HTTPStatus(), Equals, http.StatusInsufficientStorage)
+}
+
+func (s *keepstoreSuite) TestBlockWrite_MultipleStorageClasses(c *C) {
+	s.cluster.Volumes = map[string]arvados.Volume{
+		"zzzzz-nyw5e-111111111111111": {
+			Driver:         "stub",
+			Replication:    1,
+			StorageClasses: map[string]bool{"class1": true}},
+		"zzzzz-nyw5e-121212121212121": {
+			Driver:         "stub",
+			Replication:    1,
+			StorageClasses: map[string]bool{"class1": true, "class2": true}},
+		"zzzzz-nyw5e-222222222222222": {
+			Driver:         "stub",
+			Replication:    1,
+			StorageClasses: map[string]bool{"class2": true}},
+	}
+
+	// testData is a block that happens to have rendezvous order 111, 121, 222
+	testData := []byte("qux")
+	testHash := fmt.Sprintf("%x+%d", md5.Sum(testData), len(testData))
+
+	s.cluster.StorageClasses = map[string]arvados.StorageClassConfig{
+		"class1": {},
+		"class2": {},
+		"class3": {},
+	}
+
+	ctx := authContext(arvadostest.ActiveTokenV2)
+	for idx, trial := range []struct {
+		classes   string // desired classes
+		expectLog string
+	}{
+		{"class1", "" +
+			"111 read d85\n" +
+			"121 read d85\n" +
+			"111 write d85\n" +
+			"111 read d85\n" +
+			"111 touch d85\n"},
+		{"class2", "" +
+			"121 read d85\n" + // write#1
+			"222 read d85\n" +
+			"121 write d85\n" +
+			"121 read d85\n" + // write#2
+			"121 touch d85\n"},
+		{"class1,class2", "" +
+			"111 read d85\n" + // write#1
+			"121 read d85\n" +
+			"222 read d85\n" +
+			"121 write d85\n" +
+			"111 write d85\n" +
+			"111 read d85\n" + // write#2
+			"111 touch d85\n" +
+			"121 read d85\n" +
+			"121 touch d85\n"},
+		{"class1,class2,class404", "" +
+			"111 read d85\n" + // write#1
+			"121 read d85\n" +
+			"222 read d85\n" +
+			"121 write d85\n" +
+			"111 write d85\n" +
+			"111 read d85\n" + // write#2
+			"111 touch d85\n" +
+			"121 read d85\n" +
+			"121 touch d85\n"},
+	} {
+		c.Logf("=== %d: %+v", idx, trial)
+
+		ks, cancel := testKeepstore(c, s.cluster, nil)
+		defer cancel()
+		stubLog := &stubLog{}
+		for _, mnt := range ks.mounts {
+			mnt.volume.(*stubVolume).stubLog = stubLog
+		}
+
+		// Check that we chose the right block data
+		rvz := ks.rendezvous(testHash, ks.mountsW)
+		c.Assert(rvz[0].UUID[24:], Equals, "111")
+		c.Assert(rvz[1].UUID[24:], Equals, "121")
+		c.Assert(rvz[2].UUID[24:], Equals, "222")
+
+		for i := 0; i < 2; i++ {
+			_, err := ks.BlockWrite(ctx, arvados.BlockWriteOptions{
+				Hash:           testHash,
+				Data:           testData,
+				StorageClasses: strings.Split(trial.classes, ","),
+			})
+			c.Check(err, IsNil)
+		}
+		c.Check(stubLog.String(), Equals, trial.expectLog)
+	}
+}
+
+func (s *keepstoreSuite) TestBlockTrash(c *C) {
+	s.cluster.Volumes = map[string]arvados.Volume{
+		"zzzzz-nyw5e-000000000000000": {Replication: 1, Driver: "stub"},
+		"zzzzz-nyw5e-111111111111111": {Replication: 1, Driver: "stub"},
+		"zzzzz-nyw5e-222222222222222": {Replication: 1, Driver: "stub", ReadOnly: true},
+		"zzzzz-nyw5e-333333333333333": {Replication: 1, Driver: "stub", ReadOnly: true, AllowTrashWhenReadOnly: true},
+	}
+	ks, cancel := testKeepstore(c, s.cluster, nil)
+	defer cancel()
+
+	var vol []*stubVolume
+	for _, mount := range ks.mountsR {
+		vol = append(vol, mount.volume.(*stubVolume))
+	}
+	sort.Slice(vol, func(i, j int) bool {
+		return vol[i].params.UUID < vol[j].params.UUID
+	})
+
+	ctx := context.Background()
+	loc := fooHash + "+3"
+	tOld := time.Now().Add(-s.cluster.Collections.BlobSigningTTL.Duration() - time.Second)
+
+	clear := func() {
+		for _, vol := range vol {
+			err := vol.BlockTrash(fooHash)
+			if !os.IsNotExist(err) {
+				c.Assert(err, IsNil)
+			}
+		}
+	}
+	writeit := func(volidx int) {
+		err := vol[volidx].BlockWrite(ctx, fooHash, []byte("foo"))
+		c.Assert(err, IsNil)
+		err = vol[volidx].blockTouchWithTime(fooHash, tOld)
+		c.Assert(err, IsNil)
+	}
+	trashit := func() error {
+		return ks.BlockTrash(ctx, loc)
+	}
+	checkexists := func(volidx int) bool {
+		_, err := vol[volidx].BlockRead(ctx, fooHash, io.Discard)
+		if !os.IsNotExist(err) {
+			c.Check(err, IsNil)
+		}
+		return err == nil
+	}
+
+	clear()
+	c.Check(trashit(), Equals, os.ErrNotExist)
+
+	// one old replica => trash it
+	clear()
+	writeit(0)
+	c.Check(trashit(), IsNil)
+	c.Check(checkexists(0), Equals, false)
+
+	// one old replica + one new replica => keep new, trash old
+	clear()
+	writeit(0)
+	writeit(1)
+	c.Check(vol[1].blockTouchWithTime(fooHash, time.Now()), IsNil)
+	c.Check(trashit(), IsNil)
+	c.Check(checkexists(0), Equals, false)
+	c.Check(checkexists(1), Equals, true)
+
+	// two old replicas => trash both
+	clear()
+	writeit(0)
+	writeit(1)
+	c.Check(trashit(), IsNil)
+	c.Check(checkexists(0), Equals, false)
+	c.Check(checkexists(1), Equals, false)
+
+	// four old replicas => trash all except readonly volume with
+	// AllowTrashWhenReadOnly==false
+	clear()
+	writeit(0)
+	writeit(1)
+	writeit(2)
+	writeit(3)
+	c.Check(trashit(), IsNil)
+	c.Check(checkexists(0), Equals, false)
+	c.Check(checkexists(1), Equals, false)
+	c.Check(checkexists(2), Equals, true)
+	c.Check(checkexists(3), Equals, false)
+
+	// two old replicas but one returns an error => return the
+	// only non-404 backend error
+	clear()
+	vol[0].blockTrash = func(hash string) error {
+		return errors.New("fake error")
+	}
+	writeit(0)
+	writeit(3)
+	c.Check(trashit(), ErrorMatches, "fake error")
+	c.Check(checkexists(0), Equals, true)
+	c.Check(checkexists(1), Equals, false)
+	c.Check(checkexists(2), Equals, false)
+	c.Check(checkexists(3), Equals, false)
+}
+
+func (s *keepstoreSuite) TestBlockWrite_OnlyOneBuffer(c *C) {
+	s.cluster.API.MaxKeepBlobBuffers = 1
+	ks, cancel := testKeepstore(c, s.cluster, nil)
+	defer cancel()
+	ok := make(chan struct{})
+	go func() {
+		defer close(ok)
+		ctx := authContext(arvadostest.ActiveTokenV2)
+		_, err := ks.BlockWrite(ctx, arvados.BlockWriteOptions{
+			Hash: fooHash,
+			Data: []byte("foo")})
+		c.Check(err, IsNil)
+	}()
+	select {
+	case <-ok:
+	case <-time.After(time.Second):
+		c.Fatal("PUT deadlocks with MaxKeepBlobBuffers==1")
+	}
+}
+
+func (s *keepstoreSuite) TestBufferPoolLeak(c *C) {
+	s.cluster.API.MaxKeepBlobBuffers = 4
+	ks, cancel := testKeepstore(c, s.cluster, nil)
+	defer cancel()
+
+	ctx := authContext(arvadostest.ActiveTokenV2)
+	var wg sync.WaitGroup
+	for range make([]int, 20) {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			resp, err := ks.BlockWrite(ctx, arvados.BlockWriteOptions{
+				Hash: fooHash,
+				Data: []byte("foo")})
+			c.Check(err, IsNil)
+			_, err = ks.BlockRead(ctx, arvados.BlockReadOptions{
+				Locator: resp.Locator,
+				WriteTo: io.Discard})
+			c.Check(err, IsNil)
+		}()
+	}
+	ok := make(chan struct{})
+	go func() {
+		wg.Wait()
+		close(ok)
+	}()
+	select {
+	case <-ok:
+	case <-time.After(time.Second):
+		c.Fatal("read/write sequence deadlocks, likely buffer pool leak")
+	}
+}
+
+func (s *keepstoreSuite) TestPutStorageClasses(c *C) {
+	s.cluster.Volumes = map[string]arvados.Volume{
+		"zzzzz-nyw5e-000000000000000": {Replication: 1, Driver: "stub"}, // "default" is implicit
+		"zzzzz-nyw5e-111111111111111": {Replication: 1, Driver: "stub", StorageClasses: map[string]bool{"special": true, "extra": true}},
+		"zzzzz-nyw5e-222222222222222": {Replication: 1, Driver: "stub", StorageClasses: map[string]bool{"readonly": true}, ReadOnly: true},
+	}
+	ks, cancel := testKeepstore(c, s.cluster, nil)
+	defer cancel()
+	ctx := authContext(arvadostest.ActiveTokenV2)
+
+	for _, trial := range []struct {
+		ask            []string
+		expectReplicas int
+		expectClasses  map[string]int
+	}{
+		{nil,
+			1,
+			map[string]int{"default": 1}},
+		{[]string{},
+			1,
+			map[string]int{"default": 1}},
+		{[]string{"default"},
+			1,
+			map[string]int{"default": 1}},
+		{[]string{"default", "default"},
+			1,
+			map[string]int{"default": 1}},
+		{[]string{"special"},
+			1,
+			map[string]int{"extra": 1, "special": 1}},
+		{[]string{"special", "readonly"},
+			1,
+			map[string]int{"extra": 1, "special": 1}},
+		{[]string{"special", "nonexistent"},
+			1,
+			map[string]int{"extra": 1, "special": 1}},
+		{[]string{"extra", "special"},
+			1,
+			map[string]int{"extra": 1, "special": 1}},
+		{[]string{"default", "special"},
+			2,
+			map[string]int{"default": 1, "extra": 1, "special": 1}},
+	} {
+		c.Logf("success case %#v", trial)
+		resp, err := ks.BlockWrite(ctx, arvados.BlockWriteOptions{
+			Hash:           fooHash,
+			Data:           []byte("foo"),
+			StorageClasses: trial.ask,
+		})
+		if !c.Check(err, IsNil) {
+			continue
+		}
+		c.Check(resp.Replicas, Equals, trial.expectReplicas)
+		if len(trial.expectClasses) == 0 {
+			// any non-empty value is correct
+			c.Check(resp.StorageClasses, Not(HasLen), 0)
+		} else {
+			c.Check(resp.StorageClasses, DeepEquals, trial.expectClasses)
+		}
+	}
+
+	for _, ask := range [][]string{
+		{"doesnotexist"},
+		{"doesnotexist", "readonly"},
+		{"readonly"},
+	} {
+		c.Logf("failure case %s", ask)
+		_, err := ks.BlockWrite(ctx, arvados.BlockWriteOptions{
+			Hash:           fooHash,
+			Data:           []byte("foo"),
+			StorageClasses: ask,
+		})
+		c.Check(err, NotNil)
+	}
+}
+
+func (s *keepstoreSuite) TestUntrashHandlerWithNoWritableVolumes(c *C) {
+	for uuid, v := range s.cluster.Volumes {
+		v.ReadOnly = true
+		s.cluster.Volumes[uuid] = v
+	}
+	ks, cancel := testKeepstore(c, s.cluster, nil)
+	defer cancel()
+
+	for _, mnt := range ks.mounts {
+		err := mnt.BlockWrite(context.Background(), fooHash, []byte("foo"))
+		c.Assert(err, IsNil)
+		_, err = mnt.BlockRead(context.Background(), fooHash, io.Discard)
+		c.Assert(err, IsNil)
+	}
+
+	err := ks.BlockUntrash(context.Background(), fooHash)
+	c.Check(os.IsNotExist(err), Equals, true)
+
+	for _, mnt := range ks.mounts {
+		_, err := mnt.BlockRead(context.Background(), fooHash, io.Discard)
+		c.Assert(err, IsNil)
+	}
+}
+
+func (s *keepstoreSuite) TestBlockWrite_SkipReadOnly(c *C) {
+	s.cluster.Volumes = map[string]arvados.Volume{
+		"zzzzz-nyw5e-000000000000000": {Replication: 1, Driver: "stub"},
+		"zzzzz-nyw5e-111111111111111": {Replication: 1, Driver: "stub", ReadOnly: true},
+		"zzzzz-nyw5e-222222222222222": {Replication: 1, Driver: "stub", ReadOnly: true, AllowTrashWhenReadOnly: true},
+	}
+	ks, cancel := testKeepstore(c, s.cluster, nil)
+	defer cancel()
+	ctx := authContext(arvadostest.ActiveTokenV2)
+
+	for i := range make([]byte, 32) {
+		data := []byte(fmt.Sprintf("block %d", i))
+		_, err := ks.BlockWrite(ctx, arvados.BlockWriteOptions{Data: data})
+		c.Assert(err, IsNil)
+	}
+	c.Check(ks.mounts["zzzzz-nyw5e-000000000000000"].volume.(*stubVolume).stubLog.String(), Matches, "(?ms).*write.*")
+	c.Check(ks.mounts["zzzzz-nyw5e-111111111111111"].volume.(*stubVolume).stubLog.String(), HasLen, 0)
+	c.Check(ks.mounts["zzzzz-nyw5e-222222222222222"].volume.(*stubVolume).stubLog.String(), HasLen, 0)
+}
+
+func (s *keepstoreSuite) TestParseLocator(c *C) {
+	for _, trial := range []struct {
+		locator string
+		ok      bool
+		expect  locatorInfo
+	}{
+		{locator: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+			ok: true},
+		{locator: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa+1234",
+			ok: true, expect: locatorInfo{size: 1234}},
+		{locator: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa+1234+Abcdef at abcdef",
+			ok: true, expect: locatorInfo{size: 1234, signed: true}},
+		{locator: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa+1234+Rzzzzz-abcdef",
+			ok: true, expect: locatorInfo{size: 1234, remote: true}},
+		{locator: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa+12345+Zexample+Rzzzzz-abcdef",
+			ok: true, expect: locatorInfo{size: 12345, remote: true}},
+		// invalid: hash length != 32
+		{locator: "",
+			ok: false},
+		{locator: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+			ok: false},
+		{locator: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa+1234",
+			ok: false},
+		{locator: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabb",
+			ok: false},
+		{locator: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabb+1234",
+			ok: false},
+		// invalid: first hint is not size
+		{locator: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa+Abcdef+1234",
+			ok: false},
+	} {
+		c.Logf("=== %s", trial.locator)
+		li, err := parseLocator(trial.locator)
+		if !trial.ok {
+			c.Check(err, NotNil)
+			continue
+		}
+		c.Check(err, IsNil)
+		c.Check(li.hash, Equals, trial.locator[:32])
+		c.Check(li.size, Equals, trial.expect.size)
+		c.Check(li.signed, Equals, trial.expect.signed)
+		c.Check(li.remote, Equals, trial.expect.remote)
+	}
+}
+
+func init() {
+	driver["stub"] = func(params newVolumeParams) (volume, error) {
+		v := &stubVolume{
+			params:  params,
+			data:    make(map[string]stubData),
+			stubLog: &stubLog{},
+		}
+		return v, nil
+	}
+}
+
+type stubLog struct {
+	sync.Mutex
+	bytes.Buffer
+}
+
+func (sl *stubLog) Printf(format string, args ...interface{}) {
+	if sl == nil {
+		return
+	}
+	sl.Lock()
+	defer sl.Unlock()
+	fmt.Fprintf(sl, format+"\n", args...)
+}
+
+type stubData struct {
+	mtime time.Time
+	data  []byte
+	trash time.Time
+}
+
+type stubVolume struct {
+	params  newVolumeParams
+	data    map[string]stubData
+	stubLog *stubLog
+	mtx     sync.Mutex
+
+	// The following funcs enable tests to insert delays and
+	// failures. Each volume operation begins by calling the
+	// corresponding func (if non-nil). If the func returns an
+	// error, that error is returned to caller. Otherwise, the
+	// stub continues normally.
+	blockRead    func(ctx context.Context, hash string, writeTo io.Writer) (int, error)
+	blockWrite   func(ctx context.Context, hash string, data []byte) error
+	deviceID     func() string
+	blockTouch   func(hash string) error
+	blockTrash   func(hash string) error
+	blockUntrash func(hash string) error
+	index        func(ctx context.Context, prefix string, writeTo io.Writer) error
+	mtime        func(hash string) (time.Time, error)
+	emptyTrash   func()
+}
+
+func (v *stubVolume) log(op, hash string) {
+	// Note this intentionally crashes if UUID or hash is short --
+	// if keepstore ever does that, tests should fail.
+	v.stubLog.Printf("%s %s %s", v.params.UUID[24:27], op, hash[:3])
+}
+
+func (v *stubVolume) BlockRead(ctx context.Context, hash string, writeTo io.Writer) (int, error) {
+	v.log("read", hash)
+	if v.blockRead != nil {
+		n, err := v.blockRead(ctx, hash, writeTo)
+		if err != nil {
+			return n, err
+		}
+	}
+	v.mtx.Lock()
+	ent, ok := v.data[hash]
+	v.mtx.Unlock()
+	if !ok || !ent.trash.IsZero() {
+		return 0, os.ErrNotExist
+	}
+	wrote := 0
+	for writesize := 1000; wrote < len(ent.data); writesize = writesize * 2 {
+		data := ent.data[wrote:]
+		if len(data) > writesize {
+			data = data[:writesize]
+		}
+		n, err := writeTo.Write(data)
+		wrote += n
+		if err != nil {
+			return wrote, err
+		}
+	}
+	return wrote, nil
+}
+
+func (v *stubVolume) BlockWrite(ctx context.Context, hash string, data []byte) error {
+	v.log("write", hash)
+	if v.blockWrite != nil {
+		if err := v.blockWrite(ctx, hash, data); err != nil {
+			return err
+		}
+	}
+	v.mtx.Lock()
+	defer v.mtx.Unlock()
+	v.data[hash] = stubData{
+		mtime: time.Now(),
+		data:  append([]byte(nil), data...),
+	}
+	return nil
+}
+
+func (v *stubVolume) DeviceID() string {
+	return fmt.Sprintf("%p", v)
+}
+
+func (v *stubVolume) BlockTouch(hash string) error {
+	v.log("touch", hash)
+	if v.blockTouch != nil {
+		if err := v.blockTouch(hash); err != nil {
+			return err
+		}
+	}
+	v.mtx.Lock()
+	defer v.mtx.Unlock()
+	ent, ok := v.data[hash]
+	if !ok || !ent.trash.IsZero() {
+		return os.ErrNotExist
+	}
+	ent.mtime = time.Now()
+	v.data[hash] = ent
+	return nil
+}
+
+// Set mtime to the (presumably old) specified time.
+func (v *stubVolume) blockTouchWithTime(hash string, t time.Time) error {
+	v.log("touchwithtime", hash)
+	v.mtx.Lock()
+	defer v.mtx.Unlock()
+	ent, ok := v.data[hash]
+	if !ok {
+		return os.ErrNotExist
+	}
+	ent.mtime = t
+	v.data[hash] = ent
+	return nil
+}
+
+func (v *stubVolume) BlockTrash(hash string) error {
+	v.log("trash", hash)
+	if v.blockTrash != nil {
+		if err := v.blockTrash(hash); err != nil {
+			return err
+		}
+	}
+	v.mtx.Lock()
+	defer v.mtx.Unlock()
+	ent, ok := v.data[hash]
+	if !ok || !ent.trash.IsZero() {
+		return os.ErrNotExist
+	}
+	ent.trash = time.Now().Add(v.params.Cluster.Collections.BlobTrashLifetime.Duration())
+	v.data[hash] = ent
+	return nil
+}
+
+func (v *stubVolume) BlockUntrash(hash string) error {
+	v.log("untrash", hash)
+	if v.blockUntrash != nil {
+		if err := v.blockUntrash(hash); err != nil {
+			return err
+		}
+	}
+	v.mtx.Lock()
+	defer v.mtx.Unlock()
+	ent, ok := v.data[hash]
+	if !ok || ent.trash.IsZero() {
+		return os.ErrNotExist
+	}
+	ent.trash = time.Time{}
+	v.data[hash] = ent
+	return nil
+}
+
+func (v *stubVolume) Index(ctx context.Context, prefix string, writeTo io.Writer) error {
+	v.stubLog.Printf("%s index %s", v.params.UUID, prefix)
+	if v.index != nil {
+		if err := v.index(ctx, prefix, writeTo); err != nil {
+			return err
+		}
+	}
+	buf := &bytes.Buffer{}
+	v.mtx.Lock()
+	for hash, ent := range v.data {
+		if ent.trash.IsZero() && strings.HasPrefix(hash, prefix) {
+			fmt.Fprintf(buf, "%s+%d %d\n", hash, len(ent.data), ent.mtime.UnixNano())
+		}
+	}
+	v.mtx.Unlock()
+	_, err := io.Copy(writeTo, buf)
+	return err
+}
+
+func (v *stubVolume) Mtime(hash string) (time.Time, error) {
+	v.log("mtime", hash)
+	if v.mtime != nil {
+		if t, err := v.mtime(hash); err != nil {
+			return t, err
+		}
+	}
+	v.mtx.Lock()
+	defer v.mtx.Unlock()
+	ent, ok := v.data[hash]
+	if !ok || !ent.trash.IsZero() {
+		return time.Time{}, os.ErrNotExist
+	}
+	return ent.mtime, nil
+}
+
+func (v *stubVolume) EmptyTrash() {
+	v.stubLog.Printf("%s emptytrash", v.params.UUID)
+	v.mtx.Lock()
+	defer v.mtx.Unlock()
+	for hash, ent := range v.data {
+		if !ent.trash.IsZero() && time.Now().After(ent.trash) {
+			delete(v.data, hash)
+		}
+	}
+}
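
The stubVolume above routes every operation through an optional hook
func before touching its in-memory data, which is how tests such as
TestBlockTrash inject per-volume failures. A minimal sketch of that
fault-injection pattern in isolation; all names are hypothetical.

    package main

    import (
        "errors"
        "fmt"
    )

    // fakeStore mimics stubVolume's hook convention: each operation
    // first consults an optional func field, and any error from the
    // hook short-circuits the normal behavior.
    type fakeStore struct {
        data     map[string][]byte
        readHook func(key string) error // nil means behave normally
    }

    func (f *fakeStore) Read(key string) ([]byte, error) {
        if f.readHook != nil {
            if err := f.readHook(key); err != nil {
                return nil, err
            }
        }
        b, ok := f.data[key]
        if !ok {
            return nil, errors.New("not found")
        }
        return b, nil
    }

    func main() {
        fs := &fakeStore{data: map[string][]byte{"k": []byte("v")}}
        fs.readHook = func(string) error { return errors.New("injected failure") }
        _, err := fs.Read("k")
        fmt.Println(err) // injected failure
    }
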
diff --git a/services/keepstore/metrics.go b/services/keepstore/metrics.go
index d04601fbec..4638de5444 100644
--- a/services/keepstore/metrics.go
+++ b/services/keepstore/metrics.go
@@ -5,66 +5,9 @@
 package keepstore
 
 import (
-	"fmt"
-
 	"github.com/prometheus/client_golang/prometheus"
 )
 
-type nodeMetrics struct {
-	reg *prometheus.Registry
-}
-
-func (m *nodeMetrics) setupBufferPoolMetrics(b *bufferPool) {
-	m.reg.MustRegister(prometheus.NewGaugeFunc(
-		prometheus.GaugeOpts{
-			Namespace: "arvados",
-			Subsystem: "keepstore",
-			Name:      "bufferpool_allocated_bytes",
-			Help:      "Number of bytes allocated to buffers",
-		},
-		func() float64 { return float64(b.Alloc()) },
-	))
-	m.reg.MustRegister(prometheus.NewGaugeFunc(
-		prometheus.GaugeOpts{
-			Namespace: "arvados",
-			Subsystem: "keepstore",
-			Name:      "bufferpool_max_buffers",
-			Help:      "Maximum number of buffers allowed",
-		},
-		func() float64 { return float64(b.Cap()) },
-	))
-	m.reg.MustRegister(prometheus.NewGaugeFunc(
-		prometheus.GaugeOpts{
-			Namespace: "arvados",
-			Subsystem: "keepstore",
-			Name:      "bufferpool_inuse_buffers",
-			Help:      "Number of buffers in use",
-		},
-		func() float64 { return float64(b.Len()) },
-	))
-}
-
-func (m *nodeMetrics) setupWorkQueueMetrics(q *WorkQueue, qName string) {
-	m.reg.MustRegister(prometheus.NewGaugeFunc(
-		prometheus.GaugeOpts{
-			Namespace: "arvados",
-			Subsystem: "keepstore",
-			Name:      fmt.Sprintf("%s_queue_inprogress_entries", qName),
-			Help:      fmt.Sprintf("Number of %s requests in progress", qName),
-		},
-		func() float64 { return float64(getWorkQueueStatus(q).InProgress) },
-	))
-	m.reg.MustRegister(prometheus.NewGaugeFunc(
-		prometheus.GaugeOpts{
-			Namespace: "arvados",
-			Subsystem: "keepstore",
-			Name:      fmt.Sprintf("%s_queue_pending_entries", qName),
-			Help:      fmt.Sprintf("Number of queued %s requests", qName),
-		},
-		func() float64 { return float64(getWorkQueueStatus(q).Queued) },
-	))
-}
-
 type volumeMetricsVecs struct {
 	ioBytes     *prometheus.CounterVec
 	errCounters *prometheus.CounterVec
diff --git a/services/keepstore/metrics_test.go b/services/keepstore/metrics_test.go
new file mode 100644
index 0000000000..0c8f1e68e6
--- /dev/null
+++ b/services/keepstore/metrics_test.go
@@ -0,0 +1,87 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package keepstore
+
+import (
+	"context"
+	"encoding/json"
+	"net/http"
+
+	"git.arvados.org/arvados.git/sdk/go/arvados"
+	"git.arvados.org/arvados.git/sdk/go/arvadostest"
+	"git.arvados.org/arvados.git/sdk/go/ctxlog"
+	"git.arvados.org/arvados.git/sdk/go/httpserver"
+	"github.com/prometheus/client_golang/prometheus"
+	. "gopkg.in/check.v1"
+)
+
+func (s *routerSuite) TestMetrics(c *C) {
+	reg := prometheus.NewRegistry()
+	router, cancel := testRouter(c, s.cluster, reg)
+	defer cancel()
+	instrumented := httpserver.Instrument(reg, ctxlog.TestLogger(c), router)
+	handler := instrumented.ServeAPI(s.cluster.ManagementToken, instrumented)
+
+	router.keepstore.BlockWrite(context.Background(), arvados.BlockWriteOptions{
+		Hash: fooHash,
+		Data: []byte("foo"),
+	})
+	router.keepstore.BlockWrite(context.Background(), arvados.BlockWriteOptions{
+		Hash: barHash,
+		Data: []byte("bar"),
+	})
+
+	// prime the metrics by doing a no-op request
+	resp := call(handler, "GET", "/", "", nil, nil)
+
+	resp = call(handler, "GET", "/metrics.json", "", nil, nil)
+	c.Check(resp.Code, Equals, http.StatusUnauthorized)
+	resp = call(handler, "GET", "/metrics.json", "foobar", nil, nil)
+	c.Check(resp.Code, Equals, http.StatusForbidden)
+	resp = call(handler, "GET", "/metrics.json", arvadostest.ManagementToken, nil, nil)
+	c.Check(resp.Code, Equals, http.StatusOK)
+	var j []struct {
+		Name   string
+		Help   string
+		Type   string
+		Metric []struct {
+			Label []struct {
+				Name  string
+				Value string
+			}
+			Summary struct {
+				SampleCount string
+				SampleSum   float64
+			}
+		}
+	}
+	json.NewDecoder(resp.Body).Decode(&j)
+	found := make(map[string]bool)
+	names := map[string]bool{}
+	for _, g := range j {
+		names[g.Name] = true
+		for _, m := range g.Metric {
+			if len(m.Label) == 2 && m.Label[0].Name == "code" && m.Label[0].Value == "200" && m.Label[1].Name == "method" && m.Label[1].Value == "put" {
+				c.Check(m.Summary.SampleCount, Equals, "2")
+				found[g.Name] = true
+			}
+		}
+	}
+
+	metricsNames := []string{
+		"arvados_keepstore_bufferpool_inuse_buffers",
+		"arvados_keepstore_bufferpool_max_buffers",
+		"arvados_keepstore_bufferpool_allocated_bytes",
+		"arvados_keepstore_pull_queue_inprogress_entries",
+		"arvados_keepstore_pull_queue_pending_entries",
+		"arvados_keepstore_trash_queue_inprogress_entries",
+		"arvados_keepstore_trash_queue_pending_entries",
+		"request_duration_seconds",
+	}
+	for _, m := range metricsNames {
+		_, ok := names[m]
+		c.Check(ok, Equals, true, Commentf("checking metric %q", m))
+	}
+}
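
[A client fetching these metrics outside the test harness follows the same shape the test exercises: GET /metrics.json authenticated with the management token as a Bearer token. A minimal sketch; baseURL and managementToken are assumptions supplied by the caller:]

    package example

    import (
    	"encoding/json"
    	"net/http"
    )

    // fetchMetricNames lists metric family names from a keepstore's
    // /metrics.json endpoint, checking the decode error the test
    // above ignores.
    func fetchMetricNames(baseURL, managementToken string) ([]string, error) {
    	req, err := http.NewRequest("GET", baseURL+"/metrics.json", nil)
    	if err != nil {
    		return nil, err
    	}
    	req.Header.Set("Authorization", "Bearer "+managementToken)
    	resp, err := http.DefaultClient.Do(req)
    	if err != nil {
    		return nil, err
    	}
    	defer resp.Body.Close()
    	var families []struct{ Name string }
    	if err := json.NewDecoder(resp.Body).Decode(&families); err != nil {
    		return nil, err
    	}
    	names := make([]string, 0, len(families))
    	for _, f := range families {
    		names = append(names, f.Name)
    	}
    	return names, nil
    }
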
diff --git a/services/keepstore/mock_mutex_for_test.go b/services/keepstore/mock_mutex_for_test.go
deleted file mode 100644
index daf0ef05f7..0000000000
--- a/services/keepstore/mock_mutex_for_test.go
+++ /dev/null
@@ -1,27 +0,0 @@
-// Copyright (C) The Arvados Authors. All rights reserved.
-//
-// SPDX-License-Identifier: AGPL-3.0
-
-package keepstore
-
-type MockMutex struct {
-	AllowLock   chan struct{}
-	AllowUnlock chan struct{}
-}
-
-func NewMockMutex() *MockMutex {
-	return &MockMutex{
-		AllowLock:   make(chan struct{}),
-		AllowUnlock: make(chan struct{}),
-	}
-}
-
-// Lock waits for someone to send to AllowLock.
-func (m *MockMutex) Lock() {
-	<-m.AllowLock
-}
-
-// Unlock waits for someone to send to AllowUnlock.
-func (m *MockMutex) Unlock() {
-	<-m.AllowUnlock
-}
diff --git a/services/keepstore/mounts_test.go b/services/keepstore/mounts_test.go
index e8c248219f..d29d5f6dc0 100644
--- a/services/keepstore/mounts_test.go
+++ b/services/keepstore/mounts_test.go
@@ -5,28 +5,24 @@
 package keepstore
 
 import (
-	"bytes"
 	"context"
 	"encoding/json"
 	"net/http"
-	"net/http/httptest"
 
-	"git.arvados.org/arvados.git/sdk/go/arvadostest"
-	"git.arvados.org/arvados.git/sdk/go/ctxlog"
-	"git.arvados.org/arvados.git/sdk/go/httpserver"
-	"github.com/prometheus/client_golang/prometheus"
-	check "gopkg.in/check.v1"
+	. "gopkg.in/check.v1"
 )
 
-func (s *HandlerSuite) TestMounts(c *check.C) {
-	c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
+func (s *routerSuite) TestMounts(c *C) {
+	router, cancel := testRouter(c, s.cluster, nil)
+	defer cancel()
 
-	vols := s.handler.volmgr.AllWritable()
-	vols[0].Put(context.Background(), TestHash, TestBlock)
-	vols[1].Put(context.Background(), TestHash2, TestBlock2)
+	router.keepstore.mountsW[0].BlockWrite(context.Background(), fooHash, []byte("foo"))
+	router.keepstore.mountsW[1].BlockWrite(context.Background(), barHash, []byte("bar"))
+
+	resp := call(router, "GET", "/mounts", s.cluster.SystemRootToken, nil, nil)
+	c.Check(resp.Code, Equals, http.StatusOK)
+	c.Log(resp.Body.String())
 
-	resp := s.call("GET", "/mounts", "", nil)
-	c.Check(resp.Code, check.Equals, http.StatusOK)
 	var mntList []struct {
 		UUID           string          `json:"uuid"`
 		DeviceID       string          `json:"device_id"`
@@ -34,119 +30,56 @@ func (s *HandlerSuite) TestMounts(c *check.C) {
 		Replication    int             `json:"replication"`
 		StorageClasses map[string]bool `json:"storage_classes"`
 	}
-	c.Log(resp.Body.String())
 	err := json.Unmarshal(resp.Body.Bytes(), &mntList)
-	c.Assert(err, check.IsNil)
-	c.Assert(len(mntList), check.Equals, 2)
+	c.Assert(err, IsNil)
+	c.Assert(mntList, HasLen, 2)
+
 	for _, m := range mntList {
-		c.Check(len(m.UUID), check.Equals, 27)
-		c.Check(m.UUID[:12], check.Equals, "zzzzz-nyw5e-")
-		c.Check(m.DeviceID, check.Equals, "mock-device-id")
-		c.Check(m.ReadOnly, check.Equals, false)
-		c.Check(m.Replication, check.Equals, 1)
-		c.Check(m.StorageClasses, check.DeepEquals, map[string]bool{"default": true})
+		c.Check(len(m.UUID), Equals, 27)
+		c.Check(m.UUID[:12], Equals, "zzzzz-nyw5e-")
+		c.Check(m.DeviceID, Matches, "0x[0-9a-f]+")
+		c.Check(m.ReadOnly, Equals, false)
+		c.Check(m.Replication, Equals, 1)
+		c.Check(m.StorageClasses, HasLen, 1)
+		for k := range m.StorageClasses {
+			c.Check(k, Matches, "testclass.*")
+		}
 	}
-	c.Check(mntList[0].UUID, check.Not(check.Equals), mntList[1].UUID)
+	c.Check(mntList[0].UUID, Not(Equals), mntList[1].UUID)
 
-	// Bad auth
+	c.Logf("=== bad auth")
 	for _, tok := range []string{"", "xyzzy"} {
-		resp = s.call("GET", "/mounts/"+mntList[1].UUID+"/blocks", tok, nil)
-		c.Check(resp.Code, check.Equals, http.StatusUnauthorized)
-		c.Check(resp.Body.String(), check.Equals, "Unauthorized\n")
-	}
-
-	tok := arvadostest.SystemRootToken
-
-	// Nonexistent mount UUID
-	resp = s.call("GET", "/mounts/X/blocks", tok, nil)
-	c.Check(resp.Code, check.Equals, http.StatusNotFound)
-	c.Check(resp.Body.String(), check.Equals, "mount not found\n")
-
-	// Complete index of first mount
-	resp = s.call("GET", "/mounts/"+mntList[0].UUID+"/blocks", tok, nil)
-	c.Check(resp.Code, check.Equals, http.StatusOK)
-	c.Check(resp.Body.String(), check.Matches, TestHash+`\+[0-9]+ [0-9]+\n\n`)
-
-	// Partial index of first mount (one block matches prefix)
-	resp = s.call("GET", "/mounts/"+mntList[0].UUID+"/blocks?prefix="+TestHash[:2], tok, nil)
-	c.Check(resp.Code, check.Equals, http.StatusOK)
-	c.Check(resp.Body.String(), check.Matches, TestHash+`\+[0-9]+ [0-9]+\n\n`)
-
-	// Complete index of second mount (note trailing slash)
-	resp = s.call("GET", "/mounts/"+mntList[1].UUID+"/blocks/", tok, nil)
-	c.Check(resp.Code, check.Equals, http.StatusOK)
-	c.Check(resp.Body.String(), check.Matches, TestHash2+`\+[0-9]+ [0-9]+\n\n`)
-
-	// Partial index of second mount (no blocks match prefix)
-	resp = s.call("GET", "/mounts/"+mntList[1].UUID+"/blocks/?prefix="+TestHash[:2], tok, nil)
-	c.Check(resp.Code, check.Equals, http.StatusOK)
-	c.Check(resp.Body.String(), check.Equals, "\n")
-}
-
-func (s *HandlerSuite) TestMetrics(c *check.C) {
-	reg := prometheus.NewRegistry()
-	c.Assert(s.handler.setup(context.Background(), s.cluster, "", reg, testServiceURL), check.IsNil)
-	instrumented := httpserver.Instrument(reg, ctxlog.TestLogger(c), s.handler.Handler)
-	s.handler.Handler = instrumented.ServeAPI(s.cluster.ManagementToken, instrumented)
-
-	s.call("PUT", "/"+TestHash, "", TestBlock)
-	s.call("PUT", "/"+TestHash2, "", TestBlock2)
-	resp := s.call("GET", "/metrics.json", "", nil)
-	c.Check(resp.Code, check.Equals, http.StatusUnauthorized)
-	resp = s.call("GET", "/metrics.json", "foobar", nil)
-	c.Check(resp.Code, check.Equals, http.StatusForbidden)
-	resp = s.call("GET", "/metrics.json", arvadostest.ManagementToken, nil)
-	c.Check(resp.Code, check.Equals, http.StatusOK)
-	var j []struct {
-		Name   string
-		Help   string
-		Type   string
-		Metric []struct {
-			Label []struct {
-				Name  string
-				Value string
-			}
-			Summary struct {
-				SampleCount string
-				SampleSum   float64
-			}
-		}
-	}
-	json.NewDecoder(resp.Body).Decode(&j)
-	found := make(map[string]bool)
-	names := map[string]bool{}
-	for _, g := range j {
-		names[g.Name] = true
-		for _, m := range g.Metric {
-			if len(m.Label) == 2 && m.Label[0].Name == "code" && m.Label[0].Value == "200" && m.Label[1].Name == "method" && m.Label[1].Value == "put" {
-				c.Check(m.Summary.SampleCount, check.Equals, "2")
-				found[g.Name] = true
-			}
+		resp = call(router, "GET", "/mounts/"+mntList[1].UUID+"/blocks", tok, nil, nil)
+		if tok == "" {
+			c.Check(resp.Code, Equals, http.StatusUnauthorized)
+			c.Check(resp.Body.String(), Equals, "Unauthorized\n")
+		} else {
+			c.Check(resp.Code, Equals, http.StatusForbidden)
+			c.Check(resp.Body.String(), Equals, "Forbidden\n")
 		}
 	}
 
-	metricsNames := []string{
-		"arvados_keepstore_bufferpool_inuse_buffers",
-		"arvados_keepstore_bufferpool_max_buffers",
-		"arvados_keepstore_bufferpool_allocated_bytes",
-		"arvados_keepstore_pull_queue_inprogress_entries",
-		"arvados_keepstore_pull_queue_pending_entries",
-		"arvados_keepstore_trash_queue_inprogress_entries",
-		"arvados_keepstore_trash_queue_pending_entries",
-		"request_duration_seconds",
-	}
-	for _, m := range metricsNames {
-		_, ok := names[m]
-		c.Check(ok, check.Equals, true, check.Commentf("checking metric %q", m))
-	}
-}
-
-func (s *HandlerSuite) call(method, path, tok string, body []byte) *httptest.ResponseRecorder {
-	resp := httptest.NewRecorder()
-	req, _ := http.NewRequest(method, path, bytes.NewReader(body))
-	if tok != "" {
-		req.Header.Set("Authorization", "Bearer "+tok)
-	}
-	s.handler.ServeHTTP(resp, req)
-	return resp
+	c.Logf("=== nonexistent mount UUID")
+	resp = call(router, "GET", "/mounts/X/blocks", s.cluster.SystemRootToken, nil, nil)
+	c.Check(resp.Code, Equals, http.StatusNotFound)
+
+	c.Logf("=== complete index of first mount")
+	resp = call(router, "GET", "/mounts/"+mntList[0].UUID+"/blocks", s.cluster.SystemRootToken, nil, nil)
+	c.Check(resp.Code, Equals, http.StatusOK)
+	c.Check(resp.Body.String(), Matches, fooHash+`\+[0-9]+ [0-9]+\n\n`)
+
+	c.Logf("=== partial index of first mount (one block matches prefix)")
+	resp = call(router, "GET", "/mounts/"+mntList[0].UUID+"/blocks?prefix="+fooHash[:2], s.cluster.SystemRootToken, nil, nil)
+	c.Check(resp.Code, Equals, http.StatusOK)
+	c.Check(resp.Body.String(), Matches, fooHash+`\+[0-9]+ [0-9]+\n\n`)
+
+	c.Logf("=== complete index of second mount (note trailing slash)")
+	resp = call(router, "GET", "/mounts/"+mntList[1].UUID+"/blocks/", s.cluster.SystemRootToken, nil, nil)
+	c.Check(resp.Code, Equals, http.StatusOK)
+	c.Check(resp.Body.String(), Matches, barHash+`\+[0-9]+ [0-9]+\n\n`)
+
+	c.Logf("=== partial index of second mount (no blocks match prefix)")
+	resp = call(router, "GET", "/mounts/"+mntList[1].UUID+"/blocks/?prefix="+fooHash[:2], s.cluster.SystemRootToken, nil, nil)
+	c.Check(resp.Code, Equals, http.StatusOK)
+	c.Check(resp.Body.String(), Equals, "\n")
 }
diff --git a/services/keepstore/perms.go b/services/keepstore/perms.go
deleted file mode 100644
index 7205a4594d..0000000000
--- a/services/keepstore/perms.go
+++ /dev/null
@@ -1,33 +0,0 @@
-// Copyright (C) The Arvados Authors. All rights reserved.
-//
-// SPDX-License-Identifier: AGPL-3.0
-
-package keepstore
-
-import (
-	"time"
-
-	"git.arvados.org/arvados.git/sdk/go/arvados"
-	"git.arvados.org/arvados.git/sdk/go/keepclient"
-)
-
-// SignLocator takes a blobLocator, an apiToken and an expiry time, and
-// returns a signed locator string.
-func SignLocator(cluster *arvados.Cluster, blobLocator, apiToken string, expiry time.Time) string {
-	return keepclient.SignLocator(blobLocator, apiToken, expiry, cluster.Collections.BlobSigningTTL.Duration(), []byte(cluster.Collections.BlobSigningKey))
-}
-
-// VerifySignature returns nil if the signature on the signedLocator
-// can be verified using the given apiToken. Otherwise it returns
-// either ExpiredError (if the timestamp has expired, which is
-// something the client could have figured out independently) or
-// PermissionError.
-func VerifySignature(cluster *arvados.Cluster, signedLocator, apiToken string) error {
-	err := keepclient.VerifySignature(signedLocator, apiToken, cluster.Collections.BlobSigningTTL.Duration(), []byte(cluster.Collections.BlobSigningKey))
-	if err == keepclient.ErrSignatureExpired {
-		return ExpiredError
-	} else if err != nil {
-		return PermissionError
-	}
-	return nil
-}
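
[With these wrappers gone, callers use the SDK helpers directly, as the proxy_remote and pull_worker changes below do. A minimal sketch of the equivalent signing call, assuming the same cluster config fields the removed wrapper read:]

    package example

    import (
    	"time"

    	"git.arvados.org/arvados.git/sdk/go/arvados"
    	"git.arvados.org/arvados.git/sdk/go/keepclient"
    )

    // signWithCluster reproduces the removed SignLocator wrapper by
    // calling keepclient.SignLocator directly with the cluster's
    // blob-signing TTL and key.
    func signWithCluster(cluster *arvados.Cluster, locator, token string, expiry time.Time) string {
    	return keepclient.SignLocator(locator, token, expiry,
    		cluster.Collections.BlobSigningTTL.Duration(),
    		[]byte(cluster.Collections.BlobSigningKey))
    }
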
diff --git a/services/keepstore/perms_test.go b/services/keepstore/perms_test.go
deleted file mode 100644
index 1322374706..0000000000
--- a/services/keepstore/perms_test.go
+++ /dev/null
@@ -1,63 +0,0 @@
-// Copyright (C) The Arvados Authors. All rights reserved.
-//
-// SPDX-License-Identifier: AGPL-3.0
-
-package keepstore
-
-import (
-	"strconv"
-	"time"
-
-	"git.arvados.org/arvados.git/sdk/go/arvados"
-	check "gopkg.in/check.v1"
-)
-
-const (
-	knownHash    = "acbd18db4cc2f85cedef654fccc4a4d8"
-	knownLocator = knownHash + "+3"
-	knownToken   = "hocfupkn2pjhrpgp2vxv8rsku7tvtx49arbc9s4bvu7p7wxqvk"
-	knownKey     = "13u9fkuccnboeewr0ne3mvapk28epf68a3bhj9q8sb4l6e4e5mkk" +
-		"p6nhj2mmpscgu1zze5h5enydxfe3j215024u16ij4hjaiqs5u4pzsl3nczmaoxnc" +
-		"ljkm4875xqn4xv058koz3vkptmzhyheiy6wzevzjmdvxhvcqsvr5abhl15c2d4o4" +
-		"jhl0s91lojy1mtrzqqvprqcverls0xvy9vai9t1l1lvvazpuadafm71jl4mrwq2y" +
-		"gokee3eamvjy8qq1fvy238838enjmy5wzy2md7yvsitp5vztft6j4q866efym7e6" +
-		"vu5wm9fpnwjyxfldw3vbo01mgjs75rgo7qioh8z8ij7jpyp8508okhgbbex3ceei" +
-		"786u5rw2a9gx743dj3fgq2irk"
-	knownSignatureTTL  = arvados.Duration(24 * 14 * time.Hour)
-	knownSignature     = "89118b78732c33104a4d6231e8b5a5fa1e4301e3"
-	knownTimestamp     = "7fffffff"
-	knownSigHint       = "+A" + knownSignature + "@" + knownTimestamp
-	knownSignedLocator = knownLocator + knownSigHint
-)
-
-func (s *HandlerSuite) TestSignLocator(c *check.C) {
-	tsInt, err := strconv.ParseInt(knownTimestamp, 16, 0)
-	if err != nil {
-		c.Fatal(err)
-	}
-	t0 := time.Unix(tsInt, 0)
-
-	s.cluster.Collections.BlobSigningTTL = knownSignatureTTL
-	s.cluster.Collections.BlobSigningKey = knownKey
-	if x := SignLocator(s.cluster, knownLocator, knownToken, t0); x != knownSignedLocator {
-		c.Fatalf("Got %+q, expected %+q", x, knownSignedLocator)
-	}
-
-	s.cluster.Collections.BlobSigningKey = "arbitrarykey"
-	if x := SignLocator(s.cluster, knownLocator, knownToken, t0); x == knownSignedLocator {
-		c.Fatalf("Got same signature %+q, even though blobSigningKey changed", x)
-	}
-}
-
-func (s *HandlerSuite) TestVerifyLocator(c *check.C) {
-	s.cluster.Collections.BlobSigningTTL = knownSignatureTTL
-	s.cluster.Collections.BlobSigningKey = knownKey
-	if err := VerifySignature(s.cluster, knownSignedLocator, knownToken); err != nil {
-		c.Fatal(err)
-	}
-
-	s.cluster.Collections.BlobSigningKey = "arbitrarykey"
-	if err := VerifySignature(s.cluster, knownSignedLocator, knownToken); err == nil {
-		c.Fatal("Verified signature even with wrong blobSigningKey")
-	}
-}
diff --git a/services/keepstore/pipe_adapters.go b/services/keepstore/pipe_adapters.go
deleted file mode 100644
index 6b555054b6..0000000000
--- a/services/keepstore/pipe_adapters.go
+++ /dev/null
@@ -1,93 +0,0 @@
-// Copyright (C) The Arvados Authors. All rights reserved.
-//
-// SPDX-License-Identifier: AGPL-3.0
-
-package keepstore
-
-import (
-	"bytes"
-	"context"
-	"io"
-	"io/ioutil"
-)
-
-// getWithPipe invokes getter and copies the resulting data into
-// buf. If ctx is done before all data is copied, getWithPipe closes
-// the pipe with an error, and returns early with an error.
-func getWithPipe(ctx context.Context, loc string, buf []byte, br BlockReader) (int, error) {
-	piper, pipew := io.Pipe()
-	go func() {
-		pipew.CloseWithError(br.ReadBlock(ctx, loc, pipew))
-	}()
-	done := make(chan struct{})
-	var size int
-	var err error
-	go func() {
-		size, err = io.ReadFull(piper, buf)
-		if err == io.EOF || err == io.ErrUnexpectedEOF {
-			err = nil
-		}
-		close(done)
-	}()
-	select {
-	case <-ctx.Done():
-		piper.CloseWithError(ctx.Err())
-		return 0, ctx.Err()
-	case <-done:
-		piper.Close()
-		return size, err
-	}
-}
-
-// putWithPipe invokes putter with a new pipe, and copies data
-// from buf into the pipe. If ctx is done before all data is copied,
-// putWithPipe closes the pipe with an error, and returns early with
-// an error.
-func putWithPipe(ctx context.Context, loc string, buf []byte, bw BlockWriter) error {
-	piper, pipew := io.Pipe()
-	copyErr := make(chan error)
-	go func() {
-		_, err := io.Copy(pipew, bytes.NewReader(buf))
-		copyErr <- err
-		close(copyErr)
-	}()
-
-	putErr := make(chan error, 1)
-	go func() {
-		putErr <- bw.WriteBlock(ctx, loc, piper)
-		close(putErr)
-	}()
-
-	var err error
-	select {
-	case err = <-copyErr:
-	case err = <-putErr:
-	case <-ctx.Done():
-		err = ctx.Err()
-	}
-
-	// Ensure io.Copy goroutine isn't blocked writing to pipew
-	// (otherwise, io.Copy is still using buf so it isn't safe to
-	// return). This can cause pipew to receive corrupt data if
-	// err came from copyErr or ctx.Done() before the copy
-	// finished. That's OK, though: in that case err != nil, and
-	// CloseWithErr(err) ensures putter() will get an error from
-	// piper.Read() before seeing EOF.
-	go pipew.CloseWithError(err)
-	go io.Copy(ioutil.Discard, piper)
-	<-copyErr
-
-	// Note: io.Copy() is finished now, but putter() might still
-	// be running. If we encounter an error before putter()
-	// returns, we return right away without waiting for putter().
-
-	if err != nil {
-		return err
-	}
-	select {
-	case <-ctx.Done():
-		return ctx.Err()
-	case err = <-putErr:
-		return err
-	}
-}
diff --git a/services/keepstore/proxy_remote.go b/services/keepstore/proxy_remote.go
deleted file mode 100644
index 325f1cf485..0000000000
--- a/services/keepstore/proxy_remote.go
+++ /dev/null
@@ -1,212 +0,0 @@
-// Copyright (C) The Arvados Authors. All rights reserved.
-//
-// SPDX-License-Identifier: AGPL-3.0
-
-package keepstore
-
-import (
-	"context"
-	"errors"
-	"io"
-	"net/http"
-	"regexp"
-	"strings"
-	"sync"
-	"time"
-
-	"git.arvados.org/arvados.git/sdk/go/arvados"
-	"git.arvados.org/arvados.git/sdk/go/arvadosclient"
-	"git.arvados.org/arvados.git/sdk/go/auth"
-	"git.arvados.org/arvados.git/sdk/go/keepclient"
-)
-
-type remoteProxy struct {
-	clients map[string]*keepclient.KeepClient
-	mtx     sync.Mutex
-}
-
-func (rp *remoteProxy) Get(ctx context.Context, w http.ResponseWriter, r *http.Request, cluster *arvados.Cluster, volmgr *RRVolumeManager) {
-	// Intervening proxies must not return a cached GET response
-	// to a prior request if a X-Keep-Signature request header has
-	// been added or changed.
-	w.Header().Add("Vary", "X-Keep-Signature")
-
-	token := GetAPIToken(r)
-	if token == "" {
-		http.Error(w, "no token provided in Authorization header", http.StatusUnauthorized)
-		return
-	}
-	if strings.SplitN(r.Header.Get("X-Keep-Signature"), ",", 2)[0] == "local" {
-		buf, err := getBufferWithContext(ctx, bufs, BlockSize)
-		if err != nil {
-			http.Error(w, err.Error(), http.StatusServiceUnavailable)
-			return
-		}
-		defer bufs.Put(buf)
-		rrc := &remoteResponseCacher{
-			Locator:        r.URL.Path[1:],
-			Token:          token,
-			Buffer:         buf[:0],
-			ResponseWriter: w,
-			Context:        ctx,
-			Cluster:        cluster,
-			VolumeManager:  volmgr,
-		}
-		defer rrc.Close()
-		w = rrc
-	}
-	var remoteClient *keepclient.KeepClient
-	var parts []string
-	for i, part := range strings.Split(r.URL.Path[1:], "+") {
-		switch {
-		case i == 0:
-			// don't try to parse hash part as hint
-		case strings.HasPrefix(part, "A"):
-			// drop local permission hint
-			continue
-		case len(part) > 7 && part[0] == 'R' && part[6] == '-':
-			remoteID := part[1:6]
-			remote, ok := cluster.RemoteClusters[remoteID]
-			if !ok {
-				http.Error(w, "remote cluster not configured", http.StatusBadRequest)
-				return
-			}
-			kc, err := rp.remoteClient(remoteID, remote, token)
-			if err == auth.ErrObsoleteToken {
-				http.Error(w, err.Error(), http.StatusBadRequest)
-				return
-			} else if err != nil {
-				http.Error(w, err.Error(), http.StatusInternalServerError)
-				return
-			}
-			remoteClient = kc
-			part = "A" + part[7:]
-		}
-		parts = append(parts, part)
-	}
-	if remoteClient == nil {
-		http.Error(w, "bad request", http.StatusBadRequest)
-		return
-	}
-	locator := strings.Join(parts, "+")
-	rdr, _, _, err := remoteClient.Get(locator)
-	switch err.(type) {
-	case nil:
-		defer rdr.Close()
-		io.Copy(w, rdr)
-	case *keepclient.ErrNotFound:
-		http.Error(w, err.Error(), http.StatusNotFound)
-	default:
-		http.Error(w, err.Error(), http.StatusBadGateway)
-	}
-}
-
-func (rp *remoteProxy) remoteClient(remoteID string, remoteCluster arvados.RemoteCluster, token string) (*keepclient.KeepClient, error) {
-	rp.mtx.Lock()
-	kc, ok := rp.clients[remoteID]
-	rp.mtx.Unlock()
-	if !ok {
-		c := &arvados.Client{
-			APIHost:   remoteCluster.Host,
-			AuthToken: "xxx",
-			Insecure:  remoteCluster.Insecure,
-		}
-		ac, err := arvadosclient.New(c)
-		if err != nil {
-			return nil, err
-		}
-		kc, err = keepclient.MakeKeepClient(ac)
-		if err != nil {
-			return nil, err
-		}
-		kc.DiskCacheSize = keepclient.DiskCacheDisabled
-
-		rp.mtx.Lock()
-		if rp.clients == nil {
-			rp.clients = map[string]*keepclient.KeepClient{remoteID: kc}
-		} else {
-			rp.clients[remoteID] = kc
-		}
-		rp.mtx.Unlock()
-	}
-	accopy := *kc.Arvados
-	accopy.ApiToken = token
-	kccopy := kc.Clone()
-	kccopy.Arvados = &accopy
-	token, err := auth.SaltToken(token, remoteID)
-	if err != nil {
-		return nil, err
-	}
-	kccopy.Arvados.ApiToken = token
-	return kccopy, nil
-}
-
-var localOrRemoteSignature = regexp.MustCompile(`\+[AR][^\+]*`)
-
-// remoteResponseCacher wraps http.ResponseWriter. It buffers the
-// response data in the provided buffer, writes/touches a copy on a
-// local volume, adds a response header with a locally-signed locator,
-// and finally writes the data through.
-type remoteResponseCacher struct {
-	Locator       string
-	Token         string
-	Buffer        []byte
-	Context       context.Context
-	Cluster       *arvados.Cluster
-	VolumeManager *RRVolumeManager
-	http.ResponseWriter
-	statusCode int
-}
-
-func (rrc *remoteResponseCacher) Write(p []byte) (int, error) {
-	if len(rrc.Buffer)+len(p) > cap(rrc.Buffer) {
-		return 0, errors.New("buffer full")
-	}
-	rrc.Buffer = append(rrc.Buffer, p...)
-	return len(p), nil
-}
-
-func (rrc *remoteResponseCacher) WriteHeader(statusCode int) {
-	rrc.statusCode = statusCode
-}
-
-func (rrc *remoteResponseCacher) Close() error {
-	if rrc.statusCode == 0 {
-		rrc.statusCode = http.StatusOK
-	} else if rrc.statusCode != http.StatusOK {
-		rrc.ResponseWriter.WriteHeader(rrc.statusCode)
-		rrc.ResponseWriter.Write(rrc.Buffer)
-		return nil
-	}
-	_, err := PutBlock(rrc.Context, rrc.VolumeManager, rrc.Buffer, rrc.Locator[:32], nil)
-	if rrc.Context.Err() != nil {
-		// If caller hung up, log that instead of subsequent/misleading errors.
-		http.Error(rrc.ResponseWriter, rrc.Context.Err().Error(), http.StatusGatewayTimeout)
-		return err
-	}
-	if err == RequestHashError {
-		http.Error(rrc.ResponseWriter, "checksum mismatch in remote response", http.StatusBadGateway)
-		return err
-	}
-	if err, ok := err.(*KeepError); ok {
-		http.Error(rrc.ResponseWriter, err.Error(), err.HTTPCode)
-		return err
-	}
-	if err != nil {
-		http.Error(rrc.ResponseWriter, err.Error(), http.StatusBadGateway)
-		return err
-	}
-
-	unsigned := localOrRemoteSignature.ReplaceAllLiteralString(rrc.Locator, "")
-	expiry := time.Now().Add(rrc.Cluster.Collections.BlobSigningTTL.Duration())
-	signed := SignLocator(rrc.Cluster, unsigned, rrc.Token, expiry)
-	if signed == unsigned {
-		err = errors.New("could not sign locator")
-		http.Error(rrc.ResponseWriter, err.Error(), http.StatusInternalServerError)
-		return err
-	}
-	rrc.Header().Set("X-Keep-Locator", signed)
-	rrc.ResponseWriter.WriteHeader(rrc.statusCode)
-	_, err = rrc.ResponseWriter.Write(rrc.Buffer)
-	return err
-}
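
[For reference, the remote hint format this code parses: a locator part like Rzzzzz-... carries a five-character remote cluster ID between the leading R and the hyphen. A minimal sketch of that check, mirroring the length/offset condition in the loop above:]

    package example

    // remoteIDFromHint extracts the remote cluster ID from a "+R"
    // locator hint part: "Rzzzzz-..." yields "zzzzz".
    func remoteIDFromHint(part string) (string, bool) {
    	if len(part) > 7 && part[0] == 'R' && part[6] == '-' {
    		return part[1:6], true
    	}
    	return "", false
    }
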
diff --git a/services/keepstore/proxy_remote_test.go b/services/keepstore/proxy_remote_test.go
index 534371cc0e..886754e14a 100644
--- a/services/keepstore/proxy_remote_test.go
+++ b/services/keepstore/proxy_remote_test.go
@@ -5,7 +5,6 @@
 package keepstore
 
 import (
-	"context"
 	"crypto/md5"
 	"encoding/json"
 	"fmt"
@@ -20,16 +19,18 @@ import (
 	"git.arvados.org/arvados.git/sdk/go/arvados"
 	"git.arvados.org/arvados.git/sdk/go/arvadostest"
 	"git.arvados.org/arvados.git/sdk/go/auth"
+	"git.arvados.org/arvados.git/sdk/go/ctxlog"
+	"git.arvados.org/arvados.git/sdk/go/httpserver"
 	"git.arvados.org/arvados.git/sdk/go/keepclient"
 	"github.com/prometheus/client_golang/prometheus"
 	check "gopkg.in/check.v1"
 )
 
-var _ = check.Suite(&ProxyRemoteSuite{})
+var _ = check.Suite(&proxyRemoteSuite{})
 
-type ProxyRemoteSuite struct {
+type proxyRemoteSuite struct {
 	cluster *arvados.Cluster
-	handler *handler
+	handler *router
 
 	remoteClusterID      string
 	remoteBlobSigningKey []byte
@@ -40,7 +41,7 @@ type ProxyRemoteSuite struct {
 	remoteAPI            *httptest.Server
 }
 
-func (s *ProxyRemoteSuite) remoteKeepproxyHandler(w http.ResponseWriter, r *http.Request) {
+func (s *proxyRemoteSuite) remoteKeepproxyHandler(w http.ResponseWriter, r *http.Request) {
 	expectToken, err := auth.SaltToken(arvadostest.ActiveTokenV2, s.remoteClusterID)
 	if err != nil {
 		panic(err)
@@ -57,7 +58,7 @@ func (s *ProxyRemoteSuite) remoteKeepproxyHandler(w http.ResponseWriter, r *http
 	http.Error(w, "404", 404)
 }
 
-func (s *ProxyRemoteSuite) remoteAPIHandler(w http.ResponseWriter, r *http.Request) {
+func (s *proxyRemoteSuite) remoteAPIHandler(w http.ResponseWriter, r *http.Request) {
 	host, port, _ := net.SplitHostPort(strings.Split(s.remoteKeepproxy.URL, "//")[1])
 	portnum, _ := strconv.Atoi(port)
 	if r.URL.Path == "/arvados/v1/discovery/v1/rest" {
@@ -81,15 +82,13 @@ func (s *ProxyRemoteSuite) remoteAPIHandler(w http.ResponseWriter, r *http.Reque
 	http.Error(w, "404", 404)
 }
 
-func (s *ProxyRemoteSuite) SetUpTest(c *check.C) {
+func (s *proxyRemoteSuite) SetUpTest(c *check.C) {
 	s.remoteClusterID = "z0000"
 	s.remoteBlobSigningKey = []byte("3b6df6fb6518afe12922a5bc8e67bf180a358bc8")
-	s.remoteKeepproxy = httptest.NewServer(http.HandlerFunc(s.remoteKeepproxyHandler))
+	s.remoteKeepproxy = httptest.NewServer(httpserver.LogRequests(http.HandlerFunc(s.remoteKeepproxyHandler)))
 	s.remoteAPI = httptest.NewUnstartedServer(http.HandlerFunc(s.remoteAPIHandler))
 	s.remoteAPI.StartTLS()
 	s.cluster = testCluster(c)
-	s.cluster.Collections.BlobSigningKey = knownKey
-	s.cluster.SystemRootToken = arvadostest.SystemRootToken
 	s.cluster.RemoteClusters = map[string]arvados.RemoteCluster{
 		s.remoteClusterID: {
 			Host:     strings.Split(s.remoteAPI.URL, "//")[1],
@@ -98,17 +97,21 @@ func (s *ProxyRemoteSuite) SetUpTest(c *check.C) {
 			Insecure: true,
 		},
 	}
-	s.cluster.Volumes = map[string]arvados.Volume{"zzzzz-nyw5e-000000000000000": {Driver: "mock"}}
-	s.handler = &handler{}
-	c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
+	s.cluster.Volumes = map[string]arvados.Volume{"zzzzz-nyw5e-000000000000000": {Driver: "stub"}}
 }
 
-func (s *ProxyRemoteSuite) TearDownTest(c *check.C) {
+func (s *proxyRemoteSuite) TearDownTest(c *check.C) {
 	s.remoteAPI.Close()
 	s.remoteKeepproxy.Close()
 }
 
-func (s *ProxyRemoteSuite) TestProxyRemote(c *check.C) {
+func (s *proxyRemoteSuite) TestProxyRemote(c *check.C) {
+	reg := prometheus.NewRegistry()
+	router, cancel := testRouter(c, s.cluster, reg)
+	defer cancel()
+	instrumented := httpserver.Instrument(reg, ctxlog.TestLogger(c), router)
+	handler := httpserver.LogRequests(instrumented.ServeAPI(s.cluster.ManagementToken, instrumented))
+
 	data := []byte("foo bar")
 	s.remoteKeepData = data
 	locator := fmt.Sprintf("%x+%d", md5.Sum(data), len(data))
@@ -172,7 +175,7 @@ func (s *ProxyRemoteSuite) TestProxyRemote(c *check.C) {
 			expectSignature:  true,
 		},
 	} {
-		c.Logf("trial: %s", trial.label)
+		c.Logf("=== trial: %s", trial.label)
 
 		s.remoteKeepRequests = 0
 
@@ -184,11 +187,18 @@ func (s *ProxyRemoteSuite) TestProxyRemote(c *check.C) {
 			req.Header.Set("X-Keep-Signature", trial.xKeepSignature)
 		}
 		resp = httptest.NewRecorder()
-		s.handler.ServeHTTP(resp, req)
+		handler.ServeHTTP(resp, req)
 		c.Check(s.remoteKeepRequests, check.Equals, trial.expectRemoteReqs)
-		c.Check(resp.Code, check.Equals, trial.expectCode)
+		if !c.Check(resp.Code, check.Equals, trial.expectCode) {
+			c.Logf("resp.Code %d came with resp.Body %q", resp.Code, resp.Body.String())
+		}
 		if resp.Code == http.StatusOK {
-			c.Check(resp.Body.String(), check.Equals, string(data))
+			if trial.method == "HEAD" {
+				c.Check(resp.Body.String(), check.Equals, "")
+				c.Check(resp.Result().ContentLength, check.Equals, int64(len(data)))
+			} else {
+				c.Check(resp.Body.String(), check.Equals, string(data))
+			}
 		} else {
 			c.Check(resp.Body.String(), check.Not(check.Equals), string(data))
 		}
@@ -203,13 +213,13 @@ func (s *ProxyRemoteSuite) TestProxyRemote(c *check.C) {
 
 		c.Check(locHdr, check.Not(check.Equals), "")
 		c.Check(locHdr, check.Not(check.Matches), `.*\+R.*`)
-		c.Check(VerifySignature(s.cluster, locHdr, trial.token), check.IsNil)
+		c.Check(arvados.VerifySignature(locHdr, trial.token, s.cluster.Collections.BlobSigningTTL.Duration(), []byte(s.cluster.Collections.BlobSigningKey)), check.IsNil)
 
 		// Ensure block can be requested using new signature
 		req = httptest.NewRequest("GET", "/"+locHdr, nil)
 		req.Header.Set("Authorization", "Bearer "+trial.token)
 		resp = httptest.NewRecorder()
-		s.handler.ServeHTTP(resp, req)
+		handler.ServeHTTP(resp, req)
 		c.Check(resp.Code, check.Equals, http.StatusOK)
 		c.Check(s.remoteKeepRequests, check.Equals, trial.expectRemoteReqs)
 	}
diff --git a/services/keepstore/pull_worker.go b/services/keepstore/pull_worker.go
index 348bfb4df0..c131de02cb 100644
--- a/services/keepstore/pull_worker.go
+++ b/services/keepstore/pull_worker.go
@@ -5,90 +5,164 @@
 package keepstore
 
 import (
+	"bytes"
 	"context"
-	"fmt"
-	"io"
-	"io/ioutil"
-	"time"
+	"sync"
+	"sync/atomic"
 
+	"git.arvados.org/arvados.git/sdk/go/arvados"
+	"git.arvados.org/arvados.git/sdk/go/arvadosclient"
 	"git.arvados.org/arvados.git/sdk/go/keepclient"
+	"github.com/prometheus/client_golang/prometheus"
 )
 
-// RunPullWorker receives PullRequests from pullq, invokes
-// PullItemAndProcess on each one. After each PR, it logs a message
-// indicating whether the pull was successful.
-func (h *handler) runPullWorker(pullq *WorkQueue) {
-	for item := range pullq.NextItem {
-		pr := item.(PullRequest)
-		err := h.pullItemAndProcess(pr)
-		pullq.DoneItem <- struct{}{}
-		if err == nil {
-			h.Logger.Printf("Pull %s success", pr)
-		} else {
-			h.Logger.Printf("Pull %s error: %s", pr, err)
-		}
-	}
+type PullListItem struct {
+	Locator   string   `json:"locator"`
+	Servers   []string `json:"servers"`
+	MountUUID string   `json:"mount_uuid"` // Destination mount, or "" for "anywhere"
 }
 
-// PullItemAndProcess executes a pull request by retrieving the
-// specified block from one of the specified servers, and storing it
-// on a local volume.
-//
-// If the PR specifies a non-blank mount UUID, PullItemAndProcess will
-// only attempt to write the data to the corresponding
-// volume. Otherwise it writes to any local volume, as a PUT request
-// would.
-func (h *handler) pullItemAndProcess(pullRequest PullRequest) error {
-	var vol *VolumeMount
-	if uuid := pullRequest.MountUUID; uuid != "" {
-		vol = h.volmgr.Lookup(pullRequest.MountUUID, true)
-		if vol == nil {
-			return fmt.Errorf("pull req has nonexistent mount: %v", pullRequest)
-		}
-	}
+type puller struct {
+	keepstore  *keepstore
+	todo       []PullListItem
+	cond       *sync.Cond // lock guards todo accesses; cond broadcasts when todo becomes non-empty
+	inprogress atomic.Int64
+}
 
-	// Make a private copy of keepClient so we can set
-	// ServiceRoots to the source servers specified in the pull
-	// request.
-	keepClient := h.keepClient.Clone()
-	serviceRoots := make(map[string]string)
-	for _, addr := range pullRequest.Servers {
-		serviceRoots[addr] = addr
+func newPuller(ctx context.Context, keepstore *keepstore, reg *prometheus.Registry) *puller {
+	p := &puller{
+		keepstore: keepstore,
+		cond:      sync.NewCond(&sync.Mutex{}),
 	}
-	keepClient.SetServiceRoots(serviceRoots, nil, nil)
+	reg.MustRegister(prometheus.NewGaugeFunc(
+		prometheus.GaugeOpts{
+			Namespace: "arvados",
+			Subsystem: "keepstore",
+			Name:      "pull_queue_pending_entries",
+			Help:      "Number of queued pull requests",
+		},
+		func() float64 {
+			p.cond.L.Lock()
+			defer p.cond.L.Unlock()
+			return float64(len(p.todo))
+		},
+	))
+	reg.MustRegister(prometheus.NewGaugeFunc(
+		prometheus.GaugeOpts{
+			Namespace: "arvados",
+			Subsystem: "keepstore",
+			Name:      "pull_queue_inprogress_entries",
+			Help:      "Number of pull requests in progress",
+		},
+		func() float64 {
+			return float64(p.inprogress.Load())
+		},
+	))
+	if len(p.keepstore.mountsW) == 0 {
+		keepstore.logger.Infof("not running pull worker because there are no writable volumes")
+		return p
+	}
+	for i := 0; i < 1 || i < keepstore.cluster.Collections.BlobReplicateConcurrency; i++ {
+		go p.runWorker(ctx)
+	}
+	return p
+}
 
-	signedLocator := SignLocator(h.Cluster, pullRequest.Locator, keepClient.Arvados.ApiToken, time.Now().Add(time.Minute))
+func (p *puller) SetPullList(newlist []PullListItem) {
+	p.cond.L.Lock()
+	p.todo = newlist
+	p.cond.L.Unlock()
+	p.cond.Broadcast()
+}
 
-	reader, _, _, err := GetContent(signedLocator, keepClient)
-	if err != nil {
-		return err
+func (p *puller) runWorker(ctx context.Context) {
+	if len(p.keepstore.mountsW) == 0 {
+		p.keepstore.logger.Infof("not running pull worker because there are no writable volumes")
+		return
 	}
-	if reader == nil {
-		return fmt.Errorf("No reader found for : %s", signedLocator)
+	c, err := arvados.NewClientFromConfig(p.keepstore.cluster)
+	if err != nil {
+		p.keepstore.logger.Errorf("error setting up pull worker: %s", err)
+		return
 	}
-	defer reader.Close()
-
-	readContent, err := ioutil.ReadAll(reader)
+	c.AuthToken = "keepstore-token-used-for-pulling-data-from-same-cluster"
+	ac, err := arvadosclient.New(c)
 	if err != nil {
-		return err
+		p.keepstore.logger.Errorf("error setting up pull worker: %s", err)
+		return
 	}
-
-	if readContent == nil {
-		return fmt.Errorf("Content not found for: %s", signedLocator)
+	keepClient := &keepclient.KeepClient{
+		Arvados:       ac,
+		Want_replicas: 1,
+		DiskCacheSize: keepclient.DiskCacheDisabled,
 	}
+	// Ensure the loop below wakes up and returns when ctx is
+	// canceled, even if the pull list is empty.
+	go func() {
+		<-ctx.Done()
+		p.cond.Broadcast()
+	}()
+	for {
+		p.cond.L.Lock()
+		for len(p.todo) == 0 && ctx.Err() == nil {
+			p.cond.Wait()
+		}
+		if ctx.Err() != nil {
+			return
+		}
+		item := p.todo[0]
+		p.todo = p.todo[1:]
+		p.inprogress.Add(1)
+		p.cond.L.Unlock()
 
-	return writePulledBlock(h.volmgr, vol, readContent, pullRequest.Locator)
-}
+		func() {
+			defer p.inprogress.Add(-1)
 
-// GetContent fetches the content for the given locator using keepclient.
-var GetContent = func(signedLocator string, keepClient *keepclient.KeepClient) (io.ReadCloser, int64, string, error) {
-	return keepClient.Get(signedLocator)
-}
+			logger := p.keepstore.logger.WithField("locator", item.Locator)
+
+			li, err := parseLocator(item.Locator)
+			if err != nil {
+				logger.Warn("ignoring pull request for invalid locator")
+				return
+			}
+
+			var dst *mount
+			if item.MountUUID != "" {
+				dst = p.keepstore.mounts[item.MountUUID]
+				if dst == nil {
+					logger.Warnf("ignoring pull list entry for nonexistent mount %s", item.MountUUID)
+					return
+				} else if !dst.AllowWrite {
+					logger.Warnf("ignoring pull list entry for readonly mount %s", item.MountUUID)
+					return
+				}
+			} else {
+				dst = p.keepstore.rendezvous(item.Locator, p.keepstore.mountsW)[0]
+			}
+
+			serviceRoots := make(map[string]string)
+			for _, addr := range item.Servers {
+				serviceRoots[addr] = addr
+			}
+			keepClient.SetServiceRoots(serviceRoots, nil, nil)
+
+			signedLocator := p.keepstore.signLocator(c.AuthToken, item.Locator)
 
-var writePulledBlock = func(volmgr *RRVolumeManager, volume Volume, data []byte, locator string) error {
-	if volume != nil {
-		return volume.Put(context.Background(), locator, data)
+			buf := bytes.NewBuffer(nil)
+			_, err = keepClient.BlockRead(ctx, arvados.BlockReadOptions{
+				Locator: signedLocator,
+				WriteTo: buf,
+			})
+			if err != nil {
+				logger.WithError(err).Warnf("error pulling data from remote servers (%s)", item.Servers)
+				return
+			}
+			err = dst.BlockWrite(ctx, li.hash, buf.Bytes())
+			if err != nil {
+				logger.WithError(err).Warnf("error writing data to %s", dst.UUID)
+				return
+			}
+			logger.Info("block pulled")
+		}()
 	}
-	_, err := PutBlock(context.Background(), volmgr, data, locator, nil)
-	return err
 }
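
[An admin client drives this worker by PUTting a JSON array of PullListItem objects to /pull (the route is registered in router.go below, restricted to the system root token); each PUT replaces the previous list via SetPullList. A minimal sketch of such a client; the URL and token are assumptions supplied by the caller:]

    package example

    import (
    	"bytes"
    	"encoding/json"
    	"fmt"
    	"net/http"
    )

    type pullListItem struct {
    	Locator   string   `json:"locator"`
    	Servers   []string `json:"servers"`
    	MountUUID string   `json:"mount_uuid"` // "" means any writable volume
    }

    // sendPullList replaces the keepstore's current pull list with
    // a single example entry.
    func sendPullList(keepstoreURL, systemRootToken string) error {
    	body, err := json.Marshal([]pullListItem{{
    		Locator: "acbd18db4cc2f85cedef654fccc4a4d8+3",
    		Servers: []string{"http://keep0.example:25107"},
    	}})
    	if err != nil {
    		return err
    	}
    	req, err := http.NewRequest("PUT", keepstoreURL+"/pull", bytes.NewReader(body))
    	if err != nil {
    		return err
    	}
    	req.Header.Set("Authorization", "Bearer "+systemRootToken)
    	resp, err := http.DefaultClient.Do(req)
    	if err != nil {
    		return err
    	}
    	defer resp.Body.Close()
    	if resp.StatusCode != http.StatusOK {
    		return fmt.Errorf("pull list rejected: %s", resp.Status)
    	}
    	return nil
    }
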
diff --git a/services/keepstore/pull_worker_integration_test.go b/services/keepstore/pull_worker_integration_test.go
deleted file mode 100644
index 3855b4ecd3..0000000000
--- a/services/keepstore/pull_worker_integration_test.go
+++ /dev/null
@@ -1,118 +0,0 @@
-// Copyright (C) The Arvados Authors. All rights reserved.
-//
-// SPDX-License-Identifier: AGPL-3.0
-
-package keepstore
-
-import (
-	"bytes"
-	"context"
-	"errors"
-	"io"
-	"io/ioutil"
-	"strings"
-
-	"git.arvados.org/arvados.git/sdk/go/arvadostest"
-	"git.arvados.org/arvados.git/sdk/go/keepclient"
-	"github.com/prometheus/client_golang/prometheus"
-	check "gopkg.in/check.v1"
-)
-
-type PullWorkIntegrationTestData struct {
-	Name     string
-	Locator  string
-	Content  string
-	GetError string
-}
-
-func (s *HandlerSuite) setupPullWorkerIntegrationTest(c *check.C, testData PullWorkIntegrationTestData, wantData bool) PullRequest {
-	arvadostest.StartKeep(2, false)
-	c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
-	// Put content if the test needs it
-	if wantData {
-		locator, _, err := s.handler.keepClient.PutB([]byte(testData.Content))
-		if err != nil {
-			c.Errorf("Error putting test data in setup for %s %s %v", testData.Content, locator, err)
-		}
-		if locator == "" {
-			c.Errorf("No locator found after putting test data")
-		}
-	}
-
-	// Create pullRequest for the test
-	pullRequest := PullRequest{
-		Locator: testData.Locator,
-	}
-	return pullRequest
-}
-
-// Do a get on a block that is not existing in any of the keep servers.
-// Expect "block not found" error.
-func (s *HandlerSuite) TestPullWorkerIntegration_GetNonExistingLocator(c *check.C) {
-	c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
-	testData := PullWorkIntegrationTestData{
-		Name:     "TestPullWorkerIntegration_GetLocator",
-		Locator:  "5d41402abc4b2a76b9719d911017c592",
-		Content:  "hello",
-		GetError: "Block not found",
-	}
-
-	pullRequest := s.setupPullWorkerIntegrationTest(c, testData, false)
-	defer arvadostest.StopKeep(2)
-
-	s.performPullWorkerIntegrationTest(testData, pullRequest, c)
-}
-
-// Do a get on a block that exists on one of the keep servers.
-// The setup method will create this block before doing the get.
-func (s *HandlerSuite) TestPullWorkerIntegration_GetExistingLocator(c *check.C) {
-	c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
-	testData := PullWorkIntegrationTestData{
-		Name:     "TestPullWorkerIntegration_GetLocator",
-		Locator:  "5d41402abc4b2a76b9719d911017c592",
-		Content:  "hello",
-		GetError: "",
-	}
-
-	pullRequest := s.setupPullWorkerIntegrationTest(c, testData, true)
-	defer arvadostest.StopKeep(2)
-
-	s.performPullWorkerIntegrationTest(testData, pullRequest, c)
-}
-
-// Perform the test.
-// The test directly invokes the "PullItemAndProcess" rather than
-// putting an item on the pullq so that the errors can be verified.
-func (s *HandlerSuite) performPullWorkerIntegrationTest(testData PullWorkIntegrationTestData, pullRequest PullRequest, c *check.C) {
-
-	// Override writePulledBlock to mock PutBlock functionality
-	defer func(orig func(*RRVolumeManager, Volume, []byte, string) error) { writePulledBlock = orig }(writePulledBlock)
-	writePulledBlock = func(_ *RRVolumeManager, _ Volume, content []byte, _ string) error {
-		c.Check(string(content), check.Equals, testData.Content)
-		return nil
-	}
-
-	// Override GetContent to mock keepclient Get functionality
-	defer func(orig func(string, *keepclient.KeepClient) (io.ReadCloser, int64, string, error)) {
-		GetContent = orig
-	}(GetContent)
-	GetContent = func(signedLocator string, keepClient *keepclient.KeepClient) (reader io.ReadCloser, contentLength int64, url string, err error) {
-		if testData.GetError != "" {
-			return nil, 0, "", errors.New(testData.GetError)
-		}
-		rdr := ioutil.NopCloser(bytes.NewBufferString(testData.Content))
-		return rdr, int64(len(testData.Content)), "", nil
-	}
-
-	err := s.handler.pullItemAndProcess(pullRequest)
-
-	if len(testData.GetError) > 0 {
-		if (err == nil) || (!strings.Contains(err.Error(), testData.GetError)) {
-			c.Errorf("Got error %v, expected %v", err, testData.GetError)
-		}
-	} else {
-		if err != nil {
-			c.Errorf("Got error %v, expected nil", err)
-		}
-	}
-}
diff --git a/services/keepstore/pull_worker_test.go b/services/keepstore/pull_worker_test.go
index 2626e66d88..d109b56df3 100644
--- a/services/keepstore/pull_worker_test.go
+++ b/services/keepstore/pull_worker_test.go
@@ -7,309 +7,130 @@ package keepstore
 import (
 	"bytes"
 	"context"
+	"crypto/md5"
+	"encoding/json"
 	"errors"
+	"fmt"
 	"io"
-	"io/ioutil"
 	"net/http"
+	"net/http/httptest"
+	"sort"
 	"time"
 
 	"git.arvados.org/arvados.git/sdk/go/arvados"
-	"git.arvados.org/arvados.git/sdk/go/keepclient"
-	"github.com/prometheus/client_golang/prometheus"
+	"git.arvados.org/arvados.git/sdk/go/arvadostest"
+	"github.com/sirupsen/logrus"
 	. "gopkg.in/check.v1"
-	check "gopkg.in/check.v1"
 )
 
-var _ = Suite(&PullWorkerTestSuite{})
-
-type PullWorkerTestSuite struct {
-	cluster *arvados.Cluster
-	handler *handler
-
-	testPullLists map[string]string
-	readContent   string
-	readError     error
-	putContent    []byte
-	putError      error
-}
-
-func (s *PullWorkerTestSuite) SetUpTest(c *C) {
-	s.cluster = testCluster(c)
-	s.cluster.Volumes = map[string]arvados.Volume{
-		"zzzzz-nyw5e-000000000000000": {Driver: "mock"},
-		"zzzzz-nyw5e-111111111111111": {Driver: "mock"},
+func (s *routerSuite) TestPullList_Execute(c *C) {
+	remotecluster := testCluster(c)
+	remotecluster.Volumes = map[string]arvados.Volume{
+		"zzzzz-nyw5e-rrrrrrrrrrrrrrr": {Replication: 1, Driver: "stub"},
 	}
-	s.cluster.Collections.BlobReplicateConcurrency = 1
-
-	s.handler = &handler{}
-	c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
-
-	s.readContent = ""
-	s.readError = nil
-	s.putContent = []byte{}
-	s.putError = nil
-
-	// When a new pull request arrives, the old one will be overwritten.
-	// This behavior is verified using these two maps in the
-	// "TestPullWorkerPullList_with_two_items_latest_replacing_old"
-	s.testPullLists = make(map[string]string)
-}
-
-var firstPullList = []byte(`[
-		{
-			"locator":"acbd18db4cc2f85cedef654fccc4a4d8+3",
-			"servers":[
-				"server_1",
-				"server_2"
-		 	]
-		},{
-			"locator":"37b51d194a7513e45b56f6524f2d51f2+3",
-			"servers":[
-				"server_3"
-		 	]
-		}
-	]`)
-
-var secondPullList = []byte(`[
-		{
-			"locator":"73feffa4b7f6bb68e44cf984c85f6e88+3",
-			"servers":[
-				"server_1",
-				"server_2"
-		 	]
-		}
-	]`)
-
-type PullWorkerTestData struct {
-	name         string
-	req          RequestTester
-	responseCode int
-	responseBody string
-	readContent  string
-	readError    bool
-	putError     bool
-}
-
-// Ensure MountUUID in a pull list is correctly translated to a Volume
-// argument passed to writePulledBlock().
-func (s *PullWorkerTestSuite) TestSpecifyMountUUID(c *C) {
-	defer func(f func(*RRVolumeManager, Volume, []byte, string) error) {
-		writePulledBlock = f
-	}(writePulledBlock)
-	pullq := s.handler.Handler.(*router).pullq
-
-	for _, spec := range []struct {
-		sendUUID     string
-		expectVolume Volume
-	}{
-		{
-			sendUUID:     "",
-			expectVolume: nil,
-		},
-		{
-			sendUUID:     s.handler.volmgr.Mounts()[0].UUID,
-			expectVolume: s.handler.volmgr.Mounts()[0].Volume,
-		},
-	} {
-		writePulledBlock = func(_ *RRVolumeManager, v Volume, _ []byte, _ string) error {
-			c.Check(v, Equals, spec.expectVolume)
-			return nil
+	remoterouter, cancel := testRouter(c, remotecluster, nil)
+	defer cancel()
+	remoteserver := httptest.NewServer(remoterouter)
+	defer remoteserver.Close()
+
+	router, cancel := testRouter(c, s.cluster, nil)
+	defer cancel()
+
+	executePullList := func(pullList []PullListItem) string {
+		var logbuf bytes.Buffer
+		logger := logrus.New()
+		logger.Out = &logbuf
+		router.keepstore.logger = logger
+
+		listjson, err := json.Marshal(pullList)
+		c.Assert(err, IsNil)
+		resp := call(router, "PUT", "http://example/pull", s.cluster.SystemRootToken, listjson, nil)
+		c.Check(resp.Code, Equals, http.StatusOK)
+		for {
+			router.puller.cond.L.Lock()
+			todolen := len(router.puller.todo)
+			router.puller.cond.L.Unlock()
+			if todolen == 0 && router.puller.inprogress.Load() == 0 {
+				break
+			}
+			time.Sleep(time.Millisecond)
 		}
-
-		resp := IssueRequest(s.handler, &RequestTester{
-			uri:      "/pull",
-			apiToken: s.cluster.SystemRootToken,
-			method:   "PUT",
-			requestBody: []byte(`[{
-				"locator":"acbd18db4cc2f85cedef654fccc4a4d8+3",
-				"servers":["server_1","server_2"],
-				"mount_uuid":"` + spec.sendUUID + `"}]`),
-		})
-		c.Assert(resp.Code, Equals, http.StatusOK)
-		expectEqualWithin(c, time.Second, 0, func() interface{} {
-			st := pullq.Status()
-			return st.InProgress + st.Queued
-		})
-	}
-}
-
-func (s *PullWorkerTestSuite) TestPullWorkerPullList_with_two_locators(c *C) {
-	testData := PullWorkerTestData{
-		name:         "TestPullWorkerPullList_with_two_locators",
-		req:          RequestTester{"/pull", s.cluster.SystemRootToken, "PUT", firstPullList, ""},
-		responseCode: http.StatusOK,
-		responseBody: "Received 2 pull requests\n",
-		readContent:  "hello",
-		readError:    false,
-		putError:     false,
-	}
-
-	s.performTest(testData, c)
-}
-
-func (s *PullWorkerTestSuite) TestPullWorkerPullList_with_one_locator(c *C) {
-	testData := PullWorkerTestData{
-		name:         "TestPullWorkerPullList_with_one_locator",
-		req:          RequestTester{"/pull", s.cluster.SystemRootToken, "PUT", secondPullList, ""},
-		responseCode: http.StatusOK,
-		responseBody: "Received 1 pull requests\n",
-		readContent:  "hola",
-		readError:    false,
-		putError:     false,
-	}
-
-	s.performTest(testData, c)
-}
-
-func (s *PullWorkerTestSuite) TestPullWorker_error_on_get_one_locator(c *C) {
-	testData := PullWorkerTestData{
-		name:         "TestPullWorker_error_on_get_one_locator",
-		req:          RequestTester{"/pull", s.cluster.SystemRootToken, "PUT", secondPullList, ""},
-		responseCode: http.StatusOK,
-		responseBody: "Received 1 pull requests\n",
-		readContent:  "unused",
-		readError:    true,
-		putError:     false,
+		return logbuf.String()
 	}
 
-	s.performTest(testData, c)
-}
-
-func (s *PullWorkerTestSuite) TestPullWorker_error_on_get_two_locators(c *C) {
-	testData := PullWorkerTestData{
-		name:         "TestPullWorker_error_on_get_two_locators",
-		req:          RequestTester{"/pull", s.cluster.SystemRootToken, "PUT", firstPullList, ""},
-		responseCode: http.StatusOK,
-		responseBody: "Received 2 pull requests\n",
-		readContent:  "unused",
-		readError:    true,
-		putError:     false,
-	}
-
-	s.performTest(testData, c)
-}
-
-func (s *PullWorkerTestSuite) TestPullWorker_error_on_put_one_locator(c *C) {
-	testData := PullWorkerTestData{
-		name:         "TestPullWorker_error_on_put_one_locator",
-		req:          RequestTester{"/pull", s.cluster.SystemRootToken, "PUT", secondPullList, ""},
-		responseCode: http.StatusOK,
-		responseBody: "Received 1 pull requests\n",
-		readContent:  "hello hello",
-		readError:    false,
-		putError:     true,
-	}
-
-	s.performTest(testData, c)
-}
-
-func (s *PullWorkerTestSuite) TestPullWorker_error_on_put_two_locators(c *C) {
-	testData := PullWorkerTestData{
-		name:         "TestPullWorker_error_on_put_two_locators",
-		req:          RequestTester{"/pull", s.cluster.SystemRootToken, "PUT", firstPullList, ""},
-		responseCode: http.StatusOK,
-		responseBody: "Received 2 pull requests\n",
-		readContent:  "hello again",
-		readError:    false,
-		putError:     true,
-	}
-
-	s.performTest(testData, c)
-}
-
-// In this case, the item will not be placed on pullq
-func (s *PullWorkerTestSuite) TestPullWorker_invalidToken(c *C) {
-	testData := PullWorkerTestData{
-		name:         "TestPullWorkerPullList_with_two_locators",
-		req:          RequestTester{"/pull", "invalidToken", "PUT", firstPullList, ""},
-		responseCode: http.StatusUnauthorized,
-		responseBody: "Unauthorized\n",
-		readContent:  "hello",
-		readError:    false,
-		putError:     false,
-	}
-
-	s.performTest(testData, c)
-}
-
-func (s *PullWorkerTestSuite) performTest(testData PullWorkerTestData, c *C) {
-	pullq := s.handler.Handler.(*router).pullq
-
-	s.testPullLists[testData.name] = testData.responseBody
-
-	processedPullLists := make(map[string]string)
-
-	// Override GetContent to mock keepclient Get functionality
-	defer func(orig func(string, *keepclient.KeepClient) (io.ReadCloser, int64, string, error)) {
-		GetContent = orig
-	}(GetContent)
-	GetContent = func(signedLocator string, keepClient *keepclient.KeepClient) (reader io.ReadCloser, contentLength int64, url string, err error) {
-		c.Assert(getStatusItem(s.handler, "PullQueue", "InProgress"), Equals, float64(1))
-		processedPullLists[testData.name] = testData.responseBody
-		if testData.readError {
-			err = errors.New("Error getting data")
-			s.readError = err
-			return
-		}
-		s.readContent = testData.readContent
-		reader = ioutil.NopCloser(bytes.NewBufferString(testData.readContent))
-		contentLength = int64(len(testData.readContent))
-		return
+	newRemoteBlock := func(datastring string) string {
+		data := []byte(datastring)
+		hash := fmt.Sprintf("%x", md5.Sum(data))
+		locator := fmt.Sprintf("%s+%d", hash, len(data))
+		_, err := remoterouter.keepstore.BlockWrite(context.Background(), arvados.BlockWriteOptions{
+			Hash: hash,
+			Data: data,
+		})
+		c.Assert(err, IsNil)
+		return locator
 	}
 
-	// Override writePulledBlock to mock PutBlock functionality
-	defer func(orig func(*RRVolumeManager, Volume, []byte, string) error) { writePulledBlock = orig }(writePulledBlock)
-	writePulledBlock = func(_ *RRVolumeManager, v Volume, content []byte, locator string) error {
-		if testData.putError {
-			s.putError = errors.New("Error putting data")
-			return s.putError
-		}
-		s.putContent = content
-		return nil
+	mounts := append([]*mount(nil), router.keepstore.mountsR...)
+	sort.Slice(mounts, func(i, j int) bool { return mounts[i].UUID < mounts[j].UUID })
+	var vols []*stubVolume
+	for _, mount := range mounts {
+		vols = append(vols, mount.volume.(*stubVolume))
 	}
 
-	c.Check(getStatusItem(s.handler, "PullQueue", "InProgress"), Equals, float64(0))
-	c.Check(getStatusItem(s.handler, "PullQueue", "Queued"), Equals, float64(0))
-	c.Check(getStatusItem(s.handler, "Version"), Not(Equals), "")
-
-	response := IssueRequest(s.handler, &testData.req)
-	c.Assert(response.Code, Equals, testData.responseCode)
-	c.Assert(response.Body.String(), Equals, testData.responseBody)
+	ctx := authContext(arvadostest.ActiveTokenV2)
 
-	expectEqualWithin(c, time.Second, 0, func() interface{} {
-		st := pullq.Status()
-		return st.InProgress + st.Queued
-	})
+	locator := newRemoteBlock("pull available block to unspecified volume")
+	executePullList([]PullListItem{{
+		Locator: locator,
+		Servers: []string{remoteserver.URL}}})
+	_, err := router.keepstore.BlockRead(ctx, arvados.BlockReadOptions{
+		Locator: router.keepstore.signLocator(arvadostest.ActiveTokenV2, locator),
+		WriteTo: io.Discard})
+	c.Check(err, IsNil)
 
-	if testData.name == "TestPullWorkerPullList_with_two_items_latest_replacing_old" {
-		c.Assert(len(s.testPullLists), Equals, 2)
-		c.Assert(len(processedPullLists), Equals, 1)
-		c.Assert(s.testPullLists["Added_before_actual_test_item"], NotNil)
-		c.Assert(s.testPullLists["TestPullWorkerPullList_with_two_items_latest_replacing_old"], NotNil)
-		c.Assert(processedPullLists["TestPullWorkerPullList_with_two_items_latest_replacing_old"], NotNil)
-	} else {
-		if testData.responseCode == http.StatusOK {
-			c.Assert(len(s.testPullLists), Equals, 1)
-			c.Assert(len(processedPullLists), Equals, 1)
-			c.Assert(s.testPullLists[testData.name], NotNil)
-		} else {
-			c.Assert(len(s.testPullLists), Equals, 1)
-			c.Assert(len(processedPullLists), Equals, 0)
-		}
-	}
-
-	if testData.readError {
-		c.Assert(s.readError, NotNil)
-	} else if testData.responseCode == http.StatusOK {
-		c.Assert(s.readError, IsNil)
-		c.Assert(s.readContent, Equals, testData.readContent)
-		if testData.putError {
-			c.Assert(s.putError, NotNil)
-		} else {
-			c.Assert(s.putError, IsNil)
-			c.Assert(string(s.putContent), Equals, testData.readContent)
-		}
-	}
-
-	expectChannelEmpty(c, pullq.NextItem)
+	locator0 := newRemoteBlock("pull available block to specified volume 0")
+	locator1 := newRemoteBlock("pull available block to specified volume 1")
+	executePullList([]PullListItem{
+		{
+			Locator:   locator0,
+			Servers:   []string{remoteserver.URL},
+			MountUUID: vols[0].params.UUID},
+		{
+			Locator:   locator1,
+			Servers:   []string{remoteserver.URL},
+			MountUUID: vols[1].params.UUID}})
+	c.Check(vols[0].data[locator0[:32]].data, NotNil)
+	c.Check(vols[1].data[locator1[:32]].data, NotNil)
+
+	locator = fooHash + "+3"
+	logs := executePullList([]PullListItem{{
+		Locator: locator,
+		Servers: []string{remoteserver.URL}}})
+	c.Check(logs, Matches, ".*error pulling data from remote servers.*Block not found.*locator=acbd.*\n")
+
+	locator = fooHash + "+3"
+	logs = executePullList([]PullListItem{{
+		Locator: locator,
+		Servers: []string{"http://0.0.0.0:9/"}}})
+	c.Check(logs, Matches, ".*error pulling data from remote servers.*connection refused.*locator=acbd.*\n")
+
+	locator = newRemoteBlock("log error writing to local volume")
+	vols[0].blockWrite = func(context.Context, string, []byte) error { return errors.New("test error") }
+	vols[1].blockWrite = vols[0].blockWrite
+	logs = executePullList([]PullListItem{{
+		Locator: locator,
+		Servers: []string{remoteserver.URL}}})
+	c.Check(logs, Matches, ".*error writing data to zzzzz-nyw5e-.*error=\"test error\".*locator=.*\n")
+	vols[0].blockWrite = nil
+	vols[1].blockWrite = nil
+
+	locator = newRemoteBlock("log error when destination mount does not exist")
+	logs = executePullList([]PullListItem{{
+		Locator:   locator,
+		Servers:   []string{remoteserver.URL},
+		MountUUID: "bogus-mount-uuid"}})
+	c.Check(logs, Matches, ".*ignoring pull list entry for nonexistent mount bogus-mount-uuid.*locator=.*\n")
+
+	logs = executePullList([]PullListItem{})
+	c.Logf("%s", logs)
 }
diff --git a/services/keepstore/putprogress.go b/services/keepstore/putprogress.go
new file mode 100644
index 0000000000..e02b2d09e9
--- /dev/null
+++ b/services/keepstore/putprogress.go
@@ -0,0 +1,101 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package keepstore
+
+import (
+	"github.com/sirupsen/logrus"
+)
+
+type putProgress struct {
+	classNeeded      map[string]bool
+	classTodo        map[string]bool
+	mountUsed        map[*mount]bool
+	totalReplication int
+	classDone        map[string]int
+}
+
+func (pr *putProgress) Add(mnt *mount) {
+	if pr.mountUsed[mnt] {
+		logrus.Warnf("BUG? superfluous write to mount %s", mnt.UUID)
+		return
+	}
+	pr.mountUsed[mnt] = true
+	pr.totalReplication += mnt.Replication
+	for class := range mnt.StorageClasses {
+		pr.classDone[class] += mnt.Replication
+		delete(pr.classTodo, class)
+	}
+}
+
+func (pr *putProgress) Sub(mnt *mount) {
+	if !pr.mountUsed[mnt] {
+		logrus.Warnf("BUG? Sub called with no prior matching Add: %s", mnt.UUID)
+		return
+	}
+	pr.mountUsed[mnt] = false
+	pr.totalReplication -= mnt.Replication
+	for class := range mnt.StorageClasses {
+		pr.classDone[class] -= mnt.Replication
+		if pr.classNeeded[class] {
+			pr.classTodo[class] = true
+		}
+	}
+}
+
+func (pr *putProgress) Done() bool {
+	return len(pr.classTodo) == 0 && pr.totalReplication > 0
+}
+
+func (pr *putProgress) Want(mnt *mount) bool {
+	if pr.Done() || pr.mountUsed[mnt] {
+		return false
+	}
+	if len(pr.classTodo) == 0 {
+		// none specified == "any"
+		return true
+	}
+	for class := range mnt.StorageClasses {
+		if pr.classTodo[class] {
+			return true
+		}
+	}
+	return false
+}
+
+func (pr *putProgress) Copy() *putProgress {
+	cp := putProgress{
+		classNeeded:      pr.classNeeded,
+		classTodo:        make(map[string]bool, len(pr.classTodo)),
+		classDone:        make(map[string]int, len(pr.classDone)),
+		mountUsed:        make(map[*mount]bool, len(pr.mountUsed)),
+		totalReplication: pr.totalReplication,
+	}
+	for k, v := range pr.classTodo {
+		cp.classTodo[k] = v
+	}
+	for k, v := range pr.classDone {
+		cp.classDone[k] = v
+	}
+	for k, v := range pr.mountUsed {
+		cp.mountUsed[k] = v
+	}
+	return &cp
+}
+
+func newPutProgress(classes []string) putProgress {
+	pr := putProgress{
+		classNeeded: make(map[string]bool, len(classes)),
+		classTodo:   make(map[string]bool, len(classes)),
+		classDone:   map[string]int{},
+		mountUsed:   map[*mount]bool{},
+	}
+	for _, c := range classes {
+		if c != "" {
+			pr.classNeeded[c] = true
+			pr.classTodo[c] = true
+		}
+	}
+	return pr
+}
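
The intended call pattern for putProgress is a rendezvous-ordered write
loop. The sketch below is a paraphrase, not the actual BlockWrite
implementation elsewhere in keepstore; it assumes an in-package caller
(imports elided), a mounts slice already in rendezvous order, and the
mnt.BlockWrite(ctx, hash, data) signature used by the tests in this
series:

    // writeWithProgress writes to mounts until every requested storage
    // class is satisfied, skipping mounts that add nothing still needed.
    func writeWithProgress(ctx context.Context, mounts []*mount, hash string, data []byte, classes []string) error {
        pr := newPutProgress(classes)
        for _, mnt := range mounts {
            if pr.Done() {
                break
            }
            if !pr.Want(mnt) {
                continue // already used, or adds no needed class
            }
            if err := mnt.BlockWrite(ctx, hash, data); err != nil {
                continue // try the next mount
            }
            pr.Add(mnt) // updates totalReplication and per-class counts
        }
        if !pr.Done() {
            return errors.New("could not satisfy requested storage classes")
        }
        return nil
    }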
diff --git a/services/keepstore/router.go b/services/keepstore/router.go
new file mode 100644
index 0000000000..7ff82aa80f
--- /dev/null
+++ b/services/keepstore/router.go
@@ -0,0 +1,276 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package keepstore
+
+import (
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"net/http"
+	"os"
+	"strconv"
+	"strings"
+	"sync/atomic"
+
+	"git.arvados.org/arvados.git/lib/service"
+	"git.arvados.org/arvados.git/sdk/go/arvados"
+	"git.arvados.org/arvados.git/sdk/go/auth"
+	"git.arvados.org/arvados.git/sdk/go/httpserver"
+	"github.com/gorilla/mux"
+)
+
+type router struct {
+	http.Handler
+	keepstore *keepstore
+	puller    *puller
+	trasher   *trasher
+}
+
+func newRouter(keepstore *keepstore, puller *puller, trasher *trasher) service.Handler {
+	rtr := &router{
+		keepstore: keepstore,
+		puller:    puller,
+		trasher:   trasher,
+	}
+	adminonly := func(h http.HandlerFunc) http.HandlerFunc {
+		return auth.RequireLiteralToken(keepstore.cluster.SystemRootToken, h).ServeHTTP
+	}
+
+	r := mux.NewRouter()
+	locatorPath := `/{locator:[0-9a-f]{32}.*}`
+	get := r.Methods(http.MethodGet, http.MethodHead).Subrouter()
+	get.HandleFunc(locatorPath, rtr.handleBlockRead)
+	get.HandleFunc(`/index`, adminonly(rtr.handleIndex))
+	get.HandleFunc(`/index/{prefix:[0-9a-f]{0,32}}`, adminonly(rtr.handleIndex))
+	get.HandleFunc(`/mounts`, adminonly(rtr.handleMounts))
+	get.HandleFunc(`/mounts/{uuid}/blocks`, adminonly(rtr.handleIndex))
+	get.HandleFunc(`/mounts/{uuid}/blocks/{prefix:[0-9a-f]{0,32}}`, adminonly(rtr.handleIndex))
+	put := r.Methods(http.MethodPut).Subrouter()
+	put.HandleFunc(locatorPath, rtr.handleBlockWrite)
+	put.HandleFunc(`/pull`, adminonly(rtr.handlePullList))
+	put.HandleFunc(`/trash`, adminonly(rtr.handleTrashList))
+	put.HandleFunc(`/untrash`+locatorPath, adminonly(rtr.handleUntrash))
+	touch := r.Methods("TOUCH").Subrouter()
+	touch.HandleFunc(locatorPath, adminonly(rtr.handleBlockTouch))
+	delete := r.Methods(http.MethodDelete).Subrouter()
+	delete.HandleFunc(locatorPath, adminonly(rtr.handleBlockTrash))
+	r.NotFoundHandler = http.HandlerFunc(rtr.handleBadRequest)
+	r.MethodNotAllowedHandler = http.HandlerFunc(rtr.handleBadRequest)
+	rtr.Handler = auth.LoadToken(r)
+	return rtr
+}
+
+func (rtr *router) CheckHealth() error {
+	return nil
+}
+
+func (rtr *router) Done() <-chan struct{} {
+	return nil
+}
+
+func (rtr *router) handleBlockRead(w http.ResponseWriter, req *http.Request) {
+	// Intervening proxies must not return a cached GET response
+// to a prior request if an X-Keep-Signature request header has
+	// been added or changed.
+	w.Header().Add("Vary", "X-Keep-Signature")
+	var localLocator func(string)
+	if strings.SplitN(req.Header.Get("X-Keep-Signature"), ",", 2)[0] == "local" {
+		localLocator = func(locator string) {
+			w.Header().Set("X-Keep-Locator", locator)
+		}
+	}
+	out := w
+	if req.Method == http.MethodHead {
+		out = discardWrite{ResponseWriter: w}
+	} else if li, err := parseLocator(mux.Vars(req)["locator"]); err != nil {
+		rtr.handleError(w, req, err)
+		return
+	} else if li.size == 0 && li.hash != "d41d8cd98f00b204e9800998ecf8427e" {
+		// GET {hash} (with no size hint) is not allowed
+		// because we can't report md5 mismatches.
+		rtr.handleError(w, req, errMethodNotAllowed)
+		return
+	}
+	n, err := rtr.keepstore.BlockRead(req.Context(), arvados.BlockReadOptions{
+		Locator:      mux.Vars(req)["locator"],
+		WriteTo:      out,
+		LocalLocator: localLocator,
+	})
+	if err != nil && (n == 0 || req.Method == http.MethodHead) {
+		rtr.handleError(w, req, err)
+		return
+	}
+}
+
+func (rtr *router) handleBlockWrite(w http.ResponseWriter, req *http.Request) {
+	dataSize, _ := strconv.Atoi(req.Header.Get("Content-Length"))
+	replicas, _ := strconv.Atoi(req.Header.Get("X-Arvados-Replicas-Desired"))
+	resp, err := rtr.keepstore.BlockWrite(req.Context(), arvados.BlockWriteOptions{
+		Hash:           mux.Vars(req)["locator"],
+		Reader:         req.Body,
+		DataSize:       dataSize,
+		RequestID:      req.Header.Get("X-Request-Id"),
+		StorageClasses: trimSplit(req.Header.Get("X-Keep-Storage-Classes"), ","),
+		Replicas:       replicas,
+	})
+	if err != nil {
+		rtr.handleError(w, req, err)
+		return
+	}
+	w.Header().Set("X-Keep-Replicas-Stored", fmt.Sprintf("%d", resp.Replicas))
+	scc := ""
+	for k, n := range resp.StorageClasses {
+		if n > 0 {
+			if scc != "" {
+				scc += "; "
+			}
+			scc += fmt.Sprintf("%s=%d", k, n)
+		}
+	}
+	w.Header().Set("X-Keep-Storage-Classes-Confirmed", scc)
+	w.WriteHeader(http.StatusOK)
+	fmt.Fprintln(w, resp.Locator)
+}
+
+func (rtr *router) handleBlockTouch(w http.ResponseWriter, req *http.Request) {
+	err := rtr.keepstore.BlockTouch(req.Context(), mux.Vars(req)["locator"])
+	rtr.handleError(w, req, err)
+}
+
+func (rtr *router) handleBlockTrash(w http.ResponseWriter, req *http.Request) {
+	err := rtr.keepstore.BlockTrash(req.Context(), mux.Vars(req)["locator"])
+	rtr.handleError(w, req, err)
+}
+
+func (rtr *router) handleMounts(w http.ResponseWriter, req *http.Request) {
+	json.NewEncoder(w).Encode(rtr.keepstore.Mounts())
+}
+
+func (rtr *router) handleIndex(w http.ResponseWriter, req *http.Request) {
+	prefix := req.FormValue("prefix")
+	if prefix == "" {
+		prefix = mux.Vars(req)["prefix"]
+	}
+	cw := &countingWriter{writer: w}
+	err := rtr.keepstore.Index(req.Context(), IndexOptions{
+		MountUUID: mux.Vars(req)["uuid"],
+		Prefix:    prefix,
+		WriteTo:   cw,
+	})
+	if err != nil && cw.n.Load() == 0 {
+		// Nothing was written, so it's not too late to report
+		// an error via http response header. (Otherwise, all
+		// we can do is omit the trailing newline below to
+		// indicate something went wrong.)
+		rtr.handleError(w, req, err)
+		return
+	}
+	if err == nil {
+		// A trailing blank line signals to the caller that
+		// the response is complete.
+		w.Write([]byte("\n"))
+	}
+}
+
+func (rtr *router) handlePullList(w http.ResponseWriter, req *http.Request) {
+	var pl []PullListItem
+	err := json.NewDecoder(req.Body).Decode(&pl)
+	if err != nil {
+		rtr.handleError(w, req, err)
+		return
+	}
+	req.Body.Close()
+	if len(pl) > 0 && len(pl[0].Locator) == 32 {
+		rtr.handleError(w, req, httpserver.ErrorWithStatus(errors.New("rejecting pull list containing a locator without a size hint -- this probably means keep-balance needs to be upgraded"), http.StatusBadRequest))
+		return
+	}
+	rtr.puller.SetPullList(pl)
+}
+
+func (rtr *router) handleTrashList(w http.ResponseWriter, req *http.Request) {
+	var tl []TrashListItem
+	err := json.NewDecoder(req.Body).Decode(&tl)
+	if err != nil {
+		rtr.handleError(w, req, err)
+		return
+	}
+	req.Body.Close()
+	rtr.trasher.SetTrashList(tl)
+}
+
+func (rtr *router) handleUntrash(w http.ResponseWriter, req *http.Request) {
+	err := rtr.keepstore.BlockUntrash(req.Context(), mux.Vars(req)["locator"])
+	rtr.handleError(w, req, err)
+}
+
+func (rtr *router) handleBadRequest(w http.ResponseWriter, req *http.Request) {
+	http.Error(w, "Bad Request", http.StatusBadRequest)
+}
+
+func (rtr *router) handleError(w http.ResponseWriter, req *http.Request, err error) {
+	if req.Context().Err() != nil {
+		w.WriteHeader(499)
+		return
+	}
+	if err == nil {
+		return
+	} else if os.IsNotExist(err) {
+		w.WriteHeader(http.StatusNotFound)
+	} else if statusErr := interface{ HTTPStatus() int }(nil); errors.As(err, &statusErr) {
+		w.WriteHeader(statusErr.HTTPStatus())
+	} else {
+		w.WriteHeader(http.StatusInternalServerError)
+	}
+	fmt.Fprintln(w, err.Error())
+}
+
+type countingWriter struct {
+	writer io.Writer
+	n      atomic.Int64
+}
+
+func (cw *countingWriter) Write(p []byte) (int, error) {
+	n, err := cw.writer.Write(p)
+	cw.n.Add(int64(n))
+	return n, err
+}
+
+// Split s by sep, trim whitespace from each part, and drop empty
+// parts.
+func trimSplit(s, sep string) []string {
+	var r []string
+	for _, part := range strings.Split(s, sep) {
+		part = strings.TrimSpace(part)
+		if part != "" {
+			r = append(r, part)
+		}
+	}
+	return r
+}
+
+// setSizeOnWrite sets the Content-Length header to the given size on
+// first write.
+type setSizeOnWrite struct {
+	http.ResponseWriter
+	size  int
+	wrote bool
+}
+
+func (ss *setSizeOnWrite) Write(p []byte) (int, error) {
+	if !ss.wrote {
+		ss.Header().Set("Content-Length", fmt.Sprintf("%d", ss.size))
+		ss.wrote = true
+	}
+	return ss.ResponseWriter.Write(p)
+}
+
+type discardWrite struct {
+	http.ResponseWriter
+}
+
+func (discardWrite) Write(p []byte) (int, error) {
+	return len(p), nil
+}
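
handleError above propagates any error exposing HTTPStatus() int,
matched via errors.As; httpserver.ErrorWithStatus (used in
handlePullList) is the usual constructor. A small self-contained sketch
of the mapping -- the 503 status and message are example values only:

    package main

    import (
        "errors"
        "fmt"
        "net/http"

        "git.arvados.org/arvados.git/sdk/go/httpserver"
    )

    func main() {
        err := httpserver.ErrorWithStatus(errors.New("backend busy"), http.StatusServiceUnavailable)
        var statusErr interface{ HTTPStatus() int }
        if errors.As(err, &statusErr) {
            fmt.Println(statusErr.HTTPStatus()) // 503, as handleError would report
        }
    }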
diff --git a/services/keepstore/router_test.go b/services/keepstore/router_test.go
new file mode 100644
index 0000000000..a729ee0df3
--- /dev/null
+++ b/services/keepstore/router_test.go
@@ -0,0 +1,510 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package keepstore
+
+import (
+	"bytes"
+	"context"
+	"crypto/md5"
+	"errors"
+	"fmt"
+	"io"
+	"net/http"
+	"net/http/httptest"
+	"os"
+	"sort"
+	"strings"
+	"time"
+
+	"git.arvados.org/arvados.git/sdk/go/arvados"
+	"git.arvados.org/arvados.git/sdk/go/arvadostest"
+	"git.arvados.org/arvados.git/sdk/go/httpserver"
+	"github.com/prometheus/client_golang/prometheus"
+	. "gopkg.in/check.v1"
+)
+
+// routerSuite tests that the router correctly translates HTTP
+// requests to the appropriate keepstore functionality, and translates
+// the results to HTTP responses.
+type routerSuite struct {
+	cluster *arvados.Cluster
+}
+
+var _ = Suite(&routerSuite{})
+
+func testRouter(t TB, cluster *arvados.Cluster, reg *prometheus.Registry) (*router, context.CancelFunc) {
+	if reg == nil {
+		reg = prometheus.NewRegistry()
+	}
+	ctx, cancel := context.WithCancel(context.Background())
+	ks, kcancel := testKeepstore(t, cluster, reg)
+	go func() {
+		<-ctx.Done()
+		kcancel()
+	}()
+	puller := newPuller(ctx, ks, reg)
+	trasher := newTrasher(ctx, ks, reg)
+	return newRouter(ks, puller, trasher).(*router), cancel
+}
+
+func (s *routerSuite) SetUpTest(c *C) {
+	s.cluster = testCluster(c)
+	s.cluster.Volumes = map[string]arvados.Volume{
+		"zzzzz-nyw5e-000000000000000": {Replication: 1, Driver: "stub", StorageClasses: map[string]bool{"testclass1": true}},
+		"zzzzz-nyw5e-111111111111111": {Replication: 1, Driver: "stub", StorageClasses: map[string]bool{"testclass2": true}},
+	}
+	s.cluster.StorageClasses = map[string]arvados.StorageClassConfig{
+		"testclass1": arvados.StorageClassConfig{
+			Default: true,
+		},
+		"testclass2": arvados.StorageClassConfig{
+			Default: true,
+		},
+	}
+}
+
+func (s *routerSuite) TestBlockRead_Token(c *C) {
+	router, cancel := testRouter(c, s.cluster, nil)
+	defer cancel()
+
+	err := router.keepstore.mountsW[0].BlockWrite(context.Background(), fooHash, []byte("foo"))
+	c.Assert(err, IsNil)
+	locSigned := router.keepstore.signLocator(arvadostest.ActiveTokenV2, fooHash+"+3")
+	c.Assert(locSigned, Not(Equals), fooHash+"+3")
+
+	// No token provided
+	resp := call(router, "GET", "http://example/"+locSigned, "", nil, nil)
+	c.Check(resp.Code, Equals, http.StatusUnauthorized)
+	c.Check(resp.Body.String(), Matches, "no token provided in Authorization header\n")
+
+	// Different token => invalid signature
+	resp = call(router, "GET", "http://example/"+locSigned, "badtoken", nil, nil)
+	c.Check(resp.Code, Equals, http.StatusBadRequest)
+	c.Check(resp.Body.String(), Equals, "invalid signature\n")
+
+	// Correct token
+	resp = call(router, "GET", "http://example/"+locSigned, arvadostest.ActiveTokenV2, nil, nil)
+	c.Check(resp.Code, Equals, http.StatusOK)
+	c.Check(resp.Body.String(), Equals, "foo")
+
+	// HEAD
+	resp = call(router, "HEAD", "http://example/"+locSigned, arvadostest.ActiveTokenV2, nil, nil)
+	c.Check(resp.Code, Equals, http.StatusOK)
+	c.Check(resp.Result().ContentLength, Equals, int64(3))
+	c.Check(resp.Body.String(), Equals, "")
+}
+
+// As a special case we allow HEAD requests that only provide a hash
+// without a size hint. This accommodates uses of keep-block-check
+// where it's inconvenient to attach size hints to known hashes.
+//
+// GET requests must provide a size hint -- otherwise we can't
+// propagate a checksum mismatch error.
+func (s *routerSuite) TestBlockRead_NoSizeHint(c *C) {
+	s.cluster.Collections.BlobSigning = true
+	router, cancel := testRouter(c, s.cluster, nil)
+	defer cancel()
+	err := router.keepstore.mountsW[0].BlockWrite(context.Background(), fooHash, []byte("foo"))
+	c.Assert(err, IsNil)
+
+	// hash+signature
+	hashSigned := router.keepstore.signLocator(arvadostest.ActiveTokenV2, fooHash)
+	resp := call(router, "GET", "http://example/"+hashSigned, arvadostest.ActiveTokenV2, nil, nil)
+	c.Check(resp.Code, Equals, http.StatusMethodNotAllowed)
+
+	resp = call(router, "HEAD", "http://example/"+fooHash, "", nil, nil)
+	c.Check(resp.Code, Equals, http.StatusUnauthorized)
+	resp = call(router, "HEAD", "http://example/"+fooHash+"+3", "", nil, nil)
+	c.Check(resp.Code, Equals, http.StatusUnauthorized)
+
+	s.cluster.Collections.BlobSigning = false
+	router, cancel = testRouter(c, s.cluster, nil)
+	defer cancel()
+	err = router.keepstore.mountsW[0].BlockWrite(context.Background(), fooHash, []byte("foo"))
+	c.Assert(err, IsNil)
+
+	resp = call(router, "GET", "http://example/"+fooHash, "", nil, nil)
+	c.Check(resp.Code, Equals, http.StatusMethodNotAllowed)
+
+	resp = call(router, "HEAD", "http://example/"+fooHash, "", nil, nil)
+	c.Check(resp.Code, Equals, http.StatusOK)
+	c.Check(resp.Body.String(), Equals, "")
+	c.Check(resp.Result().ContentLength, Equals, int64(3))
+	c.Check(resp.Header().Get("Content-Length"), Equals, "3")
+}
+
+// By the time we discover the checksum mismatch, it's too late to
+// change the response code, but the expected block size is given in
+// the Content-Length response header, so a generic http client can
+// detect the problem.
+func (s *routerSuite) TestBlockRead_ChecksumMismatch(c *C) {
+	router, cancel := testRouter(c, s.cluster, nil)
+	defer cancel()
+
+	gooddata := make([]byte, 10_000_000)
+	gooddata[0] = 'a'
+	hash := fmt.Sprintf("%x", md5.Sum(gooddata))
+	locSigned := router.keepstore.signLocator(arvadostest.ActiveTokenV2, fmt.Sprintf("%s+%d", hash, len(gooddata)))
+
+	for _, baddata := range [][]byte{
+		make([]byte, 3),
+		make([]byte, len(gooddata)),
+		make([]byte, len(gooddata)-1),
+		make([]byte, len(gooddata)+1),
+		make([]byte, len(gooddata)*2),
+	} {
+		c.Logf("=== baddata len %d", len(baddata))
+		err := router.keepstore.mountsW[0].BlockWrite(context.Background(), hash, baddata)
+		c.Assert(err, IsNil)
+
+		resp := call(router, "GET", "http://example/"+locSigned, arvadostest.ActiveTokenV2, nil, nil)
+		if !c.Check(resp.Code, Equals, http.StatusOK) {
+			c.Logf("resp.Body: %s", resp.Body.String())
+		}
+		c.Check(resp.Body.Len(), Not(Equals), len(gooddata))
+		c.Check(resp.Result().ContentLength, Equals, int64(len(gooddata)))
+
+		resp = call(router, "HEAD", "http://example/"+locSigned, arvadostest.ActiveTokenV2, nil, nil)
+		c.Check(resp.Code, Equals, http.StatusBadGateway)
+
+		hashSigned := router.keepstore.signLocator(arvadostest.ActiveTokenV2, hash)
+		resp = call(router, "HEAD", "http://example/"+hashSigned, arvadostest.ActiveTokenV2, nil, nil)
+		c.Check(resp.Code, Equals, http.StatusBadGateway)
+	}
+}
+
+func (s *routerSuite) TestBlockWrite(c *C) {
+	router, cancel := testRouter(c, s.cluster, nil)
+	defer cancel()
+
+	resp := call(router, "PUT", "http://example/"+fooHash, arvadostest.ActiveTokenV2, []byte("foo"), nil)
+	c.Check(resp.Code, Equals, http.StatusOK)
+	locator := strings.TrimSpace(resp.Body.String())
+
+	resp = call(router, "GET", "http://example/"+locator, arvadostest.ActiveTokenV2, nil, nil)
+	c.Check(resp.Code, Equals, http.StatusOK)
+	c.Check(resp.Body.String(), Equals, "foo")
+}
+
+func (s *routerSuite) TestBlockWrite_Headers(c *C) {
+	router, cancel := testRouter(c, s.cluster, nil)
+	defer cancel()
+
+	resp := call(router, "PUT", "http://example/"+fooHash, arvadostest.ActiveTokenV2, []byte("foo"), http.Header{"X-Arvados-Replicas-Desired": []string{"2"}})
+	c.Check(resp.Code, Equals, http.StatusOK)
+	c.Check(resp.Header().Get("X-Keep-Replicas-Stored"), Equals, "1")
+	c.Check(sortCommaSeparated(resp.Header().Get("X-Keep-Storage-Classes-Confirmed")), Equals, "testclass1=1")
+
+	resp = call(router, "PUT", "http://example/"+fooHash, arvadostest.ActiveTokenV2, []byte("foo"), http.Header{"X-Keep-Storage-Classes": []string{"testclass1"}})
+	c.Check(resp.Code, Equals, http.StatusOK)
+	c.Check(resp.Header().Get("X-Keep-Replicas-Stored"), Equals, "1")
+	c.Check(resp.Header().Get("X-Keep-Storage-Classes-Confirmed"), Equals, "testclass1=1")
+
+	resp = call(router, "PUT", "http://example/"+fooHash, arvadostest.ActiveTokenV2, []byte("foo"), http.Header{"X-Keep-Storage-Classes": []string{" , testclass2 , "}})
+	c.Check(resp.Code, Equals, http.StatusOK)
+	c.Check(resp.Header().Get("X-Keep-Replicas-Stored"), Equals, "1")
+	c.Check(resp.Header().Get("X-Keep-Storage-Classes-Confirmed"), Equals, "testclass2=1")
+}
+
+func sortCommaSeparated(s string) string {
+	slice := strings.Split(s, ", ")
+	sort.Strings(slice)
+	return strings.Join(slice, ", ")
+}
+
+func (s *routerSuite) TestBlockTouch(c *C) {
+	router, cancel := testRouter(c, s.cluster, nil)
+	defer cancel()
+
+	resp := call(router, "TOUCH", "http://example/"+fooHash+"+3", s.cluster.SystemRootToken, nil, nil)
+	c.Check(resp.Code, Equals, http.StatusNotFound)
+
+	vol0 := router.keepstore.mountsW[0].volume.(*stubVolume)
+	err := vol0.BlockWrite(context.Background(), fooHash, []byte("foo"))
+	c.Assert(err, IsNil)
+	vol1 := router.keepstore.mountsW[1].volume.(*stubVolume)
+	err = vol1.BlockWrite(context.Background(), fooHash, []byte("foo"))
+	c.Assert(err, IsNil)
+
+	t1 := time.Now()
+	resp = call(router, "TOUCH", "http://example/"+fooHash+"+3", s.cluster.SystemRootToken, nil, nil)
+	c.Check(resp.Code, Equals, http.StatusOK)
+	t2 := time.Now()
+
+	// Unauthorized request is a no-op
+	resp = call(router, "TOUCH", "http://example/"+fooHash+"+3", arvadostest.ActiveTokenV2, nil, nil)
+	c.Check(resp.Code, Equals, http.StatusForbidden)
+
+	// Volume 0 mtime should be updated
+	t, err := vol0.Mtime(fooHash)
+	c.Check(err, IsNil)
+	c.Check(t.After(t1), Equals, true)
+	c.Check(t.Before(t2), Equals, true)
+
+	// Volume 1 mtime should not be updated
+	t, err = vol1.Mtime(fooHash)
+	c.Check(err, IsNil)
+	c.Check(t.Before(t1), Equals, true)
+
+	err = vol0.BlockTrash(fooHash)
+	c.Assert(err, IsNil)
+	err = vol1.BlockTrash(fooHash)
+	c.Assert(err, IsNil)
+	resp = call(router, "TOUCH", "http://example/"+fooHash+"+3", s.cluster.SystemRootToken, nil, nil)
+	c.Check(resp.Code, Equals, http.StatusNotFound)
+}
+
+func (s *routerSuite) TestBlockTrash(c *C) {
+	router, cancel := testRouter(c, s.cluster, nil)
+	defer cancel()
+
+	vol0 := router.keepstore.mountsW[0].volume.(*stubVolume)
+	err := vol0.BlockWrite(context.Background(), fooHash, []byte("foo"))
+	c.Assert(err, IsNil)
+	err = vol0.blockTouchWithTime(fooHash, time.Now().Add(-s.cluster.Collections.BlobSigningTTL.Duration()))
+	c.Assert(err, IsNil)
+	resp := call(router, "DELETE", "http://example/"+fooHash+"+3", s.cluster.SystemRootToken, nil, nil)
+	c.Check(resp.Code, Equals, http.StatusOK)
+	c.Check(vol0.stubLog.String(), Matches, `(?ms).* trash .*`)
+	_, err = vol0.BlockRead(context.Background(), fooHash, io.Discard)
+	c.Assert(err, Equals, os.ErrNotExist)
+}
+
+func (s *routerSuite) TestBlockUntrash(c *C) {
+	router, cancel := testRouter(c, s.cluster, nil)
+	defer cancel()
+
+	vol0 := router.keepstore.mountsW[0].volume.(*stubVolume)
+	err := vol0.BlockWrite(context.Background(), fooHash, []byte("foo"))
+	c.Assert(err, IsNil)
+	err = vol0.BlockTrash(fooHash)
+	c.Assert(err, IsNil)
+	_, err = vol0.BlockRead(context.Background(), fooHash, io.Discard)
+	c.Assert(err, Equals, os.ErrNotExist)
+	resp := call(router, "PUT", "http://example/untrash/"+fooHash+"+3", s.cluster.SystemRootToken, nil, nil)
+	c.Check(resp.Code, Equals, http.StatusOK)
+	c.Check(vol0.stubLog.String(), Matches, `(?ms).* untrash .*`)
+	_, err = vol0.BlockRead(context.Background(), fooHash, io.Discard)
+	c.Check(err, IsNil)
+}
+
+func (s *routerSuite) TestBadRequest(c *C) {
+	router, cancel := testRouter(c, s.cluster, nil)
+	defer cancel()
+
+	for _, trial := range []string{
+		"GET /",
+		"GET /xyz",
+		"GET /aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabcdefg",
+		"GET /untrash",
+		"GET /mounts/blocks/123",
+		"GET /trash",
+		"GET /pull",
+		"GET /debug.json",
+		"GET /status.json",
+		"POST /",
+		"POST /aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+		"POST /trash",
+		"PROPFIND /",
+		"MAKE-COFFEE /",
+	} {
+		c.Logf("=== %s", trial)
+		methodpath := strings.Split(trial, " ")
+		req := httptest.NewRequest(methodpath[0], "http://example"+methodpath[1], nil)
+		resp := httptest.NewRecorder()
+		router.ServeHTTP(resp, req)
+		c.Check(resp.Code, Equals, http.StatusBadRequest)
+	}
+}
+
+func (s *routerSuite) TestRequireAdminMgtToken(c *C) {
+	router, cancel := testRouter(c, s.cluster, nil)
+	defer cancel()
+
+	for _, token := range []string{"badtoken", ""} {
+		for _, trial := range []string{
+			"PUT /pull",
+			"PUT /trash",
+			"GET /index",
+			"GET /index/",
+			"GET /index/1234",
+			"PUT /untrash/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+		} {
+			c.Logf("=== %s", trial)
+			methodpath := strings.Split(trial, " ")
+			req := httptest.NewRequest(methodpath[0], "http://example"+methodpath[1], nil)
+			if token != "" {
+				req.Header.Set("Authorization", "Bearer "+token)
+			}
+			resp := httptest.NewRecorder()
+			router.ServeHTTP(resp, req)
+			if token == "" {
+				c.Check(resp.Code, Equals, http.StatusUnauthorized)
+			} else {
+				c.Check(resp.Code, Equals, http.StatusForbidden)
+			}
+		}
+	}
+	req := httptest.NewRequest("TOUCH", "http://example/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", nil)
+	resp := httptest.NewRecorder()
+	router.ServeHTTP(resp, req)
+	c.Check(resp.Code, Equals, http.StatusUnauthorized)
+}
+
+func (s *routerSuite) TestVolumeErrorStatusCode(c *C) {
+	router, cancel := testRouter(c, s.cluster, nil)
+	defer cancel()
+	router.keepstore.mountsW[0].volume.(*stubVolume).blockRead = func(_ context.Context, hash string, w io.Writer) (int, error) {
+		return 0, httpserver.ErrorWithStatus(errors.New("test error"), http.StatusBadGateway)
+	}
+
+	// To test whether we fall back to volume 1 after volume 0
+	// returns an error, we need to use a block whose rendezvous
+	// order has volume 0 first. Luckily "bar" is such a block.
+	c.Assert(router.keepstore.rendezvous(barHash, router.keepstore.mountsR)[0].UUID, DeepEquals, router.keepstore.mountsR[0].UUID)
+
+	locSigned := router.keepstore.signLocator(arvadostest.ActiveTokenV2, barHash+"+3")
+
+	// Volume 0 fails with an error that specifies an HTTP status
+	// code, so that code should be propagated to caller.
+	resp := call(router, "GET", "http://example/"+locSigned, arvadostest.ActiveTokenV2, nil, nil)
+	c.Check(resp.Code, Equals, http.StatusBadGateway)
+	c.Check(resp.Body.String(), Equals, "test error\n")
+
+	c.Assert(router.keepstore.mountsW[1].volume.BlockWrite(context.Background(), barHash, []byte("bar")), IsNil)
+
+	// If the requested block is available on the second volume,
+	// it doesn't matter that the first volume failed.
+	resp = call(router, "GET", "http://example/"+locSigned, arvadostest.ActiveTokenV2, nil, nil)
+	c.Check(resp.Code, Equals, http.StatusOK)
+	c.Check(resp.Body.String(), Equals, "bar")
+}
+
+func (s *routerSuite) TestIndex(c *C) {
+	router, cancel := testRouter(c, s.cluster, nil)
+	defer cancel()
+
+	resp := call(router, "GET", "http://example/index", s.cluster.SystemRootToken, nil, nil)
+	c.Check(resp.Code, Equals, http.StatusOK)
+	c.Check(resp.Body.String(), Equals, "\n")
+
+	resp = call(router, "GET", "http://example/index?prefix=fff", s.cluster.SystemRootToken, nil, nil)
+	c.Check(resp.Code, Equals, http.StatusOK)
+	c.Check(resp.Body.String(), Equals, "\n")
+
+	t0 := time.Now().Add(-time.Hour)
+	vol0 := router.keepstore.mounts["zzzzz-nyw5e-000000000000000"].volume.(*stubVolume)
+	err := vol0.BlockWrite(context.Background(), fooHash, []byte("foo"))
+	c.Assert(err, IsNil)
+	err = vol0.blockTouchWithTime(fooHash, t0)
+	c.Assert(err, IsNil)
+	err = vol0.BlockWrite(context.Background(), barHash, []byte("bar"))
+	c.Assert(err, IsNil)
+	err = vol0.blockTouchWithTime(barHash, t0)
+	c.Assert(err, IsNil)
+	t1 := time.Now().Add(-time.Minute)
+	vol1 := router.keepstore.mounts["zzzzz-nyw5e-111111111111111"].volume.(*stubVolume)
+	err = vol1.BlockWrite(context.Background(), barHash, []byte("bar"))
+	c.Assert(err, IsNil)
+	err = vol1.blockTouchWithTime(barHash, t1)
+	c.Assert(err, IsNil)
+
+	for _, path := range []string{
+		"/index?prefix=acb",
+		"/index/acb",
+		"/index/?prefix=acb",
+		"/mounts/zzzzz-nyw5e-000000000000000/blocks?prefix=acb",
+		"/mounts/zzzzz-nyw5e-000000000000000/blocks/?prefix=acb",
+		"/mounts/zzzzz-nyw5e-000000000000000/blocks/acb",
+	} {
+		c.Logf("=== %s", path)
+		resp = call(router, "GET", "http://example"+path, s.cluster.SystemRootToken, nil, nil)
+		c.Check(resp.Code, Equals, http.StatusOK)
+		c.Check(resp.Body.String(), Equals, fooHash+"+3 "+fmt.Sprintf("%d", t0.UnixNano())+"\n\n")
+	}
+
+	for _, path := range []string{
+		"/index?prefix=37",
+		"/index/37",
+		"/index/?prefix=37",
+	} {
+		c.Logf("=== %s", path)
+		resp = call(router, "GET", "http://example"+path, s.cluster.SystemRootToken, nil, nil)
+		c.Check(resp.Code, Equals, http.StatusOK)
+		c.Check(resp.Body.String(), Equals, ""+
+			barHash+"+3 "+fmt.Sprintf("%d", t0.UnixNano())+"\n"+
+			barHash+"+3 "+fmt.Sprintf("%d", t1.UnixNano())+"\n\n")
+	}
+
+	for _, path := range []string{
+		"/mounts/zzzzz-nyw5e-111111111111111/blocks",
+		"/mounts/zzzzz-nyw5e-111111111111111/blocks/",
+		"/mounts/zzzzz-nyw5e-111111111111111/blocks?prefix=37",
+		"/mounts/zzzzz-nyw5e-111111111111111/blocks/?prefix=37",
+		"/mounts/zzzzz-nyw5e-111111111111111/blocks/37",
+	} {
+		c.Logf("=== %s", path)
+		resp = call(router, "GET", "http://example"+path, s.cluster.SystemRootToken, nil, nil)
+		c.Check(resp.Code, Equals, http.StatusOK)
+		c.Check(resp.Body.String(), Equals, barHash+"+3 "+fmt.Sprintf("%d", t1.UnixNano())+"\n\n")
+	}
+
+	for _, path := range []string{
+		"/index",
+		"/index?prefix=",
+		"/index/",
+		"/index/?prefix=",
+	} {
+		c.Logf("=== %s", path)
+		resp = call(router, "GET", "http://example"+path, s.cluster.SystemRootToken, nil, nil)
+		c.Check(resp.Code, Equals, http.StatusOK)
+		c.Check(strings.Split(resp.Body.String(), "\n"), HasLen, 5)
+	}
+
+}
+
+// Check that the context passed to a volume method gets cancelled
+// when the http client hangs up.
+func (s *routerSuite) TestCancelOnDisconnect(c *C) {
+	router, cancel := testRouter(c, s.cluster, nil)
+	defer cancel()
+
+	unblock := make(chan struct{})
+	router.keepstore.mountsW[0].volume.(*stubVolume).blockRead = func(ctx context.Context, hash string, w io.Writer) (int, error) {
+		<-unblock
+		c.Check(ctx.Err(), NotNil)
+		return 0, ctx.Err()
+	}
+	// Note: "cancel" is reassigned below to the request context's
+	// cancel func before this goroutine fires, so this simulates
+	// the client hanging up, not the server shutting down.
+	go func() {
+		time.Sleep(time.Second / 10)
+		cancel()
+		close(unblock)
+	}()
+	locSigned := router.keepstore.signLocator(arvadostest.ActiveTokenV2, fooHash+"+3")
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+	req, err := http.NewRequestWithContext(ctx, "GET", "http://example/"+locSigned, nil)
+	c.Assert(err, IsNil)
+	req.Header.Set("Authorization", "Bearer "+arvadostest.ActiveTokenV2)
+	resp := httptest.NewRecorder()
+	router.ServeHTTP(resp, req)
+	c.Check(resp.Code, Equals, 499)
+}
+
+func call(handler http.Handler, method, path, tok string, body []byte, hdr http.Header) *httptest.ResponseRecorder {
+	resp := httptest.NewRecorder()
+	req, err := http.NewRequest(method, path, bytes.NewReader(body))
+	if err != nil {
+		panic(err)
+	}
+	for k := range hdr {
+		req.Header.Set(k, hdr.Get(k))
+	}
+	if tok != "" {
+		req.Header.Set("Authorization", "Bearer "+tok)
+	}
+	handler.ServeHTTP(resp, req)
+	return resp
+}
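
The 499 response checked in TestCancelOnDisconnect is what handleError
returns whenever the request context is already cancelled -- i.e., the
client hung up. A hedged sketch of provoking it from a generic client
(the URL and locator are placeholders; against a live keepstore, the
server returns 499 while the client only sees "context canceled"):

    package main

    import (
        "context"
        "net/http"
        "time"
    )

    func main() {
        ctx, cancel := context.WithCancel(context.Background())
        req, _ := http.NewRequestWithContext(ctx, "GET",
            "http://keep0.example:25107/acbd18db4cc2f85cedef654fccc4a4d8+3", nil)
        go func() {
            time.Sleep(100 * time.Millisecond)
            cancel() // abandon the request mid-flight
        }()
        _, err := http.DefaultClient.Do(req)
        _ = err // context.Canceled wrapped in a *url.Error
    }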
diff --git a/services/keepstore/s3aws_volume.go b/services/keepstore/s3aws_volume.go
index 18b30f4638..2417bb8149 100644
--- a/services/keepstore/s3aws_volume.go
+++ b/services/keepstore/s3aws_volume.go
@@ -38,10 +38,14 @@ func init() {
 }
 
 const (
-	s3DefaultReadTimeout    = arvados.Duration(10 * time.Minute)
-	s3DefaultConnectTimeout = arvados.Duration(time.Minute)
-	maxClockSkew            = 600 * time.Second
-	nearlyRFC1123           = "Mon, 2 Jan 2006 15:04:05 GMT"
+	s3DefaultReadTimeout        = arvados.Duration(10 * time.Minute)
+	s3DefaultConnectTimeout     = arvados.Duration(time.Minute)
+	maxClockSkew                = 600 * time.Second
+	nearlyRFC1123               = "Mon, 2 Jan 2006 15:04:05 GMT"
+	s3downloaderPartSize        = 5 * 1024 * 1024
+	s3downloaderReadConcurrency = 13
+	s3uploaderPartSize          = 5 * 1024 * 1024
+	s3uploaderWriteConcurrency  = 5
 )
 
 var (
@@ -54,13 +58,14 @@ type S3AWSVolume struct {
 	AuthToken      string    // populated automatically when IAMRole is used
 	AuthExpiration time.Time // populated automatically when IAMRole is used
 
-	cluster   *arvados.Cluster
-	volume    arvados.Volume
-	logger    logrus.FieldLogger
-	metrics   *volumeMetricsVecs
-	bucket    *s3AWSbucket
-	region    string
-	startOnce sync.Once
+	cluster    *arvados.Cluster
+	volume     arvados.Volume
+	logger     logrus.FieldLogger
+	metrics    *volumeMetricsVecs
+	bufferPool *bufferPool
+	bucket     *s3AWSbucket
+	region     string
+	startOnce  sync.Once
 }
 
 // s3bucket wraps s3.bucket and counts I/O and API usage stats. The
@@ -73,11 +78,7 @@ type s3AWSbucket struct {
 	mu     sync.Mutex
 }
 
-const (
-	PartSize         = 5 * 1024 * 1024
-	ReadConcurrency  = 13
-	WriteConcurrency = 5
-)
 
 var s3AWSKeepBlockRegexp = regexp.MustCompile(`^[0-9a-f]{32}$`)
 var s3AWSZeroTime time.Time
@@ -100,13 +101,18 @@ func (v *S3AWSVolume) key(loc string) string {
 	}
 }
 
-func newS3AWSVolume(cluster *arvados.Cluster, volume arvados.Volume, logger logrus.FieldLogger, metrics *volumeMetricsVecs) (Volume, error) {
-	v := &S3AWSVolume{cluster: cluster, volume: volume, metrics: metrics}
-	err := json.Unmarshal(volume.DriverParameters, v)
+func newS3AWSVolume(params newVolumeParams) (volume, error) {
+	v := &S3AWSVolume{
+		cluster:    params.Cluster,
+		volume:     params.ConfigVolume,
+		metrics:    params.MetricsVecs,
+		bufferPool: params.BufferPool,
+	}
+	err := json.Unmarshal(params.ConfigVolume.DriverParameters, v)
 	if err != nil {
 		return nil, err
 	}
-	v.logger = logger.WithField("Volume", v.String())
+	v.logger = params.Logger.WithField("Volume", v.DeviceID())
 	return v, v.check("")
 }
 
@@ -225,73 +231,17 @@ func (v *S3AWSVolume) check(ec2metadataHostname string) error {
 	}
 
 	// Set up prometheus metrics
-	lbls := prometheus.Labels{"device_id": v.GetDeviceID()}
+	lbls := prometheus.Labels{"device_id": v.DeviceID()}
 	v.bucket.stats.opsCounters, v.bucket.stats.errCounters, v.bucket.stats.ioBytes = v.metrics.getCounterVecsFor(lbls)
 
 	return nil
 }
 
-// String implements fmt.Stringer.
-func (v *S3AWSVolume) String() string {
-	return fmt.Sprintf("s3-bucket:%+q", v.Bucket)
-}
-
-// GetDeviceID returns a globally unique ID for the storage bucket.
-func (v *S3AWSVolume) GetDeviceID() string {
+// DeviceID returns a globally unique ID for the storage bucket.
+func (v *S3AWSVolume) DeviceID() string {
 	return "s3://" + v.Endpoint + "/" + v.Bucket
 }
 
-// Compare the given data with the stored data.
-func (v *S3AWSVolume) Compare(ctx context.Context, loc string, expect []byte) error {
-	key := v.key(loc)
-	errChan := make(chan error, 1)
-	go func() {
-		_, err := v.head("recent/" + key)
-		errChan <- err
-	}()
-	var err error
-	select {
-	case <-ctx.Done():
-		return ctx.Err()
-	case err = <-errChan:
-	}
-	if err != nil {
-		// Checking for the key itself here would interfere
-		// with future GET requests.
-		//
-		// On AWS, if X doesn't exist, a HEAD or GET request
-		// for X causes X's non-existence to be cached. Thus,
-		// if we test for X, then create X and return a
-		// signature to our client, the client might still get
-		// 404 from all keepstores when trying to read it.
-		//
-		// To avoid this, we avoid doing HEAD X or GET X until
-		// we know X has been written.
-		//
-		// Note that X might exist even though recent/X
-		// doesn't: for example, the response to HEAD recent/X
-		// might itself come from a stale cache. In such
-		// cases, we will return a false negative and
-		// PutHandler might needlessly create another replica
-		// on a different volume. That's not ideal, but it's
-		// better than passing the eventually-consistent
-		// problem on to our clients.
-		return v.translateError(err)
-	}
-
-	input := &s3.GetObjectInput{
-		Bucket: aws.String(v.bucket.bucket),
-		Key:    aws.String(key),
-	}
-
-	req := v.bucket.svc.GetObjectRequest(input)
-	result, err := req.Send(ctx)
-	if err != nil {
-		return v.translateError(err)
-	}
-	return v.translateError(compareReaderWithBuf(ctx, result.Body, expect, loc[:32]))
-}
-
 // EmptyTrash looks for trashed blocks that exceeded BlobTrashLifetime
 // and deletes them from the volume.
 func (v *S3AWSVolume) EmptyTrash() {
@@ -313,7 +263,7 @@ func (v *S3AWSVolume) EmptyTrash() {
 		recent, err := v.head("recent/" + key)
 		if err != nil && os.IsNotExist(v.translateError(err)) {
 			v.logger.Warnf("EmptyTrash: found trash marker %q but no %q (%s); calling Untrash", *trash.Key, "recent/"+key, err)
-			err = v.Untrash(loc)
+			err = v.BlockUntrash(loc)
 			if err != nil {
 				v.logger.WithError(err).Errorf("EmptyTrash: Untrash(%q) failed", loc)
 			}
@@ -334,7 +284,7 @@ func (v *S3AWSVolume) EmptyTrash() {
 				// necessary to avoid starvation.
 				v.logger.Infof("EmptyTrash: detected old race for %q, calling fixRace + Touch", loc)
 				v.fixRace(key)
-				v.Touch(loc)
+				v.BlockTouch(loc)
 				return
 			}
 			_, err := v.head(key)
@@ -401,7 +351,7 @@ func (v *S3AWSVolume) EmptyTrash() {
 	if err := trashL.Error(); err != nil {
 		v.logger.WithError(err).Error("EmptyTrash: lister failed")
 	}
-	v.logger.Infof("EmptyTrash: stats for %v: Deleted %v bytes in %v blocks. Remaining in trash: %v bytes in %v blocks.", v.String(), bytesDeleted, blocksDeleted, bytesInTrash-bytesDeleted, blocksInTrash-blocksDeleted)
+	v.logger.Infof("EmptyTrash: stats for %v: Deleted %v bytes in %v blocks. Remaining in trash: %v bytes in %v blocks.", v.DeviceID(), bytesDeleted, blocksDeleted, bytesInTrash-bytesDeleted, blocksInTrash-blocksDeleted)
 }
 
 // fixRace(X) is called when "recent/X" exists but "X" doesn't
@@ -462,55 +412,60 @@ func (v *S3AWSVolume) head(key string) (result *s3.HeadObjectOutput, err error)
 	return
 }
 
-// Get a block: copy the block data into buf, and return the number of
-// bytes copied.
-func (v *S3AWSVolume) Get(ctx context.Context, loc string, buf []byte) (int, error) {
-	// Do not use getWithPipe here: the BlockReader interface does not pass
-	// through 'buf []byte', and we don't want to allocate two buffers for each
-	// read request. Instead, use a version of ReadBlock that accepts 'buf []byte'
-	// as an input.
-	key := v.key(loc)
-	count, err := v.readWorker(ctx, key, buf)
-	if err == nil {
-		return count, err
-	}
-
-	err = v.translateError(err)
-	if !os.IsNotExist(err) {
-		return 0, err
-	}
-
-	_, err = v.head("recent/" + key)
-	err = v.translateError(err)
+// BlockRead reads a Keep block that has been stored as an object
+// in the S3 bucket.
+func (v *S3AWSVolume) BlockRead(ctx context.Context, hash string, writeTo io.Writer) (int, error) {
+	key := v.key(hash)
+	buf, err := v.bufferPool.GetContext(ctx)
 	if err != nil {
-		// If we can't read recent/X, there's no point in
-		// trying fixRace. Give up.
-		return 0, err
-	}
-	if !v.fixRace(key) {
-		err = os.ErrNotExist
 		return 0, err
 	}
+	defer v.bufferPool.Put(buf)
 
-	count, err = v.readWorker(ctx, key, buf)
+	streamer := newStreamWriterAt(writeTo, 65536, buf)
+	defer streamer.Close()
+	err = v.readWorker(ctx, key, streamer)
 	if err != nil {
-		v.logger.Warnf("reading %s after successful fixRace: %s", loc, err)
 		err = v.translateError(err)
-		return 0, err
+		if !os.IsNotExist(err) {
+			return 0, err
+		}
+		if streamer.WroteAt() > 0 {
+			return 0, errors.New("bug? readWorker returned ErrNotExist after writing to streamer")
+		}
+
+		_, err = v.head("recent/" + key)
+		err = v.translateError(err)
+		if err != nil {
+			// If we can't read recent/X, there's no point in
+			// trying fixRace. Give up.
+			return 0, err
+		}
+		if !v.fixRace(key) {
+			err = os.ErrNotExist
+			return 0, err
+		}
+
+		err = v.readWorker(ctx, key, streamer)
+		if err != nil {
+			v.logger.Warnf("reading %s after successful fixRace: %s", hash, err)
+			err = v.translateError(err)
+			return 0, err
+		}
 	}
-	return count, err
+	err = streamer.Close()
+	if err != nil {
+		return 0, v.translateError(err)
+	}
+	return streamer.Wrote(), nil
 }
 
-func (v *S3AWSVolume) readWorker(ctx context.Context, key string, buf []byte) (int, error) {
-	awsBuf := aws.NewWriteAtBuffer(buf)
+func (v *S3AWSVolume) readWorker(ctx context.Context, key string, dst io.WriterAt) error {
 	downloader := s3manager.NewDownloaderWithClient(v.bucket.svc, func(u *s3manager.Downloader) {
-		u.PartSize = PartSize
-		u.Concurrency = ReadConcurrency
+		u.PartSize = s3downloaderPartSize
+		u.Concurrency = s3downloaderReadConcurrency
 	})
-
-	v.logger.Debugf("Partsize: %d; Concurrency: %d\n", downloader.PartSize, downloader.Concurrency)
-
-	count, err := downloader.DownloadWithContext(ctx, awsBuf, &s3.GetObjectInput{
+	count, err := downloader.DownloadWithContext(ctx, dst, &s3.GetObjectInput{
 		Bucket: aws.String(v.bucket.bucket),
 		Key:    aws.String(key),
 	})
@@ -518,7 +473,7 @@ func (v *S3AWSVolume) readWorker(ctx context.Context, key string, buf []byte) (i
 	v.bucket.stats.Tick(&v.bucket.stats.Ops, &v.bucket.stats.GetOps)
 	v.bucket.stats.TickErr(err)
 	v.bucket.stats.TickInBytes(uint64(count))
-	return int(count), v.translateError(err)
+	return v.translateError(err)
 }
 
 func (v *S3AWSVolume) writeObject(ctx context.Context, key string, r io.Reader) error {
@@ -547,10 +502,10 @@ func (v *S3AWSVolume) writeObject(ctx context.Context, key string, r io.Reader)
 	// Experimentation indicated that using concurrency 5 yields the best
 	// throughput, better than higher concurrency (10 or 13) by ~5%.
 	// Defining u.BufferProvider = s3manager.NewBufferedReadSeekerWriteToPool(64 * 1024 * 1024)
-	// is detrimental to througput (minus ~15%).
+	// is detrimental to throughput (minus ~15%).
 	uploader := s3manager.NewUploaderWithClient(v.bucket.svc, func(u *s3manager.Uploader) {
-		u.PartSize = PartSize
-		u.Concurrency = WriteConcurrency
+		u.PartSize = s3uploaderPartSize
+		u.Concurrency = s3uploaderWriteConcurrency
 	})
 
 	// Unlike the goamz S3 driver, we don't need to precompute ContentSHA256:
@@ -571,16 +526,12 @@ func (v *S3AWSVolume) writeObject(ctx context.Context, key string, r io.Reader)
 }
 
-// Put writes a block.
+// BlockWrite writes a block.
-func (v *S3AWSVolume) Put(ctx context.Context, loc string, block []byte) error {
+func (v *S3AWSVolume) BlockWrite(ctx context.Context, hash string, data []byte) error {
 	// Do not use putWithPipe here; we want to pass an io.ReadSeeker to the S3
 	// sdk to avoid memory allocation there. See #17339 for more information.
-	if v.volume.ReadOnly {
-		return MethodDisabledError
-	}
-
-	rdr := bytes.NewReader(block)
-	r := NewCountingReaderAtSeeker(rdr, v.bucket.stats.TickOutBytes)
-	key := v.key(loc)
+	rdr := bytes.NewReader(data)
+	r := newCountingReaderAtSeeker(rdr, v.bucket.stats.TickOutBytes)
+	key := v.key(hash)
 	err := v.writeObject(ctx, key, r)
 	if err != nil {
 		return err
@@ -675,9 +626,9 @@ func (lister *s3awsLister) pop() (k *s3.Object) {
 	return
 }
 
-// IndexTo writes a complete list of locators with the given prefix
+// Index writes a complete list of locators with the given prefix
-// for which Get() can retrieve data.
+// for which BlockRead() can retrieve data.
-func (v *S3AWSVolume) IndexTo(prefix string, writer io.Writer) error {
+func (v *S3AWSVolume) Index(ctx context.Context, prefix string, writer io.Writer) error {
 	prefix = v.key(prefix)
 	// Use a merge sort to find matching sets of X and recent/X.
 	dataL := s3awsLister{
@@ -695,6 +646,9 @@ func (v *S3AWSVolume) IndexTo(prefix string, writer io.Writer) error {
 		Stats:    &v.bucket.stats,
 	}
 	for data, recent := dataL.First(), recentL.First(); data != nil && dataL.Error() == nil; data = dataL.Next() {
+		if ctx.Err() != nil {
+			return ctx.Err()
+		}
 		if *data.Key >= "g" {
 			// Conveniently, "recent/*" and "trash/*" are
 			// lexically greater than all hex-encoded data
@@ -769,28 +723,14 @@ func (v *S3AWSVolume) Mtime(loc string) (time.Time, error) {
 	return *resp.LastModified, err
 }
 
-// Status returns a *VolumeStatus representing the current in-use
-// storage capacity and a fake available capacity that doesn't make
-// the volume seem full or nearly-full.
-func (v *S3AWSVolume) Status() *VolumeStatus {
-	return &VolumeStatus{
-		DeviceNum: 1,
-		BytesFree: BlockSize * 1000,
-		BytesUsed: 1,
-	}
-}
-
 // InternalStats returns bucket I/O and API call counters.
 func (v *S3AWSVolume) InternalStats() interface{} {
 	return &v.bucket.stats
 }
 
-// Touch sets the timestamp for the given locator to the current time.
-func (v *S3AWSVolume) Touch(loc string) error {
-	if v.volume.ReadOnly {
-		return MethodDisabledError
-	}
-	key := v.key(loc)
+// BlockTouch sets the timestamp for the given locator to the current time.
+func (v *S3AWSVolume) BlockTouch(hash string) error {
+	key := v.key(hash)
 	_, err := v.head(key)
 	err = v.translateError(err)
 	if os.IsNotExist(err) && v.fixRace(key) {
@@ -845,10 +785,7 @@ func (b *s3AWSbucket) Del(path string) error {
 }
 
-// Trash a Keep block.
+// BlockTrash trashes a Keep block.
-func (v *S3AWSVolume) Trash(loc string) error {
-	if v.volume.ReadOnly && !v.volume.AllowTrashWhenReadOnly {
-		return MethodDisabledError
-	}
+func (v *S3AWSVolume) BlockTrash(loc string) error {
 	if t, err := v.Mtime(loc); err != nil {
 		return err
 	} else if time.Since(t) < v.cluster.Collections.BlobSigningTTL.Duration() {
@@ -872,9 +809,9 @@ func (v *S3AWSVolume) Trash(loc string) error {
 	return v.translateError(v.bucket.Del(key))
 }
 
-// Untrash moves block from trash back into store
-func (v *S3AWSVolume) Untrash(loc string) error {
-	key := v.key(loc)
+// BlockUntrash moves a block from trash back into the store.
+func (v *S3AWSVolume) BlockUntrash(hash string) error {
+	key := v.key(hash)
 	err := v.safeCopy(key, "trash/"+key)
 	if err != nil {
 		return err
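
newS3AWSVolume fills the struct straight from the volume's
DriverParameters JSON via json.Unmarshal. A hedged sketch of that step,
using only field names visible in this diff (Bucket, Endpoint,
IndexPageSize, UnsafeDelete); the real struct has more fields, so
consult the cluster config reference before relying on these:

    package main

    import (
        "encoding/json"
        "fmt"
    )

    // s3VolumeParams mirrors a subset of the S3AWSVolume config fields.
    type s3VolumeParams struct {
        Bucket        string
        Endpoint      string
        IndexPageSize int
        UnsafeDelete  bool
    }

    func main() {
        raw := []byte(`{"Bucket":"keep-blocks","Endpoint":"https://s3.example.com","IndexPageSize":1000}`)
        var v s3VolumeParams
        if err := json.Unmarshal(raw, &v); err != nil {
            panic(err)
        }
        fmt.Printf("%+v\n", v) // unset fields keep their zero values
    }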
diff --git a/services/keepstore/s3aws_volume_test.go b/services/keepstore/s3aws_volume_test.go
index c7e2d485df..f05cbee848 100644
--- a/services/keepstore/s3aws_volume_test.go
+++ b/services/keepstore/s3aws_volume_test.go
@@ -58,7 +58,6 @@ type StubbedS3AWSSuite struct {
 	s3server *httptest.Server
 	metadata *httptest.Server
 	cluster  *arvados.Cluster
-	handler  *handler
 	volumes  []*TestableS3AWSVolume
 }
 
@@ -70,33 +69,37 @@ func (s *StubbedS3AWSSuite) SetUpTest(c *check.C) {
 		"zzzzz-nyw5e-000000000000000": {Driver: "S3"},
 		"zzzzz-nyw5e-111111111111111": {Driver: "S3"},
 	}
-	s.handler = &handler{}
 }
 
 func (s *StubbedS3AWSSuite) TestGeneric(c *check.C) {
-	DoGenericVolumeTests(c, false, func(t TB, cluster *arvados.Cluster, volume arvados.Volume, logger logrus.FieldLogger, metrics *volumeMetricsVecs) TestableVolume {
+	DoGenericVolumeTests(c, false, func(t TB, params newVolumeParams) TestableVolume {
 		// Use a negative raceWindow so s3test's 1-second
 		// timestamp precision doesn't confuse fixRace.
-		return s.newTestableVolume(c, cluster, volume, metrics, -2*time.Second)
+		return s.newTestableVolume(c, params, -2*time.Second)
 	})
 }
 
 func (s *StubbedS3AWSSuite) TestGenericReadOnly(c *check.C) {
-	DoGenericVolumeTests(c, true, func(t TB, cluster *arvados.Cluster, volume arvados.Volume, logger logrus.FieldLogger, metrics *volumeMetricsVecs) TestableVolume {
-		return s.newTestableVolume(c, cluster, volume, metrics, -2*time.Second)
+	DoGenericVolumeTests(c, true, func(t TB, params newVolumeParams) TestableVolume {
+		return s.newTestableVolume(c, params, -2*time.Second)
 	})
 }
 
 func (s *StubbedS3AWSSuite) TestGenericWithPrefix(c *check.C) {
-	DoGenericVolumeTests(c, false, func(t TB, cluster *arvados.Cluster, volume arvados.Volume, logger logrus.FieldLogger, metrics *volumeMetricsVecs) TestableVolume {
-		v := s.newTestableVolume(c, cluster, volume, metrics, -2*time.Second)
+	DoGenericVolumeTests(c, false, func(t TB, params newVolumeParams) TestableVolume {
+		v := s.newTestableVolume(c, params, -2*time.Second)
 		v.PrefixLength = 3
 		return v
 	})
 }
 
 func (s *StubbedS3AWSSuite) TestIndex(c *check.C) {
-	v := s.newTestableVolume(c, s.cluster, arvados.Volume{Replication: 2}, newVolumeMetricsVecs(prometheus.NewRegistry()), 0)
+	v := s.newTestableVolume(c, newVolumeParams{
+		Cluster:      s.cluster,
+		ConfigVolume: arvados.Volume{Replication: 2},
+		MetricsVecs:  newVolumeMetricsVecs(prometheus.NewRegistry()),
+		BufferPool:   newBufferPool(ctxlog.TestLogger(c), 8, prometheus.NewRegistry()),
+	}, 0)
 	v.IndexPageSize = 3
 	for i := 0; i < 256; i++ {
 		v.PutRaw(fmt.Sprintf("%02x%030x", i, i), []byte{102, 111, 111})
@@ -111,7 +114,7 @@ func (s *StubbedS3AWSSuite) TestIndex(c *check.C) {
 		{"abc", 0},
 	} {
 		buf := new(bytes.Buffer)
-		err := v.IndexTo(spec.prefix, buf)
+		err := v.Index(context.Background(), spec.prefix, buf)
 		c.Check(err, check.IsNil)
 
 		idx := bytes.SplitAfter(buf.Bytes(), []byte{10})
@@ -146,7 +149,7 @@ func (s *StubbedS3AWSSuite) TestSignature(c *check.C) {
 	vol.bucket.svc.ForcePathStyle = true
 
 	c.Check(err, check.IsNil)
-	err = vol.Put(context.Background(), "acbd18db4cc2f85cedef654fccc4a4d8", []byte("foo"))
+	err = vol.BlockWrite(context.Background(), "acbd18db4cc2f85cedef654fccc4a4d8", []byte("foo"))
 	c.Check(err, check.IsNil)
 	c.Check(header.Get("Authorization"), check.Matches, `AWS4-HMAC-SHA256 .*`)
 }
@@ -202,7 +205,12 @@ func (s *StubbedS3AWSSuite) TestIAMRoleCredentials(c *check.C) {
 }
 
 func (s *StubbedS3AWSSuite) TestStats(c *check.C) {
-	v := s.newTestableVolume(c, s.cluster, arvados.Volume{Replication: 2}, newVolumeMetricsVecs(prometheus.NewRegistry()), 5*time.Minute)
+	v := s.newTestableVolume(c, newVolumeParams{
+		Cluster:      s.cluster,
+		ConfigVolume: arvados.Volume{Replication: 2},
+		MetricsVecs:  newVolumeMetricsVecs(prometheus.NewRegistry()),
+		BufferPool:   newBufferPool(ctxlog.TestLogger(c), 8, prometheus.NewRegistry()),
+	}, 5*time.Minute)
 	stats := func() string {
 		buf, err := json.Marshal(v.InternalStats())
 		c.Check(err, check.IsNil)
@@ -212,20 +220,20 @@ func (s *StubbedS3AWSSuite) TestStats(c *check.C) {
 	c.Check(stats(), check.Matches, `.*"Ops":0,.*`)
 
 	loc := "acbd18db4cc2f85cedef654fccc4a4d8"
-	_, err := v.Get(context.Background(), loc, make([]byte, 3))
+	_, err := v.BlockRead(context.Background(), loc, io.Discard)
 	c.Check(err, check.NotNil)
 	c.Check(stats(), check.Matches, `.*"Ops":[^0],.*`)
 	c.Check(stats(), check.Matches, `.*"s3.requestFailure 404 NoSuchKey[^"]*":[^0].*`)
 	c.Check(stats(), check.Matches, `.*"InBytes":0,.*`)
 
-	err = v.Put(context.Background(), loc, []byte("foo"))
+	err = v.BlockWrite(context.Background(), loc, []byte("foo"))
 	c.Check(err, check.IsNil)
 	c.Check(stats(), check.Matches, `.*"OutBytes":3,.*`)
 	c.Check(stats(), check.Matches, `.*"PutOps":2,.*`)
 
-	_, err = v.Get(context.Background(), loc, make([]byte, 3))
+	_, err = v.BlockRead(context.Background(), loc, io.Discard)
 	c.Check(err, check.IsNil)
-	_, err = v.Get(context.Background(), loc, make([]byte, 3))
+	_, err = v.BlockRead(context.Background(), loc, io.Discard)
 	c.Check(err, check.IsNil)
 	c.Check(stats(), check.Matches, `.*"InBytes":6,.*`)
 }
@@ -251,30 +259,15 @@ func (h *s3AWSBlockingHandler) ServeHTTP(w http.ResponseWriter, r *http.Request)
 }
 
 func (s *StubbedS3AWSSuite) TestGetContextCancel(c *check.C) {
-	loc := "acbd18db4cc2f85cedef654fccc4a4d8"
-	buf := make([]byte, 3)
-
 	s.testContextCancel(c, func(ctx context.Context, v *TestableS3AWSVolume) error {
-		_, err := v.Get(ctx, loc, buf)
+		_, err := v.BlockRead(ctx, fooHash, io.Discard)
 		return err
 	})
 }
 
-func (s *StubbedS3AWSSuite) TestCompareContextCancel(c *check.C) {
-	loc := "acbd18db4cc2f85cedef654fccc4a4d8"
-	buf := []byte("bar")
-
-	s.testContextCancel(c, func(ctx context.Context, v *TestableS3AWSVolume) error {
-		return v.Compare(ctx, loc, buf)
-	})
-}
-
 func (s *StubbedS3AWSSuite) TestPutContextCancel(c *check.C) {
-	loc := "acbd18db4cc2f85cedef654fccc4a4d8"
-	buf := []byte("foo")
-
 	s.testContextCancel(c, func(ctx context.Context, v *TestableS3AWSVolume) error {
-		return v.Put(ctx, loc, buf)
+		return v.BlockWrite(ctx, fooHash, []byte("foo"))
 	})
 }
 
@@ -283,7 +276,12 @@ func (s *StubbedS3AWSSuite) testContextCancel(c *check.C, testFunc func(context.
 	s.s3server = httptest.NewServer(handler)
 	defer s.s3server.Close()
 
-	v := s.newTestableVolume(c, s.cluster, arvados.Volume{Replication: 2}, newVolumeMetricsVecs(prometheus.NewRegistry()), 5*time.Minute)
+	v := s.newTestableVolume(c, newVolumeParams{
+		Cluster:      s.cluster,
+		ConfigVolume: arvados.Volume{Replication: 2},
+		MetricsVecs:  newVolumeMetricsVecs(prometheus.NewRegistry()),
+		BufferPool:   newBufferPool(ctxlog.TestLogger(c), 8, prometheus.NewRegistry()),
+	}, 5*time.Minute)
 
 	ctx, cancel := context.WithCancel(context.Background())
 
@@ -323,7 +321,13 @@ func (s *StubbedS3AWSSuite) TestBackendStates(c *check.C) {
 	s.cluster.Collections.BlobTrashLifetime.Set("1h")
 	s.cluster.Collections.BlobSigningTTL.Set("1h")
 
-	v := s.newTestableVolume(c, s.cluster, arvados.Volume{Replication: 2}, newVolumeMetricsVecs(prometheus.NewRegistry()), 5*time.Minute)
+	v := s.newTestableVolume(c, newVolumeParams{
+		Cluster:      s.cluster,
+		ConfigVolume: arvados.Volume{Replication: 2},
+		Logger:       ctxlog.TestLogger(c),
+		MetricsVecs:  newVolumeMetricsVecs(prometheus.NewRegistry()),
+		BufferPool:   newBufferPool(ctxlog.TestLogger(c), 8, prometheus.NewRegistry()),
+	}, 5*time.Minute)
 	var none time.Time
 
 	putS3Obj := func(t time.Time, key string, data []byte) {
@@ -475,8 +479,7 @@ func (s *StubbedS3AWSSuite) TestBackendStates(c *check.C) {
 
 			// Check canGet
 			loc, blk := setupScenario()
-			buf := make([]byte, len(blk))
-			_, err := v.Get(context.Background(), loc, buf)
+			_, err := v.BlockRead(context.Background(), loc, io.Discard)
 			c.Check(err == nil, check.Equals, scenario.canGet)
 			if err != nil {
 				c.Check(os.IsNotExist(err), check.Equals, true)
@@ -484,9 +487,9 @@ func (s *StubbedS3AWSSuite) TestBackendStates(c *check.C) {
 
 			// Call Trash, then check canTrash and canGetAfterTrash
 			loc, _ = setupScenario()
-			err = v.Trash(loc)
+			err = v.BlockTrash(loc)
 			c.Check(err == nil, check.Equals, scenario.canTrash)
-			_, err = v.Get(context.Background(), loc, buf)
+			_, err = v.BlockRead(context.Background(), loc, io.Discard)
 			c.Check(err == nil, check.Equals, scenario.canGetAfterTrash)
 			if err != nil {
 				c.Check(os.IsNotExist(err), check.Equals, true)
@@ -494,14 +497,14 @@ func (s *StubbedS3AWSSuite) TestBackendStates(c *check.C) {
 
 			// Call Untrash, then check canUntrash
 			loc, _ = setupScenario()
-			err = v.Untrash(loc)
+			err = v.BlockUntrash(loc)
 			c.Check(err == nil, check.Equals, scenario.canUntrash)
 			if scenario.dataT != none || scenario.trashT != none {
 				// In all scenarios where the data exists, we
 				// should be able to Get after Untrash --
 				// regardless of timestamps, errors, race
 				// conditions, etc.
-				_, err = v.Get(context.Background(), loc, buf)
+				_, err = v.BlockRead(context.Background(), loc, io.Discard)
 				c.Check(err, check.IsNil)
 			}
 
@@ -522,7 +525,7 @@ func (s *StubbedS3AWSSuite) TestBackendStates(c *check.C) {
 			// Check for current Mtime after Put (applies to all
 			// scenarios)
 			loc, blk = setupScenario()
-			err = v.Put(context.Background(), loc, blk)
+			err = v.BlockWrite(context.Background(), loc, blk)
 			c.Check(err, check.IsNil)
 			t, err := v.Mtime(loc)
 			c.Check(err, check.IsNil)
@@ -555,7 +558,7 @@ func (l LogrusLog) Print(level gofakes3.LogLevel, v ...interface{}) {
 	}
 }
 
-func (s *StubbedS3AWSSuite) newTestableVolume(c *check.C, cluster *arvados.Cluster, volume arvados.Volume, metrics *volumeMetricsVecs, raceWindow time.Duration) *TestableS3AWSVolume {
+func (s *StubbedS3AWSSuite) newTestableVolume(c *check.C, params newVolumeParams, raceWindow time.Duration) *TestableS3AWSVolume {
 
 	clock := &s3AWSFakeClock{}
 	// fake s3
@@ -591,10 +594,11 @@ func (s *StubbedS3AWSSuite) newTestableVolume(c *check.C, cluster *arvados.Clust
 				UnsafeDelete:       true,
 				IndexPageSize:      1000,
 			},
-			cluster: cluster,
-			volume:  volume,
-			logger:  ctxlog.TestLogger(c),
-			metrics: metrics,
+			cluster:    params.Cluster,
+			volume:     params.ConfigVolume,
+			logger:     params.Logger,
+			metrics:    params.MetricsVecs,
+			bufferPool: params.BufferPool,
 		},
 		c:           c,
 		server:      srv,
@@ -619,7 +623,7 @@ func (s *StubbedS3AWSSuite) newTestableVolume(c *check.C, cluster *arvados.Clust
 // PutRaw skips the ContentMD5 test
 func (v *TestableS3AWSVolume) PutRaw(loc string, block []byte) {
 	key := v.key(loc)
-	r := NewCountingReader(bytes.NewReader(block), v.bucket.stats.TickOutBytes)
+	r := newCountingReader(bytes.NewReader(block), v.bucket.stats.TickOutBytes)
 
 	uploader := s3manager.NewUploaderWithClient(v.bucket.svc, func(u *s3manager.Uploader) {
 		u.PartSize = 5 * 1024 * 1024
diff --git a/services/keepstore/status_test.go b/services/keepstore/status_test.go
deleted file mode 100644
index 80f98adb22..0000000000
--- a/services/keepstore/status_test.go
+++ /dev/null
@@ -1,25 +0,0 @@
-// Copyright (C) The Arvados Authors. All rights reserved.
-//
-// SPDX-License-Identifier: AGPL-3.0
-
-package keepstore
-
-import (
-	"encoding/json"
-)
-
-// We don't have isolated unit tests for /status.json yet, but we do
-// check (e.g., in pull_worker_test.go) that /status.json reports
-// specific statistics correctly at the appropriate times.
-
-// getStatusItem("foo","bar","baz") retrieves /status.json, decodes
-// the response body into resp, and returns resp["foo"]["bar"]["baz"].
-func getStatusItem(h *handler, keys ...string) interface{} {
-	resp := IssueRequest(h, &RequestTester{"/status.json", "", "GET", nil, ""})
-	var s interface{}
-	json.NewDecoder(resp.Body).Decode(&s)
-	for _, k := range keys {
-		s = s.(map[string]interface{})[k]
-	}
-	return s
-}
diff --git a/services/keepstore/streamwriterat.go b/services/keepstore/streamwriterat.go
new file mode 100644
index 0000000000..365b55f233
--- /dev/null
+++ b/services/keepstore/streamwriterat.go
@@ -0,0 +1,154 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package keepstore
+
+import (
+	"errors"
+	"fmt"
+	"io"
+	"sync"
+)
+
+// streamWriterAt translates random-access writes to sequential
+// writes. The caller is expected to use an arbitrary sequence of
+// non-overlapping WriteAt calls covering all positions between 0 and
+// N, for any N < len(buf), then call Close.
+//
+// streamWriterAt writes the data to the provided io.Writer in
+// sequential order.
+//
+// Close returns when all data has been written through.
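+//
+// Example (hypothetical caller): two non-overlapping parts may arrive
+// in any order, as with concurrent download parts.
+//
+//	buf := make([]byte, len(part1)+len(part2))
+//	swa := newStreamWriterAt(w, 65536, buf)
+//	swa.WriteAt(part2, int64(len(part1)))
+//	swa.WriteAt(part1, 0)
+//	err := swa.Close() // returns after both parts reach w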
+type streamWriterAt struct {
+	writer     io.Writer
+	buf        []byte
+	partsize   int         // size of each part written through to writer
+	endpos     int         // portion of buf actually used, judging by WriteAt calls so far
+	partfilled []int       // number of bytes written to each part so far
+	partready  chan []byte // parts of buf fully written / waiting for writer goroutine
+	partnext   int         // index of next part we will send to partready when it's ready
+	wroteAt    int         // bytes we copied to buf in WriteAt
+	wrote      int         // bytes successfully written through to writer
+	errWrite   chan error  // final outcome of writer goroutine
+	closed     bool        // streamWriterAt has been closed
+	mtx        sync.Mutex  // guard internal fields during concurrent calls to WriteAt and Close
+}
+
+// newStreamWriterAt creates a new streamWriterAt.
+func newStreamWriterAt(w io.Writer, partsize int, buf []byte) *streamWriterAt {
+	if partsize == 0 {
+		partsize = 65536
+	}
+	nparts := (len(buf) + partsize - 1) / partsize
+	swa := &streamWriterAt{
+		writer:     w,
+		partsize:   partsize,
+		buf:        buf,
+		partfilled: make([]int, nparts),
+		partready:  make(chan []byte, nparts),
+		errWrite:   make(chan error, 1),
+	}
+	go swa.writeToWriter()
+	return swa
+}
+
+// Wrote returns the number of bytes written through to the
+// io.Writer.
+//
+// Wrote must not be called until after Close.
+func (swa *streamWriterAt) Wrote() int {
+	return swa.wrote
+}
+
+// WroteAt returns the number of bytes passed to WriteAt, regardless of
+// whether they were written through to the io.Writer.
+func (swa *streamWriterAt) WroteAt() int {
+	swa.mtx.Lock()
+	defer swa.mtx.Unlock()
+	return swa.wroteAt
+}
+
+func (swa *streamWriterAt) writeToWriter() {
+	defer close(swa.errWrite)
+	for p := range swa.partready {
+		n, err := swa.writer.Write(p)
+		if err != nil {
+			swa.errWrite <- err
+			return
+		}
+		swa.wrote += n
+	}
+}
+
+// WriteAt implements io.WriterAt.
+func (swa *streamWriterAt) WriteAt(p []byte, offset int64) (int, error) {
+	pos := int(offset)
+	n := 0
+	if pos <= len(swa.buf) {
+		n = copy(swa.buf[pos:], p)
+	}
+	if n < len(p) {
+		return n, fmt.Errorf("write beyond end of buffer: offset %d len %d buf %d", offset, len(p), len(swa.buf))
+	}
+	endpos := pos + n
+
+	swa.mtx.Lock()
+	defer swa.mtx.Unlock()
+	swa.wroteAt += len(p)
+	if swa.endpos < endpos {
+		swa.endpos = endpos
+	}
+	if swa.closed {
+		return 0, errors.New("invalid use of closed streamWriterAt")
+	}
+	// Track the number of bytes that landed in each of our
+	// (output) parts.
+	for i := pos; i < endpos; {
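+		// j is the next part boundary strictly after i, clamped to endpos.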
+		j := i + swa.partsize - (i % swa.partsize)
+		if j > endpos {
+			j = endpos
+		}
+		pf := swa.partfilled[i/swa.partsize]
+		pf += j - i
+		if pf > swa.partsize {
+			return 0, errors.New("streamWriterAt: overlapping WriteAt calls")
+		}
+		swa.partfilled[i/swa.partsize] = pf
+		i = j
+	}
+	// Flush filled parts to partready.
+	for swa.partnext < len(swa.partfilled) && swa.partfilled[swa.partnext] == swa.partsize {
+		offset := swa.partnext * swa.partsize
+		swa.partready <- swa.buf[offset : offset+swa.partsize]
+		swa.partnext++
+	}
+	return len(p), nil
+}
+
+// Close flushes any remaining data through to the io.Writer, waits
+// for the writer goroutine to finish, and reports any write error or
+// hole left in the input.
+func (swa *streamWriterAt) Close() error {
+	swa.mtx.Lock()
+	defer swa.mtx.Unlock()
+	if swa.closed {
+		return errors.New("invalid use of closed streamWriterAt")
+	}
+	swa.closed = true
+	// Flush last part if needed. If the input doesn't end on a
+	// part boundary, the last part never appears "filled" when we
+	// check in WriteAt.  But here, we know endpos is the end of
+	// the stream, so we can check whether the last part is ready.
+	if offset := swa.partnext * swa.partsize; offset < swa.endpos && offset+swa.partfilled[swa.partnext] == swa.endpos {
+		swa.partready <- swa.buf[offset:swa.endpos]
+		swa.partnext++
+	}
+	close(swa.partready)
+	err := <-swa.errWrite
+	if err != nil {
+		return err
+	}
+	if swa.wrote != swa.wroteAt {
+		return fmt.Errorf("streamWriterAt: detected hole in input: wrote %d but flushed %d", swa.wroteAt, swa.wrote)
+	}
+	return nil
+}
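
To make the WriteAt/Close contract concrete, here is a minimal usage
sketch (hypothetical caller code, not part of this commit; assumes the
usual fmt/io/log/sync imports): several goroutines fill non-overlapping
regions of the buffer, and Close blocks until everything has reached
the underlying writer.

	// Example (hypothetical): concurrent writers filling a 100-byte
	// buffer in 10-byte pieces, streamed out in 16-byte parts.
	func exampleStreamWriterAt() {
		buf := make([]byte, 100)
		swa := newStreamWriterAt(io.Discard, 16, buf)
		var wg sync.WaitGroup
		for pos := 0; pos < len(buf); pos += 10 {
			pos := pos
			wg.Add(1)
			go func() {
				defer wg.Done()
				// Each call covers a distinct 10-byte region.
				if _, err := swa.WriteAt(make([]byte, 10), int64(pos)); err != nil {
					log.Print(err)
				}
			}()
		}
		wg.Wait()
		if err := swa.Close(); err != nil { // flushes the final partial part; detects holes
			log.Fatal(err)
		}
		fmt.Println(swa.Wrote()) // 100
	}

Because partready is buffered with room for every part, WriteAt never
blocks when handing a completed part to the writer goroutine.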
diff --git a/services/keepstore/streamwriterat_test.go b/services/keepstore/streamwriterat_test.go
new file mode 100644
index 0000000000..fe6837e522
--- /dev/null
+++ b/services/keepstore/streamwriterat_test.go
@@ -0,0 +1,83 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package keepstore
+
+import (
+	"bytes"
+	"sync"
+
+	. "gopkg.in/check.v1"
+)
+
+var _ = Suite(&streamWriterAtSuite{})
+
+type streamWriterAtSuite struct{}
+
+func (s *streamWriterAtSuite) TestPartSizes(c *C) {
+	for partsize := 1; partsize < 5; partsize++ {
+		for writesize := 1; writesize < 5; writesize++ {
+			for datasize := 1; datasize < 100; datasize += 13 {
+				for bufextra := 0; bufextra < 5; bufextra++ {
+					c.Logf("=== partsize %d writesize %d datasize %d bufextra %d", partsize, writesize, datasize, bufextra)
+					outbuf := bytes.NewBuffer(nil)
+					indata := make([]byte, datasize)
+					for i := range indata {
+						indata[i] = byte(i)
+					}
+					swa := newStreamWriterAt(outbuf, partsize, make([]byte, datasize+bufextra))
+					var wg sync.WaitGroup
+					for pos := 0; pos < datasize; pos += writesize {
+						pos := pos
+						wg.Add(1)
+						go func() {
+							defer wg.Done()
+							endpos := pos + writesize
+							if endpos > datasize {
+								endpos = datasize
+							}
+							swa.WriteAt(indata[pos:endpos], int64(pos))
+						}()
+					}
+					wg.Wait()
+					swa.Close()
+					c.Check(outbuf.Bytes(), DeepEquals, indata)
+				}
+			}
+		}
+	}
+}
+
+func (s *streamWriterAtSuite) TestOverflow(c *C) {
+	for offset := -1; offset < 2; offset++ {
+		buf := make([]byte, 50)
+		swa := newStreamWriterAt(bytes.NewBuffer(nil), 20, buf)
+		_, err := swa.WriteAt([]byte("foo"), int64(len(buf)+offset))
+		c.Check(err, NotNil)
+		err = swa.Close()
+		c.Check(err, IsNil)
+	}
+}
+
+func (s *streamWriterAtSuite) TestIncompleteWrite(c *C) {
+	for _, partsize := range []int{20, 25} {
+		for _, bufsize := range []int{50, 55, 60} {
+			for offset := 0; offset < 3; offset++ {
+				swa := newStreamWriterAt(bytes.NewBuffer(nil), partsize, make([]byte, bufsize))
+				_, err := swa.WriteAt(make([]byte, 1), 49)
+				c.Check(err, IsNil)
+				_, err = swa.WriteAt(make([]byte, 46), int64(offset))
+				c.Check(err, IsNil)
+				err = swa.Close()
+				c.Check(err, NotNil)
+				c.Check(swa.WroteAt(), Equals, 47)
+				if offset == 0 {
+					c.Check(swa.Wrote(), Equals, 40/partsize*partsize)
+				} else {
+					c.Check(swa.Wrote(), Equals, 0)
+				}
+			}
+		}
+	}
+}
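
A worked check of the expectations in TestIncompleteWrite: the two
WriteAt calls deliver 46 + 1 = 47 bytes, so WroteAt() is 47 in every
case. When the 46-byte write lands at offset 0, the contiguous prefix
covers bytes 0-45, and only the complete parts inside that prefix reach
the writer: floor(46/partsize)*partsize bytes, i.e. 40 for partsize 20
and 25 for partsize 25 (the test writes this as 40/partsize*partsize,
which yields the same values). Close then reports a hole, since bytes
46-48 were never written. When the write starts at offset 1 or 2, even
the first part is missing byte 0, so nothing is flushed and Wrote() is 0.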
diff --git a/services/keepstore/trash_worker.go b/services/keepstore/trash_worker.go
index 5e8a5a963c..d704c3a7d5 100644
--- a/services/keepstore/trash_worker.go
+++ b/services/keepstore/trash_worker.go
@@ -5,68 +5,183 @@
 package keepstore
 
 import (
-	"errors"
+	"context"
+	"sync"
+	"sync/atomic"
 	"time"
 
 	"git.arvados.org/arvados.git/sdk/go/arvados"
-	"github.com/sirupsen/logrus"
+	"github.com/prometheus/client_golang/prometheus"
 )
 
-// RunTrashWorker processes the trash request queue.
-func RunTrashWorker(volmgr *RRVolumeManager, logger logrus.FieldLogger, cluster *arvados.Cluster, trashq *WorkQueue) {
-	for item := range trashq.NextItem {
-		trashRequest := item.(TrashRequest)
-		TrashItem(volmgr, logger, cluster, trashRequest)
-		trashq.DoneItem <- struct{}{}
-	}
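+// TrashListItem is one entry in a trash list: a request to trash one
+// block, on one mount or on all mounts.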
+type TrashListItem struct {
+	Locator    string `json:"locator"`
+	BlockMtime int64  `json:"block_mtime"`
+	MountUUID  string `json:"mount_uuid"` // Target mount, or "" for "everywhere"
+}
+
+type trasher struct {
+	keepstore  *keepstore
+	todo       []TrashListItem
+	cond       *sync.Cond // lock guards todo accesses; cond broadcasts when todo becomes non-empty
+	inprogress atomic.Int64
 }
 
-// TrashItem deletes the indicated block from every writable volume.
-func TrashItem(volmgr *RRVolumeManager, logger logrus.FieldLogger, cluster *arvados.Cluster, trashRequest TrashRequest) {
-	reqMtime := time.Unix(0, trashRequest.BlockMtime)
-	if time.Since(reqMtime) < cluster.Collections.BlobSigningTTL.Duration() {
-		logger.Warnf("client asked to delete a %v old block %v (BlockMtime %d = %v), but my blobSignatureTTL is %v! Skipping.",
-			arvados.Duration(time.Since(reqMtime)),
-			trashRequest.Locator,
-			trashRequest.BlockMtime,
-			reqMtime,
-			cluster.Collections.BlobSigningTTL)
-		return
+func newTrasher(ctx context.Context, keepstore *keepstore, reg *prometheus.Registry) *trasher {
+	t := &trasher{
+		keepstore: keepstore,
+		cond:      sync.NewCond(&sync.Mutex{}),
+	}
+	reg.MustRegister(prometheus.NewGaugeFunc(
+		prometheus.GaugeOpts{
+			Namespace: "arvados",
+			Subsystem: "keepstore",
+			Name:      "trash_queue_pending_entries",
+			Help:      "Number of queued trash requests",
+		},
+		func() float64 {
+			t.cond.L.Lock()
+			defer t.cond.L.Unlock()
+			return float64(len(t.todo))
+		},
+	))
+	reg.MustRegister(prometheus.NewGaugeFunc(
+		prometheus.GaugeOpts{
+			Namespace: "arvados",
+			Subsystem: "keepstore",
+			Name:      "trash_queue_inprogress_entries",
+			Help:      "Number of trash requests in progress",
+		},
+		func() float64 {
+			return float64(t.inprogress.Load())
+		},
+	))
+	if !keepstore.cluster.Collections.BlobTrash {
+		keepstore.logger.Info("not running trash worker because Collections.BlobTrash == false")
+		return t
 	}
 
-	var volumes []*VolumeMount
-	if uuid := trashRequest.MountUUID; uuid == "" {
-		volumes = volmgr.Mounts()
-	} else if mnt := volmgr.Lookup(uuid, false); mnt == nil {
-		logger.Warnf("trash request for nonexistent mount: %v", trashRequest)
-		return
-	} else if !mnt.KeepMount.AllowTrash {
-		logger.Warnf("trash request for mount with ReadOnly=true, AllowTrashWhenReadOnly=false: %v", trashRequest)
+	var mntsAllowTrash []*mount
+	for _, mnt := range t.keepstore.mounts {
+		if mnt.AllowTrash {
+			mntsAllowTrash = append(mntsAllowTrash, mnt)
+		}
+	}
+	if len(mntsAllowTrash) == 0 {
+		t.keepstore.logger.Info("not running trash worker because there are no writable or trashable volumes")
 	} else {
-		volumes = []*VolumeMount{mnt}
+		for i := 0; i < keepstore.cluster.Collections.BlobTrashConcurrency; i++ {
+			go t.runWorker(ctx, mntsAllowTrash)
+		}
 	}
+	return t
+}
+
+func (t *trasher) SetTrashList(newlist []TrashListItem) {
+	t.cond.L.Lock()
+	t.todo = newlist
+	t.cond.L.Unlock()
+	t.cond.Broadcast()
+}
 
-	for _, volume := range volumes {
-		mtime, err := volume.Mtime(trashRequest.Locator)
-		if err != nil {
-			logger.WithError(err).Errorf("%v Trash(%v)", volume, trashRequest.Locator)
-			continue
+func (t *trasher) runWorker(ctx context.Context, mntsAllowTrash []*mount) {
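+	// A canceled context wakes every worker blocked in cond.Wait below.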
+	go func() {
+		<-ctx.Done()
+		t.cond.Broadcast()
+	}()
+	for {
+		t.cond.L.Lock()
+		for len(t.todo) == 0 && ctx.Err() == nil {
+			t.cond.Wait()
 		}
-		if trashRequest.BlockMtime != mtime.UnixNano() {
-			logger.Infof("%v Trash(%v): stored mtime %v does not match trash list value %v; skipping", volume, trashRequest.Locator, mtime.UnixNano(), trashRequest.BlockMtime)
-			continue
+		if ctx.Err() != nil {
+			t.cond.L.Unlock()
+			return
 		}
+		item := t.todo[0]
+		t.todo = t.todo[1:]
+		t.inprogress.Add(1)
+		t.cond.L.Unlock()
 
-		if !cluster.Collections.BlobTrash {
-			err = errors.New("skipping because Collections.BlobTrash is false")
-		} else {
-			err = volume.Trash(trashRequest.Locator)
-		}
+		func() {
+			defer t.inprogress.Add(-1)
+			logger := t.keepstore.logger.WithField("locator", item.Locator)
 
-		if err != nil {
-			logger.WithError(err).Errorf("%v Trash(%v)", volume, trashRequest.Locator)
-		} else {
-			logger.Infof("%v Trash(%v) OK", volume, trashRequest.Locator)
-		}
+			li, err := parseLocator(item.Locator)
+			if err != nil {
+				logger.Warn("ignoring trash request for invalid locator")
+				return
+			}
+
+			reqMtime := time.Unix(0, item.BlockMtime)
+			if time.Since(reqMtime) < t.keepstore.cluster.Collections.BlobSigningTTL.Duration() {
+				logger.Warnf("client asked to delete a %v old block (BlockMtime %d = %v), but my blobSignatureTTL is %v! Skipping.",
+					arvados.Duration(time.Since(reqMtime)),
+					item.BlockMtime,
+					reqMtime,
+					t.keepstore.cluster.Collections.BlobSigningTTL)
+				return
+			}
+
+			var mnts []*mount
+			if item.MountUUID == "" {
+				mnts = mntsAllowTrash
+			} else if mnt := t.keepstore.mounts[item.MountUUID]; mnt == nil {
+				logger.Warnf("ignoring trash request for nonexistent mount %s", item.MountUUID)
+				return
+			} else if !mnt.AllowTrash {
+				logger.Warnf("ignoring trash request for readonly mount %s with AllowTrashWhenReadOnly==false", item.MountUUID)
+				return
+			} else {
+				mnts = []*mount{mnt}
+			}
+
+			for _, mnt := range mnts {
+				logger := logger.WithField("mount", mnt.UUID)
+				mtime, err := mnt.Mtime(li.hash)
+				if err != nil {
+					logger.WithError(err).Error("error getting stored mtime")
+					continue
+				}
+				if !mtime.Equal(reqMtime) {
+					logger.Infof("stored mtime (%v) does not match trash list mtime (%v); skipping", mtime, reqMtime)
+					continue
+				}
+				err = mnt.BlockTrash(li.hash)
+				if err != nil {
+					logger.WithError(err).Info("error trashing block")
+					continue
+				}
+				logger.Info("block trashed")
+			}
+		}()
 	}
 }
+
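+// trashEmptier periodically calls EmptyTrash on every trashable volume.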
+type trashEmptier struct{}
+
+func newTrashEmptier(ctx context.Context, ks *keepstore, reg *prometheus.Registry) *trashEmptier {
+	d := ks.cluster.Collections.BlobTrashCheckInterval.Duration()
+	if d <= 0 ||
+		!ks.cluster.Collections.BlobTrash ||
+		ks.cluster.Collections.BlobDeleteConcurrency <= 0 {
+		ks.logger.Infof("not running trash emptier because disabled by config (enabled=%t, interval=%v, concurrency=%d)", ks.cluster.Collections.BlobTrash, d, ks.cluster.Collections.BlobDeleteConcurrency)
+		return &trashEmptier{}
+	}
+	go func() {
+		ticker := time.NewTicker(d)
+		for {
+			select {
+			case <-ctx.Done():
+				return
+			case <-ticker.C:
+			}
+			for _, mnt := range ks.mounts {
+				if mnt.KeepMount.AllowTrash {
+					mnt.volume.EmptyTrash()
+				}
+			}
+		}
+	}()
+	return &trashEmptier{}
+}
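
The wire format consumed by SetTrashList follows from the JSON tags on
TrashListItem above. A sketch of building such a list client-side (in
production the request comes from keep-balance; the values here are
illustrative only):

	items := []TrashListItem{{
		Locator:    "acbd18db4cc2f85cedef654fccc4a4d8+3",
		BlockMtime: 1707249451308502672, // ns since epoch; must equal the stored mtime
		MountUUID:  "",                  // "" = trash from every mount that allows it
	}}
	body, err := json.Marshal(items)
	if err != nil {
		log.Fatal(err)
	}
	// PUT body to keepstore's /trash endpoint (system root token
	// required); the router hands the decoded list to SetTrashList.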
diff --git a/services/keepstore/trash_worker_test.go b/services/keepstore/trash_worker_test.go
index a1648c52cc..0c304dbade 100644
--- a/services/keepstore/trash_worker_test.go
+++ b/services/keepstore/trash_worker_test.go
@@ -5,355 +5,198 @@
 package keepstore
 
 import (
-	"container/list"
 	"context"
+	"crypto/md5"
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"sort"
 	"time"
 
-	"git.arvados.org/arvados.git/sdk/go/ctxlog"
-	"github.com/prometheus/client_golang/prometheus"
-	check "gopkg.in/check.v1"
+	"git.arvados.org/arvados.git/sdk/go/arvados"
+	. "gopkg.in/check.v1"
 )
 
-type TrashWorkerTestData struct {
-	Locator1    string
-	Block1      []byte
-	BlockMtime1 int64
-
-	Locator2    string
-	Block2      []byte
-	BlockMtime2 int64
-
-	CreateData      bool
-	CreateInVolume1 bool
-
-	UseTrashLifeTime bool
-	DifferentMtimes  bool
-
-	DeleteLocator    string
-	SpecifyMountUUID bool
-
-	ExpectLocator1 bool
-	ExpectLocator2 bool
-}
-
-// Delete block that does not exist in any of the keep volumes.
-// Expect no errors.
-func (s *HandlerSuite) TestTrashWorkerIntegration_GetNonExistingLocator(c *check.C) {
-	s.cluster.Collections.BlobTrash = true
-	testData := TrashWorkerTestData{
-		Locator1: "5d41402abc4b2a76b9719d911017c592",
-		Block1:   []byte("hello"),
-
-		Locator2: "5d41402abc4b2a76b9719d911017c592",
-		Block2:   []byte("hello"),
-
-		CreateData: false,
-
-		DeleteLocator: "5d41402abc4b2a76b9719d911017c592",
-
-		ExpectLocator1: false,
-		ExpectLocator2: false,
-	}
-	s.performTrashWorkerTest(c, testData)
-}
-
-// Delete a block that exists on volume 1 of the keep servers. Expect
-// the second locator in volume 2 to be unaffected.
-func (s *HandlerSuite) TestTrashWorkerIntegration_LocatorInVolume1(c *check.C) {
-	s.cluster.Collections.BlobTrash = true
-	testData := TrashWorkerTestData{
-		Locator1: TestHash,
-		Block1:   TestBlock,
-
-		Locator2: TestHash2,
-		Block2:   TestBlock2,
-
-		CreateData: true,
-
-		DeleteLocator: TestHash, // first locator
-
-		ExpectLocator1: false,
-		ExpectLocator2: true,
-	}
-	s.performTrashWorkerTest(c, testData)
-}
-
-// Delete a block that exists on volume 2 of the keep servers. Expect
-// the first locator in volume 1 to be unaffected.
-func (s *HandlerSuite) TestTrashWorkerIntegration_LocatorInVolume2(c *check.C) {
-	s.cluster.Collections.BlobTrash = true
-	testData := TrashWorkerTestData{
-		Locator1: TestHash,
-		Block1:   TestBlock,
-
-		Locator2: TestHash2,
-		Block2:   TestBlock2,
-
-		CreateData: true,
-
-		DeleteLocator: TestHash2, // locator 2
-
-		ExpectLocator1: true,
-		ExpectLocator2: false,
-	}
-	s.performTrashWorkerTest(c, testData)
-}
-
-// Delete a block with matching mtime for locator in both
-// volumes. Expect locator to be deleted from both volumes.
-func (s *HandlerSuite) TestTrashWorkerIntegration_LocatorInBothVolumes(c *check.C) {
-	s.cluster.Collections.BlobTrash = true
-	testData := TrashWorkerTestData{
-		Locator1: TestHash,
-		Block1:   TestBlock,
-
-		Locator2: TestHash,
-		Block2:   TestBlock,
-
-		CreateData: true,
-
-		DeleteLocator: TestHash,
-
-		ExpectLocator1: false,
-		ExpectLocator2: false,
-	}
-	s.performTrashWorkerTest(c, testData)
-}
-
-// Same locator with different Mtimes exists in both volumes. Delete
-// the second and expect the first to be still around.
-func (s *HandlerSuite) TestTrashWorkerIntegration_MtimeMatchesForLocator1ButNotForLocator2(c *check.C) {
-	s.cluster.Collections.BlobTrash = true
-	testData := TrashWorkerTestData{
-		Locator1: TestHash,
-		Block1:   TestBlock,
-
-		Locator2: TestHash,
-		Block2:   TestBlock,
-
-		CreateData:      true,
-		DifferentMtimes: true,
-
-		DeleteLocator: TestHash,
-
-		ExpectLocator1: true,
-		ExpectLocator2: false,
-	}
-	s.performTrashWorkerTest(c, testData)
-}
-
-// Delete a block that exists on both volumes with matching mtimes,
-// but specify a MountUUID in the request so it only gets deleted from
-// the first volume.
-func (s *HandlerSuite) TestTrashWorkerIntegration_SpecifyMountUUID(c *check.C) {
-	s.cluster.Collections.BlobTrash = true
-	testData := TrashWorkerTestData{
-		Locator1: TestHash,
-		Block1:   TestBlock,
-
-		Locator2: TestHash,
-		Block2:   TestBlock,
-
-		CreateData: true,
-
-		DeleteLocator:    TestHash,
-		SpecifyMountUUID: true,
-
-		ExpectLocator1: true,
-		ExpectLocator2: true,
-	}
-	s.performTrashWorkerTest(c, testData)
-}
-
-// Two different locators in volume 1. Delete one of them. Expect the
-// other unaffected.
-func (s *HandlerSuite) TestTrashWorkerIntegration_TwoDifferentLocatorsInVolume1(c *check.C) {
-	s.cluster.Collections.BlobTrash = true
-	testData := TrashWorkerTestData{
-		Locator1: TestHash,
-		Block1:   TestBlock,
-
-		Locator2: TestHash2,
-		Block2:   TestBlock2,
-
-		CreateData:      true,
-		CreateInVolume1: true,
-
-		DeleteLocator: TestHash, // locator 1
-
-		ExpectLocator1: false,
-		ExpectLocator2: true,
-	}
-	s.performTrashWorkerTest(c, testData)
-}
-
-// Allow default Trash Life time to be used. Thus, the newly created
-// block will not be deleted because its Mtime is within the trash
-// life time.
-func (s *HandlerSuite) TestTrashWorkerIntegration_SameLocatorInTwoVolumesWithDefaultTrashLifeTime(c *check.C) {
-	s.cluster.Collections.BlobTrash = true
-	testData := TrashWorkerTestData{
-		Locator1: TestHash,
-		Block1:   TestBlock,
-
-		Locator2: TestHash2,
-		Block2:   TestBlock2,
-
-		CreateData:      true,
-		CreateInVolume1: true,
-
-		UseTrashLifeTime: true,
-
-		DeleteLocator: TestHash, // locator 1
-
-		// Since trash life time is in effect, block won't be deleted.
-		ExpectLocator1: true,
-		ExpectLocator2: true,
-	}
-	s.performTrashWorkerTest(c, testData)
-}
-
-// Delete a block with matching mtime for locator in both volumes, but
-// EnableDelete is false, so block won't be deleted.
-func (s *HandlerSuite) TestTrashWorkerIntegration_DisabledDelete(c *check.C) {
+func (s *routerSuite) TestTrashList_Clear(c *C) {
 	s.cluster.Collections.BlobTrash = false
-	testData := TrashWorkerTestData{
-		Locator1: TestHash,
-		Block1:   TestBlock,
-
-		Locator2: TestHash,
-		Block2:   TestBlock,
-
-		CreateData: true,
-
-		DeleteLocator: TestHash,
-
-		ExpectLocator1: true,
-		ExpectLocator2: true,
-	}
-	s.performTrashWorkerTest(c, testData)
+	router, cancel := testRouter(c, s.cluster, nil)
+	defer cancel()
+
+	resp := call(router, "PUT", "http://example/trash", s.cluster.SystemRootToken, []byte(`
+		[
+		 {
+		  "locator":"acbd18db4cc2f85cedef654fccc4a4d8+3",
+		  "block_mtime":1707249451308502672,
+		  "mount_uuid":"zzzzz-nyw5e-000000000000000"
+		 }
+		]
+		`), nil)
+	c.Check(resp.Code, Equals, http.StatusOK)
+	c.Check(router.trasher.todo, DeepEquals, []TrashListItem{{
+		Locator:    "acbd18db4cc2f85cedef654fccc4a4d8+3",
+		BlockMtime: 1707249451308502672,
+		MountUUID:  "zzzzz-nyw5e-000000000000000",
+	}})
+
+	resp = call(router, "PUT", "http://example/trash", s.cluster.SystemRootToken, []byte("[]"), nil)
+	c.Check(resp.Code, Equals, http.StatusOK)
+	c.Check(router.trasher.todo, HasLen, 0)
 }
 
-func (s *HandlerSuite) performTrashWorkerTest(c *check.C, testData TrashWorkerTestData) {
-	c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
-	// Replace the router's trashq -- which the worker goroutines
-	// started by setup() are now receiving from -- with a new
-	// one, so we can see what the handler sends to it.
-	trashq := NewWorkQueue()
-	s.handler.Handler.(*router).trashq = trashq
-
-	// Put test content
-	mounts := s.handler.volmgr.AllWritable()
-	if testData.CreateData {
-		mounts[0].Put(context.Background(), testData.Locator1, testData.Block1)
-		mounts[0].Put(context.Background(), testData.Locator1+".meta", []byte("metadata"))
-
-		if testData.CreateInVolume1 {
-			mounts[0].Put(context.Background(), testData.Locator2, testData.Block2)
-			mounts[0].Put(context.Background(), testData.Locator2+".meta", []byte("metadata"))
-		} else {
-			mounts[1].Put(context.Background(), testData.Locator2, testData.Block2)
-			mounts[1].Put(context.Background(), testData.Locator2+".meta", []byte("metadata"))
-		}
-	}
-
-	oldBlockTime := time.Now().Add(-s.cluster.Collections.BlobSigningTTL.Duration() - time.Minute)
-
-	// Create TrashRequest for the test
-	trashRequest := TrashRequest{
-		Locator:    testData.DeleteLocator,
-		BlockMtime: oldBlockTime.UnixNano(),
-	}
-	if testData.SpecifyMountUUID {
-		trashRequest.MountUUID = s.handler.volmgr.Mounts()[0].UUID
-	}
-
-	// Run trash worker and put the trashRequest on trashq
-	trashList := list.New()
-	trashList.PushBack(trashRequest)
-
-	if !testData.UseTrashLifeTime {
-		// Trash worker would not delete block if its Mtime is
-		// within trash life time. Back-date the block to
-		// allow the deletion to succeed.
-		for _, mnt := range mounts {
-			mnt.Volume.(*MockVolume).Timestamps[testData.DeleteLocator] = oldBlockTime
-			if testData.DifferentMtimes {
-				oldBlockTime = oldBlockTime.Add(time.Second)
+func (s *routerSuite) TestTrashList_Execute(c *C) {
+	s.cluster.Collections.BlobTrashConcurrency = 1
+	s.cluster.Volumes = map[string]arvados.Volume{
+		"zzzzz-nyw5e-000000000000000": {Replication: 1, Driver: "stub"},
+		"zzzzz-nyw5e-111111111111111": {Replication: 1, Driver: "stub"},
+		"zzzzz-nyw5e-222222222222222": {Replication: 1, Driver: "stub", ReadOnly: true},
+		"zzzzz-nyw5e-333333333333333": {Replication: 1, Driver: "stub", ReadOnly: true, AllowTrashWhenReadOnly: true},
+	}
+	router, cancel := testRouter(c, s.cluster, nil)
+	defer cancel()
+
+	var mounts []struct {
+		UUID     string
+		DeviceID string `json:"device_id"`
+	}
+	resp := call(router, "GET", "http://example/mounts", s.cluster.SystemRootToken, nil, nil)
+	c.Check(resp.Code, Equals, http.StatusOK)
+	err := json.Unmarshal(resp.Body.Bytes(), &mounts)
+	c.Assert(err, IsNil)
+	c.Assert(mounts, HasLen, 4)
+
+	// Sort mounts by UUID
+	sort.Slice(mounts, func(i, j int) bool {
+		return mounts[i].UUID < mounts[j].UUID
+	})
+
+	// Gather the stub volumes in the same order as the sorted mounts
+	var vols []*stubVolume
+	for _, mount := range mounts {
+		vols = append(vols, router.keepstore.mounts[mount.UUID].volume.(*stubVolume))
+	}
+
+	// The "trial" loop below constructs the trashList that we'll
+	// send to the trasher via the router, plus a slice of checks
+	// to run after the trasher has finished executing the list.
+	var trashList []TrashListItem
+	var checks []func()
+
+	tNew := time.Now().Add(-s.cluster.Collections.BlobSigningTTL.Duration() / 2)
+	tOld := time.Now().Add(-s.cluster.Collections.BlobSigningTTL.Duration() - time.Second)
+
+	for _, trial := range []struct {
+		comment        string
+		storeMtime     []time.Time
+		trashListItems []TrashListItem
+		expectData     []bool
+	}{
+		{
+			comment:    "timestamp matches, but is not old enough to trash => skip",
+			storeMtime: []time.Time{tNew},
+			trashListItems: []TrashListItem{
+				{
+					BlockMtime: tNew.UnixNano(),
+					MountUUID:  mounts[0].UUID,
+				},
+			},
+			expectData: []bool{true},
+		},
+		{
+			comment:    "timestamp matches, and is old enough => trash",
+			storeMtime: []time.Time{tOld},
+			trashListItems: []TrashListItem{
+				{
+					BlockMtime: tOld.UnixNano(),
+					MountUUID:  mounts[0].UUID,
+				},
+			},
+			expectData: []bool{false},
+		},
+		{
+			comment:    "timestamp matches and is old enough on mount 0, but the request specifies mount 1, where timestamp does not match => skip",
+			storeMtime: []time.Time{tOld, tOld.Add(-time.Second)},
+			trashListItems: []TrashListItem{
+				{
+					BlockMtime: tOld.UnixNano(),
+					MountUUID:  mounts[1].UUID,
+				},
+			},
+			expectData: []bool{true, true},
+		},
+		{
+			comment:    "MountUUID unspecified => trash from any mount where timestamp matches, leave alone elsewhere",
+			storeMtime: []time.Time{tOld, tOld.Add(-time.Second)},
+			trashListItems: []TrashListItem{
+				{
+					BlockMtime: tOld.UnixNano(),
+				},
+			},
+			expectData: []bool{false, true},
+		},
+		{
+			comment:    "MountUUID unspecified => trash from multiple mounts if timestamp matches, but skip readonly volumes unless AllowTrashWhenReadOnly",
+			storeMtime: []time.Time{tOld, tOld, tOld, tOld},
+			trashListItems: []TrashListItem{
+				{
+					BlockMtime: tOld.UnixNano(),
+				},
+			},
+			expectData: []bool{false, false, true, false},
+		},
+		{
+			comment:    "readonly MountUUID specified => skip",
+			storeMtime: []time.Time{tOld, tOld, tOld},
+			trashListItems: []TrashListItem{
+				{
+					BlockMtime: tOld.UnixNano(),
+					MountUUID:  mounts[2].UUID,
+				},
+			},
+			expectData: []bool{true, true, true},
+		},
+	} {
+		trial := trial
+		data := []byte(fmt.Sprintf("trial %+v", trial))
+		hash := fmt.Sprintf("%x", md5.Sum(data))
+		for i, t := range trial.storeMtime {
+			if t.IsZero() {
+				continue
 			}
+			err := vols[i].BlockWrite(context.Background(), hash, data)
+			c.Assert(err, IsNil)
+			err = vols[i].blockTouchWithTime(hash, t)
+			c.Assert(err, IsNil)
 		}
-	}
-	go RunTrashWorker(s.handler.volmgr, ctxlog.TestLogger(c), s.cluster, trashq)
-
-	// Install gate so all local operations block until we say go
-	gate := make(chan struct{})
-	for _, mnt := range mounts {
-		mnt.Volume.(*MockVolume).Gate = gate
-	}
-
-	assertStatusItem := func(k string, expect float64) {
-		if v := getStatusItem(s.handler, "TrashQueue", k); v != expect {
-			c.Errorf("Got %s %v, expected %v", k, v, expect)
-		}
-	}
-
-	assertStatusItem("InProgress", 0)
-	assertStatusItem("Queued", 0)
-
-	listLen := trashList.Len()
-	trashq.ReplaceQueue(trashList)
-
-	// Wait for worker to take request(s)
-	expectEqualWithin(c, time.Second, listLen, func() interface{} { return trashq.Status().InProgress })
-
-	// Ensure status.json also reports work is happening
-	assertStatusItem("InProgress", float64(1))
-	assertStatusItem("Queued", float64(listLen-1))
-
-	// Let worker proceed
-	close(gate)
-
-	// Wait for worker to finish
-	expectEqualWithin(c, time.Second, 0, func() interface{} { return trashq.Status().InProgress })
-
-	// Verify Locator1 to be un/deleted as expected
-	buf := make([]byte, BlockSize)
-	size, err := GetBlock(context.Background(), s.handler.volmgr, testData.Locator1, buf, nil)
-	if testData.ExpectLocator1 {
-		if size == 0 || err != nil {
-			c.Errorf("Expected Locator1 to be still present: %s", testData.Locator1)
+		for _, item := range trial.trashListItems {
+			item.Locator = fmt.Sprintf("%s+%d", hash, len(data))
+			trashList = append(trashList, item)
 		}
-	} else {
-		if size > 0 || err == nil {
-			c.Errorf("Expected Locator1 to be deleted: %s", testData.Locator1)
+		for i, expect := range trial.expectData {
+			i, expect := i, expect
+			checks = append(checks, func() {
+				ent := vols[i].data[hash]
+				dataPresent := ent.data != nil && ent.trash.IsZero()
+				c.Check(dataPresent, Equals, expect, Commentf("%s mount %d (%s) expect present=%v but got len(ent.data)=%d ent.trash=%v // %s\nlog:\n%s", hash, i, vols[i].params.UUID, expect, len(ent.data), !ent.trash.IsZero(), trial.comment, vols[i].stubLog.String()))
+			})
 		}
 	}
 
-	// Verify Locator2 to be un/deleted as expected
-	if testData.Locator1 != testData.Locator2 {
-		size, err = GetBlock(context.Background(), s.handler.volmgr, testData.Locator2, buf, nil)
-		if testData.ExpectLocator2 {
-			if size == 0 || err != nil {
-				c.Errorf("Expected Locator2 to be still present: %s", testData.Locator2)
-			}
-		} else {
-			if size > 0 || err == nil {
-				c.Errorf("Expected Locator2 to be deleted: %s", testData.Locator2)
-			}
+	listjson, err := json.Marshal(trashList)
+	resp = call(router, "PUT", "http://example/trash", s.cluster.SystemRootToken, listjson, nil)
+	c.Check(resp.Code, Equals, http.StatusOK)
+
+	for {
+		router.trasher.cond.L.Lock()
+		todolen := len(router.trasher.todo)
+		router.trasher.cond.L.Unlock()
+		if todolen == 0 && router.trasher.inprogress.Load() == 0 {
+			break
 		}
+		time.Sleep(time.Millisecond)
 	}
 
-	// The DifferentMtimes test puts the same locator in two
-	// different volumes, but only one copy has an Mtime matching
-	// the trash request.
-	if testData.DifferentMtimes {
-		locatorFoundIn := 0
-		for _, volume := range s.handler.volmgr.AllReadable() {
-			buf := make([]byte, BlockSize)
-			if _, err := volume.Get(context.Background(), testData.Locator1, buf); err == nil {
-				locatorFoundIn = locatorFoundIn + 1
-			}
-		}
-		c.Check(locatorFoundIn, check.Equals, 1)
+	for _, check := range checks {
+		check()
 	}
 }
diff --git a/services/keepstore/unix_volume.go b/services/keepstore/unix_volume.go
index dee4bdc1c1..98edfae14d 100644
--- a/services/keepstore/unix_volume.go
+++ b/services/keepstore/unix_volume.go
@@ -28,16 +28,22 @@ import (
 )
 
 func init() {
-	driver["Directory"] = newDirectoryVolume
+	driver["Directory"] = newUnixVolume
 }
 
-func newDirectoryVolume(cluster *arvados.Cluster, volume arvados.Volume, logger logrus.FieldLogger, metrics *volumeMetricsVecs) (Volume, error) {
-	v := &UnixVolume{cluster: cluster, volume: volume, logger: logger, metrics: metrics}
-	err := json.Unmarshal(volume.DriverParameters, &v)
+func newUnixVolume(params newVolumeParams) (volume, error) {
+	v := &UnixVolume{
+		uuid:    params.UUID,
+		cluster: params.Cluster,
+		volume:  params.ConfigVolume,
+		logger:  params.Logger,
+		metrics: params.MetricsVecs,
+	}
+	err := json.Unmarshal(params.ConfigVolume.DriverParameters, &v)
 	if err != nil {
 		return nil, err
 	}
-	v.logger = v.logger.WithField("Volume", v.String())
+	v.logger = v.logger.WithField("Volume", v.DeviceID())
 	return v, v.check()
 }
 
@@ -53,7 +59,7 @@ func (v *UnixVolume) check() error {
 	}
 
 	// Set up prometheus metrics
-	lbls := prometheus.Labels{"device_id": v.GetDeviceID()}
+	lbls := prometheus.Labels{"device_id": v.DeviceID()}
 	v.os.stats.opsCounters, v.os.stats.errCounters, v.os.stats.ioBytes = v.metrics.getCounterVecsFor(lbls)
 
 	_, err := v.os.Stat(v.Root)
@@ -65,6 +71,7 @@ type UnixVolume struct {
 	Root      string // path to the volume's root directory
 	Serialize bool
 
+	uuid    string
 	cluster *arvados.Cluster
 	volume  arvados.Volume
 	logger  logrus.FieldLogger
@@ -77,15 +84,16 @@ type UnixVolume struct {
 	os osWithStats
 }
 
-// GetDeviceID returns a globally unique ID for the volume's root
+// DeviceID returns a globally unique ID for the volume's root
 // directory, consisting of the filesystem's UUID and the path from
 // filesystem root to storage directory, joined by "/". For example,
 // the device ID for a local directory "/mnt/xvda1/keep" might be
 // "fa0b6166-3b55-4994-bd3f-92f4e00a1bb0/keep".
-func (v *UnixVolume) GetDeviceID() string {
+func (v *UnixVolume) DeviceID() string {
 	giveup := func(f string, args ...interface{}) string {
-		v.logger.Infof(f+"; using blank DeviceID for volume %s", append(args, v)...)
-		return ""
+		v.logger.Infof(f+"; using hostname:path for volume %s", append(args, v.uuid)...)
+		host, _ := os.Hostname()
+		return host + ":" + v.Root
 	}
 	buf, err := exec.Command("findmnt", "--noheadings", "--target", v.Root).CombinedOutput()
 	if err != nil {
@@ -154,12 +162,9 @@ func (v *UnixVolume) GetDeviceID() string {
 	return giveup("could not find entry in %q matching %q", udir, dev)
 }
 
-// Touch sets the timestamp for the given locator to the current time
-func (v *UnixVolume) Touch(loc string) error {
-	if v.volume.ReadOnly {
-		return MethodDisabledError
-	}
-	p := v.blockPath(loc)
+// BlockTouch sets the timestamp for the given locator to the current time
+func (v *UnixVolume) BlockTouch(hash string) error {
+	p := v.blockPath(hash)
 	f, err := v.os.OpenFile(p, os.O_RDWR|os.O_APPEND, 0644)
 	if err != nil {
 		return err
@@ -203,7 +208,7 @@ func (v *UnixVolume) getFunc(ctx context.Context, path string, fn func(io.Reader
 		return err
 	}
 	defer f.Close()
-	return fn(NewCountingReader(ioutil.NopCloser(f), v.os.stats.TickInBytes))
+	return fn(newCountingReader(ioutil.NopCloser(f), v.os.stats.TickInBytes))
 }
 
 // stat is os.Stat() with some extra sanity checks.
@@ -213,72 +218,45 @@ func (v *UnixVolume) stat(path string) (os.FileInfo, error) {
 		if stat.Size() < 0 {
 			err = os.ErrInvalid
 		} else if stat.Size() > BlockSize {
-			err = TooLongError
+			err = errTooLarge
 		}
 	}
 	return stat, err
 }
 
-// Get retrieves a block, copies it to the given slice, and returns
-// the number of bytes copied.
-func (v *UnixVolume) Get(ctx context.Context, loc string, buf []byte) (int, error) {
-	return getWithPipe(ctx, loc, buf, v)
-}
-
-// ReadBlock implements BlockReader.
-func (v *UnixVolume) ReadBlock(ctx context.Context, loc string, w io.Writer) error {
-	path := v.blockPath(loc)
+// BlockRead reads a block from the volume.
+func (v *UnixVolume) BlockRead(ctx context.Context, hash string, w io.Writer) (int, error) {
+	path := v.blockPath(hash)
 	stat, err := v.stat(path)
 	if err != nil {
-		return v.translateError(err)
+		return 0, v.translateError(err)
 	}
-	return v.getFunc(ctx, path, func(rdr io.Reader) error {
-		n, err := io.Copy(w, rdr)
+	var n int64
+	err = v.getFunc(ctx, path, func(rdr io.Reader) error {
+		n, err = io.Copy(w, rdr)
 		if err == nil && n != stat.Size() {
 			err = io.ErrUnexpectedEOF
 		}
 		return err
 	})
+	return int(n), err
 }
 
-// Compare returns nil if Get(loc) would return the same content as
-// expect. It is functionally equivalent to Get() followed by
-// bytes.Compare(), but uses less memory.
-func (v *UnixVolume) Compare(ctx context.Context, loc string, expect []byte) error {
-	path := v.blockPath(loc)
-	if _, err := v.stat(path); err != nil {
-		return v.translateError(err)
-	}
-	return v.getFunc(ctx, path, func(rdr io.Reader) error {
-		return compareReaderWithBuf(ctx, rdr, expect, loc[:32])
-	})
-}
-
-// Put stores a block of data identified by the locator string
-// "loc".  It returns nil on success.  If the volume is full, it
-// returns a FullError.  If the write fails due to some other error,
-// that error is returned.
-func (v *UnixVolume) Put(ctx context.Context, loc string, block []byte) error {
-	return putWithPipe(ctx, loc, block, v)
-}
-
-// WriteBlock implements BlockWriter.
-func (v *UnixVolume) WriteBlock(ctx context.Context, loc string, rdr io.Reader) error {
-	if v.volume.ReadOnly {
-		return MethodDisabledError
-	}
-	if v.IsFull() {
-		return FullError
+// BlockWrite stores a block on the volume. If it already exists, its
+// timestamp is updated.
+func (v *UnixVolume) BlockWrite(ctx context.Context, hash string, data []byte) error {
+	if v.isFull() {
+		return errFull
 	}
-	bdir := v.blockDir(loc)
+	bdir := v.blockDir(hash)
 	if err := os.MkdirAll(bdir, 0755); err != nil {
 		return fmt.Errorf("error creating directory %s: %s", bdir, err)
 	}
 
-	bpath := v.blockPath(loc)
-	tmpfile, err := v.os.TempFile(bdir, "tmp"+loc)
+	bpath := v.blockPath(hash)
+	tmpfile, err := v.os.TempFile(bdir, "tmp"+hash)
 	if err != nil {
-		return fmt.Errorf("TempFile(%s, tmp%s) failed: %s", bdir, loc, err)
+		return fmt.Errorf("TempFile(%s, tmp%s) failed: %s", bdir, hash, err)
 	}
 	defer v.os.Remove(tmpfile.Name())
 	defer tmpfile.Close()
@@ -287,7 +265,7 @@ func (v *UnixVolume) WriteBlock(ctx context.Context, loc string, rdr io.Reader)
 		return err
 	}
 	defer v.unlock()
-	n, err := io.Copy(tmpfile, rdr)
+	n, err := tmpfile.Write(data)
 	v.os.stats.TickOutBytes(uint64(n))
 	if err != nil {
 		return fmt.Errorf("error writing %s: %s", bpath, err)
@@ -312,56 +290,10 @@ func (v *UnixVolume) WriteBlock(ctx context.Context, loc string, rdr io.Reader)
 	return nil
 }
 
-// Status returns a VolumeStatus struct describing the volume's
-// current state, or nil if an error occurs.
-func (v *UnixVolume) Status() *VolumeStatus {
-	fi, err := v.os.Stat(v.Root)
-	if err != nil {
-		v.logger.WithError(err).Error("stat failed")
-		return nil
-	}
-	// uint64() cast here supports GOOS=darwin where Dev is
-	// int32. If the device number is negative, the unsigned
-	// devnum won't be the real device number any more, but that's
-	// fine -- all we care about is getting the same number each
-	// time.
-	devnum := uint64(fi.Sys().(*syscall.Stat_t).Dev)
-
-	var fs syscall.Statfs_t
-	if err := syscall.Statfs(v.Root, &fs); err != nil {
-		v.logger.WithError(err).Error("statfs failed")
-		return nil
-	}
-	// These calculations match the way df calculates disk usage:
-	// "free" space is measured by fs.Bavail, but "used" space
-	// uses fs.Blocks - fs.Bfree.
-	free := fs.Bavail * uint64(fs.Bsize)
-	used := (fs.Blocks - fs.Bfree) * uint64(fs.Bsize)
-	return &VolumeStatus{
-		MountPoint: v.Root,
-		DeviceNum:  devnum,
-		BytesFree:  free,
-		BytesUsed:  used,
-	}
-}
-
 var blockDirRe = regexp.MustCompile(`^[0-9a-f]+$`)
 var blockFileRe = regexp.MustCompile(`^[0-9a-f]{32}$`)
 
-// IndexTo writes (to the given Writer) a list of blocks found on this
-// volume which begin with the specified prefix. If the prefix is an
-// empty string, IndexTo writes a complete list of blocks.
-//
-// Each block is given in the format
-//
-//	locator+size modification-time {newline}
-//
-// e.g.:
-//
-//	e4df392f86be161ca6ed3773a962b8f3+67108864 1388894303
-//	e4d41e6fd68460e0e3fc18cc746959d2+67108864 1377796043
-//	e4de7a2810f5554cd39b36d8ddb132ff+67108864 1388701136
-func (v *UnixVolume) IndexTo(prefix string, w io.Writer) error {
+func (v *UnixVolume) Index(ctx context.Context, prefix string, w io.Writer) error {
 	rootdir, err := v.os.Open(v.Root)
 	if err != nil {
 		return err
@@ -374,6 +306,9 @@ func (v *UnixVolume) IndexTo(prefix string, w io.Writer) error {
 		return err
 	}
 	for _, subdir := range subdirs {
+		if ctx.Err() != nil {
+			return ctx.Err()
+		}
 		if !strings.HasPrefix(subdir, prefix) && !strings.HasPrefix(prefix, subdir) {
 			// prefix excludes all blocks stored in this dir
 			continue
@@ -388,7 +323,9 @@ func (v *UnixVolume) IndexTo(prefix string, w io.Writer) error {
 			v.os.stats.TickOps("readdir")
 			v.os.stats.Tick(&v.os.stats.ReaddirOps)
 			dirents, err = os.ReadDir(blockdirpath)
-			if err == nil {
+			if ctx.Err() != nil {
+				return ctx.Err()
+			} else if err == nil {
 				break
 			} else if attempt < 5 && strings.Contains(err.Error(), "errno 523") {
 				// EBADCOOKIE (NFS stopped accepting
@@ -402,6 +339,9 @@ func (v *UnixVolume) IndexTo(prefix string, w io.Writer) error {
 		}
 
 		for _, dirent := range dirents {
+			if ctx.Err() != nil {
+				return ctx.Err()
+			}
 			fileInfo, err := dirent.Info()
 			if os.IsNotExist(err) {
 				// File disappeared between ReadDir() and now
@@ -430,11 +370,11 @@ func (v *UnixVolume) IndexTo(prefix string, w io.Writer) error {
 	return nil
 }
 
-// Trash trashes the block data from the unix storage
-// If BlobTrashLifetime == 0, the block is deleted
-// Else, the block is renamed as path/{loc}.trash.{deadline},
-// where deadline = now + BlobTrashLifetime
-func (v *UnixVolume) Trash(loc string) error {
+// BlockTrash trashes the block data from the unix storage.  If
+// BlobTrashLifetime == 0, the block is deleted; otherwise, the block
+// is renamed as path/{loc}.trash.{deadline}, where deadline = now +
+// BlobTrashLifetime.
+func (v *UnixVolume) BlockTrash(loc string) error {
 	// Touch() must be called before calling Write() on a block.  Touch()
 	// also uses lockfile().  This avoids a race condition between Write()
 	// and Trash() because either (a) the file will be trashed and Touch()
@@ -442,9 +382,6 @@ func (v *UnixVolume) Trash(loc string) error {
 	// be re-written), or (b) Touch() will update the file's timestamp and
 	// Trash() will read the correct up-to-date timestamp and choose not to
 	// trash the file.
-	if v.volume.ReadOnly && !v.volume.AllowTrashWhenReadOnly {
-		return MethodDisabledError
-	}
 	if err := v.lock(context.TODO()); err != nil {
 		return err
 	}
@@ -477,17 +414,13 @@ func (v *UnixVolume) Trash(loc string) error {
 	return v.os.Rename(p, fmt.Sprintf("%v.trash.%d", p, time.Now().Add(v.cluster.Collections.BlobTrashLifetime.Duration()).Unix()))
 }
 
-// Untrash moves block from trash back into store
+// BlockUntrash moves a block from the trash back into the store.
 // Look for path/{loc}.trash.{deadline} in storage,
 // and rename the first such file as path/{loc}
-func (v *UnixVolume) Untrash(loc string) (err error) {
-	if v.volume.ReadOnly {
-		return MethodDisabledError
-	}
-
+func (v *UnixVolume) BlockUntrash(hash string) error {
 	v.os.stats.TickOps("readdir")
 	v.os.stats.Tick(&v.os.stats.ReaddirOps)
-	files, err := ioutil.ReadDir(v.blockDir(loc))
+	files, err := ioutil.ReadDir(v.blockDir(hash))
 	if err != nil {
 		return err
 	}
@@ -497,11 +430,11 @@ func (v *UnixVolume) Untrash(loc string) (err error) {
 	}
 
 	foundTrash := false
-	prefix := fmt.Sprintf("%v.trash.", loc)
+	prefix := fmt.Sprintf("%v.trash.", hash)
 	for _, f := range files {
 		if strings.HasPrefix(f.Name(), prefix) {
 			foundTrash = true
-			err = v.os.Rename(v.blockPath(f.Name()), v.blockPath(loc))
+			err = v.os.Rename(v.blockPath(f.Name()), v.blockPath(hash))
 			if err == nil {
 				break
 			}
@@ -512,7 +445,7 @@ func (v *UnixVolume) Untrash(loc string) (err error) {
 		return os.ErrNotExist
 	}
 
-	return
+	return nil
 }
 
 // blockDir returns the fully qualified directory name for the directory
@@ -527,9 +460,9 @@ func (v *UnixVolume) blockPath(loc string) string {
 	return filepath.Join(v.blockDir(loc), loc)
 }
 
-// IsFull returns true if the free space on the volume is less than
-// MinFreeKilobytes.
+// isFull returns true if the free space on the volume is less than
+// the size of one full block (BlockSize).
-func (v *UnixVolume) IsFull() (isFull bool) {
+func (v *UnixVolume) isFull() (isFull bool) {
 	fullSymlink := v.Root + "/full"
 
 	// Check if the volume has been marked as full in the last hour.
@@ -543,9 +476,9 @@ func (v *UnixVolume) IsFull() (isFull bool) {
 	}
 
 	if avail, err := v.FreeDiskSpace(); err == nil {
-		isFull = avail < MinFreeKilobytes
+		isFull = avail < BlockSize
 	} else {
-		v.logger.WithError(err).Errorf("%s: FreeDiskSpace failed", v)
+		v.logger.WithError(err).Errorf("%s: FreeDiskSpace failed", v.DeviceID())
 		isFull = false
 	}
 
@@ -565,15 +498,11 @@ func (v *UnixVolume) FreeDiskSpace() (free uint64, err error) {
 	if err == nil {
 		// Statfs output is not guaranteed to measure free
 		// space in terms of 1K blocks.
-		free = fs.Bavail * uint64(fs.Bsize) / 1024
+		free = fs.Bavail * uint64(fs.Bsize)
 	}
 	return
 }
 
-func (v *UnixVolume) String() string {
-	return fmt.Sprintf("[UnixVolume %s]", v.Root)
-}
-
 // InternalStats returns I/O and filesystem ops counters.
 func (v *UnixVolume) InternalStats() interface{} {
 	return &v.os.stats
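
To make the rename convention in BlockTrash/BlockUntrash concrete, a
worked example (timestamps hypothetical): with BlobTrashLifetime = 24h,
trashing acbd18db4cc2f85cedef654fccc4a4d8 at Unix time 1700000000 renames

	{Root}/acb/acbd18db4cc2f85cedef654fccc4a4d8

to

	{Root}/acb/acbd18db4cc2f85cedef654fccc4a4d8.trash.1700086400

BlockUntrash later recovers the block by scanning its directory for the
"acbd18db4cc2f85cedef654fccc4a4d8.trash." prefix, and the trash emptier
deletes the file once the embedded deadline (1700000000 + 86400) passes.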
diff --git a/services/keepstore/unix_volume_test.go b/services/keepstore/unix_volume_test.go
index 75d9b22de5..a8dc4e809a 100644
--- a/services/keepstore/unix_volume_test.go
+++ b/services/keepstore/unix_volume_test.go
@@ -17,82 +17,74 @@ import (
 	"syscall"
 	"time"
 
-	"git.arvados.org/arvados.git/sdk/go/arvados"
 	"git.arvados.org/arvados.git/sdk/go/ctxlog"
 	"github.com/prometheus/client_golang/prometheus"
-	"github.com/sirupsen/logrus"
 	check "gopkg.in/check.v1"
 )
 
-type TestableUnixVolume struct {
+type testableUnixVolume struct {
 	UnixVolume
 	t TB
 }
 
-// PutRaw writes a Keep block directly into a UnixVolume, even if
-// the volume is readonly.
-func (v *TestableUnixVolume) PutRaw(locator string, data []byte) {
-	defer func(orig bool) {
-		v.volume.ReadOnly = orig
-	}(v.volume.ReadOnly)
-	v.volume.ReadOnly = false
-	err := v.Put(context.Background(), locator, data)
+func (v *testableUnixVolume) TouchWithDate(locator string, lastPut time.Time) {
+	err := syscall.Utime(v.blockPath(locator), &syscall.Utimbuf{Actime: lastPut.Unix(), Modtime: lastPut.Unix()})
 	if err != nil {
 		v.t.Fatal(err)
 	}
 }
 
-func (v *TestableUnixVolume) TouchWithDate(locator string, lastPut time.Time) {
-	err := syscall.Utime(v.blockPath(locator), &syscall.Utimbuf{lastPut.Unix(), lastPut.Unix()})
-	if err != nil {
-		v.t.Fatal(err)
-	}
-}
-
-func (v *TestableUnixVolume) Teardown() {
+func (v *testableUnixVolume) Teardown() {
 	if err := os.RemoveAll(v.Root); err != nil {
 		v.t.Error(err)
 	}
 }
 
-func (v *TestableUnixVolume) ReadWriteOperationLabelValues() (r, w string) {
+func (v *testableUnixVolume) ReadWriteOperationLabelValues() (r, w string) {
 	return "open", "create"
 }
 
-var _ = check.Suite(&UnixVolumeSuite{})
+var _ = check.Suite(&unixVolumeSuite{})
 
-type UnixVolumeSuite struct {
-	cluster *arvados.Cluster
-	volumes []*TestableUnixVolume
-	metrics *volumeMetricsVecs
+type unixVolumeSuite struct {
+	params  newVolumeParams
+	volumes []*testableUnixVolume
 }
 
-func (s *UnixVolumeSuite) SetUpTest(c *check.C) {
-	s.cluster = testCluster(c)
-	s.metrics = newVolumeMetricsVecs(prometheus.NewRegistry())
+func (s *unixVolumeSuite) SetUpTest(c *check.C) {
+	logger := ctxlog.TestLogger(c)
+	reg := prometheus.NewRegistry()
+	s.params = newVolumeParams{
+		UUID:        "zzzzz-nyw5e-999999999999999",
+		Cluster:     testCluster(c),
+		Logger:      logger,
+		MetricsVecs: newVolumeMetricsVecs(reg),
+		BufferPool:  newBufferPool(logger, 8, reg),
+	}
 }
 
-func (s *UnixVolumeSuite) TearDownTest(c *check.C) {
+func (s *unixVolumeSuite) TearDownTest(c *check.C) {
 	for _, v := range s.volumes {
 		v.Teardown()
 	}
 }
 
-func (s *UnixVolumeSuite) newTestableUnixVolume(c *check.C, cluster *arvados.Cluster, volume arvados.Volume, metrics *volumeMetricsVecs, serialize bool) *TestableUnixVolume {
+func (s *unixVolumeSuite) newTestableUnixVolume(c *check.C, params newVolumeParams, serialize bool) *testableUnixVolume {
 	d, err := ioutil.TempDir("", "volume_test")
 	c.Check(err, check.IsNil)
 	var locker sync.Locker
 	if serialize {
 		locker = &sync.Mutex{}
 	}
-	v := &TestableUnixVolume{
+	v := &testableUnixVolume{
 		UnixVolume: UnixVolume{
 			Root:    d,
 			locker:  locker,
-			cluster: cluster,
-			logger:  ctxlog.TestLogger(c),
-			volume:  volume,
-			metrics: metrics,
+			uuid:    params.UUID,
+			cluster: params.Cluster,
+			logger:  params.Logger,
+			volume:  params.ConfigVolume,
+			metrics: params.MetricsVecs,
 		},
 		t: c,
 	}
@@ -101,56 +93,52 @@ func (s *UnixVolumeSuite) newTestableUnixVolume(c *check.C, cluster *arvados.Clu
 	return v
 }
 
-// serialize = false; readonly = false
-func (s *UnixVolumeSuite) TestUnixVolumeWithGenericTests(c *check.C) {
-	DoGenericVolumeTests(c, false, func(t TB, cluster *arvados.Cluster, volume arvados.Volume, logger logrus.FieldLogger, metrics *volumeMetricsVecs) TestableVolume {
-		return s.newTestableUnixVolume(c, cluster, volume, metrics, false)
+func (s *unixVolumeSuite) TestUnixVolumeWithGenericTests(c *check.C) {
+	DoGenericVolumeTests(c, false, func(t TB, params newVolumeParams) TestableVolume {
+		return s.newTestableUnixVolume(c, params, false)
 	})
 }
 
-// serialize = false; readonly = true
-func (s *UnixVolumeSuite) TestUnixVolumeWithGenericTestsReadOnly(c *check.C) {
-	DoGenericVolumeTests(c, true, func(t TB, cluster *arvados.Cluster, volume arvados.Volume, logger logrus.FieldLogger, metrics *volumeMetricsVecs) TestableVolume {
-		return s.newTestableUnixVolume(c, cluster, volume, metrics, true)
+func (s *unixVolumeSuite) TestUnixVolumeWithGenericTests_ReadOnly(c *check.C) {
+	DoGenericVolumeTests(c, true, func(t TB, params newVolumeParams) TestableVolume {
+		return s.newTestableUnixVolume(c, params, false)
 	})
 }
 
-// serialize = true; readonly = false
-func (s *UnixVolumeSuite) TestUnixVolumeWithGenericTestsSerialized(c *check.C) {
-	DoGenericVolumeTests(c, false, func(t TB, cluster *arvados.Cluster, volume arvados.Volume, logger logrus.FieldLogger, metrics *volumeMetricsVecs) TestableVolume {
-		return s.newTestableUnixVolume(c, cluster, volume, metrics, false)
+func (s *unixVolumeSuite) TestUnixVolumeWithGenericTests_Serialized(c *check.C) {
+	DoGenericVolumeTests(c, false, func(t TB, params newVolumeParams) TestableVolume {
+		return s.newTestableUnixVolume(c, params, true)
 	})
 }
 
-// serialize = true; readonly = true
-func (s *UnixVolumeSuite) TestUnixVolumeHandlersWithGenericVolumeTests(c *check.C) {
-	DoGenericVolumeTests(c, true, func(t TB, cluster *arvados.Cluster, volume arvados.Volume, logger logrus.FieldLogger, metrics *volumeMetricsVecs) TestableVolume {
-		return s.newTestableUnixVolume(c, cluster, volume, metrics, true)
+func (s *unixVolumeSuite) TestUnixVolumeWithGenericTests_Readonly_Serialized(c *check.C) {
+	DoGenericVolumeTests(c, true, func(t TB, params newVolumeParams) TestableVolume {
+		return s.newTestableUnixVolume(c, params, true)
 	})
 }
 
-func (s *UnixVolumeSuite) TestGetNotFound(c *check.C) {
-	v := s.newTestableUnixVolume(c, s.cluster, arvados.Volume{Replication: 1}, s.metrics, false)
+func (s *unixVolumeSuite) TestGetNotFound(c *check.C) {
+	v := s.newTestableUnixVolume(c, s.params, true)
 	defer v.Teardown()
-	v.Put(context.Background(), TestHash, TestBlock)
+	v.BlockWrite(context.Background(), TestHash, TestBlock)
 
-	buf := make([]byte, BlockSize)
-	n, err := v.Get(context.Background(), TestHash2, buf)
+	buf := bytes.NewBuffer(nil)
+	_, err := v.BlockRead(context.Background(), TestHash2, buf)
 	switch {
 	case os.IsNotExist(err):
 		break
 	case err == nil:
-		c.Errorf("Read should have failed, returned %+q", buf[:n])
+		c.Errorf("Read should have failed, returned %+q", buf.Bytes())
 	default:
 		c.Errorf("Read expected ErrNotExist, got: %s", err)
 	}
 }
 
-func (s *UnixVolumeSuite) TestPut(c *check.C) {
-	v := s.newTestableUnixVolume(c, s.cluster, arvados.Volume{Replication: 1}, s.metrics, false)
+func (s *unixVolumeSuite) TestPut(c *check.C) {
+	v := s.newTestableUnixVolume(c, s.params, false)
 	defer v.Teardown()
 
-	err := v.Put(context.Background(), TestHash, TestBlock)
+	err := v.BlockWrite(context.Background(), TestHash, TestBlock)
 	if err != nil {
 		c.Error(err)
 	}
@@ -163,89 +151,41 @@ func (s *UnixVolumeSuite) TestPut(c *check.C) {
 	}
 }
 
-func (s *UnixVolumeSuite) TestPutBadVolume(c *check.C) {
-	v := s.newTestableUnixVolume(c, s.cluster, arvados.Volume{Replication: 1}, s.metrics, false)
+func (s *unixVolumeSuite) TestPutBadVolume(c *check.C) {
+	v := s.newTestableUnixVolume(c, s.params, false)
 	defer v.Teardown()
 
 	err := os.RemoveAll(v.Root)
 	c.Assert(err, check.IsNil)
-	err = v.Put(context.Background(), TestHash, TestBlock)
+	err = v.BlockWrite(context.Background(), TestHash, TestBlock)
 	c.Check(err, check.IsNil)
 }
 
-func (s *UnixVolumeSuite) TestUnixVolumeReadonly(c *check.C) {
-	v := s.newTestableUnixVolume(c, s.cluster, arvados.Volume{ReadOnly: true, Replication: 1}, s.metrics, false)
-	defer v.Teardown()
-
-	v.PutRaw(TestHash, TestBlock)
-
-	buf := make([]byte, BlockSize)
-	_, err := v.Get(context.Background(), TestHash, buf)
-	if err != nil {
-		c.Errorf("got err %v, expected nil", err)
-	}
-
-	err = v.Put(context.Background(), TestHash, TestBlock)
-	if err != MethodDisabledError {
-		c.Errorf("got err %v, expected MethodDisabledError", err)
-	}
-
-	err = v.Touch(TestHash)
-	if err != MethodDisabledError {
-		c.Errorf("got err %v, expected MethodDisabledError", err)
-	}
-
-	err = v.Trash(TestHash)
-	if err != MethodDisabledError {
-		c.Errorf("got err %v, expected MethodDisabledError", err)
-	}
-}
-
-func (s *UnixVolumeSuite) TestIsFull(c *check.C) {
-	v := s.newTestableUnixVolume(c, s.cluster, arvados.Volume{Replication: 1}, s.metrics, false)
+func (s *unixVolumeSuite) TestIsFull(c *check.C) {
+	v := s.newTestableUnixVolume(c, s.params, false)
 	defer v.Teardown()
 
 	fullPath := v.Root + "/full"
 	now := fmt.Sprintf("%d", time.Now().Unix())
 	os.Symlink(now, fullPath)
-	if !v.IsFull() {
-		c.Errorf("%s: claims not to be full", v)
+	if !v.isFull() {
+		c.Error("volume claims not to be full")
 	}
 	os.Remove(fullPath)
 
 	// Test with an expired /full link.
 	expired := fmt.Sprintf("%d", time.Now().Unix()-3605)
 	os.Symlink(expired, fullPath)
-	if v.IsFull() {
-		c.Errorf("%s: should no longer be full", v)
+	if v.isFull() {
+		c.Error("volume should no longer be full")
 	}
 }
 
-func (s *UnixVolumeSuite) TestNodeStatus(c *check.C) {
-	v := s.newTestableUnixVolume(c, s.cluster, arvados.Volume{Replication: 1}, s.metrics, false)
+func (s *unixVolumeSuite) TestUnixVolumeGetFuncWorkerError(c *check.C) {
+	v := s.newTestableUnixVolume(c, s.params, false)
 	defer v.Teardown()
 
-	// Get node status and make a basic sanity check.
-	volinfo := v.Status()
-	if volinfo.MountPoint != v.Root {
-		c.Errorf("GetNodeStatus mount_point %s, expected %s", volinfo.MountPoint, v.Root)
-	}
-	if volinfo.DeviceNum == 0 {
-		c.Errorf("uninitialized device_num in %v", volinfo)
-	}
-	if volinfo.BytesFree == 0 {
-		c.Errorf("uninitialized bytes_free in %v", volinfo)
-	}
-	if volinfo.BytesUsed == 0 {
-		c.Errorf("uninitialized bytes_used in %v", volinfo)
-	}
-}
-
-func (s *UnixVolumeSuite) TestUnixVolumeGetFuncWorkerError(c *check.C) {
-	v := s.newTestableUnixVolume(c, s.cluster, arvados.Volume{Replication: 1}, s.metrics, false)
-	defer v.Teardown()
-
-	v.Put(context.Background(), TestHash, TestBlock)
+	v.BlockWrite(context.Background(), TestHash, TestBlock)
 	mockErr := errors.New("Mock error")
 	err := v.getFunc(context.Background(), v.blockPath(TestHash), func(rdr io.Reader) error {
 		return mockErr
@@ -255,8 +195,8 @@ func (s *UnixVolumeSuite) TestUnixVolumeGetFuncWorkerError(c *check.C) {
 	}
 }
 
-func (s *UnixVolumeSuite) TestUnixVolumeGetFuncFileError(c *check.C) {
-	v := s.newTestableUnixVolume(c, s.cluster, arvados.Volume{Replication: 1}, s.metrics, false)
+func (s *unixVolumeSuite) TestUnixVolumeGetFuncFileError(c *check.C) {
+	v := s.newTestableUnixVolume(c, s.params, false)
 	defer v.Teardown()
 
 	funcCalled := false
@@ -272,11 +212,11 @@ func (s *UnixVolumeSuite) TestUnixVolumeGetFuncFileError(c *check.C) {
 	}
 }
 
-func (s *UnixVolumeSuite) TestUnixVolumeGetFuncWorkerWaitsOnMutex(c *check.C) {
-	v := s.newTestableUnixVolume(c, s.cluster, arvados.Volume{Replication: 1}, s.metrics, false)
+func (s *unixVolumeSuite) TestUnixVolumeGetFuncWorkerWaitsOnMutex(c *check.C) {
+	v := s.newTestableUnixVolume(c, s.params, false)
 	defer v.Teardown()
 
-	v.Put(context.Background(), TestHash, TestBlock)
+	v.BlockWrite(context.Background(), TestHash, TestBlock)
 
 	mtx := NewMockMutex()
 	v.locker = mtx
@@ -307,80 +247,66 @@ func (s *UnixVolumeSuite) TestUnixVolumeGetFuncWorkerWaitsOnMutex(c *check.C) {
 	}
 }
 
-func (s *UnixVolumeSuite) TestUnixVolumeCompare(c *check.C) {
-	v := s.newTestableUnixVolume(c, s.cluster, arvados.Volume{Replication: 1}, s.metrics, false)
-	defer v.Teardown()
-
-	v.Put(context.Background(), TestHash, TestBlock)
-	err := v.Compare(context.Background(), TestHash, TestBlock)
-	if err != nil {
-		c.Errorf("Got err %q, expected nil", err)
-	}
+type MockMutex struct {
+	AllowLock   chan struct{}
+	AllowUnlock chan struct{}
+}
 
-	err = v.Compare(context.Background(), TestHash, []byte("baddata"))
-	if err != CollisionError {
-		c.Errorf("Got err %q, expected %q", err, CollisionError)
+func NewMockMutex() *MockMutex {
+	return &MockMutex{
+		AllowLock:   make(chan struct{}),
+		AllowUnlock: make(chan struct{}),
 	}
+}
 
-	v.Put(context.Background(), TestHash, []byte("baddata"))
-	err = v.Compare(context.Background(), TestHash, TestBlock)
-	if err != DiskHashError {
-		c.Errorf("Got err %q, expected %q", err, DiskHashError)
-	}
+// Lock waits for someone to send to AllowLock.
+func (m *MockMutex) Lock() {
+	<-m.AllowLock
+}
 
-	if os.Getuid() == 0 {
-		c.Log("skipping 'permission denied' check when running as root")
-	} else {
-		p := fmt.Sprintf("%s/%s/%s", v.Root, TestHash[:3], TestHash)
-		err = os.Chmod(p, 000)
-		c.Assert(err, check.IsNil)
-		err = v.Compare(context.Background(), TestHash, TestBlock)
-		c.Check(err, check.ErrorMatches, ".*permission denied.*")
-	}
+// Unlock waits for someone to send to AllowUnlock.
+func (m *MockMutex) Unlock() {
+	<-m.AllowUnlock
 }
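
The channel-based MockMutex above makes lock ordering deterministic: a goroutine under test parks inside Lock() until the test sends on AllowLock, and likewise for Unlock(). A minimal usage sketch (hypothetical test body, not part of this diff):

    mtx := NewMockMutex()
    done := make(chan struct{})
    go func() {
        mtx.Lock()   // parks here until the test sends on AllowLock
        mtx.Unlock() // parks here until the test sends on AllowUnlock
        close(done)
    }()
    mtx.AllowLock <- struct{}{}   // release the Lock() call
    mtx.AllowUnlock <- struct{}{} // release the Unlock() call
    <-done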
 
-func (s *UnixVolumeSuite) TestUnixVolumeContextCancelPut(c *check.C) {
-	v := s.newTestableUnixVolume(c, s.cluster, arvados.Volume{Replication: 1}, s.metrics, true)
+func (s *unixVolumeSuite) TestUnixVolumeContextCancelBlockWrite(c *check.C) {
+	v := s.newTestableUnixVolume(c, s.params, true)
 	defer v.Teardown()
 	v.locker.Lock()
+	defer v.locker.Unlock()
 	ctx, cancel := context.WithCancel(context.Background())
 	go func() {
 		time.Sleep(50 * time.Millisecond)
 		cancel()
-		time.Sleep(50 * time.Millisecond)
-		v.locker.Unlock()
 	}()
-	err := v.Put(ctx, TestHash, TestBlock)
+	err := v.BlockWrite(ctx, TestHash, TestBlock)
 	if err != context.Canceled {
-		c.Errorf("Put() returned %s -- expected short read / canceled", err)
+		c.Errorf("BlockWrite() returned %s -- expected short read / canceled", err)
 	}
 }
 
-func (s *UnixVolumeSuite) TestUnixVolumeContextCancelGet(c *check.C) {
-	v := s.newTestableUnixVolume(c, s.cluster, arvados.Volume{Replication: 1}, s.metrics, false)
+func (s *unixVolumeSuite) TestUnixVolumeContextCancelBlockRead(c *check.C) {
+	v := s.newTestableUnixVolume(c, s.params, true)
 	defer v.Teardown()
-	bpath := v.blockPath(TestHash)
-	v.PutRaw(TestHash, TestBlock)
-	os.Remove(bpath)
-	err := syscall.Mkfifo(bpath, 0600)
+	err := v.BlockWrite(context.Background(), TestHash, TestBlock)
 	if err != nil {
-		c.Fatalf("Mkfifo %s: %s", bpath, err)
+		c.Fatal(err)
 	}
-	defer os.Remove(bpath)
 	ctx, cancel := context.WithCancel(context.Background())
+	v.locker.Lock()
+	defer v.locker.Unlock()
 	go func() {
 		time.Sleep(50 * time.Millisecond)
 		cancel()
 	}()
-	buf := make([]byte, len(TestBlock))
-	n, err := v.Get(ctx, TestHash, buf)
-	if n == len(TestBlock) || err != context.Canceled {
-		c.Errorf("Get() returned %d, %s -- expected short read / canceled", n, err)
+	n, err := v.BlockRead(ctx, TestHash, io.Discard)
+	if n > 0 || err != context.Canceled {
+		c.Errorf("BlockRead() returned %d, %s -- expected short read / canceled", n, err)
 	}
 }
 
-func (s *UnixVolumeSuite) TestStats(c *check.C) {
-	vol := s.newTestableUnixVolume(c, s.cluster, arvados.Volume{Replication: 1}, s.metrics, false)
+func (s *unixVolumeSuite) TestStats(c *check.C) {
+	vol := s.newTestableUnixVolume(c, s.params, false)
 	stats := func() string {
 		buf, err := json.Marshal(vol.InternalStats())
 		c.Check(err, check.IsNil)
@@ -390,8 +316,7 @@ func (s *UnixVolumeSuite) TestStats(c *check.C) {
 	c.Check(stats(), check.Matches, `.*"StatOps":1,.*`) // (*UnixVolume)check() calls Stat() once
 	c.Check(stats(), check.Matches, `.*"Errors":0,.*`)
 
-	loc := "acbd18db4cc2f85cedef654fccc4a4d8"
-	_, err := vol.Get(context.Background(), loc, make([]byte, 3))
+	_, err := vol.BlockRead(context.Background(), fooHash, io.Discard)
 	c.Check(err, check.NotNil)
 	c.Check(stats(), check.Matches, `.*"StatOps":[^0],.*`)
 	c.Check(stats(), check.Matches, `.*"Errors":[^0],.*`)
@@ -400,33 +325,33 @@ func (s *UnixVolumeSuite) TestStats(c *check.C) {
 	c.Check(stats(), check.Matches, `.*"OpenOps":0,.*`)
 	c.Check(stats(), check.Matches, `.*"CreateOps":0,.*`)
 
-	err = vol.Put(context.Background(), loc, []byte("foo"))
+	err = vol.BlockWrite(context.Background(), fooHash, []byte("foo"))
 	c.Check(err, check.IsNil)
 	c.Check(stats(), check.Matches, `.*"OutBytes":3,.*`)
 	c.Check(stats(), check.Matches, `.*"CreateOps":1,.*`)
 	c.Check(stats(), check.Matches, `.*"OpenOps":0,.*`)
 	c.Check(stats(), check.Matches, `.*"UtimesOps":1,.*`)
 
-	err = vol.Touch(loc)
+	err = vol.BlockTouch(fooHash)
 	c.Check(err, check.IsNil)
 	c.Check(stats(), check.Matches, `.*"FlockOps":1,.*`)
 	c.Check(stats(), check.Matches, `.*"OpenOps":1,.*`)
 	c.Check(stats(), check.Matches, `.*"UtimesOps":2,.*`)
 
-	_, err = vol.Get(context.Background(), loc, make([]byte, 3))
-	c.Check(err, check.IsNil)
-	err = vol.Compare(context.Background(), loc, []byte("foo"))
+	buf := bytes.NewBuffer(nil)
+	_, err = vol.BlockRead(context.Background(), fooHash, buf)
 	c.Check(err, check.IsNil)
-	c.Check(stats(), check.Matches, `.*"InBytes":6,.*`)
-	c.Check(stats(), check.Matches, `.*"OpenOps":3,.*`)
+	c.Check(buf.String(), check.Equals, "foo")
+	c.Check(stats(), check.Matches, `.*"InBytes":3,.*`)
+	c.Check(stats(), check.Matches, `.*"OpenOps":2,.*`)
 
-	err = vol.Trash(loc)
+	err = vol.BlockTrash(fooHash)
 	c.Check(err, check.IsNil)
 	c.Check(stats(), check.Matches, `.*"FlockOps":2,.*`)
 }
 
-func (s *UnixVolumeSuite) TestSkipUnusedDirs(c *check.C) {
-	vol := s.newTestableUnixVolume(c, s.cluster, arvados.Volume{Replication: 1}, s.metrics, false)
+func (s *unixVolumeSuite) TestSkipUnusedDirs(c *check.C) {
+	vol := s.newTestableUnixVolume(c, s.params, false)
 
 	err := os.Mkdir(vol.UnixVolume.Root+"/aaa", 0777)
 	c.Assert(err, check.IsNil)
diff --git a/services/keepstore/volume.go b/services/keepstore/volume.go
index f597ff5781..41a0eba86f 100644
--- a/services/keepstore/volume.go
+++ b/services/keepstore/volume.go
@@ -6,426 +6,35 @@ package keepstore
 
 import (
 	"context"
-	"crypto/rand"
-	"fmt"
 	"io"
-	"math/big"
-	"sort"
-	"sync/atomic"
 	"time"
 
 	"git.arvados.org/arvados.git/sdk/go/arvados"
 	"github.com/sirupsen/logrus"
 )
 
-type BlockWriter interface {
-	// WriteBlock reads all data from r, writes it to a backing
-	// store as "loc", and returns the number of bytes written.
-	WriteBlock(ctx context.Context, loc string, r io.Reader) error
-}
-
-type BlockReader interface {
-	// ReadBlock retrieves data previously stored as "loc" and
-	// writes it to w.
-	ReadBlock(ctx context.Context, loc string, w io.Writer) error
-}
-
-var driver = map[string]func(*arvados.Cluster, arvados.Volume, logrus.FieldLogger, *volumeMetricsVecs) (Volume, error){}
-
-// A Volume is an interface representing a Keep back-end storage unit:
-// for example, a single mounted disk, a RAID array, an Amazon S3 volume,
-// etc.
-type Volume interface {
-	// Get a block: copy the block data into buf, and return the
-	// number of bytes copied.
-	//
-	// loc is guaranteed to consist of 32 or more lowercase hex
-	// digits.
-	//
-	// Get should not verify the integrity of the data: it should
-	// just return whatever was found in its backing
-	// store. (Integrity checking is the caller's responsibility.)
-	//
-	// If an error is encountered that prevents it from
-	// retrieving the data, that error should be returned so the
-	// caller can log (and send to the client) a more useful
-	// message.
-	//
-	// If the error is "not found", and there's no particular
-	// reason to expect the block to be found (other than that a
-	// caller is asking for it), the returned error should satisfy
-	// os.IsNotExist(err): this is a normal condition and will not
-	// be logged as an error (except that a 404 will appear in the
-	// access log if the block is not found on any other volumes
-	// either).
-	//
-	// If the data in the backing store is bigger than len(buf),
-	// then Get is permitted to return an error without reading
-	// any of the data.
-	//
-	// len(buf) will not exceed BlockSize.
-	Get(ctx context.Context, loc string, buf []byte) (int, error)
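
Because callers treat os.IsNotExist(err) as a routine miss rather than a failure, a driver has to translate its backend's native not-found error accordingly. A sketch of that translation, assuming the standard errors and os packages (ErrBackendNotFound is an illustrative stand-in for a real SDK error):

    // Map a backend-specific miss onto os.ErrNotExist so that
    // os.IsNotExist(err) is true, as the contract above requires.
    var ErrBackendNotFound = errors.New("backend: key not found")

    func translateNotFound(err error) error {
        if errors.Is(err, ErrBackendNotFound) {
            return os.ErrNotExist
        }
        return err
    }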
-
-	// Compare the given data with the stored data (i.e., what Get
-	// would return). If equal, return nil. If not, return
-	// CollisionError or DiskHashError (depending on whether the
-	// data on disk matches the expected hash), or whatever error
-	// was encountered opening/reading the stored data.
-	Compare(ctx context.Context, loc string, data []byte) error
-
-	// Put writes a block to an underlying storage device.
-	//
-	// loc is as described in Get.
-	//
-	// len(block) is guaranteed to be between 0 and BlockSize.
-	//
-	// If a block is already stored under the same name (loc) with
-	// different content, Put must either overwrite the existing
-	// data with the new data or return a non-nil error. When
-	// overwriting existing data, it must never leave the storage
-	// device in an inconsistent state: a subsequent call to Get
-	// must return either the entire old block, the entire new
-	// block, or an error. (An implementation that cannot perform
-	// atomic updates must leave the old data alone and return an
-	// error.)
-	//
-	// Put also sets the timestamp for the given locator to the
-	// current time.
-	//
-	// Put must return a non-nil error unless it can guarantee
-	// that the entire block has been written and flushed to
-	// persistent storage, and that its timestamp is current. Of
-	// course, this guarantee is only as good as the underlying
-	// storage device, but it is Put's responsibility to at least
-	// get whatever guarantee is offered by the storage device.
-	//
-	// Put should not verify that loc==hash(block): this is the
-	// caller's responsibility.
-	Put(ctx context.Context, loc string, block []byte) error
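
The atomic-overwrite requirement is why filesystem-backed drivers conventionally write to a temporary file and rename it into place: rename(2) is atomic on POSIX, so a concurrent Get observes either the whole old block or the whole new one. A minimal sketch under that assumption (not the actual UnixVolume implementation):

    // Write data to a temp file in dir, fsync, then atomically
    // rename over the final block path.
    func writeBlockAtomic(dir, loc string, data []byte) error {
        tmp, err := os.CreateTemp(dir, "tmp."+loc+".")
        if err != nil {
            return err
        }
        defer os.Remove(tmp.Name()) // harmless no-op once the rename succeeds
        if _, err := tmp.Write(data); err != nil {
            tmp.Close()
            return err
        }
        if err := tmp.Sync(); err != nil { // flush to persistent storage
            tmp.Close()
            return err
        }
        if err := tmp.Close(); err != nil {
            return err
        }
        return os.Rename(tmp.Name(), filepath.Join(dir, loc))
    }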
-
-	// Touch sets the timestamp for the given locator to the
-	// current time.
-	//
-	// loc is as described in Get.
-	//
-	// If invoked at time t0, Touch must guarantee that a
-	// subsequent call to Mtime will return a timestamp no older
-	// than {t0 minus one second}. For example, if Touch is called
-	// at 2015-07-07T01:23:45.67890123Z, it is acceptable for a
-	// subsequent Mtime to return any of the following:
-	//
-	//   - 2015-07-07T01:23:45.00000000Z
-	//   - 2015-07-07T01:23:45.67890123Z
-	//   - 2015-07-07T01:23:46.67890123Z
-	//   - 2015-07-08T00:00:00.00000000Z
-	//
-	// It is not acceptable for a subsequent Mtime to return
-	// either of the following:
-	//
-	//   - 2015-07-07T00:00:00.00000000Z -- ERROR
-	//   - 2015-07-07T01:23:44.00000000Z -- ERROR
-	//
-	// Touch must return a non-nil error if the timestamp cannot
-	// be updated.
-	Touch(loc string) error
-
-	// Mtime returns the stored timestamp for the given locator.
-	//
-	// loc is as described in Get.
-	//
-	// Mtime must return a non-nil error if the given block is not
-	// found or the timestamp could not be retrieved.
-	Mtime(loc string) (time.Time, error)
-
-	// IndexTo writes a complete list of locators with the given
-	// prefix for which Get() can retrieve data.
-	//
-	// prefix consists of zero or more lowercase hexadecimal
-	// digits.
-	//
-	// Each locator must be written to the given writer using the
-	// following format:
-	//
-	//   loc "+" size " " timestamp "\n"
-	//
-	// where:
-	//
-	//   - size is the number of bytes of content, given as a
-	//     decimal number with one or more digits
-	//
-	//   - timestamp is the timestamp stored for the locator,
-	//     given as a decimal number of seconds after January 1,
-	//     1970 UTC.
-	//
-	// IndexTo must not write any other data to writer: for
-	// example, it must not write any blank lines.
-	//
-	// If an error makes it impossible to provide a complete
-	// index, IndexTo must return a non-nil error. It is
-	// acceptable to return a non-nil error after writing a
-	// partial index to writer.
-	//
-	// The resulting index is not expected to be sorted in any
-	// particular order.
-	IndexTo(prefix string, writer io.Writer) error
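
Concretely, each index row in the format above can be emitted with a single fmt.Fprintf; this sketch follows the comment's stated units (size in bytes, timestamp in seconds since the epoch):

    // One index row: loc "+" size " " timestamp "\n"
    // e.g. "e4d909c290d0fb1ca068ffaddf22cbd0+44 1707850014"
    func writeIndexRow(w io.Writer, loc string, size int64, mtime time.Time) error {
        _, err := fmt.Fprintf(w, "%s+%d %d\n", loc, size, mtime.Unix())
        return err
    }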
-
-	// Trash moves the block data from the underlying storage
-	// device to trash area. The block then stays in trash for
-	// BlobTrashLifetime before it is actually deleted.
-	//
-	// loc is as described in Get.
-	//
-	// If the timestamp for the given locator is newer than
-	// BlobSigningTTL, Trash must not trash the data.
-	//
-	// If a Trash operation overlaps with any Touch or Put
-	// operations on the same locator, the implementation must
-	// ensure one of the following outcomes:
-	//
-	//   - Touch and Put return a non-nil error, or
-	//   - Trash does not trash the block, or
-	//   - Both of the above.
-	//
-	// If it is possible for the storage device to be accessed by
-	// a different process or host, the synchronization mechanism
-	// should also guard against races with other processes and
-	// hosts. If such a mechanism is not available, there must be
-	// a mechanism for detecting unsafe configurations, alerting
-	// the operator, and aborting or falling back to a read-only
-	// state. In other words, running multiple keepstore processes
-	// with the same underlying storage device must either work
-	// reliably or fail outright.
-	//
-	// Corollary: A successful Touch or Put guarantees a block
-	// will not be trashed for at least BlobSigningTTL seconds.
-	Trash(loc string) error
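
On a shared filesystem, the overlap guarantee above is typically provided by an advisory lock held around both Trash and Touch. A rough sketch using flock(2) (illustrative only; the real driver's locking is more involved):

    // Hold an exclusive flock on the block file while mutating it,
    // so overlapping Trash/Touch calls serialize even across processes.
    func withFlock(path string, fn func() error) error {
        f, err := os.Open(path)
        if err != nil {
            return err
        }
        defer f.Close()
        if err := syscall.Flock(int(f.Fd()), syscall.LOCK_EX); err != nil {
            return err
        }
        defer syscall.Flock(int(f.Fd()), syscall.LOCK_UN)
        return fn()
    }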
-
-	// Untrash moves block from trash back into store
-	Untrash(loc string) error
-
-	// Status returns a *VolumeStatus representing the current
-	// in-use and available storage capacity and an
-	// implementation-specific volume identifier (e.g., "mount
-	// point" for a UnixVolume).
-	Status() *VolumeStatus
-
-	// String returns an identifying label for this volume,
-	// suitable for including in log messages. It should contain
-	// enough information to uniquely identify the underlying
-	// storage device, but should not contain any credentials or
-	// secrets.
-	String() string
-
-	// EmptyTrash looks for trashed blocks that exceeded
-	// BlobTrashLifetime and deletes them from the volume.
+// volume is the interface to a back-end storage device.
+type volume interface {
+	BlockRead(ctx context.Context, hash string, writeTo io.Writer) (int, error)
+	BlockWrite(ctx context.Context, hash string, data []byte) error
+	DeviceID() string
+	BlockTouch(hash string) error
+	BlockTrash(hash string) error
+	BlockUntrash(hash string) error
+	Index(ctx context.Context, prefix string, writeTo io.Writer) error
+	Mtime(hash string) (time.Time, error)
 	EmptyTrash()
-
-	// Return a globally unique ID of the underlying storage
-	// device if possible, otherwise "".
-	GetDeviceID() string
-}
-
-// A VolumeWithExamples provides example configs to display in the
-// -help message.
-type VolumeWithExamples interface {
-	Volume
-	Examples() []Volume
 }
 
-// A VolumeManager tells callers which volumes can read, which volumes
-// can write, and on which volume the next write should be attempted.
-type VolumeManager interface {
-	// Mounts returns all mounts (volume attachments).
-	Mounts() []*VolumeMount
-
-	// Lookup returns the mount with the given UUID. Returns nil
-	// if the mount does not exist. If write==true, returns nil if
-	// the mount is not writable.
-	Lookup(uuid string, write bool) *VolumeMount
-
-	// AllReadable returns all mounts.
-	AllReadable() []*VolumeMount
-
-	// AllWritable returns all mounts that aren't known to be in
-	// a read-only state. (There is no guarantee that a write to
-	// one will succeed, though.)
-	AllWritable() []*VolumeMount
-
-	// NextWritable returns the volume where the next new block
-	// should be written. A VolumeManager can select a volume in
-	// order to distribute activity across spindles, fill up disks
-	// with more free space, etc.
-	NextWritable() *VolumeMount
-
-	// VolumeStats returns the ioStats used for tracking stats for
-	// the given Volume.
-	VolumeStats(Volume) *ioStats
-
-	// Close shuts down the volume manager cleanly.
-	Close()
-}
-
-// A VolumeMount is an attachment of a Volume to a VolumeManager.
-type VolumeMount struct {
-	arvados.KeepMount
-	Volume
-}
-
-// Generate a UUID the way API server would for a "KeepVolumeMount"
-// object.
-func (*VolumeMount) generateUUID() string {
-	var max big.Int
-	_, ok := max.SetString("zzzzzzzzzzzzzzz", 36)
-	if !ok {
-		panic("big.Int parse failed")
-	}
-	r, err := rand.Int(rand.Reader, &max)
-	if err != nil {
-		panic(err)
-	}
-	return fmt.Sprintf("zzzzz-ivpuk-%015s", r.Text(36))
-}
-
-// RRVolumeManager is a round-robin VolumeManager: the Nth call to
-// NextWritable returns the (N % len(writables))th writable Volume
-// (where writables are all Volumes v where v.Writable()==true).
-type RRVolumeManager struct {
-	mounts    []*VolumeMount
-	mountMap  map[string]*VolumeMount
-	readables []*VolumeMount
-	writables []*VolumeMount
-	counter   uint32
-	iostats   map[Volume]*ioStats
-}
-
-func makeRRVolumeManager(logger logrus.FieldLogger, cluster *arvados.Cluster, myURL arvados.URL, metrics *volumeMetricsVecs) (*RRVolumeManager, error) {
-	vm := &RRVolumeManager{
-		iostats: make(map[Volume]*ioStats),
-	}
-	vm.mountMap = make(map[string]*VolumeMount)
-	for uuid, cfgvol := range cluster.Volumes {
-		va, ok := cfgvol.AccessViaHosts[myURL]
-		if !ok && len(cfgvol.AccessViaHosts) > 0 {
-			continue
-		}
-		dri, ok := driver[cfgvol.Driver]
-		if !ok {
-			return nil, fmt.Errorf("volume %s: invalid driver %q", uuid, cfgvol.Driver)
-		}
-		vol, err := dri(cluster, cfgvol, logger, metrics)
-		if err != nil {
-			return nil, fmt.Errorf("error initializing volume %s: %s", uuid, err)
-		}
-		sc := cfgvol.StorageClasses
-		if len(sc) == 0 {
-			sc = map[string]bool{"default": true}
-		}
-		repl := cfgvol.Replication
-		if repl < 1 {
-			repl = 1
-		}
-		mnt := &VolumeMount{
-			KeepMount: arvados.KeepMount{
-				UUID:           uuid,
-				DeviceID:       vol.GetDeviceID(),
-				AllowWrite:     !va.ReadOnly && !cfgvol.ReadOnly,
-				AllowTrash:     !va.ReadOnly && (!cfgvol.ReadOnly || cfgvol.AllowTrashWhenReadOnly),
-				Replication:    repl,
-				StorageClasses: sc,
-			},
-			Volume: vol,
-		}
-		vm.iostats[vol] = &ioStats{}
-		vm.mounts = append(vm.mounts, mnt)
-		vm.mountMap[uuid] = mnt
-		vm.readables = append(vm.readables, mnt)
-		if mnt.KeepMount.AllowWrite {
-			vm.writables = append(vm.writables, mnt)
-		}
-		logger.Printf("started volume %s (%s), AllowWrite=%v, AllowTrash=%v", uuid, vol, mnt.AllowWrite, mnt.AllowTrash)
-	}
-	// pri(mnt): return highest priority of any storage class
-	// offered by mnt
-	pri := func(mnt *VolumeMount) int {
-		any, best := false, 0
-		for class := range mnt.KeepMount.StorageClasses {
-			if p := cluster.StorageClasses[class].Priority; !any || best < p {
-				best = p
-				any = true
-			}
-		}
-		return best
-	}
-	// less(a,b): sort first by highest priority of any offered
-	// storage class (highest->lowest), then by volume UUID
-	less := func(a, b *VolumeMount) bool {
-		if pa, pb := pri(a), pri(b); pa != pb {
-			return pa > pb
-		} else {
-			return a.KeepMount.UUID < b.KeepMount.UUID
-		}
-	}
-	sort.Slice(vm.readables, func(i, j int) bool {
-		return less(vm.readables[i], vm.readables[j])
-	})
-	sort.Slice(vm.writables, func(i, j int) bool {
-		return less(vm.writables[i], vm.writables[j])
-	})
-	sort.Slice(vm.mounts, func(i, j int) bool {
-		return less(vm.mounts[i], vm.mounts[j])
-	})
-	return vm, nil
-}
-
-func (vm *RRVolumeManager) Mounts() []*VolumeMount {
-	return vm.mounts
-}
-
-func (vm *RRVolumeManager) Lookup(uuid string, needWrite bool) *VolumeMount {
-	if mnt, ok := vm.mountMap[uuid]; ok && (!needWrite || mnt.AllowWrite) {
-		return mnt
-	}
-	return nil
-}
-
-// AllReadable returns an array of all readable volumes
-func (vm *RRVolumeManager) AllReadable() []*VolumeMount {
-	return vm.readables
-}
-
-// AllWritable returns writable volumes, sorted by priority/uuid. Used
-// by CompareAndTouch to ensure higher-priority volumes are checked
-// first.
-func (vm *RRVolumeManager) AllWritable() []*VolumeMount {
-	return vm.writables
-}
-
-// NextWritable returns writable volumes, rotated by vm.counter so
-// each volume gets a turn to be first. Used by PutBlock to distribute
-// new data across available volumes.
-func (vm *RRVolumeManager) NextWritable() []*VolumeMount {
-	if len(vm.writables) == 0 {
-		return nil
-	}
-	offset := (int(atomic.AddUint32(&vm.counter, 1)) - 1) % len(vm.writables)
-	return append(append([]*VolumeMount(nil), vm.writables[offset:]...), vm.writables[:offset]...)
-}
-
-// VolumeStats returns an ioStats for the given volume.
-func (vm *RRVolumeManager) VolumeStats(v Volume) *ioStats {
-	return vm.iostats[v]
-}
-
-// Close the RRVolumeManager
-func (vm *RRVolumeManager) Close() {
-}
+type volumeDriver func(newVolumeParams) (volume, error)
 
-// VolumeStatus describes the current condition of a volume
-type VolumeStatus struct {
-	MountPoint string
-	DeviceNum  uint64
-	BytesFree  uint64
-	BytesUsed  uint64
+type newVolumeParams struct {
+	UUID         string
+	Cluster      *arvados.Cluster
+	ConfigVolume arvados.Volume
+	Logger       logrus.FieldLogger
+	MetricsVecs  *volumeMetricsVecs
+	BufferPool   *bufferPool
 }
 
 // ioStats tracks I/O statistics for a volume or server
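
To make the slimmed-down contract concrete: below is a toy in-memory type satisfying the new volume interface, with a constructor matching the volumeDriver signature. It is only a sketch — it ignores locking, metrics, the buffer pool, and trash deadlines; the real drivers live in the *_volume.go files.

    // Toy volume: blocks and trash live in maps; no durability.
    type memVolume struct {
        uuid   string
        blocks map[string][]byte    // hash -> data
        mtimes map[string]time.Time // hash -> stored timestamp
        trash  map[string][]byte    // hash -> trashed data
    }

    func newMemVolume(params newVolumeParams) (volume, error) {
        return &memVolume{
            uuid:   params.UUID,
            blocks: map[string][]byte{},
            mtimes: map[string]time.Time{},
            trash:  map[string][]byte{},
        }, nil
    }

    func (v *memVolume) DeviceID() string { return "mem://" + v.uuid }

    func (v *memVolume) BlockRead(ctx context.Context, hash string, w io.Writer) (int, error) {
        data, ok := v.blocks[hash]
        if !ok {
            return 0, os.ErrNotExist
        }
        return w.Write(data)
    }

    func (v *memVolume) BlockWrite(ctx context.Context, hash string, data []byte) error {
        v.blocks[hash] = append([]byte(nil), data...)
        v.mtimes[hash] = time.Now()
        return nil
    }

    func (v *memVolume) BlockTouch(hash string) error {
        if _, ok := v.blocks[hash]; !ok {
            return os.ErrNotExist
        }
        v.mtimes[hash] = time.Now()
        return nil
    }

    func (v *memVolume) BlockTrash(hash string) error {
        data, ok := v.blocks[hash]
        if !ok {
            return os.ErrNotExist
        }
        v.trash[hash] = data
        delete(v.blocks, hash)
        return nil
    }

    func (v *memVolume) BlockUntrash(hash string) error {
        data, ok := v.trash[hash]
        if !ok {
            return os.ErrNotExist
        }
        v.blocks[hash] = data
        delete(v.trash, hash)
        return nil
    }

    func (v *memVolume) Mtime(hash string) (time.Time, error) {
        t, ok := v.mtimes[hash]
        if !ok {
            return time.Time{}, os.ErrNotExist
        }
        return t, nil
    }

    func (v *memVolume) Index(ctx context.Context, prefix string, w io.Writer) error {
        for hash, data := range v.blocks {
            if strings.HasPrefix(hash, prefix) {
                if _, err := fmt.Fprintf(w, "%s+%d %d\n", hash, len(data), v.mtimes[hash].Unix()); err != nil {
                    return err
                }
            }
        }
        return nil
    }

    func (v *memVolume) EmptyTrash() { v.trash = map[string][]byte{} }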
diff --git a/services/keepstore/volume_generic_test.go b/services/keepstore/volume_generic_test.go
index 2180412431..22667743dd 100644
--- a/services/keepstore/volume_generic_test.go
+++ b/services/keepstore/volume_generic_test.go
@@ -9,11 +9,13 @@ import (
 	"context"
 	"crypto/md5"
 	"fmt"
+	"io"
 	"os"
 	"regexp"
 	"sort"
 	"strconv"
 	"strings"
+	"sync"
 	"time"
 
 	"git.arvados.org/arvados.git/sdk/go/arvados"
@@ -39,7 +41,7 @@ type TB interface {
 // A TestableVolumeFactory returns a new TestableVolume. The factory
 // function, and the TestableVolume it returns, can use "t" to write
 // logs, fail the current test, etc.
-type TestableVolumeFactory func(t TB, cluster *arvados.Cluster, volume arvados.Volume, logger logrus.FieldLogger, metrics *volumeMetricsVecs) TestableVolume
+type TestableVolumeFactory func(t TB, params newVolumeParams) TestableVolume
 
 // DoGenericVolumeTests runs a set of tests that every TestableVolume
 // is expected to pass. It calls factory to create a new TestableVolume
@@ -51,16 +53,6 @@ func DoGenericVolumeTests(t TB, readonly bool, factory TestableVolumeFactory) {
 	s.testGet(t, factory)
 	s.testGetNoSuchBlock(t, factory)
 
-	s.testCompareNonexistent(t, factory)
-	s.testCompareSameContent(t, factory, TestHash, TestBlock)
-	s.testCompareSameContent(t, factory, EmptyHash, EmptyBlock)
-	s.testCompareWithCollision(t, factory, TestHash, TestBlock, []byte("baddata"))
-	s.testCompareWithCollision(t, factory, TestHash, TestBlock, EmptyBlock)
-	s.testCompareWithCollision(t, factory, EmptyHash, EmptyBlock, TestBlock)
-	s.testCompareWithCorruptStoredData(t, factory, TestHash, TestBlock, []byte("baddata"))
-	s.testCompareWithCorruptStoredData(t, factory, TestHash, TestBlock, EmptyBlock)
-	s.testCompareWithCorruptStoredData(t, factory, EmptyHash, EmptyBlock, []byte("baddata"))
-
 	if !readonly {
 		s.testPutBlockWithSameContent(t, factory, TestHash, TestBlock)
 		s.testPutBlockWithSameContent(t, factory, EmptyHash, EmptyBlock)
@@ -76,7 +68,7 @@ func DoGenericVolumeTests(t TB, readonly bool, factory TestableVolumeFactory) {
 
 	s.testMtimeNoSuchBlock(t, factory)
 
-	s.testIndexTo(t, factory)
+	s.testIndex(t, factory)
 
 	if !readonly {
 		s.testDeleteNewBlock(t, factory)
@@ -84,33 +76,24 @@ func DoGenericVolumeTests(t TB, readonly bool, factory TestableVolumeFactory) {
 	}
 	s.testDeleteNoSuchBlock(t, factory)
 
-	s.testStatus(t, factory)
-
 	s.testMetrics(t, readonly, factory)
 
-	s.testString(t, factory)
-
-	if readonly {
-		s.testUpdateReadOnly(t, factory)
-	}
-
 	s.testGetConcurrent(t, factory)
 	if !readonly {
 		s.testPutConcurrent(t, factory)
-
 		s.testPutFullBlock(t, factory)
+		s.testTrashUntrash(t, readonly, factory)
+		s.testTrashEmptyTrashUntrash(t, factory)
 	}
-
-	s.testTrashUntrash(t, readonly, factory)
-	s.testTrashEmptyTrashUntrash(t, factory)
 }
 
 type genericVolumeSuite struct {
-	cluster  *arvados.Cluster
-	volume   arvados.Volume
-	logger   logrus.FieldLogger
-	metrics  *volumeMetricsVecs
-	registry *prometheus.Registry
+	cluster    *arvados.Cluster
+	volume     arvados.Volume
+	logger     logrus.FieldLogger
+	metrics    *volumeMetricsVecs
+	registry   *prometheus.Registry
+	bufferPool *bufferPool
 }
 
 func (s *genericVolumeSuite) setup(t TB) {
@@ -118,10 +101,18 @@ func (s *genericVolumeSuite) setup(t TB) {
 	s.logger = ctxlog.TestLogger(t)
 	s.registry = prometheus.NewRegistry()
 	s.metrics = newVolumeMetricsVecs(s.registry)
+	s.bufferPool = newBufferPool(s.logger, 8, s.registry)
 }
 
 func (s *genericVolumeSuite) newVolume(t TB, factory TestableVolumeFactory) TestableVolume {
-	return factory(t, s.cluster, s.volume, s.logger, s.metrics)
+	return factory(t, newVolumeParams{
+		UUID:         "zzzzz-nyw5e-999999999999999",
+		Cluster:      s.cluster,
+		ConfigVolume: s.volume,
+		Logger:       s.logger,
+		MetricsVecs:  s.metrics,
+		BufferPool:   s.bufferPool,
+	})
 }
 
 // Put a test block, get it and verify content
@@ -131,95 +122,30 @@ func (s *genericVolumeSuite) testGet(t TB, factory TestableVolumeFactory) {
 	v := s.newVolume(t, factory)
 	defer v.Teardown()
 
-	v.PutRaw(TestHash, TestBlock)
-
-	buf := make([]byte, BlockSize)
-	n, err := v.Get(context.Background(), TestHash, buf)
+	err := v.BlockWrite(context.Background(), TestHash, TestBlock)
 	if err != nil {
-		t.Fatal(err)
-	}
-
-	if bytes.Compare(buf[:n], TestBlock) != 0 {
-		t.Errorf("expected %s, got %s", string(TestBlock), string(buf))
-	}
-}
-
-// Invoke get on a block that does not exist in volume; should result in error
-// Test should pass for both writable and read-only volumes
-func (s *genericVolumeSuite) testGetNoSuchBlock(t TB, factory TestableVolumeFactory) {
-	s.setup(t)
-	v := s.newVolume(t, factory)
-	defer v.Teardown()
-
-	buf := make([]byte, BlockSize)
-	if _, err := v.Get(context.Background(), TestHash2, buf); err == nil {
-		t.Errorf("Expected error while getting non-existing block %v", TestHash2)
-	}
-}
-
-// Compare() should return os.ErrNotExist if the block does not exist.
-// Otherwise, writing new data causes CompareAndTouch() to generate
-// error logs even though everything is working fine.
-func (s *genericVolumeSuite) testCompareNonexistent(t TB, factory TestableVolumeFactory) {
-	s.setup(t)
-	v := s.newVolume(t, factory)
-	defer v.Teardown()
-
-	err := v.Compare(context.Background(), TestHash, TestBlock)
-	if err != os.ErrNotExist {
-		t.Errorf("Got err %T %q, expected os.ErrNotExist", err, err)
+		t.Error(err)
 	}
-}
 
-// Put a test block and compare the locator with same content
-// Test should pass for both writable and read-only volumes
-func (s *genericVolumeSuite) testCompareSameContent(t TB, factory TestableVolumeFactory, testHash string, testData []byte) {
-	s.setup(t)
-	v := s.newVolume(t, factory)
-	defer v.Teardown()
-
-	v.PutRaw(testHash, testData)
-
-	// Compare the block locator with same content
-	err := v.Compare(context.Background(), testHash, testData)
+	buf := bytes.NewBuffer(nil)
+	_, err = v.BlockRead(context.Background(), TestHash, buf)
 	if err != nil {
-		t.Errorf("Got err %q, expected nil", err)
+		t.Error(err)
 	}
-}
-
-// Test behavior of Compare() when stored data matches expected
-// checksum but differs from new data we need to store. Requires
-// testHash = md5(testDataA).
-//
-// Test should pass for both writable and read-only volumes
-func (s *genericVolumeSuite) testCompareWithCollision(t TB, factory TestableVolumeFactory, testHash string, testDataA, testDataB []byte) {
-	s.setup(t)
-	v := s.newVolume(t, factory)
-	defer v.Teardown()
-
-	v.PutRaw(testHash, testDataA)
-
-	// Compare the block locator with different content; collision
-	err := v.Compare(context.Background(), TestHash, testDataB)
-	if err == nil {
-		t.Errorf("Got err nil, expected error due to collision")
+	if bytes.Compare(buf.Bytes(), TestBlock) != 0 {
+		t.Errorf("expected %s, got %s", TestBlock, buf.String())
 	}
 }
 
-// Test behavior of Compare() when stored data has become
-// corrupted. Requires testHash = md5(testDataA) != md5(testDataB).
-//
+// Invoke BlockRead on a block that does not exist in the volume; expect an error
 // Test should pass for both writable and read-only volumes
-func (s *genericVolumeSuite) testCompareWithCorruptStoredData(t TB, factory TestableVolumeFactory, testHash string, testDataA, testDataB []byte) {
+func (s *genericVolumeSuite) testGetNoSuchBlock(t TB, factory TestableVolumeFactory) {
 	s.setup(t)
 	v := s.newVolume(t, factory)
 	defer v.Teardown()
 
-	v.PutRaw(TestHash, testDataB)
-
-	err := v.Compare(context.Background(), testHash, testDataA)
-	if err == nil || err == CollisionError {
-		t.Errorf("Got err %+v, expected non-collision error", err)
+	if _, err := v.BlockRead(context.Background(), barHash, io.Discard); err == nil {
+		t.Errorf("Expected error while getting non-existing block %v", barHash)
 	}
 }
 
@@ -230,12 +156,12 @@ func (s *genericVolumeSuite) testPutBlockWithSameContent(t TB, factory TestableV
 	v := s.newVolume(t, factory)
 	defer v.Teardown()
 
-	err := v.Put(context.Background(), testHash, testData)
+	err := v.BlockWrite(context.Background(), testHash, testData)
 	if err != nil {
 		t.Errorf("Got err putting block %q: %q, expected nil", TestBlock, err)
 	}
 
-	err = v.Put(context.Background(), testHash, testData)
+	err = v.BlockWrite(context.Background(), testHash, testData)
 	if err != nil {
 		t.Errorf("Got err putting block second time %q: %q, expected nil", TestBlock, err)
 	}
@@ -248,23 +174,23 @@ func (s *genericVolumeSuite) testPutBlockWithDifferentContent(t TB, factory Test
 	v := s.newVolume(t, factory)
 	defer v.Teardown()
 
-	v.PutRaw(testHash, testDataA)
+	v.BlockWrite(context.Background(), testHash, testDataA)
 
-	putErr := v.Put(context.Background(), testHash, testDataB)
-	buf := make([]byte, BlockSize)
-	n, getErr := v.Get(context.Background(), testHash, buf)
+	putErr := v.BlockWrite(context.Background(), testHash, testDataB)
+	buf := bytes.NewBuffer(nil)
+	_, getErr := v.BlockRead(context.Background(), testHash, buf)
 	if putErr == nil {
 		// Put must not return a nil error unless it has
 		// overwritten the existing data.
-		if bytes.Compare(buf[:n], testDataB) != 0 {
-			t.Errorf("Put succeeded but Get returned %+q, expected %+q", buf[:n], testDataB)
+		if buf.String() != string(testDataB) {
+			t.Errorf("Put succeeded but Get returned %+q, expected %+q", buf, testDataB)
 		}
 	} else {
 		// It is permissible for Put to fail, but it must
 		// leave us with either the original data, the new
 		// data, or nothing at all.
-		if getErr == nil && bytes.Compare(buf[:n], testDataA) != 0 && bytes.Compare(buf[:n], testDataB) != 0 {
-			t.Errorf("Put failed but Get returned %+q, which is neither %+q nor %+q", buf[:n], testDataA, testDataB)
+		if getErr == nil && buf.String() != string(testDataA) && buf.String() != string(testDataB) {
+			t.Errorf("Put failed but Get returned %+q, which is neither %+q nor %+q", buf, testDataA, testDataB)
 		}
 	}
 }
@@ -276,46 +202,48 @@ func (s *genericVolumeSuite) testPutMultipleBlocks(t TB, factory TestableVolumeF
 	v := s.newVolume(t, factory)
 	defer v.Teardown()
 
-	err := v.Put(context.Background(), TestHash, TestBlock)
+	err := v.BlockWrite(context.Background(), TestHash, TestBlock)
 	if err != nil {
 		t.Errorf("Got err putting block %q: %q, expected nil", TestBlock, err)
 	}
 
-	err = v.Put(context.Background(), TestHash2, TestBlock2)
+	err = v.BlockWrite(context.Background(), TestHash2, TestBlock2)
 	if err != nil {
 		t.Errorf("Got err putting block %q: %q, expected nil", TestBlock2, err)
 	}
 
-	err = v.Put(context.Background(), TestHash3, TestBlock3)
+	err = v.BlockWrite(context.Background(), TestHash3, TestBlock3)
 	if err != nil {
 		t.Errorf("Got err putting block %q: %q, expected nil", TestBlock3, err)
 	}
 
-	data := make([]byte, BlockSize)
-	n, err := v.Get(context.Background(), TestHash, data)
+	buf := bytes.NewBuffer(nil)
+	_, err = v.BlockRead(context.Background(), TestHash, buf)
 	if err != nil {
 		t.Error(err)
 	} else {
-		if bytes.Compare(data[:n], TestBlock) != 0 {
-			t.Errorf("Block present, but got %+q, expected %+q", data[:n], TestBlock)
+		if bytes.Compare(buf.Bytes(), TestBlock) != 0 {
+			t.Errorf("Block present, but got %+q, expected %+q", buf, TestBlock)
 		}
 	}
 
-	n, err = v.Get(context.Background(), TestHash2, data)
+	buf.Reset()
+	_, err = v.BlockRead(context.Background(), TestHash2, buf)
 	if err != nil {
 		t.Error(err)
 	} else {
-		if bytes.Compare(data[:n], TestBlock2) != 0 {
-			t.Errorf("Block present, but got %+q, expected %+q", data[:n], TestBlock2)
+		if bytes.Compare(buf.Bytes(), TestBlock2) != 0 {
+			t.Errorf("Block present, but got %+q, expected %+q", buf, TestBlock2)
 		}
 	}
 
-	n, err = v.Get(context.Background(), TestHash3, data)
+	buf.Reset()
+	_, err = v.BlockRead(context.Background(), TestHash3, buf)
 	if err != nil {
 		t.Error(err)
 	} else {
-		if bytes.Compare(data[:n], TestBlock3) != 0 {
-			t.Errorf("Block present, but to %+q, expected %+q", data[:n], TestBlock3)
+		if bytes.Compare(buf.Bytes(), TestBlock3) != 0 {
+			t.Errorf("Block present, but got %+q, expected %+q", buf, TestBlock3)
 		}
 	}
 }
@@ -328,13 +256,13 @@ func (s *genericVolumeSuite) testPutAndTouch(t TB, factory TestableVolumeFactory
 	v := s.newVolume(t, factory)
 	defer v.Teardown()
 
-	if err := v.Put(context.Background(), TestHash, TestBlock); err != nil {
+	if err := v.BlockWrite(context.Background(), TestHash, TestBlock); err != nil {
 		t.Error(err)
 	}
 
 	// We'll verify { t0 < threshold < t1 }, where t0 is the
-	// existing block's timestamp on disk before Put() and t1 is
-	// its timestamp after Put().
+	// existing block's timestamp on disk before BlockWrite() and t1 is
+	// its timestamp after BlockWrite().
 	threshold := time.Now().Add(-time.Second)
 
 	// Set the stored block's mtime far enough in the past that we
@@ -348,7 +276,7 @@ func (s *genericVolumeSuite) testPutAndTouch(t TB, factory TestableVolumeFactory
 	}
 
 	// Write the same block again.
-	if err := v.Put(context.Background(), TestHash, TestBlock); err != nil {
+	if err := v.BlockWrite(context.Background(), TestHash, TestBlock); err != nil {
 		t.Error(err)
 	}
 
@@ -367,7 +295,7 @@ func (s *genericVolumeSuite) testTouchNoSuchBlock(t TB, factory TestableVolumeFa
 	v := s.newVolume(t, factory)
 	defer v.Teardown()
 
-	if err := v.Touch(TestHash); err == nil {
+	if err := v.BlockTouch(TestHash); err == nil {
 		t.Error("Expected error when attempted to touch a non-existing block")
 	}
 }
@@ -384,12 +312,12 @@ func (s *genericVolumeSuite) testMtimeNoSuchBlock(t TB, factory TestableVolumeFa
 	}
 }
 
-// Put a few blocks and invoke IndexTo with:
+// Put a few blocks and invoke Index with:
 // * no prefix
 // * with a prefix
 // * with no such prefix
 // Test should pass for both writable and read-only volumes
-func (s *genericVolumeSuite) testIndexTo(t TB, factory TestableVolumeFactory) {
+func (s *genericVolumeSuite) testIndex(t TB, factory TestableVolumeFactory) {
 	s.setup(t)
 	v := s.newVolume(t, factory)
 	defer v.Teardown()
@@ -400,9 +328,9 @@ func (s *genericVolumeSuite) testIndexTo(t TB, factory TestableVolumeFactory) {
 	minMtime := time.Now().UTC().UnixNano()
 	minMtime -= minMtime % 1e9
 
-	v.PutRaw(TestHash, TestBlock)
-	v.PutRaw(TestHash2, TestBlock2)
-	v.PutRaw(TestHash3, TestBlock3)
+	v.BlockWrite(context.Background(), TestHash, TestBlock)
+	v.BlockWrite(context.Background(), TestHash2, TestBlock2)
+	v.BlockWrite(context.Background(), TestHash3, TestBlock3)
 
 	maxMtime := time.Now().UTC().UnixNano()
 	if maxMtime%1e9 > 0 {
@@ -412,13 +340,13 @@ func (s *genericVolumeSuite) testIndexTo(t TB, factory TestableVolumeFactory) {
 
 	// Blocks whose names aren't Keep hashes should be omitted from
 	// index
-	v.PutRaw("fffffffffnotreallyahashfffffffff", nil)
-	v.PutRaw("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF", nil)
-	v.PutRaw("f0000000000000000000000000000000f", nil)
-	v.PutRaw("f00", nil)
+	v.BlockWrite(context.Background(), "fffffffffnotreallyahashfffffffff", nil)
+	v.BlockWrite(context.Background(), "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF", nil)
+	v.BlockWrite(context.Background(), "f0000000000000000000000000000000f", nil)
+	v.BlockWrite(context.Background(), "f00", nil)
 
 	buf := new(bytes.Buffer)
-	v.IndexTo("", buf)
+	v.Index(context.Background(), "", buf)
 	indexRows := strings.Split(string(buf.Bytes()), "\n")
 	sort.Strings(indexRows)
 	sortedIndex := strings.Join(indexRows, "\n")
@@ -441,7 +369,7 @@ func (s *genericVolumeSuite) testIndexTo(t TB, factory TestableVolumeFactory) {
 
 	for _, prefix := range []string{"f", "f15", "f15ac"} {
 		buf = new(bytes.Buffer)
-		v.IndexTo(prefix, buf)
+		v.Index(context.Background(), prefix, buf)
 
 		m, err := regexp.MatchString(`^`+TestHash2+`\+\d+ \d+\n$`, string(buf.Bytes()))
 		if err != nil {
@@ -453,11 +381,11 @@ func (s *genericVolumeSuite) testIndexTo(t TB, factory TestableVolumeFactory) {
 
 	for _, prefix := range []string{"zero", "zip", "zilch"} {
 		buf = new(bytes.Buffer)
-		err := v.IndexTo(prefix, buf)
+		err := v.Index(context.Background(), prefix, buf)
 		if err != nil {
-			t.Errorf("Got error on IndexTo with no such prefix %v", err.Error())
+			t.Errorf("Got error on Index with no such prefix %v", err.Error())
 		} else if buf.Len() != 0 {
-			t.Errorf("Expected empty list for IndexTo with no such prefix %s", prefix)
+			t.Errorf("Expected empty list for Index with no such prefix %s", prefix)
 		}
 	}
 }
@@ -471,17 +399,17 @@ func (s *genericVolumeSuite) testDeleteNewBlock(t TB, factory TestableVolumeFact
 	v := s.newVolume(t, factory)
 	defer v.Teardown()
 
-	v.Put(context.Background(), TestHash, TestBlock)
+	v.BlockWrite(context.Background(), TestHash, TestBlock)
 
-	if err := v.Trash(TestHash); err != nil {
+	if err := v.BlockTrash(TestHash); err != nil {
 		t.Error(err)
 	}
-	data := make([]byte, BlockSize)
-	n, err := v.Get(context.Background(), TestHash, data)
+	buf := bytes.NewBuffer(nil)
+	_, err := v.BlockRead(context.Background(), TestHash, buf)
 	if err != nil {
 		t.Error(err)
-	} else if bytes.Compare(data[:n], TestBlock) != 0 {
-		t.Errorf("Got data %+q, expected %+q", data[:n], TestBlock)
+	} else if buf.String() != string(TestBlock) {
+		t.Errorf("Got data %+q, expected %+q", buf.String(), TestBlock)
 	}
 }
 
@@ -494,36 +422,30 @@ func (s *genericVolumeSuite) testDeleteOldBlock(t TB, factory TestableVolumeFact
 	v := s.newVolume(t, factory)
 	defer v.Teardown()
 
-	v.Put(context.Background(), TestHash, TestBlock)
+	v.BlockWrite(context.Background(), TestHash, TestBlock)
 	v.TouchWithDate(TestHash, time.Now().Add(-2*s.cluster.Collections.BlobSigningTTL.Duration()))
 
-	if err := v.Trash(TestHash); err != nil {
+	if err := v.BlockTrash(TestHash); err != nil {
 		t.Error(err)
 	}
-	data := make([]byte, BlockSize)
-	if _, err := v.Get(context.Background(), TestHash, data); err == nil || !os.IsNotExist(err) {
+	if _, err := v.BlockRead(context.Background(), TestHash, io.Discard); err == nil || !os.IsNotExist(err) {
 		t.Errorf("os.IsNotExist(%v) should have been true", err)
 	}
 
 	_, err := v.Mtime(TestHash)
 	if err == nil || !os.IsNotExist(err) {
-		t.Fatalf("os.IsNotExist(%v) should have been true", err)
-	}
-
-	err = v.Compare(context.Background(), TestHash, TestBlock)
-	if err == nil || !os.IsNotExist(err) {
-		t.Fatalf("os.IsNotExist(%v) should have been true", err)
+		t.Errorf("os.IsNotExist(%v) should have been true", err)
 	}
 
 	indexBuf := new(bytes.Buffer)
-	v.IndexTo("", indexBuf)
+	v.Index(context.Background(), "", indexBuf)
 	if strings.Contains(string(indexBuf.Bytes()), TestHash) {
-		t.Fatalf("Found trashed block in IndexTo")
+		t.Errorf("Found trashed block in Index")
 	}
 
-	err = v.Touch(TestHash)
+	err = v.BlockTouch(TestHash)
 	if err == nil || !os.IsNotExist(err) {
-		t.Fatalf("os.IsNotExist(%v) should have been true", err)
+		t.Errorf("os.IsNotExist(%v) should have been true", err)
 	}
 }
 
@@ -534,33 +456,11 @@ func (s *genericVolumeSuite) testDeleteNoSuchBlock(t TB, factory TestableVolumeF
 	v := s.newVolume(t, factory)
 	defer v.Teardown()
 
-	if err := v.Trash(TestHash2); err == nil {
+	if err := v.BlockTrash(TestHash2); err == nil {
 		t.Errorf("Expected error when attempting to delete a non-existing block")
 	}
 }
 
-// Invoke Status and verify that VolumeStatus is returned
-// Test should pass for both writable and read-only volumes
-func (s *genericVolumeSuite) testStatus(t TB, factory TestableVolumeFactory) {
-	s.setup(t)
-	v := s.newVolume(t, factory)
-	defer v.Teardown()
-
-	// Get node status and make a basic sanity check.
-	status := v.Status()
-	if status.DeviceNum == 0 {
-		t.Errorf("uninitialized device_num in %v", status)
-	}
-
-	if status.BytesFree == 0 {
-		t.Errorf("uninitialized bytes_free in %v", status)
-	}
-
-	if status.BytesUsed == 0 {
-		t.Errorf("uninitialized bytes_used in %v", status)
-	}
-}
-
 func getValueFrom(cv *prometheus.CounterVec, lbls prometheus.Labels) float64 {
 	c, _ := cv.GetMetricWith(lbls)
 	pb := &dto.Metric{}
@@ -575,7 +475,7 @@ func (s *genericVolumeSuite) testMetrics(t TB, readonly bool, factory TestableVo
 	v := s.newVolume(t, factory)
 	defer v.Teardown()
 
-	opsC, _, ioC := s.metrics.getCounterVecsFor(prometheus.Labels{"device_id": v.GetDeviceID()})
+	opsC, _, ioC := s.metrics.getCounterVecsFor(prometheus.Labels{"device_id": v.DeviceID()})
 
 	if ioC == nil {
 		t.Error("ioBytes CounterVec is nil")
@@ -600,7 +500,7 @@ func (s *genericVolumeSuite) testMetrics(t TB, readonly bool, factory TestableVo
 
 	// Test Put if volume is writable
 	if !readonly {
-		err = v.Put(context.Background(), TestHash, TestBlock)
+		err = v.BlockWrite(context.Background(), TestHash, TestBlock)
 		if err != nil {
 			t.Errorf("Got err putting block %q: %q, expected nil", TestBlock, err)
 		}
@@ -614,13 +514,12 @@ func (s *genericVolumeSuite) testMetrics(t TB, readonly bool, factory TestableVo
 			t.Error("ioBytes{direction=out} counter shouldn't be zero")
 		}
 	} else {
-		v.PutRaw(TestHash, TestBlock)
+		v.BlockWrite(context.Background(), TestHash, TestBlock)
 	}
 
-	buf := make([]byte, BlockSize)
-	_, err = v.Get(context.Background(), TestHash, buf)
+	_, err = v.BlockRead(context.Background(), TestHash, io.Discard)
 	if err != nil {
-		t.Fatal(err)
+		t.Error(err)
 	}
 
 	// Check that the operations counter increased
@@ -634,63 +533,6 @@ func (s *genericVolumeSuite) testMetrics(t TB, readonly bool, factory TestableVo
 	}
 }
 
-// Invoke String for the volume; expect non-empty result
-// Test should pass for both writable and read-only volumes
-func (s *genericVolumeSuite) testString(t TB, factory TestableVolumeFactory) {
-	s.setup(t)
-	v := s.newVolume(t, factory)
-	defer v.Teardown()
-
-	if id := v.String(); len(id) == 0 {
-		t.Error("Got empty string for v.String()")
-	}
-}
-
-// Putting, updating, touching, and deleting blocks from a read-only volume result in error.
-// Test is intended for only read-only volumes
-func (s *genericVolumeSuite) testUpdateReadOnly(t TB, factory TestableVolumeFactory) {
-	s.setup(t)
-	v := s.newVolume(t, factory)
-	defer v.Teardown()
-
-	v.PutRaw(TestHash, TestBlock)
-	buf := make([]byte, BlockSize)
-
-	// Get from read-only volume should succeed
-	_, err := v.Get(context.Background(), TestHash, buf)
-	if err != nil {
-		t.Errorf("got err %v, expected nil", err)
-	}
-
-	// Put a new block to read-only volume should result in error
-	err = v.Put(context.Background(), TestHash2, TestBlock2)
-	if err == nil {
-		t.Errorf("Expected error when putting block in a read-only volume")
-	}
-	_, err = v.Get(context.Background(), TestHash2, buf)
-	if err == nil {
-		t.Errorf("Expected error when getting block whose put in read-only volume failed")
-	}
-
-	// Touch a block in read-only volume should result in error
-	err = v.Touch(TestHash)
-	if err == nil {
-		t.Errorf("Expected error when touching block in a read-only volume")
-	}
-
-	// Delete a block from a read-only volume should result in error
-	err = v.Trash(TestHash)
-	if err == nil {
-		t.Errorf("Expected error when deleting block from a read-only volume")
-	}
-
-	// Overwriting an existing block in read-only volume should result in error
-	err = v.Put(context.Background(), TestHash, TestBlock)
-	if err == nil {
-		t.Errorf("Expected error when putting block in a read-only volume")
-	}
-}
-
 // Launch concurrent Gets
 // Test should pass for both writable and read-only volumes
 func (s *genericVolumeSuite) testGetConcurrent(t TB, factory TestableVolumeFactory) {
@@ -698,43 +540,43 @@ func (s *genericVolumeSuite) testGetConcurrent(t TB, factory TestableVolumeFacto
 	v := s.newVolume(t, factory)
 	defer v.Teardown()
 
-	v.PutRaw(TestHash, TestBlock)
-	v.PutRaw(TestHash2, TestBlock2)
-	v.PutRaw(TestHash3, TestBlock3)
+	v.BlockWrite(context.Background(), TestHash, TestBlock)
+	v.BlockWrite(context.Background(), TestHash2, TestBlock2)
+	v.BlockWrite(context.Background(), TestHash3, TestBlock3)
 
 	sem := make(chan int)
 	go func() {
-		buf := make([]byte, BlockSize)
-		n, err := v.Get(context.Background(), TestHash, buf)
+		buf := bytes.NewBuffer(nil)
+		_, err := v.BlockRead(context.Background(), TestHash, buf)
 		if err != nil {
 			t.Errorf("err1: %v", err)
 		}
-		if bytes.Compare(buf[:n], TestBlock) != 0 {
-			t.Errorf("buf should be %s, is %s", string(TestBlock), string(buf[:n]))
+		if buf.String() != string(TestBlock) {
+			t.Errorf("buf should be %s, is %s", TestBlock, buf)
 		}
 		sem <- 1
 	}()
 
 	go func() {
-		buf := make([]byte, BlockSize)
-		n, err := v.Get(context.Background(), TestHash2, buf)
+		buf := bytes.NewBuffer(nil)
+		_, err := v.BlockRead(context.Background(), TestHash2, buf)
 		if err != nil {
 			t.Errorf("err2: %v", err)
 		}
-		if bytes.Compare(buf[:n], TestBlock2) != 0 {
-			t.Errorf("buf should be %s, is %s", string(TestBlock2), string(buf[:n]))
+		if buf.String() != string(TestBlock2) {
+			t.Errorf("buf should be %s, is %s", TestBlock2, buf)
 		}
 		sem <- 1
 	}()
 
 	go func() {
-		buf := make([]byte, BlockSize)
-		n, err := v.Get(context.Background(), TestHash3, buf)
+		buf := bytes.NewBuffer(nil)
+		_, err := v.BlockRead(context.Background(), TestHash3, buf)
 		if err != nil {
 			t.Errorf("err3: %v", err)
 		}
-		if bytes.Compare(buf[:n], TestBlock3) != 0 {
-			t.Errorf("buf should be %s, is %s", string(TestBlock3), string(buf[:n]))
+		if buf.String() != string(TestBlock3) {
+			t.Errorf("buf should be %s, is %s", TestBlock3, buf)
 		}
 		sem <- 1
 	}()
@@ -752,60 +594,38 @@ func (s *genericVolumeSuite) testPutConcurrent(t TB, factory TestableVolumeFacto
 	v := s.newVolume(t, factory)
 	defer v.Teardown()
 
-	sem := make(chan int)
-	go func(sem chan int) {
-		err := v.Put(context.Background(), TestHash, TestBlock)
+	blks := []struct {
+		hash string
+		data []byte
+	}{
+		{hash: TestHash, data: TestBlock},
+		{hash: TestHash2, data: TestBlock2},
+		{hash: TestHash3, data: TestBlock3},
+	}
+
+	var wg sync.WaitGroup
+	for _, blk := range blks {
+		blk := blk
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			err := v.BlockWrite(context.Background(), blk.hash, blk.data)
+			if err != nil {
+				t.Errorf("%s: %v", blk.hash, err)
+			}
+		}()
+	}
+	wg.Wait()
+
+	// Check that we actually wrote the blocks.
+	for _, blk := range blks {
+		buf := bytes.NewBuffer(nil)
+		_, err := v.BlockRead(context.Background(), blk.hash, buf)
 		if err != nil {
-			t.Errorf("err1: %v", err)
+			t.Errorf("get %s: %v", blk.hash, err)
+		} else if buf.String() != string(blk.data) {
+			t.Errorf("get %s: expected %s, got %s", blk.hash, blk.data, buf)
 		}
-		sem <- 1
-	}(sem)
-
-	go func(sem chan int) {
-		err := v.Put(context.Background(), TestHash2, TestBlock2)
-		if err != nil {
-			t.Errorf("err2: %v", err)
-		}
-		sem <- 1
-	}(sem)
-
-	go func(sem chan int) {
-		err := v.Put(context.Background(), TestHash3, TestBlock3)
-		if err != nil {
-			t.Errorf("err3: %v", err)
-		}
-		sem <- 1
-	}(sem)
-
-	// Wait for all goroutines to finish
-	for done := 0; done < 3; done++ {
-		<-sem
-	}
-
-	// Double check that we actually wrote the blocks we expected to write.
-	buf := make([]byte, BlockSize)
-	n, err := v.Get(context.Background(), TestHash, buf)
-	if err != nil {
-		t.Errorf("Get #1: %v", err)
-	}
-	if bytes.Compare(buf[:n], TestBlock) != 0 {
-		t.Errorf("Get #1: expected %s, got %s", string(TestBlock), string(buf[:n]))
-	}
-
-	n, err = v.Get(context.Background(), TestHash2, buf)
-	if err != nil {
-		t.Errorf("Get #2: %v", err)
-	}
-	if bytes.Compare(buf[:n], TestBlock2) != 0 {
-		t.Errorf("Get #2: expected %s, got %s", string(TestBlock2), string(buf[:n]))
-	}
-
-	n, err = v.Get(context.Background(), TestHash3, buf)
-	if err != nil {
-		t.Errorf("Get #3: %v", err)
-	}
-	if bytes.Compare(buf[:n], TestBlock3) != 0 {
-		t.Errorf("Get #3: expected %s, got %s", string(TestBlock3), string(buf[:n]))
 	}
 }
 
@@ -819,17 +639,18 @@ func (s *genericVolumeSuite) testPutFullBlock(t TB, factory TestableVolumeFactor
 	wdata[0] = 'a'
 	wdata[BlockSize-1] = 'z'
 	hash := fmt.Sprintf("%x", md5.Sum(wdata))
-	err := v.Put(context.Background(), hash, wdata)
+	err := v.BlockWrite(context.Background(), hash, wdata)
 	if err != nil {
-		t.Fatal(err)
+		t.Error(err)
 	}
-	buf := make([]byte, BlockSize)
-	n, err := v.Get(context.Background(), hash, buf)
+
+	buf := bytes.NewBuffer(nil)
+	_, err = v.BlockRead(context.Background(), hash, buf)
 	if err != nil {
 		t.Error(err)
 	}
-	if bytes.Compare(buf[:n], wdata) != 0 {
-		t.Error("buf %+q != wdata %+q", buf[:n], wdata)
+	if buf.String() != string(wdata) {
+		t.Errorf("buf %+q != wdata %+q", buf, wdata)
 	}
 }
 
@@ -844,48 +665,44 @@ func (s *genericVolumeSuite) testTrashUntrash(t TB, readonly bool, factory Testa
 	defer v.Teardown()
 
 	// put block and backdate it
-	v.PutRaw(TestHash, TestBlock)
+	v.BlockWrite(context.Background(), TestHash, TestBlock)
 	v.TouchWithDate(TestHash, time.Now().Add(-2*s.cluster.Collections.BlobSigningTTL.Duration()))
 
-	buf := make([]byte, BlockSize)
-	n, err := v.Get(context.Background(), TestHash, buf)
+	buf := bytes.NewBuffer(nil)
+	_, err := v.BlockRead(context.Background(), TestHash, buf)
 	if err != nil {
-		t.Fatal(err)
+		t.Error(err)
 	}
-	if bytes.Compare(buf[:n], TestBlock) != 0 {
-		t.Errorf("Got data %+q, expected %+q", buf[:n], TestBlock)
+	if buf.String() != string(TestBlock) {
+		t.Errorf("Got data %+q, expected %+q", buf, TestBlock)
 	}
 
 	// Trash
-	err = v.Trash(TestHash)
-	if readonly {
-		if err != MethodDisabledError {
-			t.Fatal(err)
-		}
-	} else if err != nil {
-		if err != ErrNotImplemented {
-			t.Fatal(err)
-		}
-	} else {
-		_, err = v.Get(context.Background(), TestHash, buf)
-		if err == nil || !os.IsNotExist(err) {
-			t.Errorf("os.IsNotExist(%v) should have been true", err)
-		}
+	err = v.BlockTrash(TestHash)
+	if err != nil {
+		t.Error(err)
+		return
+	}
+	buf.Reset()
+	_, err = v.BlockRead(context.Background(), TestHash, buf)
+	if err == nil || !os.IsNotExist(err) {
+		t.Errorf("os.IsNotExist(%v) should have been true", err)
+	}
 
-		// Untrash
-		err = v.Untrash(TestHash)
-		if err != nil {
-			t.Fatal(err)
-		}
+	// Untrash
+	err = v.BlockUntrash(TestHash)
+	if err != nil {
+		t.Error(err)
 	}
 
 	// Get the block - after trash and untrash sequence
-	n, err = v.Get(context.Background(), TestHash, buf)
+	buf.Reset()
+	_, err = v.BlockRead(context.Background(), TestHash, buf)
 	if err != nil {
-		t.Fatal(err)
+		t.Error(err)
 	}
-	if bytes.Compare(buf[:n], TestBlock) != 0 {
-		t.Errorf("Got data %+q, expected %+q", buf[:n], TestBlock)
+	if buf.String() != string(TestBlock) {
+		t.Errorf("Got data %+q, expected %+q", buf, TestBlock)
 	}
 }
 
@@ -895,13 +712,13 @@ func (s *genericVolumeSuite) testTrashEmptyTrashUntrash(t TB, factory TestableVo
 	defer v.Teardown()
 
 	checkGet := func() error {
-		buf := make([]byte, BlockSize)
-		n, err := v.Get(context.Background(), TestHash, buf)
+		buf := bytes.NewBuffer(nil)
+		_, err := v.BlockRead(context.Background(), TestHash, buf)
 		if err != nil {
 			return err
 		}
-		if bytes.Compare(buf[:n], TestBlock) != 0 {
-			t.Fatalf("Got data %+q, expected %+q", buf[:n], TestBlock)
+		if buf.String() != string(TestBlock) {
+			t.Errorf("Got data %+q, expected %+q", buf, TestBlock)
 		}
 
 		_, err = v.Mtime(TestHash)
@@ -909,13 +726,8 @@ func (s *genericVolumeSuite) testTrashEmptyTrashUntrash(t TB, factory TestableVo
 			return err
 		}
 
-		err = v.Compare(context.Background(), TestHash, TestBlock)
-		if err != nil {
-			return err
-		}
-
 		indexBuf := new(bytes.Buffer)
-		v.IndexTo("", indexBuf)
+		v.Index(context.Background(), "", indexBuf)
 		if !strings.Contains(string(indexBuf.Bytes()), TestHash) {
 			return os.ErrNotExist
 		}
@@ -927,50 +739,47 @@ func (s *genericVolumeSuite) testTrashEmptyTrashUntrash(t TB, factory TestableVo
 
 	s.cluster.Collections.BlobTrashLifetime.Set("1h")
 
-	v.PutRaw(TestHash, TestBlock)
+	v.BlockWrite(context.Background(), TestHash, TestBlock)
 	v.TouchWithDate(TestHash, time.Now().Add(-2*s.cluster.Collections.BlobSigningTTL.Duration()))
 
 	err := checkGet()
 	if err != nil {
-		t.Fatal(err)
+		t.Error(err)
 	}
 
 	// Trash the block
-	err = v.Trash(TestHash)
-	if err == MethodDisabledError || err == ErrNotImplemented {
-		// Skip the trash tests for read-only volumes, and
-		// volume types that don't support
-		// BlobTrashLifetime>0.
-		return
+	err = v.BlockTrash(TestHash)
+	if err != nil {
+		t.Error(err)
 	}
 
 	err = checkGet()
 	if err == nil || !os.IsNotExist(err) {
-		t.Fatalf("os.IsNotExist(%v) should have been true", err)
+		t.Errorf("os.IsNotExist(%v) should have been true", err)
 	}
 
-	err = v.Touch(TestHash)
+	err = v.BlockTouch(TestHash)
 	if err == nil || !os.IsNotExist(err) {
-		t.Fatalf("os.IsNotExist(%v) should have been true", err)
+		t.Errorf("os.IsNotExist(%v) should have been true", err)
 	}
 
 	v.EmptyTrash()
 
 	// Even after emptying the trash, we can untrash our block
 	// because the deadline hasn't been reached.
-	err = v.Untrash(TestHash)
+	err = v.BlockUntrash(TestHash)
 	if err != nil {
-		t.Fatal(err)
+		t.Error(err)
 	}
 
 	err = checkGet()
 	if err != nil {
-		t.Fatal(err)
+		t.Error(err)
 	}
 
-	err = v.Touch(TestHash)
+	err = v.BlockTouch(TestHash)
 	if err != nil {
-		t.Fatal(err)
+		t.Error(err)
 	}
 
 	// Because we Touch'ed, need to backdate again for next set of tests
@@ -979,16 +788,16 @@ func (s *genericVolumeSuite) testTrashEmptyTrashUntrash(t TB, factory TestableVo
 	// If the only block in the trash has already been untrashed,
 	// most volumes will fail a subsequent Untrash with a 404, but
 	// it's also acceptable for Untrash to succeed.
-	err = v.Untrash(TestHash)
+	err = v.BlockUntrash(TestHash)
 	if err != nil && !os.IsNotExist(err) {
-		t.Fatalf("Expected success or os.IsNotExist(), but got: %v", err)
+		t.Errorf("Expected success or os.IsNotExist(), but got: %v", err)
 	}
 
 	// The additional Untrash should not interfere with our
 	// already-untrashed copy.
 	err = checkGet()
 	if err != nil {
-		t.Fatal(err)
+		t.Error(err)
 	}
 
 	// Untrash might have updated the timestamp, so backdate again
@@ -998,74 +807,74 @@ func (s *genericVolumeSuite) testTrashEmptyTrashUntrash(t TB, factory TestableVo
 
 	s.cluster.Collections.BlobTrashLifetime.Set("1ns")
 
-	err = v.Trash(TestHash)
+	err = v.BlockTrash(TestHash)
 	if err != nil {
-		t.Fatal(err)
+		t.Error(err)
 	}
 	err = checkGet()
 	if err == nil || !os.IsNotExist(err) {
-		t.Fatalf("os.IsNotExist(%v) should have been true", err)
+		t.Errorf("os.IsNotExist(%v) should have been true", err)
 	}
 
 	// Even though 1ns has passed, we can untrash because we
 	// haven't called EmptyTrash yet.
-	err = v.Untrash(TestHash)
+	err = v.BlockUntrash(TestHash)
 	if err != nil {
-		t.Fatal(err)
+		t.Error(err)
 	}
 	err = checkGet()
 	if err != nil {
-		t.Fatal(err)
+		t.Error(err)
 	}
 
 	// Trash it again, and this time call EmptyTrash so it really
 	// goes away.
 	// (In Azure volumes, un/trash changes Mtime, so first backdate again)
 	v.TouchWithDate(TestHash, time.Now().Add(-2*s.cluster.Collections.BlobSigningTTL.Duration()))
-	_ = v.Trash(TestHash)
+	_ = v.BlockTrash(TestHash)
 	err = checkGet()
 	if err == nil || !os.IsNotExist(err) {
-		t.Fatalf("os.IsNotExist(%v) should have been true", err)
+		t.Errorf("os.IsNotExist(%v) should have been true", err)
 	}
 	v.EmptyTrash()
 
 	// Untrash won't find it
-	err = v.Untrash(TestHash)
+	err = v.BlockUntrash(TestHash)
 	if err == nil || !os.IsNotExist(err) {
-		t.Fatalf("os.IsNotExist(%v) should have been true", err)
+		t.Errorf("os.IsNotExist(%v) should have been true", err)
 	}
 
 	// Get block won't find it
 	err = checkGet()
 	if err == nil || !os.IsNotExist(err) {
-		t.Fatalf("os.IsNotExist(%v) should have been true", err)
+		t.Errorf("os.IsNotExist(%v) should have been true", err)
 	}
 
 	// Third set: If the same data block gets written again after
 	// being trashed, and then the trash gets emptied, the newer
 	// un-trashed copy doesn't get deleted along with it.
 
-	v.PutRaw(TestHash, TestBlock)
+	v.BlockWrite(context.Background(), TestHash, TestBlock)
 	v.TouchWithDate(TestHash, time.Now().Add(-2*s.cluster.Collections.BlobSigningTTL.Duration()))
 
 	s.cluster.Collections.BlobTrashLifetime.Set("1ns")
-	err = v.Trash(TestHash)
+	err = v.BlockTrash(TestHash)
 	if err != nil {
-		t.Fatal(err)
+		t.Error(err)
 	}
 	err = checkGet()
 	if err == nil || !os.IsNotExist(err) {
-		t.Fatalf("os.IsNotExist(%v) should have been true", err)
+		t.Errorf("os.IsNotExist(%v) should have been true", err)
 	}
 
-	v.PutRaw(TestHash, TestBlock)
+	v.BlockWrite(context.Background(), TestHash, TestBlock)
 	v.TouchWithDate(TestHash, time.Now().Add(-2*s.cluster.Collections.BlobSigningTTL.Duration()))
 
 	// EmptyTrash should not delete the untrashed copy.
 	v.EmptyTrash()
 	err = checkGet()
 	if err != nil {
-		t.Fatal(err)
+		t.Error(err)
 	}
 
 	// Fourth set: If the same data block gets trashed twice with
@@ -1073,33 +882,33 @@ func (s *genericVolumeSuite) testTrashEmptyTrashUntrash(t TB, factory TestableVo
 	// at intermediate time B (A < B < C), it is still possible to
 	// untrash the block whose deadline is "C".
 
-	v.PutRaw(TestHash, TestBlock)
+	v.BlockWrite(context.Background(), TestHash, TestBlock)
 	v.TouchWithDate(TestHash, time.Now().Add(-2*s.cluster.Collections.BlobSigningTTL.Duration()))
 
 	s.cluster.Collections.BlobTrashLifetime.Set("1ns")
-	err = v.Trash(TestHash)
+	err = v.BlockTrash(TestHash)
 	if err != nil {
-		t.Fatal(err)
+		t.Error(err)
 	}
 
-	v.PutRaw(TestHash, TestBlock)
+	v.BlockWrite(context.Background(), TestHash, TestBlock)
 	v.TouchWithDate(TestHash, time.Now().Add(-2*s.cluster.Collections.BlobSigningTTL.Duration()))
 
 	s.cluster.Collections.BlobTrashLifetime.Set("1h")
-	err = v.Trash(TestHash)
+	err = v.BlockTrash(TestHash)
 	if err != nil {
-		t.Fatal(err)
+		t.Error(err)
 	}
 
 	// EmptyTrash should not prevent us from recovering the
 	// time.Hour ("C") trash
 	v.EmptyTrash()
-	err = v.Untrash(TestHash)
+	err = v.BlockUntrash(TestHash)
 	if err != nil {
-		t.Fatal(err)
+		t.Error(err)
 	}
 	err = checkGet()
 	if err != nil {
-		t.Fatal(err)
+		t.Error(err)
 	}
 }
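
A note on the pattern in the hunks above: the patch replaces t.Fatal/t.Fatalf with t.Error/t.Errorf throughout this generic volume test. In Go's testing package, Error records a failure and lets the test keep running, so one failed assertion no longer hides the rest of the results, while Fatal records a failure and returns from the test function immediately. A minimal, self-contained sketch of the distinction (illustrative only, not part of the patch):

    package example

    import "testing"

    // TestErrorVsFatal is contrived so the first check fails,
    // showing that t.Errorf lets the rest of the test run.
    func TestErrorVsFatal(t *testing.T) {
        got, want := 1, 2
        if got != want {
            // Errorf: the failure is recorded, execution continues.
            t.Errorf("got %d, want %d", got, want)
        }
        // This check still runs after the Errorf above.
        if got < 0 {
            // Fatalf: the failure is recorded and the test function
            // returns immediately; nothing after it would run.
            t.Fatalf("unexpected negative value %d", got)
        }
    }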
diff --git a/services/keepstore/volume_test.go b/services/keepstore/volume_test.go
index 950b3989aa..5a17b3a7dc 100644
--- a/services/keepstore/volume_test.go
+++ b/services/keepstore/volume_test.go
@@ -5,25 +5,12 @@
 package keepstore
 
 import (
-	"bytes"
-	"context"
-	"crypto/md5"
-	"errors"
-	"fmt"
-	"io"
-	"os"
-	"strings"
-	"sync"
 	"time"
-
-	"git.arvados.org/arvados.git/sdk/go/arvados"
-	"github.com/sirupsen/logrus"
 )
 
 var (
-	TestBlock       = []byte("The quick brown fox jumps over the lazy dog.")
-	TestHash        = "e4d909c290d0fb1ca068ffaddf22cbd0"
-	TestHashPutResp = "e4d909c290d0fb1ca068ffaddf22cbd0+44\n"
+	TestBlock = []byte("The quick brown fox jumps over the lazy dog.")
+	TestHash  = "e4d909c290d0fb1ca068ffaddf22cbd0"
 
 	TestBlock2 = []byte("Pack my box with five dozen liquor jugs.")
 	TestHash2  = "f15ac516f788aec4f30932ffb6395c39"
@@ -31,10 +18,6 @@ var (
 	TestBlock3 = []byte("Now is the time for all good men to come to the aid of their country.")
 	TestHash3  = "eed29bbffbc2dbe5e5ee0bb71888e61f"
 
-	// BadBlock is used to test collisions and corruption.
-	// It must not match any test hashes.
-	BadBlock = []byte("The magic words are squeamish ossifrage.")
-
 	EmptyHash  = "d41d8cd98f00b204e9800998ecf8427e"
 	EmptyBlock = []byte("")
 )
@@ -43,230 +26,15 @@ var (
 // underlying Volume, in order to test behavior in cases that are
 // impractical to achieve with a sequence of normal Volume operations.
 type TestableVolume interface {
-	Volume
-
-	// [Over]write content for a locator with the given data,
-	// bypassing all constraints like readonly and serialize.
-	PutRaw(locator string, data []byte)
+	volume
 
 	// Returns the strings that a driver uses to record read/write operations.
 	ReadWriteOperationLabelValues() (r, w string)
 
 	// Specify the value Mtime() should return, until the next
-	// call to Touch, TouchWithDate, or Put.
-	TouchWithDate(locator string, lastPut time.Time)
+	// call to Touch, TouchWithDate, or BlockWrite.
+	TouchWithDate(locator string, lastBlockWrite time.Time)
 
 	// Clean up, delete temporary files.
 	Teardown()
 }
-
-func init() {
-	driver["mock"] = newMockVolume
-}
-
-// MockVolumes are test doubles for Volumes, used to test handlers.
-type MockVolume struct {
-	Store      map[string][]byte
-	Timestamps map[string]time.Time
-
-	// Bad volumes return an error for every operation.
-	Bad            bool
-	BadVolumeError error
-
-	// Touchable volumes' Touch() method succeeds for a locator
-	// that has been Put().
-	Touchable bool
-
-	// Gate is a "starting gate", allowing test cases to pause
-	// volume operations long enough to inspect state. Every
-	// operation (except Status) starts by receiving from
-	// Gate. Sending one value unblocks one operation; closing the
-	// channel unblocks all operations. By default, Gate is a
-	// closed channel, so all operations proceed without
-	// blocking. See trash_worker_test.go for an example.
-	Gate chan struct{} `json:"-"`
-
-	cluster *arvados.Cluster
-	volume  arvados.Volume
-	logger  logrus.FieldLogger
-	metrics *volumeMetricsVecs
-	called  map[string]int
-	mutex   sync.Mutex
-}
-
-// newMockVolume returns a non-Bad, non-Readonly, Touchable mock
-// volume.
-func newMockVolume(cluster *arvados.Cluster, volume arvados.Volume, logger logrus.FieldLogger, metrics *volumeMetricsVecs) (Volume, error) {
-	gate := make(chan struct{})
-	close(gate)
-	return &MockVolume{
-		Store:      make(map[string][]byte),
-		Timestamps: make(map[string]time.Time),
-		Bad:        false,
-		Touchable:  true,
-		called:     map[string]int{},
-		Gate:       gate,
-		cluster:    cluster,
-		volume:     volume,
-		logger:     logger,
-		metrics:    metrics,
-	}, nil
-}
-
-// CallCount returns how many times the named method has been called.
-func (v *MockVolume) CallCount(method string) int {
-	v.mutex.Lock()
-	defer v.mutex.Unlock()
-	c, ok := v.called[method]
-	if !ok {
-		return 0
-	}
-	return c
-}
-
-func (v *MockVolume) gotCall(method string) {
-	v.mutex.Lock()
-	defer v.mutex.Unlock()
-	if _, ok := v.called[method]; !ok {
-		v.called[method] = 1
-	} else {
-		v.called[method]++
-	}
-}
-
-func (v *MockVolume) Compare(ctx context.Context, loc string, buf []byte) error {
-	v.gotCall("Compare")
-	<-v.Gate
-	if v.Bad {
-		return v.BadVolumeError
-	} else if block, ok := v.Store[loc]; ok {
-		if fmt.Sprintf("%x", md5.Sum(block)) != loc {
-			return DiskHashError
-		}
-		if bytes.Compare(buf, block) != 0 {
-			return CollisionError
-		}
-		return nil
-	} else {
-		return os.ErrNotExist
-	}
-}
-
-func (v *MockVolume) Get(ctx context.Context, loc string, buf []byte) (int, error) {
-	v.gotCall("Get")
-	<-v.Gate
-	if v.Bad {
-		return 0, v.BadVolumeError
-	} else if block, ok := v.Store[loc]; ok {
-		copy(buf[:len(block)], block)
-		return len(block), nil
-	}
-	return 0, os.ErrNotExist
-}
-
-func (v *MockVolume) Put(ctx context.Context, loc string, block []byte) error {
-	v.gotCall("Put")
-	<-v.Gate
-	if v.Bad {
-		return v.BadVolumeError
-	}
-	if v.volume.ReadOnly {
-		return MethodDisabledError
-	}
-	v.Store[loc] = block
-	return v.Touch(loc)
-}
-
-func (v *MockVolume) Touch(loc string) error {
-	return v.TouchWithDate(loc, time.Now())
-}
-
-func (v *MockVolume) TouchWithDate(loc string, t time.Time) error {
-	v.gotCall("Touch")
-	<-v.Gate
-	if v.volume.ReadOnly {
-		return MethodDisabledError
-	}
-	if _, exists := v.Store[loc]; !exists {
-		return os.ErrNotExist
-	}
-	if v.Touchable {
-		v.Timestamps[loc] = t
-		return nil
-	}
-	return errors.New("Touch failed")
-}
-
-func (v *MockVolume) Mtime(loc string) (time.Time, error) {
-	v.gotCall("Mtime")
-	<-v.Gate
-	var mtime time.Time
-	var err error
-	if v.Bad {
-		err = v.BadVolumeError
-	} else if t, ok := v.Timestamps[loc]; ok {
-		mtime = t
-	} else {
-		err = os.ErrNotExist
-	}
-	return mtime, err
-}
-
-func (v *MockVolume) IndexTo(prefix string, w io.Writer) error {
-	v.gotCall("IndexTo")
-	<-v.Gate
-	for loc, block := range v.Store {
-		if !IsValidLocator(loc) || !strings.HasPrefix(loc, prefix) {
-			continue
-		}
-		_, err := fmt.Fprintf(w, "%s+%d %d\n",
-			loc, len(block), 123456789)
-		if err != nil {
-			return err
-		}
-	}
-	return nil
-}
-
-func (v *MockVolume) Trash(loc string) error {
-	v.gotCall("Delete")
-	<-v.Gate
-	if v.volume.ReadOnly {
-		return MethodDisabledError
-	}
-	if _, ok := v.Store[loc]; ok {
-		if time.Since(v.Timestamps[loc]) < time.Duration(v.cluster.Collections.BlobSigningTTL) {
-			return nil
-		}
-		delete(v.Store, loc)
-		return nil
-	}
-	return os.ErrNotExist
-}
-
-func (v *MockVolume) GetDeviceID() string {
-	return "mock-device-id"
-}
-
-func (v *MockVolume) Untrash(loc string) error {
-	return nil
-}
-
-func (v *MockVolume) Status() *VolumeStatus {
-	var used uint64
-	for _, block := range v.Store {
-		used = used + uint64(len(block))
-	}
-	return &VolumeStatus{"/bogo", 123, 1000000 - used, used}
-}
-
-func (v *MockVolume) String() string {
-	return "[MockVolume]"
-}
-
-func (v *MockVolume) EmptyTrash() {
-}
-
-func (v *MockVolume) GetStorageClasses() []string {
-	return nil
-}
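
A note on the MockVolume deleted above: its Gate field used a "starting gate" channel. Receiving from a closed channel never blocks, so the default gate (created and immediately closed) lets every operation proceed, while a test that wants to pause operations installs an open channel and sends one value per operation it releases; the deleted comment points to trash_worker_test.go for a real use. A self-contained sketch of the pattern (illustrative; nothing here is from the deleted file):

    package main

    import "fmt"

    func main() {
        // Default: a closed channel never blocks receivers, so a
        // gated operation proceeds immediately.
        openGate := make(chan struct{})
        close(openGate)
        <-openGate // returns at once

        // Paused: an open channel blocks receivers until the test
        // sends a value, releasing exactly one operation per send.
        gate := make(chan struct{})
        done := make(chan struct{})
        go func() {
            <-gate // blocks until released
            fmt.Println("operation ran")
            close(done)
        }()
        gate <- struct{}{} // release one waiting operation
        <-done
    }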
diff --git a/services/keepstore/work_queue.go b/services/keepstore/work_queue.go
deleted file mode 100644
index be3d118ff0..0000000000
--- a/services/keepstore/work_queue.go
+++ /dev/null
@@ -1,208 +0,0 @@
-// Copyright (C) The Arvados Authors. All rights reserved.
-//
-// SPDX-License-Identifier: AGPL-3.0
-
-package keepstore
-
-/* A WorkQueue is an asynchronous thread-safe queue manager.  It
-   provides a channel from which items can be read off the queue, and
-   permits replacing the contents of the queue at any time.
-
-   The overall work flow for a WorkQueue is as follows:
-
-     1. A WorkQueue is created with NewWorkQueue().  This
-        function instantiates a new WorkQueue and starts a manager
-        goroutine.  The manager listens on an input channel
-        (manager.newlist) and an output channel (manager.NextItem).
-
-     2. The manager first waits for a new list of requests on the
-        newlist channel.  When another goroutine calls
-        manager.ReplaceQueue(lst), it sends lst over the newlist
-        channel to the manager.  The manager goroutine now has
-        ownership of the list.
-
-     3. Once the manager has this initial list, it listens on both the
-        input and output channels for one of the following to happen:
-
-          a. A worker attempts to read an item from the NextItem
-             channel.  The manager sends the next item from the list
-             over this channel to the worker, and loops.
-
-          b. New data is sent to the manager on the newlist channel.
-             This happens when another goroutine calls
-             manager.ReplaceQueue() with a new list.  The manager
-             discards the current list, replaces it with the new one,
-             and begins looping again.
-
-          c. The input channel is closed.  The manager closes its
-             output channel (signalling any workers to quit) and
-             terminates.
-
-   Tasks currently handled by WorkQueue:
-     * the pull list
-     * the trash list
-
-   Example usage:
-
-        // Any kind of user-defined type can be used with the
-        // WorkQueue.
-		type FrobRequest struct {
-			frob string
-		}
-
-		// Make a work list.
-		froblist := NewWorkQueue()
-
-		// Start a concurrent worker to read items from the NextItem
-		// channel until it is closed, deleting each one.
-		go func(list WorkQueue) {
-			for i := range list.NextItem {
-				req := i.(FrobRequest)
-				frob.Run(req)
-			}
-		}(froblist)
-
-		// Set up an HTTP handler for PUT /frob
-		router.HandleFunc(`/frob`,
-			func(w http.ResponseWriter, req *http.Request) {
-				// Parse the request body into a list.List
-				// of FrobRequests, and give this list to the
-				// frob manager.
-				newfrobs := parseBody(req.Body)
-				froblist.ReplaceQueue(newfrobs)
-			}).Methods("PUT")
-
-   Methods available on a WorkQueue:
-
-		ReplaceQueue(list)
-			Replaces the current item list with a new one.  The list
-            manager discards any unprocessed items on the existing
-            list and replaces it with the new one. If the worker is
-            processing a list item when ReplaceQueue is called, it
-            finishes processing before receiving items from the new
-            list.
-		Close()
-			Shuts down the manager goroutine. When Close is called,
-			the manager closes the NextItem channel.
-*/
-
-import "container/list"
-
-// WorkQueue definition
-type WorkQueue struct {
-	getStatus chan WorkQueueStatus
-	newlist   chan *list.List
-	// Workers get work items by reading from this channel.
-	NextItem <-chan interface{}
-	// Each worker must send struct{}{} to DoneItem exactly once
-	// for each work item received from NextItem, when it stops
-	// working on that item (regardless of whether the work was
-	// successful).
-	DoneItem chan<- struct{}
-}
-
-// WorkQueueStatus reflects the queue status.
-type WorkQueueStatus struct {
-	InProgress int
-	Queued     int
-}
-
-// NewWorkQueue returns a new empty WorkQueue.
-func NewWorkQueue() *WorkQueue {
-	nextItem := make(chan interface{})
-	reportDone := make(chan struct{})
-	newList := make(chan *list.List)
-	b := WorkQueue{
-		getStatus: make(chan WorkQueueStatus),
-		newlist:   newList,
-		NextItem:  nextItem,
-		DoneItem:  reportDone,
-	}
-	go func() {
-		// Read new work lists from the newlist channel.
-		// Reply to "status" and "get next item" queries by
-		// sending to the getStatus and nextItem channels
-		// respectively. Return when the newlist channel
-		// closes.
-
-		todo := &list.List{}
-		status := WorkQueueStatus{}
-
-		// When we're done, close the output channel; workers will
-		// shut down next time they ask for new work.
-		defer close(nextItem)
-		defer close(b.getStatus)
-
-		// nextChan and nextVal are both nil when we have
-		// nothing to send; otherwise they are, respectively,
-		// the nextItem channel and the next work item to send
-		// to it.
-		var nextChan chan interface{}
-		var nextVal interface{}
-
-		for newList != nil || status.InProgress > 0 {
-			select {
-			case p, ok := <-newList:
-				if !ok {
-					// Closed, stop receiving
-					newList = nil
-				}
-				todo = p
-				if todo == nil {
-					todo = &list.List{}
-				}
-				status.Queued = todo.Len()
-				if status.Queued == 0 {
-					// Stop sending work
-					nextChan = nil
-					nextVal = nil
-				} else {
-					nextChan = nextItem
-					nextVal = todo.Front().Value
-				}
-			case nextChan <- nextVal:
-				todo.Remove(todo.Front())
-				status.InProgress++
-				status.Queued--
-				if status.Queued == 0 {
-					// Stop sending work
-					nextChan = nil
-					nextVal = nil
-				} else {
-					nextVal = todo.Front().Value
-				}
-			case <-reportDone:
-				status.InProgress--
-			case b.getStatus <- status:
-			}
-		}
-	}()
-	return &b
-}
-
-// ReplaceQueue abandons any work items left in the existing queue,
-// and starts giving workers items from the given list. After giving
-// it to ReplaceQueue, the caller must not read or write the given
-// list.
-func (b *WorkQueue) ReplaceQueue(list *list.List) {
-	b.newlist <- list
-}
-
-// Close shuts down the manager and terminates the goroutine, which
-// abandons any pending requests, but allows any pull request already
-// in progress to continue.
-//
-// After Close, Status will return correct values, NextItem will be
-// closed, and ReplaceQueue will panic.
-func (b *WorkQueue) Close() {
-	close(b.newlist)
-}
-
-// Status returns an up-to-date WorkQueueStatus reflecting the current
-// queue status.
-func (b *WorkQueue) Status() WorkQueueStatus {
-	// If the channel is closed, we get the zero value of
-	// WorkQueueStatus, which is an accurate description of a
-	// finished queue.
-	return <-b.getStatus
-}
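
A note on the manager goroutine in the file deleted above: it relied on a standard Go idiom in which a send on a nil channel blocks forever, so assigning nil to a select case's channel variable disables that case until there is something to send. A reduced, self-contained sketch of the same idiom (names are illustrative, not taken from the deleted code):

    package main

    import "fmt"

    func main() {
        newList := make(chan []int)
        out := make(chan int)
        done := make(chan struct{})

        // Consumer: drain items until the manager closes out.
        go func() {
            for n := range out {
                fmt.Println("item", n)
            }
            close(done)
        }()

        // Manager: sendChan is nil whenever there is nothing to
        // send, which disables the send case of the select below.
        go func() {
            defer close(out)
            var todo []int
            var sendChan chan int
            var nextVal int
            for newList != nil || sendChan != nil {
                select {
                case l, ok := <-newList:
                    if !ok {
                        newList = nil // input closed; stop receiving
                        break
                    }
                    todo = l
                    sendChan = nil
                    if len(todo) > 0 {
                        sendChan, nextVal = out, todo[0]
                    }
                case sendChan <- nextVal:
                    todo = todo[1:]
                    sendChan = nil
                    if len(todo) > 0 {
                        sendChan, nextVal = out, todo[0]
                    }
                }
            }
        }()

        newList <- []int{1, 2, 3}
        close(newList)
        <-done
    }

The deleted NewWorkQueue had the same shape, with the addition of DoneItem accounting and a status channel.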
diff --git a/services/keepstore/work_queue_test.go b/services/keepstore/work_queue_test.go
deleted file mode 100644
index 254f96cb2d..0000000000
--- a/services/keepstore/work_queue_test.go
+++ /dev/null
@@ -1,244 +0,0 @@
-// Copyright (C) The Arvados Authors. All rights reserved.
-//
-// SPDX-License-Identifier: AGPL-3.0
-
-package keepstore
-
-import (
-	"container/list"
-	"runtime"
-	"testing"
-	"time"
-)
-
-type fatalfer interface {
-	Fatalf(string, ...interface{})
-}
-
-func makeTestWorkList(ary []interface{}) *list.List {
-	l := list.New()
-	for _, n := range ary {
-		l.PushBack(n)
-	}
-	return l
-}
-
-func expectChannelEmpty(t fatalfer, c <-chan interface{}) {
-	select {
-	case item, ok := <-c:
-		if ok {
-			t.Fatalf("Received value (%+v) from channel that we expected to be empty", item)
-		}
-	default:
-	}
-}
-
-func expectChannelNotEmpty(t fatalfer, c <-chan interface{}) interface{} {
-	select {
-	case item, ok := <-c:
-		if !ok {
-			t.Fatalf("expected data on a closed channel")
-		}
-		return item
-	case <-time.After(time.Second):
-		t.Fatalf("expected data on an empty channel")
-		return nil
-	}
-}
-
-func expectChannelClosedWithin(t fatalfer, timeout time.Duration, c <-chan interface{}) {
-	select {
-	case received, ok := <-c:
-		if ok {
-			t.Fatalf("Expected channel to be closed, but received %+v instead", received)
-		}
-	case <-time.After(timeout):
-		t.Fatalf("Expected channel to be closed, but it is still open after %v", timeout)
-	}
-}
-
-func doWorkItems(t fatalfer, q *WorkQueue, expected []interface{}) {
-	for i := range expected {
-		actual, ok := <-q.NextItem
-		if !ok {
-			t.Fatalf("Expected %+v but channel was closed after receiving %+v as expected.", expected, expected[:i])
-		}
-		q.DoneItem <- struct{}{}
-		if actual.(int) != expected[i] {
-			t.Fatalf("Expected %+v but received %+v after receiving %+v as expected.", expected[i], actual, expected[:i])
-		}
-	}
-}
-
-func expectEqualWithin(t fatalfer, timeout time.Duration, expect interface{}, f func() interface{}) {
-	ok := make(chan struct{})
-	giveup := false
-	go func() {
-		for f() != expect && !giveup {
-			time.Sleep(time.Millisecond)
-		}
-		close(ok)
-	}()
-	select {
-	case <-ok:
-	case <-time.After(timeout):
-		giveup = true
-		_, file, line, _ := runtime.Caller(1)
-		t.Fatalf("Still getting %+v, timed out waiting for %+v\n%s:%d", f(), expect, file, line)
-	}
-}
-
-func expectQueued(t fatalfer, b *WorkQueue, expectQueued int) {
-	if l := b.Status().Queued; l != expectQueued {
-		t.Fatalf("Got Queued==%d, expected %d", l, expectQueued)
-	}
-}
-
-func TestWorkQueueDoneness(t *testing.T) {
-	b := NewWorkQueue()
-	defer b.Close()
-	b.ReplaceQueue(makeTestWorkList([]interface{}{1, 2, 3}))
-	expectQueued(t, b, 3)
-	gate := make(chan struct{})
-	go func() {
-		<-gate
-		for range b.NextItem {
-			<-gate
-			time.Sleep(time.Millisecond)
-			b.DoneItem <- struct{}{}
-		}
-	}()
-	expectEqualWithin(t, time.Second, 0, func() interface{} { return b.Status().InProgress })
-	b.ReplaceQueue(makeTestWorkList([]interface{}{4, 5, 6}))
-	for i := 1; i <= 3; i++ {
-		gate <- struct{}{}
-		expectEqualWithin(t, time.Second, 3-i, func() interface{} { return b.Status().Queued })
-		expectEqualWithin(t, time.Second, 1, func() interface{} { return b.Status().InProgress })
-	}
-	close(gate)
-	expectEqualWithin(t, time.Second, 0, func() interface{} { return b.Status().InProgress })
-	expectChannelEmpty(t, b.NextItem)
-}
-
-// Create a WorkQueue, generate a list for it, and instantiate a worker.
-func TestWorkQueueReadWrite(t *testing.T) {
-	var input = []interface{}{1, 1, 2, 3, 5, 8, 13, 21, 34}
-
-	b := NewWorkQueue()
-	expectQueued(t, b, 0)
-
-	b.ReplaceQueue(makeTestWorkList(input))
-	expectQueued(t, b, len(input))
-
-	doWorkItems(t, b, input)
-	expectChannelEmpty(t, b.NextItem)
-	b.Close()
-}
-
-// Start a worker before the list has any input.
-func TestWorkQueueEarlyRead(t *testing.T) {
-	var input = []interface{}{1, 1, 2, 3, 5, 8, 13, 21, 34}
-
-	b := NewWorkQueue()
-	defer b.Close()
-
-	// First, demonstrate that nothing is available on the NextItem
-	// channel.
-	expectChannelEmpty(t, b.NextItem)
-
-	// Start a reader in a goroutine. The reader will block until the
-	// block work list has been initialized.
-	//
-	done := make(chan int)
-	go func() {
-		doWorkItems(t, b, input)
-		done <- 1
-	}()
-
-	// Feed the blocklist a new worklist, and wait for the worker to
-	// finish.
-	b.ReplaceQueue(makeTestWorkList(input))
-	<-done
-	expectQueued(t, b, 0)
-}
-
-// After Close(), NextItem closes, work finishes, then stats return zero.
-func TestWorkQueueClose(t *testing.T) {
-	b := NewWorkQueue()
-	input := []interface{}{1, 2, 3, 4, 5, 6, 7, 8}
-	mark := make(chan struct{})
-	go func() {
-		<-b.NextItem
-		mark <- struct{}{}
-		<-mark
-		b.DoneItem <- struct{}{}
-	}()
-	b.ReplaceQueue(makeTestWorkList(input))
-	// Wait for worker to take item 1
-	<-mark
-	b.Close()
-	expectEqualWithin(t, time.Second, 1, func() interface{} { return b.Status().InProgress })
-	// Tell worker to report done
-	mark <- struct{}{}
-	expectEqualWithin(t, time.Second, 0, func() interface{} { return b.Status().InProgress })
-	expectChannelClosedWithin(t, time.Second, b.NextItem)
-}
-
-// Show that a reader may block when the manager's list is exhausted,
-// and that the reader resumes automatically when new data is
-// available.
-func TestWorkQueueReaderBlocks(t *testing.T) {
-	var (
-		inputBeforeBlock = []interface{}{1, 2, 3, 4, 5}
-		inputAfterBlock  = []interface{}{6, 7, 8, 9, 10}
-	)
-
-	b := NewWorkQueue()
-	defer b.Close()
-	sendmore := make(chan int)
-	done := make(chan int)
-	go func() {
-		doWorkItems(t, b, inputBeforeBlock)
-
-		// Confirm that the channel is empty, so a subsequent read
-		// on it will block.
-		expectChannelEmpty(t, b.NextItem)
-
-		// Signal that we're ready for more input.
-		sendmore <- 1
-		doWorkItems(t, b, inputAfterBlock)
-		done <- 1
-	}()
-
-	// Write a slice of the first five elements and wait for the
-	// reader to signal that it's ready for us to send more input.
-	b.ReplaceQueue(makeTestWorkList(inputBeforeBlock))
-	<-sendmore
-
-	b.ReplaceQueue(makeTestWorkList(inputAfterBlock))
-
-	// Wait for the reader to complete.
-	<-done
-}
-
-// Replace one active work list with another.
-func TestWorkQueueReplaceQueue(t *testing.T) {
-	var firstInput = []interface{}{1, 1, 2, 3, 5, 8, 13, 21, 34}
-	var replaceInput = []interface{}{1, 4, 9, 16, 25, 36, 49, 64, 81}
-
-	b := NewWorkQueue()
-	b.ReplaceQueue(makeTestWorkList(firstInput))
-
-	// Read just the first five elements from the work list.
-	// Confirm that the channel is not empty.
-	doWorkItems(t, b, firstInput[0:5])
-	expectChannelNotEmpty(t, b.NextItem)
-
-	// Replace the work list and read five more elements.
-	// The old list should have been discarded and all new
-	// elements come from the new list.
-	b.ReplaceQueue(makeTestWorkList(replaceInput))
-	doWorkItems(t, b, replaceInput[0:5])
-
-	b.Close()
-}
diff --git a/tools/keep-block-check/keep-block-check_test.go b/tools/keep-block-check/keep-block-check_test.go
index 4dcb47a8da..5bd7136eaa 100644
--- a/tools/keep-block-check/keep-block-check_test.go
+++ b/tools/keep-block-check/keep-block-check_test.go
@@ -48,6 +48,7 @@ func (s *ServerRequiredSuite) TearDownSuite(c *C) {
 }
 
 func (s *ServerRequiredSuite) SetUpTest(c *C) {
+	logBuffer.Reset()
 	logOutput := io.MultiWriter(&logBuffer)
 	log.SetOutput(logOutput)
 }
@@ -55,7 +56,7 @@ func (s *ServerRequiredSuite) SetUpTest(c *C) {
 func (s *ServerRequiredSuite) TearDownTest(c *C) {
 	arvadostest.StopKeep(2)
 	log.SetOutput(os.Stdout)
-	log.Printf("%v", logBuffer.String())
+	c.Log(logBuffer.String())
 }
 
 func (s *DoMainTestSuite) SetUpSuite(c *C) {
@@ -226,7 +227,9 @@ func (s *ServerRequiredSuite) TestBlockCheck_BadSignature(c *C) {
 	setupTestData(c)
 	err := performKeepBlockCheck(kc, blobSignatureTTL, "badblobsigningkey", []string{TestHash, TestHash2}, false)
 	c.Assert(err.Error(), Equals, "Block verification failed for 2 out of 2 blocks with matching prefix")
-	checkErrorLog(c, []string{TestHash, TestHash2}, "Error verifying block", "HTTP 403")
+	// Older versions of keepstore return 403 Forbidden for
+	// invalid signatures; newer versions return 400 Bad Request.
+	checkErrorLog(c, []string{TestHash, TestHash2}, "Error verifying block", "HTTP 40[03]")
 	// verbose logging not requested
 	c.Assert(strings.Contains(logBuffer.String(), "Verifying block 1 of 2"), Equals, false)
 }
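
A note on the last hunk: "HTTP 40[03]" reads as a regular-expression character class matching either "HTTP 400" or "HTTP 403", covering both the newer and older keepstore responses. Assuming checkErrorLog matches its expectation as a regexp against the captured log buffer (the helper itself is not shown in this patch), the essential check looks something like this sketch, with hypothetical log lines for illustration:

    package main

    import (
        "fmt"
        "regexp"
    )

    func main() {
        // "40[03]" matches 400 (newer keepstore: Bad Request) or
        // 403 (older keepstore: Forbidden).
        re := regexp.MustCompile(`Error verifying block .*HTTP 40[03]`)
        for _, line := range []string{
            "Error verifying block e4d909c2...: HTTP 403 Forbidden",
            "Error verifying block e4d909c2...: HTTP 400 Bad Request",
        } {
            fmt.Println(re.MatchString(line)) // true for both lines
        }
    }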

-----------------------------------------------------------------------


hooks/post-receive