[ARVADOS] created: 1.3.0-1659-g915d1913a

Git user git at public.curoverse.com
Thu Sep 26 18:38:44 UTC 2019


        at  915d1913a691d980c3362d14ad50d6f6dc3ff9af (commit)


commit 915d1913a691d980c3362d14ad50d6f6dc3ff9af
Author: Eric Biagiotti <ebiagiotti at veritasgenetics.com>
Date:   Thu Sep 26 11:22:18 2019 -0400

    14714: Sets the token on the arvados client
    
    Arvados-DCO-1.1-Signed-off-by: Eric Biagiotti <ebiagiotti at veritasgenetics.com>

diff --git a/services/keep-balance/main.go b/services/keep-balance/main.go
index 92ed644b5..779614df0 100644
--- a/services/keep-balance/main.go
+++ b/services/keep-balance/main.go
@@ -26,13 +26,14 @@ var (
 	options RunOptions
 )
 
-func newHandler(ctx context.Context, cluster *arvados.Cluster, _ string) service.Handler {
+func newHandler(ctx context.Context, cluster *arvados.Cluster, token string) service.Handler {
 	if !options.Once && cluster.Collections.BalancePeriod == arvados.Duration(0) {
 		return service.ErrorHandler(ctx, cluster, fmt.Errorf("You must either run keep-balance with the -once flag, or set Collections.BalancePeriod in the config. "+
 			"If using the legacy keep-balance.yml config, RunPeriod is the equivalant of Collections.BalancePeriod."))
 	}
 
 	ac, err := arvados.NewClientFromConfig(cluster)
+	ac.AuthToken = token
 	if err != nil {
 		return service.ErrorHandler(ctx, cluster, fmt.Errorf("error initializing client from cluster config: %s", err))
 	}

commit ae03057c03ca6b8543c93a4b3938ba06dad41571
Author: Eric Biagiotti <ebiagiotti at veritasgenetics.com>
Date:   Thu Sep 26 10:22:12 2019 -0400

    14714: Removes context from Server functions. Adds sync.Once to setup
    
    Arvados-DCO-1.1-Signed-off-by: Eric Biagiotti <ebiagiotti at veritasgenetics.com>

diff --git a/services/keep-balance/balance_run_test.go b/services/keep-balance/balance_run_test.go
index 1478e6e2e..3ef3b0007 100644
--- a/services/keep-balance/balance_run_test.go
+++ b/services/keep-balance/balance_run_test.go
@@ -5,7 +5,6 @@
 package main
 
 import (
-	"context"
 	"encoding/json"
 	"fmt"
 	"io"
@@ -319,7 +318,7 @@ func (s *runSuite) newServer(options *RunOptions) *Server {
 		Logger:     options.Logger,
 		Dumper:     options.Dumper,
 	}
-	srv.init(context.Background())
+	srv.setup()
 	return srv
 }
 
diff --git a/services/keep-balance/main.go b/services/keep-balance/main.go
index 606fde498..92ed644b5 100644
--- a/services/keep-balance/main.go
+++ b/services/keep-balance/main.go
@@ -41,14 +41,20 @@ func newHandler(ctx context.Context, cluster *arvados.Cluster, _ string) service
 		debugf = log.Printf
 	}
 
+	if options.Logger == nil {
+		options.Logger = ctxlog.FromContext(ctx)
+	}
+
 	srv := &Server{
 		Cluster:    cluster,
 		ArvClient:  ac,
 		RunOptions: options,
 		Metrics:    newMetrics(),
+		Logger:     options.Logger,
+		Dumper:     options.Dumper,
 	}
 
-	go srv.Start(ctx)
+	srv.Start()
 	return srv
 }
 
diff --git a/services/keep-balance/server.go b/services/keep-balance/server.go
index 23e597c89..3e665a30d 100644
--- a/services/keep-balance/server.go
+++ b/services/keep-balance/server.go
@@ -5,16 +5,15 @@
 package main
 
 import (
-	"context"
 	"net/http"
 	"os"
 	"os/signal"
+	"sync"
 	"syscall"
 	"time"
 
 	"git.curoverse.com/arvados.git/sdk/go/arvados"
 	"git.curoverse.com/arvados.git/sdk/go/auth"
-	"git.curoverse.com/arvados.git/sdk/go/ctxlog"
 	"github.com/julienschmidt/httprouter"
 	"github.com/prometheus/client_golang/prometheus/promhttp"
 	"github.com/sirupsen/logrus"
@@ -48,6 +47,7 @@ type Server struct {
 	Metrics    *metrics
 
 	httpHandler http.Handler
+	setupOnce   sync.Once
 
 	Logger logrus.FieldLogger
 	Dumper logrus.FieldLogger
@@ -64,28 +64,12 @@ func (srv *Server) CheckHealth() error {
 }
 
 // Start sets up and runs the balancer.
-func (srv *Server) Start(ctx context.Context) {
-	srv.init(ctx)
-
-	var err error
-	if srv.RunOptions.Once {
-		_, err = srv.runOnce()
-	} else {
-		err = srv.runForever(nil)
-	}
-	if err != nil {
-		srv.Logger.Error(err)
-	}
+func (srv *Server) Start() {
+	srv.setupOnce.Do(srv.setup)
+	go srv.run()
 }
 
-func (srv *Server) init(ctx context.Context) {
-	if srv.RunOptions.Logger == nil {
-		srv.RunOptions.Logger = ctxlog.FromContext(ctx)
-	}
-
-	srv.Logger = srv.RunOptions.Logger
-	srv.Dumper = srv.RunOptions.Dumper
-
+func (srv *Server) setup() {
 	if srv.Cluster.ManagementToken == "" {
 		srv.httpHandler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 			http.Error(w, "Management API authentication is not configured", http.StatusForbidden)
@@ -101,6 +85,18 @@ func (srv *Server) init(ctx context.Context) {
 	}
 }
 
+func (srv *Server) run() {
+	var err error
+	if srv.RunOptions.Once {
+		_, err = srv.runOnce()
+	} else {
+		err = srv.runForever(nil)
+	}
+	if err != nil {
+		srv.Logger.Error(err)
+	}
+}
+
 func (srv *Server) runOnce() (*Balancer, error) {
 	bal := &Balancer{
 		Logger:         srv.Logger,

commit 0020a0bc96ee13203fbdc2af28ffa077799213d0
Author: Eric Biagiotti <ebiagiotti at veritasgenetics.com>
Date:   Wed Sep 25 13:51:07 2019 -0400

    14714: Tests use cluster config
    
    Arvados-DCO-1.1-Signed-off-by: Eric Biagiotti <ebiagiotti at veritasgenetics.com>

diff --git a/services/keep-balance/balance_run_test.go b/services/keep-balance/balance_run_test.go
index db530bc49..1478e6e2e 100644
--- a/services/keep-balance/balance_run_test.go
+++ b/services/keep-balance/balance_run_test.go
@@ -5,6 +5,7 @@
 package main
 
 import (
+	"context"
 	"encoding/json"
 	"fmt"
 	"io"
@@ -16,7 +17,9 @@ import (
 	"sync"
 	"time"
 
+	"git.curoverse.com/arvados.git/lib/config"
 	"git.curoverse.com/arvados.git/sdk/go/arvados"
+	"git.curoverse.com/arvados.git/sdk/go/arvadostest"
 	"github.com/sirupsen/logrus"
 	check "gopkg.in/check.v1"
 )
@@ -303,7 +306,21 @@ func (s *stubServer) serveKeepstorePull() *reqTracker {
 
 type runSuite struct {
 	stub   stubServer
-	config Config
+	config *arvados.Cluster
+	client *arvados.Client
+}
+
+func (s *runSuite) newServer(options *RunOptions) *Server {
+	srv := &Server{
+		Cluster:    s.config,
+		ArvClient:  s.client,
+		RunOptions: *options,
+		Metrics:    newMetrics(),
+		Logger:     options.Logger,
+		Dumper:     options.Dumper,
+	}
+	srv.init(context.Background())
+	return srv
 }
 
 // make a log.Logger that writes to the current test's c.Log().
@@ -330,14 +347,19 @@ func (s *runSuite) logger(c *check.C) *logrus.Logger {
 }
 
 func (s *runSuite) SetUpTest(c *check.C) {
-	s.config = Config{
-		Client: arvados.Client{
-			AuthToken: "xyzzy",
-			APIHost:   "zzzzz.arvadosapi.com",
-			Client:    s.stub.Start()},
-		KeepServiceTypes: []string{"disk"},
-		RunPeriod:        arvados.Duration(time.Second),
-	}
+	cfg, err := config.NewLoader(nil, nil).Load()
+	c.Assert(err, check.Equals, nil)
+	s.config, err = cfg.GetCluster("")
+	c.Assert(err, check.Equals, nil)
+
+	s.config.Collections.BalancePeriod = arvados.Duration(time.Second)
+	arvadostest.SetServiceURL(&s.config.Services.Keepbalance, "http://localhost:/")
+
+	s.client = &arvados.Client{
+		AuthToken: "xyzzy",
+		APIHost:   "zzzzz.arvadosapi.com",
+		Client:    s.stub.Start()}
+
 	s.stub.serveDiscoveryDoc()
 	s.stub.logf = c.Logf
 }
@@ -359,35 +381,13 @@ func (s *runSuite) TestRefuseZeroCollections(c *check.C) {
 	s.stub.serveKeepstoreIndexFoo4Bar1()
 	trashReqs := s.stub.serveKeepstoreTrash()
 	pullReqs := s.stub.serveKeepstorePull()
-	srv, err := NewServer(s.config, opts)
-	c.Assert(err, check.IsNil)
-	_, err = srv.Run()
+	srv := s.newServer(&opts)
+	_, err := srv.runOnce()
 	c.Check(err, check.ErrorMatches, "received zero collections")
 	c.Check(trashReqs.Count(), check.Equals, 4)
 	c.Check(pullReqs.Count(), check.Equals, 0)
 }
 
-func (s *runSuite) TestServiceTypes(c *check.C) {
-	opts := RunOptions{
-		CommitPulls: true,
-		CommitTrash: true,
-		Logger:      s.logger(c),
-	}
-	s.config.KeepServiceTypes = []string{"unlisted-type"}
-	s.stub.serveCurrentUserAdmin()
-	s.stub.serveFooBarFileCollections()
-	s.stub.serveKeepServices(stubServices)
-	s.stub.serveKeepstoreMounts()
-	indexReqs := s.stub.serveKeepstoreIndexFoo4Bar1()
-	trashReqs := s.stub.serveKeepstoreTrash()
-	srv, err := NewServer(s.config, opts)
-	c.Assert(err, check.IsNil)
-	_, err = srv.Run()
-	c.Check(err, check.IsNil)
-	c.Check(indexReqs.Count(), check.Equals, 0)
-	c.Check(trashReqs.Count(), check.Equals, 0)
-}
-
 func (s *runSuite) TestRefuseNonAdmin(c *check.C) {
 	opts := RunOptions{
 		CommitPulls: true,
@@ -400,9 +400,8 @@ func (s *runSuite) TestRefuseNonAdmin(c *check.C) {
 	s.stub.serveKeepstoreMounts()
 	trashReqs := s.stub.serveKeepstoreTrash()
 	pullReqs := s.stub.serveKeepstorePull()
-	srv, err := NewServer(s.config, opts)
-	c.Assert(err, check.IsNil)
-	_, err = srv.Run()
+	srv := s.newServer(&opts)
+	_, err := srv.runOnce()
 	c.Check(err, check.ErrorMatches, "current user .* is not .* admin user")
 	c.Check(trashReqs.Count(), check.Equals, 0)
 	c.Check(pullReqs.Count(), check.Equals, 0)
@@ -421,9 +420,8 @@ func (s *runSuite) TestDetectSkippedCollections(c *check.C) {
 	s.stub.serveKeepstoreIndexFoo4Bar1()
 	trashReqs := s.stub.serveKeepstoreTrash()
 	pullReqs := s.stub.serveKeepstorePull()
-	srv, err := NewServer(s.config, opts)
-	c.Assert(err, check.IsNil)
-	_, err = srv.Run()
+	srv := s.newServer(&opts)
+	_, err := srv.runOnce()
 	c.Check(err, check.ErrorMatches, `Retrieved 2 collections with modtime <= .* but server now reports there are 3 collections.*`)
 	c.Check(trashReqs.Count(), check.Equals, 4)
 	c.Check(pullReqs.Count(), check.Equals, 0)
@@ -432,7 +430,7 @@ func (s *runSuite) TestDetectSkippedCollections(c *check.C) {
 func (s *runSuite) TestWriteLostBlocks(c *check.C) {
 	lostf, err := ioutil.TempFile("", "keep-balance-lost-blocks-test-")
 	c.Assert(err, check.IsNil)
-	s.config.LostBlocksFile = lostf.Name()
+	s.config.Collections.BlobMissingReport = lostf.Name()
 	defer os.Remove(lostf.Name())
 	opts := RunOptions{
 		CommitPulls: true,
@@ -446,9 +444,9 @@ func (s *runSuite) TestWriteLostBlocks(c *check.C) {
 	s.stub.serveKeepstoreIndexFoo1()
 	s.stub.serveKeepstoreTrash()
 	s.stub.serveKeepstorePull()
-	srv, err := NewServer(s.config, opts)
+	srv := s.newServer(&opts)
 	c.Assert(err, check.IsNil)
-	_, err = srv.Run()
+	_, err = srv.runOnce()
 	c.Check(err, check.IsNil)
 	lost, err := ioutil.ReadFile(lostf.Name())
 	c.Assert(err, check.IsNil)
@@ -468,9 +466,8 @@ func (s *runSuite) TestDryRun(c *check.C) {
 	s.stub.serveKeepstoreIndexFoo4Bar1()
 	trashReqs := s.stub.serveKeepstoreTrash()
 	pullReqs := s.stub.serveKeepstorePull()
-	srv, err := NewServer(s.config, opts)
-	c.Assert(err, check.IsNil)
-	bal, err := srv.Run()
+	srv := s.newServer(&opts)
+	bal, err := srv.runOnce()
 	c.Check(err, check.IsNil)
 	for _, req := range collReqs.reqs {
 		c.Check(req.Form.Get("include_trash"), check.Equals, "true")
@@ -486,10 +483,9 @@ func (s *runSuite) TestDryRun(c *check.C) {
 func (s *runSuite) TestCommit(c *check.C) {
 	lostf, err := ioutil.TempFile("", "keep-balance-lost-blocks-test-")
 	c.Assert(err, check.IsNil)
-	s.config.LostBlocksFile = lostf.Name()
+	s.config.Collections.BlobMissingReport = lostf.Name()
 	defer os.Remove(lostf.Name())
 
-	s.config.Listen = ":"
 	s.config.ManagementToken = "xyzzy"
 	opts := RunOptions{
 		CommitPulls: true,
@@ -504,9 +500,8 @@ func (s *runSuite) TestCommit(c *check.C) {
 	s.stub.serveKeepstoreIndexFoo4Bar1()
 	trashReqs := s.stub.serveKeepstoreTrash()
 	pullReqs := s.stub.serveKeepstorePull()
-	srv, err := NewServer(s.config, opts)
-	c.Assert(err, check.IsNil)
-	bal, err := srv.Run()
+	srv := s.newServer(&opts)
+	bal, err := srv.runOnce()
 	c.Check(err, check.IsNil)
 	c.Check(trashReqs.Count(), check.Equals, 8)
 	c.Check(pullReqs.Count(), check.Equals, 4)
@@ -529,7 +524,6 @@ func (s *runSuite) TestCommit(c *check.C) {
 }
 
 func (s *runSuite) TestRunForever(c *check.C) {
-	s.config.Listen = ":"
 	s.config.ManagementToken = "xyzzy"
 	opts := RunOptions{
 		CommitPulls: true,
@@ -546,13 +540,12 @@ func (s *runSuite) TestRunForever(c *check.C) {
 	pullReqs := s.stub.serveKeepstorePull()
 
 	stop := make(chan interface{})
-	s.config.RunPeriod = arvados.Duration(time.Millisecond)
-	srv, err := NewServer(s.config, opts)
-	c.Assert(err, check.IsNil)
+	s.config.Collections.BalancePeriod = arvados.Duration(time.Millisecond)
+	srv := s.newServer(&opts)
 
 	done := make(chan bool)
 	go func() {
-		srv.RunForever(stop)
+		srv.runForever(stop)
 		close(done)
 	}()
 
@@ -571,13 +564,16 @@ func (s *runSuite) TestRunForever(c *check.C) {
 }
 
 func (s *runSuite) getMetrics(c *check.C, srv *Server) string {
-	resp, err := http.Get("http://" + srv.listening + "/metrics")
-	c.Assert(err, check.IsNil)
-	c.Check(resp.StatusCode, check.Equals, http.StatusUnauthorized)
+	req := httptest.NewRequest("GET", "/metrics", nil)
+	resp := httptest.NewRecorder()
+	srv.ServeHTTP(resp, req)
+	c.Check(resp.Code, check.Equals, http.StatusUnauthorized)
+
+	req = httptest.NewRequest("GET", "/metrics?api_token=xyzzy", nil)
+	resp = httptest.NewRecorder()
+	srv.ServeHTTP(resp, req)
+	c.Check(resp.Code, check.Equals, http.StatusOK)
 
-	resp, err = http.Get("http://" + srv.listening + "/metrics?api_token=xyzzy")
-	c.Assert(err, check.IsNil)
-	c.Check(resp.StatusCode, check.Equals, http.StatusOK)
 	buf, err := ioutil.ReadAll(resp.Body)
 	c.Check(err, check.IsNil)
 	return string(buf)
diff --git a/services/keep-balance/collection_test.go b/services/keep-balance/collection_test.go
index 6aaf07aba..a2200e1db 100644
--- a/services/keep-balance/collection_test.go
+++ b/services/keep-balance/collection_test.go
@@ -29,7 +29,7 @@ func (s *integrationSuite) TestIdenticalTimestamps(c *check.C) {
 			longestStreak := 0
 			var lastMod time.Time
 			sawUUID := make(map[string]bool)
-			err := EachCollection(&s.config.Client, pageSize, func(c arvados.Collection) error {
+			err := EachCollection(s.client, pageSize, func(c arvados.Collection) error {
 				if c.ModifiedAt == nil {
 					return nil
 				}
diff --git a/services/keep-balance/integration_test.go b/services/keep-balance/integration_test.go
index a79779c7d..b50b6caf5 100644
--- a/services/keep-balance/integration_test.go
+++ b/services/keep-balance/integration_test.go
@@ -11,6 +11,7 @@ import (
 	"testing"
 	"time"
 
+	"git.curoverse.com/arvados.git/lib/config"
 	"git.curoverse.com/arvados.git/sdk/go/arvados"
 	"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
 	"git.curoverse.com/arvados.git/sdk/go/arvadostest"
@@ -22,7 +23,8 @@ import (
 var _ = check.Suite(&integrationSuite{})
 
 type integrationSuite struct {
-	config     Config
+	config     *arvados.Cluster
+	client     *arvados.Client
 	keepClient *keepclient.KeepClient
 }
 
@@ -59,14 +61,16 @@ func (s *integrationSuite) TearDownSuite(c *check.C) {
 }
 
 func (s *integrationSuite) SetUpTest(c *check.C) {
-	s.config = Config{
-		Client: arvados.Client{
-			APIHost:   os.Getenv("ARVADOS_API_HOST"),
-			AuthToken: arvadostest.DataManagerToken,
-			Insecure:  true,
-		},
-		KeepServiceTypes: []string{"disk"},
-		RunPeriod:        arvados.Duration(time.Second),
+	cfg, err := config.NewLoader(nil, nil).Load()
+	c.Assert(err, check.Equals, nil)
+	s.config, err = cfg.GetCluster("")
+	c.Assert(err, check.Equals, nil)
+	s.config.Collections.BalancePeriod = arvados.Duration(time.Second)
+
+	s.client = &arvados.Client{
+		APIHost:   os.Getenv("ARVADOS_API_HOST"),
+		AuthToken: arvadostest.DataManagerToken,
+		Insecure:  true,
 	}
 }
 
@@ -86,7 +90,7 @@ func (s *integrationSuite) TestBalanceAPIFixtures(c *check.C) {
 			Logger:  logger,
 			Metrics: newMetrics(),
 		}
-		nextOpts, err := bal.Run(s.config, opts)
+		nextOpts, err := bal.Run(s.client, s.config, opts)
 		c.Check(err, check.IsNil)
 		c.Check(nextOpts.SafeRendezvousState, check.Not(check.Equals), "")
 		c.Check(nextOpts.CommitPulls, check.Equals, true)
diff --git a/services/keep-balance/main_test.go b/services/keep-balance/main_test.go
deleted file mode 100644
index a2804344b..000000000
--- a/services/keep-balance/main_test.go
+++ /dev/null
@@ -1,46 +0,0 @@
-// Copyright (C) The Arvados Authors. All rights reserved.
-//
-// SPDX-License-Identifier: AGPL-3.0
-
-package main
-
-import (
-	"time"
-
-	"github.com/ghodss/yaml"
-	check "gopkg.in/check.v1"
-)
-
-var _ = check.Suite(&mainSuite{})
-
-type mainSuite struct{}
-
-func (s *mainSuite) TestExampleJSON(c *check.C) {
-	var config Config
-	c.Check(yaml.Unmarshal(exampleConfigFile, &config), check.IsNil)
-	c.Check(config.KeepServiceTypes, check.DeepEquals, []string{"disk"})
-	c.Check(config.Client.AuthToken, check.Equals, "xyzzy")
-	c.Check(time.Duration(config.RunPeriod), check.Equals, 600*time.Second)
-}
-
-func (s *mainSuite) TestConfigJSONWithKeepServiceList(c *check.C) {
-	var config Config
-	c.Check(yaml.Unmarshal([]byte(`{
-		    "Client": {
-			"APIHost": "zzzzz.arvadosapi.com:443",
-			"AuthToken": "xyzzy",
-			"Insecure": false
-		    },
-		    "KeepServiceList": {
-			"items": [
-			    {"uuid":"zzzzz-bi64l-abcdefghijklmno", "service_type":"disk", "service_host":"a.zzzzz.arvadosapi.com", "service_port":12345},
-			    {"uuid":"zzzzz-bi64l-bcdefghijklmnop", "service_type":"blob", "service_host":"b.zzzzz.arvadosapi.com", "service_port":12345}
-			]
-		    },
-		    "RunPeriod": "600s"
-		}`), &config), check.IsNil)
-	c.Assert(len(config.KeepServiceList.Items), check.Equals, 2)
-	c.Check(config.KeepServiceList.Items[0].UUID, check.Equals, "zzzzz-bi64l-abcdefghijklmno")
-	c.Check(config.KeepServiceList.Items[0].ServicePort, check.Equals, 12345)
-	c.Check(config.Client.AuthToken, check.Equals, "xyzzy")
-}
diff --git a/services/keep-balance/server.go b/services/keep-balance/server.go
index 0f4bb7176..23e597c89 100644
--- a/services/keep-balance/server.go
+++ b/services/keep-balance/server.go
@@ -13,7 +13,10 @@ import (
 	"time"
 
 	"git.curoverse.com/arvados.git/sdk/go/arvados"
+	"git.curoverse.com/arvados.git/sdk/go/auth"
 	"git.curoverse.com/arvados.git/sdk/go/ctxlog"
+	"github.com/julienschmidt/httprouter"
+	"github.com/prometheus/client_golang/prometheus/promhttp"
 	"github.com/sirupsen/logrus"
 )
 
@@ -39,16 +42,22 @@ type RunOptions struct {
 }
 
 type Server struct {
-	http.Handler
 	Cluster    *arvados.Cluster
 	ArvClient  *arvados.Client
 	RunOptions RunOptions
 	Metrics    *metrics
 
+	httpHandler http.Handler
+
 	Logger logrus.FieldLogger
 	Dumper logrus.FieldLogger
 }
 
+// ServeHTTP implements service.Handler.
+func (srv *Server) ServeHTTP(w http.ResponseWriter, r *http.Request) {
+	srv.httpHandler.ServeHTTP(w, r)
+}
+
 // CheckHealth implements service.Handler.
 func (srv *Server) CheckHealth() error {
 	return nil
@@ -56,16 +65,11 @@ func (srv *Server) CheckHealth() error {
 
 // Start sets up and runs the balancer.
 func (srv *Server) Start(ctx context.Context) {
-	if srv.RunOptions.Logger == nil {
-		srv.RunOptions.Logger = ctxlog.FromContext(ctx)
-	}
-
-	srv.Logger = srv.RunOptions.Logger
-	srv.Dumper = srv.RunOptions.Dumper
+	srv.init(ctx)
 
 	var err error
 	if srv.RunOptions.Once {
-		_, err = srv.run()
+		_, err = srv.runOnce()
 	} else {
 		err = srv.runForever(nil)
 	}
@@ -74,7 +78,30 @@ func (srv *Server) Start(ctx context.Context) {
 	}
 }
 
-func (srv *Server) run() (*Balancer, error) {
+func (srv *Server) init(ctx context.Context) {
+	if srv.RunOptions.Logger == nil {
+		srv.RunOptions.Logger = ctxlog.FromContext(ctx)
+	}
+
+	srv.Logger = srv.RunOptions.Logger
+	srv.Dumper = srv.RunOptions.Dumper
+
+	if srv.Cluster.ManagementToken == "" {
+		srv.httpHandler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			http.Error(w, "Management API authentication is not configured", http.StatusForbidden)
+		})
+	} else {
+		mux := httprouter.New()
+		metricsH := promhttp.HandlerFor(srv.Metrics.reg, promhttp.HandlerOpts{
+			ErrorLog: srv.Logger,
+		})
+		mux.Handler("GET", "/metrics", metricsH)
+		mux.Handler("GET", "/metrics.json", metricsH)
+		srv.httpHandler = auth.RequireLiteralToken(srv.Cluster.ManagementToken, mux)
+	}
+}
+
+func (srv *Server) runOnce() (*Balancer, error) {
 	bal := &Balancer{
 		Logger:         srv.Logger,
 		Dumper:         srv.Dumper,
@@ -106,7 +133,7 @@ func (srv *Server) runForever(stop <-chan interface{}) error {
 			logger.Print("=======  Consider using -commit-pulls and -commit-trash flags.")
 		}
 
-		_, err := srv.run()
+		_, err := srv.runOnce()
 		if err != nil {
 			logger.Print("run failed: ", err)
 		} else {

commit da79c28c17eb5de3196ab49718b86c3f73db2380
Author: Eric Biagiotti <ebiagiotti at veritasgenetics.com>
Date:   Tue Sep 24 09:58:40 2019 -0400

    14714: keep-balance uses cluster config
    
    - Removes the -dump-config flag.
    - Removes the options to specify a keep service list or type. Will now balance all keep services of type disk reported by the keep_services endpoint.
    - Removes the -debug flag. Debug messages are now enabled by setting SystemLogs.LogLevel to "debug" in the cluster config.
    - Reorganizes the Server struct to use lib/service to do generic service things.
    
    Arvados-DCO-1.1-Signed-off-by: Eric Biagiotti <ebiagiotti at veritasgenetics.com>

diff --git a/services/keep-balance/balance.go b/services/keep-balance/balance.go
index 9f814a20d..a887c3c69 100644
--- a/services/keep-balance/balance.go
+++ b/services/keep-balance/balance.go
@@ -66,7 +66,7 @@ type Balancer struct {
 // Typical usage:
 //
 //   runOptions, err = (&Balancer{}).Run(config, runOptions)
-func (bal *Balancer) Run(config Config, runOptions RunOptions) (nextRunOptions RunOptions, err error) {
+func (bal *Balancer) Run(client *arvados.Client, cluster *arvados.Cluster, runOptions RunOptions) (nextRunOptions RunOptions, err error) {
 	nextRunOptions = runOptions
 
 	defer bal.time("sweep", "wall clock time to run one full sweep")()
@@ -95,24 +95,21 @@ func (bal *Balancer) Run(config Config, runOptions RunOptions) (nextRunOptions R
 		bal.lostBlocks = ioutil.Discard
 	}
 
-	if len(config.KeepServiceList.Items) > 0 {
-		err = bal.SetKeepServices(config.KeepServiceList)
-	} else {
-		err = bal.DiscoverKeepServices(&config.Client, config.KeepServiceTypes)
-	}
+	diskService := []string{"disk"}
+	err = bal.DiscoverKeepServices(client, diskService)
 	if err != nil {
 		return
 	}
 
 	for _, srv := range bal.KeepServices {
-		err = srv.discoverMounts(&config.Client)
+		err = srv.discoverMounts(client)
 		if err != nil {
 			return
 		}
 	}
 	bal.cleanupMounts()
 
-	if err = bal.CheckSanityEarly(&config.Client); err != nil {
+	if err = bal.CheckSanityEarly(client); err != nil {
 		return
 	}
 	rs := bal.rendezvousState()
@@ -121,7 +118,7 @@ func (bal *Balancer) Run(config Config, runOptions RunOptions) (nextRunOptions R
 			bal.logf("notice: KeepServices list has changed since last run")
 		}
 		bal.logf("clearing existing trash lists, in case the new rendezvous order differs from previous run")
-		if err = bal.ClearTrashLists(&config.Client); err != nil {
+		if err = bal.ClearTrashLists(client); err != nil {
 			return
 		}
 		// The current rendezvous state becomes "safe" (i.e.,
@@ -130,7 +127,7 @@ func (bal *Balancer) Run(config Config, runOptions RunOptions) (nextRunOptions R
 		// succeed in clearing existing trash lists.
 		nextRunOptions.SafeRendezvousState = rs
 	}
-	if err = bal.GetCurrentState(&config.Client, config.CollectionBatchSize, config.CollectionBuffers); err != nil {
+	if err = bal.GetCurrentState(client, cluster.Collections.BalanceCollectionBatch, cluster.Collections.BalanceCollectionBuffers); err != nil {
 		return
 	}
 	bal.ComputeChangeSets()
@@ -150,14 +147,14 @@ func (bal *Balancer) Run(config Config, runOptions RunOptions) (nextRunOptions R
 		lbFile = nil
 	}
 	if runOptions.CommitPulls {
-		err = bal.CommitPulls(&config.Client)
+		err = bal.CommitPulls(client)
 		if err != nil {
 			// Skip trash if we can't pull. (Too cautious?)
 			return
 		}
 	}
 	if runOptions.CommitTrash {
-		err = bal.CommitTrash(&config.Client)
+		err = bal.CommitTrash(client)
 	}
 	return
 }
diff --git a/services/keep-balance/main.go b/services/keep-balance/main.go
index 84516a821..606fde498 100644
--- a/services/keep-balance/main.go
+++ b/services/keep-balance/main.go
@@ -5,97 +5,90 @@
 package main
 
 import (
-	"encoding/json"
+	"context"
 	"flag"
 	"fmt"
+	"io"
 	"log"
-	"net/http"
 	"os"
-	"time"
 
+	"git.curoverse.com/arvados.git/lib/config"
+	"git.curoverse.com/arvados.git/lib/service"
 	"git.curoverse.com/arvados.git/sdk/go/arvados"
-	"git.curoverse.com/arvados.git/sdk/go/config"
+	"git.curoverse.com/arvados.git/sdk/go/ctxlog"
 	"github.com/sirupsen/logrus"
 )
 
-var debugf = func(string, ...interface{}) {}
+var (
+	version = "dev"
+	debugf  = func(string, ...interface{}) {}
+	command = service.Command(arvados.ServiceNameKeepbalance, newHandler)
+	options RunOptions
+)
 
-func main() {
-	var cfg Config
-	var runOptions RunOptions
-
-	configPath := flag.String("config", defaultConfigPath,
-		"`path` of JSON or YAML configuration file")
-	serviceListPath := flag.String("config.KeepServiceList", "",
-		"`path` of JSON or YAML file with list of keep services to balance, as given by \"arv keep_service list\" "+
-			"(default: config[\"KeepServiceList\"], or if none given, get all available services and filter by config[\"KeepServiceTypes\"])")
-	flag.BoolVar(&runOptions.Once, "once", false,
-		"balance once and then exit")
-	flag.BoolVar(&runOptions.CommitPulls, "commit-pulls", false,
-		"send pull requests (make more replicas of blocks that are underreplicated or are not in optimal rendezvous probe order)")
-	flag.BoolVar(&runOptions.CommitTrash, "commit-trash", false,
-		"send trash requests (delete unreferenced old blocks, and excess replicas of overreplicated blocks)")
-	dumpConfig := flag.Bool("dump-config", false, "write current configuration to stdout and exit")
-	dumpFlag := flag.Bool("dump", false, "dump details for each block to stdout")
-	debugFlag := flag.Bool("debug", false, "enable debug messages")
-	getVersion := flag.Bool("version", false, "Print version information and exit.")
-	flag.Usage = usage
-	flag.Parse()
-
-	// Print version information if requested
-	if *getVersion {
-		fmt.Printf("keep-balance %s\n", version)
-		return
+func newHandler(ctx context.Context, cluster *arvados.Cluster, _ string) service.Handler {
+	if !options.Once && cluster.Collections.BalancePeriod == arvados.Duration(0) {
+		return service.ErrorHandler(ctx, cluster, fmt.Errorf("You must either run keep-balance with the -once flag, or set Collections.BalancePeriod in the config. "+
+			"If using the legacy keep-balance.yml config, RunPeriod is the equivalant of Collections.BalancePeriod."))
 	}
 
-	mustReadConfig(&cfg, *configPath)
-	if *serviceListPath != "" {
-		mustReadConfig(&cfg.KeepServiceList, *serviceListPath)
+	ac, err := arvados.NewClientFromConfig(cluster)
+	if err != nil {
+		return service.ErrorHandler(ctx, cluster, fmt.Errorf("error initializing client from cluster config: %s", err))
 	}
 
-	if *dumpConfig {
-		log.Fatal(config.DumpAndExit(cfg))
+	if cluster.SystemLogs.LogLevel == "debug" {
+		debugf = log.Printf
 	}
 
-	to := time.Duration(cfg.RequestTimeout)
-	if to == 0 {
-		to = 30 * time.Minute
+	srv := &Server{
+		Cluster:    cluster,
+		ArvClient:  ac,
+		RunOptions: options,
+		Metrics:    newMetrics(),
 	}
-	arvados.DefaultSecureClient.Timeout = to
-	arvados.InsecureHTTPClient.Timeout = to
-	http.DefaultClient.Timeout = to
 
-	log.Printf("keep-balance %s started", version)
+	go srv.Start(ctx)
+	return srv
+}
+
+func main() {
+	os.Exit(runCommand(os.Args[0], os.Args[1:], os.Stdin, os.Stdout, os.Stderr))
+}
+
+func runCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
+	logger := ctxlog.FromContext(context.Background())
+
+	flags := flag.NewFlagSet(prog, flag.ExitOnError)
+	flags.BoolVar(&options.Once, "once", false,
+		"balance once and then exit")
+	flags.BoolVar(&options.CommitPulls, "commit-pulls", false,
+		"send pull requests (make more replicas of blocks that are underreplicated or are not in optimal rendezvous probe order)")
+	flags.BoolVar(&options.CommitTrash, "commit-trash", false,
+		"send trash requests (delete unreferenced old blocks, and excess replicas of overreplicated blocks)")
+	flags.Bool("version", false, "Write version information to stdout and exit 0")
+	dumpFlag := flags.Bool("dump", false, "dump details for each block to stdout")
+
+	loader := config.NewLoader(os.Stdin, logger)
+	loader.SetupFlags(flags)
+
+	munged := loader.MungeLegacyConfigArgs(logger, args, "-legacy-keepbalance-config")
+	flags.Parse(munged)
 
-	if *debugFlag {
-		debugf = log.Printf
-		if j, err := json.Marshal(cfg); err != nil {
-			log.Fatal(err)
-		} else {
-			log.Printf("config is %s", j)
-		}
-	}
 	if *dumpFlag {
 		dumper := logrus.New()
 		dumper.Out = os.Stdout
 		dumper.Formatter = &logrus.TextFormatter{}
-		runOptions.Dumper = dumper
-	}
-	srv, err := NewServer(cfg, runOptions)
-	if err != nil {
-		// (don't run)
-	} else if runOptions.Once {
-		_, err = srv.Run()
-	} else {
-		err = srv.RunForever(nil)
-	}
-	if err != nil {
-		log.Fatal(err)
+		options.Dumper = dumper
 	}
-}
 
-func mustReadConfig(dst interface{}, path string) {
-	if err := config.LoadFile(dst, path); err != nil {
-		log.Fatal(err)
-	}
+	// Only pass along the version flag, which gets handled in RunCommand
+	args = nil
+	flags.Visit(func(f *flag.Flag) {
+		if f.Name == "version" {
+			args = append(args, "-"+f.Name, f.Value.String())
+		}
+	})
+
+	return command.RunCommand(prog, args, stdin, stdout, stderr)
 }
diff --git a/services/keep-balance/server.go b/services/keep-balance/server.go
index e2f13a425..0f4bb7176 100644
--- a/services/keep-balance/server.go
+++ b/services/keep-balance/server.go
@@ -6,7 +6,6 @@ package main
 
 import (
 	"context"
-	"fmt"
 	"net/http"
 	"os"
 	"os/signal"
@@ -14,57 +13,10 @@ import (
 	"time"
 
 	"git.curoverse.com/arvados.git/sdk/go/arvados"
-	"git.curoverse.com/arvados.git/sdk/go/auth"
 	"git.curoverse.com/arvados.git/sdk/go/ctxlog"
-	"git.curoverse.com/arvados.git/sdk/go/httpserver"
 	"github.com/sirupsen/logrus"
 )
 
-var version = "dev"
-
-const (
-	defaultConfigPath = "/etc/arvados/keep-balance/keep-balance.yml"
-	rfc3339NanoFixed  = "2006-01-02T15:04:05.000000000Z07:00"
-)
-
-// Config specifies site configuration, like API credentials and the
-// choice of which servers are to be balanced.
-//
-// Config is loaded from a JSON config file (see usage()).
-type Config struct {
-	// Arvados API endpoint and credentials.
-	Client arvados.Client
-
-	// List of service types (e.g., "disk") to balance.
-	KeepServiceTypes []string
-
-	KeepServiceList arvados.KeepServiceList
-
-	// address, address:port, or :port for management interface
-	Listen string
-
-	// token for management APIs
-	ManagementToken string
-
-	// How often to check
-	RunPeriod arvados.Duration
-
-	// Number of collections to request in each API call
-	CollectionBatchSize int
-
-	// Max collections to buffer in memory (bigger values consume
-	// more memory, but can reduce store-and-forward latency when
-	// fetching pages)
-	CollectionBuffers int
-
-	// Timeout for outgoing http request/response cycle.
-	RequestTimeout arvados.Duration
-
-	// Destination filename for the list of lost block hashes, one
-	// per line. Updated atomically during each successful run.
-	LostBlocksFile string
-}
-
 // RunOptions controls runtime behavior. The flags/options that belong
 // here are the ones that are useful for interactive use. For example,
 // "CommitTrash" is a runtime option rather than a config item because
@@ -87,100 +39,74 @@ type RunOptions struct {
 }
 
 type Server struct {
-	config     Config
-	runOptions RunOptions
-	metrics    *metrics
-	listening  string // for tests
+	http.Handler
+	Cluster    *arvados.Cluster
+	ArvClient  *arvados.Client
+	RunOptions RunOptions
+	Metrics    *metrics
 
 	Logger logrus.FieldLogger
 	Dumper logrus.FieldLogger
 }
 
-// NewServer returns a new Server that runs Balancers using the given
-// config and runOptions.
-func NewServer(config Config, runOptions RunOptions) (*Server, error) {
-	if len(config.KeepServiceList.Items) > 0 && config.KeepServiceTypes != nil {
-		return nil, fmt.Errorf("cannot specify both KeepServiceList and KeepServiceTypes in config")
-	}
-	if !runOptions.Once && config.RunPeriod == arvados.Duration(0) {
-		return nil, fmt.Errorf("you must either use the -once flag, or specify RunPeriod in config")
-	}
+// CheckHealth implements service.Handler.
+func (srv *Server) CheckHealth() error {
+	return nil
+}
 
-	if runOptions.Logger == nil {
-		log := logrus.New()
-		log.Formatter = &logrus.JSONFormatter{
-			TimestampFormat: rfc3339NanoFixed,
-		}
-		log.Out = os.Stderr
-		runOptions.Logger = log
+// Start sets up and runs the balancer.
+func (srv *Server) Start(ctx context.Context) {
+	if srv.RunOptions.Logger == nil {
+		srv.RunOptions.Logger = ctxlog.FromContext(ctx)
 	}
 
-	srv := &Server{
-		config:     config,
-		runOptions: runOptions,
-		metrics:    newMetrics(),
-		Logger:     runOptions.Logger,
-		Dumper:     runOptions.Dumper,
-	}
-	return srv, srv.start()
-}
+	srv.Logger = srv.RunOptions.Logger
+	srv.Dumper = srv.RunOptions.Dumper
 
-func (srv *Server) start() error {
-	if srv.config.Listen == "" {
-		return nil
-	}
-	ctx := ctxlog.Context(context.Background(), srv.Logger)
-	server := &httpserver.Server{
-		Server: http.Server{
-			Handler: httpserver.HandlerWithContext(ctx,
-				httpserver.LogRequests(
-					auth.RequireLiteralToken(srv.config.ManagementToken,
-						srv.metrics.Handler(srv.Logger)))),
-		},
-		Addr: srv.config.Listen,
+	var err error
+	if srv.RunOptions.Once {
+		_, err = srv.run()
+	} else {
+		err = srv.runForever(nil)
 	}
-	err := server.Start()
 	if err != nil {
-		return err
+		srv.Logger.Error(err)
 	}
-	srv.Logger.Printf("listening at %s", server.Addr)
-	srv.listening = server.Addr
-	return nil
 }
 
-func (srv *Server) Run() (*Balancer, error) {
+func (srv *Server) run() (*Balancer, error) {
 	bal := &Balancer{
 		Logger:         srv.Logger,
 		Dumper:         srv.Dumper,
-		Metrics:        srv.metrics,
-		LostBlocksFile: srv.config.LostBlocksFile,
+		Metrics:        srv.Metrics,
+		LostBlocksFile: srv.Cluster.Collections.BlobMissingReport,
 	}
 	var err error
-	srv.runOptions, err = bal.Run(srv.config, srv.runOptions)
+	srv.RunOptions, err = bal.Run(srv.ArvClient, srv.Cluster, srv.RunOptions)
 	return bal, err
 }
 
 // RunForever runs forever, or (for testing purposes) until the given
 // stop channel is ready to receive.
-func (srv *Server) RunForever(stop <-chan interface{}) error {
-	logger := srv.runOptions.Logger
+func (srv *Server) runForever(stop <-chan interface{}) error {
+	logger := srv.Logger
 
-	ticker := time.NewTicker(time.Duration(srv.config.RunPeriod))
+	ticker := time.NewTicker(time.Duration(srv.Cluster.Collections.BalancePeriod))
 
 	// The unbuffered channel here means we only hear SIGUSR1 if
 	// it arrives while we're waiting in select{}.
 	sigUSR1 := make(chan os.Signal)
 	signal.Notify(sigUSR1, syscall.SIGUSR1)
 
-	logger.Printf("starting up: will scan every %v and on SIGUSR1", srv.config.RunPeriod)
+	logger.Printf("starting up: will scan every %v and on SIGUSR1", srv.Cluster.Collections.BalancePeriod)
 
 	for {
-		if !srv.runOptions.CommitPulls && !srv.runOptions.CommitTrash {
+		if !srv.RunOptions.CommitPulls && !srv.RunOptions.CommitTrash {
 			logger.Print("WARNING: Will scan periodically, but no changes will be committed.")
 			logger.Print("=======  Consider using -commit-pulls and -commit-trash flags.")
 		}
 
-		_, err := srv.Run()
+		_, err := srv.run()
 		if err != nil {
 			logger.Print("run failed: ", err)
 		} else {
@@ -199,7 +125,7 @@ func (srv *Server) RunForever(stop <-chan interface{}) error {
 			// run too soon after the Nth run is triggered
 			// by SIGUSR1.
 			ticker.Stop()
-			ticker = time.NewTicker(time.Duration(srv.config.RunPeriod))
+			ticker = time.NewTicker(time.Duration(srv.Cluster.Collections.BalancePeriod))
 		}
 		logger.Print("starting next run")
 	}
diff --git a/services/keep-balance/usage.go b/services/keep-balance/usage.go
deleted file mode 100644
index b39e83905..000000000
--- a/services/keep-balance/usage.go
+++ /dev/null
@@ -1,106 +0,0 @@
-// Copyright (C) The Arvados Authors. All rights reserved.
-//
-// SPDX-License-Identifier: AGPL-3.0
-
-package main
-
-import (
-	"flag"
-	"fmt"
-	"os"
-)
-
-var exampleConfigFile = []byte(`
-Client:
-    APIHost: zzzzz.arvadosapi.com:443
-    AuthToken: xyzzy
-    Insecure: false
-KeepServiceTypes:
-    - disk
-Listen: ":9005"
-ManagementToken: xyzzy
-RunPeriod: 600s
-CollectionBatchSize: 100000
-CollectionBuffers: 1000
-RequestTimeout: 30m`)
-
-func usage() {
-	fmt.Fprintf(os.Stderr, `
-
-keep-balance rebalances a set of keepstore servers. It creates new
-copies of underreplicated blocks, deletes excess copies of
-overreplicated and unreferenced blocks, and moves blocks to better
-positions (according to the rendezvous hash algorithm) so clients find
-them faster.
-
-Usage: keep-balance [options]
-
-Options:
-`)
-	flag.PrintDefaults()
-	fmt.Fprintf(os.Stderr, `
-Example config file:
-%s
-
-    Client.AuthToken must be recognized by Arvados as an admin token,
-    and must be recognized by all Keep services as a "data manager
-    key".
-
-    Client.Insecure should be true if your Arvados API endpoint uses
-    an unverifiable SSL/TLS certificate.
-
-Periodic scanning:
-
-    By default, keep-balance operates periodically, i.e.: do a
-    scan/balance operation, sleep, repeat.
-
-    RunPeriod determines the interval between start times of
-    successive scan/balance operations. If a scan/balance operation
-    takes longer than RunPeriod, the next one will follow it
-    immediately.
-
-    If SIGUSR1 is received during an idle period between operations,
-    the next operation will start immediately.
-
-One-time scanning:
-
-    Use the -once flag to do a single operation and then exit. The
-    exit code will be zero if the operation was successful.
-
-Committing:
-
-    By default, keep-service computes and reports changes but does not
-    implement them by sending pull and trash lists to the Keep
-    services.
-
-    Use the -commit-pull and -commit-trash flags to implement the
-    computed changes.
-
-Tuning resource usage:
-
-    CollectionBatchSize limits the number of collections retrieved per
-    API transaction. If this is zero or omitted, page size is
-    determined by the API server's own page size limits (see
-    max_items_per_response and max_index_database_read configs).
-
-    CollectionBuffers sets the size of an internal queue of
-    collections. Higher values use more memory, and improve throughput
-    by allowing keep-balance to fetch the next page of collections
-    while the current page is still being processed. If this is zero
-    or omitted, pages are processed serially.
-
-    RequestTimeout is the maximum time keep-balance will spend on a
-    single HTTP request (getting a page of collections, getting the
-    block index from a keepstore server, or sending a trash or pull
-    list to a keepstore server). Defaults to 30 minutes.
-
-Limitations:
-
-    keep-balance does not attempt to discover whether committed pull
-    and trash requests ever get carried out -- only that they are
-    accepted by the Keep services. If some services are full, new
-    copies of underreplicated blocks might never get made, only
-    repeatedly requested.
-
-`, exampleConfigFile)
-}

commit 3b1947092b856e2c3bdb733828b1c951ac158b06
Author: Eric Biagiotti <ebiagiotti at veritasgenetics.com>
Date:   Fri Sep 6 14:39:39 2019 -0400

    14714: Adds keep-balance to cluster config loading
    
    Also adds a deprecated config loading test and fixes the service file
    
    Arvados-DCO-1.1-Signed-off-by: Eric Biagiotti <ebiagiotti at veritasgenetics.com>

diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml
index 572a2558e..4338c18ed 100644
--- a/lib/config/config.default.yml
+++ b/lib/config/config.default.yml
@@ -382,6 +382,36 @@ Clusters:
       # The default is 2 weeks.
       BlobSigningTTL: 336h
 
+      # When running keep-balance, this is the destination filename for the
+      # list of lost block hashes if there are any, one per line. Updated atomically during
+      # each successful run.
+      BlobMissingReport: ""
+
+      # keep-balance operates periodically, i.e.: do a
+      # scan/balance operation, sleep, repeat.
+      #
+      # BalancePeriod determines the interval between start times of
+      # successive scan/balance operations. If a scan/balance operation
+      # takes longer than BalancePeriod, the next one will follow it
+      # immediately.
+      #
+      # If SIGUSR1 is received during an idle period between operations,
+      # the next operation will start immediately.
+      BalancePeriod: 10m
+
+      # Limits the number of collections retrieved by keep-balance per
+      # API transaction. If this is zero, page size is
+      # determined by the API server's own page size limits (see
+      # API.MaxItemsPerResponse and API.MaxIndexDatabaseRead).
+      BalanceCollectionBatch: 100000
+
+      # The size of keep-balance's internal queue of
+      # collections. Higher values use more memory and improve throughput
+      # by allowing keep-balance to fetch the next page of collections
+      # while the current page is still being processed. If this is zero
+      # or omitted, pages are processed serially.
+      BalanceCollectionBuffers: 1000
+
       # Default lifetime for ephemeral collections: 2 weeks. This must not
       # be less than BlobSigningTTL.
       DefaultTrashLifetime: 336h
diff --git a/lib/config/deprecated.go b/lib/config/deprecated.go
index 0a030fb04..ba2c79acf 100644
--- a/lib/config/deprecated.go
+++ b/lib/config/deprecated.go
@@ -474,3 +474,71 @@ func (ldr *Loader) loadOldGitHttpdConfig(cfg *arvados.Config) error {
 	cfg.Clusters[cluster.ClusterID] = *cluster
 	return nil
 }
+
+const defaultKeepBalanceConfigPath = "/etc/arvados/keep-balance/keep-balance.yml"
+
+type oldKeepBalanceConfig struct {
+	Client              *arvados.Client
+	Listen              *string
+	KeepServiceTypes    *[]string
+	KeepServiceList     *arvados.KeepServiceList
+	RunPeriod           *arvados.Duration
+	CollectionBatchSize *int
+	CollectionBuffers   *int
+	RequestTimeout      *arvados.Duration
+	LostBlocksFile      *string
+	ManagementToken     *string
+}
+
+func (ldr *Loader) loadOldKeepBalanceConfig(cfg *arvados.Config) error {
+	if ldr.KeepBalancePath == "" {
+		return nil
+	}
+	var oc oldKeepBalanceConfig
+	err := ldr.loadOldConfigHelper("keep-balance", ldr.KeepBalancePath, &oc)
+	if os.IsNotExist(err) && ldr.KeepBalancePath == defaultKeepBalanceConfigPath {
+		return nil
+	} else if err != nil {
+		return err
+	}
+
+	cluster, err := cfg.GetCluster("")
+	if err != nil {
+		return err
+	}
+
+	loadOldClientConfig(cluster, oc.Client)
+
+	if oc.Listen != nil {
+		cluster.Services.Keepbalance.InternalURLs[arvados.URL{Host: *oc.Listen}] = arvados.ServiceInstance{}
+	}
+	if oc.ManagementToken != nil {
+		cluster.ManagementToken = *oc.ManagementToken
+	}
+	if oc.RunPeriod != nil {
+		cluster.Collections.BalancePeriod = *oc.RunPeriod
+	}
+	if oc.LostBlocksFile != nil {
+		cluster.Collections.BlobMissingReport = *oc.LostBlocksFile
+	}
+	if oc.CollectionBatchSize != nil {
+		cluster.Collections.BalanceCollectionBatch = *oc.CollectionBatchSize
+	}
+	if oc.CollectionBuffers != nil {
+		cluster.Collections.BalanceCollectionBuffers = *oc.CollectionBuffers
+	}
+	if oc.RequestTimeout != nil {
+		cluster.API.KeepServiceRequestTimeout = *oc.RequestTimeout
+	}
+
+	msg := "To balance specfic keep services, please update to the cluster config."
+	if oc.KeepServiceTypes != nil && len(*oc.KeepServiceTypes) > 0 {
+		ldr.Logger.Warnf("The KeepServiceType configuration option is not longer supported and is being ignored. %s", msg)
+	}
+	if oc.KeepServiceList != nil {
+		return fmt.Errorf("The KeepServiceList configuration option is no longer supported. Please remove it from your configuration file. %s", msg)
+	}
+
+	cfg.Clusters[cluster.ClusterID] = *cluster
+	return nil
+}
diff --git a/lib/config/deprecated_test.go b/lib/config/deprecated_test.go
index ea9b50d03..8b80d6275 100644
--- a/lib/config/deprecated_test.go
+++ b/lib/config/deprecated_test.go
@@ -216,3 +216,48 @@ func (s *LoadSuite) TestLegacyArvGitHttpdConfig(c *check.C) {
 	c.Check(cluster.Git.Repositories, check.Equals, "/test/reporoot")
 	c.Check(cluster.Services.Keepproxy.InternalURLs[arvados.URL{Host: ":9000"}], check.Equals, arvados.ServiceInstance{})
 }
+
+func (s *LoadSuite) TestLegacyKeepBalanceConfig(c *check.C) {
+	f := "-legacy-keepbalance-config"
+	content := []byte(fmtKeepBalanceConfig(""))
+	cluster, err := testLoadLegacyConfig(content, f, c)
+
+	c.Check(err, check.IsNil)
+	c.Check(cluster, check.NotNil)
+	c.Check(cluster.ManagementToken, check.Equals, "xyzzy")
+	c.Check(cluster.Services.Keepbalance.InternalURLs[arvados.URL{Host: ":80"}], check.Equals, arvados.ServiceInstance{})
+	c.Check(cluster.Collections.BalanceCollectionBuffers, check.Equals, 1000)
+	c.Check(cluster.Collections.BalanceCollectionBatch, check.Equals, 100000)
+	c.Check(cluster.Collections.BalancePeriod.String(), check.Equals, "10m")
+	c.Check(cluster.Collections.BlobMissingReport, check.Equals, "testfile")
+	c.Check(cluster.API.KeepServiceRequestTimeout.String(), check.Equals, "30m")
+
+	content = []byte(fmtKeepBalanceConfig(`"KeepServiceTypes":["disk"],`))
+	_, err = testLoadLegacyConfig(content, f, c)
+	c.Check(err, check.IsNil)
+
+	content = []byte(fmtKeepBalanceConfig(`"KeepServiceList":{},`))
+	_, err = testLoadLegacyConfig(content, f, c)
+	c.Check(err, check.NotNil)
+}
+
+func fmtKeepBalanceConfig(param string) string {
+	return fmt.Sprintf(`
+{
+	"Client": {
+		"Scheme": "",
+		"APIHost": "example.com",
+		"AuthToken": "abcdefg",
+		"Insecure": false
+	},
+	"Listen": ":80",
+	%s
+	"RunPeriod": "10m",
+	"CollectionBatchSize": 100000,
+	"CollectionBuffers": 1000,
+	"RequestTimeout": "30m",
+	"ManagementToken": "xyzzy",
+	"LostBlocksFile": "testfile"
+}
+`, param)
+}
diff --git a/lib/config/export.go b/lib/config/export.go
index 8df561c00..5437836f6 100644
--- a/lib/config/export.go
+++ b/lib/config/export.go
@@ -99,6 +99,10 @@ var whitelist = map[string]bool{
 	"Collections.TrashSweepInterval":               false,
 	"Collections.TrustAllContent":                  false,
 	"Collections.WebDAVCache":                      false,
+	"Collections.BalanceCollectionBatch":           false,
+	"Collections.BalancePeriod":                    false,
+	"Collections.BlobMissingReport":                false,
+	"Collections.BalanceCollectionBuffers":         false,
 	"Containers":                                   true,
 	"Containers.CloudVMs":                          false,
 	"Containers.CrunchRunCommand":                  false,
diff --git a/lib/config/generated_config.go b/lib/config/generated_config.go
index 32c101a5a..3806bbd8a 100644
--- a/lib/config/generated_config.go
+++ b/lib/config/generated_config.go
@@ -388,6 +388,36 @@ Clusters:
       # The default is 2 weeks.
       BlobSigningTTL: 336h
 
+      # When running keep-balance, this is the destination filename for the
+      # list of lost block hashes if there are any, one per line. Updated atomically during
+      # each successful run.
+      BlobMissingReport: ""
+
+      # keep-balance operates periodically, i.e.: do a
+      # scan/balance operation, sleep, repeat.
+      #
+      # BalancePeriod determines the interval between start times of
+      # successive scan/balance operations. If a scan/balance operation
+      # takes longer than BalancePeriod, the next one will follow it
+      # immediately.
+      #
+      # If SIGUSR1 is received during an idle period between operations,
+      # the next operation will start immediately.
+      BalancePeriod: 10m
+
+      # Limits the number of collections retrieved by keep-balance per
+      # API transaction. If this is zero, page size is
+      # determined by the API server's own page size limits (see
+      # API.MaxItemsPerResponse and API.MaxIndexDatabaseRead).
+      BalanceCollectionBatch: 100000
+
+      # The size of keep-balance's internal queue of
+      # collections. Higher values use more memory and improve throughput
+      # by allowing keep-balance to fetch the next page of collections
+      # while the current page is still being processed. If this is zero
+      # or omitted, pages are processed serially.
+      BalanceCollectionBuffers: 1000
+
       # Default lifetime for ephemeral collections: 2 weeks. This must not
       # be less than BlobSigningTTL.
       DefaultTrashLifetime: 336h
diff --git a/lib/config/load.go b/lib/config/load.go
index 93c36f69e..8c335f4c7 100644
--- a/lib/config/load.go
+++ b/lib/config/load.go
@@ -37,6 +37,7 @@ type Loader struct {
 	WebsocketPath           string
 	KeepproxyPath           string
 	GitHttpdPath            string
+	KeepBalancePath         string
 
 	configdata []byte
 }
@@ -69,6 +70,7 @@ func (ldr *Loader) SetupFlags(flagset *flag.FlagSet) {
 	flagset.StringVar(&ldr.WebsocketPath, "legacy-ws-config", defaultWebsocketConfigPath, "Legacy arvados-ws configuration `file`")
 	flagset.StringVar(&ldr.KeepproxyPath, "legacy-keepproxy-config", defaultKeepproxyConfigPath, "Legacy keepproxy configuration `file`")
 	flagset.StringVar(&ldr.GitHttpdPath, "legacy-git-httpd-config", defaultGitHttpdConfigPath, "Legacy arv-git-httpd configuration `file`")
+	flagset.StringVar(&ldr.KeepBalancePath, "legacy-keepbalance-config", defaultKeepBalanceConfigPath, "Legacy keep-balance configuration `file`")
 	flagset.BoolVar(&ldr.SkipLegacy, "skip-legacy", false, "Don't load legacy config files")
 }
 
@@ -149,6 +151,9 @@ func (ldr *Loader) MungeLegacyConfigArgs(lgr logrus.FieldLogger, args []string,
 	if legacyConfigArg != "-legacy-git-httpd-config" {
 		ldr.GitHttpdPath = ""
 	}
+	if legacyConfigArg != "-legacy-keepbalance-config" {
+		ldr.KeepBalancePath = ""
+	}
 
 	return munged
 }
@@ -251,6 +256,7 @@ func (ldr *Loader) Load() (*arvados.Config, error) {
 			ldr.loadOldWebsocketConfig(&cfg),
 			ldr.loadOldKeepproxyConfig(&cfg),
 			ldr.loadOldGitHttpdConfig(&cfg),
+			ldr.loadOldKeepBalanceConfig(&cfg),
 		} {
 			if err != nil {
 				return nil, err
diff --git a/sdk/go/arvados/config.go b/sdk/go/arvados/config.go
index 076a3c44d..7c1c35380 100644
--- a/sdk/go/arvados/config.go
+++ b/sdk/go/arvados/config.go
@@ -119,6 +119,11 @@ type Cluster struct {
 		TrashSweepInterval    Duration
 		TrustAllContent       bool
 
+		BlobMissingReport        string
+		BalancePeriod            Duration
+		BalanceCollectionBatch   int
+		BalanceCollectionBuffers int
+
 		WebDAVCache WebDAVCacheConfig
 	}
 	Git struct {
diff --git a/services/keep-balance/keep-balance.service b/services/keep-balance/keep-balance.service
index 563871607..1b71fb4e4 100644
--- a/services/keep-balance/keep-balance.service
+++ b/services/keep-balance/keep-balance.service
@@ -6,7 +6,6 @@
 Description=Arvados Keep Balance
 Documentation=https://doc.arvados.org/
 After=network.target
-AssertPathExists=/etc/arvados/keep-balance/keep-balance.yml
 
 # systemd==229 (ubuntu:xenial) obeys StartLimitInterval in the [Unit] section
 StartLimitInterval=0

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list