[arvados] created: 2.7.0-5349-g87c6651f8f

git repository hosting git at public.arvados.org
Wed Nov 15 15:00:24 UTC 2023


        at  87c6651f8f9c278e7202ed12386e818b53098bda (commit)


commit 87c6651f8f9c278e7202ed12386e818b53098bda
Author: Tom Clegg <tom at curii.com>
Date:   Wed Nov 15 10:00:07 2023 -0500

    21189: Replace -commit-X with Balance{Pull,Trash}Limit configs.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/cmd/arvados-server/keep-balance.service b/cmd/arvados-server/keep-balance.service
index 6b16b2cfab..c603939bcf 100644
--- a/cmd/arvados-server/keep-balance.service
+++ b/cmd/arvados-server/keep-balance.service
@@ -14,7 +14,7 @@ StartLimitIntervalSec=0
 [Service]
 Type=notify
 EnvironmentFile=-/etc/arvados/environment
-ExecStart=/usr/bin/keep-balance -commit-pulls -commit-trash
+ExecStart=/usr/bin/keep-balance
 # Set a reasonable default for the open file limit
 LimitNOFILE=65536
 Restart=always
diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml
index 24d626aae5..4fe75bd680 100644
--- a/lib/config/config.default.yml
+++ b/lib/config/config.default.yml
@@ -638,6 +638,15 @@ Clusters:
       # once.
       BalanceUpdateLimit: 100000
 
+      # Maximum number of "pull block from other server" and "trash
+      # block" requests to send to each keepstore server at a
+      # time. Smaller values use less memory in keepstore and
+      # keep-balance. Larger values allow more progress per
+      # keep-balance iteration. Setting both limits to zero amounts
+      # to a "dry run".
+      BalancePullLimit: 100000
+      BalanceTrashLimit: 100000
+
       # Default lifetime for ephemeral collections: 2 weeks. This must not
       # be less than BlobSigningTTL.
       DefaultTrashLifetime: 336h
diff --git a/lib/config/export.go b/lib/config/export.go
index cbd9ff6d7f..674b37473e 100644
--- a/lib/config/export.go
+++ b/lib/config/export.go
@@ -93,7 +93,9 @@ var whitelist = map[string]bool{
 	"Collections.BalanceCollectionBatch":       false,
 	"Collections.BalanceCollectionBuffers":     false,
 	"Collections.BalancePeriod":                false,
+	"Collections.BalancePullLimit":             false,
 	"Collections.BalanceTimeout":               false,
+	"Collections.BalanceTrashLimit":            false,
 	"Collections.BalanceUpdateLimit":           false,
 	"Collections.BlobDeleteConcurrency":        false,
 	"Collections.BlobMissingReport":            false,
diff --git a/sdk/go/arvados/config.go b/sdk/go/arvados/config.go
index ea01cc3c68..e2ad7b089d 100644
--- a/sdk/go/arvados/config.go
+++ b/sdk/go/arvados/config.go
@@ -150,6 +150,8 @@ type Cluster struct {
 		BalanceCollectionBuffers int
 		BalanceTimeout           Duration
 		BalanceUpdateLimit       int
+		BalancePullLimit         int
+		BalanceTrashLimit        int
 
 		WebDAVCache WebDAVCacheConfig
 
diff --git a/services/keep-balance/balance.go b/services/keep-balance/balance.go
index e44dfeda87..e71eb07efa 100644
--- a/services/keep-balance/balance.go
+++ b/services/keep-balance/balance.go
@@ -137,7 +137,7 @@ func (bal *Balancer) Run(ctx context.Context, client *arvados.Client, cluster *a
 	client.Timeout = 0
 
 	rs := bal.rendezvousState()
-	if runOptions.CommitTrash && rs != runOptions.SafeRendezvousState {
+	if cluster.Collections.BalanceTrashLimit > 0 && rs != runOptions.SafeRendezvousState {
 		if runOptions.SafeRendezvousState != "" {
 			bal.logf("notice: KeepServices list has changed since last run")
 		}
@@ -155,6 +155,7 @@ func (bal *Balancer) Run(ctx context.Context, client *arvados.Client, cluster *a
 	if err = bal.GetCurrentState(ctx, client, cluster.Collections.BalanceCollectionBatch, cluster.Collections.BalanceCollectionBuffers); err != nil {
 		return
 	}
+	bal.setupLookupTables(cluster)
 	bal.ComputeChangeSets()
 	bal.PrintStatistics()
 	if err = bal.CheckSanityLate(); err != nil {
@@ -171,14 +172,14 @@ func (bal *Balancer) Run(ctx context.Context, client *arvados.Client, cluster *a
 		}
 		lbFile = nil
 	}
-	if runOptions.CommitPulls {
+	if cluster.Collections.BalancePullLimit > 0 {
 		err = bal.CommitPulls(ctx, client)
 		if err != nil {
 			// Skip trash if we can't pull. (Too cautious?)
 			return
 		}
 	}
-	if runOptions.CommitTrash {
+	if cluster.Collections.BalanceTrashLimit > 0 {
 		err = bal.CommitTrash(ctx, client)
 		if err != nil {
 			return
@@ -542,7 +543,6 @@ func (bal *Balancer) ComputeChangeSets() {
 	// This just calls balanceBlock() once for each block, using a
 	// pool of worker goroutines.
 	defer bal.time("changeset_compute", "wall clock time to compute changesets")()
-	bal.setupLookupTables()
 
 	type balanceTask struct {
 		blkid arvados.SizedDigest
@@ -577,7 +577,7 @@ func (bal *Balancer) ComputeChangeSets() {
 	bal.collectStatistics(results)
 }
 
-func (bal *Balancer) setupLookupTables() {
+func (bal *Balancer) setupLookupTables(cluster *arvados.Cluster) {
 	bal.serviceRoots = make(map[string]string)
 	bal.classes = defaultClasses
 	bal.mountsByClass = map[string]map[*KeepMount]bool{"default": {}}
@@ -609,6 +609,13 @@ func (bal *Balancer) setupLookupTables() {
 	// class" case in balanceBlock depends on the order classes
 	// are considered.
 	sort.Strings(bal.classes)
+
+	for _, srv := range bal.KeepServices {
+		srv.ChangeSet = &ChangeSet{
+			PullLimit:  cluster.Collections.BalancePullLimit,
+			TrashLimit: cluster.Collections.BalanceTrashLimit,
+		}
+	}
 }
 
 const (
@@ -957,19 +964,21 @@ type replicationStats struct {
 }
 
 type balancerStats struct {
-	lost          blocksNBytes
-	overrep       blocksNBytes
-	unref         blocksNBytes
-	garbage       blocksNBytes
-	underrep      blocksNBytes
-	unachievable  blocksNBytes
-	justright     blocksNBytes
-	desired       blocksNBytes
-	current       blocksNBytes
-	pulls         int
-	trashes       int
-	replHistogram []int
-	classStats    map[string]replicationStats
+	lost            blocksNBytes
+	overrep         blocksNBytes
+	unref           blocksNBytes
+	garbage         blocksNBytes
+	underrep        blocksNBytes
+	unachievable    blocksNBytes
+	justright       blocksNBytes
+	desired         blocksNBytes
+	current         blocksNBytes
+	pulls           int
+	pullsDeferred   int
+	trashes         int
+	trashesDeferred int
+	replHistogram   []int
+	classStats      map[string]replicationStats
 
 	// collectionBytes / collectionBlockBytes = deduplication ratio
 	collectionBytes      int64 // sum(bytes in referenced blocks) across all collections
@@ -1092,7 +1101,9 @@ func (bal *Balancer) collectStatistics(results <-chan balanceResult) {
 	}
 	for _, srv := range bal.KeepServices {
 		s.pulls += len(srv.ChangeSet.Pulls)
+		s.pullsDeferred += srv.ChangeSet.PullsDeferred
 		s.trashes += len(srv.ChangeSet.Trashes)
+		s.trashesDeferred += srv.ChangeSet.TrashesDeferred
 	}
 	bal.stats = s
 	bal.Metrics.UpdateStats(s)
diff --git a/services/keep-balance/balance_run_test.go b/services/keep-balance/balance_run_test.go
index 962bd40ade..fbe9802484 100644
--- a/services/keep-balance/balance_run_test.go
+++ b/services/keep-balance/balance_run_test.go
@@ -396,9 +396,7 @@ func (s *runSuite) TestRefuseZeroCollections(c *check.C) {
 	_, err := s.db.Exec(`delete from collections`)
 	c.Assert(err, check.IsNil)
 	opts := RunOptions{
-		CommitPulls: true,
-		CommitTrash: true,
-		Logger:      ctxlog.TestLogger(c),
+		Logger: ctxlog.TestLogger(c),
 	}
 	s.stub.serveCurrentUserAdmin()
 	s.stub.serveZeroCollections()
@@ -416,8 +414,6 @@ func (s *runSuite) TestRefuseZeroCollections(c *check.C) {
 
 func (s *runSuite) TestRefuseBadIndex(c *check.C) {
 	opts := RunOptions{
-		CommitPulls: true,
-		CommitTrash: true,
 		ChunkPrefix: "abc",
 		Logger:      ctxlog.TestLogger(c),
 	}
@@ -439,9 +435,7 @@ func (s *runSuite) TestRefuseBadIndex(c *check.C) {
 
 func (s *runSuite) TestRefuseNonAdmin(c *check.C) {
 	opts := RunOptions{
-		CommitPulls: true,
-		CommitTrash: true,
-		Logger:      ctxlog.TestLogger(c),
+		Logger: ctxlog.TestLogger(c),
 	}
 	s.stub.serveCurrentUserNotAdmin()
 	s.stub.serveZeroCollections()
@@ -468,8 +462,6 @@ func (s *runSuite) TestInvalidChunkPrefix(c *check.C) {
 		s.SetUpTest(c)
 		c.Logf("trying invalid prefix %q", trial.prefix)
 		opts := RunOptions{
-			CommitPulls: true,
-			CommitTrash: true,
 			ChunkPrefix: trial.prefix,
 			Logger:      ctxlog.TestLogger(c),
 		}
@@ -489,9 +481,7 @@ func (s *runSuite) TestInvalidChunkPrefix(c *check.C) {
 
 func (s *runSuite) TestRefuseSameDeviceDifferentVolumes(c *check.C) {
 	opts := RunOptions{
-		CommitPulls: true,
-		CommitTrash: true,
-		Logger:      ctxlog.TestLogger(c),
+		Logger: ctxlog.TestLogger(c),
 	}
 	s.stub.serveCurrentUserAdmin()
 	s.stub.serveZeroCollections()
@@ -519,9 +509,7 @@ func (s *runSuite) TestWriteLostBlocks(c *check.C) {
 	s.config.Collections.BlobMissingReport = lostf.Name()
 	defer os.Remove(lostf.Name())
 	opts := RunOptions{
-		CommitPulls: true,
-		CommitTrash: true,
-		Logger:      ctxlog.TestLogger(c),
+		Logger: ctxlog.TestLogger(c),
 	}
 	s.stub.serveCurrentUserAdmin()
 	s.stub.serveFooBarFileCollections()
@@ -540,10 +528,10 @@ func (s *runSuite) TestWriteLostBlocks(c *check.C) {
 }
 
 func (s *runSuite) TestDryRun(c *check.C) {
+	s.config.Collections.BalanceTrashLimit = 0
+	s.config.Collections.BalancePullLimit = 0
 	opts := RunOptions{
-		CommitPulls: false,
-		CommitTrash: false,
-		Logger:      ctxlog.TestLogger(c),
+		Logger: ctxlog.TestLogger(c),
 	}
 	s.stub.serveCurrentUserAdmin()
 	collReqs := s.stub.serveFooBarFileCollections()
@@ -559,9 +547,12 @@ func (s *runSuite) TestDryRun(c *check.C) {
 		c.Check(req.Form.Get("include_trash"), check.Equals, "true")
 		c.Check(req.Form.Get("include_old_versions"), check.Equals, "true")
 	}
-	c.Check(trashReqs.Count(), check.Equals, 0)
-	c.Check(pullReqs.Count(), check.Equals, 0)
-	c.Check(bal.stats.pulls, check.Not(check.Equals), 0)
+	c.Check(trashReqs.Count(), check.Equals, 4)
+	c.Check(pullReqs.Count(), check.Equals, 4)
+	c.Check(bal.stats.pulls, check.Equals, 0)
+	c.Check(bal.stats.pullsDeferred, check.Not(check.Equals), 0)
+	c.Check(bal.stats.trashes, check.Equals, 0)
+	c.Check(bal.stats.trashesDeferred, check.Not(check.Equals), 0)
 	c.Check(bal.stats.underrep.replicas, check.Not(check.Equals), 0)
 	c.Check(bal.stats.overrep.replicas, check.Not(check.Equals), 0)
 }
@@ -570,10 +561,8 @@ func (s *runSuite) TestCommit(c *check.C) {
 	s.config.Collections.BlobMissingReport = c.MkDir() + "/keep-balance-lost-blocks-test-"
 	s.config.ManagementToken = "xyzzy"
 	opts := RunOptions{
-		CommitPulls: true,
-		CommitTrash: true,
-		Logger:      ctxlog.TestLogger(c),
-		Dumper:      ctxlog.TestLogger(c),
+		Logger: ctxlog.TestLogger(c),
+		Dumper: ctxlog.TestLogger(c),
 	}
 	s.stub.serveCurrentUserAdmin()
 	s.stub.serveFooBarFileCollections()
@@ -608,8 +597,6 @@ func (s *runSuite) TestCommit(c *check.C) {
 func (s *runSuite) TestChunkPrefix(c *check.C) {
 	s.config.Collections.BlobMissingReport = c.MkDir() + "/keep-balance-lost-blocks-test-"
 	opts := RunOptions{
-		CommitPulls: true,
-		CommitTrash: true,
 		ChunkPrefix: "ac", // catch "foo" but not "bar"
 		Logger:      ctxlog.TestLogger(c),
 		Dumper:      ctxlog.TestLogger(c),
@@ -639,10 +626,8 @@ func (s *runSuite) TestChunkPrefix(c *check.C) {
 func (s *runSuite) TestRunForever(c *check.C) {
 	s.config.ManagementToken = "xyzzy"
 	opts := RunOptions{
-		CommitPulls: true,
-		CommitTrash: true,
-		Logger:      ctxlog.TestLogger(c),
-		Dumper:      ctxlog.TestLogger(c),
+		Logger: ctxlog.TestLogger(c),
+		Dumper: ctxlog.TestLogger(c),
 	}
 	s.stub.serveCurrentUserAdmin()
 	s.stub.serveFooBarFileCollections()
diff --git a/services/keep-balance/balance_test.go b/services/keep-balance/balance_test.go
index e5bdf9c023..85d4ff8b5d 100644
--- a/services/keep-balance/balance_test.go
+++ b/services/keep-balance/balance_test.go
@@ -12,6 +12,7 @@ import (
 	"testing"
 	"time"
 
+	"git.arvados.org/arvados.git/lib/config"
 	"git.arvados.org/arvados.git/sdk/go/arvados"
 	"git.arvados.org/arvados.git/sdk/go/ctxlog"
 	check "gopkg.in/check.v1"
@@ -26,6 +27,7 @@ var _ = check.Suite(&balancerSuite{})
 
 type balancerSuite struct {
 	Balancer
+	config          *arvados.Cluster
 	srvs            []*KeepService
 	blks            map[string]tester
 	knownRendezvous [][]int
@@ -72,6 +74,11 @@ func (bal *balancerSuite) SetUpSuite(c *check.C) {
 
 	bal.signatureTTL = 3600
 	bal.Logger = ctxlog.TestLogger(c)
+
+	cfg, err := config.NewLoader(nil, ctxlog.TestLogger(c)).Load()
+	c.Assert(err, check.Equals, nil)
+	bal.config, err = cfg.GetCluster("")
+	c.Assert(err, check.Equals, nil)
 }
 
 func (bal *balancerSuite) SetUpTest(c *check.C) {
@@ -743,7 +750,7 @@ func (bal *balancerSuite) TestChangeStorageClasses(c *check.C) {
 // the appropriate changes for that block have been added to the
 // changesets.
 func (bal *balancerSuite) try(c *check.C, t tester) {
-	bal.setupLookupTables()
+	bal.setupLookupTables(bal.config)
 	blk := &BlockState{
 		Replicas: bal.replList(t.known, t.current),
 		Desired:  t.desired,
@@ -751,9 +758,6 @@ func (bal *balancerSuite) try(c *check.C, t tester) {
 	for i, t := range t.timestamps {
 		blk.Replicas[i].Mtime = t
 	}
-	for _, srv := range bal.srvs {
-		srv.ChangeSet = &ChangeSet{}
-	}
 	result := bal.balanceBlock(knownBlkid(t.known), blk)
 
 	var didPull, didTrash slots
diff --git a/services/keep-balance/change_set.go b/services/keep-balance/change_set.go
index 8e0ba028ac..c3579556bb 100644
--- a/services/keep-balance/change_set.go
+++ b/services/keep-balance/change_set.go
@@ -60,22 +60,35 @@ func (t Trash) MarshalJSON() ([]byte, error) {
 // ChangeSet is a set of change requests that will be sent to a
 // keepstore server.
 type ChangeSet struct {
-	Pulls   []Pull
-	Trashes []Trash
-	mutex   sync.Mutex
+	PullLimit  int
+	TrashLimit int
+
+	Pulls           []Pull
+	PullsDeferred   int // number that weren't added because of PullLimit
+	Trashes         []Trash
+	TrashesDeferred int // number that weren't added because of TrashLimit
+	mutex           sync.Mutex
 }
 
 // AddPull adds a Pull operation.
 func (cs *ChangeSet) AddPull(p Pull) {
 	cs.mutex.Lock()
-	cs.Pulls = append(cs.Pulls, p)
+	if len(cs.Pulls) < cs.PullLimit {
+		cs.Pulls = append(cs.Pulls, p)
+	} else {
+		cs.PullsDeferred++
+	}
 	cs.mutex.Unlock()
 }
 
 // AddTrash adds a Trash operation
 func (cs *ChangeSet) AddTrash(t Trash) {
 	cs.mutex.Lock()
-	cs.Trashes = append(cs.Trashes, t)
+	if len(cs.Trashes) < cs.TrashLimit {
+		cs.Trashes = append(cs.Trashes, t)
+	} else {
+		cs.TrashesDeferred++
+	}
 	cs.mutex.Unlock()
 }
 
@@ -83,5 +96,5 @@ func (cs *ChangeSet) AddTrash(t Trash) {
 func (cs *ChangeSet) String() string {
 	cs.mutex.Lock()
 	defer cs.mutex.Unlock()
-	return fmt.Sprintf("ChangeSet{Pulls:%d, Trashes:%d}", len(cs.Pulls), len(cs.Trashes))
+	return fmt.Sprintf("ChangeSet{Pulls:%d, Trashes:%d} Deferred{Pulls:%d Trashes:%d}", len(cs.Pulls), len(cs.Trashes), cs.PullsDeferred, cs.TrashesDeferred)
 }
diff --git a/services/keep-balance/integration_test.go b/services/keep-balance/integration_test.go
index 42463a002a..2e353c92be 100644
--- a/services/keep-balance/integration_test.go
+++ b/services/keep-balance/integration_test.go
@@ -87,8 +87,6 @@ func (s *integrationSuite) TestBalanceAPIFixtures(c *check.C) {
 		logger := logrus.New()
 		logger.Out = io.MultiWriter(&logBuf, os.Stderr)
 		opts := RunOptions{
-			CommitPulls:           true,
-			CommitTrash:           true,
 			CommitConfirmedFields: true,
 			Logger:                logger,
 		}
@@ -101,7 +99,6 @@ func (s *integrationSuite) TestBalanceAPIFixtures(c *check.C) {
 		nextOpts, err := bal.Run(context.Background(), s.client, s.config, opts)
 		c.Check(err, check.IsNil)
 		c.Check(nextOpts.SafeRendezvousState, check.Not(check.Equals), "")
-		c.Check(nextOpts.CommitPulls, check.Equals, true)
 		if iter == 0 {
 			c.Check(logBuf.String(), check.Matches, `(?ms).*ChangeSet{Pulls:1.*`)
 			c.Check(logBuf.String(), check.Not(check.Matches), `(?ms).*ChangeSet{.*Trashes:[^0]}*`)
diff --git a/services/keep-balance/main.go b/services/keep-balance/main.go
index 6bc9989589..f9a3e1701a 100644
--- a/services/keep-balance/main.go
+++ b/services/keep-balance/main.go
@@ -32,10 +32,10 @@ func (command) RunCommand(prog string, args []string, stdin io.Reader, stdout, s
 	flags := flag.NewFlagSet(prog, flag.ContinueOnError)
 	flags.BoolVar(&options.Once, "once", false,
 		"balance once and then exit")
-	flags.BoolVar(&options.CommitPulls, "commit-pulls", false,
-		"send pull requests (make more replicas of blocks that are underreplicated or are not in optimal rendezvous probe order)")
-	flags.BoolVar(&options.CommitTrash, "commit-trash", false,
-		"send trash requests (delete unreferenced old blocks, and excess replicas of overreplicated blocks)")
+	deprCommitPulls := flags.Bool("commit-pulls", true,
+		"send pull requests (must be true -- configure Collections.BalancePullLimit = 0 to disable.)")
+	deprCommitTrash := flags.Bool("commit-trash", true,
+		"send trash requests (must be true -- configure Collections.BalanceTrashLimit = 0 to disable.)")
 	flags.BoolVar(&options.CommitConfirmedFields, "commit-confirmed-fields", true,
 		"update collection fields (replicas_confirmed, storage_classes_confirmed, etc.)")
 	flags.StringVar(&options.ChunkPrefix, "chunk-prefix", "",
@@ -55,6 +55,13 @@ func (command) RunCommand(prog string, args []string, stdin io.Reader, stdout, s
 		return code
 	}
 
+	if !*deprCommitPulls || !*deprCommitTrash {
+		fmt.Fprint(stderr,
+			"Usage error: the -commit-pulls or -commit-trash command line flags are no longer supported.\n",
+			"Use Collections.BalancePullLimit and Collections.BalanceTrashLimit instead.\n")
+		return cmd.EX_USAGE
+	}
+
 	// Drop our custom args that would be rejected by the generic
 	// service.Command
 	args = nil
diff --git a/services/keep-balance/server.go b/services/keep-balance/server.go
index 9bcaec43d8..bb7294c4ba 100644
--- a/services/keep-balance/server.go
+++ b/services/keep-balance/server.go
@@ -27,8 +27,6 @@ import (
 // RunOptions fields are controlled by command line flags.
 type RunOptions struct {
 	Once                  bool
-	CommitPulls           bool
-	CommitTrash           bool
 	CommitConfirmedFields bool
 	ChunkPrefix           string
 	Logger                logrus.FieldLogger
@@ -112,9 +110,9 @@ func (srv *Server) runForever(ctx context.Context) error {
 	logger.Printf("starting up: will scan every %v and on SIGUSR1", srv.Cluster.Collections.BalancePeriod)
 
 	for {
-		if !srv.RunOptions.CommitPulls && !srv.RunOptions.CommitTrash {
+		if srv.Cluster.Collections.BalancePullLimit < 1 && srv.Cluster.Collections.BalanceTrashLimit < 1 {
 			logger.Print("WARNING: Will scan periodically, but no changes will be committed.")
-			logger.Print("=======  Consider using -commit-pulls and -commit-trash flags.")
+			logger.Print("=======  Consider using non-zero BalancePullLimit and BalanceTrashLimit configs.")
 		}
 
 		if !dblock.KeepBalanceService.Check() {

commit 13da2e4a451dc0ceb03ae6a8f50ec971cef21d6e
Author: Tom Clegg <tom at curii.com>
Date:   Wed Nov 15 09:49:04 2023 -0500

    21189: On usage error, exit 64 and do not auto-restart.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/cmd/arvados-server/arvados-controller.service b/cmd/arvados-server/arvados-controller.service
index 420cbb035a..cf680f959f 100644
--- a/cmd/arvados-server/arvados-controller.service
+++ b/cmd/arvados-server/arvados-controller.service
@@ -19,6 +19,7 @@ ExecStart=/usr/bin/arvados-controller
 LimitNOFILE=65536
 Restart=always
 RestartSec=1
+RestartPreventExitStatus=64
 
 # systemd<=219 (centos:7, debian:8, ubuntu:trusty) obeys StartLimitInterval in the [Service] section
 StartLimitInterval=0
diff --git a/cmd/arvados-server/arvados-dispatch-cloud.service b/cmd/arvados-server/arvados-dispatch-cloud.service
index 8d57e8a161..61e2325d26 100644
--- a/cmd/arvados-server/arvados-dispatch-cloud.service
+++ b/cmd/arvados-server/arvados-dispatch-cloud.service
@@ -19,6 +19,7 @@ ExecStart=/usr/bin/arvados-dispatch-cloud
 LimitNOFILE=65536
 Restart=always
 RestartSec=1
+RestartPreventExitStatus=64
 
 # systemd<=219 (centos:7, debian:8, ubuntu:trusty) obeys StartLimitInterval in the [Service] section
 StartLimitInterval=0
diff --git a/cmd/arvados-server/arvados-dispatch-lsf.service b/cmd/arvados-server/arvados-dispatch-lsf.service
index 65d8786670..931ef40562 100644
--- a/cmd/arvados-server/arvados-dispatch-lsf.service
+++ b/cmd/arvados-server/arvados-dispatch-lsf.service
@@ -19,6 +19,7 @@ ExecStart=/usr/bin/arvados-dispatch-lsf
 LimitNOFILE=65536
 Restart=always
 RestartSec=1
+RestartPreventExitStatus=64
 
 # systemd<=219 (centos:7, debian:8, ubuntu:trusty) obeys StartLimitInterval in the [Service] section
 StartLimitInterval=0
diff --git a/cmd/arvados-server/arvados-git-httpd.service b/cmd/arvados-server/arvados-git-httpd.service
index b45587ffc0..b86293d977 100644
--- a/cmd/arvados-server/arvados-git-httpd.service
+++ b/cmd/arvados-server/arvados-git-httpd.service
@@ -19,6 +19,7 @@ ExecStart=/usr/bin/arvados-git-httpd
 LimitNOFILE=65536
 Restart=always
 RestartSec=1
+RestartPreventExitStatus=64
 
 # systemd<=219 (centos:7, debian:8, ubuntu:trusty) obeys StartLimitInterval in the [Service] section
 StartLimitInterval=0
diff --git a/cmd/arvados-server/arvados-health.service b/cmd/arvados-server/arvados-health.service
index cf246b0ee2..4605681cb8 100644
--- a/cmd/arvados-server/arvados-health.service
+++ b/cmd/arvados-server/arvados-health.service
@@ -19,6 +19,7 @@ ExecStart=/usr/bin/arvados-health
 LimitNOFILE=65536
 Restart=always
 RestartSec=1
+RestartPreventExitStatus=64
 
 # systemd<=219 (centos:7, debian:8, ubuntu:trusty) obeys StartLimitInterval in the [Service] section
 StartLimitInterval=0
diff --git a/cmd/arvados-server/arvados-ws.service b/cmd/arvados-server/arvados-ws.service
index f73db5d080..5ba7b54c7d 100644
--- a/cmd/arvados-server/arvados-ws.service
+++ b/cmd/arvados-server/arvados-ws.service
@@ -18,6 +18,7 @@ ExecStart=/usr/bin/arvados-ws
 LimitNOFILE=65536
 Restart=always
 RestartSec=1
+RestartPreventExitStatus=64
 
 # systemd<=219 (centos:7, debian:8, ubuntu:trusty) obeys StartLimitInterval in the [Service] section
 StartLimitInterval=0
diff --git a/cmd/arvados-server/crunch-dispatch-slurm.service b/cmd/arvados-server/crunch-dispatch-slurm.service
index 51b4e58c35..53b0157807 100644
--- a/cmd/arvados-server/crunch-dispatch-slurm.service
+++ b/cmd/arvados-server/crunch-dispatch-slurm.service
@@ -19,6 +19,7 @@ ExecStart=/usr/bin/crunch-dispatch-slurm
 LimitNOFILE=65536
 Restart=always
 RestartSec=1
+RestartPreventExitStatus=64
 
 # systemd<=219 (centos:7, debian:8, ubuntu:trusty) obeys StartLimitInterval in the [Service] section
 StartLimitInterval=0
diff --git a/cmd/arvados-server/keep-balance.service b/cmd/arvados-server/keep-balance.service
index 1c5808288b..6b16b2cfab 100644
--- a/cmd/arvados-server/keep-balance.service
+++ b/cmd/arvados-server/keep-balance.service
@@ -20,6 +20,7 @@ LimitNOFILE=65536
 Restart=always
 RestartSec=10s
 Nice=19
+RestartPreventExitStatus=64
 
 # systemd<=219 (centos:7, debian:8, ubuntu:trusty) obeys StartLimitInterval in the [Service] section
 StartLimitInterval=0
diff --git a/cmd/arvados-server/keep-web.service b/cmd/arvados-server/keep-web.service
index c0e193d6d8..d88dea231f 100644
--- a/cmd/arvados-server/keep-web.service
+++ b/cmd/arvados-server/keep-web.service
@@ -19,6 +19,7 @@ ExecStart=/usr/bin/keep-web
 LimitNOFILE=65536
 Restart=always
 RestartSec=1
+RestartPreventExitStatus=64
 
 # systemd<=219 (centos:7, debian:8, ubuntu:trusty) obeys StartLimitInterval in the [Service] section
 StartLimitInterval=0
diff --git a/cmd/arvados-server/keepproxy.service b/cmd/arvados-server/keepproxy.service
index 7d4d092677..952c7df6a1 100644
--- a/cmd/arvados-server/keepproxy.service
+++ b/cmd/arvados-server/keepproxy.service
@@ -19,6 +19,7 @@ ExecStart=/usr/bin/keepproxy
 LimitNOFILE=65536
 Restart=always
 RestartSec=1
+RestartPreventExitStatus=64
 
 # systemd<=219 (centos:7, debian:8, ubuntu:trusty) obeys StartLimitInterval in the [Service] section
 StartLimitInterval=0
diff --git a/cmd/arvados-server/keepstore.service b/cmd/arvados-server/keepstore.service
index bcfde3a788..14cad20436 100644
--- a/cmd/arvados-server/keepstore.service
+++ b/cmd/arvados-server/keepstore.service
@@ -23,6 +23,7 @@ ExecStart=/usr/bin/keepstore
 LimitNOFILE=65536
 Restart=always
 RestartSec=1
+RestartPreventExitStatus=64
 
 # systemd<=219 (centos:7, debian:8, ubuntu:trusty) obeys StartLimitInterval in the [Service] section
 StartLimitInterval=0
diff --git a/lib/cmd/cmd.go b/lib/cmd/cmd.go
index 3d4092e6b8..2059f946cf 100644
--- a/lib/cmd/cmd.go
+++ b/lib/cmd/cmd.go
@@ -20,6 +20,8 @@ import (
 	"github.com/sirupsen/logrus"
 )
 
+const EX_USAGE = 64
+
 type Handler interface {
 	RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int
 }
@@ -86,13 +88,13 @@ func (m Multi) RunCommand(prog string, args []string, stdin io.Reader, stdout, s
 	} else if len(args) < 1 {
 		fmt.Fprintf(stderr, "usage: %s command [args]\n", prog)
 		m.Usage(stderr)
-		return 2
+		return EX_USAGE
 	} else if cmd, ok = m[args[0]]; ok {
 		return cmd.RunCommand(prog+" "+args[0], args[1:], stdin, stdout, stderr)
 	} else {
 		fmt.Fprintf(stderr, "%s: unrecognized command %q\n", prog, args[0])
 		m.Usage(stderr)
-		return 2
+		return EX_USAGE
 	}
 }
 
diff --git a/lib/cmd/parseflags.go b/lib/cmd/parseflags.go
index 707cacbf52..d4158a2195 100644
--- a/lib/cmd/parseflags.go
+++ b/lib/cmd/parseflags.go
@@ -26,7 +26,8 @@ var defaultFlagSet = flag.NewFlagSet("none", flag.ContinueOnError)
 // running normally, or false if it should exit now.
 //
 // If ok is false, the second return value is an appropriate exit
-// code: 0 if "-help" was given, 2 if there was a usage error.
+// code: 0 if "-help" was given, EX_USAGE (64) if there was a usage
+// error.
 func ParseFlags(f FlagSet, prog string, args []string, positional string, stderr io.Writer) (ok bool, exitCode int) {
 	f.Init(prog, flag.ContinueOnError)
 	f.SetOutput(io.Discard)
@@ -35,7 +36,7 @@ func ParseFlags(f FlagSet, prog string, args []string, positional string, stderr
 	case nil:
 		if f.NArg() > 0 && positional == "" {
 			fmt.Fprintf(stderr, "unrecognized command line arguments: %v (try -help)\n", f.Args())
-			return false, 2
+			return false, EX_USAGE
 		}
 		return true, 0
 	case flag.ErrHelp:
@@ -55,6 +56,6 @@ func ParseFlags(f FlagSet, prog string, args []string, positional string, stderr
 		return false, 0
 	default:
 		fmt.Fprintf(stderr, "error parsing command line arguments: %s (try -help)\n", err)
-		return false, 2
+		return false, EX_USAGE
 	}
 }

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list