[ARVADOS] created: 1.3.0-2460-g45c57dc86

Git user git at public.arvados.org
Fri Apr 10 19:46:35 UTC 2020


        at  45c57dc86010f8fd9ae862d91a710e0752ceda4a (commit)


commit 45c57dc86010f8fd9ae862d91a710e0752ceda4a
Author: Tom Clegg <tom at tomclegg.ca>
Date:   Fri Apr 10 15:44:14 2020 -0400

    16048: arvados-server boot: restart everything when config changes.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at tomclegg.ca>

diff --git a/doc/examples/config/zzzzz.yml b/doc/examples/config/zzzzz.yml
index c63550edf..d1e1336d5 100644
--- a/doc/examples/config/zzzzz.yml
+++ b/doc/examples/config/zzzzz.yml
@@ -1,3 +1,4 @@
+AutoReloadConfig: true
 Clusters:
   zzzzz:
     ManagementToken: e687950a23c3a9bceec28c6223a06c79
diff --git a/go.mod b/go.mod
index 4491b3598..34b7e0779 100644
--- a/go.mod
+++ b/go.mod
@@ -11,6 +11,7 @@ require (
 	github.com/anmitsu/go-shlex v0.0.0-20161002113705-648efa622239 // indirect
 	github.com/arvados/cgofuse v1.2.0-arvados1
 	github.com/aws/aws-sdk-go v1.25.30
+	github.com/bgentry/speakeasy v0.1.0 // indirect
 	github.com/coreos/go-oidc v2.1.0+incompatible
 	github.com/coreos/go-systemd v0.0.0-20180108085132-cc4f39464dc7
 	github.com/dgrijalva/jwt-go v3.1.0+incompatible // indirect
@@ -21,6 +22,7 @@ require (
 	github.com/docker/go-connections v0.3.0 // indirect
 	github.com/docker/go-units v0.3.3-0.20171221200356-d59758554a3d // indirect
 	github.com/flynn/go-shlex v0.0.0-20150515145356-3f9db97f8568 // indirect
+	github.com/fsnotify/fsnotify v1.4.9
 	github.com/ghodss/yaml v1.0.0
 	github.com/gliderlabs/ssh v0.2.2 // indirect
 	github.com/gogo/protobuf v1.1.1
diff --git a/go.sum b/go.sum
index 18cf89b0e..03b2f77b6 100644
--- a/go.sum
+++ b/go.sum
@@ -27,6 +27,8 @@ github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24
 github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8=
 github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
 github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
+github.com/bgentry/speakeasy v0.1.0 h1:ByYyxL9InA1OWqxJqqp2A5pYHUrCiAL6K3J+LKSsQkY=
+github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs=
 github.com/cespare/xxhash/v2 v2.1.0 h1:yTUvW7Vhb89inJ+8irsUqiWjh8iT6sQPZiQzI6ReGkA=
 github.com/cespare/xxhash/v2 v2.1.0/go.mod h1:dgIUBU3pDso/gPgZ1osOZ0iQf77oPR28Tjxl5dIMyVM=
 github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
@@ -54,6 +56,8 @@ github.com/docker/go-units v0.3.3-0.20171221200356-d59758554a3d h1:dVaNRYvaGV23A
 github.com/docker/go-units v0.3.3-0.20171221200356-d59758554a3d/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
 github.com/flynn/go-shlex v0.0.0-20150515145356-3f9db97f8568 h1:BHsljHzVlRcyQhjrss6TZTdY2VfCqZPbv5k3iBFa2ZQ=
 github.com/flynn/go-shlex v0.0.0-20150515145356-3f9db97f8568/go.mod h1:xEzjJPgXI435gkrCt3MPfRiAkVrwSbHsst4LCFVfpJc=
+github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4=
+github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
 github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk=
 github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
 github.com/gliderlabs/ssh v0.2.2 h1:6zsha5zo/TWhRhwqCD3+EarCAgZ2yN28ipRnGPnwkI0=
@@ -213,6 +217,7 @@ golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5h
 golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20191010194322-b09406accb47/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20191105231009-c1f44814a5cd h1:3x5uuvBgE6oaXJjCOvpCC1IpgJogqQ+PqGGU3ZxAgII=
 golang.org/x/sys v0.0.0-20191105231009-c1f44814a5cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
diff --git a/lib/boot/cmd.go b/lib/boot/cmd.go
index 5147e3ac3..e0e275522 100644
--- a/lib/boot/cmd.go
+++ b/lib/boot/cmd.go
@@ -29,24 +29,33 @@ type supervisedTask interface {
 	String() string
 }
 
+var errNeedConfigReload = errors.New("config changed, restart needed")
+
 type bootCommand struct{}
 
-func (bootCommand) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
-	super := &Supervisor{
-		Stderr: stderr,
-		logger: ctxlog.New(stderr, "json", "info"),
+func (bcmd bootCommand) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
+	logger := ctxlog.New(stderr, "json", "info")
+	ctx := ctxlog.Context(context.Background(), logger)
+	for {
+		err := bcmd.run(ctx, prog, args, stdin, stdout, stderr)
+		if err == errNeedConfigReload {
+			continue
+		} else if err != nil {
+			logger.WithError(err).Info("exiting")
+			return 1
+		} else {
+			return 0
+		}
 	}
+}
 
-	ctx := ctxlog.Context(context.Background(), super.logger)
+func (bcmd bootCommand) run(ctx context.Context, prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) error {
 	ctx, cancel := context.WithCancel(ctx)
 	defer cancel()
-
-	var err error
-	defer func() {
-		if err != nil {
-			super.logger.WithError(err).Info("exiting")
-		}
-	}()
+	super := &Supervisor{
+		Stderr: stderr,
+		logger: ctxlog.FromContext(ctx),
+	}
 
 	flags := flag.NewFlagSet(prog, flag.ContinueOnError)
 	flags.SetOutput(stderr)
@@ -60,26 +69,25 @@ func (bootCommand) RunCommand(prog string, args []string, stdin io.Reader, stdou
 	flags.BoolVar(&super.OwnTemporaryDatabase, "own-temporary-database", false, "bring up a postgres server and create a temporary database")
 	timeout := flags.Duration("timeout", 0, "maximum time to wait for cluster to be ready")
 	shutdown := flags.Bool("shutdown", false, "shut down when the cluster becomes ready")
-	err = flags.Parse(args)
+	err := flags.Parse(args)
 	if err == flag.ErrHelp {
-		err = nil
-		return 0
+		return nil
 	} else if err != nil {
-		return 2
+		return err
 	} else if *versionFlag {
-		return cmd.Version.RunCommand(prog, args, stdin, stdout, stderr)
+		cmd.Version.RunCommand(prog, args, stdin, stdout, stderr)
+		return nil
 	} else if super.ClusterType != "development" && super.ClusterType != "test" && super.ClusterType != "production" {
-		err = fmt.Errorf("cluster type must be 'development', 'test', or 'production'")
-		return 2
+		return fmt.Errorf("cluster type must be 'development', 'test', or 'production'")
 	}
 
 	loader.SkipAPICalls = true
 	cfg, err := loader.Load()
 	if err != nil {
-		return 1
+		return err
 	}
 
-	super.Start(ctx, cfg)
+	super.Start(ctx, cfg, loader.Path)
 	defer super.Stop()
 
 	var timer *time.Timer
@@ -89,20 +97,19 @@ func (bootCommand) RunCommand(prog string, args []string, stdin io.Reader, stdou
 
 	url, ok := super.WaitReady()
 	if timer != nil && !timer.Stop() {
-		err = errors.New("boot timed out")
-		return 1
+		return errors.New("boot timed out")
 	} else if !ok {
-		err = errors.New("boot failed")
-		return 1
-	}
-	// Write controller URL to stdout. Nothing else goes to
-	// stdout, so this provides an easy way for a calling script
-	// to discover the controller URL when everything is ready.
-	fmt.Fprintln(stdout, url)
-	if *shutdown {
-		super.Stop()
+		super.logger.Error("boot failed")
+	} else {
+		// Write controller URL to stdout. Nothing else goes
+		// to stdout, so this provides an easy way for a
+		// calling script to discover the controller URL when
+		// everything is ready.
+		fmt.Fprintln(stdout, url)
+		if *shutdown {
+			super.Stop()
+		}
 	}
 	// Wait for signal/crash + orderly shutdown
-	<-super.done
-	return 0
+	return super.Wait()
 }
diff --git a/lib/boot/supervisor.go b/lib/boot/supervisor.go
index 7f5d6a9ba..03ae536da 100644
--- a/lib/boot/supervisor.go
+++ b/lib/boot/supervisor.go
@@ -19,15 +19,18 @@ import (
 	"os/signal"
 	"os/user"
 	"path/filepath"
+	"reflect"
 	"strings"
 	"sync"
 	"syscall"
 	"time"
 
+	"git.arvados.org/arvados.git/lib/config"
 	"git.arvados.org/arvados.git/lib/service"
 	"git.arvados.org/arvados.git/sdk/go/arvados"
 	"git.arvados.org/arvados.git/sdk/go/ctxlog"
 	"git.arvados.org/arvados.git/sdk/go/health"
+	"github.com/fsnotify/fsnotify"
 	"github.com/sirupsen/logrus"
 )
 
@@ -45,7 +48,8 @@ type Supervisor struct {
 
 	ctx           context.Context
 	cancel        context.CancelFunc
-	done          chan struct{}
+	done          chan struct{} // closed when child procs/services have shut down
+	err           error         // error that caused shutdown (valid when done is closed)
 	healthChecker *health.Aggregator
 	tasksReady    map[string]chan bool
 	waitShutdown  sync.WaitGroup
@@ -55,30 +59,66 @@ type Supervisor struct {
 	environ    []string // for child processes
 }
 
-func (super *Supervisor) Start(ctx context.Context, cfg *arvados.Config) {
+func (super *Supervisor) Start(ctx context.Context, cfg *arvados.Config, cfgPath string) {
 	super.ctx, super.cancel = context.WithCancel(ctx)
 	super.done = make(chan struct{})
 
 	go func() {
+		defer close(super.done)
+
 		sigch := make(chan os.Signal)
 		signal.Notify(sigch, syscall.SIGINT, syscall.SIGTERM)
 		defer signal.Stop(sigch)
 		go func() {
 			for sig := range sigch {
 				super.logger.WithField("signal", sig).Info("caught signal")
+				if super.err == nil {
+					super.err = fmt.Errorf("caught signal %s", sig)
+				}
+				super.cancel()
+			}
+		}()
+
+		hupch := make(chan os.Signal)
+		signal.Notify(hupch, syscall.SIGHUP)
+		defer signal.Stop(hupch)
+		go func() {
+			for sig := range hupch {
+				super.logger.WithField("signal", sig).Info("caught signal")
+				if super.err == nil {
+					super.err = errNeedConfigReload
+				}
 				super.cancel()
 			}
 		}()
 
+		if cfgPath != "" && cfgPath != "-" && cfg.AutoReloadConfig {
+			go watchConfig(super.ctx, super.logger, cfgPath, copyConfig(cfg), func() {
+				if super.err == nil {
+					super.err = errNeedConfigReload
+				}
+				super.cancel()
+			})
+		}
+
 		err := super.run(cfg)
 		if err != nil {
 			super.logger.WithError(err).Warn("supervisor shut down")
+			if super.err == nil {
+				super.err = err
+			}
 		}
-		close(super.done)
 	}()
 }
 
+func (super *Supervisor) Wait() error {
+	<-super.done
+	return super.err
+}
+
 func (super *Supervisor) run(cfg *arvados.Config) error {
+	defer super.cancel()
+
 	cwd, err := os.Getwd()
 	if err != nil {
 		return err
@@ -706,3 +746,67 @@ func waitForConnect(ctx context.Context, addr string) error {
 	}
 	return ctx.Err()
 }
+
+func copyConfig(cfg *arvados.Config) *arvados.Config {
+	pr, pw := io.Pipe()
+	go func() {
+		err := json.NewEncoder(pw).Encode(cfg)
+		if err != nil {
+			panic(err)
+		}
+		pw.Close()
+	}()
+	cfg2 := new(arvados.Config)
+	err := json.NewDecoder(pr).Decode(cfg2)
+	if err != nil {
+		panic(err)
+	}
+	return cfg2
+}
+
+func watchConfig(ctx context.Context, logger logrus.FieldLogger, cfgPath string, prevcfg *arvados.Config, fn func()) {
+	watcher, err := fsnotify.NewWatcher()
+	if err != nil {
+		logger.WithError(err).Error("fsnotify setup failed")
+		return
+	}
+	defer watcher.Close()
+
+	err = watcher.Add(cfgPath)
+	if err != nil {
+		logger.WithError(err).Error("fsnotify watcher failed")
+		return
+	}
+
+	for {
+		select {
+		case <-ctx.Done():
+			return
+		case err, ok := <-watcher.Errors:
+			if !ok {
+				return
+			}
+			logger.WithError(err).Warn("fsnotify watcher reported error")
+		case _, ok := <-watcher.Events:
+			if !ok {
+				return
+			}
+			for len(watcher.Events) > 0 {
+				<-watcher.Events
+			}
+			loader := config.NewLoader(&bytes.Buffer{}, &logrus.Logger{Out: ioutil.Discard})
+			loader.Path = cfgPath
+			loader.SkipAPICalls = true
+			cfg, err := loader.Load()
+			if err != nil {
+				logger.WithError(err).Warn("error reloading config file after change detected; ignoring new config for now")
+			} else if reflect.DeepEqual(cfg, prevcfg) {
+				logger.Debug("config file changed but is still DeepEqual to the existing config")
+			} else {
+				logger.Debug("config changed, notifying supervisor")
+				fn()
+				prevcfg = cfg
+			}
+		}
+	}
+}
diff --git a/lib/config/cmd_test.go b/lib/config/cmd_test.go
index f4f2d5653..3c420a04e 100644
--- a/lib/config/cmd_test.go
+++ b/lib/config/cmd_test.go
@@ -162,7 +162,7 @@ Clusters:
 	code := DumpCommand.RunCommand("arvados config-dump", []string{"-config", "-"}, bytes.NewBufferString(in), &stdout, &stderr)
 	c.Check(code, check.Equals, 0)
 	c.Check(stderr.String(), check.Matches, `(?ms).*deprecated or unknown config entry: Clusters.z1234.UnknownKey.*`)
-	c.Check(stdout.String(), check.Matches, `(?ms)Clusters:\n  z1234:\n.*`)
+	c.Check(stdout.String(), check.Matches, `(?ms)(.*\n)?Clusters:\n  z1234:\n.*`)
 	c.Check(stdout.String(), check.Matches, `(?ms).*\n *ManagementToken: secret\n.*`)
 	c.Check(stdout.String(), check.Not(check.Matches), `(?ms).*UnknownKey.*`)
 }
diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml
index a4616d70b..fd59c9c42 100644
--- a/lib/config/config.default.yml
+++ b/lib/config/config.default.yml
@@ -1219,3 +1219,8 @@ Clusters:
     # implementation. Note that it also disables some new federation
     # features and will be removed in a future release.
     ForceLegacyAPI14: false
+
+# (Experimental) Restart services automatically when config file
+# changes are detected. Only supported by `arvados-server boot` in
+# dev/test mode.
+AutoReloadConfig: false
diff --git a/lib/config/generated_config.go b/lib/config/generated_config.go
index 0194b02f5..2fce813a7 100644
--- a/lib/config/generated_config.go
+++ b/lib/config/generated_config.go
@@ -1225,4 +1225,9 @@ Clusters:
     # implementation. Note that it also disables some new federation
     # features and will be removed in a future release.
     ForceLegacyAPI14: false
+
+# (Experimental) Restart services automatically when config file
+# changes are detected. Only supported by ` + "`" + `arvados-server boot` + "`" + ` in
+# dev/test mode.
+AutoReloadConfig: false
 `)
diff --git a/lib/controller/integration_test.go b/lib/controller/integration_test.go
index 2adb5811e..6472e2742 100644
--- a/lib/controller/integration_test.go
+++ b/lib/controller/integration_test.go
@@ -113,7 +113,7 @@ func (s *IntegrationSuite) SetUpSuite(c *check.C) {
 			},
 			config: *cfg,
 		}
-		s.testClusters[id].super.Start(context.Background(), &s.testClusters[id].config)
+		s.testClusters[id].super.Start(context.Background(), &s.testClusters[id].config, "-")
 	}
 	for _, tc := range s.testClusters {
 		au, ok := tc.super.WaitReady()
diff --git a/sdk/go/arvados/config.go b/sdk/go/arvados/config.go
index 6b83fb96d..38de6b8ea 100644
--- a/sdk/go/arvados/config.go
+++ b/sdk/go/arvados/config.go
@@ -23,7 +23,8 @@ var DefaultConfigFile = func() string {
 }()
 
 type Config struct {
-	Clusters map[string]Cluster
+	Clusters         map[string]Cluster
+	AutoReloadConfig bool
 }
 
 // GetConfig returns the current system config, loading it from
@@ -66,6 +67,7 @@ type WebDAVCacheConfig struct {
 	MaxPermissionEntries int
 	MaxUUIDEntries       int
 }
+
 type Cluster struct {
 	ClusterID       string `json:"-"`
 	ManagementToken string

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list