From: Tom Clegg Date: Fri, 10 Apr 2020 19:44:14 +0000 (-0400) Subject: 16048: arvados-server boot: restart everything when config changes. X-Git-Tag: 2.1.0~244^2 X-Git-Url: https://git.arvados.org/arvados.git/commitdiff_plain/45c57dc86010f8fd9ae862d91a710e0752ceda4a 16048: arvados-server boot: restart everything when config changes. Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- diff --git a/doc/examples/config/zzzzz.yml b/doc/examples/config/zzzzz.yml index c63550edf7..d1e1336d54 100644 --- a/doc/examples/config/zzzzz.yml +++ b/doc/examples/config/zzzzz.yml @@ -1,3 +1,4 @@ +AutoReloadConfig: true Clusters: zzzzz: ManagementToken: e687950a23c3a9bceec28c6223a06c79 diff --git a/go.mod b/go.mod index 4491b35981..34b7e07790 100644 --- a/go.mod +++ b/go.mod @@ -11,6 +11,7 @@ require ( github.com/anmitsu/go-shlex v0.0.0-20161002113705-648efa622239 // indirect github.com/arvados/cgofuse v1.2.0-arvados1 github.com/aws/aws-sdk-go v1.25.30 + github.com/bgentry/speakeasy v0.1.0 // indirect github.com/coreos/go-oidc v2.1.0+incompatible github.com/coreos/go-systemd v0.0.0-20180108085132-cc4f39464dc7 github.com/dgrijalva/jwt-go v3.1.0+incompatible // indirect @@ -21,6 +22,7 @@ require ( github.com/docker/go-connections v0.3.0 // indirect github.com/docker/go-units v0.3.3-0.20171221200356-d59758554a3d // indirect github.com/flynn/go-shlex v0.0.0-20150515145356-3f9db97f8568 // indirect + github.com/fsnotify/fsnotify v1.4.9 github.com/ghodss/yaml v1.0.0 github.com/gliderlabs/ssh v0.2.2 // indirect github.com/gogo/protobuf v1.1.1 diff --git a/go.sum b/go.sum index 18cf89b0e1..03b2f77b6d 100644 --- a/go.sum +++ b/go.sum @@ -27,6 +27,8 @@ github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24 github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/bgentry/speakeasy v0.1.0 h1:ByYyxL9InA1OWqxJqqp2A5pYHUrCiAL6K3J+LKSsQkY= +github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= github.com/cespare/xxhash/v2 v2.1.0 h1:yTUvW7Vhb89inJ+8irsUqiWjh8iT6sQPZiQzI6ReGkA= github.com/cespare/xxhash/v2 v2.1.0/go.mod h1:dgIUBU3pDso/gPgZ1osOZ0iQf77oPR28Tjxl5dIMyVM= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= @@ -54,6 +56,8 @@ github.com/docker/go-units v0.3.3-0.20171221200356-d59758554a3d h1:dVaNRYvaGV23A github.com/docker/go-units v0.3.3-0.20171221200356-d59758554a3d/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= github.com/flynn/go-shlex v0.0.0-20150515145356-3f9db97f8568 h1:BHsljHzVlRcyQhjrss6TZTdY2VfCqZPbv5k3iBFa2ZQ= github.com/flynn/go-shlex v0.0.0-20150515145356-3f9db97f8568/go.mod h1:xEzjJPgXI435gkrCt3MPfRiAkVrwSbHsst4LCFVfpJc= +github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4= +github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/gliderlabs/ssh v0.2.2 h1:6zsha5zo/TWhRhwqCD3+EarCAgZ2yN28ipRnGPnwkI0= @@ -213,6 +217,7 @@ golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5h golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191010194322-b09406accb47/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191105231009-c1f44814a5cd h1:3x5uuvBgE6oaXJjCOvpCC1IpgJogqQ+PqGGU3ZxAgII= golang.org/x/sys v0.0.0-20191105231009-c1f44814a5cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= diff --git a/lib/boot/cmd.go b/lib/boot/cmd.go index 5147e3ac33..e0e2755220 100644 --- a/lib/boot/cmd.go +++ b/lib/boot/cmd.go @@ -29,24 +29,33 @@ type supervisedTask interface { String() string } +var errNeedConfigReload = errors.New("config changed, restart needed") + type bootCommand struct{} -func (bootCommand) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int { - super := &Supervisor{ - Stderr: stderr, - logger: ctxlog.New(stderr, "json", "info"), +func (bcmd bootCommand) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int { + logger := ctxlog.New(stderr, "json", "info") + ctx := ctxlog.Context(context.Background(), logger) + for { + err := bcmd.run(ctx, prog, args, stdin, stdout, stderr) + if err == errNeedConfigReload { + continue + } else if err != nil { + logger.WithError(err).Info("exiting") + return 1 + } else { + return 0 + } } +} - ctx := ctxlog.Context(context.Background(), super.logger) +func (bcmd bootCommand) run(ctx context.Context, prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) error { ctx, cancel := context.WithCancel(ctx) defer cancel() - - var err error - defer func() { - if err != nil { - super.logger.WithError(err).Info("exiting") - } - }() + super := &Supervisor{ + Stderr: stderr, + logger: ctxlog.FromContext(ctx), + } flags := flag.NewFlagSet(prog, flag.ContinueOnError) flags.SetOutput(stderr) @@ -60,26 +69,25 @@ func (bootCommand) RunCommand(prog string, args []string, stdin io.Reader, stdou flags.BoolVar(&super.OwnTemporaryDatabase, "own-temporary-database", false, "bring up a postgres server and create a temporary database") timeout := flags.Duration("timeout", 0, "maximum time to wait for cluster to be ready") shutdown := flags.Bool("shutdown", false, "shut down when the cluster becomes ready") - err = flags.Parse(args) + err := flags.Parse(args) if err == flag.ErrHelp { - err = nil - return 0 + return nil } else if err != nil { - return 2 + return err } else if *versionFlag { - return cmd.Version.RunCommand(prog, args, stdin, stdout, stderr) + cmd.Version.RunCommand(prog, args, stdin, stdout, stderr) + return nil } else if super.ClusterType != "development" && super.ClusterType != "test" && super.ClusterType != "production" { - err = fmt.Errorf("cluster type must be 'development', 'test', or 'production'") - return 2 + return fmt.Errorf("cluster type must be 'development', 'test', or 'production'") } loader.SkipAPICalls = true cfg, err := loader.Load() if err != nil { - return 1 + return err } - super.Start(ctx, cfg) + super.Start(ctx, cfg, loader.Path) defer super.Stop() var timer *time.Timer @@ -89,20 +97,19 @@ func (bootCommand) RunCommand(prog string, args []string, stdin io.Reader, stdou url, ok := super.WaitReady() if timer != nil && !timer.Stop() { - err = errors.New("boot timed out") - return 1 + return errors.New("boot timed out") } else if !ok { - err = errors.New("boot failed") - return 1 - } - // Write controller URL to stdout. Nothing else goes to - // stdout, so this provides an easy way for a calling script - // to discover the controller URL when everything is ready. - fmt.Fprintln(stdout, url) - if *shutdown { - super.Stop() + super.logger.Error("boot failed") + } else { + // Write controller URL to stdout. Nothing else goes + // to stdout, so this provides an easy way for a + // calling script to discover the controller URL when + // everything is ready. + fmt.Fprintln(stdout, url) + if *shutdown { + super.Stop() + } } // Wait for signal/crash + orderly shutdown - <-super.done - return 0 + return super.Wait() } diff --git a/lib/boot/supervisor.go b/lib/boot/supervisor.go index 7f5d6a9baa..03ae536dae 100644 --- a/lib/boot/supervisor.go +++ b/lib/boot/supervisor.go @@ -19,15 +19,18 @@ import ( "os/signal" "os/user" "path/filepath" + "reflect" "strings" "sync" "syscall" "time" + "git.arvados.org/arvados.git/lib/config" "git.arvados.org/arvados.git/lib/service" "git.arvados.org/arvados.git/sdk/go/arvados" "git.arvados.org/arvados.git/sdk/go/ctxlog" "git.arvados.org/arvados.git/sdk/go/health" + "github.com/fsnotify/fsnotify" "github.com/sirupsen/logrus" ) @@ -45,7 +48,8 @@ type Supervisor struct { ctx context.Context cancel context.CancelFunc - done chan struct{} + done chan struct{} // closed when child procs/services have shut down + err error // error that caused shutdown (valid when done is closed) healthChecker *health.Aggregator tasksReady map[string]chan bool waitShutdown sync.WaitGroup @@ -55,30 +59,66 @@ type Supervisor struct { environ []string // for child processes } -func (super *Supervisor) Start(ctx context.Context, cfg *arvados.Config) { +func (super *Supervisor) Start(ctx context.Context, cfg *arvados.Config, cfgPath string) { super.ctx, super.cancel = context.WithCancel(ctx) super.done = make(chan struct{}) go func() { + defer close(super.done) + sigch := make(chan os.Signal) signal.Notify(sigch, syscall.SIGINT, syscall.SIGTERM) defer signal.Stop(sigch) go func() { for sig := range sigch { super.logger.WithField("signal", sig).Info("caught signal") + if super.err == nil { + super.err = fmt.Errorf("caught signal %s", sig) + } + super.cancel() + } + }() + + hupch := make(chan os.Signal) + signal.Notify(hupch, syscall.SIGHUP) + defer signal.Stop(hupch) + go func() { + for sig := range hupch { + super.logger.WithField("signal", sig).Info("caught signal") + if super.err == nil { + super.err = errNeedConfigReload + } super.cancel() } }() + if cfgPath != "" && cfgPath != "-" && cfg.AutoReloadConfig { + go watchConfig(super.ctx, super.logger, cfgPath, copyConfig(cfg), func() { + if super.err == nil { + super.err = errNeedConfigReload + } + super.cancel() + }) + } + err := super.run(cfg) if err != nil { super.logger.WithError(err).Warn("supervisor shut down") + if super.err == nil { + super.err = err + } } - close(super.done) }() } +func (super *Supervisor) Wait() error { + <-super.done + return super.err +} + func (super *Supervisor) run(cfg *arvados.Config) error { + defer super.cancel() + cwd, err := os.Getwd() if err != nil { return err @@ -706,3 +746,67 @@ func waitForConnect(ctx context.Context, addr string) error { } return ctx.Err() } + +func copyConfig(cfg *arvados.Config) *arvados.Config { + pr, pw := io.Pipe() + go func() { + err := json.NewEncoder(pw).Encode(cfg) + if err != nil { + panic(err) + } + pw.Close() + }() + cfg2 := new(arvados.Config) + err := json.NewDecoder(pr).Decode(cfg2) + if err != nil { + panic(err) + } + return cfg2 +} + +func watchConfig(ctx context.Context, logger logrus.FieldLogger, cfgPath string, prevcfg *arvados.Config, fn func()) { + watcher, err := fsnotify.NewWatcher() + if err != nil { + logger.WithError(err).Error("fsnotify setup failed") + return + } + defer watcher.Close() + + err = watcher.Add(cfgPath) + if err != nil { + logger.WithError(err).Error("fsnotify watcher failed") + return + } + + for { + select { + case <-ctx.Done(): + return + case err, ok := <-watcher.Errors: + if !ok { + return + } + logger.WithError(err).Warn("fsnotify watcher reported error") + case _, ok := <-watcher.Events: + if !ok { + return + } + for len(watcher.Events) > 0 { + <-watcher.Events + } + loader := config.NewLoader(&bytes.Buffer{}, &logrus.Logger{Out: ioutil.Discard}) + loader.Path = cfgPath + loader.SkipAPICalls = true + cfg, err := loader.Load() + if err != nil { + logger.WithError(err).Warn("error reloading config file after change detected; ignoring new config for now") + } else if reflect.DeepEqual(cfg, prevcfg) { + logger.Debug("config file changed but is still DeepEqual to the existing config") + } else { + logger.Debug("config changed, notifying supervisor") + fn() + prevcfg = cfg + } + } + } +} diff --git a/lib/config/cmd_test.go b/lib/config/cmd_test.go index f4f2d5653b..3c420a04eb 100644 --- a/lib/config/cmd_test.go +++ b/lib/config/cmd_test.go @@ -162,7 +162,7 @@ Clusters: code := DumpCommand.RunCommand("arvados config-dump", []string{"-config", "-"}, bytes.NewBufferString(in), &stdout, &stderr) c.Check(code, check.Equals, 0) c.Check(stderr.String(), check.Matches, `(?ms).*deprecated or unknown config entry: Clusters.z1234.UnknownKey.*`) - c.Check(stdout.String(), check.Matches, `(?ms)Clusters:\n z1234:\n.*`) + c.Check(stdout.String(), check.Matches, `(?ms)(.*\n)?Clusters:\n z1234:\n.*`) c.Check(stdout.String(), check.Matches, `(?ms).*\n *ManagementToken: secret\n.*`) c.Check(stdout.String(), check.Not(check.Matches), `(?ms).*UnknownKey.*`) } diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml index a4616d70b9..fd59c9c425 100644 --- a/lib/config/config.default.yml +++ b/lib/config/config.default.yml @@ -1219,3 +1219,8 @@ Clusters: # implementation. Note that it also disables some new federation # features and will be removed in a future release. ForceLegacyAPI14: false + +# (Experimental) Restart services automatically when config file +# changes are detected. Only supported by `arvados-server boot` in +# dev/test mode. +AutoReloadConfig: false diff --git a/lib/config/generated_config.go b/lib/config/generated_config.go index 0194b02f5e..2fce813a79 100644 --- a/lib/config/generated_config.go +++ b/lib/config/generated_config.go @@ -1225,4 +1225,9 @@ Clusters: # implementation. Note that it also disables some new federation # features and will be removed in a future release. ForceLegacyAPI14: false + +# (Experimental) Restart services automatically when config file +# changes are detected. Only supported by ` + "`" + `arvados-server boot` + "`" + ` in +# dev/test mode. +AutoReloadConfig: false `) diff --git a/lib/controller/integration_test.go b/lib/controller/integration_test.go index 2adb5811ea..6472e27420 100644 --- a/lib/controller/integration_test.go +++ b/lib/controller/integration_test.go @@ -113,7 +113,7 @@ func (s *IntegrationSuite) SetUpSuite(c *check.C) { }, config: *cfg, } - s.testClusters[id].super.Start(context.Background(), &s.testClusters[id].config) + s.testClusters[id].super.Start(context.Background(), &s.testClusters[id].config, "-") } for _, tc := range s.testClusters { au, ok := tc.super.WaitReady() diff --git a/sdk/go/arvados/config.go b/sdk/go/arvados/config.go index 6b83fb96d4..38de6b8ea4 100644 --- a/sdk/go/arvados/config.go +++ b/sdk/go/arvados/config.go @@ -23,7 +23,8 @@ var DefaultConfigFile = func() string { }() type Config struct { - Clusters map[string]Cluster + Clusters map[string]Cluster + AutoReloadConfig bool } // GetConfig returns the current system config, loading it from @@ -66,6 +67,7 @@ type WebDAVCacheConfig struct { MaxPermissionEntries int MaxUUIDEntries int } + type Cluster struct { ClusterID string `json:"-"` ManagementToken string