X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/ef109c816015c97809432b48d8110e171ec89cd0..83898701e9c75661a240cadcf31f80cbccbb698e:/lib/boot/supervisor.go diff --git a/lib/boot/supervisor.go b/lib/boot/supervisor.go index bc2e4256be..0e4472b8a0 100644 --- a/lib/boot/supervisor.go +++ b/lib/boot/supervisor.go @@ -19,15 +19,18 @@ import ( "os/signal" "os/user" "path/filepath" + "reflect" "strings" "sync" "syscall" "time" + "git.arvados.org/arvados.git/lib/config" "git.arvados.org/arvados.git/lib/service" "git.arvados.org/arvados.git/sdk/go/arvados" "git.arvados.org/arvados.git/sdk/go/ctxlog" "git.arvados.org/arvados.git/sdk/go/health" + "github.com/fsnotify/fsnotify" "github.com/sirupsen/logrus" ) @@ -45,7 +48,8 @@ type Supervisor struct { ctx context.Context cancel context.CancelFunc - done chan struct{} + done chan struct{} // closed when child procs/services have shut down + err error // error that caused shutdown (valid when done is closed) healthChecker *health.Aggregator tasksReady map[string]chan bool waitShutdown sync.WaitGroup @@ -55,30 +59,66 @@ type Supervisor struct { environ []string // for child processes } -func (super *Supervisor) Start(ctx context.Context, cfg *arvados.Config) { +func (super *Supervisor) Start(ctx context.Context, cfg *arvados.Config, cfgPath string) { super.ctx, super.cancel = context.WithCancel(ctx) super.done = make(chan struct{}) go func() { + defer close(super.done) + sigch := make(chan os.Signal) signal.Notify(sigch, syscall.SIGINT, syscall.SIGTERM) defer signal.Stop(sigch) go func() { for sig := range sigch { super.logger.WithField("signal", sig).Info("caught signal") + if super.err == nil { + super.err = fmt.Errorf("caught signal %s", sig) + } + super.cancel() + } + }() + + hupch := make(chan os.Signal) + signal.Notify(hupch, syscall.SIGHUP) + defer signal.Stop(hupch) + go func() { + for sig := range hupch { + super.logger.WithField("signal", sig).Info("caught signal") + if super.err == nil { + super.err = errNeedConfigReload + } super.cancel() } }() + if cfgPath != "" && cfgPath != "-" && cfg.AutoReloadConfig { + go watchConfig(super.ctx, super.logger, cfgPath, copyConfig(cfg), func() { + if super.err == nil { + super.err = errNeedConfigReload + } + super.cancel() + }) + } + err := super.run(cfg) if err != nil { super.logger.WithError(err).Warn("supervisor shut down") + if super.err == nil { + super.err = err + } } - close(super.done) }() } +func (super *Supervisor) Wait() error { + <-super.done + return super.err +} + func (super *Supervisor) run(cfg *arvados.Config) error { + defer super.cancel() + cwd, err := os.Getwd() if err != nil { return err @@ -126,7 +166,7 @@ func (super *Supervisor) run(cfg *arvados.Config) error { super.setEnv("ARVADOS_CONFIG", super.configfile) super.setEnv("RAILS_ENV", super.ClusterType) super.setEnv("TMPDIR", super.tempdir) - super.prependEnv("PATH", filepath.Join(super.tempdir, "bin")+":") + super.prependEnv("PATH", super.tempdir+"/bin:/var/lib/arvados/bin:") super.cluster, err = cfg.GetCluster("") if err != nil { @@ -182,7 +222,7 @@ func (super *Supervisor) run(cfg *arvados.Config) error { runGoProgram{src: "services/keepproxy", svc: super.cluster.Services.Keepproxy, depends: []supervisedTask{runPassenger{src: "services/api"}}}, runGoProgram{src: "services/keepstore", svc: super.cluster.Services.Keepstore}, runGoProgram{src: "services/keep-web", svc: super.cluster.Services.WebDAV}, - runGoProgram{src: "services/ws", svc: super.cluster.Services.Websocket, depends: []supervisedTask{runPostgreSQL{}}}, + runServiceCommand{name: "ws", svc: super.cluster.Services.Websocket, depends: []supervisedTask{runPostgreSQL{}}}, installPassenger{src: "services/api"}, runPassenger{src: "services/api", svc: super.cluster.Services.RailsAPI, depends: []supervisedTask{createCertificates{}, runPostgreSQL{}, installPassenger{src: "services/api"}}}, installPassenger{src: "apps/workbench", depends: []supervisedTask{installPassenger{src: "services/api"}}}, // dependency ensures workbench doesn't delay api startup @@ -360,7 +400,11 @@ func (super *Supervisor) setupRubyEnv() error { "GEM_HOME=", "GEM_PATH=", }) - cmd := exec.Command("gem", "env", "gempath") + gem := "gem" + if _, err := os.Stat("/var/lib/arvados/bin/gem"); err == nil { + gem = "/var/lib/arvados/bin/gem" + } + cmd := exec.Command(gem, "env", "gempath") cmd.Env = super.environ buf, err := cmd.Output() // /var/lib/arvados/.gem/ruby/2.5.0/bin:... if err != nil || len(buf) == 0 { @@ -394,9 +438,9 @@ func (super *Supervisor) lookPath(prog string) string { return prog } -// Run prog with args, using dir as working directory. If ctx is -// cancelled while the child is running, RunProgram terminates the -// child, waits for it to exit, then returns. +// RunProgram runs prog with args, using dir as working directory. If ctx is +// cancelled while the child is running, RunProgram terminates the child, waits +// for it to exit, then returns. // // Child's environment will have our env vars, plus any given in env. // @@ -406,7 +450,11 @@ func (super *Supervisor) RunProgram(ctx context.Context, dir string, output io.W cmdline := fmt.Sprintf("%s", append([]string{prog}, args...)) super.logger.WithField("command", cmdline).WithField("dir", dir).Info("executing") - logprefix := strings.TrimPrefix(prog, super.tempdir+"/bin/") + logprefix := prog + if logprefix == "setuidgid" && len(args) >= 3 { + logprefix = args[2] + } + logprefix = strings.TrimPrefix(logprefix, super.tempdir+"/bin/") if logprefix == "bundle" && len(args) > 2 && args[0] == "exec" { logprefix = args[1] } else if logprefix == "arvados-server" && len(args) > 1 { @@ -520,7 +568,7 @@ func (super *Supervisor) autofillConfig(cfg *arvados.Config) error { if p == "0" { p = nextPort(h) } - cluster.Services.Controller.ExternalURL = arvados.URL{Scheme: "https", Host: net.JoinHostPort(h, p)} + cluster.Services.Controller.ExternalURL = arvados.URL{Scheme: "https", Host: net.JoinHostPort(h, p), Path: "/"} } for _, svc := range []*arvados.Service{ &cluster.Services.Controller, @@ -539,20 +587,21 @@ func (super *Supervisor) autofillConfig(cfg *arvados.Config) error { continue } if svc.ExternalURL.Host == "" { - if (svc == &cluster.Services.Controller || + if svc == &cluster.Services.Controller || svc == &cluster.Services.GitHTTP || + svc == &cluster.Services.Health || svc == &cluster.Services.Keepproxy || svc == &cluster.Services.WebDAV || svc == &cluster.Services.WebDAVDownload || - svc == &cluster.Services.Workbench1) { - svc.ExternalURL = arvados.URL{Scheme: "https", Host: fmt.Sprintf("%s:%s", super.ListenHost, nextPort(super.ListenHost))} + svc == &cluster.Services.Workbench1 { + svc.ExternalURL = arvados.URL{Scheme: "https", Host: fmt.Sprintf("%s:%s", super.ListenHost, nextPort(super.ListenHost)), Path: "/"} } else if svc == &cluster.Services.Websocket { - svc.ExternalURL = arvados.URL{Scheme: "wss", Host: fmt.Sprintf("%s:%s", super.ListenHost, nextPort(super.ListenHost))} + svc.ExternalURL = arvados.URL{Scheme: "wss", Host: fmt.Sprintf("%s:%s", super.ListenHost, nextPort(super.ListenHost)), Path: "/websocket"} } } if len(svc.InternalURLs) == 0 { svc.InternalURLs = map[arvados.URL]arvados.ServiceInstance{ - arvados.URL{Scheme: "http", Host: fmt.Sprintf("%s:%s", super.ListenHost, nextPort(super.ListenHost))}: arvados.ServiceInstance{}, + {Scheme: "http", Host: fmt.Sprintf("%s:%s", super.ListenHost, nextPort(super.ListenHost)), Path: "/"}: {}, } } } @@ -562,12 +611,13 @@ func (super *Supervisor) autofillConfig(cfg *arvados.Config) error { if cluster.ManagementToken == "" { cluster.ManagementToken = randomHexString(64) } - if cluster.API.RailsSessionSecretToken == "" { - cluster.API.RailsSessionSecretToken = randomHexString(64) - } if cluster.Collections.BlobSigningKey == "" { cluster.Collections.BlobSigningKey = randomHexString(64) } + if cluster.Users.AnonymousUserToken == "" { + cluster.Users.AnonymousUserToken = randomHexString(64) + } + if super.ClusterType != "production" && cluster.Containers.DispatchPrivateKey == "" { buf, err := ioutil.ReadFile(filepath.Join(super.SourcePath, "lib", "dispatchcloud", "test", "sshkey_dispatch")) if err != nil { @@ -580,7 +630,7 @@ func (super *Supervisor) autofillConfig(cfg *arvados.Config) error { } if super.ClusterType == "test" { // Add a second keepstore process. - cluster.Services.Keepstore.InternalURLs[arvados.URL{Scheme: "http", Host: fmt.Sprintf("%s:%s", super.ListenHost, nextPort(super.ListenHost))}] = arvados.ServiceInstance{} + cluster.Services.Keepstore.InternalURLs[arvados.URL{Scheme: "http", Host: fmt.Sprintf("%s:%s", super.ListenHost, nextPort(super.ListenHost)), Path: "/"}] = arvados.ServiceInstance{} // Create a directory-backed volume for each keepstore // process. @@ -698,3 +748,67 @@ func waitForConnect(ctx context.Context, addr string) error { } return ctx.Err() } + +func copyConfig(cfg *arvados.Config) *arvados.Config { + pr, pw := io.Pipe() + go func() { + err := json.NewEncoder(pw).Encode(cfg) + if err != nil { + panic(err) + } + pw.Close() + }() + cfg2 := new(arvados.Config) + err := json.NewDecoder(pr).Decode(cfg2) + if err != nil { + panic(err) + } + return cfg2 +} + +func watchConfig(ctx context.Context, logger logrus.FieldLogger, cfgPath string, prevcfg *arvados.Config, fn func()) { + watcher, err := fsnotify.NewWatcher() + if err != nil { + logger.WithError(err).Error("fsnotify setup failed") + return + } + defer watcher.Close() + + err = watcher.Add(cfgPath) + if err != nil { + logger.WithError(err).Error("fsnotify watcher failed") + return + } + + for { + select { + case <-ctx.Done(): + return + case err, ok := <-watcher.Errors: + if !ok { + return + } + logger.WithError(err).Warn("fsnotify watcher reported error") + case _, ok := <-watcher.Events: + if !ok { + return + } + for len(watcher.Events) > 0 { + <-watcher.Events + } + loader := config.NewLoader(&bytes.Buffer{}, &logrus.Logger{Out: ioutil.Discard}) + loader.Path = cfgPath + loader.SkipAPICalls = true + cfg, err := loader.Load() + if err != nil { + logger.WithError(err).Warn("error reloading config file after change detected; ignoring new config for now") + } else if reflect.DeepEqual(cfg, prevcfg) { + logger.Debug("config file changed but is still DeepEqual to the existing config") + } else { + logger.Debug("config changed, notifying supervisor") + fn() + prevcfg = cfg + } + } + } +}