X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/13cd15c1ca08ebf37fdc600dfd812159b37e8389..00f65f60e69326839447f431146312481db05f01:/lib/boot/supervisor.go diff --git a/lib/boot/supervisor.go b/lib/boot/supervisor.go index c444ec3001..1fdb8eba31 100644 --- a/lib/boot/supervisor.go +++ b/lib/boot/supervisor.go @@ -14,12 +14,14 @@ import ( "io" "io/ioutil" "net" + "net/url" "os" "os/exec" "os/signal" "os/user" "path/filepath" "reflect" + "strconv" "strings" "sync" "syscall" @@ -40,6 +42,9 @@ type Supervisor struct { ClusterType string // e.g., production ListenHost string // e.g., localhost ControllerAddr string // e.g., 127.0.0.1:8000 + Workbench2Source string // e.g., /home/username/src/arvados-workbench2 + NoWorkbench1 bool + NoWorkbench2 bool OwnTemporaryDatabase bool Stderr io.Writer @@ -54,11 +59,15 @@ type Supervisor struct { tasksReady map[string]chan bool waitShutdown sync.WaitGroup - tempdir string + bindir string + tempdir string // in production mode, this is accessible only to root + wwwtempdir string // in production mode, this is accessible only to www-data configfile string environ []string // for child processes } +func (super *Supervisor) Cluster() *arvados.Cluster { return super.cluster } + func (super *Supervisor) Start(ctx context.Context, cfg *arvados.Config, cfgPath string) { super.ctx, super.cancel = context.WithCancel(ctx) super.done = make(chan struct{}) @@ -131,13 +140,26 @@ func (super *Supervisor) run(cfg *arvados.Config) error { return err } - super.tempdir, err = ioutil.TempDir("", "arvados-server-boot-") - if err != nil { - return err - } - defer os.RemoveAll(super.tempdir) - if err := os.Mkdir(filepath.Join(super.tempdir, "bin"), 0755); err != nil { - return err + // Choose bin and temp dirs: /var/lib/arvados/... in + // production, transient tempdir otherwise. + if super.ClusterType == "production" { + // These dirs have already been created by + // "arvados-server install" (or by extracting a + // package). + super.tempdir = "/var/lib/arvados/tmp" + super.wwwtempdir = "/var/lib/arvados/wwwtmp" + super.bindir = "/var/lib/arvados/bin" + } else { + super.tempdir, err = ioutil.TempDir("", "arvados-server-boot-") + if err != nil { + return err + } + defer os.RemoveAll(super.tempdir) + super.wwwtempdir = super.tempdir + super.bindir = filepath.Join(super.tempdir, "bin") + if err := os.Mkdir(super.bindir, 0755); err != nil { + return err + } } // Fill in any missing config keys, and write the resulting @@ -146,7 +168,7 @@ func (super *Supervisor) run(cfg *arvados.Config) error { if err != nil { return err } - conffile, err := os.OpenFile(filepath.Join(super.tempdir, "config.yml"), os.O_CREATE|os.O_WRONLY, 0644) + conffile, err := os.OpenFile(filepath.Join(super.wwwtempdir, "config.yml"), os.O_CREATE|os.O_WRONLY, 0644) if err != nil { return err } @@ -166,7 +188,10 @@ func (super *Supervisor) run(cfg *arvados.Config) error { super.setEnv("ARVADOS_CONFIG", super.configfile) super.setEnv("RAILS_ENV", super.ClusterType) super.setEnv("TMPDIR", super.tempdir) - super.prependEnv("PATH", super.tempdir+"/bin:/var/lib/arvados/bin:") + super.prependEnv("PATH", "/var/lib/arvados/bin:") + if super.ClusterType != "production" { + super.prependEnv("PATH", super.tempdir+"/bin:") + } super.cluster, err = cfg.GetCluster("") if err != nil { @@ -182,16 +207,18 @@ func (super *Supervisor) run(cfg *arvados.Config) error { "PID": os.Getpid(), }) - if super.SourceVersion == "" { + if super.SourceVersion == "" && super.ClusterType == "production" { + // don't need SourceVersion + } else if super.SourceVersion == "" { // Find current source tree version. var buf bytes.Buffer - err = super.RunProgram(super.ctx, ".", &buf, nil, "git", "diff", "--shortstat") + err = super.RunProgram(super.ctx, ".", runOptions{output: &buf}, "git", "diff", "--shortstat") if err != nil { return err } dirty := buf.Len() > 0 buf.Reset() - err = super.RunProgram(super.ctx, ".", &buf, nil, "git", "log", "-n1", "--format=%H") + err = super.RunProgram(super.ctx, ".", runOptions{output: &buf}, "git", "log", "-n1", "--format=%H") if err != nil { return err } @@ -216,23 +243,32 @@ func (super *Supervisor) run(cfg *arvados.Config) error { createCertificates{}, runPostgreSQL{}, runNginx{}, - runServiceCommand{name: "controller", svc: super.cluster.Services.Controller, depends: []supervisedTask{runPostgreSQL{}}}, - runGoProgram{src: "services/arv-git-httpd", svc: super.cluster.Services.GitHTTP}, + runServiceCommand{name: "controller", svc: super.cluster.Services.Controller, depends: []supervisedTask{seedDatabase{}}}, + runServiceCommand{name: "git-httpd", svc: super.cluster.Services.GitHTTP}, runGoProgram{src: "services/health", svc: super.cluster.Services.Health}, - runGoProgram{src: "services/keepproxy", svc: super.cluster.Services.Keepproxy, depends: []supervisedTask{runPassenger{src: "services/api"}}}, - runGoProgram{src: "services/keepstore", svc: super.cluster.Services.Keepstore}, + runServiceCommand{name: "keepproxy", svc: super.cluster.Services.Keepproxy, depends: []supervisedTask{runPassenger{src: "services/api"}}}, + runServiceCommand{name: "keepstore", svc: super.cluster.Services.Keepstore}, runGoProgram{src: "services/keep-web", svc: super.cluster.Services.WebDAV}, - runServiceCommand{name: "ws", svc: super.cluster.Services.Websocket, depends: []supervisedTask{runPostgreSQL{}}}, + runServiceCommand{name: "ws", svc: super.cluster.Services.Websocket, depends: []supervisedTask{seedDatabase{}}}, installPassenger{src: "services/api"}, - runPassenger{src: "services/api", svc: super.cluster.Services.RailsAPI, depends: []supervisedTask{createCertificates{}, runPostgreSQL{}, installPassenger{src: "services/api"}}}, - installPassenger{src: "apps/workbench", depends: []supervisedTask{installPassenger{src: "services/api"}}}, // dependency ensures workbench doesn't delay api startup - runPassenger{src: "apps/workbench", svc: super.cluster.Services.Workbench1, depends: []supervisedTask{installPassenger{src: "apps/workbench"}}}, + runPassenger{src: "services/api", varlibdir: "railsapi", svc: super.cluster.Services.RailsAPI, depends: []supervisedTask{createCertificates{}, seedDatabase{}, installPassenger{src: "services/api"}}}, seedDatabase{}, } + if !super.NoWorkbench1 { + tasks = append(tasks, + installPassenger{src: "apps/workbench", depends: []supervisedTask{seedDatabase{}}}, // dependency ensures workbench doesn't delay api install/startup + runPassenger{src: "apps/workbench", varlibdir: "workbench1", svc: super.cluster.Services.Workbench1, depends: []supervisedTask{installPassenger{src: "apps/workbench"}}}, + ) + } + if !super.NoWorkbench2 { + tasks = append(tasks, + runWorkbench2{svc: super.cluster.Services.Workbench2}, + ) + } if super.ClusterType != "test" { tasks = append(tasks, - runServiceCommand{name: "dispatch-cloud", svc: super.cluster.Services.Controller}, - runGoProgram{src: "services/keep-balance"}, + runServiceCommand{name: "dispatch-cloud", svc: super.cluster.Services.DispatchCloud}, + runGoProgram{src: "services/keep-balance", svc: super.cluster.Services.Keepbalance}, ) } super.tasksReady = map[string]chan bool{} @@ -382,9 +418,11 @@ func dedupEnv(in []string) []string { func (super *Supervisor) installGoProgram(ctx context.Context, srcpath string) (string, error) { _, basename := filepath.Split(srcpath) - bindir := filepath.Join(super.tempdir, "bin") - binfile := filepath.Join(bindir, basename) - err := super.RunProgram(ctx, filepath.Join(super.SourcePath, srcpath), nil, []string{"GOBIN=" + bindir}, "go", "install", "-ldflags", "-X git.arvados.org/arvados.git/lib/cmd.version="+super.SourceVersion+" -X main.version="+super.SourceVersion) + binfile := filepath.Join(super.bindir, basename) + if super.ClusterType == "production" { + return binfile, nil + } + err := super.RunProgram(ctx, filepath.Join(super.SourcePath, srcpath), runOptions{env: []string{"GOBIN=" + super.bindir}}, "go", "install", "-ldflags", "-X git.arvados.org/arvados.git/lib/cmd.version="+super.SourceVersion+" -X main.version="+super.SourceVersion) return binfile, err } @@ -401,14 +439,23 @@ func (super *Supervisor) setupRubyEnv() error { "GEM_PATH=", }) gem := "gem" - if _, err := os.Stat("/var/lib/arvados/bin/gem"); err == nil { + if _, err := os.Stat("/var/lib/arvados/bin/gem"); err == nil || super.ClusterType == "production" { gem = "/var/lib/arvados/bin/gem" } cmd := exec.Command(gem, "env", "gempath") + if super.ClusterType == "production" { + cmd.Args = append([]string{"sudo", "-u", "www-data", "-E", "HOME=/var/www"}, cmd.Args...) + path, err := exec.LookPath("sudo") + if err != nil { + return fmt.Errorf("LookPath(\"sudo\"): %w", err) + } + cmd.Path = path + } + cmd.Stderr = super.Stderr cmd.Env = super.environ buf, err := cmd.Output() // /var/lib/arvados/.gem/ruby/2.5.0/bin:... if err != nil || len(buf) == 0 { - return fmt.Errorf("gem env gempath: %v", err) + return fmt.Errorf("gem env gempath: %w", err) } gempath := string(bytes.Split(buf, []byte{':'})[0]) super.prependEnv("PATH", gempath+"/bin:") @@ -438,33 +485,55 @@ func (super *Supervisor) lookPath(prog string) string { return prog } -// Run prog with args, using dir as working directory. If ctx is -// cancelled while the child is running, RunProgram terminates the -// child, waits for it to exit, then returns. +type runOptions struct { + output io.Writer // attach stdout + env []string // add/replace environment variables + user string // run as specified user + stdin io.Reader +} + +// RunProgram runs prog with args, using dir as working directory. If ctx is +// cancelled while the child is running, RunProgram terminates the child, waits +// for it to exit, then returns. // // Child's environment will have our env vars, plus any given in env. // // Child's stdout will be written to output if non-nil, otherwise the // boot command's stderr. -func (super *Supervisor) RunProgram(ctx context.Context, dir string, output io.Writer, env []string, prog string, args ...string) error { +func (super *Supervisor) RunProgram(ctx context.Context, dir string, opts runOptions, prog string, args ...string) error { cmdline := fmt.Sprintf("%s", append([]string{prog}, args...)) super.logger.WithField("command", cmdline).WithField("dir", dir).Info("executing") logprefix := prog - if logprefix == "setuidgid" && len(args) >= 3 { - logprefix = args[2] - } - logprefix = strings.TrimPrefix(logprefix, super.tempdir+"/bin/") - if logprefix == "bundle" && len(args) > 2 && args[0] == "exec" { - logprefix = args[1] - } else if logprefix == "arvados-server" && len(args) > 1 { - logprefix = args[0] - } - if !strings.HasPrefix(dir, "/") { - logprefix = dir + ": " + logprefix + { + innerargs := args + if logprefix == "sudo" { + for i := 0; i < len(args); i++ { + if args[i] == "-u" { + i++ + } else if args[i] == "-E" || strings.Contains(args[i], "=") { + } else { + logprefix = args[i] + innerargs = args[i+1:] + break + } + } + } + logprefix = strings.TrimPrefix(logprefix, "/var/lib/arvados/bin/") + logprefix = strings.TrimPrefix(logprefix, super.tempdir+"/bin/") + if logprefix == "bundle" && len(innerargs) > 2 && innerargs[0] == "exec" { + _, dirbase := filepath.Split(dir) + logprefix = innerargs[1] + "@" + dirbase + } else if logprefix == "arvados-server" && len(args) > 1 { + logprefix = args[0] + } + if !strings.HasPrefix(dir, "/") { + logprefix = dir + ": " + logprefix + } } cmd := exec.Command(super.lookPath(prog), args...) + cmd.Stdin = opts.stdin stdout, err := cmd.StdoutPipe() if err != nil { return err @@ -482,10 +551,10 @@ func (super *Supervisor) RunProgram(ctx context.Context, dir string, output io.W }() copiers.Add(1) go func() { - if output == nil { + if opts.output == nil { io.Copy(logwriter, stdout) } else { - io.Copy(output, stdout) + io.Copy(opts.output, stdout) } copiers.Done() }() @@ -495,10 +564,34 @@ func (super *Supervisor) RunProgram(ctx context.Context, dir string, output io.W } else { cmd.Dir = filepath.Join(super.SourcePath, dir) } - env = append([]string(nil), env...) + env := append([]string(nil), opts.env...) env = append(env, super.environ...) cmd.Env = dedupEnv(env) + if opts.user != "" { + // Note: We use this approach instead of "sudo" + // because in certain circumstances (we are pid 1 in a + // docker container, and our passenger child process + // changes to pgid 1) the intermediate sudo process + // notices we have the same pgid as our child and + // refuses to propagate signals from us to our child, + // so we can't signal/shutdown our passenger/rails + // apps. "chpst" or "setuidgid" would work, but these + // few lines avoid depending on runit/daemontools. + u, err := user.Lookup(opts.user) + if err != nil { + return fmt.Errorf("user.Lookup(%q): %w", opts.user, err) + } + uid, _ := strconv.Atoi(u.Uid) + gid, _ := strconv.Atoi(u.Gid) + cmd.SysProcAttr = &syscall.SysProcAttr{ + Credential: &syscall.Credential{ + Uid: uint32(uid), + Gid: uint32(gid), + }, + } + } + exited := false defer func() { exited = true }() go func() { @@ -544,32 +637,40 @@ func (super *Supervisor) autofillConfig(cfg *arvados.Config) error { return err } usedPort := map[string]bool{} - nextPort := func(host string) string { + nextPort := func(host string) (string, error) { for { port, err := availablePort(host) if err != nil { - panic(err) + port, err = availablePort(super.ListenHost) + } + if err != nil { + return "", err } if usedPort[port] { continue } usedPort[port] = true - return port + return port, nil } } if cluster.Services.Controller.ExternalURL.Host == "" { h, p, err := net.SplitHostPort(super.ControllerAddr) if err != nil { - return err + return fmt.Errorf("SplitHostPort(ControllerAddr): %w", err) } if h == "" { h = super.ListenHost } if p == "0" { - p = nextPort(h) + p, err = nextPort(h) + if err != nil { + return err + } } - cluster.Services.Controller.ExternalURL = arvados.URL{Scheme: "https", Host: net.JoinHostPort(h, p)} + cluster.Services.Controller.ExternalURL = arvados.URL{Scheme: "https", Host: net.JoinHostPort(h, p), Path: "/"} } + u := url.URL(cluster.Services.Controller.ExternalURL) + defaultExtHost := u.Hostname() for _, svc := range []*arvados.Service{ &cluster.Services.Controller, &cluster.Services.DispatchCloud, @@ -582,54 +683,80 @@ func (super *Supervisor) autofillConfig(cfg *arvados.Config) error { &cluster.Services.WebDAVDownload, &cluster.Services.Websocket, &cluster.Services.Workbench1, + &cluster.Services.Workbench2, } { if svc == &cluster.Services.DispatchCloud && super.ClusterType == "test" { continue } if svc.ExternalURL.Host == "" { + port, err := nextPort(defaultExtHost) + if err != nil { + return err + } + host := net.JoinHostPort(defaultExtHost, port) if svc == &cluster.Services.Controller || svc == &cluster.Services.GitHTTP || svc == &cluster.Services.Health || svc == &cluster.Services.Keepproxy || svc == &cluster.Services.WebDAV || svc == &cluster.Services.WebDAVDownload || - svc == &cluster.Services.Workbench1 { - svc.ExternalURL = arvados.URL{Scheme: "https", Host: fmt.Sprintf("%s:%s", super.ListenHost, nextPort(super.ListenHost))} + svc == &cluster.Services.Workbench1 || + svc == &cluster.Services.Workbench2 { + svc.ExternalURL = arvados.URL{Scheme: "https", Host: host, Path: "/"} } else if svc == &cluster.Services.Websocket { - svc.ExternalURL = arvados.URL{Scheme: "wss", Host: fmt.Sprintf("%s:%s", super.ListenHost, nextPort(super.ListenHost))} + svc.ExternalURL = arvados.URL{Scheme: "wss", Host: host, Path: "/websocket"} } } + if super.NoWorkbench1 && svc == &cluster.Services.Workbench1 || + super.NoWorkbench2 && svc == &cluster.Services.Workbench2 { + // When workbench1 is disabled, it gets an + // ExternalURL (so we have a valid listening + // port to write in our Nginx config) but no + // InternalURLs (so health checker doesn't + // complain). + continue + } if len(svc.InternalURLs) == 0 { + port, err := nextPort(super.ListenHost) + if err != nil { + return err + } + host := net.JoinHostPort(super.ListenHost, port) svc.InternalURLs = map[arvados.URL]arvados.ServiceInstance{ - arvados.URL{Scheme: "http", Host: fmt.Sprintf("%s:%s", super.ListenHost, nextPort(super.ListenHost))}: arvados.ServiceInstance{}, + {Scheme: "http", Host: host, Path: "/"}: {}, } } } - if cluster.SystemRootToken == "" { - cluster.SystemRootToken = randomHexString(64) - } - if cluster.ManagementToken == "" { - cluster.ManagementToken = randomHexString(64) - } - if cluster.API.RailsSessionSecretToken == "" { - cluster.API.RailsSessionSecretToken = randomHexString(64) - } - if cluster.Collections.BlobSigningKey == "" { - cluster.Collections.BlobSigningKey = randomHexString(64) - } - if super.ClusterType != "production" && cluster.Containers.DispatchPrivateKey == "" { - buf, err := ioutil.ReadFile(filepath.Join(super.SourcePath, "lib", "dispatchcloud", "test", "sshkey_dispatch")) - if err != nil { - return err - } - cluster.Containers.DispatchPrivateKey = string(buf) - } if super.ClusterType != "production" { + if cluster.SystemRootToken == "" { + cluster.SystemRootToken = randomHexString(64) + } + if cluster.ManagementToken == "" { + cluster.ManagementToken = randomHexString(64) + } + if cluster.Collections.BlobSigningKey == "" { + cluster.Collections.BlobSigningKey = randomHexString(64) + } + if cluster.Users.AnonymousUserToken == "" { + cluster.Users.AnonymousUserToken = randomHexString(64) + } + if cluster.Containers.DispatchPrivateKey == "" { + buf, err := ioutil.ReadFile(filepath.Join(super.SourcePath, "lib", "dispatchcloud", "test", "sshkey_dispatch")) + if err != nil { + return err + } + cluster.Containers.DispatchPrivateKey = string(buf) + } cluster.TLS.Insecure = true } if super.ClusterType == "test" { // Add a second keepstore process. - cluster.Services.Keepstore.InternalURLs[arvados.URL{Scheme: "http", Host: fmt.Sprintf("%s:%s", super.ListenHost, nextPort(super.ListenHost))}] = arvados.ServiceInstance{} + port, err := nextPort(super.ListenHost) + if err != nil { + return err + } + host := net.JoinHostPort(super.ListenHost, port) + cluster.Services.Keepstore.InternalURLs[arvados.URL{Scheme: "http", Host: host, Path: "/"}] = arvados.ServiceInstance{} // Create a directory-backed volume for each keepstore // process. @@ -649,14 +776,28 @@ func (super *Supervisor) autofillConfig(cfg *arvados.Config) error { AccessViaHosts: map[arvados.URL]arvados.VolumeAccess{ url: {}, }, + StorageClasses: map[string]bool{ + "default": true, + "foo": true, + "bar": true, + }, } } + cluster.StorageClasses = map[string]arvados.StorageClassConfig{ + "default": {Default: true}, + "foo": {}, + "bar": {}, + } } if super.OwnTemporaryDatabase { + port, err := nextPort("localhost") + if err != nil { + return err + } cluster.PostgreSQL.Connection = arvados.PostgreSQLConnection{ "client_encoding": "utf8", "host": "localhost", - "port": nextPort(super.ListenHost), + "port": port, "dbname": "arvados_test", "user": "arvados", "password": "insecure_arvados_test", @@ -689,30 +830,30 @@ func randomHexString(chars int) string { return fmt.Sprintf("%x", b) } -func internalPort(svc arvados.Service) (string, error) { +func internalPort(svc arvados.Service) (host, port string, err error) { if len(svc.InternalURLs) > 1 { - return "", errors.New("internalPort() doesn't work with multiple InternalURLs") + return "", "", errors.New("internalPort() doesn't work with multiple InternalURLs") } for u := range svc.InternalURLs { - if _, p, err := net.SplitHostPort(u.Host); err != nil { - return "", err - } else if p != "" { - return p, nil - } else if u.Scheme == "https" { - return "443", nil - } else { - return "80", nil + u := url.URL(u) + host, port = u.Hostname(), u.Port() + switch { + case port != "": + case u.Scheme == "https", u.Scheme == "ws": + port = "443" + default: + port = "80" } + return } - return "", fmt.Errorf("service has no InternalURLs") + return "", "", fmt.Errorf("service has no InternalURLs") } func externalPort(svc arvados.Service) (string, error) { - if _, p, err := net.SplitHostPort(svc.ExternalURL.Host); err != nil { - return "", err - } else if p != "" { + u := url.URL(svc.ExternalURL) + if p := u.Port(); p != "" { return p, nil - } else if svc.ExternalURL.Scheme == "https" { + } else if u.Scheme == "https" || u.Scheme == "wss" { return "443", nil } else { return "80", nil