18700: Add workbench2 to arvados-boot.
[arvados.git] / lib / boot / supervisor.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package boot
6
7 import (
8         "bytes"
9         "context"
10         "crypto/rand"
11         "encoding/json"
12         "errors"
13         "fmt"
14         "io"
15         "io/ioutil"
16         "net"
17         "net/url"
18         "os"
19         "os/exec"
20         "os/signal"
21         "os/user"
22         "path/filepath"
23         "reflect"
24         "strconv"
25         "strings"
26         "sync"
27         "syscall"
28         "time"
29
30         "git.arvados.org/arvados.git/lib/config"
31         "git.arvados.org/arvados.git/lib/service"
32         "git.arvados.org/arvados.git/sdk/go/arvados"
33         "git.arvados.org/arvados.git/sdk/go/ctxlog"
34         "git.arvados.org/arvados.git/sdk/go/health"
35         "github.com/fsnotify/fsnotify"
36         "github.com/sirupsen/logrus"
37 )
38
// Supervisor holds the settings and runtime state used to bring up a
// set of Arvados services as child tasks and shut them down again.
//
// The exported fields are settings supplied by the caller before
// Start is called; the unexported fields are populated while the
// supervisor runs.
type Supervisor struct {
	// SourcePath is the Arvados source tree. A relative path is
	// resolved against the working directory, and symlinks are
	// evaluated, when the supervisor runs.
	SourcePath           string // e.g., /home/username/src/arvados
	// SourceVersion must currently be left empty: outside
	// production it is filled in from "git log" (with
	// "+uncommitted" appended when "git diff" reports changes),
	// and a caller-supplied value is rejected as unsupported.
	SourceVersion        string // e.g., acbd1324...
	// ClusterType "production" selects the fixed
	// /var/lib/arvados/... directories instead of a transient
	// temp dir; "test" omits dispatch-cloud and keep-balance.
	ClusterType          string // e.g., production
	ListenHost           string // e.g., localhost
	ControllerAddr       string // e.g., 127.0.0.1:8000
	Workbench2Source     string // e.g., /home/username/src/arvados-workbench2
	// NoWorkbench1 disables the workbench1 install/run tasks.
	NoWorkbench1         bool
	OwnTemporaryDatabase bool
	// Stderr receives the supervisor's log output and the
	// prefixed output of child processes.
	Stderr               io.Writer

	logger  logrus.FieldLogger
	cluster *arvados.Cluster

	ctx           context.Context
	cancel        context.CancelFunc
	done          chan struct{} // closed when child procs/services have shut down
	err           error         // error that caused shutdown (valid when done is closed)
	healthChecker *health.Aggregator
	// tasksReady maps a task's String() to a channel that is
	// closed once that task's Run has returned without error.
	tasksReady    map[string]chan bool
	// waitShutdown is waited on before run returns.
	waitShutdown  sync.WaitGroup

	bindir     string
	tempdir    string
	wwwtempdir string
	configfile string   // path of the generated config file passed to children via ARVADOS_CONFIG
	environ    []string // for child processes
}
67
68 func (super *Supervisor) Cluster() *arvados.Cluster { return super.cluster }
69
70 func (super *Supervisor) Start(ctx context.Context, cfg *arvados.Config, cfgPath string) {
71         super.ctx, super.cancel = context.WithCancel(ctx)
72         super.done = make(chan struct{})
73
74         go func() {
75                 defer close(super.done)
76
77                 sigch := make(chan os.Signal)
78                 signal.Notify(sigch, syscall.SIGINT, syscall.SIGTERM)
79                 defer signal.Stop(sigch)
80                 go func() {
81                         for sig := range sigch {
82                                 super.logger.WithField("signal", sig).Info("caught signal")
83                                 if super.err == nil {
84                                         super.err = fmt.Errorf("caught signal %s", sig)
85                                 }
86                                 super.cancel()
87                         }
88                 }()
89
90                 hupch := make(chan os.Signal)
91                 signal.Notify(hupch, syscall.SIGHUP)
92                 defer signal.Stop(hupch)
93                 go func() {
94                         for sig := range hupch {
95                                 super.logger.WithField("signal", sig).Info("caught signal")
96                                 if super.err == nil {
97                                         super.err = errNeedConfigReload
98                                 }
99                                 super.cancel()
100                         }
101                 }()
102
103                 if cfgPath != "" && cfgPath != "-" && cfg.AutoReloadConfig {
104                         go watchConfig(super.ctx, super.logger, cfgPath, copyConfig(cfg), func() {
105                                 if super.err == nil {
106                                         super.err = errNeedConfigReload
107                                 }
108                                 super.cancel()
109                         })
110                 }
111
112                 err := super.run(cfg)
113                 if err != nil {
114                         super.logger.WithError(err).Warn("supervisor shut down")
115                         if super.err == nil {
116                                 super.err = err
117                         }
118                 }
119         }()
120 }
121
// Wait blocks until the supervisor's done channel is closed, then
// returns the error that caused the shutdown (nil if none was
// recorded). Start must have been called first.
func (super *Supervisor) Wait() error {
	<-super.done
	return super.err
}
126
127 func (super *Supervisor) run(cfg *arvados.Config) error {
128         defer super.cancel()
129
130         cwd, err := os.Getwd()
131         if err != nil {
132                 return err
133         }
134         if !strings.HasPrefix(super.SourcePath, "/") {
135                 super.SourcePath = filepath.Join(cwd, super.SourcePath)
136         }
137         super.SourcePath, err = filepath.EvalSymlinks(super.SourcePath)
138         if err != nil {
139                 return err
140         }
141
142         // Choose bin and temp dirs: /var/lib/arvados/... in
143         // production, transient tempdir otherwise.
144         if super.ClusterType == "production" {
145                 // These dirs have already been created by
146                 // "arvados-server install" (or by extracting a
147                 // package).
148                 super.tempdir = "/var/lib/arvados/tmp"
149                 super.wwwtempdir = "/var/lib/arvados/wwwtmp"
150                 super.bindir = "/var/lib/arvados/bin"
151         } else {
152                 super.tempdir, err = ioutil.TempDir("", "arvados-server-boot-")
153                 if err != nil {
154                         return err
155                 }
156                 defer os.RemoveAll(super.tempdir)
157                 super.wwwtempdir = super.tempdir
158                 super.bindir = filepath.Join(super.tempdir, "bin")
159                 if err := os.Mkdir(super.bindir, 0755); err != nil {
160                         return err
161                 }
162         }
163
164         // Fill in any missing config keys, and write the resulting
165         // config in the temp dir for child services to use.
166         err = super.autofillConfig(cfg)
167         if err != nil {
168                 return err
169         }
170         conffile, err := os.OpenFile(filepath.Join(super.wwwtempdir, "config.yml"), os.O_CREATE|os.O_WRONLY, 0644)
171         if err != nil {
172                 return err
173         }
174         defer conffile.Close()
175         err = json.NewEncoder(conffile).Encode(cfg)
176         if err != nil {
177                 return err
178         }
179         err = conffile.Close()
180         if err != nil {
181                 return err
182         }
183         super.configfile = conffile.Name()
184
185         super.environ = os.Environ()
186         super.cleanEnv([]string{"ARVADOS_"})
187         super.setEnv("ARVADOS_CONFIG", super.configfile)
188         super.setEnv("RAILS_ENV", super.ClusterType)
189         super.setEnv("TMPDIR", super.tempdir)
190         super.prependEnv("PATH", "/var/lib/arvados/bin:")
191         if super.ClusterType != "production" {
192                 super.prependEnv("PATH", super.tempdir+"/bin:")
193         }
194
195         super.cluster, err = cfg.GetCluster("")
196         if err != nil {
197                 return err
198         }
199         // Now that we have the config, replace the bootstrap logger
200         // with a new one according to the logging config.
201         loglevel := super.cluster.SystemLogs.LogLevel
202         if s := os.Getenv("ARVADOS_DEBUG"); s != "" && s != "0" {
203                 loglevel = "debug"
204         }
205         super.logger = ctxlog.New(super.Stderr, super.cluster.SystemLogs.Format, loglevel).WithFields(logrus.Fields{
206                 "PID": os.Getpid(),
207         })
208
209         if super.SourceVersion == "" && super.ClusterType == "production" {
210                 // don't need SourceVersion
211         } else if super.SourceVersion == "" {
212                 // Find current source tree version.
213                 var buf bytes.Buffer
214                 err = super.RunProgram(super.ctx, ".", runOptions{output: &buf}, "git", "diff", "--shortstat")
215                 if err != nil {
216                         return err
217                 }
218                 dirty := buf.Len() > 0
219                 buf.Reset()
220                 err = super.RunProgram(super.ctx, ".", runOptions{output: &buf}, "git", "log", "-n1", "--format=%H")
221                 if err != nil {
222                         return err
223                 }
224                 super.SourceVersion = strings.TrimSpace(buf.String())
225                 if dirty {
226                         super.SourceVersion += "+uncommitted"
227                 }
228         } else {
229                 return errors.New("specifying a version to run is not yet supported")
230         }
231
232         _, err = super.installGoProgram(super.ctx, "cmd/arvados-server")
233         if err != nil {
234                 return err
235         }
236         err = super.setupRubyEnv()
237         if err != nil {
238                 return err
239         }
240
241         tasks := []supervisedTask{
242                 createCertificates{},
243                 runPostgreSQL{},
244                 runNginx{},
245                 runServiceCommand{name: "controller", svc: super.cluster.Services.Controller, depends: []supervisedTask{seedDatabase{}}},
246                 runGoProgram{src: "services/arv-git-httpd", svc: super.cluster.Services.GitHTTP},
247                 runGoProgram{src: "services/health", svc: super.cluster.Services.Health},
248                 runGoProgram{src: "services/keepproxy", svc: super.cluster.Services.Keepproxy, depends: []supervisedTask{runPassenger{src: "services/api"}}},
249                 runServiceCommand{name: "keepstore", svc: super.cluster.Services.Keepstore},
250                 runGoProgram{src: "services/keep-web", svc: super.cluster.Services.WebDAV},
251                 runServiceCommand{name: "ws", svc: super.cluster.Services.Websocket, depends: []supervisedTask{seedDatabase{}}},
252                 installPassenger{src: "services/api"},
253                 runPassenger{src: "services/api", varlibdir: "railsapi", svc: super.cluster.Services.RailsAPI, depends: []supervisedTask{createCertificates{}, seedDatabase{}, installPassenger{src: "services/api"}}},
254                 runWorkbench2{svc: super.cluster.Services.Workbench2},
255                 seedDatabase{},
256         }
257         if !super.NoWorkbench1 {
258                 tasks = append(tasks,
259                         installPassenger{src: "apps/workbench", depends: []supervisedTask{seedDatabase{}}}, // dependency ensures workbench doesn't delay api install/startup
260                         runPassenger{src: "apps/workbench", varlibdir: "workbench1", svc: super.cluster.Services.Workbench1, depends: []supervisedTask{installPassenger{src: "apps/workbench"}}},
261                 )
262         }
263         if super.ClusterType != "test" {
264                 tasks = append(tasks,
265                         runServiceCommand{name: "dispatch-cloud", svc: super.cluster.Services.DispatchCloud},
266                         runGoProgram{src: "services/keep-balance", svc: super.cluster.Services.Keepbalance},
267                 )
268         }
269         super.tasksReady = map[string]chan bool{}
270         for _, task := range tasks {
271                 super.tasksReady[task.String()] = make(chan bool)
272         }
273         for _, task := range tasks {
274                 task := task
275                 fail := func(err error) {
276                         if super.ctx.Err() != nil {
277                                 return
278                         }
279                         super.cancel()
280                         super.logger.WithField("task", task.String()).WithError(err).Error("task failed")
281                 }
282                 go func() {
283                         super.logger.WithField("task", task.String()).Info("starting")
284                         err := task.Run(super.ctx, fail, super)
285                         if err != nil {
286                                 fail(err)
287                                 return
288                         }
289                         close(super.tasksReady[task.String()])
290                 }()
291         }
292         err = super.wait(super.ctx, tasks...)
293         if err != nil {
294                 return err
295         }
296         super.logger.Info("all startup tasks are complete; starting health checks")
297         super.healthChecker = &health.Aggregator{Cluster: super.cluster}
298         <-super.ctx.Done()
299         super.logger.Info("shutting down")
300         super.waitShutdown.Wait()
301         return super.ctx.Err()
302 }
303
304 func (super *Supervisor) wait(ctx context.Context, tasks ...supervisedTask) error {
305         for _, task := range tasks {
306                 ch, ok := super.tasksReady[task.String()]
307                 if !ok {
308                         return fmt.Errorf("no such task: %s", task)
309                 }
310                 super.logger.WithField("task", task.String()).Info("waiting")
311                 select {
312                 case <-ch:
313                         super.logger.WithField("task", task.String()).Info("ready")
314                 case <-ctx.Done():
315                         super.logger.WithField("task", task.String()).Info("task was never ready")
316                         return ctx.Err()
317                 }
318         }
319         return nil
320 }
321
322 func (super *Supervisor) Stop() {
323         super.cancel()
324         <-super.done
325 }
326
327 func (super *Supervisor) WaitReady() (*arvados.URL, bool) {
328         ticker := time.NewTicker(time.Second)
329         defer ticker.Stop()
330         for waiting := "all"; waiting != ""; {
331                 select {
332                 case <-ticker.C:
333                 case <-super.ctx.Done():
334                         return nil, false
335                 }
336                 if super.healthChecker == nil {
337                         // not set up yet
338                         continue
339                 }
340                 resp := super.healthChecker.ClusterHealth()
341                 // The overall health check (resp.Health=="OK") might
342                 // never pass due to missing components (like
343                 // arvados-dispatch-cloud in a test cluster), so
344                 // instead we wait for all configured components to
345                 // pass.
346                 waiting = ""
347                 for target, check := range resp.Checks {
348                         if check.Health != "OK" {
349                                 waiting += " " + target
350                         }
351                 }
352                 if waiting != "" {
353                         super.logger.WithField("targets", waiting[1:]).Info("waiting")
354                 }
355         }
356         u := super.cluster.Services.Controller.ExternalURL
357         return &u, true
358 }
359
360 func (super *Supervisor) prependEnv(key, prepend string) {
361         for i, s := range super.environ {
362                 if strings.HasPrefix(s, key+"=") {
363                         super.environ[i] = key + "=" + prepend + s[len(key)+1:]
364                         return
365                 }
366         }
367         super.environ = append(super.environ, key+"="+prepend)
368 }
369
370 func (super *Supervisor) cleanEnv(prefixes []string) {
371         var cleaned []string
372         for _, s := range super.environ {
373                 drop := false
374                 for _, p := range prefixes {
375                         if strings.HasPrefix(s, p) {
376                                 drop = true
377                                 break
378                         }
379                 }
380                 if !drop {
381                         cleaned = append(cleaned, s)
382                 }
383         }
384         super.environ = cleaned
385 }
386
387 func (super *Supervisor) setEnv(key, val string) {
388         for i, s := range super.environ {
389                 if strings.HasPrefix(s, key+"=") {
390                         super.environ[i] = key + "=" + val
391                         return
392                 }
393         }
394         super.environ = append(super.environ, key+"="+val)
395 }
396
// dedupEnv returns the entries of in, in order, keeping only the
// first entry seen for each variable name. It panics if an entry has
// no "=" or has an empty name.
func dedupEnv(in []string) []string {
	var unique []string
	seen := make(map[string]struct{})
	for _, entry := range in {
		eq := strings.IndexByte(entry, '=')
		if eq < 1 {
			panic("invalid environment var: " + entry)
		}
		name := entry[:eq]
		if _, dup := seen[name]; dup {
			continue
		}
		seen[name] = struct{}{}
		unique = append(unique, entry)
	}
	return unique
}
413
414 func (super *Supervisor) installGoProgram(ctx context.Context, srcpath string) (string, error) {
415         _, basename := filepath.Split(srcpath)
416         binfile := filepath.Join(super.bindir, basename)
417         if super.ClusterType == "production" {
418                 return binfile, nil
419         }
420         err := super.RunProgram(ctx, filepath.Join(super.SourcePath, srcpath), runOptions{env: []string{"GOBIN=" + super.bindir}}, "go", "install", "-ldflags", "-X git.arvados.org/arvados.git/lib/cmd.version="+super.SourceVersion+" -X main.version="+super.SourceVersion)
421         return binfile, err
422 }
423
424 func (super *Supervisor) usingRVM() bool {
425         return os.Getenv("rvm_path") != ""
426 }
427
428 func (super *Supervisor) setupRubyEnv() error {
429         if !super.usingRVM() {
430                 // (If rvm is in use, assume the caller has everything
431                 // set up as desired)
432                 super.cleanEnv([]string{
433                         "GEM_HOME=",
434                         "GEM_PATH=",
435                 })
436                 gem := "gem"
437                 if _, err := os.Stat("/var/lib/arvados/bin/gem"); err == nil || super.ClusterType == "production" {
438                         gem = "/var/lib/arvados/bin/gem"
439                 }
440                 cmd := exec.Command(gem, "env", "gempath")
441                 if super.ClusterType == "production" {
442                         cmd.Args = append([]string{"sudo", "-u", "www-data", "-E", "HOME=/var/www"}, cmd.Args...)
443                         path, err := exec.LookPath("sudo")
444                         if err != nil {
445                                 return fmt.Errorf("LookPath(\"sudo\"): %w", err)
446                         }
447                         cmd.Path = path
448                 }
449                 cmd.Stderr = super.Stderr
450                 cmd.Env = super.environ
451                 buf, err := cmd.Output() // /var/lib/arvados/.gem/ruby/2.5.0/bin:...
452                 if err != nil || len(buf) == 0 {
453                         return fmt.Errorf("gem env gempath: %w", err)
454                 }
455                 gempath := string(bytes.Split(buf, []byte{':'})[0])
456                 super.prependEnv("PATH", gempath+"/bin:")
457                 super.setEnv("GEM_HOME", gempath)
458                 super.setEnv("GEM_PATH", gempath)
459         }
460         // Passenger install doesn't work unless $HOME is ~user
461         u, err := user.Current()
462         if err != nil {
463                 return err
464         }
465         super.setEnv("HOME", u.HomeDir)
466         return nil
467 }
468
469 func (super *Supervisor) lookPath(prog string) string {
470         for _, val := range super.environ {
471                 if strings.HasPrefix(val, "PATH=") {
472                         for _, dir := range filepath.SplitList(val[5:]) {
473                                 path := filepath.Join(dir, prog)
474                                 if fi, err := os.Stat(path); err == nil && fi.Mode()&0111 != 0 {
475                                         return path
476                                 }
477                         }
478                 }
479         }
480         return prog
481 }
482
// runOptions carries the optional settings accepted by RunProgram.
// The zero value means: child stdout goes to the prefixed log
// writer, no extra environment variables, no change of user, and no
// stdin.
type runOptions struct {
	output io.Writer // attach stdout
	env    []string  // add/replace environment variables
	user   string    // run as specified user
	stdin  io.Reader // child's standard input, if non-nil
}
489
490 // RunProgram runs prog with args, using dir as working directory. If ctx is
491 // cancelled while the child is running, RunProgram terminates the child, waits
492 // for it to exit, then returns.
493 //
494 // Child's environment will have our env vars, plus any given in env.
495 //
496 // Child's stdout will be written to output if non-nil, otherwise the
497 // boot command's stderr.
498 func (super *Supervisor) RunProgram(ctx context.Context, dir string, opts runOptions, prog string, args ...string) error {
499         cmdline := fmt.Sprintf("%s", append([]string{prog}, args...))
500         super.logger.WithField("command", cmdline).WithField("dir", dir).Info("executing")
501
502         logprefix := prog
503         {
504                 innerargs := args
505                 if logprefix == "sudo" {
506                         for i := 0; i < len(args); i++ {
507                                 if args[i] == "-u" {
508                                         i++
509                                 } else if args[i] == "-E" || strings.Contains(args[i], "=") {
510                                 } else {
511                                         logprefix = args[i]
512                                         innerargs = args[i+1:]
513                                         break
514                                 }
515                         }
516                 }
517                 logprefix = strings.TrimPrefix(logprefix, "/var/lib/arvados/bin/")
518                 logprefix = strings.TrimPrefix(logprefix, super.tempdir+"/bin/")
519                 if logprefix == "bundle" && len(innerargs) > 2 && innerargs[0] == "exec" {
520                         _, dirbase := filepath.Split(dir)
521                         logprefix = innerargs[1] + "@" + dirbase
522                 } else if logprefix == "arvados-server" && len(args) > 1 {
523                         logprefix = args[0]
524                 }
525                 if !strings.HasPrefix(dir, "/") {
526                         logprefix = dir + ": " + logprefix
527                 }
528         }
529
530         cmd := exec.Command(super.lookPath(prog), args...)
531         cmd.Stdin = opts.stdin
532         stdout, err := cmd.StdoutPipe()
533         if err != nil {
534                 return err
535         }
536         stderr, err := cmd.StderrPipe()
537         if err != nil {
538                 return err
539         }
540         logwriter := &service.LogPrefixer{Writer: super.Stderr, Prefix: []byte("[" + logprefix + "] ")}
541         var copiers sync.WaitGroup
542         copiers.Add(1)
543         go func() {
544                 io.Copy(logwriter, stderr)
545                 copiers.Done()
546         }()
547         copiers.Add(1)
548         go func() {
549                 if opts.output == nil {
550                         io.Copy(logwriter, stdout)
551                 } else {
552                         io.Copy(opts.output, stdout)
553                 }
554                 copiers.Done()
555         }()
556
557         if strings.HasPrefix(dir, "/") {
558                 cmd.Dir = dir
559         } else {
560                 cmd.Dir = filepath.Join(super.SourcePath, dir)
561         }
562         env := append([]string(nil), opts.env...)
563         env = append(env, super.environ...)
564         cmd.Env = dedupEnv(env)
565
566         if opts.user != "" {
567                 // Note: We use this approach instead of "sudo"
568                 // because in certain circumstances (we are pid 1 in a
569                 // docker container, and our passenger child process
570                 // changes to pgid 1) the intermediate sudo process
571                 // notices we have the same pgid as our child and
572                 // refuses to propagate signals from us to our child,
573                 // so we can't signal/shutdown our passenger/rails
574                 // apps. "chpst" or "setuidgid" would work, but these
575                 // few lines avoid depending on runit/daemontools.
576                 u, err := user.Lookup(opts.user)
577                 if err != nil {
578                         return fmt.Errorf("user.Lookup(%q): %w", opts.user, err)
579                 }
580                 uid, _ := strconv.Atoi(u.Uid)
581                 gid, _ := strconv.Atoi(u.Gid)
582                 cmd.SysProcAttr = &syscall.SysProcAttr{
583                         Credential: &syscall.Credential{
584                                 Uid: uint32(uid),
585                                 Gid: uint32(gid),
586                         },
587                 }
588         }
589
590         exited := false
591         defer func() { exited = true }()
592         go func() {
593                 <-ctx.Done()
594                 log := ctxlog.FromContext(ctx).WithFields(logrus.Fields{"dir": dir, "cmdline": cmdline})
595                 for !exited {
596                         if cmd.Process == nil {
597                                 log.Debug("waiting for child process to start")
598                                 time.Sleep(time.Second / 2)
599                         } else {
600                                 log.WithField("PID", cmd.Process.Pid).Debug("sending SIGTERM")
601                                 cmd.Process.Signal(syscall.SIGTERM)
602                                 time.Sleep(5 * time.Second)
603                                 if !exited {
604                                         stdout.Close()
605                                         stderr.Close()
606                                         log.WithField("PID", cmd.Process.Pid).Warn("still waiting for child process to exit 5s after SIGTERM")
607                                 }
608                         }
609                 }
610         }()
611
612         err = cmd.Start()
613         if err != nil {
614                 return err
615         }
616         copiers.Wait()
617         err = cmd.Wait()
618         if ctx.Err() != nil {
619                 // Return "context canceled", instead of the "killed"
620                 // error that was probably caused by the context being
621                 // canceled.
622                 return ctx.Err()
623         } else if err != nil {
624                 return fmt.Errorf("%s: error: %v", cmdline, err)
625         }
626         return nil
627 }
628
629 func (super *Supervisor) autofillConfig(cfg *arvados.Config) error {
630         cluster, err := cfg.GetCluster("")
631         if err != nil {
632                 return err
633         }
634         usedPort := map[string]bool{}
635         nextPort := func(host string) (string, error) {
636                 for {
637                         port, err := availablePort(host)
638                         if err != nil {
639                                 port, err = availablePort(super.ListenHost)
640                         }
641                         if err != nil {
642                                 return "", err
643                         }
644                         if usedPort[port] {
645                                 continue
646                         }
647                         usedPort[port] = true
648                         return port, nil
649                 }
650         }
651         if cluster.Services.Controller.ExternalURL.Host == "" {
652                 h, p, err := net.SplitHostPort(super.ControllerAddr)
653                 if err != nil {
654                         return fmt.Errorf("SplitHostPort(ControllerAddr): %w", err)
655                 }
656                 if h == "" {
657                         h = super.ListenHost
658                 }
659                 if p == "0" {
660                         p, err = nextPort(h)
661                         if err != nil {
662                                 return err
663                         }
664                 }
665                 cluster.Services.Controller.ExternalURL = arvados.URL{Scheme: "https", Host: net.JoinHostPort(h, p), Path: "/"}
666         }
667         defaultExtHost, _, err := net.SplitHostPort(cluster.Services.Controller.ExternalURL.Host)
668         if err != nil {
669                 return fmt.Errorf("SplitHostPort(Controller.ExternalURL.Host): %w", err)
670         }
671         for _, svc := range []*arvados.Service{
672                 &cluster.Services.Controller,
673                 &cluster.Services.DispatchCloud,
674                 &cluster.Services.GitHTTP,
675                 &cluster.Services.Health,
676                 &cluster.Services.Keepproxy,
677                 &cluster.Services.Keepstore,
678                 &cluster.Services.RailsAPI,
679                 &cluster.Services.WebDAV,
680                 &cluster.Services.WebDAVDownload,
681                 &cluster.Services.Websocket,
682                 &cluster.Services.Workbench1,
683                 &cluster.Services.Workbench2,
684         } {
685                 if svc == &cluster.Services.DispatchCloud && super.ClusterType == "test" {
686                         continue
687                 }
688                 if svc.ExternalURL.Host == "" {
689                         port, err := nextPort(defaultExtHost)
690                         if err != nil {
691                                 return err
692                         }
693                         host := net.JoinHostPort(defaultExtHost, port)
694                         if svc == &cluster.Services.Controller ||
695                                 svc == &cluster.Services.GitHTTP ||
696                                 svc == &cluster.Services.Health ||
697                                 svc == &cluster.Services.Keepproxy ||
698                                 svc == &cluster.Services.WebDAV ||
699                                 svc == &cluster.Services.WebDAVDownload ||
700                                 svc == &cluster.Services.Workbench1 ||
701                                 svc == &cluster.Services.Workbench2 {
702                                 svc.ExternalURL = arvados.URL{Scheme: "https", Host: host, Path: "/"}
703                         } else if svc == &cluster.Services.Websocket {
704                                 svc.ExternalURL = arvados.URL{Scheme: "wss", Host: host, Path: "/websocket"}
705                         }
706                 }
707                 if super.NoWorkbench1 && svc == &cluster.Services.Workbench1 {
708                         // When workbench1 is disabled, it gets an
709                         // ExternalURL (so we have a valid listening
710                         // port to write in our Nginx config) but no
711                         // InternalURLs (so health checker doesn't
712                         // complain).
713                         continue
714                 }
715                 if len(svc.InternalURLs) == 0 {
716                         port, err := nextPort(super.ListenHost)
717                         if err != nil {
718                                 return err
719                         }
720                         host := net.JoinHostPort(super.ListenHost, port)
721                         svc.InternalURLs = map[arvados.URL]arvados.ServiceInstance{
722                                 {Scheme: "http", Host: host, Path: "/"}: {},
723                         }
724                 }
725         }
726         if super.ClusterType != "production" {
727                 if cluster.SystemRootToken == "" {
728                         cluster.SystemRootToken = randomHexString(64)
729                 }
730                 if cluster.ManagementToken == "" {
731                         cluster.ManagementToken = randomHexString(64)
732                 }
733                 if cluster.Collections.BlobSigningKey == "" {
734                         cluster.Collections.BlobSigningKey = randomHexString(64)
735                 }
736                 if cluster.Users.AnonymousUserToken == "" {
737                         cluster.Users.AnonymousUserToken = randomHexString(64)
738                 }
739                 if cluster.Containers.DispatchPrivateKey == "" {
740                         buf, err := ioutil.ReadFile(filepath.Join(super.SourcePath, "lib", "dispatchcloud", "test", "sshkey_dispatch"))
741                         if err != nil {
742                                 return err
743                         }
744                         cluster.Containers.DispatchPrivateKey = string(buf)
745                 }
746                 cluster.TLS.Insecure = true
747         }
748         if super.ClusterType == "test" {
749                 // Add a second keepstore process.
750                 port, err := nextPort(super.ListenHost)
751                 if err != nil {
752                         return err
753                 }
754                 host := net.JoinHostPort(super.ListenHost, port)
755                 cluster.Services.Keepstore.InternalURLs[arvados.URL{Scheme: "http", Host: host, Path: "/"}] = arvados.ServiceInstance{}
756
757                 // Create a directory-backed volume for each keepstore
758                 // process.
759                 cluster.Volumes = map[string]arvados.Volume{}
760                 for url := range cluster.Services.Keepstore.InternalURLs {
761                         volnum := len(cluster.Volumes)
762                         datadir := fmt.Sprintf("%s/keep%d.data", super.tempdir, volnum)
763                         if _, err = os.Stat(datadir + "/."); err == nil {
764                         } else if !os.IsNotExist(err) {
765                                 return err
766                         } else if err = os.Mkdir(datadir, 0755); err != nil {
767                                 return err
768                         }
769                         cluster.Volumes[fmt.Sprintf(cluster.ClusterID+"-nyw5e-%015d", volnum)] = arvados.Volume{
770                                 Driver:           "Directory",
771                                 DriverParameters: json.RawMessage(fmt.Sprintf(`{"Root":%q}`, datadir)),
772                                 AccessViaHosts: map[arvados.URL]arvados.VolumeAccess{
773                                         url: {},
774                                 },
775                                 StorageClasses: map[string]bool{
776                                         "default": true,
777                                         "foo":     true,
778                                         "bar":     true,
779                                 },
780                         }
781                 }
782                 cluster.StorageClasses = map[string]arvados.StorageClassConfig{
783                         "default": {Default: true},
784                         "foo":     {},
785                         "bar":     {},
786                 }
787         }
788         if super.OwnTemporaryDatabase {
789                 port, err := nextPort("localhost")
790                 if err != nil {
791                         return err
792                 }
793                 cluster.PostgreSQL.Connection = arvados.PostgreSQLConnection{
794                         "client_encoding": "utf8",
795                         "host":            "localhost",
796                         "port":            port,
797                         "dbname":          "arvados_test",
798                         "user":            "arvados",
799                         "password":        "insecure_arvados_test",
800                 }
801         }
802
803         cfg.Clusters[cluster.ClusterID] = *cluster
804         return nil
805 }
806
// addrIsLocal reports whether the host part of addr (given in
// "host:port" form) is an address this machine can listen on.
//
// The check binds a throwaway TCP listener on that host using port 0
// (an ephemeral port) rather than the port named in addr, so the
// result does not depend on whether the named port is already in use
// or requires elevated privileges.
//
// It returns (false, nil) when the bind fails because the address is
// not assigned to this machine, and (false, err) when addr cannot be
// split into host and port or the bind fails for any other reason.
func addrIsLocal(addr string) (bool, error) {
	host, _, err := net.SplitHostPort(addr)
	if err != nil {
		return false, err
	}
	listener, err := net.Listen("tcp", net.JoinHostPort(host, "0"))
	if err == nil {
		listener.Close()
		return true, nil
	}
	// Prefer the errno check; keep the message match as a fallback
	// for errors that do not unwrap to a syscall.Errno.
	if errors.Is(err, syscall.EADDRNOTAVAIL) || strings.Contains(err.Error(), "cannot assign requested address") {
		return false, nil
	}
	return false, err
}
819
// randomHexString returns a string of exactly chars lowercase
// hexadecimal digits built from bytes read from crypto/rand.
//
// A non-positive chars yields the empty string. It panics if the
// random source returns an error.
func randomHexString(chars int) string {
	if chars <= 0 {
		return ""
	}
	// Each byte encodes to two hex digits. Round the byte count up
	// so an odd length is not silently shortened, then trim the
	// surplus digit.
	b := make([]byte, (chars+1)/2)
	if _, err := rand.Read(b); err != nil {
		panic(err)
	}
	return fmt.Sprintf("%x", b)[:chars]
}
828
829 func internalPort(svc arvados.Service) (host, port string, err error) {
830         if len(svc.InternalURLs) > 1 {
831                 return "", "", errors.New("internalPort() doesn't work with multiple InternalURLs")
832         }
833         for u := range svc.InternalURLs {
834                 u := url.URL(u)
835                 host, port = u.Hostname(), u.Port()
836                 switch {
837                 case port != "":
838                 case u.Scheme == "https", u.Scheme == "ws":
839                         port = "443"
840                 default:
841                         port = "80"
842                 }
843                 return
844         }
845         return "", "", fmt.Errorf("service has no InternalURLs")
846 }
847
848 func externalPort(svc arvados.Service) (string, error) {
849         u := url.URL(svc.ExternalURL)
850         if p := u.Port(); p != "" {
851                 return p, nil
852         } else if u.Scheme == "https" || u.Scheme == "wss" {
853                 return "443", nil
854         } else {
855                 return "80", nil
856         }
857 }
858
// availablePort asks the OS for an unused TCP port on host by
// listening on port 0, and returns the port number that was
// assigned, as a decimal string. The temporary listener is closed
// before returning, so the port is free again for the caller to use.
func availablePort(host string) (string, error) {
	probe, err := net.Listen("tcp", net.JoinHostPort(host, "0"))
	if err != nil {
		return "", err
	}
	bound := probe.Addr().String()
	probe.Close()

	_, port, splitErr := net.SplitHostPort(bound)
	if splitErr != nil {
		return "", splitErr
	}
	return port, nil
}
871
// waitForConnect repeatedly tries to open a TCP connection to addr
// until one succeeds or ctx is done.
//
// Each attempt is limited to one second, and failed attempts are
// retried after a short pause. It returns nil as soon as a
// connection is established (the connection is closed immediately),
// or ctx.Err() if ctx is cancelled or expires first.
func waitForConnect(ctx context.Context, addr string) error {
	const retryInterval = time.Second / 10
	dialer := net.Dialer{Timeout: time.Second}
	for ctx.Err() == nil {
		conn, err := dialer.DialContext(ctx, "tcp", addr)
		if err == nil {
			conn.Close()
			return nil
		}
		// Pause before retrying, but stop waiting right away if
		// ctx is done so the caller is not held up by the pause.
		select {
		case <-ctx.Done():
		case <-time.After(retryInterval):
		}
	}
	return ctx.Err()
}
887
888 func copyConfig(cfg *arvados.Config) *arvados.Config {
889         pr, pw := io.Pipe()
890         go func() {
891                 err := json.NewEncoder(pw).Encode(cfg)
892                 if err != nil {
893                         panic(err)
894                 }
895                 pw.Close()
896         }()
897         cfg2 := new(arvados.Config)
898         err := json.NewDecoder(pr).Decode(cfg2)
899         if err != nil {
900                 panic(err)
901         }
902         return cfg2
903 }
904
905 func watchConfig(ctx context.Context, logger logrus.FieldLogger, cfgPath string, prevcfg *arvados.Config, fn func()) {
906         watcher, err := fsnotify.NewWatcher()
907         if err != nil {
908                 logger.WithError(err).Error("fsnotify setup failed")
909                 return
910         }
911         defer watcher.Close()
912
913         err = watcher.Add(cfgPath)
914         if err != nil {
915                 logger.WithError(err).Error("fsnotify watcher failed")
916                 return
917         }
918
919         for {
920                 select {
921                 case <-ctx.Done():
922                         return
923                 case err, ok := <-watcher.Errors:
924                         if !ok {
925                                 return
926                         }
927                         logger.WithError(err).Warn("fsnotify watcher reported error")
928                 case _, ok := <-watcher.Events:
929                         if !ok {
930                                 return
931                         }
932                         for len(watcher.Events) > 0 {
933                                 <-watcher.Events
934                         }
935                         loader := config.NewLoader(&bytes.Buffer{}, &logrus.Logger{Out: ioutil.Discard})
936                         loader.Path = cfgPath
937                         loader.SkipAPICalls = true
938                         cfg, err := loader.Load()
939                         if err != nil {
940                                 logger.WithError(err).Warn("error reloading config file after change detected; ignoring new config for now")
941                         } else if reflect.DeepEqual(cfg, prevcfg) {
942                                 logger.Debug("config file changed but is still DeepEqual to the existing config")
943                         } else {
944                                 logger.Debug("config changed, notifying supervisor")
945                                 fn()
946                                 prevcfg = cfg
947                         }
948                 }
949         }
950 }