"os"
"os/exec"
"os/signal"
+ "os/user"
"path/filepath"
"strings"
"sync"
"git.arvados.org/arvados.git/lib/cmd"
"git.arvados.org/arvados.git/lib/config"
- "git.arvados.org/arvados.git/lib/controller"
- "git.arvados.org/arvados.git/lib/dispatchcloud"
"git.arvados.org/arvados.git/sdk/go/arvados"
"git.arvados.org/arvados.git/sdk/go/ctxlog"
"git.arvados.org/arvados.git/sdk/go/health"
var Command cmd.Handler = bootCommand{}
+// A bootTask is one step of the boot sequence (generate certs, run a
+// service, install dependencies, ...). Tasks run concurrently;
+// ordering is expressed through dependency relationships.
+type bootTask interface {
+ // Execute the task. Run should return nil when the task is
+ // done enough to satisfy a dependency relationship (e.g., the
+ // service is running and ready). If the task starts a
+ // goroutine that fails after Run returns (e.g., the service
+ // shuts down), it should call fail().
+ Run(ctx context.Context, fail func(error), boot *Booter) error
+ // String returns a stable identifier for the task; it is used
+ // as the key in Booter.tasksReady and for dependency lookups.
+ String() string
+}
+
type bootCommand struct{}
// RunCommand parses flags, loads the cluster config, starts the boot
// sequence in the background, and blocks until a signal arrives (or
// startup fails). Returns a process exit code.
// (Several hunks of this function are elided in this chunk.)
func (bootCommand) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
flags.StringVar(&boot.SourcePath, "source", ".", "arvados source tree `directory`")
flags.StringVar(&boot.LibPath, "lib", "/var/lib/arvados", "`directory` to install dependencies and library files")
flags.StringVar(&boot.ClusterType, "type", "production", "cluster `type`: development, test, or production")
+ flags.StringVar(&boot.ListenHost, "listen-host", "localhost", "host name or interface address for service listeners")
+ flags.StringVar(&boot.ControllerAddr, "controller-address", ":0", "desired controller address, `host:port` or `:port`")
+ flags.BoolVar(&boot.OwnTemporaryDatabase, "own-temporary-database", false, "bring up a postgres server and create a temporary database")
err = flags.Parse(args)
if err == flag.ErrHelp {
err = nil
return 2
}
- boot.Start(ctx, loader)
+ // Load the config here (instead of inside run()) so a bad config
+ // makes the command exit immediately instead of starting boot.
+ loader.SkipAPICalls = true
+ cfg, err := loader.Load()
+ if err != nil {
+ return 1
+ }
+
+ boot.Start(ctx, cfg)
defer boot.Stop()
- if boot.WaitReady() {
- fmt.Fprintln(stdout, boot.cluster.Services.Controller.ExternalURL)
+ if url, ok := boot.WaitReady(); ok {
+ // Print the controller URL so callers/scripts can use it.
+ fmt.Fprintln(stdout, url)
<-ctx.Done() // wait for signal
return 0
} else {
}
// Booter supervises the boot sequence and the resulting set of child
// services. Exported fields are configuration and must be set before
// Start; unexported fields are internal state.
// (Some fields, e.g. environ and ctx, are elided in this chunk.)
type Booter struct {
- SourcePath string // e.g., /home/username/src/arvados
- LibPath string // e.g., /var/lib/arvados
- ClusterType string // e.g., production
- Stderr io.Writer
+ SourcePath string // e.g., /home/username/src/arvados
+ LibPath string // e.g., /var/lib/arvados
+ ClusterType string // e.g., production
+ ListenHost string // e.g., localhost
+ ControllerAddr string // e.g., 127.0.0.1:8000
+ OwnTemporaryDatabase bool
+ Stderr io.Writer
logger logrus.FieldLogger
cluster *arvados.Cluster
// cancel aborts the boot context; done signals run() has exited
cancel context.CancelFunc
done chan struct{}
// healthChecker is nil until all startup tasks have completed
healthChecker *health.Aggregator
+ // tasksReady maps task.String() to a channel that is closed
+ // when that task's Run has returned successfully.
+ tasksReady map[string]chan bool
tempdir string
configfile string
goMutex sync.Mutex
}
-func (boot *Booter) Start(ctx context.Context, loader *config.Loader) {
+// Start launches the boot sequence in a background goroutine and
+// returns immediately. Use WaitReady to wait for startup and Stop to
+// shut down. (The close of boot.done is not visible in this chunk —
+// presumably handled by run()/Stop; confirm against full source.)
+func (boot *Booter) Start(ctx context.Context, cfg *arvados.Config) {
boot.ctx, boot.cancel = context.WithCancel(ctx)
boot.done = make(chan struct{})
go func() {
- err := boot.run(loader)
+ err := boot.run(cfg)
if err != nil {
fmt.Fprintln(boot.Stderr, err)
}
}()
}
-func (boot *Booter) run(loader *config.Loader) error {
+// run executes the boot sequence: fill in missing config, set up the
+// child-process environment, start every boot task concurrently, and
+// block until shutdown (context canceled by a signal or a failed
+// task). (Several hunks of this function are elided in this chunk.)
+func (boot *Booter) run(cfg *arvados.Config) error {
cwd, err := os.Getwd()
if err != nil {
return err
}
defer os.RemoveAll(boot.tempdir)
- loader.SkipAPICalls = true
- cfg, err := loader.Load()
- if err != nil {
- return err
- }
-
// Fill in any missing config keys, and write the resulting
// config in the temp dir for child services to use.
err = boot.autofillConfig(cfg, boot.logger)
boot.configfile = conffile.Name()
boot.environ = os.Environ()
+ boot.cleanEnv()
boot.setEnv("ARVADOS_CONFIG", boot.configfile)
boot.setEnv("RAILS_ENV", boot.ClusterType)
+ boot.setEnv("TMPDIR", boot.tempdir)
boot.prependEnv("PATH", filepath.Join(boot.LibPath, "bin")+":")
boot.cluster, err = cfg.GetCluster("")
}
// Now that we have the config, replace the bootstrap logger
// with a new one according to the logging config.
+ // ARVADOS_DEBUG in the environment forces debug-level logging
+ // regardless of the configured level.
- boot.logger = ctxlog.New(boot.Stderr, boot.cluster.SystemLogs.Format, boot.cluster.SystemLogs.LogLevel).WithFields(logrus.Fields{
+ loglevel := boot.cluster.SystemLogs.LogLevel
+ if s := os.Getenv("ARVADOS_DEBUG"); s != "" && s != "0" {
+ loglevel = "debug"
+ }
+ boot.logger = ctxlog.New(boot.Stderr, boot.cluster.SystemLogs.Format, loglevel).WithFields(logrus.Fields{
"PID": os.Getpid(),
})
- boot.healthChecker = &health.Aggregator{Cluster: boot.cluster}
for _, dir := range []string{boot.LibPath, filepath.Join(boot.LibPath, "bin")} {
if _, err = os.Stat(filepath.Join(dir, ".")); os.IsNotExist(err) {
return err
}
- var wg sync.WaitGroup
- for _, cmpt := range []component{
- {name: "nginx", runFunc: runNginx},
- {name: "controller", cmdHandler: controller.Command},
- {name: "dispatchcloud", cmdHandler: dispatchcloud.Command, notIfTest: true},
- {name: "git-httpd", goProg: "services/arv-git-httpd"},
- {name: "health", goProg: "services/health"},
- {name: "keep-balance", goProg: "services/keep-balance", notIfTest: true},
- {name: "keepproxy", goProg: "services/keepproxy"},
- {name: "keepstore", goProg: "services/keepstore", svc: boot.cluster.Services.Keepstore},
- {name: "keep-web", goProg: "services/keep-web"},
- {name: "railsAPI", svc: boot.cluster.Services.RailsAPI, railsApp: "services/api"},
- {name: "ws", goProg: "services/ws"},
- } {
- cmpt := cmpt
- wg.Add(1)
+ // All tasks start concurrently; ordering is enforced only by
+ // each task's "depends" list.
+ tasks := []bootTask{
+ createCertificates{},
+ runPostgreSQL{},
+ runNginx{},
+ runServiceCommand{name: "controller", svc: boot.cluster.Services.Controller, depends: []bootTask{runPostgreSQL{}}},
+ runGoProgram{src: "services/arv-git-httpd"},
+ runGoProgram{src: "services/health"},
+ runGoProgram{src: "services/keepproxy", depends: []bootTask{runPassenger{src: "services/api"}}},
+ runGoProgram{src: "services/keepstore", svc: boot.cluster.Services.Keepstore},
+ runGoProgram{src: "services/keep-web"},
+ runGoProgram{src: "services/ws", depends: []bootTask{runPostgreSQL{}}},
+ installPassenger{src: "services/api"},
+ runPassenger{src: "services/api", svc: boot.cluster.Services.RailsAPI, depends: []bootTask{createCertificates{}, runPostgreSQL{}, installPassenger{src: "services/api"}}},
+ installPassenger{src: "apps/workbench", depends: []bootTask{installPassenger{src: "services/api"}}}, // dependency ensures workbench doesn't delay api startup
+ runPassenger{src: "apps/workbench", svc: boot.cluster.Services.Workbench1, depends: []bootTask{installPassenger{src: "apps/workbench"}}},
+ seedDatabase{},
+ }
+ if boot.ClusterType != "test" {
+ tasks = append(tasks,
+ // NOTE(review): dispatch-cloud is passed the Controller
+ // service entry — confirm this shouldn't be
+ // boot.cluster.Services.DispatchCloud.
+ runServiceCommand{name: "dispatch-cloud", svc: boot.cluster.Services.Controller},
+ runGoProgram{src: "services/keep-balance"},
+ )
+ }
+ // One ready channel per task, closed when its Run succeeds, so
+ // dependents (and boot.wait below) can block on it.
+ boot.tasksReady = map[string]chan bool{}
+ for _, task := range tasks {
+ boot.tasksReady[task.String()] = make(chan bool)
+ }
+ for _, task := range tasks {
+ task := task
+ fail := func(err error) {
+ // Only the first failure cancels and logs.
+ if boot.ctx.Err() != nil {
+ return
+ }
+ boot.cancel()
+ boot.logger.WithField("task", task.String()).WithError(err).Error("task failed")
+ }
go func() {
- defer wg.Done()
- defer boot.cancel()
- boot.logger.WithField("component", cmpt.name).Info("starting")
- err := cmpt.Run(boot.ctx, boot)
- if err != nil && err != context.Canceled {
- boot.logger.WithError(err).WithField("component", cmpt.name).Error("exited")
+ boot.logger.WithField("task", task.String()).Info("starting")
+ err := task.Run(boot.ctx, fail, boot)
+ if err != nil {
+ fail(err)
+ return
}
+ close(boot.tasksReady[task.String()])
}()
}
- wg.Wait()
+ // Block until every task has reported ready, or the context is
+ // canceled (failure or signal).
+ err = boot.wait(boot.ctx, tasks...)
+ if err != nil {
+ return err
+ }
+ boot.logger.Info("all startup tasks are complete; starting health checks")
+ boot.healthChecker = &health.Aggregator{Cluster: boot.cluster}
+ <-boot.ctx.Done()
+ boot.logger.Info("shutting down")
+ return boot.ctx.Err()
+}
+
+// wait blocks until every named task's ready channel has been closed
+// (i.e., its Run returned successfully), or returns the context error
+// if ctx is canceled first. Tasks must already be registered in
+// boot.tasksReady.
+func (boot *Booter) wait(ctx context.Context, tasks ...bootTask) error {
+ for _, task := range tasks {
+ ch, ok := boot.tasksReady[task.String()]
+ if !ok {
+ return fmt.Errorf("no such task: %s", task)
+ }
+ boot.logger.WithField("task", task.String()).Info("waiting")
+ select {
+ case <-ch:
+ case <-ctx.Done():
+ return ctx.Err()
+ }
+ }
return nil
}
// (fragment of an elided function — presumably Stop waiting for
// run() to finish; confirm against full source)
<-boot.done
}
-func (boot *Booter) WaitReady() bool {
+// WaitReady polls once per second until either the boot context is
+// canceled (returns nil, false) or every configured component passes
+// its health check; on success it returns the controller's external
+// URL. (The code that obtains resp from the health checker is elided
+// in this chunk.)
+func (boot *Booter) WaitReady() (*arvados.URL, bool) {
for waiting := true; waiting; {
time.Sleep(time.Second)
if boot.ctx.Err() != nil {
- return false
+ return nil, false
}
if boot.healthChecker == nil {
// not set up yet
// instead we wait for all configured components to
// pass.
waiting = false
- for _, check := range resp.Checks {
+ for target, check := range resp.Checks {
if check.Health != "OK" {
waiting = true
+ boot.logger.WithField("target", target).Debug("waiting")
}
}
}
- return true
+ // Copy so the returned pointer doesn't alias cluster config.
+ u := boot.cluster.Services.Controller.ExternalURL
+ return &u, true
}
// prependEnv prepends the given string to the named variable's value
// in boot.environ, preserving the existing value (e.g. prepending
// "dir:" to PATH). As written before, it appended a second key=prepend
// entry without the old value; since the last duplicate wins in an
// exec environment, the inherited value (e.g. PATH) was clobbered and
// left with a dangling ":" element. This follows the same
// scan-and-replace pattern as setEnv.
func (boot *Booter) prependEnv(key, prepend string) {
	for i, s := range boot.environ {
		if strings.HasPrefix(s, key+"=") {
			// Splice prepend in front of the existing value.
			boot.environ[i] = key + "=" + prepend + s[len(key)+1:]
			return
		}
	}
	// Variable not present yet: add it with just the prepended part.
	boot.environ = append(boot.environ, key+"="+prepend)
}
+// Environment variables matching these prefixes are stripped before
+// launching child services: inherited ARVADOS_* settings and gem
+// paths from the operator's shell could otherwise override the
+// supervisor's own configuration.
+var cleanEnvPrefixes = []string{
+ "GEM_HOME=",
+ "GEM_PATH=",
+ "ARVADOS_",
+}
+
+// cleanEnv removes entries matching cleanEnvPrefixes from
+// boot.environ.
+func (boot *Booter) cleanEnv() {
+ var cleaned []string
+ for _, s := range boot.environ {
+ drop := false
+ for _, p := range cleanEnvPrefixes {
+ if strings.HasPrefix(s, p) {
+ drop = true
+ break
+ }
+ }
+ if !drop {
+ cleaned = append(cleaned, s)
+ }
+ }
+ boot.environ = cleaned
+}
+
// setEnv sets key=val in boot.environ, replacing an existing entry if
// present. (Function body is truncated in this chunk.)
func (boot *Booter) setEnv(key, val string) {
for i, s := range boot.environ {
if strings.HasPrefix(s, key+"=") {
}
// setupRubyEnv points GEM_HOME/GEM_PATH/PATH at the gem path reported
// by the (already environment-scrubbed) `gem` command, and sets HOME
// so Passenger installation works. (The derivation of gempath from
// buf is elided in this chunk.)
func (boot *Booter) setupRubyEnv() error {
- buf, err := exec.Command("gem", "env", "gempath").Output() // /var/lib/arvados/.gem/ruby/2.5.0/bin:...
+ // Run gem with our cleaned environment, not the inherited one,
+ // so stray GEM_HOME/GEM_PATH don't skew the result.
+ cmd := exec.Command("gem", "env", "gempath")
+ cmd.Env = boot.environ
+ buf, err := cmd.Output() // /var/lib/arvados/.gem/ruby/2.5.0/bin:...
if err != nil || len(buf) == 0 {
return fmt.Errorf("gem env gempath: %v", err)
}
boot.prependEnv("PATH", gempath+"/bin:")
boot.setEnv("GEM_HOME", gempath)
boot.setEnv("GEM_PATH", gempath)
+ // Passenger install doesn't work unless $HOME is ~user
+ u, err := user.Current()
+ if err != nil {
+ return err
+ }
+ boot.setEnv("HOME", u.HomeDir)
return nil
}
// RunProgram runs prog with args in dir, sending stdout to output (or,
// if output is nil, to stderr along with the prefixed log stream).
// (The tail of this function is elided in this chunk.)
func (boot *Booter) RunProgram(ctx context.Context, dir string, output io.Writer, env []string, prog string, args ...string) error {
cmdline := fmt.Sprintf("%s", append([]string{prog}, args...))
fmt.Fprintf(boot.Stderr, "%s executing in %s\n", cmdline, dir)
+
+ // Build a "[name] " prefix for interleaved log output: use the
+ // real program name for "bundle exec prog ...", and include the
+ // (relative) source dir when one is given.
+ logprefix := prog
+ if prog == "bundle" && len(args) > 2 && args[0] == "exec" {
+ logprefix = args[1]
+ }
+ if !strings.HasPrefix(dir, "/") {
+ logprefix = dir + ": " + logprefix
+ }
+ stderr := &logPrefixer{Writer: boot.Stderr, Prefix: []byte("[" + logprefix + "] ")}
+
cmd := exec.Command(boot.lookPath(prog), args...)
if output == nil {
- cmd.Stdout = boot.Stderr
+ cmd.Stdout = stderr
} else {
cmd.Stdout = output
}
- cmd.Stderr = boot.Stderr
+ cmd.Stderr = stderr
// Absolute dir is used as-is; relative dirs are handled in the
// elided branch below.
if strings.HasPrefix(dir, "/") {
cmd.Dir = dir
} else {
return nil
}
-type component struct {
- name string
- svc arvados.Service
- cmdHandler cmd.Handler
- runFunc func(ctx context.Context, boot *Booter) error
- railsApp string // source dir in arvados tree, e.g., "services/api"
- goProg string // source dir in arvados tree, e.g., "services/keepstore"
- notIfTest bool // don't run this component on a test cluster
-}
-
-func (cmpt *component) Run(ctx context.Context, boot *Booter) error {
- if cmpt.notIfTest && boot.ClusterType == "test" {
- fmt.Fprintf(boot.Stderr, "skipping component %q in %s mode\n", cmpt.name, boot.ClusterType)
- <-ctx.Done()
- return nil
+// autofillConfig fills in any missing service URLs, keepstore
+// volumes, and (optionally) a temporary-database connection, using
+// dynamically chosen free ports on boot.ListenHost. (This hunk
+// interleaves the deleted component.Run with the new function; some
+// lines, e.g. the SystemRootToken body, are elided.)
+func (boot *Booter) autofillConfig(cfg *arvados.Config, log logrus.FieldLogger) error {
+ cluster, err := cfg.GetCluster("")
+ if err != nil {
+ return err
}
- fmt.Fprintf(boot.Stderr, "starting component %q\n", cmpt.name)
- if cmpt.cmdHandler != nil {
- errs := make(chan error, 1)
- go func() {
- defer close(errs)
- exitcode := cmpt.cmdHandler.RunCommand(cmpt.name, []string{"-config", boot.configfile}, bytes.NewBuffer(nil), boot.Stderr, boot.Stderr)
- if exitcode != 0 {
- errs <- fmt.Errorf("exit code %d", exitcode)
+ // nextPort returns a kernel-assigned free port, retrying until it
+ // gets one this function hasn't already handed out.
+ usedPort := map[string]bool{}
+ nextPort := func() string {
+ for {
+ port, err := availablePort(":0")
+ if err != nil {
+ panic(err)
}
- }()
- select {
- case err := <-errs:
- return err
- case <-ctx.Done():
- // cmpt.cmdHandler.RunCommand() doesn't have
- // access to our context, so it won't shut
- // down by itself. We just abandon it.
- return nil
- }
- }
- if cmpt.goProg != "" {
- boot.RunProgram(ctx, cmpt.goProg, nil, nil, "go", "install")
- if ctx.Err() != nil {
- return nil
- }
- _, basename := filepath.Split(cmpt.goProg)
- if len(cmpt.svc.InternalURLs) > 0 {
- // Run one for each URL
- var wg sync.WaitGroup
- for u := range cmpt.svc.InternalURLs {
- u := u
- wg.Add(1)
- go func() {
- defer wg.Done()
- boot.RunProgram(ctx, boot.tempdir, nil, []string{"ARVADOS_SERVICE_INTERNAL_URL=" + u.String()}, basename)
- }()
+ if usedPort[port] {
+ continue
}
- wg.Wait()
- } else {
- // Just run one
- boot.RunProgram(ctx, boot.tempdir, nil, nil, basename)
+ usedPort[port] = true
+ return port
}
- return nil
}
- if cmpt.runFunc != nil {
- return cmpt.runFunc(ctx, boot)
- }
- if cmpt.railsApp != "" {
- port, err := internalPort(cmpt.svc)
- if err != nil {
- return fmt.Errorf("bug: no InternalURLs for component %q: %v", cmpt.name, cmpt.svc.InternalURLs)
- }
- var buf bytes.Buffer
- err = boot.RunProgram(ctx, cmpt.railsApp, &buf, nil, "gem", "list", "--details", "bundler")
- if err != nil {
- return err
- }
- for _, version := range []string{"1.11.0", "1.17.3", "2.0.2"} {
- if !strings.Contains(buf.String(), "("+version+")") {
- err = boot.RunProgram(ctx, cmpt.railsApp, nil, nil, "gem", "install", "--user", "bundler:1.11", "bundler:1.17.3", "bundler:2.0.2")
- if err != nil {
- return err
- }
- break
- }
- }
- err = boot.RunProgram(ctx, cmpt.railsApp, nil, nil, "bundle", "install", "--jobs", "4", "--path", filepath.Join(os.Getenv("HOME"), ".gem"))
+ // Honor -controller-address: fill in the host and/or port parts
+ // that were left unspecified.
+ if cluster.Services.Controller.ExternalURL.Host == "" {
+ h, p, err := net.SplitHostPort(boot.ControllerAddr)
if err != nil {
return err
}
- err = boot.RunProgram(ctx, cmpt.railsApp, nil, nil, "bundle", "exec", "passenger-config", "build-native-support")
- if err != nil {
- return err
+ if h == "" {
+ h = boot.ListenHost
}
- err = boot.RunProgram(ctx, cmpt.railsApp, nil, nil, "bundle", "exec", "passenger-config", "install-standalone-runtime")
- if err != nil {
- return err
- }
- err = boot.RunProgram(ctx, cmpt.railsApp, nil, nil, "bundle", "exec", "passenger-config", "validate-install")
- if err != nil {
- return err
- }
- err = boot.RunProgram(ctx, cmpt.railsApp, nil, nil, "bundle", "exec", "passenger", "start", "-p", port)
- if err != nil {
- return err
+ if p == "0" {
+ p, err = availablePort(":0")
+ if err != nil {
+ return err
+ }
+ usedPort[p] = true
}
- return nil
+ cluster.Services.Controller.ExternalURL = arvados.URL{Scheme: "https", Host: net.JoinHostPort(h, p)}
}
- return fmt.Errorf("bug: component %q has nothing to run", cmpt.name)
-}
-
-func (boot *Booter) autofillConfig(cfg *arvados.Config, log logrus.FieldLogger) error {
- cluster, err := cfg.GetCluster("")
- if err != nil {
- return err
- }
- port := 9000
for _, svc := range []*arvados.Service{
&cluster.Services.Controller,
&cluster.Services.DispatchCloud,
&cluster.Services.WebDAV,
&cluster.Services.WebDAVDownload,
&cluster.Services.Websocket,
+ &cluster.Services.Workbench1,
} {
if svc == &cluster.Services.DispatchCloud && boot.ClusterType == "test" {
continue
}
- if len(svc.InternalURLs) == 0 {
- port++
- svc.InternalURLs = map[arvados.URL]arvados.ServiceInstance{
- arvados.URL{Scheme: "http", Host: fmt.Sprintf("localhost:%d", port)}: arvados.ServiceInstance{},
- }
- }
if svc.ExternalURL.Host == "" && (svc == &cluster.Services.Controller ||
svc == &cluster.Services.GitHTTP ||
svc == &cluster.Services.Keepproxy ||
svc == &cluster.Services.WebDAV ||
svc == &cluster.Services.WebDAVDownload ||
- svc == &cluster.Services.Websocket) {
- port++
- svc.ExternalURL = arvados.URL{Scheme: "https", Host: fmt.Sprintf("localhost:%d", port)}
+ svc == &cluster.Services.Websocket ||
+ svc == &cluster.Services.Workbench1) {
+ svc.ExternalURL = arvados.URL{Scheme: "https", Host: fmt.Sprintf("%s:%s", boot.ListenHost, nextPort())}
+ }
+ if len(svc.InternalURLs) == 0 {
+ svc.InternalURLs = map[arvados.URL]arvados.ServiceInstance{
+ arvados.URL{Scheme: "http", Host: fmt.Sprintf("%s:%s", boot.ListenHost, nextPort())}: arvados.ServiceInstance{},
+ }
+ }
}
}
if cluster.SystemRootToken == "" {
}
if boot.ClusterType == "test" {
// Add a second keepstore process.
- port++
- cluster.Services.Keepstore.InternalURLs[arvados.URL{Scheme: "http", Host: fmt.Sprintf("localhost:%d", port)}] = arvados.ServiceInstance{}
+ cluster.Services.Keepstore.InternalURLs[arvados.URL{Scheme: "http", Host: fmt.Sprintf("%s:%s", boot.ListenHost, nextPort())}] = arvados.ServiceInstance{}
// Create a directory-backed volume for each keepstore
// process.
} else if err = os.Mkdir(datadir, 0777); err != nil {
return err
}
- cluster.Volumes[fmt.Sprintf("zzzzz-nyw5e-%015d", volnum)] = arvados.Volume{
+ cluster.Volumes[fmt.Sprintf(cluster.ClusterID+"-nyw5e-%015d", volnum)] = arvados.Volume{
Driver: "Directory",
DriverParameters: json.RawMessage(fmt.Sprintf(`{"Root":%q}`, datadir)),
AccessViaHosts: map[arvados.URL]arvados.VolumeAccess{
}
}
}
+ // Optionally point all services at a private postgres server
+ // (started by the runPostgreSQL task) with fixed credentials.
+ if boot.OwnTemporaryDatabase {
+ cluster.PostgreSQL.Connection = arvados.PostgreSQLConnection{
+ "client_encoding": "utf8",
+ "host": "localhost",
+ "port": nextPort(),
+ "dbname": "arvados_test",
+ "user": "arvados",
+ "password": "insecure_arvados_test",
+ }
+ }
+
cfg.Clusters[cluster.ClusterID] = *cluster
return nil
}
// (tail of an elided helper — presumably defaulting to port "80" when
// a URL omits an explicit port; confirm against full source)
return "80", nil
}
}
+
+
+// availablePort asks the kernel for a free TCP port by listening on
+// addr (typically ":0"), closing the listener, and returning the port
+// number that was assigned. Note the port is only probably-free:
+// another process could claim it after Close.
+func availablePort(addr string) (string, error) {
+ ln, err := net.Listen("tcp", addr)
+ if err != nil {
+ return "", err
+ }
+ defer ln.Close()
+ _, port, err := net.SplitHostPort(ln.Addr().String())
+ if err != nil {
+ return "", err
+ }
+ return port, nil
+}
+
+// waitForConnect tries to connect to addr until a connection
+// succeeds, then returns nil. It polls every 100ms and gives up,
+// returning the context's error, if ctx is canceled first.
+func waitForConnect(ctx context.Context, addr string) error {
+ dialer := net.Dialer{Timeout: time.Second}
+ for ctx.Err() == nil {
+ conn, err := dialer.DialContext(ctx, "tcp", addr)
+ if err != nil {
+ time.Sleep(time.Second / 10)
+ continue
+ }
+ // Connection succeeded; we only wanted proof of liveness.
+ conn.Close()
+ return nil
+ }
+ return ctx.Err()
+}