From: Tom Clegg Date: Wed, 20 Jul 2022 19:02:00 +0000 (-0400) Subject: 17344: Merge branch 'main' X-Git-Tag: 2.5.0~115^2~1 X-Git-Url: https://git.arvados.org/arvados.git/commitdiff_plain/8b2af30849edeab5ad8ebc6b51eaec39e5fdd81a?hp=-c 17344: Merge branch 'main' Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- 8b2af30849edeab5ad8ebc6b51eaec39e5fdd81a diff --combined doc/install/automatic.html.textile.liquid index 33c6fd3d37,d72f8f69ee..398ebc20e0 --- a/doc/install/automatic.html.textile.liquid +++ b/doc/install/automatic.html.textile.liquid @@@ -22,74 -22,26 +22,74 @@@ h2. Prerequisite You will need: * a server host running Debian 10 (buster) or Debian 11 (bullseye). * a unique 5-character ID like @x9999@ for your cluster (first character should be @[a-w]@ for a long-lived / production cluster; all characters are @[a-z0-9]@). -* a DNS name like @x9999.example.com@ that resolves to your server host (or a load balancer / proxy that passes HTTP and HTTPS requests through to your server host). -* a Google account (use it in place of example@gmail.com.example in the instructions below). +* a DNS name like @x9999.example.com@ that resolves to your server host (or a load balancer / proxy that passes HTTP requests on port 80[1] and HTTPS requests on ports 443 and 4440-4460 through to the same port on your server host). +* a firewall setup that allows incoming connections to ports 80[1], 443, and 4440-4460. + +fn1. Port 80 is only used to obtain TLS certificates automatically from Let's Encrypt. It is not needed if you have another way to provision certificates. + +h2. Options + +Arvados needs a PostgreSQL database. To get started quickly, install the postgresql-server package on your server host. + +
 +# apt install postgresql
 +
+ +Arvados normally uses cloud VMs or a Slurm/LSF cluster to run containers. To get started quickly, install Docker on your server host. The @arvados-server init@ command, as shown below, will configure Arvados to run containers on the server host. + +
 +# apt install docker.io
 +
+ +Arvados needs a login backend. To get started quickly, add a user account on your server host and assign a password. The @arvados-server init ... -login pam@ option, as shown below, will configure Arvados so you can log in with this username and password. + +
 +# adduser exampleUserName
 +
h2. Initialize the cluster
 -# echo > /etc/apt/sources.list.d/arvados.list "deb http://apt.arvados.org/buster buster main"
 -# apt-get update
 -# apt-get install arvados-server-easy
 -# arvados-server init -cluster-id x9999 -domain x9999.example.com -tls acme -admin-email example@gmail.com.example
 +# echo > /etc/apt/sources.list.d/arvados.list "deb http://apt.arvados.org/$(lsb_release -sc) $(lsb_release -sc) main"
 +# apt update
 +# apt install arvados-server-easy
- # arvados-server init -cluster-id x9999 -domain x9999.example.com -tls auto -login pam
++# arvados-server init -cluster-id x9999 -domain x9999.example.com -tls acme -login pam
  
-When the "init" command is finished, navigate to the link shown in the terminal (e.g., @https://x9999.example.com/token?api_token=zzzzzzzzzzzzzzzzzzzzzz@). This will log you in to your admin account. +When the "init" command is finished, navigate to the link shown in the terminal (e.g., @https://x9999.example.com/@) and log in with the account you created above. -h2. Enable login +Activate your new Arvados user account. Copy the UUID (looks like @x9999-tpzed-xxxxxxxxxxxxxxx@) from your browser's location bar and run: -Follow the instructions to "set up Google login":{{site.baseurl}}/install/setup-login.html or another authentication option. +
 +# arv sudo user setup --uuid x9999-tpzed-xxxxxxxxxxxxxxx
 +
+ +Run the diagnostics tool to ensure everything is working. + +
 +# arv sudo diagnostics
 +
+ +h2. Customize the cluster + +Things you should plan to update before using your cluster in production: +* "Set up Google login":{{site.baseurl}}/install/setup-login.html or another authentication option. +* "Set up a wildcard TLS certificate and DNS name,":{{site.baseurl}}/install/install-manual-prerequisites.html#dnstls or enable @TrustAllContent@ mode. +* Update storage configuration to use a cloud storage bucket ("S3":{{site.baseurl}}/install/configure-s3-object-storage.html or "Azure":{{site.baseurl}}/install/configure-azure-blob-storage.html) instead of the local filesystem. +* Update "CloudVMs configuration":{{site.baseurl}}/install/crunch2-cloud/install-dispatch-cloud.html to use a cloud provider to bring up VMs on demand instead of running containers on the server host. + +h2. Updating configuration + +After updating your configuration file (@/etc/arvados/config.yml@), notify the server: + +
 +# systemctl reload arvados-server
 +
-After updating your configuration file (@/etc/arvados/config.yml@), restart the server to make your changes take effect: +Optionally, add "AutoReloadConfig: true" at the top of @/etc/arvados/config.yml@. Arvados will automatically reload the config file when it changes.
 -# systemctl restart arvados-server
 +AutoReloadConfig: true
 +Clusters:
 +  [...]
  
diff --combined lib/install/init.go index 14e2eaafab,d322e753eb..79c9fbdac7 --- a/lib/install/init.go +++ b/lib/install/init.go @@@ -13,24 -13,17 +13,24 @@@ import "flag" "fmt" "io" + "net" + "net/http" + "net/url" "os" "os/exec" "os/user" "regexp" "strconv" "strings" + "sync/atomic" "text/template" + "time" "git.arvados.org/arvados.git/lib/cmd" "git.arvados.org/arvados.git/lib/config" + "git.arvados.org/arvados.git/lib/controller/rpc" "git.arvados.org/arvados.git/sdk/go/arvados" + "git.arvados.org/arvados.git/sdk/go/auth" "git.arvados.org/arvados.git/sdk/go/ctxlog" "github.com/lib/pq" ) @@@ -51,6 -44,7 +51,7 @@@ type initCommand struct LoginGoogle bool LoginGoogleClientID string LoginGoogleClientSecret string + TLSDir string } func (initcmd *initCommand) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int { @@@ -79,7 -73,7 +80,7 @@@ flags.StringVar(&initcmd.Domain, "domain", hostname, "cluster public DNS `name`, like x1234.arvadosapi.com") flags.StringVar(&initcmd.Login, "login", "", "login `backend`: test, pam, 'google {client-id} {client-secret}', or ''") flags.StringVar(&initcmd.AdminEmail, "admin-email", "", "give admin privileges to user with given `email`") - flags.StringVar(&initcmd.TLS, "tls", "none", "tls certificate `source`: acme, auto, insecure, or none") + flags.StringVar(&initcmd.TLS, "tls", "none", "tls certificate `source`: acme, insecure, none, or /path/to/dir containing privkey and cert files") flags.BoolVar(&initcmd.Start, "start", true, "start systemd service after creating config") if ok, code := cmd.ParseFlags(flags, prog, args, "", stderr); !ok { return code @@@ -108,6 -102,16 +109,16 @@@ return 1 } + switch initcmd.TLS { + case "none", "acme", "insecure": + default: + if !strings.HasPrefix(initcmd.TLS, "/") { + err = fmt.Errorf("invalid argument to -tls: %q; see %s -help", initcmd.TLS, prog) + return 1 + } + initcmd.TLSDir = initcmd.TLS + } + confdir := "/etc/arvados" conffile := confdir + "/config.yml" if 
_, err = os.Stat(conffile); err == nil { @@@ -115,37 -119,6 +126,37 @@@ return 1 } + ports := []int{443} + for i := 4440; i < 4460; i++ { + ports = append(ports, i) + } + if initcmd.TLS == "acme" { + ports = append(ports, 80) + } + for _, port := range ports { + err = initcmd.checkPort(ctx, fmt.Sprintf("%d", port)) + if err != nil { + return 1 + } + } + + // Do the "create extension" thing early. This way, if there's + // no local postgresql server (a likely failure mode), we can + // bail out without any side effects, and the user can start + // over easily. + fmt.Fprintln(stderr, "installing pg_trgm postgresql extension...") + cmd := exec.CommandContext(ctx, "sudo", "-u", "postgres", "psql", "--quiet", + "-c", `CREATE EXTENSION IF NOT EXISTS pg_trgm`) + cmd.Dir = "/" + cmd.Stdout = stdout + cmd.Stderr = stderr + err = cmd.Run() + if err != nil { + err = fmt.Errorf("error preparing postgresql server: %w", err) + return 1 + } + fmt.Fprintln(stderr, "...done") + wwwuser, err := user.Lookup("www-data") if err != nil { err = fmt.Errorf("user.Lookup(%q): %w", "www-data", err) @@@ -157,15 -130,13 +168,15 @@@ } initcmd.PostgreSQLPassword = initcmd.RandomHex(32) + fmt.Fprintln(stderr, "creating data storage directory /var/lib/arvados/keep ...") err = os.Mkdir("/var/lib/arvados/keep", 0600) if err != nil && !os.IsExist(err) { err = fmt.Errorf("mkdir /var/lib/arvados/keep: %w", err) return 1 } - fmt.Fprintln(stderr, "created /var/lib/arvados/keep") + fmt.Fprintln(stderr, "...done") + fmt.Fprintln(stderr, "creating config file", conffile, "...") err = os.Mkdir(confdir, 0750) if err != nil && !os.IsExist(err) { err = fmt.Errorf("mkdir %s: %w", confdir, err) @@@ -176,9 -147,9 +187,9 @@@ err = fmt.Errorf("chown 0:%d %s: %w", wwwgid, confdir, err) return 1 } - f, err := os.OpenFile(conffile, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0644) + f, err := os.OpenFile(conffile+".tmp", os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0644) if err != nil { - err = fmt.Errorf("open %s: %w", conffile, err) + 
err = fmt.Errorf("open %s: %w", conffile+".tmp", err) return 1 } tmpl, err := template.New("config").Parse(`Clusters: @@@ -254,11 -225,12 +265,12 @@@ TLS: {{if eq .TLS "insecure"}} Insecure: true - {{else if eq .TLS "auto"}} - Automatic: true {{else if eq .TLS "acme"}} - Certificate: {{printf "%q" (print "/var/lib/acme/live/" .Domain "/cert")}} - Key: {{printf "%q" (print "/var/lib/acme/live/" .Domain "/privkey")}} + ACME: + Server: LE + {{else if ne .TLSDir ""}} + Certificate: {{printf "%q" (print .TLSDir "/cert")}} + Key: {{printf "%q" (print .TLSDir "/privkey")}} {{else}} {} {{end}} @@@ -297,24 -269,18 +309,24 @@@ } err = tmpl.Execute(f, initcmd) if err != nil { - err = fmt.Errorf("%s: tmpl.Execute: %w", conffile, err) + err = fmt.Errorf("%s: tmpl.Execute: %w", conffile+".tmp", err) return 1 } err = f.Close() if err != nil { - err = fmt.Errorf("%s: close: %w", conffile, err) + err = fmt.Errorf("%s: close: %w", conffile+".tmp", err) return 1 } - fmt.Fprintln(stderr, "created", conffile) + err = os.Rename(conffile+".tmp", conffile) + if err != nil { + err = fmt.Errorf("rename %s -> %s: %w", conffile+".tmp", conffile, err) + return 1 + } + fmt.Fprintln(stderr, "...done") ldr := config.NewLoader(nil, logger) ldr.SkipLegacy = true + ldr.Path = conffile // load the file we just wrote, even if $ARVADOS_CONFIG is set cfg, err := ldr.Load() if err != nil { err = fmt.Errorf("%s: %w", conffile, err) @@@ -325,15 -291,12 +337,15 @@@ return 1 } + fmt.Fprintln(stderr, "creating postresql user and database...") err = initcmd.createDB(ctx, cluster.PostgreSQL.Connection, stderr) if err != nil { return 1 } + fmt.Fprintln(stderr, "...done") - cmd := exec.CommandContext(ctx, "sudo", "-u", "www-data", "-E", "HOME=/var/www", "PATH=/var/lib/arvados/bin:"+os.Getenv("PATH"), "/var/lib/arvados/bin/bundle", "exec", "rake", "db:setup") + fmt.Fprintln(stderr, "initializing database...") + cmd = exec.CommandContext(ctx, "sudo", "-u", "www-data", "-E", "HOME=/var/www", 
"PATH=/var/lib/arvados/bin:"+os.Getenv("PATH"), "/var/lib/arvados/bin/bundle", "exec", "rake", "db:setup") cmd.Dir = "/var/lib/arvados/railsapi" cmd.Stdout = stderr cmd.Stderr = stderr @@@ -342,11 -305,11 +354,11 @@@ err = fmt.Errorf("rake db:setup failed: %w", err) return 1 } - fmt.Fprintln(stderr, "initialized database") + fmt.Fprintln(stderr, "...done") if initcmd.Start { - fmt.Fprintln(stderr, "starting systemd service") - cmd := exec.CommandContext(ctx, "systemctl", "start", "--no-block", "arvados") + fmt.Fprintln(stderr, "starting systemd service...") + cmd := exec.CommandContext(ctx, "systemctl", "start", "arvados") cmd.Dir = "/" cmd.Stdout = stderr cmd.Stderr = stderr @@@ -355,50 -318,8 +367,50 @@@ err = fmt.Errorf("%v: %w", cmd.Args, err) return 1 } + fmt.Fprintln(stderr, "...done") + + fmt.Fprintln(stderr, "checking controller API endpoint...") + u := url.URL(cluster.Services.Controller.ExternalURL) + conn := rpc.NewConn(cluster.ClusterID, &u, cluster.TLS.Insecure, rpc.PassthroughTokenProvider) + ctx := auth.NewContext(context.Background(), auth.NewCredentials(cluster.SystemRootToken)) + _, err = conn.UserGetCurrent(ctx, arvados.GetOptions{}) + if err != nil { + err = fmt.Errorf("API request failed: %w", err) + return 1 + } + fmt.Fprintln(stderr, "...looks good") + } + + if out, err := exec.CommandContext(ctx, "docker", "version").CombinedOutput(); err == nil && strings.Contains(string(out), "\nServer:\n") { + fmt.Fprintln(stderr, "loading alpine docker image for diagnostics...") + cmd := exec.CommandContext(ctx, "docker", "pull", "alpine") + cmd.Stdout = stderr + cmd.Stderr = stderr + err = cmd.Run() + if err != nil { + err = fmt.Errorf("%v: %w", cmd.Args, err) + return 1 + } + cmd = exec.CommandContext(ctx, "arv", "sudo", "keep", "docker", "alpine") + cmd.Stdout = stderr + cmd.Stderr = stderr + err = cmd.Run() + if err != nil { + err = fmt.Errorf("%v: %w", cmd.Args, err) + return 1 + } + fmt.Fprintln(stderr, "...done") + } else { + fmt.Fprintln(stderr, 
"docker is not installed -- skipping step of downloading 'alpine' image") } + fmt.Fprintf(stderr, ` +Setup complete. Next steps: +* run 'arv sudo diagnostics' +* log in to workbench2 at %s +* see documentation at https://doc.arvados.org/install/automatic.html +`, cluster.Services.Workbench2.ExternalURL.String()) + return 0 } @@@ -427,98 -348,19 +439,98 @@@ func (initcmd *initCommand) RandomHex(c } func (initcmd *initCommand) createDB(ctx context.Context, dbconn arvados.PostgreSQLConnection, stderr io.Writer) error { - for _, sql := range []string{ - `CREATE USER ` + pq.QuoteIdentifier(dbconn["user"]) + ` WITH SUPERUSER ENCRYPTED PASSWORD ` + pq.QuoteLiteral(dbconn["password"]), - `CREATE DATABASE ` + pq.QuoteIdentifier(dbconn["dbname"]) + ` WITH TEMPLATE template0 ENCODING 'utf8'`, - `CREATE EXTENSION IF NOT EXISTS pg_trgm`, - } { - cmd := exec.CommandContext(ctx, "sudo", "-u", "postgres", "psql", "-c", sql) - cmd.Dir = "/" - cmd.Stdout = stderr - cmd.Stderr = stderr - err := cmd.Run() - if err != nil { - return fmt.Errorf("error setting up arvados user/database: %w", err) - } + cmd := exec.CommandContext(ctx, "sudo", "-u", "postgres", "psql", "--quiet", + "-c", `CREATE USER `+pq.QuoteIdentifier(dbconn["user"])+` WITH SUPERUSER ENCRYPTED PASSWORD `+pq.QuoteLiteral(dbconn["password"]), + "-c", `CREATE DATABASE `+pq.QuoteIdentifier(dbconn["dbname"])+` WITH TEMPLATE template0 ENCODING 'utf8'`, + ) + cmd.Dir = "/" + cmd.Stdout = stderr + cmd.Stderr = stderr + err := cmd.Run() + if err != nil { + return fmt.Errorf("error setting up arvados user/database: %w", err) + } + return nil +} + +// Confirm that http://{initcmd.Domain}:{port} reaches a server that +// we run on {port}. +// +// If port is "80", listening fails, and Nginx appears to be using the +// debian-packaged default configuration that listens on port 80, +// disable that Nginx config and try again. 
+// +// (Typically, the reason Nginx is installed is so that Arvados can +// run an Nginx child process; the default Nginx service using config +// from /etc/nginx is just an unfortunate side effect of installing +// Nginx by way of the Debian package.) +func (initcmd *initCommand) checkPort(ctx context.Context, port string) error { + err := initcmd.checkPortOnce(ctx, port) + if err == nil || port != "80" { + // success, or poking Nginx in the eye won't help + return err + } + d, err2 := os.Open("/etc/nginx/sites-enabled/.") + if err2 != nil { + return err + } + fis, err2 := d.Readdir(-1) + if err2 != nil || len(fis) != 1 { + return err + } + if target, err2 := os.Readlink("/etc/nginx/sites-enabled/default"); err2 != nil || target != "/etc/nginx/sites-available/default" { + return err + } + err2 = os.Remove("/etc/nginx/sites-enabled/default") + if err2 != nil { + return err + } + exec.CommandContext(ctx, "nginx", "-s", "reload").Run() + time.Sleep(time.Second) + return initcmd.checkPortOnce(ctx, port) +} + +// Start an http server on 0.0.0.0:{port} and confirm that +// http://{initcmd.Domain}:{port} reaches that server. 
+func (initcmd *initCommand) checkPortOnce(ctx context.Context, port string) error { + b := make([]byte, 128) + _, err := rand.Read(b) + if err != nil { + return err + } + token := fmt.Sprintf("%x", b) + + srv := http.Server{ + Addr: net.JoinHostPort("", port), + Handler: http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + fmt.Fprint(w, token) + })} + var errServe atomic.Value + go func() { + errServe.Store(srv.ListenAndServe()) + }() + defer srv.Close() + url := "http://" + net.JoinHostPort(initcmd.Domain, port) + "/probe" + req, err := http.NewRequestWithContext(ctx, "GET", url, nil) + if err != nil { + return err + } + resp, err := http.DefaultClient.Do(req) + if err == nil { + defer resp.Body.Close() + } + if errServe, _ := errServe.Load().(error); errServe != nil { + // If server already exited, return that error + // (probably "can't listen"), not the request error. + return errServe + } + if err != nil { + return err + } + buf := make([]byte, len(token)) + n, err := io.ReadFull(resp.Body, buf) + if string(buf[:n]) != token { + return fmt.Errorf("listened on port %s but %s connected to something else, returned %q, err %v", port, url, buf[:n], err) } return nil }