You will need:
* a server host running Debian 10 (buster) or Debian 11 (bullseye).
* a unique 5-character ID like @x9999@ for your cluster (first character should be @[a-w]@ for a long-lived / production cluster; all characters are @[a-z0-9]@).
-* a DNS name like @x9999.example.com@ that resolves to your server host (or a load balancer / proxy that passes HTTP and HTTPS requests through to your server host).
-* a Google account (use it in place of <code>example@gmail.com.example</code> in the instructions below).
+* a DNS name like @x9999.example.com@ that resolves to your server host (or a load balancer / proxy that passes HTTP requests on port 80[1] and HTTPS requests on ports 443 and 4440-4460 through to the same port on your server host).
+* a firewall setup that allows incoming connections to ports 80[1], 443, and 4440-4460.
+
+fn1. Port 80 is only used to obtain TLS certificates automatically from Let's Encrypt. It is not needed if you have another way to provision certificates.
+
+h2. Options
+
+Arvados needs a PostgreSQL database. To get started quickly, install the postgresql package on your server host.
+
+<pre>
+# apt install postgresql
+</pre>
+
+Arvados normally uses cloud VMs or a Slurm/LSF cluster to run containers. To get started quickly, install Docker on your server host. The @arvados-server init@ command, as shown below, will configure Arvados to run containers on the server host.
+
+<pre>
+# apt install docker.io
+</pre>
+
+Arvados needs a login backend. To get started quickly, add a user account on your server host and assign a password. The @arvados-server init ... -login pam@ option, as shown below, will configure Arvados so you can log in with this username and password.
+
+<pre>
+# adduser exampleUserName
+</pre>
h2. Initialize the cluster
<pre>
-# echo > /etc/apt/sources.list.d/arvados.list "deb http://apt.arvados.org/buster buster main"
-# apt-get update
-# apt-get install arvados-server-easy
-# arvados-server init -cluster-id x9999 -domain x9999.example.com -tls acme -admin-email example@gmail.com.example
+# echo > /etc/apt/sources.list.d/arvados.list "deb http://apt.arvados.org/$(lsb_release -sc) $(lsb_release -sc) main"
+# apt update
+# apt install arvados-server-easy
- # arvados-server init -cluster-id x9999 -domain x9999.example.com -tls auto -login pam
++# arvados-server init -cluster-id x9999 -domain x9999.example.com -tls acme -login pam
</pre>
-When the "init" command is finished, navigate to the link shown in the terminal (e.g., @https://x9999.example.com/token?api_token=zzzzzzzzzzzzzzzzzzzzzz@). This will log you in to your admin account.
+When the "init" command is finished, navigate to the link shown in the terminal (e.g., @https://x9999.example.com/@) and log in with the account you created above.
-h2. Enable login
+Activate your new Arvados user account. Copy the UUID (looks like @x9999-tpzed-xxxxxxxxxxxxxxx@) from your browser's location bar and run:
-Follow the instructions to "set up Google login":{{site.baseurl}}/install/setup-login.html or another authentication option.
+<pre>
+# arv sudo user setup --uuid x9999-tpzed-xxxxxxxxxxxxxxx
+</pre>
+
+Run the diagnostics tool to ensure everything is working.
+
+<pre>
+# arv sudo diagnostics
+</pre>
+
+h2. Customize the cluster
+
+Things you should plan to update before using your cluster in production:
+* "Set up Google login":{{site.baseurl}}/install/setup-login.html or another authentication option.
+* "Set up a wildcard TLS certificate and DNS name,":{{site.baseurl}}/install/install-manual-prerequisites.html#dnstls or enable @TrustAllContent@ mode.
+* Update storage configuration to use a cloud storage bucket ("S3":{{site.baseurl}}/install/configure-s3-object-storage.html or "Azure":{{site.baseurl}}/install/configure-azure-blob-storage.html) instead of the local filesystem.
+* Update "CloudVMs configuration":{{site.baseurl}}/install/crunch2-cloud/install-dispatch-cloud.html to use a cloud provider to bring up VMs on demand instead of running containers on the server host.
+
+h2. Updating configuration
+
+After updating your configuration file (@/etc/arvados/config.yml@), notify the server:
+
+<pre>
+# systemctl reload arvados-server
+</pre>
-After updating your configuration file (@/etc/arvados/config.yml@), restart the server to make your changes take effect:
+Optionally, add "AutoReloadConfig: true" at the top of @/etc/arvados/config.yml@. Arvados will automatically reload the config file when it changes.
<pre>
-# systemctl restart arvados-server
+AutoReloadConfig: true
+Clusters:
+ [...]
</pre>
"flag"
"fmt"
"io"
+ "net"
+ "net/http"
+ "net/url"
"os"
"os/exec"
"os/user"
"regexp"
"strconv"
"strings"
+ "sync/atomic"
"text/template"
+ "time"
"git.arvados.org/arvados.git/lib/cmd"
"git.arvados.org/arvados.git/lib/config"
+ "git.arvados.org/arvados.git/lib/controller/rpc"
"git.arvados.org/arvados.git/sdk/go/arvados"
+ "git.arvados.org/arvados.git/sdk/go/auth"
"git.arvados.org/arvados.git/sdk/go/ctxlog"
"github.com/lib/pq"
)
LoginGoogle bool
LoginGoogleClientID string
LoginGoogleClientSecret string
+ TLSDir string
}
func (initcmd *initCommand) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
flags.StringVar(&initcmd.Domain, "domain", hostname, "cluster public DNS `name`, like x1234.arvadosapi.com")
flags.StringVar(&initcmd.Login, "login", "", "login `backend`: test, pam, 'google {client-id} {client-secret}', or ''")
flags.StringVar(&initcmd.AdminEmail, "admin-email", "", "give admin privileges to user with given `email`")
- flags.StringVar(&initcmd.TLS, "tls", "none", "tls certificate `source`: acme, auto, insecure, or none")
+ flags.StringVar(&initcmd.TLS, "tls", "none", "tls certificate `source`: acme, insecure, none, or /path/to/dir containing privkey and cert files")
flags.BoolVar(&initcmd.Start, "start", true, "start systemd service after creating config")
if ok, code := cmd.ParseFlags(flags, prog, args, "", stderr); !ok {
return code
return 1
}
+ switch initcmd.TLS {
+ case "none", "acme", "insecure":
+ default:
+ if !strings.HasPrefix(initcmd.TLS, "/") {
+ err = fmt.Errorf("invalid argument to -tls: %q; see %s -help", initcmd.TLS, prog)
+ return 1
+ }
+ initcmd.TLSDir = initcmd.TLS
+ }
+
confdir := "/etc/arvados"
conffile := confdir + "/config.yml"
if _, err = os.Stat(conffile); err == nil {
return 1
}
+ ports := []int{443}
+ for i := 4440; i < 4460; i++ {
+ ports = append(ports, i)
+ }
+ if initcmd.TLS == "acme" {
+ ports = append(ports, 80)
+ }
+ for _, port := range ports {
+ err = initcmd.checkPort(ctx, fmt.Sprintf("%d", port))
+ if err != nil {
+ return 1
+ }
+ }
+
+ // Do the "create extension" thing early. This way, if there's
+ // no local postgresql server (a likely failure mode), we can
+ // bail out without any side effects, and the user can start
+ // over easily.
+ fmt.Fprintln(stderr, "installing pg_trgm postgresql extension...")
+ cmd := exec.CommandContext(ctx, "sudo", "-u", "postgres", "psql", "--quiet",
+ "-c", `CREATE EXTENSION IF NOT EXISTS pg_trgm`)
+ cmd.Dir = "/"
+ cmd.Stdout = stdout
+ cmd.Stderr = stderr
+ err = cmd.Run()
+ if err != nil {
+ err = fmt.Errorf("error preparing postgresql server: %w", err)
+ return 1
+ }
+ fmt.Fprintln(stderr, "...done")
+
wwwuser, err := user.Lookup("www-data")
if err != nil {
err = fmt.Errorf("user.Lookup(%q): %w", "www-data", err)
}
initcmd.PostgreSQLPassword = initcmd.RandomHex(32)
+ fmt.Fprintln(stderr, "creating data storage directory /var/lib/arvados/keep ...")
err = os.Mkdir("/var/lib/arvados/keep", 0600)
if err != nil && !os.IsExist(err) {
err = fmt.Errorf("mkdir /var/lib/arvados/keep: %w", err)
return 1
}
- fmt.Fprintln(stderr, "created /var/lib/arvados/keep")
+ fmt.Fprintln(stderr, "...done")
+ fmt.Fprintln(stderr, "creating config file", conffile, "...")
err = os.Mkdir(confdir, 0750)
if err != nil && !os.IsExist(err) {
err = fmt.Errorf("mkdir %s: %w", confdir, err)
err = fmt.Errorf("chown 0:%d %s: %w", wwwgid, confdir, err)
return 1
}
- f, err := os.OpenFile(conffile, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0644)
+ f, err := os.OpenFile(conffile+".tmp", os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0644)
if err != nil {
- err = fmt.Errorf("open %s: %w", conffile, err)
+ err = fmt.Errorf("open %s: %w", conffile+".tmp", err)
return 1
}
tmpl, err := template.New("config").Parse(`Clusters:
TLS:
{{if eq .TLS "insecure"}}
Insecure: true
- {{else if eq .TLS "auto"}}
- Automatic: true
{{else if eq .TLS "acme"}}
- Certificate: {{printf "%q" (print "/var/lib/acme/live/" .Domain "/cert")}}
- Key: {{printf "%q" (print "/var/lib/acme/live/" .Domain "/privkey")}}
+ ACME:
+ Server: LE
+ {{else if ne .TLSDir ""}}
+ Certificate: {{printf "%q" (print .TLSDir "/cert")}}
+ Key: {{printf "%q" (print .TLSDir "/privkey")}}
{{else}}
{}
{{end}}
}
err = tmpl.Execute(f, initcmd)
if err != nil {
- err = fmt.Errorf("%s: tmpl.Execute: %w", conffile, err)
+ err = fmt.Errorf("%s: tmpl.Execute: %w", conffile+".tmp", err)
return 1
}
err = f.Close()
if err != nil {
- err = fmt.Errorf("%s: close: %w", conffile, err)
+ err = fmt.Errorf("%s: close: %w", conffile+".tmp", err)
return 1
}
- fmt.Fprintln(stderr, "created", conffile)
+ err = os.Rename(conffile+".tmp", conffile)
+ if err != nil {
+ err = fmt.Errorf("rename %s -> %s: %w", conffile+".tmp", conffile, err)
+ return 1
+ }
+ fmt.Fprintln(stderr, "...done")
ldr := config.NewLoader(nil, logger)
ldr.SkipLegacy = true
+ ldr.Path = conffile // load the file we just wrote, even if $ARVADOS_CONFIG is set
cfg, err := ldr.Load()
if err != nil {
err = fmt.Errorf("%s: %w", conffile, err)
return 1
}
+ fmt.Fprintln(stderr, "creating postresql user and database...")
err = initcmd.createDB(ctx, cluster.PostgreSQL.Connection, stderr)
if err != nil {
return 1
}
+ fmt.Fprintln(stderr, "...done")
- cmd := exec.CommandContext(ctx, "sudo", "-u", "www-data", "-E", "HOME=/var/www", "PATH=/var/lib/arvados/bin:"+os.Getenv("PATH"), "/var/lib/arvados/bin/bundle", "exec", "rake", "db:setup")
+ fmt.Fprintln(stderr, "initializing database...")
+ cmd = exec.CommandContext(ctx, "sudo", "-u", "www-data", "-E", "HOME=/var/www", "PATH=/var/lib/arvados/bin:"+os.Getenv("PATH"), "/var/lib/arvados/bin/bundle", "exec", "rake", "db:setup")
cmd.Dir = "/var/lib/arvados/railsapi"
cmd.Stdout = stderr
cmd.Stderr = stderr
err = fmt.Errorf("rake db:setup failed: %w", err)
return 1
}
- fmt.Fprintln(stderr, "initialized database")
+ fmt.Fprintln(stderr, "...done")
if initcmd.Start {
- fmt.Fprintln(stderr, "starting systemd service")
- cmd := exec.CommandContext(ctx, "systemctl", "start", "--no-block", "arvados")
+ fmt.Fprintln(stderr, "starting systemd service...")
+ cmd := exec.CommandContext(ctx, "systemctl", "start", "arvados")
cmd.Dir = "/"
cmd.Stdout = stderr
cmd.Stderr = stderr
err = fmt.Errorf("%v: %w", cmd.Args, err)
return 1
}
+ fmt.Fprintln(stderr, "...done")
+
+ fmt.Fprintln(stderr, "checking controller API endpoint...")
+ u := url.URL(cluster.Services.Controller.ExternalURL)
+ conn := rpc.NewConn(cluster.ClusterID, &u, cluster.TLS.Insecure, rpc.PassthroughTokenProvider)
+ ctx := auth.NewContext(context.Background(), auth.NewCredentials(cluster.SystemRootToken))
+ _, err = conn.UserGetCurrent(ctx, arvados.GetOptions{})
+ if err != nil {
+ err = fmt.Errorf("API request failed: %w", err)
+ return 1
+ }
+ fmt.Fprintln(stderr, "...looks good")
+ }
+
+ if out, err := exec.CommandContext(ctx, "docker", "version").CombinedOutput(); err == nil && strings.Contains(string(out), "\nServer:\n") {
+ fmt.Fprintln(stderr, "loading alpine docker image for diagnostics...")
+ cmd := exec.CommandContext(ctx, "docker", "pull", "alpine")
+ cmd.Stdout = stderr
+ cmd.Stderr = stderr
+ err = cmd.Run()
+ if err != nil {
+ err = fmt.Errorf("%v: %w", cmd.Args, err)
+ return 1
+ }
+ cmd = exec.CommandContext(ctx, "arv", "sudo", "keep", "docker", "alpine")
+ cmd.Stdout = stderr
+ cmd.Stderr = stderr
+ err = cmd.Run()
+ if err != nil {
+ err = fmt.Errorf("%v: %w", cmd.Args, err)
+ return 1
+ }
+ fmt.Fprintln(stderr, "...done")
+ } else {
+ fmt.Fprintln(stderr, "docker is not installed -- skipping step of downloading 'alpine' image")
}
+ fmt.Fprintf(stderr, `
+Setup complete. Next steps:
+* run 'arv sudo diagnostics'
+* log in to workbench2 at %s
+* see documentation at https://doc.arvados.org/install/automatic.html
+`, cluster.Services.Workbench2.ExternalURL.String())
+
return 0
}
}
// createDB creates the PostgreSQL role and database for the cluster
// by running psql as the "postgres" OS user via sudo.
//
// The role name, password, and database name are taken from the
// "user", "password", and "dbname" entries of dbconn. The role is
// created WITH SUPERUSER, and the database is created from template0
// with utf8 encoding. Everything psql prints (stdout and stderr) is
// copied to stderr.
//
// It returns a wrapped error if the psql command fails, nil otherwise.
func (initcmd *initCommand) createDB(ctx context.Context, dbconn arvados.PostgreSQLConnection, stderr io.Writer) error {
- for _, sql := range []string{
- `CREATE USER ` + pq.QuoteIdentifier(dbconn["user"]) + ` WITH SUPERUSER ENCRYPTED PASSWORD ` + pq.QuoteLiteral(dbconn["password"]),
- `CREATE DATABASE ` + pq.QuoteIdentifier(dbconn["dbname"]) + ` WITH TEMPLATE template0 ENCODING 'utf8'`,
- `CREATE EXTENSION IF NOT EXISTS pg_trgm`,
- } {
- cmd := exec.CommandContext(ctx, "sudo", "-u", "postgres", "psql", "-c", sql)
- cmd.Dir = "/"
- cmd.Stdout = stderr
- cmd.Stderr = stderr
- err := cmd.Run()
- if err != nil {
- return fmt.Errorf("error setting up arvados user/database: %w", err)
- }
+ cmd := exec.CommandContext(ctx, "sudo", "-u", "postgres", "psql", "--quiet",
+ "-c", `CREATE USER `+pq.QuoteIdentifier(dbconn["user"])+` WITH SUPERUSER ENCRYPTED PASSWORD `+pq.QuoteLiteral(dbconn["password"]),
+ "-c", `CREATE DATABASE `+pq.QuoteIdentifier(dbconn["dbname"])+` WITH TEMPLATE template0 ENCODING 'utf8'`,
+ )
+ cmd.Dir = "/"
+ cmd.Stdout = stderr
+ cmd.Stderr = stderr
+ err := cmd.Run()
+ if err != nil {
+ return fmt.Errorf("error setting up arvados user/database: %w", err)
+ }
+ return nil
+}
+
+// checkPort confirms that http://{initcmd.Domain}:{port} reaches a
+// server that we run on {port}.
+//
+// If port is "80", the check fails, and Nginx appears to be using the
+// debian-packaged default configuration that listens on port 80,
+// disable that Nginx config and try again.
+//
+// (Typically, the reason Nginx is installed is so that Arvados can
+// run an Nginx child process; the default Nginx service using config
+// from /etc/nginx is just an unfortunate side effect of installing
+// Nginx by way of the Debian package.)
+//
+// The returned error is the one from the first check, unless the
+// default Nginx site was disabled, in which case it is the result of
+// the second check.
+func (initcmd *initCommand) checkPort(ctx context.Context, port string) error {
+	err := initcmd.checkPortOnce(ctx, port)
+	if err == nil || port != "80" {
+		// success, or poking Nginx in the eye won't help
+		return err
+	}
+	d, err2 := os.Open("/etc/nginx/sites-enabled/.")
+	if err2 != nil {
+		return err
+	}
+	fis, err2 := d.Readdir(-1)
+	// Release the directory handle right away so it is not leaked
+	// on any of the return paths below.
+	d.Close()
+	if err2 != nil || len(fis) != 1 {
+		// Only proceed when exactly one site is enabled.
+		return err
+	}
+	if target, err2 := os.Readlink("/etc/nginx/sites-enabled/default"); err2 != nil || target != "/etc/nginx/sites-available/default" {
+		// The enabled site is not the stock "default" symlink.
+		return err
+	}
+	err2 = os.Remove("/etc/nginx/sites-enabled/default")
+	if err2 != nil {
+		return err
+	}
+	// Best effort: the reload result is deliberately ignored,
+	// because the retry below reports whether the port is usable.
+	exec.CommandContext(ctx, "nginx", "-s", "reload").Run()
+	// Presumably gives Nginx a moment to act on the reload.
+	time.Sleep(time.Second)
+	return initcmd.checkPortOnce(ctx, port)
+}
+
+// checkPortOnce starts an http server on {port} (all interfaces) and
+// confirms that http://{initcmd.Domain}:{port} reaches that server.
+//
+// The server answers every request with a random token generated
+// for this call. The check succeeds only if a GET request to
+// http://{initcmd.Domain}:{port}/probe returns that token.
+//
+// It returns the listen error if the port cannot be bound, the
+// server's error if it stopped serving before the probe finished,
+// the request error if the probe request failed, or a descriptive
+// error if the response did not carry the expected token.
+func (initcmd *initCommand) checkPortOnce(ctx context.Context, port string) error {
+	b := make([]byte, 128)
+	_, err := rand.Read(b)
+	if err != nil {
+		return err
+	}
+	token := fmt.Sprintf("%x", b)
+
+	// Bind the port before sending the probe. This way a "can't
+	// listen" failure is reported as such, instead of racing with
+	// the probe request and possibly being misreported as some
+	// other server answering on our port.
+	ln, err := net.Listen("tcp", net.JoinHostPort("", port))
+	if err != nil {
+		return err
+	}
+	// srv.Close() normally closes the listener; this covers the
+	// case where Serve has not registered it yet.
+	defer ln.Close()
+
+	srv := http.Server{
+		Handler: http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			fmt.Fprint(w, token)
+		})}
+	var errServe atomic.Value
+	go func() {
+		// Serve always returns a non-nil error, so Store never
+		// receives nil.
+		errServe.Store(srv.Serve(ln))
+	}()
+	defer srv.Close()
+	// Named probeURL so the imported net/url package is not shadowed.
+	probeURL := "http://" + net.JoinHostPort(initcmd.Domain, port) + "/probe"
+	req, err := http.NewRequestWithContext(ctx, "GET", probeURL, nil)
+	if err != nil {
+		return err
+	}
+	resp, err := http.DefaultClient.Do(req)
+	if err == nil {
+		defer resp.Body.Close()
+	}
+	if serveErr, _ := errServe.Load().(error); serveErr != nil {
+		// If server already exited, return that error, not the
+		// request error.
+		return serveErr
+	}
+	if err != nil {
+		return err
+	}
+	buf := make([]byte, len(token))
+	n, err := io.ReadFull(resp.Body, buf)
+	if string(buf[:n]) != token {
+		return fmt.Errorf("listened on port %s but %s connected to something else, returned %q, err %v", port, probeURL, buf[:n], err)
}
return nil
}