Merge branch '21678-installer-diagnostics-internal'. Closes #21678
[arvados.git] / lib / boot / nginx.go
index dc4aebd528d4bf3f6c8d43c359efb8b51ed6b73a..338a6b5bcc494b1fdfbcd50c3263fed119ceb916 100644 (file)
@@ -5,6 +5,7 @@
 package boot
 
 import (
+       "bytes"
        "context"
        "fmt"
        "io/ioutil"
@@ -12,11 +13,12 @@ import (
        "net/url"
        "os"
        "os/exec"
-       "os/user"
        "path/filepath"
        "regexp"
+       "strings"
 
        "git.arvados.org/arvados.git/sdk/go/arvados"
+       "github.com/sirupsen/logrus"
 )
 
 // Run an Nginx process that proxies the supervisor's configured
@@ -32,13 +34,37 @@ func (runNginx) Run(ctx context.Context, fail func(error), super *Supervisor) er
        if err != nil {
                return err
        }
+       extListenHost := "0.0.0.0"
+       if super.ClusterType == "test" {
+               // Our dynamic port number assignment strategy (choose
+               // an available port, write it in a config file, and
+               // have another process/goroutine bind to it) is prone
+               // to races when used by concurrent supervisors. In
+               // test mode we don't accept remote connections, so we
+               // can avoid collisions by using the per-cluster
+               // loopback address instead of 0.0.0.0.
+               extListenHost = super.ListenHost
+       }
        vars := map[string]string{
-               "LISTENHOST": super.ListenHost,
-               "SSLCERT":    filepath.Join(super.tempdir, "server.crt"),
-               "SSLKEY":     filepath.Join(super.tempdir, "server.key"),
-               "ACCESSLOG":  filepath.Join(super.tempdir, "nginx_access.log"),
-               "ERRORLOG":   filepath.Join(super.tempdir, "nginx_error.log"),
-               "TMPDIR":     super.wwwtempdir,
+               "LISTENHOST":       extListenHost,
+               "UPSTREAMHOST":     super.ListenHost,
+               "INTERNALSUBNETS":  internalSubnets(super.logger),
+               "SSLCERT":          filepath.Join(super.tempdir, "server.crt"),
+               "SSLKEY":           filepath.Join(super.tempdir, "server.key"),
+               "ACCESSLOG":        filepath.Join(super.tempdir, "nginx_access.log"),
+               "ERRORLOG":         filepath.Join(super.tempdir, "nginx_error.log"),
+               "TMPDIR":           super.wwwtempdir,
+               "ARVADOS_API_HOST": super.cluster.Services.Controller.ExternalURL.Host,
+       }
+       u := url.URL(super.cluster.Services.Controller.ExternalURL)
+       ctrlHost := u.Hostname()
+       if strings.HasPrefix(super.cluster.TLS.Certificate, "file:/") && strings.HasPrefix(super.cluster.TLS.Key, "file:/") {
+               vars["SSLCERT"] = filepath.Clean(super.cluster.TLS.Certificate[5:])
+               vars["SSLKEY"] = filepath.Clean(super.cluster.TLS.Key[5:])
+       } else if f, err := os.Open("/var/lib/acme/live/" + ctrlHost + "/privkey"); err == nil {
+               f.Close()
+               vars["SSLCERT"] = "/var/lib/acme/live/" + ctrlHost + "/cert"
+               vars["SSLKEY"] = "/var/lib/acme/live/" + ctrlHost + "/privkey"
        }
        for _, cmpt := range []struct {
                varname string
@@ -48,17 +74,22 @@ func (runNginx) Run(ctx context.Context, fail func(error), super *Supervisor) er
                {"KEEPWEB", super.cluster.Services.WebDAV},
                {"KEEPWEBDL", super.cluster.Services.WebDAVDownload},
                {"KEEPPROXY", super.cluster.Services.Keepproxy},
-               {"GIT", super.cluster.Services.GitHTTP},
                {"HEALTH", super.cluster.Services.Health},
                {"WORKBENCH1", super.cluster.Services.Workbench1},
+               {"WORKBENCH2", super.cluster.Services.Workbench2},
                {"WS", super.cluster.Services.Websocket},
        } {
-               host, port, err := internalPort(cmpt.svc)
-               if err != nil {
+               var host, port string
+               if len(cmpt.svc.InternalURLs) == 0 {
+                       // We won't run this service, but we need an
+                       // upstream port to write in our templated
+                       // nginx config. Choose a port that will
+                       // return 502 Bad Gateway.
+                       port = "9"
+               } else if host, port, err = internalPort(cmpt.svc); err != nil {
                        return fmt.Errorf("%s internal port: %w (%v)", cmpt.varname, err, cmpt.svc)
-               }
-               if ok, err := addrIsLocal(net.JoinHostPort(host, port)); !ok || err != nil {
-                       return fmt.Errorf("urlIsLocal() failed for host %q port %q: %v", host, port, err)
+               } else if ok, err := addrIsLocal(net.JoinHostPort(host, port)); !ok || err != nil {
+                       return fmt.Errorf("%s addrIsLocal() failed for host %q port %q: %v", cmpt.varname, host, port, err)
                }
                vars[cmpt.varname+"PORT"] = port
 
@@ -66,8 +97,9 @@ func (runNginx) Run(ctx context.Context, fail func(error), super *Supervisor) er
                if err != nil {
                        return fmt.Errorf("%s external port: %w (%v)", cmpt.varname, err, cmpt.svc)
                }
-               if ok, err := addrIsLocal(net.JoinHostPort(super.ListenHost, port)); !ok || err != nil {
-                       return fmt.Errorf("urlIsLocal() failed for host %q port %q: %v", super.ListenHost, port, err)
+               listenAddr := net.JoinHostPort(super.ListenHost, port)
+               if ok, err := addrIsLocal(listenAddr); !ok || err != nil {
+                       return fmt.Errorf("%s addrIsLocal(%q) failed: %w", cmpt.varname, listenAddr, err)
                }
                vars[cmpt.varname+"SSLPORT"] = port
        }
@@ -102,31 +134,45 @@ func (runNginx) Run(ctx context.Context, fail func(error), super *Supervisor) er
                }
        }
 
-       args := []string{
-               "-g", "error_log stderr info;",
-               "-g", "pid " + filepath.Join(super.wwwtempdir, "nginx.pid") + ";",
-               "-c", conffile,
-       }
-       // Nginx ignores "user www-data;" when running as a non-root
-       // user... except that it causes it to ignore our other -g
-       // options. So we still have to decide for ourselves whether
-       // it's needed.
-       if u, err := user.Current(); err != nil {
-               return fmt.Errorf("user.Current(): %w", err)
-       } else if u.Uid == "0" {
-               args = append([]string{"-g", "user www-data;"}, args...)
-       }
+       configs := "error_log stderr warn; "
+       configs += "pid " + filepath.Join(super.wwwtempdir, "nginx.pid") + "; "
+       configs += "user www-data; "
 
        super.waitShutdown.Add(1)
        go func() {
                defer super.waitShutdown.Done()
-               fail(super.RunProgram(ctx, ".", runOptions{}, nginx, args...))
+               fail(super.RunProgram(ctx, ".", runOptions{}, nginx, "-g", configs, "-c", conffile))
        }()
        // Choose one of the ports where Nginx should listen, and wait
-       // here until we can connect. If ExternalURL is https://foo (with no port) then we connect to "foo:https"
+       // here until we can connect. If ExternalURL is https://foo
+       // (with no port) then we connect to "foo:https"
        testurl := url.URL(super.cluster.Services.Controller.ExternalURL)
        if testurl.Port() == "" {
                testurl.Host = net.JoinHostPort(testurl.Host, testurl.Scheme)
        }
        return waitForConnect(ctx, testurl.Host)
 }
+
+// internalSubnets returns the destinations of the host's directly
+// attached routes (as reported by `ip route`) formatted as entries
+// for an Nginx "geo" block, e.g., "1.2.3.0/24 0; 10.1.0.0/16 0; ".
+//
+// It returns an empty string, meaning all clients are treated as
+// external, if `ip route` fails or reports no usable entries. The
+// failure is logged as a warning on logger.
+func internalSubnets(logger logrus.FieldLogger) string {
+       iproutes, err := exec.Command("ip", "route").CombinedOutput()
+       if err != nil {
+               logger.Warnf("treating all clients as external because `ip route` failed: %s (%q)", err, iproutes)
+               return ""
+       }
+       var subnets strings.Builder
+       for _, line := range bytes.Split(iproutes, []byte("\n")) {
+               fields := strings.Fields(string(line))
+               // Only routes of the form "<destination> dev <ifname> ..."
+               // are of interest, for example:
+               //
+               // lan example:
+               // 192.168.86.0/24 dev ens3 proto kernel scope link src 192.168.86.196
+               // gcp example (private subnet):
+               // 10.47.0.0/24 dev eth0 proto kernel scope link src 10.47.0.5
+               // gcp example (no private subnet):
+               // 10.128.0.1 dev ens4 scope link
+               if len(fields) <= 2 || fields[1] != "dev" {
+                       continue
+               }
+               dest := fields[0]
+               // The destination is copied verbatim into the Nginx
+               // config, so it must be a real network or address.
+               // In particular, a route like "default dev ppp0 scope
+               // link" must not be emitted as "default 0;": "default"
+               // is a geo keyword, and that entry would classify
+               // every client as internal.
+               if _, _, err := net.ParseCIDR(dest); err != nil && net.ParseIP(dest) == nil {
+                       logger.Warnf("ignoring `ip route` entry %q: destination is not an IP address or CIDR prefix", string(line))
+                       continue
+               }
+               subnets.WriteString(dest)
+               subnets.WriteString(" 0; ")
+       }
+       return subnets.String()
+}