18790: Error out instead of retrying if server is too old.
[arvados.git] / lib / controller / localdb / container_gateway.go
index 4a7920c01ffe4ab2a8b374b4db4cc71bd2ab85bc..74c00da8b1251fbd8d612d0e46ae6a7144e908dd 100644 (file)
@@ -21,6 +21,7 @@ import (
        "net/http"
        "net/http/httputil"
        "net/url"
+       "os"
        "strings"
 
        "git.arvados.org/arvados.git/lib/controller/rpc"
@@ -56,6 +57,13 @@ var (
 func (conn *Conn) ContainerLog(ctx context.Context, opts arvados.ContainerLogOptions) (http.Handler, error) {
        ctr, err := conn.railsProxy.ContainerGet(ctx, arvados.GetOptions{UUID: opts.UUID, Select: []string{"uuid", "state", "gateway_address", "log"}})
        if err != nil {
+               if se := httpserver.HTTPStatusError(nil); errors.As(err, &se) && se.HTTPStatus() == http.StatusUnauthorized {
+                       // Hint to WebDAV client that we accept HTTP basic auth.
+                       return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+                               w.Header().Set("Www-Authenticate", "Basic realm=\"collections\"")
+                               w.WriteHeader(http.StatusUnauthorized)
+                       }), nil
+               }
                return nil, err
        }
        if ctr.GatewayAddress == "" ||
@@ -78,16 +86,25 @@ func (conn *Conn) ContainerLog(ctx context.Context, opts arvados.ContainerLogOpt
                var proxyErr error
                var expectRespondAuth string
                proxy := &httputil.ReverseProxy{
+                       // Our custom Transport:
+                       //
+                       // - Uses a custom dialer to connect to the
+                       // gateway (either directly or through a
+                       // tunnel set up through ContainerTunnel)
+                       //
+                       // - Verifies the gateway's TLS certificate
+                       // using X-Arvados-Authorization headers.
+                       //
+                       // This involves modifying the outgoing
+                       // request header in DialTLSContext.
+                       // (ReverseProxy certainly doesn't expect us
+                       // to do this, but it works.)
                        Transport: &http.Transport{
                                DialTLSContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
                                        tlsconn, requestAuth, respondAuth, err := dial()
                                        if err != nil {
                                                return nil, err
                                        }
-                                       // Modify our response header
-                                       // on the fly, even though
-                                       // ReverseProxy surely doesn't
-                                       // expect us to do this.
                                        proxyReq.Header.Set("X-Arvados-Authorization", requestAuth)
                                        expectRespondAuth = respondAuth
                                        return tlsconn, nil
@@ -96,10 +113,10 @@ func (conn *Conn) ContainerLog(ctx context.Context, opts arvados.ContainerLogOpt
                        Director: func(r *http.Request) {
                                // Scheme/host of incoming r.URL are
                                // irrelevant now, and may even be
-                               // missing. Ensure we have a generic
-                               // syntactically correct URL for
-                               // net/http to work with. (Host is
-                               // ignored by our DialTLSContext.)
+                               // missing. Host is ignored by our
+                               // DialTLSContext, but we need a
+                               // generic syntactically correct URL
+                               // for net/http to work with.
                                r.URL.Scheme = "https"
                                r.URL.Host = "0.0.0.0:0"
                                r.Header.Set("X-Arvados-Container-Gateway-Uuid", opts.UUID)
@@ -107,6 +124,8 @@ func (conn *Conn) ContainerLog(ctx context.Context, opts arvados.ContainerLogOpt
                        },
                        ModifyResponse: func(resp *http.Response) error {
                                if resp.Header.Get("X-Arvados-Authorization-Response") != expectRespondAuth {
+                                       // Note this is how we detect
+                                       // an attacker-in-the-middle.
                                        return httpserver.ErrorWithStatus(errors.New("bad X-Arvados-Authorization-Response header"), http.StatusBadGateway)
                                }
                                return nil
@@ -173,13 +192,24 @@ func (conn *Conn) serveContainerLogViaKeepWeb(opts arvados.ContainerLogOptions,
        }
        proxy := &httputil.ReverseProxy{
                Director: func(r *http.Request) {
-                       r.Host = conn.cluster.Services.WebDAVDownload.ExternalURL.Host
-                       r.URL = &url.URL{
-                               Scheme: webdavBase.Scheme,
-                               Host:   webdavBase.Host,
-                               Path:   "/by_id/" + url.PathEscape(ctr.Log) + opts.Path,
-                       }
+                       r.URL.Scheme = webdavBase.Scheme
+                       r.URL.Host = webdavBase.Host
+                       // Outgoing Host header specifies the
+                       // collection ID.
+                       r.Host = strings.Replace(ctr.Log, "+", "-", -1) + ".internal"
+                       // We already checked permission on the
+                       // container, so we can use a root token here
+                       // instead of counting on the "access to log
+                       // via container request and container"
+                       // permission check, which can be racy when a
+                       // request gets retried with a new container.
                        r.Header.Set("Authorization", "Bearer "+conn.cluster.SystemRootToken)
+                       // We can't change r.URL.Path without
+                       // confusing WebDAV (request body and response
+                       // headers refer to the same paths) so we tell
+                       // keep-web to map the log collection onto the
+                       // containers/X/log/ namespace.
+                       r.Header.Set("X-Webdav-Prefix", "/arvados/v1/containers/"+ctr.UUID+"/log")
                },
        }
        if conn.cluster.TLS.Insecure {
@@ -202,7 +232,7 @@ func (conn *Conn) serveEmptyDir(path string, w http.ResponseWriter, r *http.Requ
                FileSystem: webdav.NewMemFS(),
                LockSystem: webdavfs.NoLockSystem,
                Logger: func(r *http.Request, err error) {
-                       if err != nil {
+                       if err != nil && !os.IsNotExist(err) {
                                ctxlog.FromContext(r.Context()).WithError(err).Info("webdav error on empty collection fs")
                        }
                },
@@ -409,7 +439,7 @@ func (conn *Conn) findGateway(ctx context.Context, ctr arvados.Container, noForw
                return func() (net.Conn, string, string, error) {
                        rawconn, err := (&net.Dialer{}).DialContext(ctx, "tcp", ctr.GatewayAddress)
                        if err != nil {
-                               err = httpserver.ErrorWithStatus(err, http.StatusServiceUnavailable)
+                               return nil, "", "", httpserver.ErrorWithStatus(err, http.StatusServiceUnavailable)
                        }
                        return conn.dialGatewayTLS(ctx, ctr, rawconn)
                }, nil, nil
@@ -431,7 +461,7 @@ func (conn *Conn) findGateway(ctx context.Context, ctr arvados.Container, noForw
                return func() (net.Conn, string, string, error) {
                        rawconn, err := tunnel.Open()
                        if err != nil {
-                               err = httpserver.ErrorWithStatus(err, http.StatusServiceUnavailable)
+                               return nil, "", "", httpserver.ErrorWithStatus(err, http.StatusServiceUnavailable)
                        }
                        return conn.dialGatewayTLS(ctx, ctr, rawconn)
                }, nil, nil
@@ -465,10 +495,12 @@ func (conn *Conn) findGateway(ctx context.Context, ctr arvados.Container, noForw
        }
 }
 
+// dialGatewayTLS negotiates a TLS connection to a container gateway
+// over the given raw connection.
 func (conn *Conn) dialGatewayTLS(ctx context.Context, ctr arvados.Container, rawconn net.Conn) (*tls.Conn, string, string, error) {
        // crunch-run uses a self-signed / unverifiable TLS
        // certificate, so we use the following scheme to ensure we're
-       // not talking to a MITM.
+       // not talking to an attacker-in-the-middle.
        //
        // 1. Compute ctrKey = HMAC-SHA256(sysRootToken,ctrUUID) --
        // this will be the same ctrKey that a-d-c supplied to