20831: Make the IsAdmin and IsInvited pointers so they are nullable
[arvados.git] / lib / controller / localdb / container_gateway.go
index 8a70dc8e81264ceec663cf2d90d901b5140c4211..376f55b7b3f65d0be7121d07e0a5e985a33a7c07 100644 (file)
@@ -21,6 +21,7 @@ import (
        "net/http"
        "net/http/httputil"
        "net/url"
+       "os"
        "strings"
 
        "git.arvados.org/arvados.git/lib/controller/rpc"
@@ -30,6 +31,7 @@ import (
        "git.arvados.org/arvados.git/sdk/go/auth"
        "git.arvados.org/arvados.git/sdk/go/ctxlog"
        "git.arvados.org/arvados.git/sdk/go/httpserver"
+       keepweb "git.arvados.org/arvados.git/services/keep-web"
        "github.com/hashicorp/yamux"
        "golang.org/x/net/webdav"
 )
@@ -39,29 +41,88 @@ var (
        forceInternalURLForTest *arvados.URL
 )
 
-// ContainerLog returns a WebDAV handler that reads logs from the
-// indicated container. It works by proxying the request to
+// ContainerRequestLog returns a WebDAV handler that reads logs from
+// the indicated container request. It works by proxying the incoming
+// HTTP request to
 //
-//   - the container gateway, if the container is running
+//   - the container gateway, if there is an associated container that
+//     is running
 //
-//   - a different controller process, if the container is running and
-//     the gateway is accessible through a tunnel to a different
+//   - a different controller process, if there is a running container
+//     whose gateway is accessible through a tunnel to a different
 //     controller process
 //
 //   - keep-web, if saved logs exist and there is no gateway (or the
-//     container is finished)
+//     associated container is finished)
 //
 //   - an empty-collection stub, if there is no gateway and no saved
 //     log
-func (conn *Conn) ContainerLog(ctx context.Context, opts arvados.ContainerLogOptions) (http.Handler, error) {
-       ctr, err := conn.railsProxy.ContainerGet(ctx, arvados.GetOptions{UUID: opts.UUID, Select: []string{"uuid", "state", "gateway_address", "log"}})
+//
+// For an incoming request
+//
+//     GET /arvados/v1/container_requests/{cr_uuid}/log/{c_uuid}{/c_log_path}
+//
+// The upstream request may be to {c_uuid}'s container gateway
+//
+//     GET /arvados/v1/container_requests/{cr_uuid}/log/{c_uuid}{/c_log_path}
+//     X-Webdav-Prefix: /arvados/v1/container_requests/{cr_uuid}/log/{c_uuid}
+//     X-Webdav-Source: /log
+//
+// ...or the upstream request may be to keep-web (where {cr_log_uuid}
+// is the container request log collection UUID)
+//
+//     GET /arvados/v1/container_requests/{cr_uuid}/log/{c_uuid}{/c_log_path}
+//     Host: {cr_log_uuid}.internal
+//     X-Webdav-Prefix: /arvados/v1/container_requests/{cr_uuid}/log
+//     X-Arvados-Container-Uuid: {c_uuid}
+//
+// ...or the request may be handled locally using an empty-collection
+// stub.
+func (conn *Conn) ContainerRequestLog(ctx context.Context, opts arvados.ContainerLogOptions) (http.Handler, error) {
+       if opts.Method == "OPTIONS" && opts.Header.Get("Access-Control-Request-Method") != "" {
+               return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+                       if !keepweb.ServeCORSPreflight(w, opts.Header) {
+                               // Inconceivable.  We already checked
+                               // for the only condition where
+                               // ServeCORSPreflight returns false.
+                               httpserver.Error(w, "unhandled CORS preflight request", http.StatusInternalServerError)
+                       }
+               }), nil
+       }
+       cr, err := conn.railsProxy.ContainerRequestGet(ctx, arvados.GetOptions{UUID: opts.UUID, Select: []string{"uuid", "container_uuid", "log_uuid"}})
+       if err != nil {
+               if se := httpserver.HTTPStatusError(nil); errors.As(err, &se) && se.HTTPStatus() == http.StatusUnauthorized {
+                       // Hint to WebDAV client that we accept HTTP basic auth.
+                       return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+                               w.Header().Set("Www-Authenticate", "Basic realm=\"collections\"")
+                               w.WriteHeader(http.StatusUnauthorized)
+                       }), nil
+               }
+               return nil, err
+       }
+       ctr, err := conn.railsProxy.ContainerGet(ctx, arvados.GetOptions{UUID: cr.ContainerUUID, Select: []string{"uuid", "state", "gateway_address"}})
        if err != nil {
                return nil, err
        }
+       // .../log/{ctr.UUID} is a directory where the currently
+       // assigned container's log data [will] appear (as opposed to
+       // previous attempts in .../log/{previous_ctr_uuid}). Requests
+       // that are outside that directory, and requests on a
+       // non-running container, are proxied to keep-web instead of
+       // going through the container gateway system.
+       //
+       // Side note: a depth>1 directory tree listing starting at
+       // .../{cr_uuid}/log will only include subdirectories for
+       // finished containers, i.e., will not include a subdirectory
+       // with log data for a current (unfinished) container UUID.
+       // In order to access live logs, a client must look up the
+       // container_uuid field of the container request record, and
+       // explicitly request a path under .../{cr_uuid}/log/{c_uuid}.
        if ctr.GatewayAddress == "" ||
-               (ctr.State != arvados.ContainerStateLocked && ctr.State != arvados.ContainerStateRunning) {
+               (ctr.State != arvados.ContainerStateLocked && ctr.State != arvados.ContainerStateRunning) ||
+               !(opts.Path == "/"+ctr.UUID || strings.HasPrefix(opts.Path, "/"+ctr.UUID+"/")) {
                return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-                       conn.serveContainerLogViaKeepWeb(opts, ctr, w, r)
+                       conn.serveContainerRequestLogViaKeepWeb(opts, cr, w, r)
                }), nil
        }
        dial, arpc, err := conn.findGateway(ctx, ctr, opts.NoForward)
@@ -70,7 +131,7 @@ func (conn *Conn) ContainerLog(ctx context.Context, opts arvados.ContainerLogOpt
        }
        if arpc != nil {
                opts.NoForward = true
-               return arpc.ContainerLog(ctx, opts)
+               return arpc.ContainerRequestLog(ctx, opts)
        }
        return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
                r = r.WithContext(ctx)
@@ -111,7 +172,9 @@ func (conn *Conn) ContainerLog(ctx context.Context, opts arvados.ContainerLogOpt
                                // for net/http to work with.
                                r.URL.Scheme = "https"
                                r.URL.Host = "0.0.0.0:0"
-                               r.Header.Set("X-Arvados-Container-Gateway-Uuid", opts.UUID)
+                               r.Header.Set("X-Arvados-Container-Gateway-Uuid", ctr.UUID)
+                               r.Header.Set("X-Webdav-Prefix", "/arvados/v1/container_requests/"+cr.UUID+"/log/"+ctr.UUID)
+                               r.Header.Set("X-Webdav-Source", "/log")
                                proxyReq = r
                        },
                        ModifyResponse: func(resp *http.Response) error {
@@ -120,6 +183,8 @@ func (conn *Conn) ContainerLog(ctx context.Context, opts arvados.ContainerLogOpt
                                        // an attacker-in-the-middle.
                                        return httpserver.ErrorWithStatus(errors.New("bad X-Arvados-Authorization-Response header"), http.StatusBadGateway)
                                }
+                               resp.Header.Del("X-Arvados-Authorization-Response")
+                               preemptivelyDeduplicateHeaders(w.Header(), resp.Header)
                                return nil
                        },
                        ErrorHandler: func(w http.ResponseWriter, r *http.Request, err error) {
@@ -136,7 +201,7 @@ func (conn *Conn) ContainerLog(ctx context.Context, opts arvados.ContainerLogOpt
                // after we decided (above) the log was not final.
                // In that case we should proxy to keep-web.
                ctr, err := conn.railsProxy.ContainerGet(ctx, arvados.GetOptions{
-                       UUID:   opts.UUID,
+                       UUID:   ctr.UUID,
                        Select: []string{"uuid", "state", "gateway_address", "log"},
                })
                if err != nil {
@@ -146,7 +211,7 @@ func (conn *Conn) ContainerLog(ctx context.Context, opts arvados.ContainerLogOpt
                        // No race, proxyErr was the best we can do
                        httpserver.Error(w, "proxy error: "+proxyErr.Error(), http.StatusServiceUnavailable)
                } else {
-                       conn.serveContainerLogViaKeepWeb(opts, ctr, w, r)
+                       conn.serveContainerRequestLogViaKeepWeb(opts, cr, w, r)
                }
        }), nil
 }
@@ -155,12 +220,12 @@ func (conn *Conn) ContainerLog(ctx context.Context, opts arvados.ContainerLogOpt
 // log content by proxying to one of the configured keep-web servers.
 //
 // It tries to choose a keep-web server that is running on this host.
-func (conn *Conn) serveContainerLogViaKeepWeb(opts arvados.ContainerLogOptions, ctr arvados.Container, w http.ResponseWriter, r *http.Request) {
-       if ctr.Log == "" {
+func (conn *Conn) serveContainerRequestLogViaKeepWeb(opts arvados.ContainerLogOptions, cr arvados.ContainerRequest, w http.ResponseWriter, r *http.Request) {
+       if cr.LogUUID == "" {
                // Special case: if no log data exists yet, we serve
                // an empty collection by ourselves instead of
                // proxying to keep-web.
-               conn.serveEmptyDir("/arvados/v1/containers/"+ctr.UUID+"/log", w, r)
+               conn.serveEmptyDir("/arvados/v1/container_requests/"+cr.UUID+"/log", w, r)
                return
        }
        myURL, _ := service.URLFromContext(r.Context())
@@ -168,7 +233,7 @@ func (conn *Conn) serveContainerLogViaKeepWeb(opts arvados.ContainerLogOptions,
        myHostname := u.Hostname()
        var webdavBase arvados.URL
        var ok bool
-       for webdavBase = range conn.cluster.Services.WebDAVDownload.InternalURLs {
+       for webdavBase = range conn.cluster.Services.WebDAV.InternalURLs {
                ok = true
                u := url.URL(webdavBase)
                if h := u.Hostname(); h == "127.0.0.1" || h == "0.0.0.0" || h == "::1" || h == myHostname {
@@ -184,15 +249,11 @@ func (conn *Conn) serveContainerLogViaKeepWeb(opts arvados.ContainerLogOptions,
        }
        proxy := &httputil.ReverseProxy{
                Director: func(r *http.Request) {
-                       r.URL = &url.URL{
-                               Scheme: webdavBase.Scheme,
-                               Host:   webdavBase.Host,
-                               Path:   "/by_id/" + url.PathEscape(ctr.Log) + opts.Path,
-                       }
-                       // Our outgoing Host header must match
-                       // WebDAVDownload.ExternalURL, otherwise
-                       // keep-web does not accept an auth token.
-                       r.Host = conn.cluster.Services.WebDAVDownload.ExternalURL.Host
+                       r.URL.Scheme = webdavBase.Scheme
+                       r.URL.Host = webdavBase.Host
+                       // Outgoing Host header specifies the
+                       // collection ID.
+                       r.Host = cr.LogUUID + ".internal"
                        // We already checked permission on the
                        // container, so we can use a root token here
                        // instead of counting on the "access to log
@@ -200,6 +261,23 @@ func (conn *Conn) serveContainerLogViaKeepWeb(opts arvados.ContainerLogOptions,
                        // permission check, which can be racy when a
                        // request gets retried with a new container.
                        r.Header.Set("Authorization", "Bearer "+conn.cluster.SystemRootToken)
+                       // We can't change r.URL.Path without
+                       // confusing WebDAV (request body and response
+                       // headers refer to the same paths) so we tell
+                       // keep-web to map the log collection onto the
+                       // container_requests/X/log/ namespace.
+                       r.Header.Set("X-Webdav-Prefix", "/arvados/v1/container_requests/"+cr.UUID+"/log")
+                       if len(opts.Path) >= 28 && opts.Path[6:13] == "-dz642-" {
+                               // "/arvados/v1/container_requests/{crUUID}/log/{cUUID}..."
+                               // proxies to
+                               // "/log for container {cUUID}..."
+                               r.Header.Set("X-Webdav-Prefix", "/arvados/v1/container_requests/"+cr.UUID+"/log/"+opts.Path[1:28])
+                               r.Header.Set("X-Webdav-Source", "/log for container "+opts.Path[1:28]+"/")
+                       }
+               },
+               ModifyResponse: func(resp *http.Response) error {
+                       preemptivelyDeduplicateHeaders(w.Header(), resp.Header)
+                       return nil
                },
        }
        if conn.cluster.TLS.Insecure {
@@ -212,6 +290,31 @@ func (conn *Conn) serveContainerLogViaKeepWeb(opts arvados.ContainerLogOptions,
        proxy.ServeHTTP(w, r)
 }
 
+// preemptivelyDeduplicateHeaders deletes from dst every header that
+// is present in src. It is meant to be called from a
+// httputil.ReverseProxy ModifyResponse hook.
+//
+// ReverseProxy uses (http.Header)Add() to copy headers from the
+// upstream Response to the downstream ResponseWriter. If headers
+// have already been set on the downstream ResponseWriter, Add()
+// results in duplicate headers. For example, if we set CORS headers
+// and then use ReverseProxy with an upstream that also sets CORS
+// headers, our client will receive
+//
+//     Access-Control-Allow-Origin: *
+//     Access-Control-Allow-Origin: *
+//
+// ...which is incorrect. Removing the conflicting headers from dst
+// first means Add() assigns the upstream values without duplicates.
+//
+// dst is the downstream ResponseWriter's Header(). src is the
+// upstream resp.Header.
+func preemptivelyDeduplicateHeaders(dst, src http.Header) {
+       for hdr := range src {
+               dst.Del(hdr)
+       }
+}
+
 // serveEmptyDir handles read-only webdav requests as if there was an
 // empty collection rooted at the given path. It's equivalent to
 // proxying to an empty collection in keep-web, but avoids the extra
@@ -222,7 +325,7 @@ func (conn *Conn) serveEmptyDir(path string, w http.ResponseWriter, r *http.Requ
                FileSystem: webdav.NewMemFS(),
                LockSystem: webdavfs.NoLockSystem,
                Logger: func(r *http.Request, err error) {
-                       if err != nil {
+                       if err != nil && !os.IsNotExist(err) {
                                ctxlog.FromContext(r.Context()).WithError(err).Info("webdav error on empty collection fs")
                        }
                },
@@ -246,7 +349,7 @@ func (conn *Conn) ContainerSSH(ctx context.Context, opts arvados.ContainerSSHOpt
                return sshconn, err
        }
        ctxRoot := auth.NewContext(ctx, &auth.Credentials{Tokens: []string{conn.cluster.SystemRootToken}})
-       if !user.IsAdmin || !conn.cluster.Containers.ShellAccess.Admin {
+       if !*user.IsAdmin || !conn.cluster.Containers.ShellAccess.Admin {
                if !conn.cluster.Containers.ShellAccess.User {
                        return sshconn, httpserver.ErrorWithStatus(errors.New("shell access is disabled in config"), http.StatusServiceUnavailable)
                }
@@ -429,7 +532,7 @@ func (conn *Conn) findGateway(ctx context.Context, ctr arvados.Container, noForw
                return func() (net.Conn, string, string, error) {
                        rawconn, err := (&net.Dialer{}).DialContext(ctx, "tcp", ctr.GatewayAddress)
                        if err != nil {
-                               err = httpserver.ErrorWithStatus(err, http.StatusServiceUnavailable)
+                               return nil, "", "", httpserver.ErrorWithStatus(err, http.StatusServiceUnavailable)
                        }
                        return conn.dialGatewayTLS(ctx, ctr, rawconn)
                }, nil, nil
@@ -451,7 +554,7 @@ func (conn *Conn) findGateway(ctx context.Context, ctr arvados.Container, noForw
                return func() (net.Conn, string, string, error) {
                        rawconn, err := tunnel.Open()
                        if err != nil {
-                               err = httpserver.ErrorWithStatus(err, http.StatusServiceUnavailable)
+                               return nil, "", "", httpserver.ErrorWithStatus(err, http.StatusServiceUnavailable)
                        }
                        return conn.dialGatewayTLS(ctx, ctr, rawconn)
                }, nil, nil