+// Test-only hooks. Neither variable is referenced in this part of
+// the file; presumably they are set by tests elsewhere in the
+// package to steer proxying decisions (TODO confirm against the
+// callers).
+var (
+	// forceProxyForTest defaults to false.
+	forceProxyForTest bool
+	// forceInternalURLForTest defaults to nil.
+	forceInternalURLForTest *arvados.URL
+)
+
+// ContainerRequestLog returns a WebDAV handler that reads logs from
+// the indicated container request. It works by proxying the incoming
+// HTTP request to
+//
+//   - the container gateway, if there is an associated container that
+//     is running
+//
+//   - a different controller process, if there is a running container
+//     whose gateway is accessible through a tunnel to a different
+//     controller process
+//
+//   - keep-web, if saved logs exist and there is no gateway (or the
+//     associated container is finished)
+//
+//   - an empty-collection stub, if there is no gateway and no saved
+//     log
+//
+// For an incoming request
+//
+//	GET /arvados/v1/container_requests/{cr_uuid}/log/{c_uuid}{/c_log_path}
+//
+// The upstream request may be to {c_uuid}'s container gateway
+//
+//	GET /arvados/v1/container_requests/{cr_uuid}/log/{c_uuid}{/c_log_path}
+//	X-Webdav-Prefix: /arvados/v1/container_requests/{cr_uuid}/log/{c_uuid}
+//	X-Webdav-Source: /log
+//
+// ...or the upstream request may be to keep-web (where {cr_log_uuid}
+// is the container request log collection UUID)
+//
+//	GET /arvados/v1/container_requests/{cr_uuid}/log/{c_uuid}{/c_log_path}
+//	Host: {cr_log_uuid}.internal
+//	X-Webdav-Prefix: /arvados/v1/container_requests/{cr_uuid}/log
+//	X-Arvados-Container-Uuid: {c_uuid}
+//
+// ...or the request may be handled locally using an empty-collection
+// stub.
+//
+// A non-nil error is returned when the container request or container
+// record cannot be retrieved (other than a 401 on the container
+// request, which is turned into a handler that sends a basic-auth
+// challenge) or when no gateway can be found for a running container.
+func (conn *Conn) ContainerRequestLog(ctx context.Context, opts arvados.ContainerLogOptions) (http.Handler, error) {
+	if opts.Method == "OPTIONS" && opts.Header.Get("Access-Control-Request-Method") != "" {
+		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			if !keepweb.ServeCORSPreflight(w, opts.Header) {
+				// Inconceivable. We already checked
+				// for the only condition where
+				// ServeCORSPreflight returns false.
+				httpserver.Error(w, "unhandled CORS preflight request", http.StatusInternalServerError)
+			}
+		}), nil
+	}
+	cr, err := conn.railsProxy.ContainerRequestGet(ctx, arvados.GetOptions{UUID: opts.UUID, Select: []string{"uuid", "container_uuid", "log_uuid"}})
+	if err != nil {
+		if se := httpserver.HTTPStatusError(nil); errors.As(err, &se) && se.HTTPStatus() == http.StatusUnauthorized {
+			// Hint to WebDAV client that we accept HTTP basic auth.
+			return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+				w.Header().Set("Www-Authenticate", "Basic realm=\"collections\"")
+				w.WriteHeader(http.StatusUnauthorized)
+			}), nil
+		}
+		return nil, err
+	}
+	// NOTE(review): cr.ContainerUUID is passed through as-is, even
+	// if it is empty (no container assigned yet) -- confirm that
+	// the resulting error is the intended response in that case.
+	ctr, err := conn.railsProxy.ContainerGet(ctx, arvados.GetOptions{UUID: cr.ContainerUUID, Select: []string{"uuid", "state", "gateway_address"}})
+	if err != nil {
+		return nil, err
+	}
+	// .../log/{ctr.UUID} is a directory where the currently
+	// assigned container's log data [will] appear (as opposed to
+	// previous attempts in .../log/{previous_ctr_uuid}). Requests
+	// that are outside that directory, and requests on a
+	// non-running container, are proxied to keep-web instead of
+	// going through the container gateway system.
+	//
+	// Side note: a depth>1 directory tree listing starting at
+	// .../{cr_uuid}/log will only include subdirectories for
+	// finished containers, i.e., will not include a subdirectory
+	// with log data for a current (unfinished) container UUID.
+	// In order to access live logs, a client must look up the
+	// container_uuid field of the container request record, and
+	// explicitly request a path under .../{cr_uuid}/log/{c_uuid}.
+	if ctr.GatewayAddress == "" ||
+		(ctr.State != arvados.ContainerStateLocked && ctr.State != arvados.ContainerStateRunning) ||
+		!(opts.Path == "/"+ctr.UUID || strings.HasPrefix(opts.Path, "/"+ctr.UUID+"/")) {
+		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			conn.serveContainerRequestLogViaKeepWeb(opts, cr, w, r)
+		}), nil
+	}
+	dial, arpc, err := conn.findGateway(ctx, ctr, opts.NoForward)
+	if err != nil {
+		return nil, err
+	}
+	if arpc != nil {
+		// The gateway is reachable through another controller
+		// process: hand the request over, and set NoForward on
+		// the forwarded options.
+		opts.NoForward = true
+		return arpc.ContainerRequestLog(ctx, opts)
+	}
+	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		r = r.WithContext(ctx)
+		var proxyReq *http.Request
+		var proxyErr error
+		var expectRespondAuth string
+		proxy := &httputil.ReverseProxy{
+			// Our custom Transport:
+			//
+			// - Uses a custom dialer to connect to the
+			// gateway (either directly or through a
+			// tunnel set up through ContainerTunnel)
+			//
+			// - Verifies the gateway's TLS certificate
+			// using X-Arvados-Authorization headers.
+			//
+			// This involves modifying the outgoing
+			// request header in DialTLSContext.
+			// (ReverseProxy certainly doesn't expect us
+			// to do this, but it works.)
+			Transport: &http.Transport{
+				// This Transport is built for a single
+				// request and never reused, so a
+				// kept-alive connection could never be
+				// picked up again: it would just sit in
+				// the idle pool (there is no idle
+				// timeout by default) until the gateway
+				// hangs up. Close it when the response
+				// is done instead.
+				DisableKeepAlives: true,
+				DialTLSContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
+					tlsconn, requestAuth, respondAuth, err := dial()
+					if err != nil {
+						return nil, err
+					}
+					proxyReq.Header.Set("X-Arvados-Authorization", requestAuth)
+					expectRespondAuth = respondAuth
+					return tlsconn, nil
+				},
+			},
+			Director: func(r *http.Request) {
+				// Scheme/host of incoming r.URL are
+				// irrelevant now, and may even be
+				// missing. Host is ignored by our
+				// DialTLSContext, but we need a
+				// generic syntactically correct URL
+				// for net/http to work with.
+				r.URL.Scheme = "https"
+				r.URL.Host = "0.0.0.0:0"
+				r.Header.Set("X-Arvados-Container-Gateway-Uuid", ctr.UUID)
+				r.Header.Set("X-Webdav-Prefix", "/arvados/v1/container_requests/"+cr.UUID+"/log/"+ctr.UUID)
+				r.Header.Set("X-Webdav-Source", "/log")
+				// Remember the outgoing request so
+				// DialTLSContext can add the
+				// authorization header to it.
+				proxyReq = r
+			},
+			ModifyResponse: func(resp *http.Response) error {
+				if resp.Header.Get("X-Arvados-Authorization-Response") != expectRespondAuth {
+					// Note this is how we detect
+					// an attacker-in-the-middle.
+					return httpserver.ErrorWithStatus(errors.New("bad X-Arvados-Authorization-Response header"), http.StatusBadGateway)
+				}
+				resp.Header.Del("X-Arvados-Authorization-Response")
+				preemptivelyDeduplicateHeaders(w.Header(), resp.Header)
+				return nil
+			},
+			// Don't write an error response yet: record the
+			// error so we can decide below whether to fall
+			// back to keep-web.
+			ErrorHandler: func(w http.ResponseWriter, r *http.Request, err error) {
+				proxyErr = err
+			},
+		}
+		proxy.ServeHTTP(w, r)
+		if proxyErr == nil {
+			// proxy succeeded
+			return
+		}
+		// If proxying to the container gateway fails, it
+		// might be caused by a race where crunch-run exited
+		// after we decided (above) the log was not final.
+		// In that case we should proxy to keep-web.
+		latest, err := conn.railsProxy.ContainerGet(ctx, arvados.GetOptions{
+			UUID:   ctr.UUID,
+			Select: []string{"uuid", "state", "gateway_address", "log"},
+		})
+		switch {
+		case err != nil:
+			// Lost access to the container record?
+			httpserver.Error(w, "error re-fetching container record: "+err.Error(), http.StatusServiceUnavailable)
+		case latest.State == arvados.ContainerStateLocked || latest.State == arvados.ContainerStateRunning:
+			// No race, proxyErr was the best we can do
+			httpserver.Error(w, "proxy error: "+proxyErr.Error(), http.StatusServiceUnavailable)
+		default:
+			conn.serveContainerRequestLogViaKeepWeb(opts, cr, w, r)
+		}
+	}), nil
+}
+
+// serveContainerRequestLogViaKeepWeb handles a request for saved
+// container request log content by proxying to one of the configured
+// keep-web servers. If the container request has no log collection
+// yet (cr.LogUUID is empty), it serves an empty directory locally
+// instead.
+//
+// It tries to choose a keep-web server that is running on this host.
+// If the cluster has no WebDAV InternalURLs configured, it responds
+// with a 500 error.
+func (conn *Conn) serveContainerRequestLogViaKeepWeb(opts arvados.ContainerLogOptions, cr arvados.ContainerRequest, w http.ResponseWriter, r *http.Request) {
+	if cr.LogUUID == "" {
+		// Special case: if no log data exists yet, we serve
+		// an empty collection by ourselves instead of
+		// proxying to keep-web.
+		conn.serveEmptyDir("/arvados/v1/container_requests/"+cr.UUID+"/log", w, r)
+		return
+	}
+	// The second result is deliberately ignored: our own hostname
+	// is only used as a preference hint when choosing a keep-web
+	// server below. (Presumably myURL is the zero URL when the
+	// context carries none -- TODO confirm.)
+	myURL, _ := service.URLFromContext(r.Context())
+	u := url.URL(myURL)
+	myHostname := u.Hostname()
+	var webdavBase arvados.URL
+	var ok bool
+	for webdavBase = range conn.cluster.Services.WebDAV.InternalURLs {
+		ok = true
+		u := url.URL(webdavBase)
+		if h := u.Hostname(); h == "127.0.0.1" || h == "0.0.0.0" || h == "::1" || h == myHostname {
+			// Prefer a keep-web service running on the
+			// same host as us. (If we don't find one, we
+			// pick one arbitrarily.)
+			break
+		}
+	}
+	if !ok {
+		httpserver.Error(w, "no internalURLs configured for WebDAV service", http.StatusInternalServerError)
+		return
+	}
+
+	// We can't change r.URL.Path without confusing WebDAV
+	// (request body and response headers refer to the same
+	// paths) so we tell keep-web to map the log collection onto
+	// the container_requests/X/log/ namespace.
+	webdavPrefix := "/arvados/v1/container_requests/" + cr.UUID + "/log"
+	webdavSource := ""
+	// A container UUID is 27 characters, "{5}-dz642-{15}". Only
+	// treat the first path component as a container UUID if it is
+	// exactly that long, i.e., it is followed by "/" or by the end
+	// of the path. Otherwise the prefix sent to keep-web would cut
+	// the request path in the middle of a component.
+	const ctrUUIDLen = 27
+	if p := opts.Path; len(p) > ctrUUIDLen &&
+		p[0] == '/' &&
+		p[6:13] == "-dz642-" &&
+		(len(p) == ctrUUIDLen+1 || p[ctrUUIDLen+1] == '/') {
+		// "/arvados/v1/container_requests/{crUUID}/log/{cUUID}..."
+		// proxies to
+		// "/log for container {cUUID}..."
+		ctrUUID := p[1 : ctrUUIDLen+1]
+		webdavPrefix += "/" + ctrUUID
+		webdavSource = "/log for container " + ctrUUID + "/"
+	}
+
+	proxy := &httputil.ReverseProxy{
+		Director: func(r *http.Request) {
+			r.URL.Scheme = webdavBase.Scheme
+			r.URL.Host = webdavBase.Host
+			// Outgoing Host header specifies the
+			// collection ID.
+			r.Host = cr.LogUUID + ".internal"
+			// We already checked permission on the
+			// container, so we can use a root token here
+			// instead of counting on the "access to log
+			// via container request and container"
+			// permission check, which can be racy when a
+			// request gets retried with a new container.
+			r.Header.Set("Authorization", "Bearer "+conn.cluster.SystemRootToken)
+			r.Header.Set("X-Webdav-Prefix", webdavPrefix)
+			if webdavSource != "" {
+				r.Header.Set("X-Webdav-Source", webdavSource)
+			}
+		},
+		ModifyResponse: func(resp *http.Response) error {
+			preemptivelyDeduplicateHeaders(w.Header(), resp.Header)
+			return nil
+		},
+	}
+	if conn.cluster.TLS.Insecure {
+		proxy.Transport = &http.Transport{
+			// This Transport only lives for one request, so
+			// a kept-alive connection could never be reused;
+			// close it when the response is done rather
+			// than leaving it idle.
+			DisableKeepAlives: true,
+			TLSClientConfig: &tls.Config{
+				InsecureSkipVerify: conn.cluster.TLS.Insecure,
+			},
+		}
+	}
+	proxy.ServeHTTP(w, r)
+}
+
+// preemptivelyDeduplicateHeaders removes from dst every header that
+// is present in src. dst is the downstream ResponseWriter's Header();
+// src is the upstream resp.Header. src is not modified.
+//
+// It is meant to be called from a ReverseProxy ModifyResponse hook.
+// httputil.ReverseProxy copies upstream response headers to the
+// downstream ResponseWriter with (http.Header)Add(), so a header that
+// was already set downstream ends up duplicated. For example, if we
+// set CORS headers and the upstream sets them too, our client
+// receives
+//
+//	Access-Control-Allow-Origin: *
+//	Access-Control-Allow-Origin: *
+//
+// ...which is incorrect. Dropping the conflicting downstream headers
+// first means the subsequent Add() calls install the upstream values
+// without producing duplicates.
+func preemptivelyDeduplicateHeaders(dst, src http.Header) {
+	for name := range src {
+		// Same effect as dst.Del(name): the key is
+		// canonicalized before it is removed.
+		delete(dst, http.CanonicalHeaderKey(name))
+	}
+}
+
+// serveEmptyDir answers a webdav request as if an empty collection
+// were rooted at the given path, using a newly created in-memory
+// filesystem and webdavfs.NoLockSystem. It is intended for read-only
+// requests, and stands in for proxying to an empty collection in
+// keep-web without the extra hop.
+//
+// Errors reported by the webdav handler are logged at info level,
+// except "does not exist" errors, which are not logged.
+func (conn *Conn) serveEmptyDir(path string, w http.ResponseWriter, r *http.Request) {
+	logWebdavError := func(r *http.Request, err error) {
+		if err == nil || os.IsNotExist(err) {
+			return
+		}
+		ctxlog.FromContext(r.Context()).WithError(err).Info("webdav error on empty collection fs")
+	}
+	handler := &webdav.Handler{
+		Prefix:     path,
+		FileSystem: webdav.NewMemFS(),
+		LockSystem: webdavfs.NoLockSystem,
+		Logger:     logWebdavError,
+	}
+	handler.ServeHTTP(w, r)
+}
+