19889: Preserve WebDAV path when proxying to keep-web.
authorTom Clegg <tom@curii.com>
Tue, 28 Mar 2023 19:51:59 +0000 (15:51 -0400)
committerTom Clegg <tom@curii.com>
Tue, 28 Mar 2023 19:51:59 +0000 (15:51 -0400)
WebDAV clients expect the path in the server response to match the
request.

Previously when proxying to keep-web we were rewriting the request
from /arvados/v1/containers/{uuid}/log/stderr.txt to
/by_id/{pdh}/stderr.txt, so the response referred to
/by_id/{pdh}/stderr.txt.

With this change, we leave the request path alone and use a new
X-Webdav-Prefix request header (/arvados/v1/containers/{uuid}/log in
this case) to tell keep-web to strip that part when accessing the
virtual filesystem.

New test uses cadaver, which fails on the previous version with

Could not access /arvados/v1/containers/zzzzz-dz642-queuedcontainer/log/ (not WebDAV-enabled?):
Did not find a collection resource.

Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@curii.com>

lib/controller/localdb/container_gateway.go
lib/controller/localdb/container_gateway_test.go
lib/controller/localdb/localdb_test.go
lib/controller/router/router.go
services/keep-web/handler.go

index 6cf787fcb4d857743f3b9513ab85b09695317885..77507901abb9e0b5cf2c68c8a999b5d27f468116 100644 (file)
@@ -191,15 +191,11 @@ func (conn *Conn) serveContainerLogViaKeepWeb(opts arvados.ContainerLogOptions,
        }
        proxy := &httputil.ReverseProxy{
                Director: func(r *http.Request) {
-                       r.URL = &url.URL{
-                               Scheme: webdavBase.Scheme,
-                               Host:   webdavBase.Host,
-                               Path:   "/by_id/" + url.PathEscape(ctr.Log) + opts.Path,
-                       }
-                       // Our outgoing Host header must match
-                       // WebDAVDownload.ExternalURL, otherwise
-                       // keep-web does not accept an auth token.
-                       r.Host = conn.cluster.Services.WebDAVDownload.ExternalURL.Host
+                       r.URL.Scheme = webdavBase.Scheme
+                       r.URL.Host = webdavBase.Host
+                       // Outgoing Host header specifies the
+                       // collection ID.
+                       r.Host = strings.Replace(ctr.Log, "+", "-", -1) + ".internal"
                        // We already checked permission on the
                        // container, so we can use a root token here
                        // instead of counting on the "access to log
@@ -207,6 +203,12 @@ func (conn *Conn) serveContainerLogViaKeepWeb(opts arvados.ContainerLogOptions,
                        // permission check, which can be racy when a
                        // request gets retried with a new container.
                        r.Header.Set("Authorization", "Bearer "+conn.cluster.SystemRootToken)
+                       // We can't change r.URL.Path without
+                       // confusing WebDAV (request body and response
+                       // headers refer to the same paths) so we tell
+                       // keep-web to map the log collection onto the
+                       // containers/X/log/ namespace.
+                       r.Header.Set("X-Webdav-Prefix", "/arvados/v1/containers/"+ctr.UUID+"/log")
                },
        }
        if conn.cluster.TLS.Insecure {
index cc0011864e16939fc21605a763d561d71739bf7c..4884eda466f9662e5c9dbb65bc74eb9fecc16486 100644 (file)
@@ -29,6 +29,7 @@ import (
        "git.arvados.org/arvados.git/sdk/go/arvadosclient"
        "git.arvados.org/arvados.git/sdk/go/arvadostest"
        "git.arvados.org/arvados.git/sdk/go/ctxlog"
+       "git.arvados.org/arvados.git/sdk/go/httpserver"
        "git.arvados.org/arvados.git/sdk/go/keepclient"
        "golang.org/x/crypto/ssh"
        check "gopkg.in/check.v1"
@@ -53,7 +54,7 @@ func (s *ContainerGatewaySuite) SetUpTest(c *check.C) {
        authKey := fmt.Sprintf("%x", h.Sum(nil))
 
        rtr := router.New(s.localdb, router.Config{})
-       s.srv = httptest.NewUnstartedServer(rtr)
+       s.srv = httptest.NewUnstartedServer(httpserver.AddRequestIDs(httpserver.LogRequests(rtr)))
        s.srv.StartTLS()
        // the test setup doesn't use lib/service so
        // service.URLFromContext() returns nothing -- instead, this
@@ -201,7 +202,11 @@ func (s *ContainerGatewaySuite) TestDirectTCP(c *check.C) {
        }
 }
 
-func (s *ContainerGatewaySuite) setupLogCollection(c *check.C, files map[string]string) {
+func (s *ContainerGatewaySuite) setupLogCollection(c *check.C) {
+       files := map[string]string{
+               "stderr.txt":   "hello world\n",
+               "a/b/c/d.html": "<html></html>\n",
+       }
        client := arvados.NewClientFromEnv()
        ac, err := arvadosclient.New(client)
        c.Assert(err, check.IsNil)
@@ -226,14 +231,35 @@ func (s *ContainerGatewaySuite) setupLogCollection(c *check.C, files map[string]
        s.gw.LogCollection = cfs
 }
 
+func (s *ContainerGatewaySuite) saveLogAndCloseGateway(c *check.C) {
+       rootctx := ctrlctx.NewWithToken(s.ctx, s.cluster, s.cluster.SystemRootToken)
+       txt, err := s.gw.LogCollection.MarshalManifest(".")
+       c.Assert(err, check.IsNil)
+       coll, err := s.localdb.CollectionCreate(rootctx, arvados.CreateOptions{
+               Attrs: map[string]interface{}{
+                       "manifest_text": txt,
+               }})
+       c.Assert(err, check.IsNil)
+       _, err = s.localdb.ContainerUpdate(rootctx, arvados.UpdateOptions{
+               UUID: s.ctrUUID,
+               Attrs: map[string]interface{}{
+                       "log":             coll.PortableDataHash,
+                       "gateway_address": "",
+               }})
+       c.Assert(err, check.IsNil)
+       // gateway_address="" above already ensures localdb
+       // can't circumvent the keep-web proxy test by getting
+       // content from the container gateway; this is just
+       // extra insurance.
+       s.gw.LogCollection = nil
+}
+
 func (s *ContainerGatewaySuite) TestContainerLogViaTunnel(c *check.C) {
        forceProxyForTest = true
        defer func() { forceProxyForTest = false }()
 
        s.gw = s.setupGatewayWithTunnel(c)
-       s.setupLogCollection(c, map[string]string{
-               "stderr.txt": "hello world\n",
-       })
+       s.setupLogCollection(c)
 
        for _, broken := range []bool{false, true} {
                c.Logf("broken=%v", broken)
@@ -269,40 +295,17 @@ func (s *ContainerGatewaySuite) TestContainerLogViaTunnel(c *check.C) {
 }
 
 func (s *ContainerGatewaySuite) TestContainerLogViaGateway(c *check.C) {
-       s.testContainerLog(c, true)
+       s.setupLogCollection(c)
+       s.testContainerLog(c)
 }
 
 func (s *ContainerGatewaySuite) TestContainerLogViaKeepWeb(c *check.C) {
-       s.testContainerLog(c, false)
+       s.setupLogCollection(c)
+       s.saveLogAndCloseGateway(c)
+       s.testContainerLog(c)
 }
 
-func (s *ContainerGatewaySuite) testContainerLog(c *check.C, viaGateway bool) {
-       s.setupLogCollection(c, map[string]string{
-               "stderr.txt":   "hello world\n",
-               "a/b/c/d.html": "<html></html>\n",
-       })
-       if !viaGateway {
-               rootctx := ctrlctx.NewWithToken(s.ctx, s.cluster, s.cluster.SystemRootToken)
-               txt, err := s.gw.LogCollection.MarshalManifest(".")
-               c.Assert(err, check.IsNil)
-               coll, err := s.localdb.CollectionCreate(rootctx, arvados.CreateOptions{
-                       Attrs: map[string]interface{}{
-                               "manifest_text": txt,
-                       }})
-               c.Assert(err, check.IsNil)
-               _, err = s.localdb.ContainerUpdate(rootctx, arvados.UpdateOptions{
-                       UUID: s.ctrUUID,
-                       Attrs: map[string]interface{}{
-                               "log":             coll.PortableDataHash,
-                               "gateway_address": "",
-                       }})
-               c.Assert(err, check.IsNil)
-               // gateway_address="" above already ensures localdb
-               // can't circumvent the keep-web proxy test by getting
-               // content from the container gateway; this is just
-               // extra insurance.
-               s.gw.LogCollection = nil
-       }
+func (s *ContainerGatewaySuite) testContainerLog(c *check.C) {
        for _, trial := range []struct {
                method       string
                path         string
@@ -397,12 +400,19 @@ func (s *ContainerGatewaySuite) testContainerLog(c *check.C, viaGateway bool) {
 }
 
 func (s *ContainerGatewaySuite) TestContainerLogViaCadaver(c *check.C) {
+       s.setupLogCollection(c)
+
        out := s.runCadaver(c, arvadostest.ActiveToken, "/arvados/v1/containers/"+s.ctrUUID+"/log", "ls")
        c.Check(out, check.Matches, `(?ms).*stderr\.txt\s+12\s.*`)
        c.Check(out, check.Matches, `(?ms).*a\s+0\s.*`)
 
        out = s.runCadaver(c, arvadostest.ActiveTokenV2, "/arvados/v1/containers/"+s.ctrUUID+"/log", "get stderr.txt")
        c.Check(out, check.Matches, `(?ms).*Downloading .* to stderr\.txt: .* succeeded\..*`)
+
+       s.saveLogAndCloseGateway(c)
+
+       out = s.runCadaver(c, arvadostest.ActiveTokenV2, "/arvados/v1/containers/"+s.ctrUUID+"/log", "get stderr.txt")
+       c.Check(out, check.Matches, `(?ms).*Downloading .* to stderr\.txt: .* succeeded\..*`)
 }
 
 func (s *ContainerGatewaySuite) runCadaver(c *check.C, password, path, stdin string) string {
@@ -410,14 +420,9 @@ func (s *ContainerGatewaySuite) runCadaver(c *check.C, password, path, stdin str
        // just fail on TLS cert verification.
        s.srv.Close()
        rtr := router.New(s.localdb, router.Config{})
-       s.srv = httptest.NewUnstartedServer(rtr)
+       s.srv = httptest.NewUnstartedServer(httpserver.AddRequestIDs(httpserver.LogRequests(rtr)))
        s.srv.Start()
 
-       s.setupLogCollection(c, map[string]string{
-               "stderr.txt":   "hello world\n",
-               "a/b/c/d.html": "<html></html>\n",
-       })
-
        tempdir, err := ioutil.TempDir("", "localdb-test-")
        c.Assert(err, check.IsNil)
        defer os.RemoveAll(tempdir)
index 8e543114ce025ffc93d52427a0ba356a59c21861..053031a8cfdec1b8c8aea58d5cfe93f79a838448 100644 (file)
@@ -44,8 +44,10 @@ func (s *localdbSuite) TearDownSuite(c *check.C) {
 
 func (s *localdbSuite) SetUpTest(c *check.C) {
        *s = localdbSuite{}
+       logger := ctxlog.TestLogger(c)
        s.ctx, s.cancel = context.WithCancel(context.Background())
-       cfg, err := config.NewLoader(nil, ctxlog.TestLogger(c)).Load()
+       s.ctx = ctxlog.Context(s.ctx, logger)
+       cfg, err := config.NewLoader(nil, logger).Load()
        c.Assert(err, check.IsNil)
        s.cluster, err = cfg.GetCluster("")
        c.Assert(err, check.IsNil)
index 72e5ab3108432a25e5d213681eff0e9c29aafab9..703c45701bd8c19caeec5f984c044ff2cb9d5b39 100644 (file)
@@ -662,11 +662,8 @@ func (rtr *router) addRoute(endpoint arvados.APIEndpoint, defaultOpts func() int
                }
                ctx := auth.NewContext(req.Context(), creds)
                ctx = arvados.ContextWithRequestID(ctx, req.Header.Get("X-Request-Id"))
-               logger.WithFields(logrus.Fields{
-                       "apiEndpoint": endpoint,
-                       "apiOptsType": fmt.Sprintf("%T", opts),
-                       "apiOpts":     opts,
-               }).Debug("exec")
+               req = req.WithContext(ctx)
+
                // Extract the token UUIDs (or a placeholder for v1 tokens)
                var tokenUUIDs []string
                for _, t := range creds.Tokens {
@@ -683,7 +680,13 @@ func (rtr *router) addRoute(endpoint arvados.APIEndpoint, defaultOpts func() int
                                tokenUUIDs = append(tokenUUIDs, "v1 token ending in "+end)
                        }
                }
-               httpserver.SetResponseLogFields(req.Context(), logrus.Fields{"tokenUUIDs": tokenUUIDs})
+               httpserver.SetResponseLogFields(ctx, logrus.Fields{"tokenUUIDs": tokenUUIDs})
+
+               logger.WithFields(logrus.Fields{
+                       "apiEndpoint": endpoint,
+                       "apiOptsType": fmt.Sprintf("%T", opts),
+                       "apiOpts":     opts,
+               }).Debug("exec")
                resp, err := exec(ctx, opts)
                if err != nil {
                        logger.WithError(err).Debugf("returning error type %T", err)
index 97749d967d9c423990d750b3767057b039bc3191..3b22a13b0223145eccc186bc65f451eb217d4c64 100644 (file)
@@ -213,7 +213,26 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                return
        }
 
-       pathParts := strings.Split(r.URL.Path[1:], "/")
+       webdavPrefix := ""
+       arvPath := r.URL.Path
+       if prefix := r.Header.Get("X-Webdav-Prefix"); prefix != "" {
+               // Enable a proxy (e.g., container log handler in
+               // controller) to satisfy a request for path
+               // "/foo/bar/baz.txt" using content from
+               // "//abc123-4.internal/bar/baz.txt", by adding a
+               // request header "X-Webdav-Prefix: /foo"
+               if !strings.HasPrefix(arvPath, prefix) {
+                       http.Error(w, "X-Webdav-Prefix header is not a prefix of the requested path", http.StatusBadRequest)
+                       return
+               }
+               arvPath = r.URL.Path[len(prefix):]
+               if arvPath == "" {
+                       arvPath = "/"
+               }
+               w.Header().Set("Vary", "X-Webdav-Prefix, "+w.Header().Get("Vary"))
+               webdavPrefix = prefix
+       }
+       pathParts := strings.Split(arvPath[1:], "/")
 
        var stripParts int
        var collectionID string
@@ -561,8 +580,11 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
        if r.Method == http.MethodGet {
                applyContentDispositionHdr(w, r, basename, attachment)
        }
+       if webdavPrefix == "" {
+               webdavPrefix = "/" + strings.Join(pathParts[:stripParts], "/")
+       }
        wh := webdav.Handler{
-               Prefix: "/" + strings.Join(pathParts[:stripParts], "/"),
+               Prefix: webdavPrefix,
                FileSystem: &webdavfs.FS{
                        FileSystem:    sessionFS,
                        Prefix:        fsprefix,