From d7f388108615d981636e464785129ce4a958a7f5 Mon Sep 17 00:00:00 2001 From: Tom Clegg Date: Tue, 28 Mar 2023 15:51:59 -0400 Subject: [PATCH] 19889: Preserve WebDAV path when proxying to keep-web. WebDAV clients expect the path in the server response to match the request. Previously when proxying to keep-web we were rewriting the request from /arvados/v1/containers/{uuid}/log/stderr.txt to /by_id/{pdh}/stderr.txt, so the response referred to /by_id/{pdh}/stderr.txt. With this change, we leave the request path alone and use a new X-Webdav-Prefix request header (/arvados/v1/containers/{uuid}/log in this case) to tell keep-web to strip that part when accessing the virtual filesystem. New test uses cadaver, which fails on the previous version with Could not access /arvados/v1/containers/zzzzz-dz642-queuedcontainer/log/ (not WebDAV-enabled?): Did not find a collection resource. Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- lib/controller/localdb/container_gateway.go | 20 +++-- .../localdb/container_gateway_test.go | 85 ++++++++++--------- lib/controller/localdb/localdb_test.go | 4 +- lib/controller/router/router.go | 15 ++-- services/keep-web/handler.go | 26 +++++- 5 files changed, 92 insertions(+), 58 deletions(-) diff --git a/lib/controller/localdb/container_gateway.go b/lib/controller/localdb/container_gateway.go index 6cf787fcb4..77507901ab 100644 --- a/lib/controller/localdb/container_gateway.go +++ b/lib/controller/localdb/container_gateway.go @@ -191,15 +191,11 @@ func (conn *Conn) serveContainerLogViaKeepWeb(opts arvados.ContainerLogOptions, } proxy := &httputil.ReverseProxy{ Director: func(r *http.Request) { - r.URL = &url.URL{ - Scheme: webdavBase.Scheme, - Host: webdavBase.Host, - Path: "/by_id/" + url.PathEscape(ctr.Log) + opts.Path, - } - // Our outgoing Host header must match - // WebDAVDownload.ExternalURL, otherwise - // keep-web does not accept an auth token. - r.Host = conn.cluster.Services.WebDAVDownload.ExternalURL.Host + r.URL.Scheme = webdavBase.Scheme + r.URL.Host = webdavBase.Host + // Outgoing Host header specifies the + // collection ID. + r.Host = strings.Replace(ctr.Log, "+", "-", -1) + ".internal" // We already checked permission on the // container, so we can use a root token here // instead of counting on the "access to log @@ -207,6 +203,12 @@ func (conn *Conn) serveContainerLogViaKeepWeb(opts arvados.ContainerLogOptions, // permission check, which can be racy when a // request gets retried with a new container. r.Header.Set("Authorization", "Bearer "+conn.cluster.SystemRootToken) + // We can't change r.URL.Path without + // confusing WebDAV (request body and response + // headers refer to the same paths) so we tell + // keep-web to map the log collection onto the + // containers/X/log/ namespace. + r.Header.Set("X-Webdav-Prefix", "/arvados/v1/containers/"+ctr.UUID+"/log") }, } if conn.cluster.TLS.Insecure { diff --git a/lib/controller/localdb/container_gateway_test.go b/lib/controller/localdb/container_gateway_test.go index cc0011864e..4884eda466 100644 --- a/lib/controller/localdb/container_gateway_test.go +++ b/lib/controller/localdb/container_gateway_test.go @@ -29,6 +29,7 @@ import ( "git.arvados.org/arvados.git/sdk/go/arvadosclient" "git.arvados.org/arvados.git/sdk/go/arvadostest" "git.arvados.org/arvados.git/sdk/go/ctxlog" + "git.arvados.org/arvados.git/sdk/go/httpserver" "git.arvados.org/arvados.git/sdk/go/keepclient" "golang.org/x/crypto/ssh" check "gopkg.in/check.v1" @@ -53,7 +54,7 @@ func (s *ContainerGatewaySuite) SetUpTest(c *check.C) { authKey := fmt.Sprintf("%x", h.Sum(nil)) rtr := router.New(s.localdb, router.Config{}) - s.srv = httptest.NewUnstartedServer(rtr) + s.srv = httptest.NewUnstartedServer(httpserver.AddRequestIDs(httpserver.LogRequests(rtr))) s.srv.StartTLS() // the test setup doesn't use lib/service so // service.URLFromContext() returns nothing -- instead, this @@ -201,7 +202,11 @@ func (s *ContainerGatewaySuite) TestDirectTCP(c *check.C) { } } -func (s *ContainerGatewaySuite) setupLogCollection(c *check.C, files map[string]string) { +func (s *ContainerGatewaySuite) setupLogCollection(c *check.C) { + files := map[string]string{ + "stderr.txt": "hello world\n", + "a/b/c/d.html": "\n", + } client := arvados.NewClientFromEnv() ac, err := arvadosclient.New(client) c.Assert(err, check.IsNil) @@ -226,14 +231,35 @@ func (s *ContainerGatewaySuite) setupLogCollection(c *check.C, files map[string] s.gw.LogCollection = cfs } +func (s *ContainerGatewaySuite) saveLogAndCloseGateway(c *check.C) { + rootctx := ctrlctx.NewWithToken(s.ctx, s.cluster, s.cluster.SystemRootToken) + txt, err := s.gw.LogCollection.MarshalManifest(".") + c.Assert(err, check.IsNil) + coll, err := s.localdb.CollectionCreate(rootctx, arvados.CreateOptions{ + Attrs: map[string]interface{}{ + "manifest_text": txt, + }}) + c.Assert(err, check.IsNil) + _, err = s.localdb.ContainerUpdate(rootctx, arvados.UpdateOptions{ + UUID: s.ctrUUID, + Attrs: map[string]interface{}{ + "log": coll.PortableDataHash, + "gateway_address": "", + }}) + c.Assert(err, check.IsNil) + // gateway_address="" above already ensures localdb + // can't circumvent the keep-web proxy test by getting + // content from the container gateway; this is just + // extra insurance. + s.gw.LogCollection = nil +} + func (s *ContainerGatewaySuite) TestContainerLogViaTunnel(c *check.C) { forceProxyForTest = true defer func() { forceProxyForTest = false }() s.gw = s.setupGatewayWithTunnel(c) - s.setupLogCollection(c, map[string]string{ - "stderr.txt": "hello world\n", - }) + s.setupLogCollection(c) for _, broken := range []bool{false, true} { c.Logf("broken=%v", broken) @@ -269,40 +295,17 @@ func (s *ContainerGatewaySuite) TestContainerLogViaTunnel(c *check.C) { } func (s *ContainerGatewaySuite) TestContainerLogViaGateway(c *check.C) { - s.testContainerLog(c, true) + s.setupLogCollection(c) + s.testContainerLog(c) } func (s *ContainerGatewaySuite) TestContainerLogViaKeepWeb(c *check.C) { - s.testContainerLog(c, false) + s.setupLogCollection(c) + s.saveLogAndCloseGateway(c) + s.testContainerLog(c) } -func (s *ContainerGatewaySuite) testContainerLog(c *check.C, viaGateway bool) { - s.setupLogCollection(c, map[string]string{ - "stderr.txt": "hello world\n", - "a/b/c/d.html": "\n", - }) - if !viaGateway { - rootctx := ctrlctx.NewWithToken(s.ctx, s.cluster, s.cluster.SystemRootToken) - txt, err := s.gw.LogCollection.MarshalManifest(".") - c.Assert(err, check.IsNil) - coll, err := s.localdb.CollectionCreate(rootctx, arvados.CreateOptions{ - Attrs: map[string]interface{}{ - "manifest_text": txt, - }}) - c.Assert(err, check.IsNil) - _, err = s.localdb.ContainerUpdate(rootctx, arvados.UpdateOptions{ - UUID: s.ctrUUID, - Attrs: map[string]interface{}{ - "log": coll.PortableDataHash, - "gateway_address": "", - }}) - c.Assert(err, check.IsNil) - // gateway_address="" above already ensures localdb - // can't circumvent the keep-web proxy test by getting - // content from the container gateway; this is just - // extra insurance. - s.gw.LogCollection = nil - } +func (s *ContainerGatewaySuite) testContainerLog(c *check.C) { for _, trial := range []struct { method string path string @@ -397,12 +400,19 @@ func (s *ContainerGatewaySuite) testContainerLog(c *check.C, viaGateway bool) { } func (s *ContainerGatewaySuite) TestContainerLogViaCadaver(c *check.C) { + s.setupLogCollection(c) + out := s.runCadaver(c, arvadostest.ActiveToken, "/arvados/v1/containers/"+s.ctrUUID+"/log", "ls") c.Check(out, check.Matches, `(?ms).*stderr\.txt\s+12\s.*`) c.Check(out, check.Matches, `(?ms).*a\s+0\s.*`) out = s.runCadaver(c, arvadostest.ActiveTokenV2, "/arvados/v1/containers/"+s.ctrUUID+"/log", "get stderr.txt") c.Check(out, check.Matches, `(?ms).*Downloading .* to stderr\.txt: .* succeeded\..*`) + + s.saveLogAndCloseGateway(c) + + out = s.runCadaver(c, arvadostest.ActiveTokenV2, "/arvados/v1/containers/"+s.ctrUUID+"/log", "get stderr.txt") + c.Check(out, check.Matches, `(?ms).*Downloading .* to stderr\.txt: .* succeeded\..*`) } func (s *ContainerGatewaySuite) runCadaver(c *check.C, password, path, stdin string) string { @@ -410,14 +420,9 @@ func (s *ContainerGatewaySuite) runCadaver(c *check.C, password, path, stdin str // just fail on TLS cert verification. s.srv.Close() rtr := router.New(s.localdb, router.Config{}) - s.srv = httptest.NewUnstartedServer(rtr) + s.srv = httptest.NewUnstartedServer(httpserver.AddRequestIDs(httpserver.LogRequests(rtr))) s.srv.Start() - s.setupLogCollection(c, map[string]string{ - "stderr.txt": "hello world\n", - "a/b/c/d.html": "\n", - }) - tempdir, err := ioutil.TempDir("", "localdb-test-") c.Assert(err, check.IsNil) defer os.RemoveAll(tempdir) diff --git a/lib/controller/localdb/localdb_test.go b/lib/controller/localdb/localdb_test.go index 8e543114ce..053031a8cf 100644 --- a/lib/controller/localdb/localdb_test.go +++ b/lib/controller/localdb/localdb_test.go @@ -44,8 +44,10 @@ func (s *localdbSuite) TearDownSuite(c *check.C) { func (s *localdbSuite) SetUpTest(c *check.C) { *s = localdbSuite{} + logger := ctxlog.TestLogger(c) s.ctx, s.cancel = context.WithCancel(context.Background()) - cfg, err := config.NewLoader(nil, ctxlog.TestLogger(c)).Load() + s.ctx = ctxlog.Context(s.ctx, logger) + cfg, err := config.NewLoader(nil, logger).Load() c.Assert(err, check.IsNil) s.cluster, err = cfg.GetCluster("") c.Assert(err, check.IsNil) diff --git a/lib/controller/router/router.go b/lib/controller/router/router.go index 72e5ab3108..703c45701b 100644 --- a/lib/controller/router/router.go +++ b/lib/controller/router/router.go @@ -662,11 +662,8 @@ func (rtr *router) addRoute(endpoint arvados.APIEndpoint, defaultOpts func() int } ctx := auth.NewContext(req.Context(), creds) ctx = arvados.ContextWithRequestID(ctx, req.Header.Get("X-Request-Id")) - logger.WithFields(logrus.Fields{ - "apiEndpoint": endpoint, - "apiOptsType": fmt.Sprintf("%T", opts), - "apiOpts": opts, - }).Debug("exec") + req = req.WithContext(ctx) + // Extract the token UUIDs (or a placeholder for v1 tokens) var tokenUUIDs []string for _, t := range creds.Tokens { @@ -683,7 +680,13 @@ func (rtr *router) addRoute(endpoint arvados.APIEndpoint, defaultOpts func() int tokenUUIDs = append(tokenUUIDs, "v1 token ending in "+end) } } - httpserver.SetResponseLogFields(req.Context(), logrus.Fields{"tokenUUIDs": tokenUUIDs}) + httpserver.SetResponseLogFields(ctx, logrus.Fields{"tokenUUIDs": tokenUUIDs}) + + logger.WithFields(logrus.Fields{ + "apiEndpoint": endpoint, + "apiOptsType": fmt.Sprintf("%T", opts), + "apiOpts": opts, + }).Debug("exec") resp, err := exec(ctx, opts) if err != nil { logger.WithError(err).Debugf("returning error type %T", err) diff --git a/services/keep-web/handler.go b/services/keep-web/handler.go index 97749d967d..3b22a13b02 100644 --- a/services/keep-web/handler.go +++ b/services/keep-web/handler.go @@ -213,7 +213,26 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) { return } - pathParts := strings.Split(r.URL.Path[1:], "/") + webdavPrefix := "" + arvPath := r.URL.Path + if prefix := r.Header.Get("X-Webdav-Prefix"); prefix != "" { + // Enable a proxy (e.g., container log handler in + // controller) to satisfy a request for path + // "/foo/bar/baz.txt" using content from + // "//abc123-4.internal/bar/baz.txt", by adding a + // request header "X-Webdav-Prefix: /foo" + if !strings.HasPrefix(arvPath, prefix) { + http.Error(w, "X-Webdav-Prefix header is not a prefix of the requested path", http.StatusBadRequest) + return + } + arvPath = r.URL.Path[len(prefix):] + if arvPath == "" { + arvPath = "/" + } + w.Header().Set("Vary", "X-Webdav-Prefix, "+w.Header().Get("Vary")) + webdavPrefix = prefix + } + pathParts := strings.Split(arvPath[1:], "/") var stripParts int var collectionID string @@ -561,8 +580,11 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) { if r.Method == http.MethodGet { applyContentDispositionHdr(w, r, basename, attachment) } + if webdavPrefix == "" { + webdavPrefix = "/" + strings.Join(pathParts[:stripParts], "/") + } wh := webdav.Handler{ - Prefix: "/" + strings.Join(pathParts[:stripParts], "/"), + Prefix: webdavPrefix, FileSystem: &webdavfs.FS{ FileSystem: sessionFS, Prefix: fsprefix, -- 2.30.2