Merge branch '18631-shell-login-sync'
[arvados.git] / services / keep-web / handler.go
index 643ca4f587f51bc9b353ab29b4a82869d96578a8..ef61b06873c50661bb29f622bfb1b5e9a1097495 100644 (file)
@@ -6,6 +6,7 @@ package main
 
 import (
        "encoding/json"
+       "fmt"
        "html"
        "html/template"
        "io"
@@ -62,6 +63,9 @@ func parseCollectionIDFromDNSName(s string) string {
 
 var urlPDHDecoder = strings.NewReplacer(" ", "+", "-", "+")
 
+var notFoundMessage = "404 Not found\r\n\r\nThe requested path was not found, or you do not have permission to access it.\r"
+var unauthorizedMessage = "401 Unauthorized\r\n\r\nA valid Arvados token must be provided to access this resource.\r"
+
 // parseCollectionIDFromURL returns a UUID or PDH if s is a UUID or a
 // PDH (even if it is a PDH with "+" replaced by " " or "-");
 // otherwise "".
@@ -181,14 +185,20 @@ var (
        }
 )
 
+func stripDefaultPort(host string) string {
+       // Will consider port 80 and port 443 to be the same vhost.  I think that's fine.
+       u := &url.URL{Host: host}
+       if p := u.Port(); p == "80" || p == "443" {
+               return strings.ToLower(u.Hostname())
+       } else {
+               return strings.ToLower(host)
+       }
+}
+
 // ServeHTTP implements http.Handler.
 func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
        h.setupOnce.Do(h.setup)
 
-       remoteAddr := r.RemoteAddr
-       if xff := r.Header.Get("X-Forwarded-For"); xff != "" {
-               remoteAddr = xff + "," + remoteAddr
-       }
        if xfp := r.Header.Get("X-Forwarded-Proto"); xfp != "" && xfp != "http" {
                r.URL.Scheme = xfp
        }
@@ -227,6 +237,10 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                w.Header().Set("Access-Control-Expose-Headers", "Content-Range")
        }
 
+       if h.serveS3(w, r) {
+               return
+       }
+
        pathParts := strings.Split(r.URL.Path[1:], "/")
 
        var stripParts int
@@ -237,14 +251,20 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
        var attachment bool
        var useSiteFS bool
        credentialsOK := h.Config.cluster.Collections.TrustAllContent
+       reasonNotAcceptingCredentials := ""
 
-       if r.Host != "" && r.Host == h.Config.cluster.Services.WebDAVDownload.ExternalURL.Host {
+       if r.Host != "" && stripDefaultPort(r.Host) == stripDefaultPort(h.Config.cluster.Services.WebDAVDownload.ExternalURL.Host) {
                credentialsOK = true
                attachment = true
        } else if r.FormValue("disposition") == "attachment" {
                attachment = true
        }
 
+       if !credentialsOK {
+               reasonNotAcceptingCredentials = fmt.Sprintf("vhost %q does not specify a single collection ID or match Services.WebDAVDownload.ExternalURL %q, and Collections.TrustAllContent is false",
+                       r.Host, h.Config.cluster.Services.WebDAVDownload.ExternalURL)
+       }
+
        if collectionID = parseCollectionIDFromDNSName(r.Host); collectionID != "" {
                // http://ID.collections.example/PATH...
                credentialsOK = true
@@ -275,11 +295,12 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                        // data. Tokens provided with the request are
                        // ignored.
                        credentialsOK = false
+                       reasonNotAcceptingCredentials = "the '/collections/UUID/PATH' form only works for public data"
                }
        }
 
        if collectionID == "" && !useSiteFS {
-               w.WriteHeader(http.StatusNotFound)
+               http.Error(w, notFoundMessage, http.StatusNotFound)
                return
        }
 
@@ -293,27 +314,32 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
        }
 
        formToken := r.FormValue("api_token")
-       if formToken != "" && r.Header.Get("Origin") != "" && attachment && r.URL.Query().Get("api_token") == "" {
-               // The client provided an explicit token in the POST
-               // body. The Origin header indicates this *might* be
-               // an AJAX request, in which case redirect-with-cookie
-               // won't work: we should just serve the content in the
-               // POST response. This is safe because:
+       origin := r.Header.Get("Origin")
+       cors := origin != "" && !strings.HasSuffix(origin, "://"+r.Host)
+       safeAjax := cors && (r.Method == http.MethodGet || r.Method == http.MethodHead)
+       safeAttachment := attachment && r.URL.Query().Get("api_token") == ""
+       if formToken == "" {
+               // No token to use or redact.
+       } else if safeAjax || safeAttachment {
+               // If this is a cross-origin request, the URL won't
+               // appear in the browser's address bar, so
+               // substituting a clipboard-safe URL is pointless.
+               // Redirect-with-cookie wouldn't work anyway, because
+               // it's not safe to allow third-party use of our
+               // cookie.
                //
-               // * We're supplying an attachment, not inline
-               //   content, so we don't need to convert the POST to
-               //   a GET and avoid the "really resubmit form?"
-               //   problem.
-               //
-               // * The token isn't embedded in the URL, so we don't
-               //   need to worry about bookmarks and copy/paste.
+               // If we're supplying an attachment, we don't need to
+               // convert POST to GET to avoid the "really resubmit
+               // form?" problem, so provided the token isn't
+               // embedded in the URL, there's no reason to do
+               // redirect-with-cookie in this case either.
                reqTokens = append(reqTokens, formToken)
-       } else if formToken != "" && browserMethod[r.Method] {
-               // The client provided an explicit token in the query
-               // string, or a form in POST body. We must put the
-               // token in an HttpOnly cookie, and redirect to the
-               // same URL with the query param redacted and method =
-               // GET.
+       } else if browserMethod[r.Method] {
+               // If this is a page view, and the client provided a
+               // token via query string or POST body, we must put
+               // the token in an HttpOnly cookie, and redirect to an
+               // equivalent URL with the query param redacted and
+               // method = GET.
                h.seeOtherWithCookie(w, r, "", credentialsOK)
                return
        }
@@ -338,7 +364,19 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
        }
 
        if tokens == nil {
-               tokens = append(reqTokens, h.Config.cluster.Users.AnonymousUserToken)
+               tokens = reqTokens
+               if h.Config.cluster.Users.AnonymousUserToken != "" {
+                       tokens = append(tokens, h.Config.cluster.Users.AnonymousUserToken)
+               }
+       }
+
+       if tokens == nil {
+               if !credentialsOK {
+                       http.Error(w, fmt.Sprintf("Authorization tokens are not accepted here: %v, and no anonymous user token is configured.", reasonNotAcceptingCredentials), http.StatusUnauthorized)
+               } else {
+                       http.Error(w, fmt.Sprintf("No authorization token in request, and no anonymous user token is configured."), http.StatusUnauthorized)
+               }
+               return
        }
 
        if len(targetPath) > 0 && targetPath[0] == "_" {
@@ -360,6 +398,7 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
        defer h.clientPool.Put(arv)
 
        var collection *arvados.Collection
+       var tokenUser *arvados.User
        tokenResult := make(map[string]int)
        for _, arv.ApiToken = range tokens {
                var err error
@@ -388,14 +427,14 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                        // for additional credentials would just be
                        // confusing), or we don't even accept
                        // credentials at this path.
-                       w.WriteHeader(http.StatusNotFound)
+                       http.Error(w, notFoundMessage, http.StatusNotFound)
                        return
                }
                for _, t := range reqTokens {
                        if tokenResult[t] == 404 {
                                // The client provided valid token(s), but the
                                // collection was not found.
-                               w.WriteHeader(http.StatusNotFound)
+                               http.Error(w, notFoundMessage, http.StatusNotFound)
                                return
                        }
                }
@@ -409,7 +448,7 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                // data that has been deleted.  Allow a referrer to
                // provide this context somehow?
                w.Header().Add("WWW-Authenticate", "Basic realm=\"collections\"")
-               w.WriteHeader(http.StatusUnauthorized)
+               http.Error(w, unauthorizedMessage, http.StatusUnauthorized)
                return
        }
 
@@ -445,7 +484,17 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                return
        }
 
+       // Check configured permission
+       _, sess, err := h.Config.Cache.GetSession(arv.ApiToken)
+       tokenUser, err = h.Config.Cache.GetTokenUser(arv.ApiToken)
+
        if webdavMethod[r.Method] {
+               if !h.userPermittedToUploadOrDownload(r.Method, tokenUser) {
+                       http.Error(w, "Not permitted", http.StatusForbidden)
+                       return
+               }
+               h.logUploadOrDownload(r, sess.arvadosclient, nil, strings.Join(targetPath, "/"), collection, tokenUser)
+
                if writeMethod[r.Method] {
                        // Save the collection only if/when all
                        // webdav->filesystem operations succeed --
@@ -477,13 +526,18 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
        }
 
        openPath := "/" + strings.Join(targetPath, "/")
-       if f, err := fs.Open(openPath); os.IsNotExist(err) {
+       f, err := fs.Open(openPath)
+       if os.IsNotExist(err) {
                // Requested non-existent path
-               w.WriteHeader(http.StatusNotFound)
+               http.Error(w, notFoundMessage, http.StatusNotFound)
+               return
        } else if err != nil {
                // Some other (unexpected) error
                http.Error(w, "open: "+err.Error(), http.StatusInternalServerError)
-       } else if stat, err := f.Stat(); err != nil {
+               return
+       }
+       defer f.Close()
+       if stat, err := f.Stat(); err != nil {
                // Can't get Size/IsDir (shouldn't happen with a collectionFS!)
                http.Error(w, "stat: "+err.Error(), http.StatusInternalServerError)
        } else if stat.IsDir() && !strings.HasSuffix(r.URL.Path, "/") {
@@ -495,50 +549,63 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
        } else if stat.IsDir() {
                h.serveDirectory(w, r, collection.Name, fs, openPath, true)
        } else {
+               if !h.userPermittedToUploadOrDownload(r.Method, tokenUser) {
+                       http.Error(w, "Not permitted", http.StatusForbidden)
+                       return
+               }
+               h.logUploadOrDownload(r, sess.arvadosclient, nil, strings.Join(targetPath, "/"), collection, tokenUser)
+
                http.ServeContent(w, r, basename, stat.ModTime(), f)
-               if wrote := int64(w.WroteBodyBytes()); wrote != stat.Size() && r.Header.Get("Range") == "" {
+               if wrote := int64(w.WroteBodyBytes()); wrote != stat.Size() && w.WroteStatus() == http.StatusOK {
                        // If we wrote fewer bytes than expected, it's
                        // too late to change the real response code
                        // or send an error message to the client, but
                        // at least we can try to put some useful
                        // debugging info in the logs.
                        n, err := f.Read(make([]byte, 1024))
-                       ctxlog.FromContext(r.Context()).Errorf("stat.Size()==%d but only wrote %d bytes; read(1024) returns %d, %s", stat.Size(), wrote, n, err)
-
+                       ctxlog.FromContext(r.Context()).Errorf("stat.Size()==%d but only wrote %d bytes; read(1024) returns %d, %v", stat.Size(), wrote, n, err)
                }
        }
 }
 
+func (h *handler) getClients(reqID, token string) (arv *arvadosclient.ArvadosClient, kc *keepclient.KeepClient, client *arvados.Client, release func(), err error) {
+       arv = h.clientPool.Get()
+       if arv == nil {
+               err = h.clientPool.Err()
+               return
+       }
+       release = func() { h.clientPool.Put(arv) }
+       arv.ApiToken = token
+       kc, err = keepclient.MakeKeepClient(arv)
+       if err != nil {
+               release()
+               return
+       }
+       kc.RequestID = reqID
+       client = (&arvados.Client{
+               APIHost:   arv.ApiServer,
+               AuthToken: arv.ApiToken,
+               Insecure:  arv.ApiInsecure,
+       }).WithRequestID(reqID)
+       return
+}
+
 func (h *handler) serveSiteFS(w http.ResponseWriter, r *http.Request, tokens []string, credentialsOK, attachment bool) {
        if len(tokens) == 0 {
                w.Header().Add("WWW-Authenticate", "Basic realm=\"collections\"")
-               http.Error(w, http.StatusText(http.StatusUnauthorized), http.StatusUnauthorized)
+               http.Error(w, unauthorizedMessage, http.StatusUnauthorized)
                return
        }
        if writeMethod[r.Method] {
                http.Error(w, errReadOnly.Error(), http.StatusMethodNotAllowed)
                return
        }
-       arv := h.clientPool.Get()
-       if arv == nil {
-               http.Error(w, "Pool failed: "+h.clientPool.Err().Error(), http.StatusInternalServerError)
-               return
-       }
-       defer h.clientPool.Put(arv)
-       arv.ApiToken = tokens[0]
 
-       kc, err := keepclient.MakeKeepClient(arv)
+       fs, sess, err := h.Config.Cache.GetSession(tokens[0])
        if err != nil {
-               http.Error(w, "error setting up keep client: "+err.Error(), http.StatusInternalServerError)
+               http.Error(w, err.Error(), http.StatusInternalServerError)
                return
        }
-       kc.RequestID = r.Header.Get("X-Request-Id")
-       client := (&arvados.Client{
-               APIHost:   arv.ApiServer,
-               AuthToken: arv.ApiToken,
-               Insecure:  arv.ApiInsecure,
-       }).WithRequestID(r.Header.Get("X-Request-Id"))
-       fs := client.SiteFileSystem(kc)
        fs.ForwardSlashNameSubstitution(h.Config.cluster.Collections.ForwardSlashNameSubstitution)
        f, err := fs.Open(r.URL.Path)
        if os.IsNotExist(err) {
@@ -557,6 +624,14 @@ func (h *handler) serveSiteFS(w http.ResponseWriter, r *http.Request, tokens []s
                }
                return
        }
+
+       tokenUser, err := h.Config.Cache.GetTokenUser(tokens[0])
+       if !h.userPermittedToUploadOrDownload(r.Method, tokenUser) {
+               http.Error(w, "Not permitted", http.StatusForbidden)
+               return
+       }
+       h.logUploadOrDownload(r, sess.arvadosclient, fs, r.URL.Path, nil, tokenUser)
+
        if r.Method == "GET" {
                _, basename := filepath.Split(r.URL.Path)
                applyContentDispositionHdr(w, r, basename, attachment)
@@ -756,6 +831,7 @@ func (h *handler) seeOtherWithCookie(w http.ResponseWriter, r *http.Request, loc
                        Value:    auth.EncodeTokenCookie([]byte(formToken)),
                        Path:     "/",
                        HttpOnly: true,
+                       SameSite: http.SameSiteLaxMode,
                })
        }
 
@@ -786,3 +862,125 @@ func (h *handler) seeOtherWithCookie(w http.ResponseWriter, r *http.Request, loc
        io.WriteString(w, html.EscapeString(redir))
        io.WriteString(w, `">Continue</A>`)
 }
+
+func (h *handler) userPermittedToUploadOrDownload(method string, tokenUser *arvados.User) bool {
+       var permitDownload bool
+       var permitUpload bool
+       if tokenUser != nil && tokenUser.IsAdmin {
+               permitUpload = h.Config.cluster.Collections.WebDAVPermission.Admin.Upload
+               permitDownload = h.Config.cluster.Collections.WebDAVPermission.Admin.Download
+       } else {
+               permitUpload = h.Config.cluster.Collections.WebDAVPermission.User.Upload
+               permitDownload = h.Config.cluster.Collections.WebDAVPermission.User.Download
+       }
+       if (method == "PUT" || method == "POST") && !permitUpload {
+               // Disallow operations that upload new files.
+               // Permit webdav operations that move existing files around.
+               return false
+       } else if method == "GET" && !permitDownload {
+               // Disallow downloading file contents.
+               // Permit webdav operations like PROPFIND that retrieve metadata
+               // but not file contents.
+               return false
+       }
+       return true
+}
+
+func (h *handler) logUploadOrDownload(
+       r *http.Request,
+       client *arvadosclient.ArvadosClient,
+       fs arvados.CustomFileSystem,
+       filepath string,
+       collection *arvados.Collection,
+       user *arvados.User) {
+
+       log := ctxlog.FromContext(r.Context())
+       props := make(map[string]string)
+       props["reqPath"] = r.URL.Path
+       var useruuid string
+       if user != nil {
+               log = log.WithField("user_uuid", user.UUID).
+                       WithField("user_full_name", user.FullName)
+               useruuid = user.UUID
+       } else {
+               useruuid = fmt.Sprintf("%s-tpzed-anonymouspublic", h.Config.cluster.ClusterID)
+       }
+       if collection == nil && fs != nil {
+               collection, filepath = h.determineCollection(fs, filepath)
+       }
+       if collection != nil {
+               log = log.WithField("collection_uuid", collection.UUID).
+                       WithField("collection_file_path", filepath)
+               props["collection_uuid"] = collection.UUID
+               props["collection_file_path"] = filepath
+               // h.determineCollection populates the collection_uuid prop with the PDH, if
+               // this collection is being accessed via PDH. In that case, blank the
+               // collection_uuid field so that consumers of the log entries can rely on it
+               // being a UUID, or blank. The PDH remains available via the
+               // portable_data_hash property.
+               if props["collection_uuid"] == collection.PortableDataHash {
+                       props["collection_uuid"] = ""
+               }
+       }
+       if r.Method == "PUT" || r.Method == "POST" {
+               log.Info("File upload")
+               if h.Config.cluster.Collections.WebDAVLogEvents {
+                       go func() {
+                               lr := arvadosclient.Dict{"log": arvadosclient.Dict{
+                                       "object_uuid": useruuid,
+                                       "event_type":  "file_upload",
+                                       "properties":  props}}
+                               err := client.Create("logs", lr, nil)
+                               if err != nil {
+                                       log.WithError(err).Error("Failed to create upload log event on API server")
+                               }
+                       }()
+               }
+       } else if r.Method == "GET" {
+               if collection != nil && collection.PortableDataHash != "" {
+                       log = log.WithField("portable_data_hash", collection.PortableDataHash)
+                       props["portable_data_hash"] = collection.PortableDataHash
+               }
+               log.Info("File download")
+               if h.Config.cluster.Collections.WebDAVLogEvents {
+                       go func() {
+                               lr := arvadosclient.Dict{"log": arvadosclient.Dict{
+                                       "object_uuid": useruuid,
+                                       "event_type":  "file_download",
+                                       "properties":  props}}
+                               err := client.Create("logs", lr, nil)
+                               if err != nil {
+                                       log.WithError(err).Error("Failed to create download log event on API server")
+                               }
+                       }()
+               }
+       }
+}
+
+func (h *handler) determineCollection(fs arvados.CustomFileSystem, path string) (*arvados.Collection, string) {
+       segments := strings.Split(path, "/")
+       var i int
+       for i = 0; i < len(segments); i++ {
+               dir := append([]string{}, segments[0:i]...)
+               dir = append(dir, ".arvados#collection")
+               f, err := fs.OpenFile(strings.Join(dir, "/"), os.O_RDONLY, 0)
+               if f != nil {
+                       defer f.Close()
+               }
+               if err != nil {
+                       if !os.IsNotExist(err) {
+                               return nil, ""
+                       }
+                       continue
+               }
+               // err is nil so we found it.
+               decoder := json.NewDecoder(f)
+               var collection arvados.Collection
+               err = decoder.Decode(&collection)
+               if err != nil {
+                       return nil, ""
+               }
+               return &collection, strings.Join(segments[i:], "/")
+       }
+       return nil, ""
+}