//
// SPDX-License-Identifier: AGPL-3.0
-package main
+package keepweb
import (
"encoding/json"
+ "fmt"
"html"
"html/template"
"io"
"git.arvados.org/arvados.git/sdk/go/arvadosclient"
"git.arvados.org/arvados.git/sdk/go/auth"
"git.arvados.org/arvados.git/sdk/go/ctxlog"
- "git.arvados.org/arvados.git/sdk/go/health"
"git.arvados.org/arvados.git/sdk/go/httpserver"
"git.arvados.org/arvados.git/sdk/go/keepclient"
"github.com/sirupsen/logrus"
)
type handler struct {
- Config *Config
- MetricsAPI http.Handler
- clientPool *arvadosclient.ClientPool
- setupOnce sync.Once
- healthHandler http.Handler
- webdavLS webdav.LockSystem
-}
-
-// parseCollectionIDFromDNSName returns a UUID or PDH if s begins with
-// a UUID or URL-encoded PDH; otherwise "".
-func parseCollectionIDFromDNSName(s string) string {
- // Strip domain.
- if i := strings.IndexRune(s, '.'); i >= 0 {
- s = s[:i]
- }
- // Names like {uuid}--collections.example.com serve the same
- // purpose as {uuid}.collections.example.com but can reduce
- // cost/effort of using [additional] wildcard certificates.
- if i := strings.Index(s, "--"); i >= 0 {
- s = s[:i]
- }
- if arvadosclient.UUIDMatch(s) {
- return s
- }
- if pdh := strings.Replace(s, "-", "+", 1); arvadosclient.PDHMatch(pdh) {
- return pdh
- }
- return ""
+ Cache cache
+ Cluster *arvados.Cluster
+ clientPool *arvadosclient.ClientPool
+ setupOnce sync.Once
+ webdavLS webdav.LockSystem
}
var urlPDHDecoder = strings.NewReplacer(" ", "+", "-", "+")
func (h *handler) setup() {
// Errors will be handled at the client pool.
- arv, _ := arvados.NewClientFromConfig(h.Config.cluster)
+ arv, _ := arvados.NewClientFromConfig(h.Cluster)
h.clientPool = arvadosclient.MakeClientPoolWith(arv)
- keepclient.RefreshServiceDiscoveryOnSIGHUP()
- keepclient.DefaultBlockCache.MaxBlocks = h.Config.cluster.Collections.WebDAVCache.MaxBlockEntries
-
- h.healthHandler = &health.Handler{
- Token: h.Config.cluster.ManagementToken,
- Prefix: "/_health/",
- }
+ keepclient.DefaultBlockCache.MaxBlocks = h.Cluster.Collections.WebDAVCache.MaxBlockEntries
// Even though we don't accept LOCK requests, every webdav
// handler must have a non-nil LockSystem.
}
)
// stripDefaultPort returns host in lower case, with a trailing ":80"
// or ":443" removed if present.
//
// Ports 80 and 443 are both treated as "default" regardless of
// scheme, so they are considered to be the same vhost.
//
// Only the ":port" suffix is removed. In particular, the square
// brackets around an IPv6 literal are kept, so that "[::1]:443" and
// "[::1]" normalize to the same string and compare equal.
func stripDefaultPort(host string) string {
	u := &url.URL{Host: host}
	// u.Port() is the text after the last colon in host (when that
	// text is a valid port), so trimming ":"+port removes exactly
	// the port and nothing else.
	if p := u.Port(); p == "80" || p == "443" {
		host = strings.TrimSuffix(host, ":"+p)
	}
	return strings.ToLower(host)
}
+
+// CheckHealth implements service.Handler.
+func (h *handler) CheckHealth() error {
+ return nil
+}
+
+// Done implements service.Handler.
+func (h *handler) Done() <-chan struct{} {
+ return nil
+}
+
// ServeHTTP implements http.Handler.
func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
h.setupOnce.Do(h.setup)
w := httpserver.WrapResponseWriter(wOrig)
- if strings.HasPrefix(r.URL.Path, "/_health/") && r.Method == "GET" {
- h.healthHandler.ServeHTTP(w, r)
- return
- }
-
if method := r.Header.Get("Access-Control-Request-Method"); method != "" && r.Method == "OPTIONS" {
if !browserMethod[method] && !webdavMethod[method] {
w.WriteHeader(http.StatusMethodNotAllowed)
var pathToken bool
var attachment bool
var useSiteFS bool
- credentialsOK := h.Config.cluster.Collections.TrustAllContent
+ credentialsOK := h.Cluster.Collections.TrustAllContent
+ reasonNotAcceptingCredentials := ""
- if r.Host != "" && r.Host == h.Config.cluster.Services.WebDAVDownload.ExternalURL.Host {
+ if r.Host != "" && stripDefaultPort(r.Host) == stripDefaultPort(h.Cluster.Services.WebDAVDownload.ExternalURL.Host) {
credentialsOK = true
attachment = true
} else if r.FormValue("disposition") == "attachment" {
attachment = true
}
- if collectionID = parseCollectionIDFromDNSName(r.Host); collectionID != "" {
+ if !credentialsOK {
+ reasonNotAcceptingCredentials = fmt.Sprintf("vhost %q does not specify a single collection ID or match Services.WebDAVDownload.ExternalURL %q, and Collections.TrustAllContent is false",
+ r.Host, h.Cluster.Services.WebDAVDownload.ExternalURL)
+ }
+
+ if collectionID = arvados.CollectionIDFromDNSName(r.Host); collectionID != "" {
// http://ID.collections.example/PATH...
credentialsOK = true
} else if r.URL.Path == "/status.json" {
h.serveStatus(w, r)
return
- } else if strings.HasPrefix(r.URL.Path, "/metrics") {
- h.MetricsAPI.ServeHTTP(w, r)
- return
} else if siteFSDir[pathParts[0]] {
useSiteFS = true
} else if len(pathParts) >= 1 && strings.HasPrefix(pathParts[0], "c=") {
// data. Tokens provided with the request are
// ignored.
credentialsOK = false
+ reasonNotAcceptingCredentials = "the '/collections/UUID/PATH' form only works for public data"
}
}
}
formToken := r.FormValue("api_token")
- if formToken != "" && r.Header.Get("Origin") != "" && attachment && r.URL.Query().Get("api_token") == "" {
- // The client provided an explicit token in the POST
- // body. The Origin header indicates this *might* be
- // an AJAX request, in which case redirect-with-cookie
- // won't work: we should just serve the content in the
- // POST response. This is safe because:
- //
- // * We're supplying an attachment, not inline
- // content, so we don't need to convert the POST to
- // a GET and avoid the "really resubmit form?"
- // problem.
+ origin := r.Header.Get("Origin")
+ cors := origin != "" && !strings.HasSuffix(origin, "://"+r.Host)
+ safeAjax := cors && (r.Method == http.MethodGet || r.Method == http.MethodHead)
+ safeAttachment := attachment && r.URL.Query().Get("api_token") == ""
+ if formToken == "" {
+ // No token to use or redact.
+ } else if safeAjax || safeAttachment {
+ // If this is a cross-origin request, the URL won't
+ // appear in the browser's address bar, so
+ // substituting a clipboard-safe URL is pointless.
+ // Redirect-with-cookie wouldn't work anyway, because
+ // it's not safe to allow third-party use of our
+ // cookie.
//
- // * The token isn't embedded in the URL, so we don't
- // need to worry about bookmarks and copy/paste.
+ // If we're supplying an attachment, we don't need to
+ // convert POST to GET to avoid the "really resubmit
+ // form?" problem, so provided the token isn't
+ // embedded in the URL, there's no reason to do
+ // redirect-with-cookie in this case either.
reqTokens = append(reqTokens, formToken)
- } else if formToken != "" && browserMethod[r.Method] {
- // The client provided an explicit token in the query
- // string, or a form in POST body. We must put the
- // token in an HttpOnly cookie, and redirect to the
- // same URL with the query param redacted and method =
- // GET.
+ } else if browserMethod[r.Method] {
+ // If this is a page view, and the client provided a
+ // token via query string or POST body, we must put
+ // the token in an HttpOnly cookie, and redirect to an
+ // equivalent URL with the query param redacted and
+ // method = GET.
h.seeOtherWithCookie(w, r, "", credentialsOK)
return
}
}
if tokens == nil {
- tokens = append(reqTokens, h.Config.cluster.Users.AnonymousUserToken)
+ tokens = reqTokens
+ if h.Cluster.Users.AnonymousUserToken != "" {
+ tokens = append(tokens, h.Cluster.Users.AnonymousUserToken)
+ }
+ }
+
+ if tokens == nil {
+ if !credentialsOK {
+ http.Error(w, fmt.Sprintf("Authorization tokens are not accepted here: %v, and no anonymous user token is configured.", reasonNotAcceptingCredentials), http.StatusUnauthorized)
+ } else {
+ http.Error(w, fmt.Sprintf("No authorization token in request, and no anonymous user token is configured."), http.StatusUnauthorized)
+ }
+ return
}
if len(targetPath) > 0 && targetPath[0] == "_" {
defer h.clientPool.Put(arv)
var collection *arvados.Collection
+ var tokenUser *arvados.User
tokenResult := make(map[string]int)
for _, arv.ApiToken = range tokens {
var err error
- collection, err = h.Config.Cache.Get(arv, collectionID, forceReload)
+ collection, err = h.Cache.Get(arv, collectionID, forceReload)
if err == nil {
// Success
break
}
}
// The client's token was invalid (e.g., expired), or
- // the client didn't even provide one. Propagate the
- // 401 to encourage the client to use a [different]
- // token.
+ // the client didn't even provide one. Redirect to
+ // workbench2's login-and-redirect-to-download url if
+ // this is a browser navigation request. (The redirect
+ // flow can't preserve the original method if it's not
+ // GET, and doesn't make sense if the UA is a
+ // command-line tool, is trying to load an inline
+ // image, etc.; in these cases, there's nothing we can
+ // do, so return 401 unauthorized.)
+ //
+ // Note Sec-Fetch-Mode is sent by all non-EOL
+ // browsers, except Safari.
+ // https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Sec-Fetch-Mode
//
// TODO(TC): This response would be confusing to
// someone trying (anonymously) to download public
// data that has been deleted. Allow a referrer to
// provide this context somehow?
- w.Header().Add("WWW-Authenticate", "Basic realm=\"collections\"")
- http.Error(w, unauthorizedMessage, http.StatusUnauthorized)
+ if r.Method == http.MethodGet && r.Header.Get("Sec-Fetch-Mode") == "navigate" {
+ target := url.URL(h.Cluster.Services.Workbench2.ExternalURL)
+ redirkey := "redirectToPreview"
+ if attachment {
+ redirkey = "redirectToDownload"
+ }
+ callback := "/c=" + collectionID + "/" + strings.Join(targetPath, "/")
+ // target.RawQuery = url.Values{redirkey:
+ // {target}}.Encode() would be the obvious
+ // thing to do here, but wb2 doesn't decode
+ // this as a query param -- it takes
+ // everything after "${redirkey}=" as the
+ // target URL. If we encode "/" as "%2F" etc.,
+ // the redirect won't work.
+ target.RawQuery = redirkey + "=" + callback
+ w.Header().Add("Location", target.String())
+ w.WriteHeader(http.StatusSeeOther)
+ } else {
+ w.Header().Add("WWW-Authenticate", "Basic realm=\"collections\"")
+ http.Error(w, unauthorizedMessage, http.StatusUnauthorized)
+ }
return
}
return
}
+ // Check configured permission
+ _, sess, err := h.Cache.GetSession(arv.ApiToken)
+ tokenUser, err = h.Cache.GetTokenUser(arv.ApiToken)
+
if webdavMethod[r.Method] {
+ if !h.userPermittedToUploadOrDownload(r.Method, tokenUser) {
+ http.Error(w, "Not permitted", http.StatusForbidden)
+ return
+ }
+ h.logUploadOrDownload(r, sess.arvadosclient, nil, strings.Join(targetPath, "/"), collection, tokenUser)
+
if writeMethod[r.Method] {
// Save the collection only if/when all
// webdav->filesystem operations succeed --
ResponseWriter: w,
logger: ctxlog.FromContext(r.Context()),
update: func() error {
- return h.Config.Cache.Update(client, *collection, writefs)
+ return h.Cache.Update(client, *collection, writefs)
}}
}
h := webdav.Handler{
}
openPath := "/" + strings.Join(targetPath, "/")
- if f, err := fs.Open(openPath); os.IsNotExist(err) {
+ f, err := fs.Open(openPath)
+ if os.IsNotExist(err) {
// Requested non-existent path
http.Error(w, notFoundMessage, http.StatusNotFound)
+ return
} else if err != nil {
// Some other (unexpected) error
http.Error(w, "open: "+err.Error(), http.StatusInternalServerError)
- } else if stat, err := f.Stat(); err != nil {
+ return
+ }
+ defer f.Close()
+ if stat, err := f.Stat(); err != nil {
// Can't get Size/IsDir (shouldn't happen with a collectionFS!)
http.Error(w, "stat: "+err.Error(), http.StatusInternalServerError)
} else if stat.IsDir() && !strings.HasSuffix(r.URL.Path, "/") {
} else if stat.IsDir() {
h.serveDirectory(w, r, collection.Name, fs, openPath, true)
} else {
+ if !h.userPermittedToUploadOrDownload(r.Method, tokenUser) {
+ http.Error(w, "Not permitted", http.StatusForbidden)
+ return
+ }
+ h.logUploadOrDownload(r, sess.arvadosclient, nil, strings.Join(targetPath, "/"), collection, tokenUser)
+
http.ServeContent(w, r, basename, stat.ModTime(), f)
- if wrote := int64(w.WroteBodyBytes()); wrote != stat.Size() && r.Header.Get("Range") == "" {
+ if wrote := int64(w.WroteBodyBytes()); wrote != stat.Size() && w.WroteStatus() == http.StatusOK {
// If we wrote fewer bytes than expected, it's
// too late to change the real response code
// or send an error message to the client, but
// at least we can try to put some useful
// debugging info in the logs.
n, err := f.Read(make([]byte, 1024))
- ctxlog.FromContext(r.Context()).Errorf("stat.Size()==%d but only wrote %d bytes; read(1024) returns %d, %s", stat.Size(), wrote, n, err)
-
+ ctxlog.FromContext(r.Context()).Errorf("stat.Size()==%d but only wrote %d bytes; read(1024) returns %d, %v", stat.Size(), wrote, n, err)
}
}
}
func (h *handler) getClients(reqID, token string) (arv *arvadosclient.ArvadosClient, kc *keepclient.KeepClient, client *arvados.Client, release func(), err error) {
arv = h.clientPool.Get()
if arv == nil {
- return nil, nil, nil, nil, err
+ err = h.clientPool.Err()
+ return
}
release = func() { h.clientPool.Put(arv) }
arv.ApiToken = token
http.Error(w, errReadOnly.Error(), http.StatusMethodNotAllowed)
return
}
- _, kc, client, release, err := h.getClients(r.Header.Get("X-Request-Id"), tokens[0])
+
+ fs, sess, err := h.Cache.GetSession(tokens[0])
if err != nil {
- http.Error(w, "Pool failed: "+h.clientPool.Err().Error(), http.StatusInternalServerError)
+ http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
- defer release()
-
- fs := client.SiteFileSystem(kc)
- fs.ForwardSlashNameSubstitution(h.Config.cluster.Collections.ForwardSlashNameSubstitution)
+ fs.ForwardSlashNameSubstitution(h.Cluster.Collections.ForwardSlashNameSubstitution)
f, err := fs.Open(r.URL.Path)
if os.IsNotExist(err) {
http.Error(w, err.Error(), http.StatusNotFound)
}
return
}
+
+ tokenUser, err := h.Cache.GetTokenUser(tokens[0])
+ if !h.userPermittedToUploadOrDownload(r.Method, tokenUser) {
+ http.Error(w, "Not permitted", http.StatusForbidden)
+ return
+ }
+ h.logUploadOrDownload(r, sess.arvadosclient, fs, r.URL.Path, nil, tokenUser)
+
if r.Method == "GET" {
_, basename := filepath.Split(r.URL.Path)
applyContentDispositionHdr(w, r, basename, attachment)
Value: auth.EncodeTokenCookie([]byte(formToken)),
Path: "/",
HttpOnly: true,
+ SameSite: http.SameSiteLaxMode,
})
}
io.WriteString(w, html.EscapeString(redir))
io.WriteString(w, `">Continue</A>`)
}
+
+func (h *handler) userPermittedToUploadOrDownload(method string, tokenUser *arvados.User) bool {
+ var permitDownload bool
+ var permitUpload bool
+ if tokenUser != nil && tokenUser.IsAdmin {
+ permitUpload = h.Cluster.Collections.WebDAVPermission.Admin.Upload
+ permitDownload = h.Cluster.Collections.WebDAVPermission.Admin.Download
+ } else {
+ permitUpload = h.Cluster.Collections.WebDAVPermission.User.Upload
+ permitDownload = h.Cluster.Collections.WebDAVPermission.User.Download
+ }
+ if (method == "PUT" || method == "POST") && !permitUpload {
+ // Disallow operations that upload new files.
+ // Permit webdav operations that move existing files around.
+ return false
+ } else if method == "GET" && !permitDownload {
+ // Disallow downloading file contents.
+ // Permit webdav operations like PROPFIND that retrieve metadata
+ // but not file contents.
+ return false
+ }
+ return true
+}
+
+func (h *handler) logUploadOrDownload(
+ r *http.Request,
+ client *arvadosclient.ArvadosClient,
+ fs arvados.CustomFileSystem,
+ filepath string,
+ collection *arvados.Collection,
+ user *arvados.User) {
+
+ log := ctxlog.FromContext(r.Context())
+ props := make(map[string]string)
+ props["reqPath"] = r.URL.Path
+ var useruuid string
+ if user != nil {
+ log = log.WithField("user_uuid", user.UUID).
+ WithField("user_full_name", user.FullName)
+ useruuid = user.UUID
+ } else {
+ useruuid = fmt.Sprintf("%s-tpzed-anonymouspublic", h.Cluster.ClusterID)
+ }
+ if collection == nil && fs != nil {
+ collection, filepath = h.determineCollection(fs, filepath)
+ }
+ if collection != nil {
+ log = log.WithField("collection_file_path", filepath)
+ props["collection_file_path"] = filepath
+ // h.determineCollection populates the collection_uuid
+ // prop with the PDH, if this collection is being
+ // accessed via PDH. For logging, we use a different
+ // field depending on whether it's a UUID or PDH.
+ if len(collection.UUID) > 32 {
+ log = log.WithField("portable_data_hash", collection.UUID)
+ props["portable_data_hash"] = collection.UUID
+ } else {
+ log = log.WithField("collection_uuid", collection.UUID)
+ props["collection_uuid"] = collection.UUID
+ }
+ }
+ if r.Method == "PUT" || r.Method == "POST" {
+ log.Info("File upload")
+ if h.Cluster.Collections.WebDAVLogEvents {
+ go func() {
+ lr := arvadosclient.Dict{"log": arvadosclient.Dict{
+ "object_uuid": useruuid,
+ "event_type": "file_upload",
+ "properties": props}}
+ err := client.Create("logs", lr, nil)
+ if err != nil {
+ log.WithError(err).Error("Failed to create upload log event on API server")
+ }
+ }()
+ }
+ } else if r.Method == "GET" {
+ if collection != nil && collection.PortableDataHash != "" {
+ log = log.WithField("portable_data_hash", collection.PortableDataHash)
+ props["portable_data_hash"] = collection.PortableDataHash
+ }
+ log.Info("File download")
+ if h.Cluster.Collections.WebDAVLogEvents {
+ go func() {
+ lr := arvadosclient.Dict{"log": arvadosclient.Dict{
+ "object_uuid": useruuid,
+ "event_type": "file_download",
+ "properties": props}}
+ err := client.Create("logs", lr, nil)
+ if err != nil {
+ log.WithError(err).Error("Failed to create download log event on API server")
+ }
+ }()
+ }
+ }
+}
+
+func (h *handler) determineCollection(fs arvados.CustomFileSystem, path string) (*arvados.Collection, string) {
+ target := strings.TrimSuffix(path, "/")
+ for {
+ fi, err := fs.Stat(target)
+ if err != nil {
+ return nil, ""
+ }
+ switch src := fi.Sys().(type) {
+ case *arvados.Collection:
+ return src, strings.TrimPrefix(path[len(target):], "/")
+ case *arvados.Group:
+ return nil, ""
+ default:
+ if _, ok := src.(error); ok {
+ return nil, ""
+ }
+ }
+ // Try parent
+ cut := strings.LastIndexByte(target, '/')
+ if cut < 0 {
+ return nil, ""
+ }
+ target = target[:cut]
+ }
+}