19088: Export collection/project properties as x-amz-meta tags.
[arvados.git] / services / keep-web / s3.go
index f03ff01b8127d6ee1d1056ee72bee5bd6507d26b..4117dafbc6f248e14cafa41cbfdd0193d1408fc9 100644 (file)
@@ -2,18 +2,20 @@
 //
 // SPDX-License-Identifier: AGPL-3.0
 
-package main
+package keepweb
 
 import (
        "crypto/hmac"
        "crypto/sha256"
        "encoding/base64"
+       "encoding/json"
        "encoding/xml"
        "errors"
        "fmt"
        "hash"
        "io"
        "net/http"
+       "net/textproto"
        "net/url"
        "os"
        "path/filepath"
@@ -24,7 +26,9 @@ import (
        "time"
 
        "git.arvados.org/arvados.git/sdk/go/arvados"
+       "git.arvados.org/arvados.git/sdk/go/arvadosclient"
        "git.arvados.org/arvados.git/sdk/go/ctxlog"
+       "git.arvados.org/arvados.git/sdk/go/keepclient"
        "github.com/AdRoll/goamz/s3"
 )
 
@@ -136,15 +140,39 @@ func s3stringToSign(alg, scope, signedHeaders string, r *http.Request) (string,
                }
        }
 
-       normalizedURL := *r.URL
-       normalizedURL.RawPath = ""
-       normalizedURL.Path = reMultipleSlashChars.ReplaceAllString(normalizedURL.Path, "/")
-       ctxlog.FromContext(r.Context()).Infof("escapedPath %s", normalizedURL.EscapedPath())
-       canonicalRequest := fmt.Sprintf("%s\n%s\n%s\n%s\n%s\n%s", r.Method, normalizedURL.EscapedPath(), s3querystring(r.URL), canonicalHeaders, signedHeaders, r.Header.Get("X-Amz-Content-Sha256"))
+       normalizedPath := normalizePath(r.URL.Path)
+       ctxlog.FromContext(r.Context()).Debugf("normalizedPath %q", normalizedPath)
+       canonicalRequest := fmt.Sprintf("%s\n%s\n%s\n%s\n%s\n%s", r.Method, normalizedPath, s3querystring(r.URL), canonicalHeaders, signedHeaders, r.Header.Get("X-Amz-Content-Sha256"))
        ctxlog.FromContext(r.Context()).Debugf("s3stringToSign: canonicalRequest %s", canonicalRequest)
        return fmt.Sprintf("%s\n%s\n%s\n%s", alg, r.Header.Get("X-Amz-Date"), scope, hashdigest(sha256.New(), canonicalRequest)), nil
 }
 
+func normalizePath(s string) string {
+       // (url.URL).EscapedPath() would be incorrect here. AWS
+       // documentation specifies the URL path should be normalized
+       // according to RFC 3986, i.e., unescaping ALPHA / DIGIT / "-"
+       // / "." / "_" / "~". The implication is that everything other
+       // than those chars (and "/") _must_ be percent-encoded --
+       // even chars like ";" and "," that are not normally
+       // percent-encoded in paths.
+       out := ""
+       for _, c := range []byte(reMultipleSlashChars.ReplaceAllString(s, "/")) {
+               if (c >= 'a' && c <= 'z') ||
+                       (c >= 'A' && c <= 'Z') ||
+                       (c >= '0' && c <= '9') ||
+                       c == '-' ||
+                       c == '.' ||
+                       c == '_' ||
+                       c == '~' ||
+                       c == '/' {
+                       out += string(c)
+               } else {
+                       out += fmt.Sprintf("%%%02X", c)
+               }
+       }
+       return out
+}
+
 func s3signature(secretKey, scope, signedHeaders, stringToSign string) (string, error) {
        // scope is {datestamp}/{region}/{service}/aws4_request
        drs := strings.Split(scope, "/")
@@ -196,8 +224,8 @@ func (h *handler) checks3signature(r *http.Request) (string, error) {
        }
 
        client := (&arvados.Client{
-               APIHost:  h.Config.cluster.Services.Controller.ExternalURL.Host,
-               Insecure: h.Config.cluster.TLS.Insecure,
+               APIHost:  h.Cluster.Services.Controller.ExternalURL.Host,
+               Insecure: h.Cluster.TLS.Insecure,
        }).WithRequestID(r.Header.Get("X-Request-Id"))
        var aca arvados.APIClientAuthorization
        var secret string
@@ -205,7 +233,7 @@ func (h *handler) checks3signature(r *http.Request) (string, error) {
        if len(key) == 27 && key[5:12] == "-gj3su-" {
                // Access key is the UUID of an Arvados token, secret
                // key is the secret part.
-               ctx := arvados.ContextWithAuthorization(r.Context(), "Bearer "+h.Config.cluster.SystemRootToken)
+               ctx := arvados.ContextWithAuthorization(r.Context(), "Bearer "+h.Cluster.SystemRootToken)
                err = client.RequestAndDecodeContext(ctx, &aca, "GET", "arvados/v1/api_client_authorizations/"+key, nil, nil)
                secret = aca.APIToken
        } else {
@@ -285,32 +313,38 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool {
 
        var err error
        var fs arvados.CustomFileSystem
+       var arvclient *arvadosclient.ArvadosClient
        if r.Method == http.MethodGet || r.Method == http.MethodHead {
                // Use a single session (cached FileSystem) across
                // multiple read requests.
-               fs, err = h.Config.Cache.GetSession(token)
+               var sess *cachedSession
+               fs, sess, err = h.Cache.GetSession(token)
                if err != nil {
                        s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusInternalServerError)
                        return true
                }
+               arvclient = sess.arvadosclient
        } else {
                // Create a FileSystem for this request, to avoid
                // exposing incomplete write operations to concurrent
                // requests.
-               _, kc, client, release, err := h.getClients(r.Header.Get("X-Request-Id"), token)
+               var kc *keepclient.KeepClient
+               var release func()
+               var client *arvados.Client
+               arvclient, kc, client, release, err = h.getClients(r.Header.Get("X-Request-Id"), token)
                if err != nil {
                        s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusInternalServerError)
                        return true
                }
                defer release()
                fs = client.SiteFileSystem(kc)
-               fs.ForwardSlashNameSubstitution(h.Config.cluster.Collections.ForwardSlashNameSubstitution)
+               fs.ForwardSlashNameSubstitution(h.Cluster.Collections.ForwardSlashNameSubstitution)
        }
 
        var objectNameGiven bool
        var bucketName string
        fspath := "/by_id"
-       if id := parseCollectionIDFromDNSName(r.Host); id != "" {
+       if id := arvados.CollectionIDFromDNSName(r.Host); id != "" {
                fspath += "/" + id
                bucketName = id
                objectNameGiven = strings.Count(strings.TrimSuffix(r.URL.Path, "/"), "/") > 0
@@ -333,7 +367,7 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool {
                        w.Header().Set("Content-Type", "application/xml")
                        io.WriteString(w, xml.Header)
                        fmt.Fprintln(w, `<LocationConstraint><LocationConstraint xmlns="http://s3.amazonaws.com/doc/2006-03-01/">`+
-                               h.Config.cluster.ClusterID+
+                               h.Cluster.ClusterID+
                                `</LocationConstraint></LocationConstraint>`)
                } else if reRawQueryIndicatesAPI.MatchString(r.URL.RawQuery) {
                        // GetBucketWebsite ("GET /bucketid/?website"), GetBucketTagging, etc.
@@ -353,6 +387,7 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool {
                if r.Method == "HEAD" && !objectNameGiven {
                        // HeadBucket
                        if err == nil && fi.IsDir() {
+                               setFileInfoHeaders(w.Header(), fs, fspath)
                                w.WriteHeader(http.StatusOK)
                        } else if os.IsNotExist(err) {
                                s3ErrorResponse(w, NoSuchBucket, "The specified bucket does not exist.", r.URL.Path, http.StatusNotFound)
@@ -361,7 +396,8 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool {
                        }
                        return true
                }
-               if err == nil && fi.IsDir() && objectNameGiven && strings.HasSuffix(fspath, "/") && h.Config.cluster.Collections.S3FolderObjects {
+               if err == nil && fi.IsDir() && objectNameGiven && strings.HasSuffix(fspath, "/") && h.Cluster.Collections.S3FolderObjects {
+                       setFileInfoHeaders(w.Header(), fs, fspath)
                        w.Header().Set("Content-Type", "application/x-directory")
                        w.WriteHeader(http.StatusOK)
                        return true
@@ -372,9 +408,18 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool {
                        s3ErrorResponse(w, NoSuchKey, "The specified key does not exist.", r.URL.Path, http.StatusNotFound)
                        return true
                }
+
+               tokenUser, err := h.Cache.GetTokenUser(token)
+               if !h.userPermittedToUploadOrDownload(r.Method, tokenUser) {
+                       http.Error(w, "Not permitted", http.StatusForbidden)
+                       return true
+               }
+               h.logUploadOrDownload(r, arvclient, fs, fspath, nil, tokenUser)
+
                // shallow copy r, and change URL path
                r := *r
                r.URL.Path = fspath
+               setFileInfoHeaders(w.Header(), fs, fspath)
                http.FileServer(fs).ServeHTTP(w, &r)
                return true
        case r.Method == http.MethodPut:
@@ -389,7 +434,7 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool {
                }
                var objectIsDir bool
                if strings.HasSuffix(fspath, "/") {
-                       if !h.Config.cluster.Collections.S3FolderObjects {
+                       if !h.Cluster.Collections.S3FolderObjects {
                                s3ErrorResponse(w, InvalidArgument, "invalid object name: trailing slash", r.URL.Path, http.StatusBadRequest)
                                return true
                        }
@@ -431,7 +476,7 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool {
                                        return true
                                }
                                err = fs.Mkdir(dir, 0755)
-                               if err == arvados.ErrInvalidArgument {
+                               if errors.Is(err, arvados.ErrInvalidArgument) || errors.Is(err, arvados.ErrInvalidOperation) {
                                        // Cannot create a directory
                                        // here.
                                        err = fmt.Errorf("mkdir %q failed: %w", dir, err)
@@ -455,6 +500,14 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool {
                                return true
                        }
                        defer f.Close()
+
+                       tokenUser, err := h.Cache.GetTokenUser(token)
+                       if !h.userPermittedToUploadOrDownload(r.Method, tokenUser) {
+                               http.Error(w, "Not permitted", http.StatusForbidden)
+                               return true
+                       }
+                       h.logUploadOrDownload(r, arvclient, fs, fspath, nil, tokenUser)
+
                        _, err = io.Copy(f, r.Body)
                        if err != nil {
                                err = fmt.Errorf("write to %q failed: %w", r.URL.Path, err)
@@ -475,7 +528,7 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool {
                        return true
                }
                // Ensure a subsequent read operation will see the changes.
-               h.Config.Cache.ResetSession(token)
+               h.Cache.ResetSession(token)
                w.WriteHeader(http.StatusOK)
                return true
        case r.Method == http.MethodDelete:
@@ -529,7 +582,7 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool {
                        return true
                }
                // Ensure a subsequent read operation will see the changes.
-               h.Config.Cache.ResetSession(token)
+               h.Cache.ResetSession(token)
                w.WriteHeader(http.StatusNoContent)
                return true
        default:
@@ -538,6 +591,48 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool {
        }
 }
 
+func setFileInfoHeaders(header http.Header, fs arvados.CustomFileSystem, path string) {
+       path = strings.TrimSuffix(path, "/")
+       var props map[string]interface{}
+       for {
+               fi, err := fs.Stat(path)
+               if err != nil {
+                       return
+               }
+               switch src := fi.Sys().(type) {
+               case *arvados.Collection:
+                       props = src.Properties
+               case *arvados.Group:
+                       props = src.Properties
+               default:
+                       // Try parent
+                       cut := strings.LastIndexByte(path, '/')
+                       if cut < 0 {
+                               return
+                       }
+                       path = path[:cut]
+                       continue
+               }
+               break
+       }
+       for k, v := range props {
+               if !validMIMEHeaderKey(k) {
+                       continue
+               }
+               k = "x-amz-meta-" + k
+               if s, ok := v.(string); ok {
+                       header.Set(k, s)
+               } else if j, err := json.Marshal(v); err == nil {
+                       header.Set(k, string(j))
+               }
+       }
+}
+
+func validMIMEHeaderKey(k string) bool {
+       check := "z-" + k
+       return check != textproto.CanonicalMIMEHeaderKey(check)
+}
+
 // Call fn on the given path (directory) and its contents, in
 // lexicographic order.
 //
@@ -708,7 +803,7 @@ func (h *handler) s3list(bucket string, w http.ResponseWriter, r *http.Request,
                if path < params.marker || path < params.prefix || path <= params.startAfter {
                        return nil
                }
-               if fi.IsDir() && !h.Config.cluster.Collections.S3FolderObjects {
+               if fi.IsDir() && !h.Cluster.Collections.S3FolderObjects {
                        // Note we don't add anything to
                        // commonPrefixes here even if delimiter is
                        // "/". We descend into the directory, and