Merge branch '21907-cache-s3-token'
[arvados.git] / services / keep-web / s3.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package keepweb
6
7 import (
8         "crypto/hmac"
9         "crypto/sha256"
10         "encoding/base64"
11         "encoding/json"
12         "encoding/xml"
13         "errors"
14         "fmt"
15         "hash"
16         "io"
17         "mime"
18         "net/http"
19         "net/textproto"
20         "net/url"
21         "os"
22         "path/filepath"
23         "regexp"
24         "sort"
25         "strconv"
26         "strings"
27         "time"
28
29         "git.arvados.org/arvados.git/sdk/go/arvados"
30         "git.arvados.org/arvados.git/sdk/go/ctxlog"
31 )
32
// Tunables for the S3-compatible API.
//
// s3MaxKeys is presumably the cap on entries per list response (it
// is not referenced in the code visible here -- TODO confirm).
//
// s3SignAlgorithm is the Authorization header scheme name for V4
// signatures, and is also the first line of the string to sign.
//
// s3MaxClockSkew is the largest accepted difference, in either
// direction, between a request's timestamp and the server clock.
//
// s3SecretCacheTidyInterval is the minimum time between sweeps that
// remove expired entries from the S3 secret cache.
const (
	s3MaxKeys                 = 1000
	s3SignAlgorithm           = "AWS4-HMAC-SHA256"
	s3MaxClockSkew            = 5 * time.Minute
	s3SecretCacheTidyInterval = time.Minute
)
39
// commonPrefix is one entry of a list response's CommonPrefixes
// field. It wraps the prefix in a struct (rather than using a bare
// string) so that a nil slice of these marshals to no XML element at
// all.
type commonPrefix struct {
	Prefix string
}
43
// listV1Resp is marshaled as a ListBucketResult XML element in the
// S3 2006-03-01 namespace. Presumably it is the response body for
// the original (V1) ListObjects API; the code that populates it is
// not in this part of the file.
type listV1Resp struct {
	XMLName     string `xml:"http://s3.amazonaws.com/doc/2006-03-01/ ListBucketResult"`
	Name        string
	Prefix      string
	Delimiter   string
	Marker      string
	MaxKeys     int
	IsTruncated bool
	Contents    []s3Key
	// If we use a []string here, xml marshals an empty tag when
	// CommonPrefixes is nil, which confuses some clients.  Fix by
	// using this nested struct instead.
	CommonPrefixes []commonPrefix
	// Similarly, we need omitempty here, because an empty
	// tag confuses some clients (e.g.,
	// github.com/aws/aws-sdk-net never terminates its
	// paging loop).
	NextMarker string `xml:"NextMarker,omitempty"`
	// ListObjectsV2 has a KeyCount response field.
	KeyCount int
}
65
// listV2Resp is marshaled as a ListBucketResult XML element in the
// S3 2006-03-01 namespace. Presumably it is the response body for
// the ListObjectsV2 API; the code that populates it is not in this
// part of the file.
//
// The string fields tagged omitempty are left out of the XML
// entirely when they are empty.
type listV2Resp struct {
	XMLName               string `xml:"http://s3.amazonaws.com/doc/2006-03-01/ ListBucketResult"`
	IsTruncated           bool
	Contents              []s3Key
	Name                  string
	Prefix                string
	Delimiter             string
	MaxKeys               int
	CommonPrefixes        []commonPrefix
	EncodingType          string `xml:",omitempty"`
	KeyCount              int
	ContinuationToken     string `xml:",omitempty"`
	NextContinuationToken string `xml:",omitempty"`
	StartAfter            string `xml:",omitempty"`
}
81
// s3Key describes one object in the Contents list of a list
// response (see listV1Resp and listV2Resp).
type s3Key struct {
	Key          string
	LastModified string
	Size         int64
	// The following fields are not populated, but are here in
	// case clients rely on the keys being present in xml
	// responses.
	ETag         string
	StorageClass string
	Owner        struct {
		ID          string
		DisplayName string
	}
}
96
// cachedS3Secret is an entry in the handler's S3 secret cache: a
// token record obtained from the API server, plus the time from
// which the entry must no longer be used (see isValidAt).
type cachedS3Secret struct {
	auth   *arvados.APIClientAuthorization
	expiry time.Time
}
101
102 func newCachedS3Secret(auth *arvados.APIClientAuthorization, maxExpiry time.Time) *cachedS3Secret {
103         var expiry time.Time
104         if auth.ExpiresAt.IsZero() || maxExpiry.Before(auth.ExpiresAt) {
105                 expiry = maxExpiry
106         } else {
107                 expiry = auth.ExpiresAt
108         }
109         return &cachedS3Secret{
110                 auth:   auth,
111                 expiry: expiry,
112         }
113 }
114
115 func (cs *cachedS3Secret) isValidAt(t time.Time) bool {
116         return cs.auth != nil &&
117                 !cs.expiry.IsZero() &&
118                 !t.IsZero() &&
119                 t.Before(cs.expiry)
120 }
121
// hmacstring returns the HMAC-SHA256 of msg, keyed with key.
func hmacstring(msg string, key []byte) []byte {
	mac := hmac.New(sha256.New, key)
	io.WriteString(mac, msg)
	return mac.Sum(make([]byte, 0, sha256.Size))
}
127
// hashdigest writes payload to h and returns the resulting sum as a
// lowercase hex string. Anything already written to h is included in
// the sum.
func hashdigest(h hash.Hash, payload string) string {
	io.WriteString(h, payload)
	sum := h.Sum(nil)
	return fmt.Sprintf("%x", sum)
}
132
133 // Signing key for given secret key and request attrs.
134 func s3signatureKey(key, datestamp, regionName, serviceName string) []byte {
135         return hmacstring("aws4_request",
136                 hmacstring(serviceName,
137                         hmacstring(regionName,
138                                 hmacstring(datestamp, []byte("AWS4"+key)))))
139 }
140
// s3querystring returns the canonical query string for an S3 V4
// signature: names and values are percent-encoded (with spaces as
// %20 instead of +), parameters are sorted by encoded name,
// repeated parameters are additionally sorted by encoded value, and
// the resulting name=value pairs are joined with &.
//
// A parameter given without a value (e.g. "?versioning") is
// rendered as "versioning=".
func s3querystring(u *url.URL) string {
	// Parse once; u.Query() re-parses RawQuery on every call.
	query := u.Query()
	escape := func(s string) string {
		return strings.ReplaceAll(url.QueryEscape(s), "+", "%20")
	}

	keys := make([]string, 0, len(query))
	joined := make(map[string]string, len(query))
	for k, vs := range query {
		ek := escape(k)
		pairs := make([]string, 0, len(vs))
		for _, v := range vs {
			pairs = append(pairs, ek+"="+escape(v))
		}
		// All pairs share the "name=" prefix, so this orders
		// them by encoded value, as the signing spec requires
		// for duplicate parameter names.
		sort.Strings(pairs)
		keys = append(keys, ek)
		joined[ek] = strings.Join(pairs, "&")
	}
	sort.Strings(keys)
	for i, ek := range keys {
		keys[i] = joined[ek]
	}
	return strings.Join(keys, "&")
}
163
// reMultipleSlashChars matches any run of two or more consecutive
// "/" characters.
var reMultipleSlashChars = regexp.MustCompile(`//+`)
165
166 func s3stringToSign(alg, scope, signedHeaders string, r *http.Request) (string, error) {
167         timefmt, timestr := "20060102T150405Z", r.Header.Get("X-Amz-Date")
168         if timestr == "" {
169                 timefmt, timestr = time.RFC1123, r.Header.Get("Date")
170         }
171         t, err := time.Parse(timefmt, timestr)
172         if err != nil {
173                 return "", fmt.Errorf("invalid timestamp %q: %s", timestr, err)
174         }
175         if skew := time.Now().Sub(t); skew < -s3MaxClockSkew || skew > s3MaxClockSkew {
176                 return "", errors.New("exceeded max clock skew")
177         }
178
179         var canonicalHeaders string
180         for _, h := range strings.Split(signedHeaders, ";") {
181                 if h == "host" {
182                         canonicalHeaders += h + ":" + r.Host + "\n"
183                 } else {
184                         canonicalHeaders += h + ":" + r.Header.Get(h) + "\n"
185                 }
186         }
187
188         normalizedPath := normalizePath(r.URL.Path)
189         ctxlog.FromContext(r.Context()).Debugf("normalizedPath %q", normalizedPath)
190         canonicalRequest := fmt.Sprintf("%s\n%s\n%s\n%s\n%s\n%s", r.Method, normalizedPath, s3querystring(r.URL), canonicalHeaders, signedHeaders, r.Header.Get("X-Amz-Content-Sha256"))
191         ctxlog.FromContext(r.Context()).Debugf("s3stringToSign: canonicalRequest %s", canonicalRequest)
192         return fmt.Sprintf("%s\n%s\n%s\n%s", alg, r.Header.Get("X-Amz-Date"), scope, hashdigest(sha256.New(), canonicalRequest)), nil
193 }
194
195 func normalizePath(s string) string {
196         // (url.URL).EscapedPath() would be incorrect here. AWS
197         // documentation specifies the URL path should be normalized
198         // according to RFC 3986, i.e., unescaping ALPHA / DIGIT / "-"
199         // / "." / "_" / "~". The implication is that everything other
200         // than those chars (and "/") _must_ be percent-encoded --
201         // even chars like ";" and "," that are not normally
202         // percent-encoded in paths.
203         out := ""
204         for _, c := range []byte(reMultipleSlashChars.ReplaceAllString(s, "/")) {
205                 if (c >= 'a' && c <= 'z') ||
206                         (c >= 'A' && c <= 'Z') ||
207                         (c >= '0' && c <= '9') ||
208                         c == '-' ||
209                         c == '.' ||
210                         c == '_' ||
211                         c == '~' ||
212                         c == '/' {
213                         out += string(c)
214                 } else {
215                         out += fmt.Sprintf("%%%02X", c)
216                 }
217         }
218         return out
219 }
220
221 func s3signature(secretKey, scope, signedHeaders, stringToSign string) (string, error) {
222         // scope is {datestamp}/{region}/{service}/aws4_request
223         drs := strings.Split(scope, "/")
224         if len(drs) != 4 {
225                 return "", fmt.Errorf("invalid scope %q", scope)
226         }
227         key := s3signatureKey(secretKey, drs[0], drs[1], drs[2])
228         return hashdigest(hmac.New(sha256.New, key), stringToSign), nil
229 }
230
// v2tokenUnderscore matches the start of a v2 Arvados token whose
// "/" separators have been written as "_".
var v2tokenUnderscore = regexp.MustCompile(`^v2_[a-z0-9]{5}-gj3su-[a-z0-9]{15}_`)

// unescapeKey converts an S3 access key, as received from a client,
// back to the string it stands for.
//
// If key looks like an entire Arvados token with "/" replaced by "_"
// (done to avoid colliding with the Authorization header format),
// every "_" is turned back into "/". Otherwise key is URL
// path-unescaped, or returned unchanged if it is not validly
// escaped.
func unescapeKey(key string) string {
	if v2tokenUnderscore.MatchString(key) {
		return strings.ReplaceAll(key, "_", "/")
	}
	unescaped, err := url.PathUnescape(key)
	if err != nil {
		return key
	}
	return unescaped
}
245
246 func (h *handler) updateS3SecretCache(aca *arvados.APIClientAuthorization, key string) {
247         now := time.Now()
248         ttlExpiry := now.Add(h.Cluster.Collections.WebDAVCache.TTL.Duration())
249         cachedSecret := newCachedS3Secret(aca, ttlExpiry)
250
251         h.s3SecretCacheMtx.Lock()
252         defer h.s3SecretCacheMtx.Unlock()
253
254         if h.s3SecretCache == nil {
255                 h.s3SecretCache = make(map[string]*cachedS3Secret)
256         }
257         h.s3SecretCache[key] = cachedSecret
258         h.s3SecretCache[cachedSecret.auth.UUID] = cachedSecret
259         h.s3SecretCache[cachedSecret.auth.APIToken] = cachedSecret
260         h.s3SecretCache[cachedSecret.auth.TokenV2()] = cachedSecret
261
262         if h.s3SecretCacheNextTidy.After(now) {
263                 return
264         }
265         for key, entry := range h.s3SecretCache {
266                 if entry.expiry.Before(now) {
267                         delete(h.s3SecretCache, key)
268                 }
269         }
270         h.s3SecretCacheNextTidy = now.Add(s3SecretCacheTidyInterval)
271 }
272
273 // checks3signature verifies the given S3 V4 signature and returns the
274 // Arvados token that corresponds to the given accessKey. An error is
275 // returned if accessKey is not a valid token UUID or the signature
276 // does not match.
277 func (h *handler) checks3signature(r *http.Request) (string, error) {
278         var key, scope, signedHeaders, signature string
279         authstring := strings.TrimPrefix(r.Header.Get("Authorization"), s3SignAlgorithm+" ")
280         for _, cmpt := range strings.Split(authstring, ",") {
281                 cmpt = strings.TrimSpace(cmpt)
282                 split := strings.SplitN(cmpt, "=", 2)
283                 switch {
284                 case len(split) != 2:
285                         // (?) ignore
286                 case split[0] == "Credential":
287                         keyandscope := strings.SplitN(split[1], "/", 2)
288                         if len(keyandscope) == 2 {
289                                 key, scope = keyandscope[0], keyandscope[1]
290                         }
291                 case split[0] == "SignedHeaders":
292                         signedHeaders = split[1]
293                 case split[0] == "Signature":
294                         signature = split[1]
295                 }
296         }
297         keyIsUUID := len(key) == 27 && key[5:12] == "-gj3su-"
298         unescapedKey := unescapeKey(key)
299
300         h.s3SecretCacheMtx.Lock()
301         cached := h.s3SecretCache[unescapedKey]
302         h.s3SecretCacheMtx.Unlock()
303         usedCache := cached != nil && cached.isValidAt(time.Now())
304         var aca *arvados.APIClientAuthorization
305         if usedCache {
306                 aca = cached.auth
307         } else {
308                 var acaAuth, acaPath string
309                 if keyIsUUID {
310                         acaAuth = h.Cluster.SystemRootToken
311                         acaPath = key
312                 } else {
313                         acaAuth = unescapedKey
314                         acaPath = "current"
315                 }
316                 client := (&arvados.Client{
317                         APIHost:  h.Cluster.Services.Controller.ExternalURL.Host,
318                         Insecure: h.Cluster.TLS.Insecure,
319                 }).WithRequestID(r.Header.Get("X-Request-Id"))
320                 ctx := arvados.ContextWithAuthorization(r.Context(), "Bearer "+acaAuth)
321                 aca = new(arvados.APIClientAuthorization)
322                 err := client.RequestAndDecodeContext(ctx, aca, "GET", "arvados/v1/api_client_authorizations/"+acaPath, nil, nil)
323                 if err != nil {
324                         ctxlog.FromContext(r.Context()).WithError(err).WithField("UUID", key).Info("token lookup failed")
325                         return "", errors.New("invalid access key")
326                 }
327         }
328         var secret string
329         if keyIsUUID {
330                 secret = aca.APIToken
331         } else {
332                 secret = key
333         }
334         stringToSign, err := s3stringToSign(s3SignAlgorithm, scope, signedHeaders, r)
335         if err != nil {
336                 return "", err
337         }
338         expect, err := s3signature(secret, scope, signedHeaders, stringToSign)
339         if err != nil {
340                 return "", err
341         } else if expect != signature {
342                 return "", fmt.Errorf("signature does not match (scope %q signedHeaders %q stringToSign %q)", scope, signedHeaders, stringToSign)
343         }
344         if !usedCache {
345                 h.updateS3SecretCache(aca, unescapedKey)
346         }
347         return aca.TokenV2(), nil
348 }
349
// s3ErrorResponse writes an S3-style XML error document to w, with
// the given HTTP status code. The document is an <Error> element
// containing Code (s3code), Message, Resource, and an empty
// RequestId. Encoding and write errors are not reported.
func s3ErrorResponse(w http.ResponseWriter, s3code string, message string, resource string, code int) {
	hdr := w.Header()
	hdr.Set("Content-Type", "application/xml")
	hdr.Set("X-Content-Type-Options", "nosniff")
	w.WriteHeader(code)

	body := struct {
		Code      string
		Message   string
		Resource  string
		RequestId string
	}{
		Code:      s3code,
		Message:   message,
		Resource:  resource,
		RequestId: "",
	}
	fmt.Fprint(w, xml.Header)
	root := xml.StartElement{Name: xml.Name{Local: "Error"}}
	xml.NewEncoder(w).EncodeElement(body, root)
}
368
// S3 error codes, used as the Code element of error responses
// written by s3ErrorResponse.
var (
	NoSuchKey             = "NoSuchKey"
	NoSuchBucket          = "NoSuchBucket"
	InvalidArgument       = "InvalidArgument"
	InternalError         = "InternalError"
	UnauthorizedAccess    = "UnauthorizedAccess"
	InvalidRequest        = "InvalidRequest"
	SignatureDoesNotMatch = "SignatureDoesNotMatch"
)
376
// reRawQueryIndicatesAPI matches a raw query string whose first
// component is a bare lowercase word with no "=value" part (e.g.
// "website" or "retention&versionID=x"). serveS3 treats such a query
// as a request for an S3 sub-API rather than a plain object or list
// operation.
var reRawQueryIndicatesAPI = regexp.MustCompile(`^[a-z]+(&|$)`)
378
379 // serveS3 handles r and returns true if r is a request from an S3
380 // client, otherwise it returns false.
381 func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool {
382         var token string
383         if auth := r.Header.Get("Authorization"); strings.HasPrefix(auth, "AWS ") {
384                 split := strings.SplitN(auth[4:], ":", 2)
385                 if len(split) < 2 {
386                         s3ErrorResponse(w, InvalidRequest, "malformed Authorization header", r.URL.Path, http.StatusUnauthorized)
387                         return true
388                 }
389                 token = unescapeKey(split[0])
390         } else if strings.HasPrefix(auth, s3SignAlgorithm+" ") {
391                 t, err := h.checks3signature(r)
392                 if err != nil {
393                         s3ErrorResponse(w, SignatureDoesNotMatch, "signature verification failed: "+err.Error(), r.URL.Path, http.StatusForbidden)
394                         return true
395                 }
396                 token = t
397         } else {
398                 return false
399         }
400
401         fs, sess, tokenUser, err := h.Cache.GetSession(token)
402         if err != nil {
403                 s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusInternalServerError)
404                 return true
405         }
406         defer sess.Release()
407         readfs := fs
408         if writeMethod[r.Method] {
409                 // Create a FileSystem for this request, to avoid
410                 // exposing incomplete write operations to concurrent
411                 // requests.
412                 client := sess.client.WithRequestID(r.Header.Get("X-Request-Id"))
413                 fs = client.SiteFileSystem(sess.keepclient)
414                 fs.ForwardSlashNameSubstitution(h.Cluster.Collections.ForwardSlashNameSubstitution)
415         }
416
417         var objectNameGiven bool
418         var bucketName string
419         fspath := "/by_id"
420         if id := arvados.CollectionIDFromDNSName(r.Host); id != "" {
421                 fspath += "/" + id
422                 bucketName = id
423                 objectNameGiven = strings.Count(strings.TrimSuffix(r.URL.Path, "/"), "/") > 0
424         } else {
425                 bucketName = strings.SplitN(strings.TrimPrefix(r.URL.Path, "/"), "/", 2)[0]
426                 objectNameGiven = strings.Count(strings.TrimSuffix(r.URL.Path, "/"), "/") > 1
427         }
428         fspath += reMultipleSlashChars.ReplaceAllString(r.URL.Path, "/")
429
430         switch {
431         case r.Method == http.MethodGet && !objectNameGiven:
432                 // Path is "/{uuid}" or "/{uuid}/", has no object name
433                 if _, ok := r.URL.Query()["versioning"]; ok {
434                         // GetBucketVersioning
435                         w.Header().Set("Content-Type", "application/xml")
436                         io.WriteString(w, xml.Header)
437                         fmt.Fprintln(w, `<VersioningConfiguration xmlns="http://s3.amazonaws.com/doc/2006-03-01/"/>`)
438                 } else if _, ok = r.URL.Query()["location"]; ok {
439                         // GetBucketLocation
440                         w.Header().Set("Content-Type", "application/xml")
441                         io.WriteString(w, xml.Header)
442                         fmt.Fprintln(w, `<LocationConstraint><LocationConstraint xmlns="http://s3.amazonaws.com/doc/2006-03-01/">`+
443                                 h.Cluster.ClusterID+
444                                 `</LocationConstraint></LocationConstraint>`)
445                 } else if reRawQueryIndicatesAPI.MatchString(r.URL.RawQuery) {
446                         // GetBucketWebsite ("GET /bucketid/?website"), GetBucketTagging, etc.
447                         s3ErrorResponse(w, InvalidRequest, "API not supported", r.URL.Path+"?"+r.URL.RawQuery, http.StatusBadRequest)
448                 } else {
449                         // ListObjects
450                         h.s3list(bucketName, w, r, fs)
451                 }
452                 return true
453         case r.Method == http.MethodGet || r.Method == http.MethodHead:
454                 if reRawQueryIndicatesAPI.MatchString(r.URL.RawQuery) {
455                         // GetObjectRetention ("GET /bucketid/objectid?retention&versionID=..."), etc.
456                         s3ErrorResponse(w, InvalidRequest, "API not supported", r.URL.Path+"?"+r.URL.RawQuery, http.StatusBadRequest)
457                         return true
458                 }
459                 fi, err := fs.Stat(fspath)
460                 if r.Method == "HEAD" && !objectNameGiven {
461                         // HeadBucket
462                         if err == nil && fi.IsDir() {
463                                 err = setFileInfoHeaders(w.Header(), fs, fspath)
464                                 if err != nil {
465                                         s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusBadGateway)
466                                         return true
467                                 }
468                                 w.WriteHeader(http.StatusOK)
469                         } else if os.IsNotExist(err) {
470                                 s3ErrorResponse(w, NoSuchBucket, "The specified bucket does not exist.", r.URL.Path, http.StatusNotFound)
471                         } else {
472                                 s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusBadGateway)
473                         }
474                         return true
475                 }
476                 if err == nil && fi.IsDir() && objectNameGiven && strings.HasSuffix(fspath, "/") && h.Cluster.Collections.S3FolderObjects {
477                         err = setFileInfoHeaders(w.Header(), fs, fspath)
478                         if err != nil {
479                                 s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusBadGateway)
480                                 return true
481                         }
482                         w.Header().Set("Content-Type", "application/x-directory")
483                         w.WriteHeader(http.StatusOK)
484                         return true
485                 }
486                 if os.IsNotExist(err) ||
487                         (err != nil && err.Error() == "not a directory") ||
488                         (fi != nil && fi.IsDir()) {
489                         s3ErrorResponse(w, NoSuchKey, "The specified key does not exist.", r.URL.Path, http.StatusNotFound)
490                         return true
491                 }
492
493                 if !h.userPermittedToUploadOrDownload(r.Method, tokenUser) {
494                         http.Error(w, "Not permitted", http.StatusForbidden)
495                         return true
496                 }
497                 h.logUploadOrDownload(r, sess.arvadosclient, fs, fspath, nil, tokenUser)
498
499                 // shallow copy r, and change URL path
500                 r := *r
501                 r.URL.Path = fspath
502                 err = setFileInfoHeaders(w.Header(), fs, fspath)
503                 if err != nil {
504                         s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusBadGateway)
505                         return true
506                 }
507                 http.FileServer(fs).ServeHTTP(w, &r)
508                 return true
509         case r.Method == http.MethodPut:
510                 if reRawQueryIndicatesAPI.MatchString(r.URL.RawQuery) {
511                         // PutObjectAcl ("PUT /bucketid/objectid?acl&versionID=..."), etc.
512                         s3ErrorResponse(w, InvalidRequest, "API not supported", r.URL.Path+"?"+r.URL.RawQuery, http.StatusBadRequest)
513                         return true
514                 }
515                 if !objectNameGiven {
516                         s3ErrorResponse(w, InvalidArgument, "Missing object name in PUT request.", r.URL.Path, http.StatusBadRequest)
517                         return true
518                 }
519                 var objectIsDir bool
520                 if strings.HasSuffix(fspath, "/") {
521                         if !h.Cluster.Collections.S3FolderObjects {
522                                 s3ErrorResponse(w, InvalidArgument, "invalid object name: trailing slash", r.URL.Path, http.StatusBadRequest)
523                                 return true
524                         }
525                         n, err := r.Body.Read(make([]byte, 1))
526                         if err != nil && err != io.EOF {
527                                 s3ErrorResponse(w, InternalError, fmt.Sprintf("error reading request body: %s", err), r.URL.Path, http.StatusInternalServerError)
528                                 return true
529                         } else if n > 0 {
530                                 s3ErrorResponse(w, InvalidArgument, "cannot create object with trailing '/' char unless content is empty", r.URL.Path, http.StatusBadRequest)
531                                 return true
532                         } else if strings.SplitN(r.Header.Get("Content-Type"), ";", 2)[0] != "application/x-directory" {
533                                 s3ErrorResponse(w, InvalidArgument, "cannot create object with trailing '/' char unless Content-Type is 'application/x-directory'", r.URL.Path, http.StatusBadRequest)
534                                 return true
535                         }
536                         // Given PUT "foo/bar/", we'll use "foo/bar/."
537                         // in the "ensure parents exist" block below,
538                         // and then we'll be done.
539                         fspath += "."
540                         objectIsDir = true
541                 }
542                 fi, err := fs.Stat(fspath)
543                 if err != nil && err.Error() == "not a directory" {
544                         // requested foo/bar, but foo is a file
545                         s3ErrorResponse(w, InvalidArgument, "object name conflicts with existing object", r.URL.Path, http.StatusBadRequest)
546                         return true
547                 }
548                 if strings.HasSuffix(r.URL.Path, "/") && err == nil && !fi.IsDir() {
549                         // requested foo/bar/, but foo/bar is a file
550                         s3ErrorResponse(w, InvalidArgument, "object name conflicts with existing object", r.URL.Path, http.StatusBadRequest)
551                         return true
552                 }
553                 // create missing parent/intermediate directories, if any
554                 for i, c := range fspath {
555                         if i > 0 && c == '/' {
556                                 dir := fspath[:i]
557                                 if strings.HasSuffix(dir, "/") {
558                                         err = errors.New("invalid object name (consecutive '/' chars)")
559                                         s3ErrorResponse(w, InvalidArgument, err.Error(), r.URL.Path, http.StatusBadRequest)
560                                         return true
561                                 }
562                                 err = fs.Mkdir(dir, 0755)
563                                 if errors.Is(err, arvados.ErrInvalidArgument) || errors.Is(err, arvados.ErrInvalidOperation) {
564                                         // Cannot create a directory
565                                         // here.
566                                         err = fmt.Errorf("mkdir %q failed: %w", dir, err)
567                                         s3ErrorResponse(w, InvalidArgument, err.Error(), r.URL.Path, http.StatusBadRequest)
568                                         return true
569                                 } else if err != nil && !os.IsExist(err) {
570                                         err = fmt.Errorf("mkdir %q failed: %w", dir, err)
571                                         s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusInternalServerError)
572                                         return true
573                                 }
574                         }
575                 }
576                 if !objectIsDir {
577                         f, err := fs.OpenFile(fspath, os.O_WRONLY|os.O_TRUNC|os.O_CREATE, 0644)
578                         if os.IsNotExist(err) {
579                                 f, err = fs.OpenFile(fspath, os.O_WRONLY|os.O_TRUNC|os.O_CREATE, 0644)
580                         }
581                         if err != nil {
582                                 err = fmt.Errorf("open %q failed: %w", r.URL.Path, err)
583                                 s3ErrorResponse(w, InvalidArgument, err.Error(), r.URL.Path, http.StatusBadRequest)
584                                 return true
585                         }
586                         defer f.Close()
587
588                         if !h.userPermittedToUploadOrDownload(r.Method, tokenUser) {
589                                 http.Error(w, "Not permitted", http.StatusForbidden)
590                                 return true
591                         }
592                         h.logUploadOrDownload(r, sess.arvadosclient, fs, fspath, nil, tokenUser)
593
594                         _, err = io.Copy(f, r.Body)
595                         if err != nil {
596                                 err = fmt.Errorf("write to %q failed: %w", r.URL.Path, err)
597                                 s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusBadGateway)
598                                 return true
599                         }
600                         err = f.Close()
601                         if err != nil {
602                                 err = fmt.Errorf("write to %q failed: close: %w", r.URL.Path, err)
603                                 s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusBadGateway)
604                                 return true
605                         }
606                 }
607                 err = h.syncCollection(fs, readfs, fspath)
608                 if err != nil {
609                         err = fmt.Errorf("sync failed: %w", err)
610                         s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusInternalServerError)
611                         return true
612                 }
613                 w.WriteHeader(http.StatusOK)
614                 return true
615         case r.Method == http.MethodDelete:
616                 if reRawQueryIndicatesAPI.MatchString(r.URL.RawQuery) {
617                         // DeleteObjectTagging ("DELETE /bucketid/objectid?tagging&versionID=..."), etc.
618                         s3ErrorResponse(w, InvalidRequest, "API not supported", r.URL.Path+"?"+r.URL.RawQuery, http.StatusBadRequest)
619                         return true
620                 }
621                 if !objectNameGiven || r.URL.Path == "/" {
622                         s3ErrorResponse(w, InvalidArgument, "missing object name in DELETE request", r.URL.Path, http.StatusBadRequest)
623                         return true
624                 }
625                 if strings.HasSuffix(fspath, "/") {
626                         fspath = strings.TrimSuffix(fspath, "/")
627                         fi, err := fs.Stat(fspath)
628                         if os.IsNotExist(err) {
629                                 w.WriteHeader(http.StatusNoContent)
630                                 return true
631                         } else if err != nil {
632                                 s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusInternalServerError)
633                                 return true
634                         } else if !fi.IsDir() {
635                                 // if "foo" exists and is a file, then
636                                 // "foo/" doesn't exist, so we say
637                                 // delete was successful.
638                                 w.WriteHeader(http.StatusNoContent)
639                                 return true
640                         }
641                 } else if fi, err := fs.Stat(fspath); err == nil && fi.IsDir() {
642                         // if "foo" is a dir, it is visible via S3
643                         // only as "foo/", not "foo" -- so we leave
644                         // the dir alone and return 204 to indicate
645                         // that "foo" does not exist.
646                         w.WriteHeader(http.StatusNoContent)
647                         return true
648                 }
649                 err = fs.Remove(fspath)
650                 if os.IsNotExist(err) {
651                         w.WriteHeader(http.StatusNoContent)
652                         return true
653                 }
654                 if err != nil {
655                         err = fmt.Errorf("rm failed: %w", err)
656                         s3ErrorResponse(w, InvalidArgument, err.Error(), r.URL.Path, http.StatusBadRequest)
657                         return true
658                 }
659                 err = h.syncCollection(fs, readfs, fspath)
660                 if err != nil {
661                         err = fmt.Errorf("sync failed: %w", err)
662                         s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusInternalServerError)
663                         return true
664                 }
665                 w.WriteHeader(http.StatusNoContent)
666                 return true
667         default:
668                 s3ErrorResponse(w, InvalidRequest, "method not allowed", r.URL.Path, http.StatusMethodNotAllowed)
669                 return true
670         }
671 }
672
673 // Save modifications to the indicated collection in srcfs, then (if
674 // successful) ensure they are also reflected in dstfs.
675 func (h *handler) syncCollection(srcfs, dstfs arvados.CustomFileSystem, path string) error {
676         coll, _ := h.determineCollection(srcfs, path)
677         if coll == nil || coll.UUID == "" {
678                 return errors.New("could not determine collection to sync")
679         }
680         d, err := srcfs.OpenFile("by_id/"+coll.UUID, os.O_RDWR, 0777)
681         if err != nil {
682                 return err
683         }
684         defer d.Close()
685         err = d.Sync()
686         if err != nil {
687                 return err
688         }
689         snap, err := d.Snapshot()
690         if err != nil {
691                 return err
692         }
693         dstd, err := dstfs.OpenFile("by_id/"+coll.UUID, os.O_RDWR, 0777)
694         if err != nil {
695                 return err
696         }
697         defer dstd.Close()
698         return dstd.Splice(snap)
699 }
700
701 func setFileInfoHeaders(header http.Header, fs arvados.CustomFileSystem, path string) error {
702         maybeEncode := func(s string) string {
703                 for _, c := range s {
704                         if c > '\u007f' || c < ' ' {
705                                 return mime.BEncoding.Encode("UTF-8", s)
706                         }
707                 }
708                 return s
709         }
710         path = strings.TrimSuffix(path, "/")
711         var props map[string]interface{}
712         for {
713                 fi, err := fs.Stat(path)
714                 if err != nil {
715                         return err
716                 }
717                 switch src := fi.Sys().(type) {
718                 case *arvados.Collection:
719                         props = src.Properties
720                 case *arvados.Group:
721                         props = src.Properties
722                 default:
723                         if err, ok := src.(error); ok {
724                                 return err
725                         }
726                         // Try parent
727                         cut := strings.LastIndexByte(path, '/')
728                         if cut < 0 {
729                                 return nil
730                         }
731                         path = path[:cut]
732                         continue
733                 }
734                 break
735         }
736         for k, v := range props {
737                 if !validMIMEHeaderKey(k) {
738                         continue
739                 }
740                 k = "x-amz-meta-" + k
741                 if s, ok := v.(string); ok {
742                         header.Set(k, maybeEncode(s))
743                 } else if j, err := json.Marshal(v); err == nil {
744                         header.Set(k, maybeEncode(string(j)))
745                 }
746         }
747         return nil
748 }
749
// validMIMEHeaderKey reports whether k can be used as (part of) a
// MIME header key.
//
// textproto.CanonicalMIMEHeaderKey returns its argument unmodified
// when it contains a space or an invalid header field byte. A valid
// key that starts with a lowercase "z-" is always changed by
// canonicalization (to "Z-..."), so "the canonical form differs from
// the input" means "k is valid".
func validMIMEHeaderKey(k string) bool {
	probe := "z-" + k
	canonical := textproto.CanonicalMIMEHeaderKey(probe)
	return canonical != probe
}
754
755 // Call fn on the given path (directory) and its contents, in
756 // lexicographic order.
757 //
758 // If isRoot==true and path is not a directory, return nil.
759 //
760 // If fn returns filepath.SkipDir when called on a directory, don't
761 // descend into that directory.
762 func walkFS(fs arvados.CustomFileSystem, path string, isRoot bool, fn func(path string, fi os.FileInfo) error) error {
763         if isRoot {
764                 fi, err := fs.Stat(path)
765                 if os.IsNotExist(err) || (err == nil && !fi.IsDir()) {
766                         return nil
767                 } else if err != nil {
768                         return err
769                 }
770                 err = fn(path, fi)
771                 if err == filepath.SkipDir {
772                         return nil
773                 } else if err != nil {
774                         return err
775                 }
776         }
777         f, err := fs.Open(path)
778         if os.IsNotExist(err) && isRoot {
779                 return nil
780         } else if err != nil {
781                 return fmt.Errorf("open %q: %w", path, err)
782         }
783         defer f.Close()
784         if path == "/" {
785                 path = ""
786         }
787         fis, err := f.Readdir(-1)
788         if err != nil {
789                 return err
790         }
791         sort.Slice(fis, func(i, j int) bool { return fis[i].Name() < fis[j].Name() })
792         for _, fi := range fis {
793                 err = fn(path+"/"+fi.Name(), fi)
794                 if err == filepath.SkipDir {
795                         continue
796                 } else if err != nil {
797                         return err
798                 }
799                 if fi.IsDir() {
800                         err = walkFS(fs, path+"/"+fi.Name(), false, fn)
801                         if err != nil {
802                                 return err
803                         }
804                 }
805         }
806         return nil
807 }
808
// errDone is a sentinel returned by the s3list walk callback to stop
// walkFS early once the listing is complete or full. s3list does not
// treat it as a failure.
var errDone = errors.New("done")
810
// s3list handles an S3 ListObjects (v1) or ListObjectsV2 request for
// the given bucket and writes the XML response to w.
//
// The API version is selected by the "list-type" form value ("" for
// v1, "2" for v2). Other form values used are "delimiter",
// "max-keys", "prefix", "marker" (v1), and "continuation-token",
// "start-after", "encoding-type" (v2).
//
// The bucket's content is read from fs under "by_id/"+bucket.
//
// An invalid list-type, continuation-token, or encoding-type results
// in a 400 response; an error encountered while walking the
// filesystem results in a 500 response.
func (h *handler) s3list(bucket string, w http.ResponseWriter, r *http.Request, fs arvados.CustomFileSystem) {
	var params struct {
		v2                bool
		delimiter         string
		maxKeys           int
		prefix            string
		marker            string // decoded continuationToken (v2) or provided by client (v1)
		startAfter        string // v2
		continuationToken string // v2
		encodingTypeURL   bool   // v2
	}
	params.delimiter = r.FormValue("delimiter")
	// A missing, unparseable, non-positive, or too-large max-keys
	// value falls back to s3MaxKeys.
	if mk, _ := strconv.ParseInt(r.FormValue("max-keys"), 10, 64); mk > 0 && mk < s3MaxKeys {
		params.maxKeys = int(mk)
	} else {
		params.maxKeys = s3MaxKeys
	}
	params.prefix = r.FormValue("prefix")
	switch r.FormValue("list-type") {
	case "":
	case "2":
		params.v2 = true
	default:
		http.Error(w, "invalid list-type parameter", http.StatusBadRequest)
		return
	}
	if params.v2 {
		// The continuation token is the base64-encoded marker
		// (see NextContinuationToken below).
		params.continuationToken = r.FormValue("continuation-token")
		marker, err := base64.StdEncoding.DecodeString(params.continuationToken)
		if err != nil {
			http.Error(w, "invalid continuation token", http.StatusBadRequest)
			return
		}
		// marker and start-after perform the same function,
		// but we keep them separate so we can repeat them
		// back to the client in the response.
		params.marker = string(marker)
		params.startAfter = r.FormValue("start-after")
		switch r.FormValue("encoding-type") {
		case "":
		case "url":
			params.encodingTypeURL = true
		default:
			http.Error(w, "invalid encoding-type parameter", http.StatusBadRequest)
			return
		}
	} else {
		// marker is functionally equivalent to start-after.
		params.marker = r.FormValue("marker")
	}

	// startAfter is params.marker or params.startAfter, whichever
	// comes last.
	startAfter := params.startAfter
	if startAfter < params.marker {
		startAfter = params.marker
	}

	bucketdir := "by_id/" + bucket
	// walkpath is the directory (relative to bucketdir) we need
	// to walk: the innermost directory that is guaranteed to
	// contain all paths that have the requested prefix. Examples:
	// prefix "foo/bar"  => walkpath "foo"
	// prefix "foo/bar/" => walkpath "foo/bar"
	// prefix "foo"      => walkpath ""
	// prefix ""         => walkpath ""
	walkpath := params.prefix
	if cut := strings.LastIndex(walkpath, "/"); cut >= 0 {
		walkpath = walkpath[:cut]
	} else {
		walkpath = ""
	}

	// The result is accumulated in a v2 response struct
	// regardless of API version; for v1 the relevant fields are
	// copied into a listV1Resp at the end.
	resp := listV2Resp{
		Name:              bucket,
		Prefix:            params.prefix,
		Delimiter:         params.delimiter,
		MaxKeys:           params.maxKeys,
		ContinuationToken: r.FormValue("continuation-token"),
		StartAfter:        params.startAfter,
	}

	// nextMarker will be the last path we add to either
	// resp.Contents or commonPrefixes.  It will be included in
	// the response as NextMarker or NextContinuationToken if
	// needed.
	nextMarker := ""

	commonPrefixes := map[string]bool{}
	// full becomes true once the response holds maxKeys entries
	// (keys plus common prefixes). The walk then continues until
	// it finds one more entry that would have been added, at
	// which point IsTruncated is set and the walk stops.
	full := false
	err := walkFS(fs, strings.TrimSuffix(bucketdir+"/"+walkpath, "/"), true, func(path string, fi os.FileInfo) error {
		if path == bucketdir {
			return nil
		}
		// Strip the "by_id/{bucket}/" part, leaving the path
		// relative to the bucket, i.e., the S3 key.
		path = path[len(bucketdir)+1:]
		filesize := fi.Size()
		if fi.IsDir() {
			// Directories are listed with a trailing slash
			// and zero size.
			path += "/"
			filesize = 0
		}
		if strings.HasPrefix(params.prefix, path) && params.prefix != path {
			// Descend into subtree until we reach desired prefix
			return nil
		} else if path < params.prefix {
			// Not an ancestor or descendant of desired
			// prefix, therefore none of its descendants
			// can be either -- skip
			return filepath.SkipDir
		} else if path > params.prefix && !strings.HasPrefix(path, params.prefix) {
			// We must have traversed everything under
			// desired prefix
			return errDone
		} else if path == startAfter {
			// Skip startAfter itself, just descend into
			// subtree
			return nil
		} else if strings.HasPrefix(startAfter, path) {
			// Descend into subtree in case it contains
			// something after startAfter
			return nil
		} else if path < startAfter {
			// Skip ahead until we reach startAfter
			return filepath.SkipDir
		}
		// From here on, path is equal to or starts with
		// params.prefix, and sorts after startAfter.
		if fi.IsDir() && !h.Cluster.Collections.S3FolderObjects {
			// Note we don't add anything to
			// commonPrefixes here even if delimiter is
			// "/". We descend into the directory, and
			// return a commonPrefix only if we end up
			// finding a regular file inside it.
			return nil
		}
		if params.delimiter != "" {
			// Look for the delimiter in the part of path
			// that follows the requested prefix.
			idx := strings.Index(path[len(params.prefix):], params.delimiter)
			if idx >= 0 {
				// with prefix "foobar" and delimiter
				// "z", when we hit "foobar/baz", we
				// add "foobar/baz" (everything up to
				// and including the delimiter) to
				// commonPrefixes and stop descending.
				prefix := path[:len(params.prefix)+idx+1]
				if prefix == startAfter {
					return nil
				} else if prefix < startAfter && !strings.HasPrefix(startAfter, prefix) {
					return nil
				} else if full {
					resp.IsTruncated = true
					return errDone
				} else {
					commonPrefixes[prefix] = true
					nextMarker = prefix
					full = len(resp.Contents)+len(commonPrefixes) >= params.maxKeys
					return filepath.SkipDir
				}
			}
		}
		if full {
			resp.IsTruncated = true
			return errDone
		}
		resp.Contents = append(resp.Contents, s3Key{
			Key:          path,
			LastModified: fi.ModTime().UTC().Format("2006-01-02T15:04:05.999") + "Z",
			Size:         filesize,
		})
		nextMarker = path
		full = len(resp.Contents)+len(commonPrefixes) >= params.maxKeys
		return nil
	})
	// errDone only means the walk was cut short on purpose.
	if err != nil && err != errDone {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	// Report a next marker / continuation token only when the
	// listing is truncated -- and, for v1, only when a delimiter
	// was given.
	if params.delimiter == "" && !params.v2 || !resp.IsTruncated {
		nextMarker = ""
	}
	if params.delimiter != "" {
		// Map iteration order is random, so sort the prefixes
		// before returning them.
		resp.CommonPrefixes = make([]commonPrefix, 0, len(commonPrefixes))
		for prefix := range commonPrefixes {
			resp.CommonPrefixes = append(resp.CommonPrefixes, commonPrefix{prefix})
		}
		sort.Slice(resp.CommonPrefixes, func(i, j int) bool { return resp.CommonPrefixes[i].Prefix < resp.CommonPrefixes[j].Prefix })
	}
	// Note KeyCount counts only the entries in Contents, not
	// CommonPrefixes.
	resp.KeyCount = len(resp.Contents)
	var respV1orV2 interface{}

	if params.encodingTypeURL {
		// https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectsV2.html
		// "If you specify the encoding-type request
		// parameter, Amazon S3 includes this element in the
		// response, and returns encoded key name values in
		// the following response elements:
		//
		// Delimiter, Prefix, Key, and StartAfter.
		//
		//      Type: String
		//
		// Valid Values: url"
		//
		// This is somewhat vague but in practice it appears
		// to mean x-www-form-urlencoded as in RFC1866 8.2.1
		// para 1 (encode space as "+") rather than straight
		// percent-encoding as in RFC1738 2.2.  Presumably,
		// the intent is to allow the client to decode XML and
		// then paste the strings directly into another URI
		// query or POST form like "https://host/path?foo=" +
		// foo + "&bar=" + bar.
		resp.EncodingType = "url"
		resp.Delimiter = url.QueryEscape(resp.Delimiter)
		resp.Prefix = url.QueryEscape(resp.Prefix)
		resp.StartAfter = url.QueryEscape(resp.StartAfter)
		for i, ent := range resp.Contents {
			ent.Key = url.QueryEscape(ent.Key)
			resp.Contents[i] = ent
		}
		for i, ent := range resp.CommonPrefixes {
			ent.Prefix = url.QueryEscape(ent.Prefix)
			resp.CommonPrefixes[i] = ent
		}
	}

	if params.v2 {
		// The continuation token handed to the client is the
		// base64-encoded nextMarker.
		resp.NextContinuationToken = base64.StdEncoding.EncodeToString([]byte(nextMarker))
		respV1orV2 = resp
	} else {
		respV1orV2 = listV1Resp{
			CommonPrefixes: resp.CommonPrefixes,
			NextMarker:     nextMarker,
			KeyCount:       resp.KeyCount,
			IsTruncated:    resp.IsTruncated,
			Name:           bucket,
			Prefix:         params.prefix,
			Delimiter:      params.delimiter,
			Marker:         params.marker,
			MaxKeys:        params.maxKeys,
			Contents:       resp.Contents,
		}
	}

	w.Header().Set("Content-Type", "application/xml")
	io.WriteString(w, xml.Header)
	// An encoding failure here is logged but not reported to the
	// client.
	if err := xml.NewEncoder(w).Encode(respV1orV2); err != nil {
		ctxlog.FromContext(r.Context()).WithError(err).Error("error writing xml response")
	}
}