Merge branch '22003-keep-web-and-wb2-redirect-parameter' refs #22003
[arvados.git] / services / keep-web / s3.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package keepweb
6
7 import (
8         "crypto/hmac"
9         "crypto/sha256"
10         "encoding/base64"
11         "encoding/json"
12         "encoding/xml"
13         "errors"
14         "fmt"
15         "hash"
16         "io"
17         "mime"
18         "net/http"
19         "net/textproto"
20         "net/url"
21         "os"
22         "path/filepath"
23         "regexp"
24         "sort"
25         "strconv"
26         "strings"
27         "time"
28
29         "git.arvados.org/arvados.git/sdk/go/arvados"
30         "git.arvados.org/arvados.git/sdk/go/ctxlog"
31 )
32
// Settings for the S3-compatible API.
const (
	// s3SignAlgorithm is the only signing algorithm accepted in a
	// V4-style Authorization header.
	s3SignAlgorithm = "AWS4-HMAC-SHA256"

	// s3MaxClockSkew is the largest difference tolerated between
	// a signed request's timestamp and the local clock, in either
	// direction.
	s3MaxClockSkew = 5 * time.Minute

	// s3SecretCacheTidyInterval is the minimum time between
	// sweeps that remove expired entries from the secret cache.
	s3SecretCacheTidyInterval = time.Minute

	// s3MaxKeys is presumably the upper bound on the number of
	// keys returned by a single list request; it is used outside
	// this part of the file -- TODO confirm.
	s3MaxKeys = 1000
)
39
// commonPrefix is a single CommonPrefixes entry in a list response.
// It is a struct (rather than a bare string) so that an empty list
// of prefixes marshals to nothing at all.
type commonPrefix struct{ Prefix string }
43
44 type listV1Resp struct {
45         XMLName     string `xml:"http://s3.amazonaws.com/doc/2006-03-01/ ListBucketResult"`
46         Name        string
47         Prefix      string
48         Delimiter   string
49         Marker      string
50         MaxKeys     int
51         IsTruncated bool
52         Contents    []s3Key
53         // If we use a []string here, xml marshals an empty tag when
54         // CommonPrefixes is nil, which confuses some clients.  Fix by
55         // using this nested struct instead.
56         CommonPrefixes []commonPrefix
57         // Similarly, we need omitempty here, because an empty
58         // tag confuses some clients (e.g.,
59         // github.com/aws/aws-sdk-net never terminates its
60         // paging loop).
61         NextMarker string `xml:"NextMarker,omitempty"`
62         // ListObjectsV2 has a KeyCount response field.
63         KeyCount int
64 }
65
66 type listV2Resp struct {
67         XMLName               string `xml:"http://s3.amazonaws.com/doc/2006-03-01/ ListBucketResult"`
68         IsTruncated           bool
69         Contents              []s3Key
70         Name                  string
71         Prefix                string
72         Delimiter             string
73         MaxKeys               int
74         CommonPrefixes        []commonPrefix
75         EncodingType          string `xml:",omitempty"`
76         KeyCount              int
77         ContinuationToken     string `xml:",omitempty"`
78         NextContinuationToken string `xml:",omitempty"`
79         StartAfter            string `xml:",omitempty"`
80 }
81
// s3Key describes one object in a list response.
type s3Key struct {
	Key, LastModified string

	Size int64

	// The remaining fields are never populated. They exist only
	// so the corresponding elements appear in XML responses, in
	// case clients rely on them being present.
	ETag, StorageClass string

	Owner struct {
		ID, DisplayName string
	}
}
96
97 type cachedS3Secret struct {
98         auth   *arvados.APIClientAuthorization
99         expiry time.Time
100 }
101
102 func newCachedS3Secret(auth *arvados.APIClientAuthorization, maxExpiry time.Time) *cachedS3Secret {
103         var expiry time.Time
104         if auth.ExpiresAt.IsZero() || maxExpiry.Before(auth.ExpiresAt) {
105                 expiry = maxExpiry
106         } else {
107                 expiry = auth.ExpiresAt
108         }
109         return &cachedS3Secret{
110                 auth:   auth,
111                 expiry: expiry,
112         }
113 }
114
115 func (cs *cachedS3Secret) isValidAt(t time.Time) bool {
116         return cs.auth != nil &&
117                 !cs.expiry.IsZero() &&
118                 !t.IsZero() &&
119                 t.Before(cs.expiry)
120 }
121
// hmacstring returns the raw HMAC-SHA256 of msg, keyed with key.
func hmacstring(msg string, key []byte) []byte {
	mac := hmac.New(sha256.New, key)
	mac.Write([]byte(msg))
	return mac.Sum(nil)
}
127
// hashdigest feeds payload into h and returns the resulting digest
// as a lowercase hex string. Anything already written to h is
// included in the digest.
func hashdigest(h hash.Hash, payload string) string {
	h.Write([]byte(payload))
	digest := h.Sum(nil)
	return fmt.Sprintf("%x", digest)
}
132
133 // Signing key for given secret key and request attrs.
134 func s3signatureKey(key, datestamp, regionName, serviceName string) []byte {
135         return hmacstring("aws4_request",
136                 hmacstring(serviceName,
137                         hmacstring(regionName,
138                                 hmacstring(datestamp, []byte("AWS4"+key)))))
139 }
140
// s3querystring returns the canonical query string of u for an S3 V4
// signature: every key and value is percent-encoded (a space becomes
// %20, never +), parameters are sorted by encoded key, repeated
// values of the same key are sorted by encoded value, and the
// resulting key=value pairs are joined with "&".
//
// A parameter given without a value ("?versioning") is rendered as
// "versioning=". An empty query yields "".
func s3querystring(u *url.URL) string {
	escape := func(s string) string {
		return strings.ReplaceAll(url.QueryEscape(s), "+", "%20")
	}

	// Parse the query once; u.Query() re-parses RawQuery on every
	// call.
	query := u.Query()
	keys := make([]string, 0, len(query))
	values := make(map[string][]string, len(query))
	npairs := 0
	for k, vs := range query {
		ek := escape(k)
		evs := make([]string, len(vs))
		for i, v := range vs {
			evs[i] = escape(v)
		}
		// The signer sorts duplicate parameters by value, so the
		// order they arrived in must not leak into the result.
		sort.Strings(evs)
		keys = append(keys, ek)
		values[ek] = evs
		npairs += len(evs)
	}
	sort.Strings(keys)

	pairs := make([]string, 0, npairs)
	for _, ek := range keys {
		for _, ev := range values[ek] {
			pairs = append(pairs, ek+"="+ev)
		}
	}
	return strings.Join(pairs, "&")
}
163
// reMultipleSlashChars matches any run of two or more consecutive
// "/" characters, so that such a run can be collapsed to a single
// "/".
var reMultipleSlashChars = regexp.MustCompile("//+")
165
166 func s3stringToSign(alg, scope, signedHeaders string, r *http.Request) (string, error) {
167         timefmt, timestr := "20060102T150405Z", r.Header.Get("X-Amz-Date")
168         if timestr == "" {
169                 timefmt, timestr = time.RFC1123, r.Header.Get("Date")
170         }
171         t, err := time.Parse(timefmt, timestr)
172         if err != nil {
173                 return "", fmt.Errorf("invalid timestamp %q: %s", timestr, err)
174         }
175         if skew := time.Now().Sub(t); skew < -s3MaxClockSkew || skew > s3MaxClockSkew {
176                 return "", errors.New("exceeded max clock skew")
177         }
178
179         var canonicalHeaders string
180         for _, h := range strings.Split(signedHeaders, ";") {
181                 if h == "host" {
182                         canonicalHeaders += h + ":" + r.Host + "\n"
183                 } else {
184                         canonicalHeaders += h + ":" + r.Header.Get(h) + "\n"
185                 }
186         }
187
188         normalizedPath := normalizePath(r.URL.Path)
189         ctxlog.FromContext(r.Context()).Debugf("normalizedPath %q", normalizedPath)
190         canonicalRequest := fmt.Sprintf("%s\n%s\n%s\n%s\n%s\n%s", r.Method, normalizedPath, s3querystring(r.URL), canonicalHeaders, signedHeaders, r.Header.Get("X-Amz-Content-Sha256"))
191         ctxlog.FromContext(r.Context()).Debugf("s3stringToSign: canonicalRequest %s", canonicalRequest)
192         return fmt.Sprintf("%s\n%s\n%s\n%s", alg, r.Header.Get("X-Amz-Date"), scope, hashdigest(sha256.New(), canonicalRequest)), nil
193 }
194
// normalizePath returns s in the form required for the canonical
// request of an S3 V4 signature: runs of consecutive "/" characters
// are collapsed to a single "/", and every byte other than "/",
// ALPHA, DIGIT, "-", ".", "_", and "~" is percent-encoded with
// uppercase hex digits.
//
// (url.URL).EscapedPath() would be incorrect here. AWS documentation
// specifies the URL path should be normalized according to RFC 3986,
// i.e., unescaping ALPHA / DIGIT / "-" / "." / "_" / "~". The
// implication is that everything other than those chars (and "/")
// _must_ be percent-encoded -- even chars like ";" and "," that are
// not normally percent-encoded in paths.
func normalizePath(s string) string {
	const upperhex = "0123456789ABCDEF"
	var out strings.Builder
	out.Grow(len(s))
	prevSlash := false
	for i := 0; i < len(s); i++ {
		c := s[i]
		switch {
		case c == '/':
			// Emit only the first "/" of a run.
			if !prevSlash {
				out.WriteByte(c)
			}
		case (c >= 'a' && c <= 'z') ||
			(c >= 'A' && c <= 'Z') ||
			(c >= '0' && c <= '9') ||
			c == '-' || c == '.' || c == '_' || c == '~':
			out.WriteByte(c)
		default:
			out.WriteByte('%')
			out.WriteByte(upperhex[c>>4])
			out.WriteByte(upperhex[c&0x0f])
		}
		prevSlash = c == '/'
	}
	return out.String()
}
220
221 func s3signature(secretKey, scope, signedHeaders, stringToSign string) (string, error) {
222         // scope is {datestamp}/{region}/{service}/aws4_request
223         drs := strings.Split(scope, "/")
224         if len(drs) != 4 {
225                 return "", fmt.Errorf("invalid scope %q", scope)
226         }
227         key := s3signatureKey(secretKey, drs[0], drs[1], drs[2])
228         return hashdigest(hmac.New(sha256.New, key), stringToSign), nil
229 }
230
// v2tokenUnderscore matches the start of a V2 token in which the "/"
// separators have been written as "_".
var v2tokenUnderscore = regexp.MustCompile(`^v2_[a-z0-9]{5}-gj3su-[a-z0-9]{15}_`)

// unescapeKey recovers the original form of an access key given by
// an S3 client.
//
// If key looks like an entire Arvados token with "/" replaced by "_"
// (done to avoid colliding with the Authorization header format),
// every "_" is turned back into "/". Otherwise key is path-unescaped;
// if that fails, key is returned unchanged.
func unescapeKey(key string) string {
	if v2tokenUnderscore.MatchString(key) {
		return strings.Replace(key, "_", "/", -1)
	}
	unescaped, err := url.PathUnescape(key)
	if err != nil {
		return key
	}
	return unescaped
}
245
246 func (h *handler) updateS3SecretCache(aca *arvados.APIClientAuthorization, key string) {
247         now := time.Now()
248         ttlExpiry := now.Add(h.Cluster.Collections.WebDAVCache.TTL.Duration())
249         cachedSecret := newCachedS3Secret(aca, ttlExpiry)
250
251         h.s3SecretCacheMtx.Lock()
252         defer h.s3SecretCacheMtx.Unlock()
253
254         if h.s3SecretCache == nil {
255                 h.s3SecretCache = make(map[string]*cachedS3Secret)
256         }
257         h.s3SecretCache[key] = cachedSecret
258         h.s3SecretCache[cachedSecret.auth.UUID] = cachedSecret
259         h.s3SecretCache[cachedSecret.auth.APIToken] = cachedSecret
260         h.s3SecretCache[cachedSecret.auth.TokenV2()] = cachedSecret
261
262         if h.s3SecretCacheNextTidy.After(now) {
263                 return
264         }
265         for key, entry := range h.s3SecretCache {
266                 if entry.expiry.Before(now) {
267                         delete(h.s3SecretCache, key)
268                 }
269         }
270         h.s3SecretCacheNextTidy = now.Add(s3SecretCacheTidyInterval)
271 }
272
273 // checks3signature verifies the given S3 V4 signature and returns the
274 // Arvados token that corresponds to the given accessKey. An error is
275 // returned if accessKey is not a valid token UUID or the signature
276 // does not match.
277 func (h *handler) checks3signature(r *http.Request) (string, error) {
278         var key, scope, signedHeaders, signature string
279         authstring := strings.TrimPrefix(r.Header.Get("Authorization"), s3SignAlgorithm+" ")
280         for _, cmpt := range strings.Split(authstring, ",") {
281                 cmpt = strings.TrimSpace(cmpt)
282                 split := strings.SplitN(cmpt, "=", 2)
283                 switch {
284                 case len(split) != 2:
285                         // (?) ignore
286                 case split[0] == "Credential":
287                         keyandscope := strings.SplitN(split[1], "/", 2)
288                         if len(keyandscope) == 2 {
289                                 key, scope = keyandscope[0], keyandscope[1]
290                         }
291                 case split[0] == "SignedHeaders":
292                         signedHeaders = split[1]
293                 case split[0] == "Signature":
294                         signature = split[1]
295                 }
296         }
297         keyIsUUID := len(key) == 27 && key[5:12] == "-gj3su-"
298         unescapedKey := unescapeKey(key)
299
300         h.s3SecretCacheMtx.Lock()
301         cached := h.s3SecretCache[unescapedKey]
302         h.s3SecretCacheMtx.Unlock()
303         usedCache := cached != nil && cached.isValidAt(time.Now())
304         var aca *arvados.APIClientAuthorization
305         if usedCache {
306                 aca = cached.auth
307         } else {
308                 var acaAuth, acaPath string
309                 if keyIsUUID {
310                         acaAuth = h.Cluster.SystemRootToken
311                         acaPath = key
312                 } else {
313                         acaAuth = unescapedKey
314                         acaPath = "current"
315                 }
316                 client := (&arvados.Client{
317                         APIHost:  h.Cluster.Services.Controller.ExternalURL.Host,
318                         Insecure: h.Cluster.TLS.Insecure,
319                 }).WithRequestID(r.Header.Get("X-Request-Id"))
320                 ctx := arvados.ContextWithAuthorization(r.Context(), "Bearer "+acaAuth)
321                 aca = new(arvados.APIClientAuthorization)
322                 err := client.RequestAndDecodeContext(ctx, aca, "GET", "arvados/v1/api_client_authorizations/"+acaPath, nil, nil)
323                 if err != nil {
324                         ctxlog.FromContext(r.Context()).WithError(err).WithField("UUID", key).Info("token lookup failed")
325                         return "", errors.New("invalid access key")
326                 }
327         }
328         var secret string
329         if keyIsUUID {
330                 secret = aca.APIToken
331         } else {
332                 secret = key
333         }
334         stringToSign, err := s3stringToSign(s3SignAlgorithm, scope, signedHeaders, r)
335         if err != nil {
336                 return "", err
337         }
338         expect, err := s3signature(secret, scope, signedHeaders, stringToSign)
339         if err != nil {
340                 return "", err
341         } else if expect != signature {
342                 return "", fmt.Errorf("signature does not match (scope %q signedHeaders %q stringToSign %q)", scope, signedHeaders, stringToSign)
343         }
344         if !usedCache {
345                 h.updateS3SecretCache(aca, unescapedKey)
346         }
347         return aca.TokenV2(), nil
348 }
349
// s3ErrorResponse writes an S3-style XML error document to w with
// the given HTTP status code. s3code, message, and resource become
// the Code, Message, and Resource elements of the document;
// RequestId is always empty.
func s3ErrorResponse(w http.ResponseWriter, s3code string, message string, resource string, code int) {
	hdr := w.Header()
	hdr.Set("Content-Type", "application/xml")
	hdr.Set("X-Content-Type-Options", "nosniff")
	w.WriteHeader(code)

	payload := struct {
		Code      string
		Message   string
		Resource  string
		RequestId string
	}{
		Code:     s3code,
		Message:  message,
		Resource: resource,
	}
	io.WriteString(w, xml.Header)
	xml.NewEncoder(w).EncodeElement(payload, xml.StartElement{Name: xml.Name{Local: "Error"}})
}
368
// S3 error codes, used as the Code element of XML error responses.
var (
	NoSuchKey             = "NoSuchKey"
	NoSuchBucket          = "NoSuchBucket"
	InvalidArgument       = "InvalidArgument"
	InternalError         = "InternalError"
	UnauthorizedAccess    = "UnauthorizedAccess"
	InvalidRequest        = "InvalidRequest"
	SignatureDoesNotMatch = "SignatureDoesNotMatch"
)

// reRawQueryIndicatesAPI matches a raw query string whose first
// parameter is a bare lowercase word with no value (e.g. "website"
// or "acl&versionID=..."), which indicates a request for a specific
// S3 API rather than a plain object or list operation.
var reRawQueryIndicatesAPI = regexp.MustCompile(`^[a-z]+(&|$)`)
378
379 // serveS3 handles r and returns true if r is a request from an S3
380 // client, otherwise it returns false.
381 func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool {
382         var token string
383         if auth := r.Header.Get("Authorization"); strings.HasPrefix(auth, "AWS ") {
384                 split := strings.SplitN(auth[4:], ":", 2)
385                 if len(split) < 2 {
386                         s3ErrorResponse(w, InvalidRequest, "malformed Authorization header", r.URL.Path, http.StatusUnauthorized)
387                         return true
388                 }
389                 token = unescapeKey(split[0])
390         } else if strings.HasPrefix(auth, s3SignAlgorithm+" ") {
391                 t, err := h.checks3signature(r)
392                 if err != nil {
393                         s3ErrorResponse(w, SignatureDoesNotMatch, "signature verification failed: "+err.Error(), r.URL.Path, http.StatusForbidden)
394                         return true
395                 }
396                 token = t
397         } else {
398                 return false
399         }
400
401         fs, sess, tokenUser, err := h.Cache.GetSession(token)
402         if err != nil {
403                 s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusInternalServerError)
404                 return true
405         }
406         defer sess.Release()
407         readfs := fs
408         if writeMethod[r.Method] {
409                 // Create a FileSystem for this request, to avoid
410                 // exposing incomplete write operations to concurrent
411                 // requests.
412                 client := sess.client.WithRequestID(r.Header.Get("X-Request-Id"))
413                 fs = client.SiteFileSystem(sess.keepclient)
414                 fs.ForwardSlashNameSubstitution(h.Cluster.Collections.ForwardSlashNameSubstitution)
415         }
416
417         var objectNameGiven bool
418         var bucketName string
419         fspath := "/by_id"
420         if id := arvados.CollectionIDFromDNSName(r.Host); id != "" {
421                 fspath += "/" + id
422                 bucketName = id
423                 objectNameGiven = strings.Count(strings.TrimSuffix(r.URL.Path, "/"), "/") > 0
424         } else {
425                 bucketName = strings.SplitN(strings.TrimPrefix(r.URL.Path, "/"), "/", 2)[0]
426                 objectNameGiven = strings.Count(strings.TrimSuffix(r.URL.Path, "/"), "/") > 1
427         }
428         fspath += reMultipleSlashChars.ReplaceAllString(r.URL.Path, "/")
429
430         switch {
431         case r.Method == http.MethodGet && !objectNameGiven:
432                 // Path is "/{uuid}" or "/{uuid}/", has no object name
433                 if _, ok := r.URL.Query()["versioning"]; ok {
434                         // GetBucketVersioning
435                         w.Header().Set("Content-Type", "application/xml")
436                         io.WriteString(w, xml.Header)
437                         fmt.Fprintln(w, `<VersioningConfiguration xmlns="http://s3.amazonaws.com/doc/2006-03-01/"/>`)
438                 } else if _, ok = r.URL.Query()["location"]; ok {
439                         // GetBucketLocation
440                         w.Header().Set("Content-Type", "application/xml")
441                         io.WriteString(w, xml.Header)
442                         fmt.Fprintln(w, `<LocationConstraint><LocationConstraint xmlns="http://s3.amazonaws.com/doc/2006-03-01/">`+
443                                 h.Cluster.ClusterID+
444                                 `</LocationConstraint></LocationConstraint>`)
445                 } else if reRawQueryIndicatesAPI.MatchString(r.URL.RawQuery) {
446                         // GetBucketWebsite ("GET /bucketid/?website"), GetBucketTagging, etc.
447                         s3ErrorResponse(w, InvalidRequest, "API not supported", r.URL.Path+"?"+r.URL.RawQuery, http.StatusBadRequest)
448                 } else {
449                         // ListObjects
450                         h.s3list(bucketName, w, r, fs)
451                 }
452                 return true
453         case r.Method == http.MethodGet || r.Method == http.MethodHead:
454                 if reRawQueryIndicatesAPI.MatchString(r.URL.RawQuery) {
455                         // GetObjectRetention ("GET /bucketid/objectid?retention&versionID=..."), etc.
456                         s3ErrorResponse(w, InvalidRequest, "API not supported", r.URL.Path+"?"+r.URL.RawQuery, http.StatusBadRequest)
457                         return true
458                 }
459                 fi, err := fs.Stat(fspath)
460                 if r.Method == "HEAD" && !objectNameGiven {
461                         // HeadBucket
462                         if err == nil && fi.IsDir() {
463                                 err = setFileInfoHeaders(w.Header(), fs, fspath)
464                                 if err != nil {
465                                         s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusBadGateway)
466                                         return true
467                                 }
468                                 w.WriteHeader(http.StatusOK)
469                         } else if os.IsNotExist(err) {
470                                 s3ErrorResponse(w, NoSuchBucket, "The specified bucket does not exist.", r.URL.Path, http.StatusNotFound)
471                         } else {
472                                 s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusBadGateway)
473                         }
474                         return true
475                 }
476                 if err == nil && fi.IsDir() && objectNameGiven && strings.HasSuffix(fspath, "/") && h.Cluster.Collections.S3FolderObjects {
477                         err = setFileInfoHeaders(w.Header(), fs, fspath)
478                         if err != nil {
479                                 s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusBadGateway)
480                                 return true
481                         }
482                         w.Header().Set("Content-Type", "application/x-directory")
483                         w.WriteHeader(http.StatusOK)
484                         return true
485                 }
486                 if os.IsNotExist(err) ||
487                         (err != nil && err.Error() == "not a directory") ||
488                         (fi != nil && fi.IsDir()) {
489                         s3ErrorResponse(w, NoSuchKey, "The specified key does not exist.", r.URL.Path, http.StatusNotFound)
490                         return true
491                 }
492
493                 if !h.userPermittedToUploadOrDownload(r.Method, tokenUser) {
494                         http.Error(w, "Not permitted", http.StatusForbidden)
495                         return true
496                 }
497                 h.logUploadOrDownload(r, sess.arvadosclient, fs, fspath, nil, tokenUser)
498
499                 // shallow copy r, and change URL path
500                 r := *r
501                 r.URL.Path = fspath
502                 err = setFileInfoHeaders(w.Header(), fs, fspath)
503                 if err != nil {
504                         s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusBadGateway)
505                         return true
506                 }
507                 http.FileServer(fs).ServeHTTP(w, &r)
508                 return true
509         case r.Method == http.MethodPut:
510                 if reRawQueryIndicatesAPI.MatchString(r.URL.RawQuery) {
511                         // PutObjectAcl ("PUT /bucketid/objectid?acl&versionID=..."), etc.
512                         s3ErrorResponse(w, InvalidRequest, "API not supported", r.URL.Path+"?"+r.URL.RawQuery, http.StatusBadRequest)
513                         return true
514                 }
515                 if !objectNameGiven {
516                         s3ErrorResponse(w, InvalidArgument, "Missing object name in PUT request.", r.URL.Path, http.StatusBadRequest)
517                         return true
518                 }
519                 var objectIsDir bool
520                 if strings.HasSuffix(fspath, "/") {
521                         if !h.Cluster.Collections.S3FolderObjects {
522                                 s3ErrorResponse(w, InvalidArgument, "invalid object name: trailing slash", r.URL.Path, http.StatusBadRequest)
523                                 return true
524                         }
525                         n, err := r.Body.Read(make([]byte, 1))
526                         if err != nil && err != io.EOF {
527                                 s3ErrorResponse(w, InternalError, fmt.Sprintf("error reading request body: %s", err), r.URL.Path, http.StatusInternalServerError)
528                                 return true
529                         } else if n > 0 {
530                                 s3ErrorResponse(w, InvalidArgument, "cannot create object with trailing '/' char unless content is empty", r.URL.Path, http.StatusBadRequest)
531                                 return true
532                         } else if strings.SplitN(r.Header.Get("Content-Type"), ";", 2)[0] != "application/x-directory" {
533                                 s3ErrorResponse(w, InvalidArgument, "cannot create object with trailing '/' char unless Content-Type is 'application/x-directory'", r.URL.Path, http.StatusBadRequest)
534                                 return true
535                         }
536                         // Given PUT "foo/bar/", we'll use "foo/bar/."
537                         // in the "ensure parents exist" block below,
538                         // and then we'll be done.
539                         fspath += "."
540                         objectIsDir = true
541                 }
542                 fi, err := fs.Stat(fspath)
543                 if err != nil && err.Error() == "not a directory" {
544                         // requested foo/bar, but foo is a file
545                         s3ErrorResponse(w, InvalidArgument, "object name conflicts with existing object", r.URL.Path, http.StatusBadRequest)
546                         return true
547                 }
548                 if strings.HasSuffix(r.URL.Path, "/") && err == nil && !fi.IsDir() {
549                         // requested foo/bar/, but foo/bar is a file
550                         s3ErrorResponse(w, InvalidArgument, "object name conflicts with existing object", r.URL.Path, http.StatusBadRequest)
551                         return true
552                 }
553                 // create missing parent/intermediate directories, if any
554                 for i, c := range fspath {
555                         if i > 0 && c == '/' {
556                                 dir := fspath[:i]
557                                 if strings.HasSuffix(dir, "/") {
558                                         err = errors.New("invalid object name (consecutive '/' chars)")
559                                         s3ErrorResponse(w, InvalidArgument, err.Error(), r.URL.Path, http.StatusBadRequest)
560                                         return true
561                                 }
562                                 err = fs.Mkdir(dir, 0755)
563                                 if errors.Is(err, arvados.ErrInvalidArgument) || errors.Is(err, arvados.ErrInvalidOperation) {
564                                         // Cannot create a directory
565                                         // here.
566                                         err = fmt.Errorf("mkdir %q failed: %w", dir, err)
567                                         s3ErrorResponse(w, InvalidArgument, err.Error(), r.URL.Path, http.StatusBadRequest)
568                                         return true
569                                 } else if err != nil && !os.IsExist(err) {
570                                         err = fmt.Errorf("mkdir %q failed: %w", dir, err)
571                                         s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusInternalServerError)
572                                         return true
573                                 }
574                         }
575                 }
576                 if !objectIsDir {
577                         f, err := fs.OpenFile(fspath, os.O_WRONLY|os.O_TRUNC|os.O_CREATE, 0644)
578                         if os.IsNotExist(err) {
579                                 f, err = fs.OpenFile(fspath, os.O_WRONLY|os.O_TRUNC|os.O_CREATE, 0644)
580                         }
581                         if err != nil {
582                                 err = fmt.Errorf("open %q failed: %w", r.URL.Path, err)
583                                 s3ErrorResponse(w, InvalidArgument, err.Error(), r.URL.Path, http.StatusBadRequest)
584                                 return true
585                         }
586                         defer f.Close()
587
588                         if !h.userPermittedToUploadOrDownload(r.Method, tokenUser) {
589                                 http.Error(w, "Not permitted", http.StatusForbidden)
590                                 return true
591                         }
592                         h.logUploadOrDownload(r, sess.arvadosclient, fs, fspath, nil, tokenUser)
593
594                         _, err = io.Copy(f, r.Body)
595                         if err != nil {
596                                 err = fmt.Errorf("write to %q failed: %w", r.URL.Path, err)
597                                 s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusBadGateway)
598                                 return true
599                         }
600                         err = f.Close()
601                         if err != nil {
602                                 err = fmt.Errorf("write to %q failed: close: %w", r.URL.Path, err)
603                                 s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusBadGateway)
604                                 return true
605                         }
606                 }
607                 err = h.syncCollection(fs, readfs, fspath)
608                 if err != nil {
609                         err = fmt.Errorf("sync failed: %w", err)
610                         s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusInternalServerError)
611                         return true
612                 }
613                 w.WriteHeader(http.StatusOK)
614                 return true
615         case r.Method == http.MethodDelete:
616                 if reRawQueryIndicatesAPI.MatchString(r.URL.RawQuery) {
617                         // DeleteObjectTagging ("DELETE /bucketid/objectid?tagging&versionID=..."), etc.
618                         s3ErrorResponse(w, InvalidRequest, "API not supported", r.URL.Path+"?"+r.URL.RawQuery, http.StatusBadRequest)
619                         return true
620                 }
621                 if !objectNameGiven || r.URL.Path == "/" {
622                         s3ErrorResponse(w, InvalidArgument, "missing object name in DELETE request", r.URL.Path, http.StatusBadRequest)
623                         return true
624                 }
625                 if strings.HasSuffix(fspath, "/") {
626                         fspath = strings.TrimSuffix(fspath, "/")
627                         fi, err := fs.Stat(fspath)
628                         if os.IsNotExist(err) {
629                                 w.WriteHeader(http.StatusNoContent)
630                                 return true
631                         } else if err != nil {
632                                 s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusInternalServerError)
633                                 return true
634                         } else if !fi.IsDir() {
635                                 // if "foo" exists and is a file, then
636                                 // "foo/" doesn't exist, so we say
637                                 // delete was successful.
638                                 w.WriteHeader(http.StatusNoContent)
639                                 return true
640                         }
641                 } else if fi, err := fs.Stat(fspath); err == nil && fi.IsDir() {
642                         // if "foo" is a dir, it is visible via S3
643                         // only as "foo/", not "foo" -- so we leave
644                         // the dir alone and return 204 to indicate
645                         // that "foo" does not exist.
646                         w.WriteHeader(http.StatusNoContent)
647                         return true
648                 }
649                 err = fs.Remove(fspath)
650                 if os.IsNotExist(err) {
651                         w.WriteHeader(http.StatusNoContent)
652                         return true
653                 }
654                 if err != nil {
655                         err = fmt.Errorf("rm failed: %w", err)
656                         s3ErrorResponse(w, InvalidArgument, err.Error(), r.URL.Path, http.StatusBadRequest)
657                         return true
658                 }
659                 err = h.syncCollection(fs, readfs, fspath)
660                 if err != nil {
661                         err = fmt.Errorf("sync failed: %w", err)
662                         s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusInternalServerError)
663                         return true
664                 }
665                 w.WriteHeader(http.StatusNoContent)
666                 return true
667         default:
668                 s3ErrorResponse(w, InvalidRequest, "method not allowed", r.URL.Path, http.StatusMethodNotAllowed)
669                 return true
670         }
671 }
672
673 // Save modifications to the indicated collection in srcfs, then (if
674 // successful) ensure they are also reflected in dstfs.
675 func (h *handler) syncCollection(srcfs, dstfs arvados.CustomFileSystem, path string) error {
676         coll, _ := h.determineCollection(srcfs, path)
677         if coll == nil || coll.UUID == "" {
678                 return errors.New("could not determine collection to sync")
679         }
680         d, err := srcfs.OpenFile("by_id/"+coll.UUID, os.O_RDWR, 0777)
681         if err != nil {
682                 return err
683         }
684         defer d.Close()
685         err = d.Sync()
686         if err != nil {
687                 return err
688         }
689         snap, err := d.Snapshot()
690         if err != nil {
691                 return err
692         }
693         dstd, err := dstfs.OpenFile("by_id/"+coll.UUID, os.O_RDWR, 0777)
694         if err != nil {
695                 return err
696         }
697         defer dstd.Close()
698         return dstd.Splice(snap)
699 }
700
701 func setFileInfoHeaders(header http.Header, fs arvados.CustomFileSystem, path string) error {
702         maybeEncode := func(s string) string {
703                 for _, c := range s {
704                         if c > '\u007f' || c < ' ' {
705                                 return mime.BEncoding.Encode("UTF-8", s)
706                         }
707                 }
708                 return s
709         }
710         path = strings.TrimSuffix(path, "/")
711         var props map[string]interface{}
712         for {
713                 fi, err := fs.Stat(path)
714                 if err != nil {
715                         return err
716                 }
717                 switch src := fi.Sys().(type) {
718                 case *arvados.Collection:
719                         props = src.Properties
720                         if src.PortableDataHash != "" {
721                                 header.Set("Etag", fmt.Sprintf(`"%s"`, src.PortableDataHash))
722                         }
723                 case *arvados.Group:
724                         props = src.Properties
725                 default:
726                         if err, ok := src.(error); ok {
727                                 return err
728                         }
729                         // Try parent
730                         cut := strings.LastIndexByte(path, '/')
731                         if cut < 0 {
732                                 return nil
733                         }
734                         path = path[:cut]
735                         continue
736                 }
737                 break
738         }
739         for k, v := range props {
740                 if !validMIMEHeaderKey(k) {
741                         continue
742                 }
743                 k = "x-amz-meta-" + k
744                 if s, ok := v.(string); ok {
745                         header.Set(k, maybeEncode(s))
746                 } else if j, err := json.Marshal(v); err == nil {
747                         header.Set(k, maybeEncode(string(j)))
748                 }
749         }
750         return nil
751 }
752
// validMIMEHeaderKey reports whether k is usable as (the tail of) a
// MIME header key.
//
// textproto.CanonicalMIMEHeaderKey returns its argument unmodified
// when the argument contains a space or other bytes that are not
// allowed in a header key. Prepending a lowercase "z-" ensures that
// canonicalizing a valid key always changes it (to "Z-..."), so an
// unchanged result means k is not valid.
func validMIMEHeaderKey(k string) bool {
	probe := "z-" + k
	canonical := textproto.CanonicalMIMEHeaderKey(probe)
	return canonical != probe
}
757
758 // Call fn on the given path (directory) and its contents, in
759 // lexicographic order.
760 //
761 // If isRoot==true and path is not a directory, return nil.
762 //
763 // If fn returns filepath.SkipDir when called on a directory, don't
764 // descend into that directory.
765 func walkFS(fs arvados.CustomFileSystem, path string, isRoot bool, fn func(path string, fi os.FileInfo) error) error {
766         if isRoot {
767                 fi, err := fs.Stat(path)
768                 if os.IsNotExist(err) || (err == nil && !fi.IsDir()) {
769                         return nil
770                 } else if err != nil {
771                         return err
772                 }
773                 err = fn(path, fi)
774                 if err == filepath.SkipDir {
775                         return nil
776                 } else if err != nil {
777                         return err
778                 }
779         }
780         f, err := fs.Open(path)
781         if os.IsNotExist(err) && isRoot {
782                 return nil
783         } else if err != nil {
784                 return fmt.Errorf("open %q: %w", path, err)
785         }
786         defer f.Close()
787         if path == "/" {
788                 path = ""
789         }
790         fis, err := f.Readdir(-1)
791         if err != nil {
792                 return err
793         }
794         sort.Slice(fis, func(i, j int) bool { return fis[i].Name() < fis[j].Name() })
795         for _, fi := range fis {
796                 err = fn(path+"/"+fi.Name(), fi)
797                 if err == filepath.SkipDir {
798                         continue
799                 } else if err != nil {
800                         return err
801                 }
802                 if fi.IsDir() {
803                         err = walkFS(fs, path+"/"+fi.Name(), false, fn)
804                         if err != nil {
805                                 return err
806                         }
807                 }
808         }
809         return nil
810 }
811
// errDone is returned by the walkFS callback in s3list to stop the
// walk early (either the listing is full or everything under the
// requested prefix has been visited). s3list does not treat it as a
// failure.
var errDone = errors.New("done")
813
814 func (h *handler) s3list(bucket string, w http.ResponseWriter, r *http.Request, fs arvados.CustomFileSystem) {
815         var params struct {
816                 v2                bool
817                 delimiter         string
818                 maxKeys           int
819                 prefix            string
820                 marker            string // decoded continuationToken (v2) or provided by client (v1)
821                 startAfter        string // v2
822                 continuationToken string // v2
823                 encodingTypeURL   bool   // v2
824         }
825         params.delimiter = r.FormValue("delimiter")
826         if mk, _ := strconv.ParseInt(r.FormValue("max-keys"), 10, 64); mk > 0 && mk < s3MaxKeys {
827                 params.maxKeys = int(mk)
828         } else {
829                 params.maxKeys = s3MaxKeys
830         }
831         params.prefix = r.FormValue("prefix")
832         switch r.FormValue("list-type") {
833         case "":
834         case "2":
835                 params.v2 = true
836         default:
837                 http.Error(w, "invalid list-type parameter", http.StatusBadRequest)
838                 return
839         }
840         if params.v2 {
841                 params.continuationToken = r.FormValue("continuation-token")
842                 marker, err := base64.StdEncoding.DecodeString(params.continuationToken)
843                 if err != nil {
844                         http.Error(w, "invalid continuation token", http.StatusBadRequest)
845                         return
846                 }
847                 // marker and start-after perform the same function,
848                 // but we keep them separate so we can repeat them
849                 // back to the client in the response.
850                 params.marker = string(marker)
851                 params.startAfter = r.FormValue("start-after")
852                 switch r.FormValue("encoding-type") {
853                 case "":
854                 case "url":
855                         params.encodingTypeURL = true
856                 default:
857                         http.Error(w, "invalid encoding-type parameter", http.StatusBadRequest)
858                         return
859                 }
860         } else {
861                 // marker is functionally equivalent to start-after.
862                 params.marker = r.FormValue("marker")
863         }
864
865         // startAfter is params.marker or params.startAfter, whichever
866         // comes last.
867         startAfter := params.startAfter
868         if startAfter < params.marker {
869                 startAfter = params.marker
870         }
871
872         bucketdir := "by_id/" + bucket
873         // walkpath is the directory (relative to bucketdir) we need
874         // to walk: the innermost directory that is guaranteed to
875         // contain all paths that have the requested prefix. Examples:
876         // prefix "foo/bar"  => walkpath "foo"
877         // prefix "foo/bar/" => walkpath "foo/bar"
878         // prefix "foo"      => walkpath ""
879         // prefix ""         => walkpath ""
880         walkpath := params.prefix
881         if cut := strings.LastIndex(walkpath, "/"); cut >= 0 {
882                 walkpath = walkpath[:cut]
883         } else {
884                 walkpath = ""
885         }
886
887         resp := listV2Resp{
888                 Name:              bucket,
889                 Prefix:            params.prefix,
890                 Delimiter:         params.delimiter,
891                 MaxKeys:           params.maxKeys,
892                 ContinuationToken: r.FormValue("continuation-token"),
893                 StartAfter:        params.startAfter,
894         }
895
896         // nextMarker will be the last path we add to either
897         // resp.Contents or commonPrefixes.  It will be included in
898         // the response as NextMarker or NextContinuationToken if
899         // needed.
900         nextMarker := ""
901
902         commonPrefixes := map[string]bool{}
903         full := false
904         err := walkFS(fs, strings.TrimSuffix(bucketdir+"/"+walkpath, "/"), true, func(path string, fi os.FileInfo) error {
905                 if path == bucketdir {
906                         return nil
907                 }
908                 path = path[len(bucketdir)+1:]
909                 filesize := fi.Size()
910                 if fi.IsDir() {
911                         path += "/"
912                         filesize = 0
913                 }
914                 if strings.HasPrefix(params.prefix, path) && params.prefix != path {
915                         // Descend into subtree until we reach desired prefix
916                         return nil
917                 } else if path < params.prefix {
918                         // Not an ancestor or descendant of desired
919                         // prefix, therefore none of its descendants
920                         // can be either -- skip
921                         return filepath.SkipDir
922                 } else if path > params.prefix && !strings.HasPrefix(path, params.prefix) {
923                         // We must have traversed everything under
924                         // desired prefix
925                         return errDone
926                 } else if path == startAfter {
927                         // Skip startAfter itself, just descend into
928                         // subtree
929                         return nil
930                 } else if strings.HasPrefix(startAfter, path) {
931                         // Descend into subtree in case it contains
932                         // something after startAfter
933                         return nil
934                 } else if path < startAfter {
935                         // Skip ahead until we reach startAfter
936                         return filepath.SkipDir
937                 }
938                 if fi.IsDir() && !h.Cluster.Collections.S3FolderObjects {
939                         // Note we don't add anything to
940                         // commonPrefixes here even if delimiter is
941                         // "/". We descend into the directory, and
942                         // return a commonPrefix only if we end up
943                         // finding a regular file inside it.
944                         return nil
945                 }
946                 if params.delimiter != "" {
947                         idx := strings.Index(path[len(params.prefix):], params.delimiter)
948                         if idx >= 0 {
949                                 // with prefix "foobar" and delimiter
950                                 // "z", when we hit "foobar/baz", we
951                                 // add "/baz" to commonPrefixes and
952                                 // stop descending.
953                                 prefix := path[:len(params.prefix)+idx+1]
954                                 if prefix == startAfter {
955                                         return nil
956                                 } else if prefix < startAfter && !strings.HasPrefix(startAfter, prefix) {
957                                         return nil
958                                 } else if full {
959                                         resp.IsTruncated = true
960                                         return errDone
961                                 } else {
962                                         commonPrefixes[prefix] = true
963                                         nextMarker = prefix
964                                         full = len(resp.Contents)+len(commonPrefixes) >= params.maxKeys
965                                         return filepath.SkipDir
966                                 }
967                         }
968                 }
969                 if full {
970                         resp.IsTruncated = true
971                         return errDone
972                 }
973                 resp.Contents = append(resp.Contents, s3Key{
974                         Key:          path,
975                         LastModified: fi.ModTime().UTC().Format("2006-01-02T15:04:05.999") + "Z",
976                         Size:         filesize,
977                 })
978                 nextMarker = path
979                 full = len(resp.Contents)+len(commonPrefixes) >= params.maxKeys
980                 return nil
981         })
982         if err != nil && err != errDone {
983                 http.Error(w, err.Error(), http.StatusInternalServerError)
984                 return
985         }
986         if params.delimiter == "" && !params.v2 || !resp.IsTruncated {
987                 nextMarker = ""
988         }
989         if params.delimiter != "" {
990                 resp.CommonPrefixes = make([]commonPrefix, 0, len(commonPrefixes))
991                 for prefix := range commonPrefixes {
992                         resp.CommonPrefixes = append(resp.CommonPrefixes, commonPrefix{prefix})
993                 }
994                 sort.Slice(resp.CommonPrefixes, func(i, j int) bool { return resp.CommonPrefixes[i].Prefix < resp.CommonPrefixes[j].Prefix })
995         }
996         resp.KeyCount = len(resp.Contents)
997         var respV1orV2 interface{}
998
999         if params.encodingTypeURL {
1000                 // https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectsV2.html
1001                 // "If you specify the encoding-type request
1002                 // parameter, Amazon S3 includes this element in the
1003                 // response, and returns encoded key name values in
1004                 // the following response elements:
1005                 //
1006                 // Delimiter, Prefix, Key, and StartAfter.
1007                 //
1008                 //      Type: String
1009                 //
1010                 // Valid Values: url"
1011                 //
1012                 // This is somewhat vague but in practice it appears
1013                 // to mean x-www-form-urlencoded as in RFC1866 8.2.1
1014                 // para 1 (encode space as "+") rather than straight
1015                 // percent-encoding as in RFC1738 2.2.  Presumably,
1016                 // the intent is to allow the client to decode XML and
1017                 // then paste the strings directly into another URI
1018                 // query or POST form like "https://host/path?foo=" +
1019                 // foo + "&bar=" + bar.
1020                 resp.EncodingType = "url"
1021                 resp.Delimiter = url.QueryEscape(resp.Delimiter)
1022                 resp.Prefix = url.QueryEscape(resp.Prefix)
1023                 resp.StartAfter = url.QueryEscape(resp.StartAfter)
1024                 for i, ent := range resp.Contents {
1025                         ent.Key = url.QueryEscape(ent.Key)
1026                         resp.Contents[i] = ent
1027                 }
1028                 for i, ent := range resp.CommonPrefixes {
1029                         ent.Prefix = url.QueryEscape(ent.Prefix)
1030                         resp.CommonPrefixes[i] = ent
1031                 }
1032         }
1033
1034         if params.v2 {
1035                 resp.NextContinuationToken = base64.StdEncoding.EncodeToString([]byte(nextMarker))
1036                 respV1orV2 = resp
1037         } else {
1038                 respV1orV2 = listV1Resp{
1039                         CommonPrefixes: resp.CommonPrefixes,
1040                         NextMarker:     nextMarker,
1041                         KeyCount:       resp.KeyCount,
1042                         IsTruncated:    resp.IsTruncated,
1043                         Name:           bucket,
1044                         Prefix:         params.prefix,
1045                         Delimiter:      params.delimiter,
1046                         Marker:         params.marker,
1047                         MaxKeys:        params.maxKeys,
1048                         Contents:       resp.Contents,
1049                 }
1050         }
1051
1052         w.Header().Set("Content-Type", "application/xml")
1053         io.WriteString(w, xml.Header)
1054         if err := xml.NewEncoder(w).Encode(respV1orV2); err != nil {
1055                 ctxlog.FromContext(r.Context()).WithError(err).Error("error writing xml response")
1056         }
1057 }