]> git.arvados.org - arvados.git/blob - services/keep-web/s3.go
23044: De-dup ContainerWebServices routing logic.
[arvados.git] / services / keep-web / s3.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package keepweb
6
7 import (
8         "crypto/hmac"
9         "crypto/sha256"
10         "encoding/base64"
11         "encoding/json"
12         "encoding/xml"
13         "errors"
14         "fmt"
15         "hash"
16         "io"
17         "mime"
18         "net/http"
19         "net/textproto"
20         "net/url"
21         "os"
22         "path/filepath"
23         "regexp"
24         "sort"
25         "strconv"
26         "strings"
27         "time"
28
29         "git.arvados.org/arvados.git/sdk/go/arvados"
30         "git.arvados.org/arvados.git/sdk/go/ctxlog"
31 )
32
// Parameters of the S3-compatible API implementation.
const (
	// s3MaxKeys is presumably the upper bound on the number of
	// keys returned in a single list response -- NOTE(review):
	// confirm against the list handler, which is not in this
	// part of the file.
	s3MaxKeys = 1000
	// s3SignAlgorithm is the algorithm name expected at the
	// start of a V4-signed Authorization header.
	s3SignAlgorithm = "AWS4-HMAC-SHA256"
	// s3MaxClockSkew is the largest accepted difference, in
	// either direction, between the request timestamp and the
	// server clock when checking a V4 signature.
	s3MaxClockSkew = 5 * time.Minute
	// s3SecretCacheTidyInterval is the minimum time between
	// sweeps that delete expired entries from the S3 secret
	// cache.
	s3SecretCacheTidyInterval = time.Minute
)
39
// commonPrefix is a single entry of the CommonPrefixes list in a
// list response. It wraps a plain string in a struct so the XML
// encoder emits nothing at all for an empty list.
type commonPrefix struct {
	Prefix string
}
43
// listV1Resp is the XML response body for a ListObjects (V1)
// request. It is marshaled as a ListBucketResult element in the S3
// 2006-03-01 namespace.
type listV1Resp struct {
	XMLName     string `xml:"http://s3.amazonaws.com/doc/2006-03-01/ ListBucketResult"`
	Name        string
	Prefix      string
	Delimiter   string
	Marker      string
	MaxKeys     int
	IsTruncated bool
	Contents    []s3Key
	// If we use a []string here, xml marshals an empty tag when
	// CommonPrefixes is nil, which confuses some clients.  Fix by
	// using this nested struct instead.
	CommonPrefixes []commonPrefix
	// Similarly, we need omitempty here, because an empty
	// tag confuses some clients (e.g.,
	// github.com/aws/aws-sdk-net never terminates its
	// paging loop).
	NextMarker string `xml:"NextMarker,omitempty"`
	// ListObjectsV2 has a KeyCount response field.
	KeyCount int
}
65
// listV2Resp is the XML response body for a ListObjectsV2 request.
// It is marshaled as a ListBucketResult element in the S3
// 2006-03-01 namespace. The fields tagged omitempty are left out
// of the output entirely when they are empty.
type listV2Resp struct {
	XMLName               string `xml:"http://s3.amazonaws.com/doc/2006-03-01/ ListBucketResult"`
	IsTruncated           bool
	Contents              []s3Key
	Name                  string
	Prefix                string
	Delimiter             string
	MaxKeys               int
	CommonPrefixes        []commonPrefix
	EncodingType          string `xml:",omitempty"`
	KeyCount              int
	ContinuationToken     string `xml:",omitempty"`
	NextContinuationToken string `xml:",omitempty"`
	StartAfter            string `xml:",omitempty"`
}
81
// s3Key describes one object in the Contents list of a list
// response.
type s3Key struct {
	Key          string
	LastModified string
	Size         int64
	// The following fields are not populated, but are here in
	// case clients rely on the keys being present in xml
	// responses.
	ETag         string
	StorageClass string
	Owner        struct {
		ID          string
		DisplayName string
	}
}
96
// cachedS3Secret is an entry in the handler's S3 secret cache: a
// token record together with the time after which the entry must
// no longer be used.
type cachedS3Secret struct {
	// auth is the cached token record.
	auth *arvados.APIClientAuthorization
	// expiry is the time at which this cache entry stops being
	// valid (see isValidAt).
	expiry time.Time
}
101
102 func newCachedS3Secret(auth *arvados.APIClientAuthorization, maxExpiry time.Time) *cachedS3Secret {
103         var expiry time.Time
104         if auth.ExpiresAt.IsZero() || maxExpiry.Before(auth.ExpiresAt) {
105                 expiry = maxExpiry
106         } else {
107                 expiry = auth.ExpiresAt
108         }
109         return &cachedS3Secret{
110                 auth:   auth,
111                 expiry: expiry,
112         }
113 }
114
115 func (cs *cachedS3Secret) isValidAt(t time.Time) bool {
116         return cs.auth != nil &&
117                 !cs.expiry.IsZero() &&
118                 !t.IsZero() &&
119                 t.Before(cs.expiry)
120 }
121
// hmacstring returns the HMAC-SHA256 of msg, keyed with key.
func hmacstring(msg string, key []byte) []byte {
	mac := hmac.New(sha256.New, key)
	// Writing to a hash.Hash never returns an error.
	_, _ = io.WriteString(mac, msg)
	return mac.Sum(nil)
}
127
// hashdigest writes payload to h and returns the resulting digest
// as a lowercase hex string. Any data previously written to h is
// part of the digest too.
func hashdigest(h hash.Hash, payload string) string {
	// Writing to a hash.Hash never returns an error.
	_, _ = io.WriteString(h, payload)
	sum := h.Sum(nil)
	return fmt.Sprintf("%x", sum)
}
132
133 // Signing key for given secret key and request attrs.
134 func s3signatureKey(key, datestamp, regionName, serviceName string) []byte {
135         return hmacstring("aws4_request",
136                 hmacstring(serviceName,
137                         hmacstring(regionName,
138                                 hmacstring(datestamp, []byte("AWS4"+key)))))
139 }
140
// s3querystring returns the canonical query string of u for use in
// an S3 V4 signature: names and values are percent-encoded
// separately (with spaces escaped as %20 instead of +), parameters
// are sorted by encoded name, values of a repeated name are sorted
// by encoded value, and the resulting name=value pairs are joined
// with &.
//
// A parameter without a value is rendered as "name=". The result is
// "" when u has no query parameters.
func s3querystring(u *url.URL) string {
	escape := func(s string) string {
		return strings.ReplaceAll(url.QueryEscape(s), "+", "%20")
	}

	// Parse the query once; u.Query() re-parses RawQuery on
	// every call.
	query := u.Query()
	names := make([]string, 0, len(query))
	values := make(map[string][]string, len(query))
	for name, vs := range query {
		encName := escape(name)
		encValues := make([]string, len(vs))
		for i, v := range vs {
			encValues[i] = escape(v)
		}
		// The signer canonicalizes repeated parameters in
		// value order, which is not necessarily the order in
		// which they appear in the request.
		sort.Strings(encValues)
		names = append(names, encName)
		values[encName] = encValues
	}
	sort.Strings(names)

	var canonical strings.Builder
	for _, encName := range names {
		for _, encValue := range values[encName] {
			if canonical.Len() > 0 {
				canonical.WriteByte('&')
			}
			canonical.WriteString(encName)
			canonical.WriteByte('=')
			canonical.WriteString(encValue)
		}
	}
	return canonical.String()
}
163
// reMultipleSlashChars matches a run of two or more consecutive
// slashes. serveS3 uses it to collapse such runs in the request
// path into a single slash.
var reMultipleSlashChars = regexp.MustCompile(`//+`)
165
166 func s3stringToSign(alg, scope, signedHeaders string, r *http.Request) (string, error) {
167         timefmt, timestr := "20060102T150405Z", r.Header.Get("X-Amz-Date")
168         if timestr == "" {
169                 timefmt, timestr = time.RFC1123, r.Header.Get("Date")
170         }
171         t, err := time.Parse(timefmt, timestr)
172         if err != nil {
173                 return "", fmt.Errorf("invalid timestamp %q: %s", timestr, err)
174         }
175         if skew := time.Now().Sub(t); skew < -s3MaxClockSkew || skew > s3MaxClockSkew {
176                 return "", errors.New("exceeded max clock skew")
177         }
178
179         var canonicalHeaders string
180         for _, h := range strings.Split(signedHeaders, ";") {
181                 if h == "host" {
182                         canonicalHeaders += h + ":" + r.Host + "\n"
183                 } else {
184                         canonicalHeaders += h + ":" + r.Header.Get(h) + "\n"
185                 }
186         }
187
188         normalizedPath := normalizePath(r.URL.Path)
189         ctxlog.FromContext(r.Context()).Debugf("normalizedPath %s", normalizedPath)
190         canonicalRequest := fmt.Sprintf("%s\n%s\n%s\n%s\n%s\n%s", r.Method, normalizedPath, s3querystring(r.URL), canonicalHeaders, signedHeaders, r.Header.Get("X-Amz-Content-Sha256"))
191         ctxlog.FromContext(r.Context()).Debugf("s3stringToSign: canonicalRequest %s", canonicalRequest)
192         return fmt.Sprintf("%s\n%s\n%s\n%s", alg, r.Header.Get("X-Amz-Date"), scope, hashdigest(sha256.New(), canonicalRequest)), nil
193 }
194
// normalizePath returns s with every byte percent-encoded (as %XX,
// uppercase hex) except ALPHA / DIGIT / "-" / "." / "_" / "~" and
// "/".
func normalizePath(s string) string {
	// (url.URL).EscapedPath() would be incorrect here. AWS
	// documentation specifies the URL path should be normalized
	// according to RFC 3986, i.e., unescaping ALPHA / DIGIT / "-"
	// / "." / "_" / "~". The implication is that everything other
	// than those chars (and "/") _must_ be percent-encoded --
	// even chars like ";" and "," that are not normally
	// percent-encoded in paths.
	const upperhex = "0123456789ABCDEF"
	var out strings.Builder
	out.Grow(len(s))
	for i := 0; i < len(s); i++ {
		c := s[i]
		switch {
		case c >= 'a' && c <= 'z',
			c >= 'A' && c <= 'Z',
			c >= '0' && c <= '9',
			c == '-', c == '.', c == '_', c == '~', c == '/':
			out.WriteByte(c)
		default:
			out.WriteByte('%')
			out.WriteByte(upperhex[c>>4])
			out.WriteByte(upperhex[c&0x0f])
		}
	}
	return out.String()
}
220
221 func s3signature(secretKey, scope, signedHeaders, stringToSign string) (string, error) {
222         // scope is {datestamp}/{region}/{service}/aws4_request
223         drs := strings.Split(scope, "/")
224         if len(drs) != 4 {
225                 return "", fmt.Errorf("invalid scope %q", scope)
226         }
227         key := s3signatureKey(secretKey, drs[0], drs[1], drs[2])
228         return hashdigest(hmac.New(sha256.New, key), stringToSign), nil
229 }
230
// v2tokenUnderscore matches the start of an access key that is an
// entire v2 Arvados token with "_" in place of "/".
var v2tokenUnderscore = regexp.MustCompile(`^v2_[a-z0-9]{5}-gj3su-[a-z0-9]{15}_`)

// unescapeKey converts an S3 access key, as given by the client,
// back to the string it stands for. A key that looks like a v2
// token with "_" separators has every "_" replaced by "/". Any
// other key is path-unescaped, or returned unchanged if it is not
// validly escaped.
func unescapeKey(key string) string {
	if v2tokenUnderscore.MatchString(key) {
		// Entire Arvados token, with "/" replaced by "_" to
		// avoid colliding with the Authorization header
		// format.
		return strings.ReplaceAll(key, "_", "/")
	}
	unescaped, err := url.PathUnescape(key)
	if err != nil {
		return key
	}
	return unescaped
}
245
246 func (h *handler) updateS3SecretCache(aca *arvados.APIClientAuthorization, key string) {
247         now := time.Now()
248         ttlExpiry := now.Add(h.Cluster.Collections.WebDAVCache.TTL.Duration())
249         cachedSecret := newCachedS3Secret(aca, ttlExpiry)
250
251         h.s3SecretCacheMtx.Lock()
252         defer h.s3SecretCacheMtx.Unlock()
253
254         if h.s3SecretCache == nil {
255                 h.s3SecretCache = make(map[string]*cachedS3Secret)
256         }
257         h.s3SecretCache[key] = cachedSecret
258         h.s3SecretCache[cachedSecret.auth.UUID] = cachedSecret
259         h.s3SecretCache[cachedSecret.auth.APIToken] = cachedSecret
260         h.s3SecretCache[cachedSecret.auth.TokenV2()] = cachedSecret
261
262         if h.s3SecretCacheNextTidy.After(now) {
263                 return
264         }
265         for key, entry := range h.s3SecretCache {
266                 if entry.expiry.Before(now) {
267                         delete(h.s3SecretCache, key)
268                 }
269         }
270         h.s3SecretCacheNextTidy = now.Add(s3SecretCacheTidyInterval)
271 }
272
273 // checks3signature verifies the given S3 V4 signature and returns the
274 // Arvados token that corresponds to the given accessKey. An error is
275 // returned if accessKey is not a valid token UUID or the signature
276 // does not match.
277 func (h *handler) checks3signature(r *http.Request) (string, error) {
278         var key, scope, signedHeaders, signature string
279         authstring := strings.TrimPrefix(r.Header.Get("Authorization"), s3SignAlgorithm+" ")
280         for _, cmpt := range strings.Split(authstring, ",") {
281                 cmpt = strings.TrimSpace(cmpt)
282                 split := strings.SplitN(cmpt, "=", 2)
283                 switch {
284                 case len(split) != 2:
285                         // (?) ignore
286                 case split[0] == "Credential":
287                         keyandscope := strings.SplitN(split[1], "/", 2)
288                         if len(keyandscope) == 2 {
289                                 key, scope = keyandscope[0], keyandscope[1]
290                         }
291                 case split[0] == "SignedHeaders":
292                         signedHeaders = split[1]
293                 case split[0] == "Signature":
294                         signature = split[1]
295                 }
296         }
297         keyIsUUID := len(key) == 27 && key[5:12] == "-gj3su-"
298         unescapedKey := unescapeKey(key)
299
300         h.s3SecretCacheMtx.Lock()
301         cached := h.s3SecretCache[unescapedKey]
302         h.s3SecretCacheMtx.Unlock()
303         usedCache := cached != nil && cached.isValidAt(time.Now())
304         var aca *arvados.APIClientAuthorization
305         if usedCache {
306                 aca = cached.auth
307         } else {
308                 var acaAuth, acaPath string
309                 if keyIsUUID {
310                         acaAuth = h.Cluster.SystemRootToken
311                         acaPath = key
312                 } else {
313                         acaAuth = unescapedKey
314                         acaPath = "current"
315                 }
316                 client := (&arvados.Client{
317                         APIHost:  h.Cluster.Services.Controller.ExternalURL.Host,
318                         Insecure: h.Cluster.TLS.Insecure,
319                 }).WithRequestID(r.Header.Get("X-Request-Id"))
320                 ctx := arvados.ContextWithAuthorization(r.Context(), "Bearer "+acaAuth)
321                 aca = new(arvados.APIClientAuthorization)
322                 err := client.RequestAndDecodeContext(ctx, aca, "GET", "arvados/v1/api_client_authorizations/"+acaPath, nil, nil)
323                 if err != nil {
324                         ctxlog.FromContext(r.Context()).WithError(err).WithField("UUID", key).Info("token lookup failed")
325                         return "", errors.New("invalid access key")
326                 }
327         }
328         var secret string
329         if keyIsUUID {
330                 secret = aca.APIToken
331         } else {
332                 secret = key
333         }
334         stringToSign, err := s3stringToSign(s3SignAlgorithm, scope, signedHeaders, r)
335         if err != nil {
336                 return "", err
337         }
338         expect, err := s3signature(secret, scope, signedHeaders, stringToSign)
339         if err != nil {
340                 return "", err
341         } else if expect != signature {
342                 return "", fmt.Errorf("signature does not match (scope %q signedHeaders %q stringToSign %q)", scope, signedHeaders, stringToSign)
343         }
344         if !usedCache {
345                 h.updateS3SecretCache(aca, unescapedKey)
346         }
347         return aca.TokenV2(), nil
348 }
349
// s3ErrorResponse writes an S3-style XML error response to w, with
// HTTP status code and an <Error> body containing the given S3
// error code, message, and resource. The RequestId element is
// always present and always empty.
func s3ErrorResponse(w http.ResponseWriter, s3code string, message string, resource string, code int) {
	hdr := w.Header()
	hdr.Set("Content-Type", "application/xml")
	hdr.Set("X-Content-Type-Options", "nosniff")
	w.WriteHeader(code)

	body := struct {
		Code      string
		Message   string
		Resource  string
		RequestId string
	}{
		Code:      s3code,
		Message:   message,
		Resource:  resource,
		RequestId: "",
	}
	fmt.Fprint(w, xml.Header)
	// The response status has already been sent, so there is
	// nothing useful to do with an encoding/write error here.
	xml.NewEncoder(w).EncodeElement(body, xml.StartElement{Name: xml.Name{Local: "Error"}})
}
368
// S3 error code strings, passed to s3ErrorResponse as the s3code
// argument.
var NoSuchKey = "NoSuchKey"
var NoSuchBucket = "NoSuchBucket"
var InvalidArgument = "InvalidArgument"
var InternalError = "InternalError"
var UnauthorizedAccess = "UnauthorizedAccess"
var InvalidRequest = "InvalidRequest"
var SignatureDoesNotMatch = "SignatureDoesNotMatch"

// reRawQueryIndicatesAPI matches a raw query string whose first
// parameter is a bare lowercase word with no value (e.g.,
// "website" or "acl&versionID=..."). serveS3 treats such requests
// as calls to S3 APIs that are not supported, apart from the few
// it checks for explicitly beforehand.
var reRawQueryIndicatesAPI = regexp.MustCompile(`^[a-z]+(&|$)`)
378
379 // serveS3 handles r and returns true if r is a request from an S3
380 // client, otherwise it returns false.
381 func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool {
382         var token string
383         if auth := r.Header.Get("Authorization"); strings.HasPrefix(auth, "AWS ") {
384                 split := strings.SplitN(auth[4:], ":", 2)
385                 if len(split) < 2 {
386                         s3ErrorResponse(w, InvalidRequest, "malformed Authorization header", r.URL.Path, http.StatusUnauthorized)
387                         return true
388                 }
389                 token = unescapeKey(split[0])
390         } else if strings.HasPrefix(auth, s3SignAlgorithm+" ") {
391                 t, err := h.checks3signature(r)
392                 if err != nil {
393                         s3ErrorResponse(w, SignatureDoesNotMatch, "signature verification failed: "+err.Error(), r.URL.Path, http.StatusForbidden)
394                         return true
395                 }
396                 token = t
397         } else {
398                 return false
399         }
400
401         fs, sess, tokenUser, err := h.Cache.GetSession(token)
402         if err != nil {
403                 s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusInternalServerError)
404                 return true
405         }
406         defer sess.Release()
407         readfs := fs
408         if writeMethod[r.Method] {
409                 // Create a FileSystem for this request, to avoid
410                 // exposing incomplete write operations to concurrent
411                 // requests.
412                 client := sess.client.WithRequestID(r.Header.Get("X-Request-Id"))
413                 fs = client.SiteFileSystem(sess.keepclient)
414                 fs.ForwardSlashNameSubstitution(h.Cluster.Collections.ForwardSlashNameSubstitution)
415         }
416
417         var objectNameGiven bool
418         var bucketName string
419         fspath := "/by_id"
420         if id := arvados.CollectionIDFromDNSName(r.Host); id != "" {
421                 fspath += "/" + id
422                 bucketName = id
423                 objectNameGiven = strings.Count(strings.TrimSuffix(r.URL.Path, "/"), "/") > 0
424         } else {
425                 bucketName = strings.SplitN(strings.TrimPrefix(r.URL.Path, "/"), "/", 2)[0]
426                 objectNameGiven = strings.Count(strings.TrimSuffix(r.URL.Path, "/"), "/") > 1
427         }
428         fspath += reMultipleSlashChars.ReplaceAllString(r.URL.Path, "/")
429
430         if needSync, err := h.needSync(r.Context(), fs, fspath); err != nil {
431                 s3ErrorResponse(w, InternalError, fmt.Sprintf("internal error: %s", err), r.URL.Path+"?"+r.URL.RawQuery, http.StatusInternalServerError)
432                 return true
433         } else if needSync {
434                 _, collpath := h.determineCollection(fs, fspath)
435                 syncpath := strings.TrimSuffix(fspath, collpath)
436                 syncf, err := fs.OpenFile(syncpath, os.O_RDONLY, 0)
437                 if err != nil {
438                         s3ErrorResponse(w, InternalError, fmt.Sprintf("internal error: %s", err), r.URL.Path+"?"+r.URL.RawQuery, http.StatusInternalServerError)
439                         return true
440                 }
441                 defer syncf.Close()
442                 err = syncf.Sync()
443                 if err != nil {
444                         s3ErrorResponse(w, InternalError, fmt.Sprintf("internal error: %s", err), r.URL.Path+"?"+r.URL.RawQuery, http.StatusInternalServerError)
445                         return true
446                 }
447         }
448
449         switch {
450         case r.Method == http.MethodGet && !objectNameGiven:
451                 // Path is "/{uuid}" or "/{uuid}/", has no object name
452                 if _, ok := r.URL.Query()["versioning"]; ok {
453                         // GetBucketVersioning
454                         w.Header().Set("Content-Type", "application/xml")
455                         io.WriteString(w, xml.Header)
456                         fmt.Fprintln(w, `<VersioningConfiguration xmlns="http://s3.amazonaws.com/doc/2006-03-01/"/>`)
457                 } else if _, ok = r.URL.Query()["location"]; ok {
458                         // GetBucketLocation
459                         w.Header().Set("Content-Type", "application/xml")
460                         io.WriteString(w, xml.Header)
461                         fmt.Fprintln(w, `<LocationConstraint><LocationConstraint xmlns="http://s3.amazonaws.com/doc/2006-03-01/">`+
462                                 h.Cluster.ClusterID+
463                                 `</LocationConstraint></LocationConstraint>`)
464                 } else if reRawQueryIndicatesAPI.MatchString(r.URL.RawQuery) {
465                         // GetBucketWebsite ("GET /bucketid/?website"), GetBucketTagging, etc.
466                         s3ErrorResponse(w, InvalidRequest, "API not supported", r.URL.Path+"?"+r.URL.RawQuery, http.StatusBadRequest)
467                 } else {
468                         // ListObjects
469                         h.s3list(bucketName, w, r, fs)
470                 }
471                 return true
472         case r.Method == http.MethodGet || r.Method == http.MethodHead:
473                 if reRawQueryIndicatesAPI.MatchString(r.URL.RawQuery) {
474                         // GetObjectRetention ("GET /bucketid/objectid?retention&versionID=..."), etc.
475                         s3ErrorResponse(w, InvalidRequest, "API not supported", r.URL.Path+"?"+r.URL.RawQuery, http.StatusBadRequest)
476                         return true
477                 }
478                 fi, err := fs.Stat(fspath)
479                 if r.Method == "HEAD" && !objectNameGiven {
480                         // HeadBucket
481                         if err == nil && fi.IsDir() {
482                                 err = setFileInfoHeaders(w.Header(), fs, fspath)
483                                 if err != nil {
484                                         s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusBadGateway)
485                                         return true
486                                 }
487                                 w.WriteHeader(http.StatusOK)
488                         } else if os.IsNotExist(err) {
489                                 s3ErrorResponse(w, NoSuchBucket, "The specified bucket does not exist.", r.URL.Path, http.StatusNotFound)
490                         } else {
491                                 s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusBadGateway)
492                         }
493                         return true
494                 }
495                 if err == nil && fi.IsDir() && objectNameGiven && strings.HasSuffix(fspath, "/") && h.Cluster.Collections.S3FolderObjects {
496                         err = setFileInfoHeaders(w.Header(), fs, fspath)
497                         if err != nil {
498                                 s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusBadGateway)
499                                 return true
500                         }
501                         w.Header().Set("Content-Type", "application/x-directory")
502                         w.WriteHeader(http.StatusOK)
503                         return true
504                 }
505                 if os.IsNotExist(err) ||
506                         (err != nil && err.Error() == "not a directory") ||
507                         (fi != nil && fi.IsDir()) {
508                         s3ErrorResponse(w, NoSuchKey, "The specified key does not exist.", r.URL.Path, http.StatusNotFound)
509                         return true
510                 }
511
512                 if !h.userPermittedToUploadOrDownload(r.Method, tokenUser) {
513                         http.Error(w, "Not permitted", http.StatusForbidden)
514                         return true
515                 }
516                 h.logUploadOrDownload(r, sess.arvadosclient, fs, fspath, 1, nil, tokenUser)
517
518                 // shallow copy r, and change URL path
519                 r := *r
520                 r.URL.Path = fspath
521                 err = setFileInfoHeaders(w.Header(), fs, fspath)
522                 if err != nil {
523                         s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusBadGateway)
524                         return true
525                 }
526                 http.FileServer(fs).ServeHTTP(w, &r)
527                 return true
528         case r.Method == http.MethodPut:
529                 if reRawQueryIndicatesAPI.MatchString(r.URL.RawQuery) {
530                         // PutObjectAcl ("PUT /bucketid/objectid?acl&versionID=..."), etc.
531                         s3ErrorResponse(w, InvalidRequest, "API not supported", r.URL.Path+"?"+r.URL.RawQuery, http.StatusBadRequest)
532                         return true
533                 }
534                 if !objectNameGiven {
535                         s3ErrorResponse(w, InvalidArgument, "Missing object name in PUT request.", r.URL.Path, http.StatusBadRequest)
536                         return true
537                 }
538                 if !h.userPermittedToUploadOrDownload(r.Method, tokenUser) {
539                         http.Error(w, "Not permitted", http.StatusForbidden)
540                         return true
541                 }
542                 var objectIsDir bool
543                 if strings.HasSuffix(fspath, "/") {
544                         if !h.Cluster.Collections.S3FolderObjects {
545                                 s3ErrorResponse(w, InvalidArgument, "invalid object name: trailing slash", r.URL.Path, http.StatusBadRequest)
546                                 return true
547                         }
548                         n, err := r.Body.Read(make([]byte, 1))
549                         if err != nil && err != io.EOF {
550                                 s3ErrorResponse(w, InternalError, fmt.Sprintf("error reading request body: %s", err), r.URL.Path, http.StatusInternalServerError)
551                                 return true
552                         } else if n > 0 {
553                                 s3ErrorResponse(w, InvalidArgument, "cannot create object with trailing '/' char unless content is empty", r.URL.Path, http.StatusBadRequest)
554                                 return true
555                         } else if mediatype, _, err := mime.ParseMediaType(r.Header.Get("Content-Type")); err != nil || mediatype != "application/x-directory" {
556                                 s3ErrorResponse(w, InvalidArgument, "cannot create object with trailing '/' char unless Content-Type is 'application/x-directory'", r.URL.Path, http.StatusBadRequest)
557                                 return true
558                         }
559                         // Given PUT "foo/bar/", we'll use "foo/bar/."
560                         // in the "ensure parents exist" block below,
561                         // and then we'll be done.
562                         fspath += "."
563                         objectIsDir = true
564                 }
565                 fi, err := fs.Stat(fspath)
566                 if err != nil && err.Error() == "not a directory" {
567                         // requested foo/bar, but foo is a file
568                         s3ErrorResponse(w, InvalidArgument, "object name conflicts with existing object", r.URL.Path, http.StatusBadRequest)
569                         return true
570                 }
571                 if strings.HasSuffix(r.URL.Path, "/") && err == nil && !fi.IsDir() {
572                         // requested foo/bar/, but foo/bar is a file
573                         s3ErrorResponse(w, InvalidArgument, "object name conflicts with existing object", r.URL.Path, http.StatusBadRequest)
574                         return true
575                 }
576                 h.logUploadOrDownload(r, sess.arvadosclient, fs, fspath, 1, nil, tokenUser)
577                 if objectIsDir {
578                         // create directory, and any missing
579                         // parent/intermediate directories
580                         for i, c := range fspath {
581                                 if i > 0 && c == '/' {
582                                         dir := fspath[:i]
583                                         if strings.HasSuffix(dir, "/") {
584                                                 err = errors.New("invalid object name (consecutive '/' chars)")
585                                                 s3ErrorResponse(w, InvalidArgument, err.Error(), r.URL.Path, http.StatusBadRequest)
586                                                 return true
587                                         }
588                                         err = fs.Mkdir(dir, 0755)
589                                         if errors.Is(err, arvados.ErrInvalidArgument) || errors.Is(err, arvados.ErrInvalidOperation) {
590                                                 // Cannot create a directory
591                                                 // here.
592                                                 err = fmt.Errorf("mkdir %q failed: %w", dir, err)
593                                                 s3ErrorResponse(w, InvalidArgument, err.Error(), r.URL.Path, http.StatusBadRequest)
594                                                 return true
595                                         } else if err != nil && !os.IsExist(err) {
596                                                 err = fmt.Errorf("mkdir %q failed: %w", dir, err)
597                                                 s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusInternalServerError)
598                                                 return true
599                                         }
600                                 }
601                         }
602                         err = h.syncCollection(fs, readfs, fspath)
603                         if err != nil {
604                                 err = fmt.Errorf("sync failed: %w", err)
605                                 s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusInternalServerError)
606                                 return true
607                         }
608                 } else if fi, err := fs.Stat(fspath); err == nil && fi.IsDir() {
609                         s3ErrorResponse(w, InvalidArgument, "object name conflicts with existing directory", r.URL.Path, http.StatusBadRequest)
610                         return true
611                 } else {
612                         // Copy the file data from the request body
613                         // into a file (named "file") in a new empty
614                         // collection.  Then, use the replace_files
615                         // API to atomically splice that into the
616                         // destination collection.
617                         //
618                         // Note this doesn't update our in-memory
619                         // filesystem.  If a subsequent request
620                         // depends on the outcome, it will call Sync
621                         // to update (see needSync above).
622                         coll, destpath := h.determineCollection(fs, fspath)
623                         if coll == nil {
624                                 s3ErrorResponse(w, InvalidArgument, "invalid argument: path is not in a collection", r.URL.Path, http.StatusBadRequest)
625                                 return true
626                         }
627                         client := sess.client.WithRequestID(r.Header.Get("X-Request-Id"))
628                         tmpfs, err := (&arvados.Collection{}).FileSystem(client, sess.keepclient)
629                         if err != nil {
630                                 err = fmt.Errorf("tmpfs failed: %w", err)
631                                 s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusInternalServerError)
632                                 return true
633                         }
634                         f, err := tmpfs.OpenFile("file", os.O_WRONLY|os.O_TRUNC|os.O_CREATE, 0644)
635                         if os.IsNotExist(err) {
636                                 f, err = tmpfs.OpenFile("file", os.O_WRONLY|os.O_TRUNC|os.O_CREATE, 0644)
637                         }
638                         if err != nil {
639                                 err = fmt.Errorf("open failed: %w", err)
640                                 s3ErrorResponse(w, InvalidArgument, err.Error(), r.URL.Path, http.StatusBadRequest)
641                                 return true
642                         }
643                         defer f.Close()
644
645                         _, err = io.Copy(f, r.Body)
646                         if err != nil {
647                                 err = fmt.Errorf("write to %q failed: %w", r.URL.Path, err)
648                                 s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusBadGateway)
649                                 return true
650                         }
651                         err = f.Close()
652                         if err != nil {
653                                 err = fmt.Errorf("write to %q failed: close: %w", r.URL.Path, err)
654                                 s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusBadGateway)
655                                 return true
656                         }
657                         manifest, err := tmpfs.MarshalManifest(".")
658                         if err != nil {
659                                 err = fmt.Errorf("marshal tmpfs: %w", err)
660                                 s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusBadGateway)
661                                 return true
662                         }
663                         err = client.RequestAndDecode(nil, "PATCH", "arvados/v1/collections/"+coll.UUID, nil, map[string]interface{}{
664                                 "replace_files": map[string]string{"/" + destpath: "manifest_text/file"},
665                                 "collection":    map[string]interface{}{"manifest_text": manifest}})
666                         if err != nil {
667                                 status := http.StatusInternalServerError
668                                 if te := new(arvados.TransactionError); errors.As(err, te) {
669                                         status = te.HTTPStatus()
670                                 }
671                                 s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, status)
672                         }
673                 }
674                 w.WriteHeader(http.StatusOK)
675                 return true
676         case r.Method == http.MethodDelete:
677                 if reRawQueryIndicatesAPI.MatchString(r.URL.RawQuery) {
678                         // DeleteObjectTagging ("DELETE /bucketid/objectid?tagging&versionID=..."), etc.
679                         s3ErrorResponse(w, InvalidRequest, "API not supported", r.URL.Path+"?"+r.URL.RawQuery, http.StatusBadRequest)
680                         return true
681                 }
682                 if !objectNameGiven || r.URL.Path == "/" {
683                         s3ErrorResponse(w, InvalidArgument, "missing object name in DELETE request", r.URL.Path, http.StatusBadRequest)
684                         return true
685                 }
686                 coll, destpath := h.determineCollection(fs, fspath)
687                 if coll == nil {
688                         s3ErrorResponse(w, InvalidArgument, "invalid argument: path is not in a collection", r.URL.Path, http.StatusBadRequest)
689                         return true
690                 }
691                 if strings.HasSuffix(fspath, "/") {
692                         fspath = strings.TrimSuffix(fspath, "/")
693                         fi, err := fs.Stat(fspath)
694                         if os.IsNotExist(err) {
695                                 w.WriteHeader(http.StatusNoContent)
696                                 return true
697                         } else if err != nil {
698                                 s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusInternalServerError)
699                                 return true
700                         } else if !fi.IsDir() {
701                                 // if "foo" exists and is a file, then
702                                 // "foo/" doesn't exist, so we say
703                                 // delete was successful.
704                                 w.WriteHeader(http.StatusNoContent)
705                                 return true
706                         }
707                 } else if fi, err := fs.Stat(fspath); err == nil && fi.IsDir() {
708                         // if "foo" is a dir, it is visible via S3
709                         // only as "foo/", not "foo" -- so we leave
710                         // the dir alone and return 204 to indicate
711                         // that "foo" does not exist.
712                         w.WriteHeader(http.StatusNoContent)
713                         return true
714                 }
715                 destpath = strings.TrimSuffix(destpath, "/")
716                 client := sess.client.WithRequestID(r.Header.Get("X-Request-Id"))
717                 err = client.RequestAndDecode(nil, "PATCH", "arvados/v1/collections/"+coll.UUID, nil, map[string]interface{}{
718                         "replace_files": map[string]string{"/" + destpath: ""}})
719                 if err != nil {
720                         status := http.StatusInternalServerError
721                         if te := new(arvados.TransactionError); errors.As(err, te) {
722                                 status = te.HTTPStatus()
723                         }
724                         s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, status)
725                 }
726                 w.WriteHeader(http.StatusNoContent)
727                 return true
728         default:
729                 s3ErrorResponse(w, InvalidRequest, "method not allowed", r.URL.Path, http.StatusMethodNotAllowed)
730                 return true
731         }
732 }
733
734 // Save modifications to the indicated collection in srcfs, then (if
735 // successful) ensure they are also reflected in dstfs.
736 func (h *handler) syncCollection(srcfs, dstfs arvados.CustomFileSystem, path string) error {
737         coll, _ := h.determineCollection(srcfs, path)
738         if coll == nil || coll.UUID == "" {
739                 return errors.New("could not determine collection to sync")
740         }
741         d, err := srcfs.OpenFile("by_id/"+coll.UUID, os.O_RDWR, 0777)
742         if err != nil {
743                 return err
744         }
745         defer d.Close()
746         err = d.Sync()
747         if err != nil {
748                 return err
749         }
750         snap, err := d.Snapshot()
751         if err != nil {
752                 return err
753         }
754         dstd, err := dstfs.OpenFile("by_id/"+coll.UUID, os.O_RDWR, 0777)
755         if err != nil {
756                 return err
757         }
758         defer dstd.Close()
759         return dstd.Splice(snap)
760 }
761
762 func setFileInfoHeaders(header http.Header, fs arvados.CustomFileSystem, path string) error {
763         maybeEncode := func(s string) string {
764                 for _, c := range s {
765                         if c > '\u007f' || c < ' ' {
766                                 return mime.BEncoding.Encode("UTF-8", s)
767                         }
768                 }
769                 return s
770         }
771         path = strings.TrimSuffix(path, "/")
772         var props map[string]interface{}
773         for {
774                 fi, err := fs.Stat(path)
775                 if err != nil {
776                         return err
777                 }
778                 switch src := fi.Sys().(type) {
779                 case *arvados.Collection:
780                         props = src.Properties
781                         if src.PortableDataHash != "" {
782                                 header.Set("Etag", fmt.Sprintf(`"%s"`, src.PortableDataHash))
783                         }
784                 case *arvados.Group:
785                         props = src.Properties
786                 default:
787                         if err, ok := src.(error); ok {
788                                 return err
789                         }
790                         // Try parent
791                         cut := strings.LastIndexByte(path, '/')
792                         if cut < 0 {
793                                 return nil
794                         }
795                         path = path[:cut]
796                         continue
797                 }
798                 break
799         }
800         for k, v := range props {
801                 if !validMIMEHeaderKey(k) {
802                         continue
803                 }
804                 k = "x-amz-meta-" + k
805                 if s, ok := v.(string); ok {
806                         header.Set(k, maybeEncode(s))
807                 } else if j, err := json.Marshal(v); err == nil {
808                         header.Set(k, maybeEncode(string(j)))
809                 }
810         }
811         return nil
812 }
813
// validMIMEHeaderKey reports whether k can be used as (part of) a
// MIME header key.
//
// textproto.CanonicalMIMEHeaderKey returns its argument unmodified
// when it contains a space or an invalid header field byte. Prefixing
// k with a lowercase "z-" guarantees that canonicalization changes
// the string (to "Z-...") whenever the key is valid, so "changed"
// means "valid".
func validMIMEHeaderKey(k string) bool {
	probe := "z-" + k
	canonical := textproto.CanonicalMIMEHeaderKey(probe)
	return canonical != probe
}
818
819 // Call fn on the given path (directory) and its contents, in
820 // lexicographic order.
821 //
822 // If isRoot==true and path is not a directory, return nil.
823 //
824 // If fn returns filepath.SkipDir when called on a directory, don't
825 // descend into that directory.
826 func walkFS(fs arvados.CustomFileSystem, path string, isRoot bool, fn func(path string, fi os.FileInfo) error) error {
827         if isRoot {
828                 fi, err := fs.Stat(path)
829                 if os.IsNotExist(err) || (err == nil && !fi.IsDir()) {
830                         return nil
831                 } else if err != nil {
832                         return err
833                 }
834                 err = fn(path, fi)
835                 if err == filepath.SkipDir {
836                         return nil
837                 } else if err != nil {
838                         return err
839                 }
840         }
841         f, err := fs.Open(path)
842         if os.IsNotExist(err) && isRoot {
843                 return nil
844         } else if err != nil {
845                 return fmt.Errorf("open %q: %w", path, err)
846         }
847         defer f.Close()
848         if path == "/" {
849                 path = ""
850         }
851         fis, err := f.Readdir(-1)
852         if err != nil {
853                 return err
854         }
855         sort.Slice(fis, func(i, j int) bool { return fis[i].Name() < fis[j].Name() })
856         for _, fi := range fis {
857                 err = fn(path+"/"+fi.Name(), fi)
858                 if err == filepath.SkipDir {
859                         continue
860                 } else if err != nil {
861                         return err
862                 }
863                 if fi.IsDir() {
864                         err = walkFS(fs, path+"/"+fi.Name(), false, fn)
865                         if err != nil {
866                                 return err
867                         }
868                 }
869         }
870         return nil
871 }
872
873 var errDone = errors.New("done")
874
875 func (h *handler) s3list(bucket string, w http.ResponseWriter, r *http.Request, fs arvados.CustomFileSystem) {
876         var params struct {
877                 v2                bool
878                 delimiter         string
879                 maxKeys           int
880                 prefix            string
881                 marker            string // decoded continuationToken (v2) or provided by client (v1)
882                 startAfter        string // v2
883                 continuationToken string // v2
884                 encodingTypeURL   bool   // v2
885         }
886         params.delimiter = r.FormValue("delimiter")
887         if mk, _ := strconv.ParseInt(r.FormValue("max-keys"), 10, 64); mk > 0 && mk < s3MaxKeys {
888                 params.maxKeys = int(mk)
889         } else {
890                 params.maxKeys = s3MaxKeys
891         }
892         params.prefix = r.FormValue("prefix")
893         switch r.FormValue("list-type") {
894         case "":
895         case "2":
896                 params.v2 = true
897         default:
898                 http.Error(w, "invalid list-type parameter", http.StatusBadRequest)
899                 return
900         }
901         if params.v2 {
902                 params.continuationToken = r.FormValue("continuation-token")
903                 marker, err := base64.StdEncoding.DecodeString(params.continuationToken)
904                 if err != nil {
905                         http.Error(w, "invalid continuation token", http.StatusBadRequest)
906                         return
907                 }
908                 // marker and start-after perform the same function,
909                 // but we keep them separate so we can repeat them
910                 // back to the client in the response.
911                 params.marker = string(marker)
912                 params.startAfter = r.FormValue("start-after")
913                 switch r.FormValue("encoding-type") {
914                 case "":
915                 case "url":
916                         params.encodingTypeURL = true
917                 default:
918                         http.Error(w, "invalid encoding-type parameter", http.StatusBadRequest)
919                         return
920                 }
921         } else {
922                 // marker is functionally equivalent to start-after.
923                 params.marker = r.FormValue("marker")
924         }
925
926         // startAfter is params.marker or params.startAfter, whichever
927         // comes last.
928         startAfter := params.startAfter
929         if startAfter < params.marker {
930                 startAfter = params.marker
931         }
932
933         bucketdir := "by_id/" + bucket
934         // walkpath is the directory (relative to bucketdir) we need
935         // to walk: the innermost directory that is guaranteed to
936         // contain all paths that have the requested prefix. Examples:
937         // prefix "foo/bar"  => walkpath "foo"
938         // prefix "foo/bar/" => walkpath "foo/bar"
939         // prefix "foo"      => walkpath ""
940         // prefix ""         => walkpath ""
941         walkpath := params.prefix
942         if cut := strings.LastIndex(walkpath, "/"); cut >= 0 {
943                 walkpath = walkpath[:cut]
944         } else {
945                 walkpath = ""
946         }
947
948         resp := listV2Resp{
949                 Name:              bucket,
950                 Prefix:            params.prefix,
951                 Delimiter:         params.delimiter,
952                 MaxKeys:           params.maxKeys,
953                 ContinuationToken: r.FormValue("continuation-token"),
954                 StartAfter:        params.startAfter,
955         }
956
957         // nextMarker will be the last path we add to either
958         // resp.Contents or commonPrefixes.  It will be included in
959         // the response as NextMarker or NextContinuationToken if
960         // needed.
961         nextMarker := ""
962
963         commonPrefixes := map[string]bool{}
964         full := false
965         err := walkFS(fs, strings.TrimSuffix(bucketdir+"/"+walkpath, "/"), true, func(path string, fi os.FileInfo) error {
966                 if path == bucketdir {
967                         return nil
968                 }
969                 path = path[len(bucketdir)+1:]
970                 filesize := fi.Size()
971                 if fi.IsDir() {
972                         path += "/"
973                         filesize = 0
974                 }
975                 if strings.HasPrefix(params.prefix, path) && params.prefix != path {
976                         // Descend into subtree until we reach desired prefix
977                         return nil
978                 } else if path < params.prefix {
979                         // Not an ancestor or descendant of desired
980                         // prefix, therefore none of its descendants
981                         // can be either -- skip
982                         return filepath.SkipDir
983                 } else if path > params.prefix && !strings.HasPrefix(path, params.prefix) {
984                         // We must have traversed everything under
985                         // desired prefix
986                         return errDone
987                 } else if path == startAfter {
988                         // Skip startAfter itself, just descend into
989                         // subtree
990                         return nil
991                 } else if strings.HasPrefix(startAfter, path) {
992                         // Descend into subtree in case it contains
993                         // something after startAfter
994                         return nil
995                 } else if path < startAfter {
996                         // Skip ahead until we reach startAfter
997                         return filepath.SkipDir
998                 }
999                 if fi.IsDir() && !h.Cluster.Collections.S3FolderObjects {
1000                         // Note we don't add anything to
1001                         // commonPrefixes here even if delimiter is
1002                         // "/". We descend into the directory, and
1003                         // return a commonPrefix only if we end up
1004                         // finding a regular file inside it.
1005                         return nil
1006                 }
1007                 if params.delimiter != "" {
1008                         idx := strings.Index(path[len(params.prefix):], params.delimiter)
1009                         if idx >= 0 {
1010                                 // with prefix "foobar" and delimiter
1011                                 // "z", when we hit "foobar/baz", we
1012                                 // add "/baz" to commonPrefixes and
1013                                 // stop descending.
1014                                 prefix := path[:len(params.prefix)+idx+1]
1015                                 if prefix == startAfter {
1016                                         return nil
1017                                 } else if prefix < startAfter && !strings.HasPrefix(startAfter, prefix) {
1018                                         return nil
1019                                 } else if full {
1020                                         resp.IsTruncated = true
1021                                         return errDone
1022                                 } else {
1023                                         commonPrefixes[prefix] = true
1024                                         nextMarker = prefix
1025                                         full = len(resp.Contents)+len(commonPrefixes) >= params.maxKeys
1026                                         return filepath.SkipDir
1027                                 }
1028                         }
1029                 }
1030                 if full {
1031                         resp.IsTruncated = true
1032                         return errDone
1033                 }
1034                 resp.Contents = append(resp.Contents, s3Key{
1035                         Key:          path,
1036                         LastModified: fi.ModTime().UTC().Format("2006-01-02T15:04:05.999") + "Z",
1037                         Size:         filesize,
1038                 })
1039                 nextMarker = path
1040                 full = len(resp.Contents)+len(commonPrefixes) >= params.maxKeys
1041                 return nil
1042         })
1043         if err != nil && err != errDone {
1044                 http.Error(w, err.Error(), http.StatusInternalServerError)
1045                 return
1046         }
1047         if params.delimiter == "" && !params.v2 || !resp.IsTruncated {
1048                 nextMarker = ""
1049         }
1050         if params.delimiter != "" {
1051                 resp.CommonPrefixes = make([]commonPrefix, 0, len(commonPrefixes))
1052                 for prefix := range commonPrefixes {
1053                         resp.CommonPrefixes = append(resp.CommonPrefixes, commonPrefix{prefix})
1054                 }
1055                 sort.Slice(resp.CommonPrefixes, func(i, j int) bool { return resp.CommonPrefixes[i].Prefix < resp.CommonPrefixes[j].Prefix })
1056         }
1057         resp.KeyCount = len(resp.Contents)
1058         var respV1orV2 interface{}
1059
1060         if params.encodingTypeURL {
1061                 // https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectsV2.html
1062                 // "If you specify the encoding-type request
1063                 // parameter, Amazon S3 includes this element in the
1064                 // response, and returns encoded key name values in
1065                 // the following response elements:
1066                 //
1067                 // Delimiter, Prefix, Key, and StartAfter.
1068                 //
1069                 //      Type: String
1070                 //
1071                 // Valid Values: url"
1072                 //
1073                 // This is somewhat vague but in practice it appears
1074                 // to mean x-www-form-urlencoded as in RFC1866 8.2.1
1075                 // para 1 (encode space as "+") rather than straight
1076                 // percent-encoding as in RFC1738 2.2.  Presumably,
1077                 // the intent is to allow the client to decode XML and
1078                 // then paste the strings directly into another URI
1079                 // query or POST form like "https://host/path?foo=" +
1080                 // foo + "&bar=" + bar.
1081                 resp.EncodingType = "url"
1082                 resp.Delimiter = url.QueryEscape(resp.Delimiter)
1083                 resp.Prefix = url.QueryEscape(resp.Prefix)
1084                 resp.StartAfter = url.QueryEscape(resp.StartAfter)
1085                 for i, ent := range resp.Contents {
1086                         ent.Key = url.QueryEscape(ent.Key)
1087                         resp.Contents[i] = ent
1088                 }
1089                 for i, ent := range resp.CommonPrefixes {
1090                         ent.Prefix = url.QueryEscape(ent.Prefix)
1091                         resp.CommonPrefixes[i] = ent
1092                 }
1093         }
1094
1095         if params.v2 {
1096                 resp.NextContinuationToken = base64.StdEncoding.EncodeToString([]byte(nextMarker))
1097                 respV1orV2 = resp
1098         } else {
1099                 respV1orV2 = listV1Resp{
1100                         CommonPrefixes: resp.CommonPrefixes,
1101                         NextMarker:     nextMarker,
1102                         KeyCount:       resp.KeyCount,
1103                         IsTruncated:    resp.IsTruncated,
1104                         Name:           bucket,
1105                         Prefix:         params.prefix,
1106                         Delimiter:      params.delimiter,
1107                         Marker:         params.marker,
1108                         MaxKeys:        params.maxKeys,
1109                         Contents:       resp.Contents,
1110                 }
1111         }
1112
1113         w.Header().Set("Content-Type", "application/xml")
1114         io.WriteString(w, xml.Header)
1115         if err := xml.NewEncoder(w).Encode(respV1orV2); err != nil {
1116                 ctxlog.FromContext(r.Context()).WithError(err).Error("error writing xml response")
1117         }
1118 }