]> git.arvados.org - arvados.git/blob - services/keep-web/zip.go
22076: Fix regexps to avoid inadvertent match of suffix/prefix.
[arvados.git] / services / keep-web / zip.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package keepweb
6
7 import (
8         "archive/zip"
9         "encoding/json"
10         "errors"
11         "fmt"
12         "io"
13         "io/fs"
14         "mime"
15         "net/http"
16         "net/url"
17         "path/filepath"
18         "strings"
19
20         "git.arvados.org/arvados.git/sdk/go/arvados"
21         "git.arvados.org/arvados.git/sdk/go/ctxlog"
22 )
23
// rfc3339NanoFixed is a time layout like time.RFC3339Nano, except
// that the fractional seconds field always has exactly nine digits
// (trailing zeros are kept rather than trimmed). It is used to
// format the timestamps written to the collection metadata file.
const rfc3339NanoFixed = "2006-01-02T15:04:05.000000000Z07:00"
25
// zipParams holds the client-supplied options for a zip archive
// request. It is populated either from query/form parameters or by
// decoding a JSON request body.
type zipParams struct {
	// Files lists the paths (files or directories) to include in
	// the archive. "/" selects the entire collection. If no paths
	// are given, all files are included. There is no json tag, so
	// the JSON key is matched against the field name
	// case-insensitively (e.g., "files").
	Files []string
	// IncludeCollectionMetadata requests that a "collection.json"
	// entry describing the collection be added to the archive.
	IncludeCollectionMetadata bool `json:"include_collection_metadata"`
}
30
31 // serveZip handles a request for a zip archive.
32 func (h *handler) serveZip(w http.ResponseWriter, r *http.Request, session *cachedSession, sitefs arvados.CustomFileSystem, ziproot string, tokenUser *arvados.User) {
33         if r.Method != "GET" && r.Method != "HEAD" && r.Method != "POST" {
34                 // This is a generic 400, not 405 (method not allowed)
35                 // because this method/URL combination is allowed,
36                 // just not with the Accept: application/zip header.
37                 http.Error(w, "zip archive can only be served via GET, HEAD, or POST", http.StatusBadRequest)
38                 return
39         }
40         // Check "GET" permission regardless of r.Method, because all
41         // methods result in downloads.
42         if !h.userPermittedToUploadOrDownload("GET", tokenUser) {
43                 http.Error(w, "Not permitted", http.StatusForbidden)
44                 return
45         }
46         coll, subdir := h.determineCollection(sitefs, ziproot)
47         if coll == nil || subdir != "" {
48                 http.Error(w, "zip archive can only be served from the root directory of a collection", http.StatusBadRequest)
49                 return
50         }
51
52         // Load params from query and post form
53         var params zipParams
54         err := r.ParseForm()
55         if err != nil {
56                 http.Error(w, err.Error(), http.StatusBadRequest)
57                 return
58         }
59         params.Files = r.Form["files"]
60         params.IncludeCollectionMetadata = r.Form.Get("include_collection_metadata") != ""
61
62         // Load params from JSON request body
63         if params.Files == nil && r.Header.Get("Content-Type") == "application/json" {
64                 // r.Body is always non-nil, but will return EOF
65                 // immediately if no body is present.
66                 err := json.NewDecoder(r.Body).Decode(&params)
67                 if err != nil && err != io.EOF {
68                         http.Error(w, "error reading request body: "+err.Error(), http.StatusBadRequest)
69                         return
70                 }
71         }
72
73         // Check that the supplied files/dirs actually exist, and use
74         // pathmatcher to build a list of all matching files in the
75         // collection.
76         collfs, err := fs.Sub(arvados.FS(sitefs), strings.TrimSuffix(ziproot, "/"))
77         if err != nil {
78                 http.Error(w, err.Error(), http.StatusInternalServerError)
79                 return
80         }
81         matcher := make(pathmatcher)
82         for _, path := range params.Files {
83                 matcher[path] = true
84                 if path == "/" {
85                         continue
86                 } else if f, err := collfs.Open(strings.TrimSuffix(path, "/")); err != nil {
87                         http.Error(w, fmt.Sprintf("%q: file does not exist", path), http.StatusNotFound)
88                         return
89                 } else {
90                         f.Close()
91                 }
92         }
93         filepaths, err := matcher.walk(collfs)
94         if err != nil {
95                 http.Error(w, err.Error(), http.StatusInternalServerError)
96                 return
97         }
98
99         // (Unless fetching by PDH) get additional collection details
100         // for logging, collection metadata file, and suggested
101         // filename for user agent.
102         var zipfilename string
103         if coll.UUID == "" {
104                 zipfilename = coll.PortableDataHash
105         } else {
106                 err = session.client.RequestAndDecode(&coll, "GET", "arvados/v1/collections/"+coll.UUID, nil, map[string]interface{}{
107                         "select": []string{
108                                 "created_at",
109                                 "description",
110                                 "modified_at",
111                                 "modified_by_user_uuid",
112                                 "name",
113                                 "portable_data_hash",
114                                 "properties",
115                                 "uuid",
116                         },
117                 })
118                 if err != nil {
119                         if he := errorWithHTTPStatus(nil); errors.As(err, &he) {
120                                 http.Error(w, err.Error(), he.HTTPStatus())
121                         } else {
122                                 http.Error(w, err.Error(), http.StatusInternalServerError)
123                         }
124                         return
125                 }
126                 zipfilename = coll.Name
127         }
128         if len(filepaths) == 1 && len(params.Files) == 1 && filepaths[0] == params.Files[0] {
129                 // If the client specified a single (non-directory)
130                 // file, include the name of the file in the zip
131                 // archive name.
132                 _, basename := filepath.Split(filepaths[0])
133                 zipfilename += " - " + basename
134         } else if len(matcher) > 0 && !matcher["/"] {
135                 // If the client specified any other subset of the
136                 // collection, mention the number of files that will
137                 // be in the archive, to make it more obvious that
138                 // it's not an archive of the entire collection.
139                 zipfilename += fmt.Sprintf(" - %d files", len(filepaths))
140         }
141         zipfilename += ".zip"
142
143         logpath := ""
144         if len(filepaths) == 1 {
145                 // If downloading a zip file with exactly one file,
146                 // log that file as collection_file_path in the audit
147                 // logs.  (Otherwise, leave collection_file_path
148                 // empty.)
149                 logpath = filepaths[0]
150         }
151         rGET := r.Clone(r.Context())
152         rGET.Method = "GET"
153         h.logUploadOrDownload(rGET, session.arvadosclient, session.fs, logpath, len(filepaths), coll, tokenUser)
154
155         // Get additional user details for last-modified-by user, to
156         // include in the collection metadata file.
157         var user arvados.User
158         if params.IncludeCollectionMetadata && coll.ModifiedByUserUUID != "" {
159                 err = session.client.RequestAndDecode(&user, "GET", "arvados/v1/users/"+coll.ModifiedByUserUUID, nil, map[string]interface{}{
160                         "select": []string{
161                                 "email",
162                                 "full_name",
163                                 "username",
164                                 "uuid",
165                                 // RailsAPI <= 3.1 fails if we select
166                                 // full_name without also selecting
167                                 // first_name and last_name.
168                                 "first_name",
169                                 "last_name",
170                         },
171                 })
172                 if he := errorWithHTTPStatus(nil); errors.As(err, &he) && he.HTTPStatus() < 500 {
173                         // Cannot retrieve the user record, but this
174                         // shouldn't prevent the download from
175                         // working.
176                 } else if errors.As(err, &he) {
177                         http.Error(w, err.Error(), he.HTTPStatus())
178                         return
179                 } else if err != nil {
180                         http.Error(w, err.Error(), http.StatusInternalServerError)
181                         return
182                 }
183         }
184
185         err = h.writeZip(w, coll, collfs, zipfilename, filepaths, params, user)
186         if err != nil {
187                 ctxlog.FromContext(r.Context()).Errorf("error writing zip archive after sending response header: %s", err)
188         }
189 }
190
191 func (h *handler) writeZip(w http.ResponseWriter, coll *arvados.Collection, collfs fs.FS, zipfilename string, filepaths []string, params zipParams, user arvados.User) error {
192         // Note mime.FormatMediaType() also sets the "filename*" param
193         // if zipfilename contains non-ASCII chars, as recommended by
194         // RFC 6266.
195         w.Header().Set("Content-Disposition", mime.FormatMediaType("attachment", map[string]string{"filename": zipfilename}))
196         w.Header().Set("Content-Type", "application/zip")
197         w.WriteHeader(http.StatusOK)
198         zipw := zip.NewWriter(w)
199
200         u := url.URL(h.Cluster.Services.WebDAVDownload.ExternalURL)
201         if coll.UUID != "" {
202                 u.Path = "/by_id/" + coll.UUID + "/"
203         } else {
204                 u.Path = "/by_id/" + coll.PortableDataHash + "/"
205         }
206         err := zipw.SetComment(fmt.Sprintf("Downloaded from %s", u.String()))
207         if err != nil {
208                 return err
209         }
210         if params.IncludeCollectionMetadata {
211                 m := map[string]interface{}{
212                         "portable_data_hash": coll.PortableDataHash,
213                 }
214                 if coll.UUID != "" {
215                         m["uuid"] = coll.UUID
216                         m["name"] = coll.Name
217                         m["properties"] = coll.Properties
218                         m["created_at"] = coll.CreatedAt.Format(rfc3339NanoFixed)
219                         m["modified_at"] = coll.ModifiedAt.Format(rfc3339NanoFixed)
220                         m["description"] = coll.Description
221                 }
222                 if user.UUID != "" {
223                         m["modified_by_user"] = map[string]interface{}{
224                                 "email":     user.Email,
225                                 "full_name": user.FullName,
226                                 "username":  user.Username,
227                                 "uuid":      user.UUID,
228                         }
229                 }
230                 zipf, err := zipw.CreateHeader(&zip.FileHeader{
231                         Name:   "collection.json",
232                         Method: zip.Store,
233                 })
234                 if err != nil {
235                         return err
236                 }
237                 err = json.NewEncoder(zipf).Encode(m)
238                 if err != nil {
239                         return err
240                 }
241         }
242         for _, path := range filepaths {
243                 f, err := collfs.Open(path)
244                 if err != nil {
245                         f.Close()
246                         break
247                 }
248                 w, err := zipw.CreateHeader(&zip.FileHeader{
249                         Name:   path,
250                         Method: zip.Store,
251                 })
252                 if err != nil {
253                         f.Close()
254                         break
255                 }
256                 _, err = io.Copy(w, f)
257                 f.Close()
258                 if err != nil {
259                         break
260                 }
261         }
262         return zipw.Close()
263 }
264
// pathmatcher is the set of paths (files and/or directories)
// selected by the client. Only keys with a true value count as
// selected.
type pathmatcher map[string]bool

// match reports whether filename is selected by pm. A file is
// selected if pm is empty (nothing specified means everything), if
// filename itself is in pm, if "/" (the whole collection) is in pm,
// or if any ancestor directory of filename is in pm, written either
// with or without a trailing slash.
func (pm pathmatcher) match(filename string) bool {
	switch {
	case len(pm) == 0:
		// Empty selection ==> every file is included.
		return true
	case pm[filename]:
		// The file itself was selected.
		return true
	case pm["/"]:
		// The whole collection was selected. This needs its
		// own case because the ancestor scan below never
		// produces "/" for a relative path.
		return true
	}
	// Check each ancestor directory, from outermost to innermost.
	for pos := 0; pos < len(filename); pos++ {
		if filename[pos] != '/' {
			continue
		}
		withoutSlash, withSlash := filename[:pos], filename[:pos+1]
		if pm[withoutSlash] || pm[withSlash] {
			return true
		}
	}
	return false
}
290
291 // Walk collfs and return the paths of all regular files that match.
292 func (pm pathmatcher) walk(collfs fs.FS) ([]string, error) {
293         var filepaths []string
294         err := fs.WalkDir(collfs, ".", func(path string, dirent fs.DirEntry, err error) error {
295                 if err != nil {
296                         return err
297                 }
298                 if dirent.IsDir() {
299                         return nil
300                 }
301                 if !pm.match(path) {
302                         return nil
303                 }
304                 filepaths = append(filepaths, path)
305                 return nil
306         })
307         return filepaths, err
308 }