]> git.arvados.org - arvados.git/blob - services/keep-web/zip.go
23044: De-dup ContainerWebServices routing logic.
[arvados.git] / services / keep-web / zip.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package keepweb
6
7 import (
8         "archive/zip"
9         "encoding/json"
10         "errors"
11         "fmt"
12         "io"
13         "io/fs"
14         "mime"
15         "net/http"
16         "net/url"
17         "path/filepath"
18         "strings"
19
20         "git.arvados.org/arvados.git/sdk/go/arvados"
21         "git.arvados.org/arvados.git/sdk/go/ctxlog"
22 )
23
// rfc3339NanoFixed is a time layout in RFC 3339 form whose fractional
// seconds are written with "0" placeholders, so formatted timestamps
// always carry exactly nine fractional digits (trailing zeros are
// kept). It is used to format the created_at and modified_at values
// written to collection.json.
const rfc3339NanoFixed = "2006-01-02T15:04:05.000000000Z07:00"
25
// zipParams holds the client-supplied options for a zip download.
// serveZip fills it in from the query string / POST form, or from a
// JSON request body.
type zipParams struct {
        // DownloadFilename is the filename offered to the client in
        // the Content-Disposition header. If empty, serveZip makes
        // up a default; if it lacks a ".zip" suffix, one is added.
        DownloadFilename          string `json:"download_filename"`
        // Files lists the files and directories to include in the
        // archive. An empty list selects the entire collection.
        // There is no json tag here, so a JSON body supplies it
        // under the field's own name (encoding/json matches keys
        // case-insensitively, so "files" works too).
        Files                     []string
        // IncludeCollectionMetadata requests an additional
        // "collection.json" entry describing the collection.
        IncludeCollectionMetadata bool `json:"include_collection_metadata"`
}
31
32 // serveZip handles a request for a zip archive.
33 func (h *handler) serveZip(w http.ResponseWriter, r *http.Request, session *cachedSession, sitefs arvados.CustomFileSystem, ziproot string, tokenUser *arvados.User) {
34         if r.Method != "GET" && r.Method != "HEAD" && r.Method != "POST" {
35                 // This is a generic 400, not 405 (method not allowed)
36                 // because this method/URL combination is allowed,
37                 // just not with the Accept: application/zip header.
38                 http.Error(w, "zip archive can only be served via GET, HEAD, or POST", http.StatusBadRequest)
39                 return
40         }
41         // Check "GET" permission regardless of r.Method, because all
42         // methods result in downloads.
43         if !h.userPermittedToUploadOrDownload("GET", tokenUser) {
44                 http.Error(w, "Not permitted", http.StatusForbidden)
45                 return
46         }
47         coll, subdir := h.determineCollection(sitefs, ziproot)
48         if coll == nil || subdir != "" {
49                 http.Error(w, "zip archive can only be served from the root directory of a collection", http.StatusBadRequest)
50                 return
51         }
52
53         // Load params from query and post form
54         var params zipParams
55         err := r.ParseForm()
56         if err != nil {
57                 http.Error(w, err.Error(), http.StatusBadRequest)
58                 return
59         }
60         params.DownloadFilename = r.Form.Get("download_filename")
61         params.Files = r.Form["files"]
62         params.IncludeCollectionMetadata = r.Form.Get("include_collection_metadata") != ""
63
64         // Load params from JSON request body
65         if params.Files == nil && r.Header.Get("Content-Type") == "application/json" {
66                 // r.Body is always non-nil, but will return EOF
67                 // immediately if no body is present.
68                 err := json.NewDecoder(r.Body).Decode(&params)
69                 if err != nil && err != io.EOF {
70                         http.Error(w, "error reading request body: "+err.Error(), http.StatusBadRequest)
71                         return
72                 }
73         }
74
75         // Check that the supplied files/dirs actually exist, and use
76         // pathmatcher to build a list of all matching files in the
77         // collection.
78         collfs, err := fs.Sub(arvados.FS(sitefs), strings.TrimSuffix(ziproot, "/"))
79         if err != nil {
80                 http.Error(w, err.Error(), http.StatusInternalServerError)
81                 return
82         }
83         matcher := make(pathmatcher)
84         for _, path := range params.Files {
85                 matcher[path] = true
86                 if path == "/" {
87                         continue
88                 } else if f, err := collfs.Open(strings.TrimSuffix(path, "/")); err != nil {
89                         http.Error(w, fmt.Sprintf("%q: file does not exist", path), http.StatusNotFound)
90                         return
91                 } else {
92                         f.Close()
93                 }
94         }
95         filepaths, err := matcher.walk(collfs)
96         if err != nil {
97                 http.Error(w, err.Error(), http.StatusInternalServerError)
98                 return
99         }
100
101         // (Unless fetching by PDH) get additional collection details
102         // for logging, collection metadata file, and default download
103         // filename.
104         if coll.UUID != "" {
105                 err = session.client.RequestAndDecode(&coll, "GET", "arvados/v1/collections/"+coll.UUID, nil, map[string]interface{}{
106                         "select": []string{
107                                 "created_at",
108                                 "description",
109                                 "modified_at",
110                                 "modified_by_user_uuid",
111                                 "name",
112                                 "portable_data_hash",
113                                 "properties",
114                                 "uuid",
115                         },
116                 })
117                 if err != nil {
118                         if he := errorWithHTTPStatus(nil); errors.As(err, &he) {
119                                 http.Error(w, err.Error(), he.HTTPStatus())
120                         } else {
121                                 http.Error(w, err.Error(), http.StatusInternalServerError)
122                         }
123                         return
124                 }
125         }
126
127         if params.DownloadFilename != "" {
128                 // Add .zip extension if the user forgot to do that
129                 if !strings.HasSuffix(strings.ToLower(params.DownloadFilename), ".zip") {
130                         params.DownloadFilename += ".zip"
131                 }
132         } else {
133                 // No download_filename provided. Make up a reasonable
134                 // default.
135                 if coll.UUID == "" {
136                         params.DownloadFilename = coll.PortableDataHash
137                 } else {
138                         params.DownloadFilename = coll.Name
139                 }
140                 if len(filepaths) == 1 && len(params.Files) == 1 && filepaths[0] == params.Files[0] {
141                         // If the request specified a single
142                         // (non-directory) file, include the name of
143                         // the file in the zip archive name.
144                         _, basename := filepath.Split(filepaths[0])
145                         params.DownloadFilename += " - " + basename
146                 } else if len(matcher) > 0 && !matcher["/"] {
147                         // If the request specified any other subset
148                         // of the collection, mention the number of
149                         // files that will be in the archive, to make
150                         // it more obvious that it's not an archive of
151                         // the entire collection.
152                         params.DownloadFilename += fmt.Sprintf(" - %d files", len(filepaths))
153                 }
154                 params.DownloadFilename += ".zip"
155         }
156
157         logpath := ""
158         if len(filepaths) == 1 {
159                 // If downloading a zip file with exactly one file,
160                 // log that file as collection_file_path in the audit
161                 // logs.  (Otherwise, leave collection_file_path
162                 // empty.)
163                 logpath = filepaths[0]
164         }
165         rGET := r.Clone(r.Context())
166         rGET.Method = "GET"
167         h.logUploadOrDownload(rGET, session.arvadosclient, session.fs, logpath, len(filepaths), coll, tokenUser)
168
169         // Get additional user details for last-modified-by user, to
170         // include in the collection metadata file.
171         var user arvados.User
172         if params.IncludeCollectionMetadata && coll.ModifiedByUserUUID != "" {
173                 err = session.client.RequestAndDecode(&user, "GET", "arvados/v1/users/"+coll.ModifiedByUserUUID, nil, map[string]interface{}{
174                         "select": []string{
175                                 "email",
176                                 "full_name",
177                                 "username",
178                                 "uuid",
179                                 // RailsAPI <= 3.1 fails if we select
180                                 // full_name without also selecting
181                                 // first_name and last_name.
182                                 "first_name",
183                                 "last_name",
184                         },
185                 })
186                 if he := errorWithHTTPStatus(nil); errors.As(err, &he) && he.HTTPStatus() < 500 {
187                         // Cannot retrieve the user record, but this
188                         // shouldn't prevent the download from
189                         // working.
190                 } else if errors.As(err, &he) {
191                         http.Error(w, err.Error(), he.HTTPStatus())
192                         return
193                 } else if err != nil {
194                         http.Error(w, err.Error(), http.StatusInternalServerError)
195                         return
196                 }
197         }
198
199         err = h.writeZip(w, coll, collfs, filepaths, params, user)
200         if err != nil {
201                 ctxlog.FromContext(r.Context()).Errorf("error writing zip archive after sending response header: %s", err)
202         }
203 }
204
205 func (h *handler) writeZip(w http.ResponseWriter, coll *arvados.Collection, collfs fs.FS, filepaths []string, params zipParams, user arvados.User) error {
206         // Note mime.FormatMediaType() also sets the "filename*" param
207         // if params.DownloadFilename contains non-ASCII chars, as
208         // recommended by RFC 6266.
209         w.Header().Set("Content-Disposition", mime.FormatMediaType("attachment", map[string]string{"filename": params.DownloadFilename}))
210         w.Header().Set("Content-Type", "application/zip")
211         w.WriteHeader(http.StatusOK)
212         zipw := zip.NewWriter(w)
213
214         u := url.URL(h.Cluster.Services.WebDAVDownload.ExternalURL)
215         if coll.UUID != "" {
216                 u.Path = "/by_id/" + coll.UUID + "/"
217         } else {
218                 u.Path = "/by_id/" + coll.PortableDataHash + "/"
219         }
220         err := zipw.SetComment(fmt.Sprintf("Downloaded from %s", u.String()))
221         if err != nil {
222                 return err
223         }
224         if params.IncludeCollectionMetadata {
225                 m := map[string]interface{}{
226                         "portable_data_hash": coll.PortableDataHash,
227                 }
228                 if coll.UUID != "" {
229                         m["uuid"] = coll.UUID
230                         m["name"] = coll.Name
231                         m["properties"] = coll.Properties
232                         m["created_at"] = coll.CreatedAt.Format(rfc3339NanoFixed)
233                         m["modified_at"] = coll.ModifiedAt.Format(rfc3339NanoFixed)
234                         m["description"] = coll.Description
235                 }
236                 if user.UUID != "" {
237                         m["modified_by_user"] = map[string]interface{}{
238                                 "email":     user.Email,
239                                 "full_name": user.FullName,
240                                 "username":  user.Username,
241                                 "uuid":      user.UUID,
242                         }
243                 }
244                 zipf, err := zipw.CreateHeader(&zip.FileHeader{
245                         Name:   "collection.json",
246                         Method: zip.Store,
247                 })
248                 if err != nil {
249                         return err
250                 }
251                 err = json.NewEncoder(zipf).Encode(m)
252                 if err != nil {
253                         return err
254                 }
255         }
256         for _, path := range filepaths {
257                 f, err := collfs.Open(path)
258                 if err != nil {
259                         f.Close()
260                         break
261                 }
262                 w, err := zipw.CreateHeader(&zip.FileHeader{
263                         Name:   path,
264                         Method: zip.Store,
265                 })
266                 if err != nil {
267                         f.Close()
268                         break
269                 }
270                 _, err = io.Copy(w, f)
271                 f.Close()
272                 if err != nil {
273                         break
274                 }
275         }
276         return zipw.Close()
277 }
278
// pathmatcher is the set of file and directory paths selected for
// inclusion in an archive. An empty set selects everything.
type pathmatcher map[string]bool

// match reports whether filename is selected: either the set is
// empty, or filename itself is listed, or "/" is listed, or one of
// filename's parent directories is listed (with or without a
// trailing slash).
func (pm pathmatcher) match(filename string) bool {
	switch {
	case len(pm) == 0:
		// Nothing was requested explicitly, so everything
		// is included.
		return true
	case pm[filename]:
		// The file itself was requested.
		return true
	case pm["/"]:
		// The whole collection was requested. The ancestor
		// scan below never produces "/" for a relative
		// path, hence the explicit check.
		return true
	}
	// Consider every prefix of filename that ends at a slash,
	// i.e., each ancestor directory, spelled both "dir" and
	// "dir/".
	for pos := 0; pos < len(filename); pos++ {
		if filename[pos] != '/' {
			continue
		}
		if pm[filename[:pos]] || pm[filename[:pos+1]] {
			return true
		}
	}
	return false
}

// walk traverses collfs from its root and returns the paths of all
// matching non-directory entries, in the order fs.WalkDir visits
// them. If the traversal fails, the paths collected so far are
// returned along with the error.
func (pm pathmatcher) walk(collfs fs.FS) ([]string, error) {
	var selected []string
	visit := func(name string, entry fs.DirEntry, walkErr error) error {
		switch {
		case walkErr != nil:
			return walkErr
		case entry.IsDir() || !pm.match(name):
			return nil
		}
		selected = append(selected, name)
		return nil
	}
	err := fs.WalkDir(collfs, ".", visit)
	return selected, err
}