]> git.arvados.org - arvados.git/blob - services/keep-web/zip.go
22076: Include hash of PDH and filenames in suggested filename.
[arvados.git] / services / keep-web / zip.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package keepweb
6
7 import (
8         "archive/zip"
9         "crypto/md5"
10         "encoding/json"
11         "errors"
12         "fmt"
13         "io"
14         "io/fs"
15         "mime"
16         "net/http"
17         "net/url"
18         "path/filepath"
19         "strings"
20
21         "git.arvados.org/arvados.git/sdk/go/arvados"
22         "git.arvados.org/arvados.git/sdk/go/ctxlog"
23 )
24
const (
	// rfc3339NanoFixed is a time layout in the style of RFC 3339
	// with a fixed-width, nine-digit fractional second (trailing
	// zeros are kept because the layout uses 0 placeholders).
	rfc3339NanoFixed = "2006-01-02T15:04:05.000000000Z07:00"
)
26
27 // serveZip handles a request for a zip archive.
28 func (h *handler) serveZip(w http.ResponseWriter, r *http.Request, session *cachedSession, sitefs arvados.CustomFileSystem, ziproot string, tokenUser *arvados.User) {
29         if r.Method != "GET" && r.Method != "HEAD" && r.Method != "POST" {
30                 // This is a generic 400, not 405 (method not allowed)
31                 // because this method/URL combination is allowed,
32                 // just not with the Accept: application/zip header.
33                 http.Error(w, "zip archive can only be served via GET, HEAD, or POST", http.StatusBadRequest)
34                 return
35         }
36         // Check "GET" permission regardless of r.Method, because all
37         // methods result in downloads.
38         if !h.userPermittedToUploadOrDownload("GET", tokenUser) {
39                 http.Error(w, "Not permitted", http.StatusForbidden)
40                 return
41         }
42         coll, subdir := h.determineCollection(sitefs, ziproot)
43         if coll == nil || subdir != "" {
44                 http.Error(w, "zip archive can only be served from the root directory of a collection", http.StatusBadRequest)
45                 return
46         }
47         err := r.ParseForm()
48         if err != nil {
49                 http.Error(w, err.Error(), http.StatusBadRequest)
50                 return
51         }
52         reqpaths := r.Form["files"]
53         if reqpaths == nil && r.Header.Get("Content-Type") == "application/json" {
54                 // r.Body is always non-nil, but will return EOF
55                 // immediately if no body is present.
56                 err := json.NewDecoder(r.Body).Decode(&reqpaths)
57                 if err != nil && err != io.EOF {
58                         http.Error(w, "error reading request body: "+err.Error(), http.StatusBadRequest)
59                         return
60                 }
61         }
62         collfs, err := fs.Sub(arvados.FS(sitefs), strings.TrimSuffix(ziproot, "/"))
63         if err != nil {
64                 http.Error(w, err.Error(), http.StatusInternalServerError)
65                 return
66         }
67         wanted := make(map[string]bool)
68         for _, path := range reqpaths {
69                 wanted[path] = true
70                 if path == "/" {
71                         continue
72                 } else if f, err := collfs.Open(strings.TrimSuffix(path, "/")); err != nil {
73                         http.Error(w, fmt.Sprintf("%q: file does not exist", path), http.StatusNotFound)
74                         return
75                 } else {
76                         f.Close()
77                 }
78         }
79         iswanted := func(path string) bool {
80                 if len(wanted) == 0 {
81                         // No reqpaths provided ==> include all files
82                         return true
83                 }
84                 if wanted[path] {
85                         // Exact filename match
86                         return true
87                 }
88                 if wanted["/"] {
89                         // Entire collection selected (special case
90                         // not covered by the generic "parent
91                         // selected" loop below)
92                         return true
93                 }
94                 for i := len(path) - 1; i >= 0; i-- {
95                         if path[i] == '/' && (wanted[path[:i]] || wanted[path[:i+1]]) {
96                                 // Parent directory match
97                                 return true
98                         }
99                 }
100                 return false
101         }
102         var filepaths []string
103         err = fs.WalkDir(collfs, ".", func(path string, dirent fs.DirEntry, err error) error {
104                 if err != nil {
105                         return err
106                 }
107                 if dirent.IsDir() {
108                         return nil
109                 }
110                 if !iswanted(path) {
111                         return nil
112                 }
113                 filepaths = append(filepaths, path)
114                 return nil
115         })
116         if err != nil {
117                 http.Error(w, err.Error(), http.StatusInternalServerError)
118                 return
119         }
120
121         var zipfilename string
122         // Retrieve collection name if possible
123         if coll.Name == "" && coll.UUID != "" {
124                 err = session.client.RequestAndDecode(&coll, "GET", "arvados/v1/collections/"+coll.UUID, nil, map[string]interface{}{
125                         "select": []string{
126                                 "created_at",
127                                 "description",
128                                 "modified_at",
129                                 "modified_by_user_uuid",
130                                 "name",
131                                 "portable_data_hash",
132                                 "properties",
133                                 "uuid",
134                         },
135                 })
136                 if err != nil {
137                         if he := errorWithHTTPStatus(nil); errors.As(err, &he) {
138                                 http.Error(w, err.Error(), he.HTTPStatus())
139                         } else {
140                                 http.Error(w, err.Error(), http.StatusInternalServerError)
141                         }
142                         return
143                 }
144                 zipfilename = coll.Name
145         } else if coll.Name == "" {
146                 zipfilename = coll.PortableDataHash
147         }
148
149         var user arvados.User
150         if coll.ModifiedByUserUUID != "" {
151                 err = session.client.RequestAndDecode(&user, "GET", "arvados/v1/users/"+coll.ModifiedByUserUUID, nil, map[string]interface{}{
152                         "select": []string{
153                                 "email",
154                                 "full_name",
155                                 "username",
156                                 "uuid",
157                                 // RailsAPI <= 3.1 fails if we select
158                                 // full_name without also selecting
159                                 // first_name and last_name.
160                                 "first_name",
161                                 "last_name",
162                         },
163                 })
164                 if he := errorWithHTTPStatus(nil); errors.As(err, &he) && he.HTTPStatus() < 500 {
165                         // Cannot retrieve the user record, but this
166                         // shouldn't prevent the download from
167                         // working.
168                         http.Error(w, err.Error(), he.HTTPStatus())
169                 } else if errors.As(err, &he) {
170                         http.Error(w, err.Error(), he.HTTPStatus())
171                         return
172                 } else if err != nil {
173                         http.Error(w, err.Error(), http.StatusInternalServerError)
174                         return
175                 }
176         }
177
178         if len(filepaths) == 1 && len(reqpaths) == 1 && filepaths[0] == reqpaths[0] {
179                 // If the client specified a single (non-directory)
180                 // file, include the name of the file in the zip
181                 // archive name.
182                 _, basename := filepath.Split(filepaths[0])
183                 zipfilename += " - " + basename
184         } else if len(wanted) > 0 && !wanted["/"] {
185                 // If the client specified any other subset of the
186                 // collection, mention the number of files that will
187                 // be in the archive, to make it more obvious that
188                 // it's not an archive of the entire collection.
189                 //
190                 // Also include a partial hash of {PDH, list of
191                 // filenames} so downloading different subsets of a
192                 // collection results in different names, even if the
193                 // number of files happens to be the same.  (The pdh
194                 // is incorporated here because otherwise the
195                 // existence of a hash in the filename would be a
196                 // strong misleading hint that identical filenames
197                 // signify identical content.)
198                 h := md5.New()
199                 fmt.Fprintln(h, coll.PortableDataHash)
200                 for _, path := range filepaths {
201                         fmt.Fprintln(h, path)
202                 }
203                 zipfilename += fmt.Sprintf(" - %d files (%-4.4x)", len(filepaths), h.Sum(nil))
204         }
205
206         logpath := ""
207         if len(filepaths) == 1 {
208                 // If downloading a zip file with exactly one file,
209                 // log that file as collection_file_path in the audit
210                 // logs.  (Otherwise, leave collection_file_path
211                 // empty.)
212                 logpath = filepaths[0]
213         }
214         rGET := r.Clone(r.Context())
215         rGET.Method = "GET"
216         h.logUploadOrDownload(rGET, session.arvadosclient, session.fs, logpath, len(filepaths), coll, tokenUser)
217
218         w.Header().Set("Content-Disposition", mime.FormatMediaType("attachment", map[string]string{"filename": zipfilename}))
219         w.Header().Set("Content-Type", "application/zip")
220         zipw := zip.NewWriter(w)
221         wrote := false
222         err = func() error {
223                 u := url.URL(h.Cluster.Services.WebDAVDownload.ExternalURL)
224                 if coll.UUID != "" {
225                         u.Path = "/by_id/" + coll.UUID + "/"
226                 } else {
227                         u.Path = "/by_id/" + coll.PortableDataHash + "/"
228                 }
229                 err := zipw.SetComment(fmt.Sprintf("Downloaded from %s", u.String()))
230                 if err != nil {
231                         return err
232                 }
233                 if r.Form.Get("include_collection_metadata") != "" {
234                         m := map[string]interface{}{
235                                 "portable_data_hash": coll.PortableDataHash,
236                         }
237                         if coll.UUID != "" {
238                                 m["uuid"] = coll.UUID
239                                 m["name"] = coll.Name
240                                 m["properties"] = coll.Properties
241                                 m["created_at"] = coll.CreatedAt.Format(rfc3339NanoFixed)
242                                 m["modified_at"] = coll.ModifiedAt.Format(rfc3339NanoFixed)
243                                 m["description"] = coll.Description
244                         }
245                         if user.UUID != "" {
246                                 m["modified_by_user"] = map[string]interface{}{
247                                         "email":     user.Email,
248                                         "full_name": user.FullName,
249                                         "username":  user.Username,
250                                         "uuid":      user.UUID,
251                                 }
252                         }
253                         wrote = true
254                         zipf, err := zipw.CreateHeader(&zip.FileHeader{
255                                 Name:   "collection.json",
256                                 Method: zip.Store,
257                         })
258                         if err != nil {
259                                 return err
260                         }
261                         err = json.NewEncoder(zipf).Encode(m)
262                 }
263                 for _, path := range filepaths {
264                         f, err := collfs.Open(path)
265                         if err != nil {
266                                 f.Close()
267                                 break
268                         }
269                         wrote = true
270                         w, err := zipw.CreateHeader(&zip.FileHeader{
271                                 Name:   path,
272                                 Method: zip.Store,
273                         })
274                         if err != nil {
275                                 f.Close()
276                                 break
277                         }
278                         _, err = io.Copy(w, f)
279                         f.Close()
280                         if err != nil {
281                                 break
282                         }
283                 }
284                 wrote = true
285                 return zipw.Close()
286         }()
287         if err != nil {
288                 if wrote {
289                         ctxlog.FromContext(r.Context()).Errorf("error writing zip archive after sending response header: %s", err)
290                 } else {
291                         http.Error(w, err.Error(), http.StatusInternalServerError)
292                 }
293                 return
294         }
295 }