1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
21 "git.arvados.org/arvados.git/sdk/go/arvados"
22 "git.arvados.org/arvados.git/sdk/go/ctxlog"
25 const rfc3339NanoFixed = "2006-01-02T15:04:05.000000000Z07:00"
27 // serveZip handles a request for a zip archive.
28 func (h *handler) serveZip(w http.ResponseWriter, r *http.Request, session *cachedSession, sitefs arvados.CustomFileSystem, ziproot string, tokenUser *arvados.User) {
29 if r.Method != "GET" && r.Method != "HEAD" && r.Method != "POST" {
30 // This is a generic 400, not 405 (method not allowed)
31 // because this method/URL combination is allowed,
32 // just not with the Accept: application/zip header.
33 http.Error(w, "zip archive can only be served via GET, HEAD, or POST", http.StatusBadRequest)
36 // Check "GET" permission regardless of r.Method, because all
37 // methods result in downloads.
38 if !h.userPermittedToUploadOrDownload("GET", tokenUser) {
39 http.Error(w, "Not permitted", http.StatusForbidden)
42 coll, subdir := h.determineCollection(sitefs, ziproot)
43 if coll == nil || subdir != "" {
44 http.Error(w, "zip archive can only be served from the root directory of a collection", http.StatusBadRequest)
49 http.Error(w, err.Error(), http.StatusBadRequest)
52 reqpaths := r.Form["files"]
53 if reqpaths == nil && r.Header.Get("Content-Type") == "application/json" {
54 // r.Body is always non-nil, but will return EOF
55 // immediately if no body is present.
56 err := json.NewDecoder(r.Body).Decode(&reqpaths)
57 if err != nil && err != io.EOF {
58 http.Error(w, "error reading request body: "+err.Error(), http.StatusBadRequest)
62 collfs, err := fs.Sub(arvados.FS(sitefs), strings.TrimSuffix(ziproot, "/"))
64 http.Error(w, err.Error(), http.StatusInternalServerError)
67 wanted := make(map[string]bool)
68 for _, path := range reqpaths {
72 } else if f, err := collfs.Open(strings.TrimSuffix(path, "/")); err != nil {
73 http.Error(w, fmt.Sprintf("%q: file does not exist", path), http.StatusNotFound)
79 iswanted := func(path string) bool {
81 // No reqpaths provided ==> include all files
85 // Exact filename match
89 // Entire collection selected (special case
90 // not covered by the generic "parent
91 // selected" loop below)
94 for i := len(path) - 1; i >= 0; i-- {
95 if path[i] == '/' && (wanted[path[:i]] || wanted[path[:i+1]]) {
96 // Parent directory match
102 var filepaths []string
103 err = fs.WalkDir(collfs, ".", func(path string, dirent fs.DirEntry, err error) error {
113 filepaths = append(filepaths, path)
117 http.Error(w, err.Error(), http.StatusInternalServerError)
121 var zipfilename string
122 // Retrieve collection name if possible
123 if coll.Name == "" && coll.UUID != "" {
124 err = session.client.RequestAndDecode(&coll, "GET", "arvados/v1/collections/"+coll.UUID, nil, map[string]interface{}{
129 "modified_by_user_uuid",
131 "portable_data_hash",
137 if he := errorWithHTTPStatus(nil); errors.As(err, &he) {
138 http.Error(w, err.Error(), he.HTTPStatus())
140 http.Error(w, err.Error(), http.StatusInternalServerError)
144 zipfilename = coll.Name
145 } else if coll.Name == "" {
146 zipfilename = coll.PortableDataHash
149 var user arvados.User
150 if coll.ModifiedByUserUUID != "" {
151 err = session.client.RequestAndDecode(&user, "GET", "arvados/v1/users/"+coll.ModifiedByUserUUID, nil, map[string]interface{}{
157 // RailsAPI <= 3.1 fails if we select
158 // full_name without also selecting
159 // first_name and last_name.
164 if he := errorWithHTTPStatus(nil); errors.As(err, &he) && he.HTTPStatus() < 500 {
165 // Cannot retrieve the user record, but this
166 // shouldn't prevent the download from
168 http.Error(w, err.Error(), he.HTTPStatus())
169 } else if errors.As(err, &he) {
170 http.Error(w, err.Error(), he.HTTPStatus())
172 } else if err != nil {
173 http.Error(w, err.Error(), http.StatusInternalServerError)
178 if len(filepaths) == 1 && len(reqpaths) == 1 && filepaths[0] == reqpaths[0] {
179 // If the client specified a single (non-directory)
180 // file, include the name of the file in the zip
182 _, basename := filepath.Split(filepaths[0])
183 zipfilename += " - " + basename
184 } else if len(wanted) > 0 && !wanted["/"] {
185 // If the client specified any other subset of the
186 // collection, mention the number of files that will
187 // be in the archive, to make it more obvious that
188 // it's not an archive of the entire collection.
190 // Also include a partial hash of {PDH, list of
191 // filenames} so downloading different subsets of a
192 // collection results in different names, even if the
193 // number of files happens to be the same. (The pdh
194 // is incorporated here because otherwise the
195 // existence of a hash in the filename would be a
196 // strong misleading hint that identical filenames
197 // signify identical content.)
199 fmt.Fprintln(h, coll.PortableDataHash)
200 for _, path := range filepaths {
201 fmt.Fprintln(h, path)
203 zipfilename += fmt.Sprintf(" - %d files (%-4.4x)", len(filepaths), h.Sum(nil))
207 if len(filepaths) == 1 {
208 // If downloading a zip file with exactly one file,
209 // log that file as collection_file_path in the audit
210 // logs. (Otherwise, leave collection_file_path
212 logpath = filepaths[0]
214 rGET := r.Clone(r.Context())
216 h.logUploadOrDownload(rGET, session.arvadosclient, session.fs, logpath, len(filepaths), coll, tokenUser)
218 w.Header().Set("Content-Disposition", mime.FormatMediaType("attachment", map[string]string{"filename": zipfilename}))
219 w.Header().Set("Content-Type", "application/zip")
220 zipw := zip.NewWriter(w)
223 u := url.URL(h.Cluster.Services.WebDAVDownload.ExternalURL)
225 u.Path = "/by_id/" + coll.UUID + "/"
227 u.Path = "/by_id/" + coll.PortableDataHash + "/"
229 err := zipw.SetComment(fmt.Sprintf("Downloaded from %s", u.String()))
233 if r.Form.Get("include_collection_metadata") != "" {
234 m := map[string]interface{}{
235 "portable_data_hash": coll.PortableDataHash,
238 m["uuid"] = coll.UUID
239 m["name"] = coll.Name
240 m["properties"] = coll.Properties
241 m["created_at"] = coll.CreatedAt.Format(rfc3339NanoFixed)
242 m["modified_at"] = coll.ModifiedAt.Format(rfc3339NanoFixed)
243 m["description"] = coll.Description
246 m["modified_by_user"] = map[string]interface{}{
248 "full_name": user.FullName,
249 "username": user.Username,
254 zipf, err := zipw.CreateHeader(&zip.FileHeader{
255 Name: "collection.json",
261 err = json.NewEncoder(zipf).Encode(m)
263 for _, path := range filepaths {
264 f, err := collfs.Open(path)
270 w, err := zipw.CreateHeader(&zip.FileHeader{
278 _, err = io.Copy(w, f)
289 ctxlog.FromContext(r.Context()).Errorf("error writing zip archive after sending response header: %s", err)
291 http.Error(w, err.Error(), http.StatusInternalServerError)