1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
20 "git.arvados.org/arvados.git/sdk/go/arvados"
21 "git.arvados.org/arvados.git/sdk/go/ctxlog"
// rfc3339NanoFixed is a time layout like time.RFC3339Nano, except
// that the fractional second is written with zeros in the layout, so
// it is always formatted as exactly nine digits (trailing zeros are
// not trimmed). It is used to format the created_at and modified_at
// timestamps in the collection metadata.
24 const rfc3339NanoFixed = "2006-01-02T15:04:05.000000000Z07:00"
// zipParams holds the caller-supplied options for a zip download.
// serveZip fills it in from the query/form values, or from a JSON
// request body.
26 type zipParams struct {
// Filename suggested to the client (via Content-Disposition) for
// the downloaded archive.
27 DownloadFilename string `json:"download_filename"`
// If true, a collection.json entry with collection metadata is
// written to the archive.
29 IncludeCollectionMetadata bool `json:"include_collection_metadata"`
32 // serveZip handles a request for a zip archive.
33 func (h *handler) serveZip(w http.ResponseWriter, r *http.Request, session *cachedSession, sitefs arvados.CustomFileSystem, ziproot string, tokenUser *arvados.User) {
34 if r.Method != "GET" && r.Method != "HEAD" && r.Method != "POST" {
35 // This is a generic 400, not 405 (method not allowed)
36 // because this method/URL combination is allowed,
37 // just not with the Accept: application/zip header.
38 http.Error(w, "zip archive can only be served via GET, HEAD, or POST", http.StatusBadRequest)
41 // Check "GET" permission regardless of r.Method, because all
42 // methods result in downloads.
43 if !h.userPermittedToUploadOrDownload("GET", tokenUser) {
44 http.Error(w, "Not permitted", http.StatusForbidden)
47 coll, subdir := h.determineCollection(sitefs, ziproot)
48 if coll == nil || subdir != "" {
49 http.Error(w, "zip archive can only be served from the root directory of a collection", http.StatusBadRequest)
53 // Load params from query and post form
57 http.Error(w, err.Error(), http.StatusBadRequest)
60 params.DownloadFilename = r.Form.Get("download_filename")
61 params.Files = r.Form["files"]
62 params.IncludeCollectionMetadata = r.Form.Get("include_collection_metadata") != ""
64 // Load params from JSON request body
65 if params.Files == nil && r.Header.Get("Content-Type") == "application/json" {
66 // r.Body is always non-nil, but will return EOF
67 // immediately if no body is present.
68 err := json.NewDecoder(r.Body).Decode(&params)
69 if err != nil && err != io.EOF {
70 http.Error(w, "error reading request body: "+err.Error(), http.StatusBadRequest)
75 // Check that the supplied files/dirs actually exist, and use
76 // pathmatcher to build a list of all matching files in the
78 collfs, err := fs.Sub(arvados.FS(sitefs), strings.TrimSuffix(ziproot, "/"))
80 http.Error(w, err.Error(), http.StatusInternalServerError)
83 matcher := make(pathmatcher)
84 for _, path := range params.Files {
88 } else if f, err := collfs.Open(strings.TrimSuffix(path, "/")); err != nil {
89 http.Error(w, fmt.Sprintf("%q: file does not exist", path), http.StatusNotFound)
95 filepaths, err := matcher.walk(collfs)
97 http.Error(w, err.Error(), http.StatusInternalServerError)
101 // (Unless fetching by PDH) get additional collection details
102 // for logging, collection metadata file, and default download
105 err = session.client.RequestAndDecode(&coll, "GET", "arvados/v1/collections/"+coll.UUID, nil, map[string]interface{}{
110 "modified_by_user_uuid",
112 "portable_data_hash",
118 if he := errorWithHTTPStatus(nil); errors.As(err, &he) {
119 http.Error(w, err.Error(), he.HTTPStatus())
121 http.Error(w, err.Error(), http.StatusInternalServerError)
127 if params.DownloadFilename != "" {
128 // Add .zip extension if the user forgot to do that
129 if !strings.HasSuffix(strings.ToLower(params.DownloadFilename), ".zip") {
130 params.DownloadFilename += ".zip"
133 // No download_filename provided. Make up a reasonable
136 params.DownloadFilename = coll.PortableDataHash
138 params.DownloadFilename = coll.Name
140 if len(filepaths) == 1 && len(params.Files) == 1 && filepaths[0] == params.Files[0] {
141 // If the request specified a single
142 // (non-directory) file, include the name of
143 // the file in the zip archive name.
144 _, basename := filepath.Split(filepaths[0])
145 params.DownloadFilename += " - " + basename
146 } else if len(matcher) > 0 && !matcher["/"] {
147 // If the request specified any other subset
148 // of the collection, mention the number of
149 // files that will be in the archive, to make
150 // it more obvious that it's not an archive of
151 // the entire collection.
152 params.DownloadFilename += fmt.Sprintf(" - %d files", len(filepaths))
154 params.DownloadFilename += ".zip"
158 if len(filepaths) == 1 {
159 // If downloading a zip file with exactly one file,
160 // log that file as collection_file_path in the audit
161 // logs. (Otherwise, leave collection_file_path
163 logpath = filepaths[0]
165 rGET := r.Clone(r.Context())
167 h.logUploadOrDownload(rGET, session.arvadosclient, session.fs, logpath, len(filepaths), coll, tokenUser)
169 // Get additional user details for last-modified-by user, to
170 // include in the collection metadata file.
171 var user arvados.User
172 if params.IncludeCollectionMetadata && coll.ModifiedByUserUUID != "" {
173 err = session.client.RequestAndDecode(&user, "GET", "arvados/v1/users/"+coll.ModifiedByUserUUID, nil, map[string]interface{}{
179 // RailsAPI <= 3.1 fails if we select
180 // full_name without also selecting
181 // first_name and last_name.
186 if he := errorWithHTTPStatus(nil); errors.As(err, &he) && he.HTTPStatus() < 500 {
187 // Cannot retrieve the user record, but this
188 // shouldn't prevent the download from
190 } else if errors.As(err, &he) {
191 http.Error(w, err.Error(), he.HTTPStatus())
193 } else if err != nil {
194 http.Error(w, err.Error(), http.StatusInternalServerError)
199 err = h.writeZip(w, coll, collfs, filepaths, params, user)
201 ctxlog.FromContext(r.Context()).Errorf("error writing zip archive after sending response header: %s", err)
// writeZip streams a zip archive containing the given filepaths
// (opened from collfs) to w.
//
// The Content-Disposition and Content-Type headers and the 200 status
// are sent before any archive data is written, so an error reported
// after that point can no longer change the response status. The
// archive comment records the URL the collection was downloaded from
// and, if params.IncludeCollectionMetadata is set, a collection.json
// entry describing coll (and the last-modified-by user) is written
// ahead of the file entries.
205 func (h *handler) writeZip(w http.ResponseWriter, coll *arvados.Collection, collfs fs.FS, filepaths []string, params zipParams, user arvados.User) error {
206 // Note mime.FormatMediaType() also sets the "filename*" param
207 // if params.DownloadFilename contains non-ASCII chars, as
208 // recommended by RFC 6266.
209 w.Header().Set("Content-Disposition", mime.FormatMediaType("attachment", map[string]string{"filename": params.DownloadFilename}))
210 w.Header().Set("Content-Type", "application/zip")
211 w.WriteHeader(http.StatusOK)
212 zipw := zip.NewWriter(w)
// Build the download URL (by UUID or by portable data hash) that
// is recorded in the archive comment.
214 u := url.URL(h.Cluster.Services.WebDAVDownload.ExternalURL)
216 u.Path = "/by_id/" + coll.UUID + "/"
218 u.Path = "/by_id/" + coll.PortableDataHash + "/"
220 err := zipw.SetComment(fmt.Sprintf("Downloaded from %s", u.String()))
// Optional metadata entry: the collection's fields, JSON-encoded
// into an archive member named collection.json.
224 if params.IncludeCollectionMetadata {
225 m := map[string]interface{}{
226 "portable_data_hash": coll.PortableDataHash,
229 m["uuid"] = coll.UUID
230 m["name"] = coll.Name
231 m["properties"] = coll.Properties
232 m["created_at"] = coll.CreatedAt.Format(rfc3339NanoFixed)
233 m["modified_at"] = coll.ModifiedAt.Format(rfc3339NanoFixed)
234 m["description"] = coll.Description
// Details of the user record passed in by the caller.
237 m["modified_by_user"] = map[string]interface{}{
239 "full_name": user.FullName,
240 "username": user.Username,
244 zipf, err := zipw.CreateHeader(&zip.FileHeader{
245 Name: "collection.json",
251 err = json.NewEncoder(zipf).Encode(m)
// Copy each selected file from the collection into its own
// archive entry.
256 for _, path := range filepaths {
257 f, err := collfs.Open(path)
// Note: this w shadows the http.ResponseWriter parameter within
// the loop body; here it is the writer for the new archive entry.
262 w, err := zipw.CreateHeader(&zip.FileHeader{
270 _, err = io.Copy(w, f)
// pathmatcher is the set of file and directory paths selected for a
// zip download, keyed by path. An empty pathmatcher selects every
// file (see match).
279 type pathmatcher map[string]bool
// match reports whether filename is selected by pm. Going by the
// checks below, a file is selected when no paths were given at all,
// when it is listed exactly, when the entire collection is selected,
// or when one of its parent directories is listed (with or without a
// trailing slash).
281 func (pm pathmatcher) match(filename string) bool {
283 // No paths given ==> include all files
287 // Exact filename match
291 // Entire collection selected (special case not
292 // covered by the generic "parent selected" loop
// Scan backward through filename; at each '/' test the directory
// prefix both without (filename[:i]) and with (filename[:i+1]) the
// trailing slash, nearest parent first.
296 for i := len(filename) - 1; i >= 0; i-- {
297 if filename[i] == '/' && (pm[filename[:i]] || pm[filename[:i+1]]) {
298 // Parent directory match
305 // walk traverses collfs starting at "." and returns the paths of all regular files that match.
// The returned error is whatever fs.WalkDir reports. The returned
// slice is nil if no path was appended.
306 func (pm pathmatcher) walk(collfs fs.FS) ([]string, error) {
307 var filepaths []string
308 err := fs.WalkDir(collfs, ".", func(path string, dirent fs.DirEntry, err error) error {
318 filepaths = append(filepaths, path)
321 return filepaths, err