1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
20 "git.arvados.org/arvados.git/sdk/go/arvados"
21 "git.arvados.org/arvados.git/sdk/go/ctxlog"
// rfc3339NanoFixed is a time layout in the style of RFC 3339 whose
// fractional-seconds field is written with zeros rather than nines,
// so formatted timestamps always carry nine fractional digits
// (trailing zeros are kept). It is used in this file to format the
// created_at and modified_at values of the collection metadata.
24 const rfc3339NanoFixed = "2006-01-02T15:04:05.000000000Z07:00"
// zipParams holds the client-supplied options for a zip download.
// serveZip fills it in either from form values or by decoding a JSON
// request body.
26 type zipParams struct {
// IncludeCollectionMetadata asks for a "collection.json" entry
// describing the collection to be added to the archive. In a JSON
// request body it is spelled "include_collection_metadata".
28 IncludeCollectionMetadata bool `json:"include_collection_metadata"`
31 // serveZip handles a request for a zip archive.
//
// The steps that can be read from this function are:
//   - reject any method other than GET, HEAD, or POST with 400;
//   - reject with 403 unless tokenUser passes the "GET" check in
//     userPermittedToUploadOrDownload;
//   - reject with 400 unless ziproot resolves to the root directory
//     of a collection (determineCollection yields a non-nil
//     collection and an empty subdir);
//   - take the "files" and "include_collection_metadata" options
//     from r.Form, or decode them from the request body when no
//     "files" form values were given and Content-Type is
//     application/json (an empty body, i.e. io.EOF, is not an error);
//   - respond 404 if a requested path cannot be opened in the
//     collection, then gather the matching file paths with
//     pathmatcher.walk;
//   - choose the suggested download filename (portable data hash or
//     collection name, plus a " - <file>" or " - N files" suffix for
//     partial downloads, plus ".zip");
//   - log the download, optionally look up the last-modified-by
//     user, and stream the archive with writeZip.
//
// Errors returned by writeZip are only logged, because the response
// header has already been sent by then.
32 func (h *handler) serveZip(w http.ResponseWriter, r *http.Request, session *cachedSession, sitefs arvados.CustomFileSystem, ziproot string, tokenUser *arvados.User) {
33 if r.Method != "GET" && r.Method != "HEAD" && r.Method != "POST" {
34 // This is a generic 400, not 405 (method not allowed)
35 // because this method/URL combination is allowed,
36 // just not with the Accept: application/zip header.
37 http.Error(w, "zip archive can only be served via GET, HEAD, or POST", http.StatusBadRequest)
40 // Check "GET" permission regardless of r.Method, because all
41 // methods result in downloads.
42 if !h.userPermittedToUploadOrDownload("GET", tokenUser) {
43 http.Error(w, "Not permitted", http.StatusForbidden)
46 coll, subdir := h.determineCollection(sitefs, ziproot)
47 if coll == nil || subdir != "" {
48 http.Error(w, "zip archive can only be served from the root directory of a collection", http.StatusBadRequest)
52 // Load params from query and post form
56 http.Error(w, err.Error(), http.StatusBadRequest)
59 params.Files = r.Form["files"]
60 params.IncludeCollectionMetadata = r.Form.Get("include_collection_metadata") != ""
62 // Load params from JSON request body
63 if params.Files == nil && r.Header.Get("Content-Type") == "application/json" {
64 // r.Body is always non-nil, but will return EOF
65 // immediately if no body is present.
66 err := json.NewDecoder(r.Body).Decode(&params)
67 if err != nil && err != io.EOF {
68 http.Error(w, "error reading request body: "+err.Error(), http.StatusBadRequest)
73 // Check that the supplied files/dirs actually exist, and use
74 // pathmatcher to build a list of all matching files in the
76 collfs, err := fs.Sub(arvados.FS(sitefs), strings.TrimSuffix(ziproot, "/"))
78 http.Error(w, err.Error(), http.StatusInternalServerError)
81 matcher := make(pathmatcher)
82 for _, path := range params.Files {
86 } else if f, err := collfs.Open(strings.TrimSuffix(path, "/")); err != nil {
87 http.Error(w, fmt.Sprintf("%q: file does not exist", path), http.StatusNotFound)
93 filepaths, err := matcher.walk(collfs)
95 http.Error(w, err.Error(), http.StatusInternalServerError)
99 // (Unless fetching by PDH) get additional collection details
100 // for logging, collection metadata file, and suggested
101 // filename for user agent.
102 var zipfilename string
104 zipfilename = coll.PortableDataHash
106 err = session.client.RequestAndDecode(&coll, "GET", "arvados/v1/collections/"+coll.UUID, nil, map[string]interface{}{
111 "modified_by_user_uuid",
113 "portable_data_hash",
119 if he := errorWithHTTPStatus(nil); errors.As(err, &he) {
120 http.Error(w, err.Error(), he.HTTPStatus())
122 http.Error(w, err.Error(), http.StatusInternalServerError)
126 zipfilename = coll.Name
128 if len(filepaths) == 1 && len(params.Files) == 1 && filepaths[0] == params.Files[0] {
129 // If the client specified a single (non-directory)
130 // file, include the name of the file in the zip
132 _, basename := filepath.Split(filepaths[0])
133 zipfilename += " - " + basename
134 } else if len(matcher) > 0 && !matcher["/"] {
135 // If the client specified any other subset of the
136 // collection, mention the number of files that will
137 // be in the archive, to make it more obvious that
138 // it's not an archive of the entire collection.
139 zipfilename += fmt.Sprintf(" - %d files", len(filepaths))
141 zipfilename += ".zip"
144 if len(filepaths) == 1 {
145 // If downloading a zip file with exactly one file,
146 // log that file as collection_file_path in the audit
147 // logs. (Otherwise, leave collection_file_path
149 logpath = filepaths[0]
151 rGET := r.Clone(r.Context())
153 h.logUploadOrDownload(rGET, session.arvadosclient, session.fs, logpath, len(filepaths), coll, tokenUser)
155 // Get additional user details for last-modified-by user, to
156 // include in the collection metadata file.
157 var user arvados.User
158 if params.IncludeCollectionMetadata && coll.ModifiedByUserUUID != "" {
159 err = session.client.RequestAndDecode(&user, "GET", "arvados/v1/users/"+coll.ModifiedByUserUUID, nil, map[string]interface{}{
165 // RailsAPI <= 3.1 fails if we select
166 // full_name without also selecting
167 // first_name and last_name.
172 if he := errorWithHTTPStatus(nil); errors.As(err, &he) && he.HTTPStatus() < 500 {
173 // Cannot retrieve the user record, but this
174 // shouldn't prevent the download from
176 } else if errors.As(err, &he) {
177 http.Error(w, err.Error(), he.HTTPStatus())
179 } else if err != nil {
180 http.Error(w, err.Error(), http.StatusInternalServerError)
185 err = h.writeZip(w, coll, collfs, zipfilename, filepaths, params, user)
187 ctxlog.FromContext(r.Context()).Errorf("error writing zip archive after sending response header: %s", err)
// writeZip sends the response header and streams a zip archive to w.
//
// It sets Content-Disposition (attachment, with zipfilename as the
// suggested filename) and Content-Type: application/zip, writes a 200
// status, and then produces the archive with a zip.Writer wrapped
// around w. The archive comment is "Downloaded from <URL>", where the
// URL is the cluster's WebDAVDownload external URL with a /by_id/
// path naming the collection by UUID or by portable data hash. When
// params.IncludeCollectionMetadata is set, a "collection.json" entry
// describing the collection (optionally including details of user,
// the last-modified-by user) is written before the files. Each path
// in filepaths is then opened from collfs and copied into its own
// archive entry.
//
// NOTE(review): the 200 status has already been written before any
// archive data is produced, so a returned error presumably cannot
// change the HTTP status seen by the client -- confirm with callers.
191 func (h *handler) writeZip(w http.ResponseWriter, coll *arvados.Collection, collfs fs.FS, zipfilename string, filepaths []string, params zipParams, user arvados.User) error {
192 // Note mime.FormatMediaType() also sets the "filename*" param
193 // if zipfilename contains non-ASCII chars, as recommended by
195 w.Header().Set("Content-Disposition", mime.FormatMediaType("attachment", map[string]string{"filename": zipfilename}))
196 w.Header().Set("Content-Type", "application/zip")
197 w.WriteHeader(http.StatusOK)
198 zipw := zip.NewWriter(w)
200 u := url.URL(h.Cluster.Services.WebDAVDownload.ExternalURL)
202 u.Path = "/by_id/" + coll.UUID + "/"
204 u.Path = "/by_id/" + coll.PortableDataHash + "/"
206 err := zipw.SetComment(fmt.Sprintf("Downloaded from %s", u.String()))
210 if params.IncludeCollectionMetadata {
211 m := map[string]interface{}{
212 "portable_data_hash": coll.PortableDataHash,
215 m["uuid"] = coll.UUID
216 m["name"] = coll.Name
217 m["properties"] = coll.Properties
218 m["created_at"] = coll.CreatedAt.Format(rfc3339NanoFixed)
219 m["modified_at"] = coll.ModifiedAt.Format(rfc3339NanoFixed)
220 m["description"] = coll.Description
223 m["modified_by_user"] = map[string]interface{}{
225 "full_name": user.FullName,
226 "username": user.Username,
230 zipf, err := zipw.CreateHeader(&zip.FileHeader{
231 Name: "collection.json",
237 err = json.NewEncoder(zipf).Encode(m)
242 for _, path := range filepaths {
243 f, err := collfs.Open(path)
// Within the loop body, w is the writer for the new zip entry and
// shadows the http.ResponseWriter parameter of the same name.
248 w, err := zipw.CreateHeader(&zip.FileHeader{
256 _, err = io.Copy(w, f)
// pathmatcher is a set of selected paths, represented as a map keyed
// by path string. serveZip looks up "/" in it to tell whether the
// whole collection was selected, and match looks up file names and
// parent-directory prefixes in it.
265 type pathmatcher map[string]bool
// match reports whether filename is selected by pm. The cases
// described by the comments in the body are: no paths were given (so
// every file is included), filename itself was selected, the entire
// collection was selected, or a parent directory of filename was
// selected.
267 func (pm pathmatcher) match(filename string) bool {
269 // No paths given ==> include all files
273 // Exact filename match
277 // Entire collection selected (special case not
278 // covered by the generic "parent selected" loop
// Scan filename from right to left; at each '/', look up the
// leading directory prefix both without and with that trailing
// slash, so either spelling of a selected directory is recognized.
282 for i := len(filename) - 1; i >= 0; i-- {
283 if filename[i] == '/' && (pm[filename[:i]] || pm[filename[:i+1]]) {
284 // Parent directory match
291 // walk traverses collfs and returns the paths of all regular files that match.
//
// The traversal starts at "." and uses fs.WalkDir; selected paths are
// appended to the result in the order the callback receives them.
// The second return value is the error reported by fs.WalkDir.
292 func (pm pathmatcher) walk(collfs fs.FS) ([]string, error) {
293 var filepaths []string
294 err := fs.WalkDir(collfs, ".", func(path string, dirent fs.DirEntry, err error) error {
304 filepaths = append(filepaths, path)
307 return filepaths, err