5824: Add some clarifying comments and golint/vet/fmt fixes.
[arvados.git] / services / keep-web / handler.go
1 package main
2
3 import (
4         "flag"
5         "fmt"
6         "html"
7         "io"
8         "mime"
9         "net/http"
10         "net/url"
11         "os"
12         "strings"
13         "time"
14
15         "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
16         "git.curoverse.com/arvados.git/sdk/go/auth"
17         "git.curoverse.com/arvados.git/sdk/go/httpserver"
18         "git.curoverse.com/arvados.git/sdk/go/keepclient"
19 )
20
21 type handler struct{}
22
23 var (
24         clientPool         = arvadosclient.MakeClientPool()
25         trustAllContent    = false
26         anonymousTokens    []string
27         attachmentOnlyHost = ""
28 )
29
30 func init() {
31         flag.BoolVar(&trustAllContent, "trust-all-content", false,
32                 "Serve non-public content from a single origin. Dangerous: read docs before using!")
33         flag.StringVar(&attachmentOnlyHost, "attachment-only-host", "",
34                 "Accept credentials, and add \"Content-Disposition: attachment\" response headers, for requests at this hostname:port. Prohibiting inline display makes it possible to serve untrusted and non-public content from a single origin, i.e., without wildcard DNS or SSL.")
35 }
36
37 // return a UUID or PDH if s begins with a UUID or URL-encoded PDH;
38 // otherwise return "".
39 func parseCollectionIDFromDNSName(s string) string {
40         // Strip domain.
41         if i := strings.IndexRune(s, '.'); i >= 0 {
42                 s = s[:i]
43         }
44         // Names like {uuid}--collections.example.com serve the same
45         // purpose as {uuid}.collections.example.com but can reduce
46         // cost/effort of using [additional] wildcard certificates.
47         if i := strings.Index(s, "--"); i >= 0 {
48                 s = s[:i]
49         }
50         if arvadosclient.UUIDMatch(s) {
51                 return s
52         }
53         if pdh := strings.Replace(s, "-", "+", 1); arvadosclient.PDHMatch(pdh) {
54                 return pdh
55         }
56         return ""
57 }
58
59 var urlPDHDecoder = strings.NewReplacer(" ", "+", "-", "+")
60
61 // return a UUID or PDH if s is a UUID or a PDH (even if it is a PDH
62 // with "+" replaced by " " or "-"); otherwise return "".
63 func parseCollectionIDFromURL(s string) string {
64         if arvadosclient.UUIDMatch(s) {
65                 return s
66         }
67         if pdh := urlPDHDecoder.Replace(s); arvadosclient.PDHMatch(pdh) {
68                 return pdh
69         }
70         return ""
71 }
72
73 func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
74         var statusCode = 0
75         var statusText string
76
77         remoteAddr := r.RemoteAddr
78         if xff := r.Header.Get("X-Forwarded-For"); xff != "" {
79                 remoteAddr = xff + "," + remoteAddr
80         }
81
82         w := httpserver.WrapResponseWriter(wOrig)
83         defer func() {
84                 if statusCode == 0 {
85                         statusCode = w.WroteStatus()
86                 } else if w.WroteStatus() == 0 {
87                         w.WriteHeader(statusCode)
88                 } else if w.WroteStatus() != statusCode {
89                         httpserver.Log(r.RemoteAddr, "WARNING",
90                                 fmt.Sprintf("Our status changed from %d to %d after we sent headers", w.WroteStatus(), statusCode))
91                 }
92                 if statusText == "" {
93                         statusText = http.StatusText(statusCode)
94                 }
95                 httpserver.Log(remoteAddr, statusCode, statusText, w.WroteBodyBytes(), r.Method, r.Host, r.URL.Path, r.URL.RawQuery)
96         }()
97
98         if r.Method != "GET" && r.Method != "POST" {
99                 statusCode, statusText = http.StatusMethodNotAllowed, r.Method
100                 return
101         }
102
103         arv := clientPool.Get()
104         if arv == nil {
105                 statusCode, statusText = http.StatusInternalServerError, "Pool failed: "+clientPool.Err().Error()
106                 return
107         }
108         defer clientPool.Put(arv)
109
110         pathParts := strings.Split(r.URL.Path[1:], "/")
111
112         var targetID string
113         var targetPath []string
114         var tokens []string
115         var reqTokens []string
116         var pathToken bool
117         var attachment bool
118         credentialsOK := trustAllContent
119
120         if r.Host != "" && r.Host == attachmentOnlyHost {
121                 credentialsOK = true
122                 attachment = true
123         } else if r.FormValue("disposition") == "attachment" {
124                 attachment = true
125         }
126
127         if targetID = parseCollectionIDFromDNSName(r.Host); targetID != "" {
128                 // http://ID.collections.example/PATH...
129                 credentialsOK = true
130                 targetPath = pathParts
131         } else if len(pathParts) >= 2 && strings.HasPrefix(pathParts[0], "c=") {
132                 // /c=ID/PATH...
133                 targetID = parseCollectionIDFromURL(pathParts[0][2:])
134                 targetPath = pathParts[1:]
135         } else if len(pathParts) >= 3 && pathParts[0] == "collections" {
136                 if len(pathParts) >= 5 && pathParts[1] == "download" {
137                         // /collections/download/ID/TOKEN/PATH...
138                         targetID = pathParts[2]
139                         tokens = []string{pathParts[3]}
140                         targetPath = pathParts[4:]
141                         pathToken = true
142                 } else {
143                         // /collections/ID/PATH...
144                         targetID = pathParts[1]
145                         tokens = anonymousTokens
146                         targetPath = pathParts[2:]
147                 }
148         } else {
149                 statusCode = http.StatusNotFound
150                 return
151         }
152         if t := r.FormValue("api_token"); t != "" {
153                 // The client provided an explicit token in the query
154                 // string, or a form in POST body. We must put the
155                 // token in an HttpOnly cookie, and redirect to the
156                 // same URL with the query param redacted and method =
157                 // GET.
158
159                 if !credentialsOK {
160                         // It is not safe to copy the provided token
161                         // into a cookie unless the current vhost
162                         // (origin) serves only a single collection or
163                         // we are in trustAllContent mode.
164                         statusCode = http.StatusBadRequest
165                         return
166                 }
167
168                 // The HttpOnly flag is necessary to prevent
169                 // JavaScript code (included in, or loaded by, a page
170                 // in the collection being served) from employing the
171                 // user's token beyond reading other files in the same
172                 // domain, i.e., same collection.
173                 //
174                 // The 303 redirect is necessary in the case of a GET
175                 // request to avoid exposing the token in the Location
176                 // bar, and in the case of a POST request to avoid
177                 // raising warnings when the user refreshes the
178                 // resulting page.
179
180                 http.SetCookie(w, &http.Cookie{
181                         Name:     "arvados_api_token",
182                         Value:    auth.EncodeTokenCookie([]byte(t)),
183                         Path:     "/",
184                         Expires:  time.Now().AddDate(10, 0, 0),
185                         HttpOnly: true,
186                 })
187                 redir := (&url.URL{Host: r.Host, Path: r.URL.Path}).String()
188
189                 w.Header().Add("Location", redir)
190                 statusCode, statusText = http.StatusSeeOther, redir
191                 w.WriteHeader(statusCode)
192                 io.WriteString(w, `<A href="`)
193                 io.WriteString(w, html.EscapeString(redir))
194                 io.WriteString(w, `">Continue</A>`)
195                 return
196         }
197
198         if tokens == nil && strings.HasPrefix(targetPath[0], "t=") {
199                 // http://ID.example/t=TOKEN/PATH...
200                 // /c=ID/t=TOKEN/PATH...
201                 //
202                 // This form must only be used to pass scoped tokens
203                 // that give permission for a single collection. See
204                 // FormValue case above.
205                 tokens = []string{targetPath[0][2:]}
206                 pathToken = true
207                 targetPath = targetPath[1:]
208         }
209
210         if tokens == nil {
211                 if credentialsOK {
212                         reqTokens = auth.NewCredentialsFromHTTPRequest(r).Tokens
213                 }
214                 tokens = append(reqTokens, anonymousTokens...)
215         }
216
217         if len(targetPath) > 0 && targetPath[0] == "_" {
218                 // If a collection has a directory called "t=foo" or
219                 // "_", it can be served at
220                 // //collections.example/_/t=foo/ or
221                 // //collections.example/_/_/ respectively:
222                 // //collections.example/t=foo/ won't work because
223                 // t=foo will be interpreted as a token "foo".
224                 targetPath = targetPath[1:]
225         }
226
227         tokenResult := make(map[string]int)
228         collection := make(map[string]interface{})
229         found := false
230         for _, arv.ApiToken = range tokens {
231                 err := arv.Get("collections", targetID, nil, &collection)
232                 if err == nil {
233                         // Success
234                         found = true
235                         break
236                 }
237                 if srvErr, ok := err.(arvadosclient.APIServerError); ok {
238                         switch srvErr.HttpStatusCode {
239                         case 404, 401:
240                                 // Token broken or insufficient to
241                                 // retrieve collection
242                                 tokenResult[arv.ApiToken] = srvErr.HttpStatusCode
243                                 continue
244                         }
245                 }
246                 // Something more serious is wrong
247                 statusCode, statusText = http.StatusInternalServerError, err.Error()
248                 return
249         }
250         if !found {
251                 if pathToken || !credentialsOK {
252                         // Either the URL is a "secret sharing link"
253                         // that didn't work out (and asking the client
254                         // for additional credentials would just be
255                         // confusing), or we don't even accept
256                         // credentials at this path.
257                         statusCode = http.StatusNotFound
258                         return
259                 }
260                 for _, t := range reqTokens {
261                         if tokenResult[t] == 404 {
262                                 // The client provided valid token(s), but the
263                                 // collection was not found.
264                                 statusCode = http.StatusNotFound
265                                 return
266                         }
267                 }
268                 // The client's token was invalid (e.g., expired), or
269                 // the client didn't even provide one.  Propagate the
270                 // 401 to encourage the client to use a [different]
271                 // token.
272                 //
273                 // TODO(TC): This response would be confusing to
274                 // someone trying (anonymously) to download public
275                 // data that has been deleted.  Allow a referrer to
276                 // provide this context somehow?
277                 w.Header().Add("WWW-Authenticate", "Basic realm=\"collections\"")
278                 statusCode = http.StatusUnauthorized
279                 return
280         }
281
282         filename := strings.Join(targetPath, "/")
283         kc, err := keepclient.MakeKeepClient(arv)
284         if err != nil {
285                 statusCode, statusText = http.StatusInternalServerError, err.Error()
286                 return
287         }
288         rdr, err := kc.CollectionFileReader(collection, filename)
289         if os.IsNotExist(err) {
290                 statusCode = http.StatusNotFound
291                 return
292         } else if err != nil {
293                 statusCode, statusText = http.StatusBadGateway, err.Error()
294                 return
295         }
296         defer rdr.Close()
297
298         // One or both of these can be -1 if not found:
299         basenamePos := strings.LastIndex(filename, "/")
300         extPos := strings.LastIndex(filename, ".")
301         if extPos > basenamePos {
302                 // Now extPos is safely >= 0.
303                 if t := mime.TypeByExtension(filename[extPos:]); t != "" {
304                         w.Header().Set("Content-Type", t)
305                 }
306         }
307         if rdr, ok := rdr.(keepclient.ReadCloserWithLen); ok {
308                 w.Header().Set("Content-Length", fmt.Sprintf("%d", rdr.Len()))
309         }
310         if attachment {
311                 w.Header().Set("Content-Disposition", "attachment")
312         }
313
314         w.WriteHeader(http.StatusOK)
315         _, err = io.Copy(w, rdr)
316         if err != nil {
317                 statusCode, statusText = http.StatusBadGateway, err.Error()
318         }
319 }