5824: Fail at startup if ARVADOS_API_HOST is not set.
[arvados.git] / services / keep-web / handler.go
1 package main
2
3 import (
4         "fmt"
5         "html"
6         "io"
7         "mime"
8         "net/http"
9         "net/url"
10         "os"
11         "strings"
12         "time"
13
14         "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
15         "git.curoverse.com/arvados.git/sdk/go/auth"
16         "git.curoverse.com/arvados.git/sdk/go/httpserver"
17         "git.curoverse.com/arvados.git/sdk/go/keepclient"
18 )
19
20 type handler struct{}
21
22 var clientPool = arvadosclient.MakeClientPool()
23 var anonymousTokens []string
24
25 // return a UUID or PDH if s begins with a UUID or URL-encoded PDH;
26 // otherwise return "".
27 func parseCollectionIdFromDNSName(s string) string {
28         // Strip domain.
29         if i := strings.IndexRune(s, '.'); i >= 0 {
30                 s = s[:i]
31         }
32         // Names like {uuid}--dl.example.com serve the same purpose as
33         // {uuid}.dl.example.com but can reduce cost/effort of using
34         // [additional] wildcard certificates.
35         if i := strings.Index(s, "--"); i >= 0 {
36                 s = s[:i]
37         }
38         if arvadosclient.UUIDMatch(s) {
39                 return s
40         }
41         if pdh := strings.Replace(s, "-", "+", 1); arvadosclient.PDHMatch(pdh) {
42                 return pdh
43         }
44         return ""
45 }
46
47 var urlPDHDecoder = strings.NewReplacer(" ", "+", "-", "+")
48
49 // return a UUID or PDH if s is a UUID or a PDH (even if it is a PDH
50 // with "+" replaced by " " or "-"); otherwise return "".
51 func parseCollectionIdFromURL(s string) string {
52         if arvadosclient.UUIDMatch(s) {
53                 return s
54         }
55         if pdh := urlPDHDecoder.Replace(s); arvadosclient.PDHMatch(pdh) {
56                 return pdh
57         }
58         return ""
59 }
60
61 func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
62         var statusCode = 0
63         var statusText string
64
65         w := httpserver.WrapResponseWriter(wOrig)
66         defer func() {
67                 if statusCode == 0 {
68                         statusCode = w.WroteStatus()
69                 } else if w.WroteStatus() == 0 {
70                         w.WriteHeader(statusCode)
71                 } else if w.WroteStatus() != statusCode {
72                         httpserver.Log(r.RemoteAddr, "WARNING",
73                                 fmt.Sprintf("Our status changed from %d to %d after we sent headers", w.WroteStatus(), statusCode))
74                 }
75                 if statusText == "" {
76                         statusText = http.StatusText(statusCode)
77                 }
78                 httpserver.Log(r.RemoteAddr, statusCode, statusText, w.WroteBodyBytes(), r.Method, r.Host, r.URL.Path, r.URL.RawQuery)
79         }()
80
81         if r.Method != "GET" && r.Method != "POST" {
82                 statusCode, statusText = http.StatusMethodNotAllowed, r.Method
83                 return
84         }
85
86         arv := clientPool.Get()
87         if arv == nil {
88                 statusCode, statusText = http.StatusInternalServerError, "Pool failed: "+clientPool.Err().Error()
89                 return
90         }
91         defer clientPool.Put(arv)
92
93         pathParts := strings.Split(r.URL.Path[1:], "/")
94
95         var targetId string
96         var targetPath []string
97         var tokens []string
98         var reqTokens []string
99         var pathToken bool
100         var credentialsOK bool
101
102         if targetId = parseCollectionIdFromDNSName(r.Host); targetId != "" {
103                 // http://ID.dl.example/PATH...
104                 credentialsOK = true
105                 targetPath = pathParts
106         } else if len(pathParts) >= 2 && strings.HasPrefix(pathParts[0], "c=") {
107                 // /c=ID/PATH...
108                 targetId = parseCollectionIdFromURL(pathParts[0][2:])
109                 targetPath = pathParts[1:]
110         } else if len(pathParts) >= 3 && pathParts[0] == "collections" {
111                 if len(pathParts) >= 5 && pathParts[1] == "download" {
112                         // /collections/download/ID/TOKEN/PATH...
113                         targetId = pathParts[2]
114                         tokens = []string{pathParts[3]}
115                         targetPath = pathParts[4:]
116                         pathToken = true
117                 } else {
118                         // /collections/ID/PATH...
119                         targetId = pathParts[1]
120                         tokens = anonymousTokens
121                         targetPath = pathParts[2:]
122                 }
123         } else {
124                 statusCode = http.StatusNotFound
125                 return
126         }
127         if t := r.FormValue("api_token"); t != "" {
128                 // The client provided an explicit token in the query
129                 // string, or a form in POST body. We must put the
130                 // token in an HttpOnly cookie, and redirect to the
131                 // same URL with the query param redacted and method =
132                 // GET.
133
134                 if !credentialsOK {
135                         // It is not safe to copy the provided token
136                         // into a cookie unless the current vhost
137                         // (origin) serves only a single collection.
138                         statusCode = http.StatusBadRequest
139                         return
140                 }
141
142                 // The HttpOnly flag is necessary to prevent
143                 // JavaScript code (included in, or loaded by, a page
144                 // in the collection being served) from employing the
145                 // user's token beyond reading other files in the same
146                 // domain, i.e., same collection.
147                 //
148                 // The 303 redirect is necessary in the case of a GET
149                 // request to avoid exposing the token in the Location
150                 // bar, and in the case of a POST request to avoid
151                 // raising warnings when the user refreshes the
152                 // resulting page.
153
154                 http.SetCookie(w, &http.Cookie{
155                         Name:     "api_token",
156                         Value:    auth.EncodeTokenCookie([]byte(t)),
157                         Path:     "/",
158                         Expires:  time.Now().AddDate(10,0,0),
159                         HttpOnly: true,
160                 })
161                 redir := (&url.URL{Host: r.Host, Path: r.URL.Path}).String()
162
163                 w.Header().Add("Location", redir)
164                 statusCode, statusText = http.StatusSeeOther, redir
165                 w.WriteHeader(statusCode)
166                 io.WriteString(w, `<A href="`)
167                 io.WriteString(w, html.EscapeString(redir))
168                 io.WriteString(w, `">Continue</A>`)
169                 return
170         }
171
172         if tokens == nil && strings.HasPrefix(targetPath[0], "t=") {
173                 // http://ID.example/t=TOKEN/PATH...
174                 // /c=ID/t=TOKEN/PATH...
175                 //
176                 // This form must only be used to pass scoped tokens
177                 // that give permission for a single collection. See
178                 // FormValue case above.
179                 tokens = []string{targetPath[0][2:]}
180                 pathToken = true
181                 targetPath = targetPath[1:]
182         }
183
184         if tokens == nil {
185                 if credentialsOK {
186                         reqTokens = auth.NewCredentialsFromHTTPRequest(r).Tokens
187                 }
188                 tokens = append(reqTokens, anonymousTokens...)
189         }
190
191         if len(targetPath) > 0 && targetPath[0] == "_" {
192                 // If a collection has a directory called "t=foo" or
193                 // "_", it can be served at //dl.example/_/t=foo/ or
194                 // //dl.example/_/_/ respectively: //dl.example/t=foo/
195                 // won't work because t=foo will be interpreted as a
196                 // token "foo".
197                 targetPath = targetPath[1:]
198         }
199
200         tokenResult := make(map[string]int)
201         collection := make(map[string]interface{})
202         found := false
203         for _, arv.ApiToken = range tokens {
204                 err := arv.Get("collections", targetId, nil, &collection)
205                 if err == nil {
206                         // Success
207                         found = true
208                         break
209                 }
210                 if srvErr, ok := err.(arvadosclient.APIServerError); ok {
211                         switch srvErr.HttpStatusCode {
212                         case 404, 401:
213                                 // Token broken or insufficient to
214                                 // retrieve collection
215                                 tokenResult[arv.ApiToken] = srvErr.HttpStatusCode
216                                 continue
217                         }
218                 }
219                 // Something more serious is wrong
220                 statusCode, statusText = http.StatusInternalServerError, err.Error()
221                 return
222         }
223         if !found {
224                 if pathToken || !credentialsOK {
225                         // Either the URL is a "secret sharing link"
226                         // that didn't work out (and asking the client
227                         // for additional credentials would just be
228                         // confusing), or we don't even accept
229                         // credentials at this path.
230                         statusCode = http.StatusNotFound
231                         return
232                 }
233                 for _, t := range reqTokens {
234                         if tokenResult[t] == 404 {
235                                 // The client provided valid token(s), but the
236                                 // collection was not found.
237                                 statusCode = http.StatusNotFound
238                                 return
239                         }
240                 }
241                 // The client's token was invalid (e.g., expired), or
242                 // the client didn't even provide one.  Propagate the
243                 // 401 to encourage the client to use a [different]
244                 // token.
245                 //
246                 // TODO(TC): This response would be confusing to
247                 // someone trying (anonymously) to download public
248                 // data that has been deleted.  Allow a referrer to
249                 // provide this context somehow?
250                 w.Header().Add("WWW-Authenticate", "Basic realm=\"dl\"")
251                 statusCode = http.StatusUnauthorized
252                 return
253         }
254
255         filename := strings.Join(targetPath, "/")
256         kc, err := keepclient.MakeKeepClient(arv)
257         if err != nil {
258                 statusCode, statusText = http.StatusInternalServerError, err.Error()
259                 return
260         }
261         rdr, err := kc.CollectionFileReader(collection, filename)
262         if os.IsNotExist(err) {
263                 statusCode = http.StatusNotFound
264                 return
265         } else if err != nil {
266                 statusCode, statusText = http.StatusBadGateway, err.Error()
267                 return
268         }
269         defer rdr.Close()
270
271         // One or both of these can be -1 if not found:
272         basenamePos := strings.LastIndex(filename, "/")
273         extPos := strings.LastIndex(filename, ".")
274         if extPos > basenamePos {
275                 // Now extPos is safely >= 0.
276                 if t := mime.TypeByExtension(filename[extPos:]); t != "" {
277                         w.Header().Set("Content-Type", t)
278                 }
279         }
280         w.Header().Set("Content-Length", fmt.Sprintf("%d", rdr.Len()))
281
282         w.WriteHeader(http.StatusOK)
283         _, err = io.Copy(w, rdr)
284         if err != nil {
285                 statusCode, statusText = http.StatusBadGateway, err.Error()
286         }
287 }