5824: Fix up support for PDH in vhostname.
[arvados.git] / services / keep-web / handler.go
1 package main
2
3 import (
4         "fmt"
5         "html"
6         "io"
7         "mime"
8         "net/http"
9         "net/url"
10         "os"
11         "strings"
12         "time"
13
14         "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
15         "git.curoverse.com/arvados.git/sdk/go/auth"
16         "git.curoverse.com/arvados.git/sdk/go/httpserver"
17         "git.curoverse.com/arvados.git/sdk/go/keepclient"
18 )
19
20 var clientPool = arvadosclient.MakeClientPool()
21
22 var anonymousTokens []string
23
24 type handler struct{}
25
26 func init() {
27         // TODO(TC): Get anonymousTokens from flags
28         anonymousTokens = []string{}
29 }
30
31 // return a UUID or PDH if s begins with a UUID or URL-encoded PDH;
32 // otherwise return "".
33 func parseCollectionIdFromDNSName(s string) string {
34         // Strip domain.
35         if i := strings.IndexRune(s, '.'); i >= 0 {
36                 s = s[:i]
37         }
38         // Names like {uuid}--dl.example.com serve the same purpose as
39         // {uuid}.dl.example.com but can reduce cost/effort of using
40         // [additional] wildcard certificates.
41         if i := strings.Index(s, "--"); i >= 0 {
42                 s = s[:i]
43         }
44         if arvadosclient.UUIDMatch(s) {
45                 return s
46         }
47         if pdh := strings.Replace(s, "-", "+", 1); arvadosclient.PDHMatch(pdh) {
48                 return pdh
49         }
50         return ""
51 }
52
53 func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
54         var statusCode = 0
55         var statusText string
56
57         w := httpserver.WrapResponseWriter(wOrig)
58         defer func() {
59                 if statusCode == 0 {
60                         statusCode = w.WroteStatus()
61                 } else if w.WroteStatus() == 0 {
62                         w.WriteHeader(statusCode)
63                 } else if w.WroteStatus() != statusCode {
64                         httpserver.Log(r.RemoteAddr, "WARNING",
65                                 fmt.Sprintf("Our status changed from %d to %d after we sent headers", w.WroteStatus(), statusCode))
66                 }
67                 if statusText == "" {
68                         statusText = http.StatusText(statusCode)
69                 }
70                 httpserver.Log(r.RemoteAddr, statusCode, statusText, w.WroteBodyBytes(), r.Method, r.Host, r.URL.Path, r.URL.RawQuery)
71         }()
72
73         if r.Method != "GET" && r.Method != "POST" {
74                 statusCode, statusText = http.StatusMethodNotAllowed, r.Method
75                 return
76         }
77
78         arv := clientPool.Get()
79         if arv == nil {
80                 statusCode, statusText = http.StatusInternalServerError, "Pool failed: "+clientPool.Err().Error()
81                 return
82         }
83         defer clientPool.Put(arv)
84
85         pathParts := strings.Split(r.URL.Path[1:], "/")
86
87         var targetId string
88         var targetPath []string
89         var tokens []string
90         var reqTokens []string
91         var pathToken bool
92
93         if targetId = parseCollectionIdFromDNSName(r.Host); targetId != "" {
94                 // "http://{id}.domain.example.com/{path}" form
95                 if t := r.FormValue("api_token"); t != "" {
96                         // ...with explicit token in query string or
97                         // form in POST body. We must encrypt the
98                         // token such that it can only be used for
99                         // this collection; put it in an HttpOnly
100                         // cookie; and redirect to the same URL with
101                         // the query param redacted, and method =
102                         // GET.
103                         //
104                         // The HttpOnly flag is necessary to prevent
105                         // JavaScript code (included in, or loaded by,
106                         // a page in the collection being served) from
107                         // employing the user's token beyond reading
108                         // other files in the same domain, i.e., same
109                         // the collection.
110                         //
111                         // The 303 redirect is necessary in the case
112                         // of a GET request to avoid exposing the
113                         // token in the Location bar, and in the case
114                         // of a POST request to avoid raising warnings
115                         // when the user refreshes the resulting page.
116                         http.SetCookie(w, &http.Cookie{
117                                 Name:    "api_token",
118                                 Value:   auth.EncodeTokenCookie([]byte(t)),
119                                 Path:    "/",
120                                 Expires: time.Now().AddDate(10,0,0),
121                         })
122                         redir := (&url.URL{Host: r.Host, Path: r.URL.Path}).String()
123
124                         w.Header().Add("Location", redir)
125                         statusCode, statusText = http.StatusSeeOther, redir
126                         w.WriteHeader(statusCode)
127                         io.WriteString(w, `<A href="`)
128                         io.WriteString(w, html.EscapeString(redir))
129                         io.WriteString(w, `">Continue</A>`)
130                         return
131                 } else if strings.HasPrefix(pathParts[0], "t=") {
132                         // ...with explicit token in path,
133                         // "{...}.com/t={token}/{path}".  This form
134                         // must only be used to pass scoped tokens
135                         // that give permission for a single
136                         // collection. See FormValue case above.
137                         tokens = []string{pathParts[0][2:]}
138                         targetPath = pathParts[1:]
139                         pathToken = true
140                 } else {
141                         // ...with cookie, Authorization header, or
142                         // no token at all
143                         reqTokens = auth.NewCredentialsFromHTTPRequest(r).Tokens
144                         tokens = append(reqTokens, anonymousTokens...)
145                         targetPath = pathParts
146                 }
147         } else if len(pathParts) < 3 || pathParts[0] != "collections" || pathParts[1] == "" || pathParts[2] == "" {
148                 statusCode = http.StatusNotFound
149                 return
150         } else if len(pathParts) >= 5 && pathParts[1] == "download" {
151                 // "/collections/download/{id}/{token}/path..." form:
152                 // Don't use our configured anonymous tokens,
153                 // Authorization headers, etc.  Just use the token in
154                 // the path.
155                 targetId = pathParts[2]
156                 tokens = []string{pathParts[3]}
157                 targetPath = pathParts[4:]
158                 pathToken = true
159         } else {
160                 // "/collections/{id}/path..." form
161                 targetId = pathParts[1]
162                 reqTokens = auth.NewCredentialsFromHTTPRequest(r).Tokens
163                 tokens = append(reqTokens, anonymousTokens...)
164                 targetPath = pathParts[2:]
165         }
166
167         tokenResult := make(map[string]int)
168         collection := make(map[string]interface{})
169         found := false
170         for _, arv.ApiToken = range tokens {
171                 err := arv.Get("collections", targetId, nil, &collection)
172                 if err == nil {
173                         // Success
174                         found = true
175                         break
176                 }
177                 if srvErr, ok := err.(arvadosclient.APIServerError); ok {
178                         switch srvErr.HttpStatusCode {
179                         case 404, 401:
180                                 // Token broken or insufficient to
181                                 // retrieve collection
182                                 tokenResult[arv.ApiToken] = srvErr.HttpStatusCode
183                                 continue
184                         }
185                 }
186                 // Something more serious is wrong
187                 statusCode, statusText = http.StatusInternalServerError, err.Error()
188                 return
189         }
190         if !found {
191                 if pathToken {
192                         // The URL is a "secret sharing link", but it
193                         // didn't work out. Asking the client for
194                         // additional credentials would just be
195                         // confusing.
196                         statusCode = http.StatusNotFound
197                         return
198                 }
199                 for _, t := range reqTokens {
200                         if tokenResult[t] == 404 {
201                                 // The client provided valid token(s), but the
202                                 // collection was not found.
203                                 statusCode = http.StatusNotFound
204                                 return
205                         }
206                 }
207                 // The client's token was invalid (e.g., expired), or
208                 // the client didn't even provide one.  Propagate the
209                 // 401 to encourage the client to use a [different]
210                 // token.
211                 //
212                 // TODO(TC): This response would be confusing to
213                 // someone trying (anonymously) to download public
214                 // data that has been deleted.  Allow a referrer to
215                 // provide this context somehow?
216                 w.Header().Add("WWW-Authenticate", "Basic realm=\"dl\"")
217                 statusCode = http.StatusUnauthorized
218                 return
219         }
220
221         filename := strings.Join(targetPath, "/")
222         kc, err := keepclient.MakeKeepClient(arv)
223         if err != nil {
224                 statusCode, statusText = http.StatusInternalServerError, err.Error()
225                 return
226         }
227         rdr, err := kc.CollectionFileReader(collection, filename)
228         if os.IsNotExist(err) {
229                 statusCode = http.StatusNotFound
230                 return
231         } else if err != nil {
232                 statusCode, statusText = http.StatusBadGateway, err.Error()
233                 return
234         }
235         defer rdr.Close()
236
237         // One or both of these can be -1 if not found:
238         basenamePos := strings.LastIndex(filename, "/")
239         extPos := strings.LastIndex(filename, ".")
240         if extPos > basenamePos {
241                 // Now extPos is safely >= 0.
242                 if t := mime.TypeByExtension(filename[extPos:]); t != "" {
243                         w.Header().Set("Content-Type", t)
244                 }
245         }
246         w.Header().Set("Content-Length", fmt.Sprintf("%d", rdr.Len()))
247
248         w.WriteHeader(http.StatusOK)
249         _, err = io.Copy(w, rdr)
250         if err != nil {
251                 statusCode, statusText = http.StatusBadGateway, err.Error()
252         }
253 }