14287: Handle collection/.../provenance and .../used_by requests.
[arvados.git] / services / keep-web / server_test.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package main
6
7 import (
8         "crypto/md5"
9         "encoding/json"
10         "fmt"
11         "io"
12         "io/ioutil"
13         "net"
14         "net/http"
15         "os"
16         "os/exec"
17         "strings"
18         "testing"
19
20         "git.curoverse.com/arvados.git/sdk/go/arvados"
21         "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
22         "git.curoverse.com/arvados.git/sdk/go/arvadostest"
23         "git.curoverse.com/arvados.git/sdk/go/keepclient"
24         check "gopkg.in/check.v1"
25 )
26
27 var testAPIHost = os.Getenv("ARVADOS_API_HOST")
28
29 var _ = check.Suite(&IntegrationSuite{})
30
31 // IntegrationSuite tests need an API server and a keep-web server
32 type IntegrationSuite struct {
33         testServer *server
34 }
35
36 func (s *IntegrationSuite) TestNoToken(c *check.C) {
37         for _, token := range []string{
38                 "",
39                 "bogustoken",
40         } {
41                 hdr, body, _ := s.runCurl(c, token, "collections.example.com", "/collections/"+arvadostest.FooCollection+"/foo")
42                 c.Check(hdr, check.Matches, `(?s)HTTP/1.1 404 Not Found\r\n.*`)
43                 c.Check(body, check.Equals, "")
44
45                 if token != "" {
46                         hdr, body, _ = s.runCurl(c, token, "collections.example.com", "/collections/download/"+arvadostest.FooCollection+"/"+token+"/foo")
47                         c.Check(hdr, check.Matches, `(?s)HTTP/1.1 404 Not Found\r\n.*`)
48                         c.Check(body, check.Equals, "")
49                 }
50
51                 hdr, body, _ = s.runCurl(c, token, "collections.example.com", "/bad-route")
52                 c.Check(hdr, check.Matches, `(?s)HTTP/1.1 404 Not Found\r\n.*`)
53                 c.Check(body, check.Equals, "")
54         }
55 }
56
57 // TODO: Move most cases to functional tests -- at least use Go's own
58 // http client instead of forking curl. Just leave enough of an
59 // integration test to assure that the documented way of invoking curl
60 // really works against the server.
61 func (s *IntegrationSuite) Test404(c *check.C) {
62         for _, uri := range []string{
63                 // Routing errors (always 404 regardless of what's stored in Keep)
64                 "/foo",
65                 "/download",
66                 "/collections",
67                 "/collections/",
68                 // Implicit/generated index is not implemented yet;
69                 // until then, return 404.
70                 "/collections/" + arvadostest.FooCollection,
71                 "/collections/" + arvadostest.FooCollection + "/",
72                 "/collections/" + arvadostest.FooBarDirCollection + "/dir1",
73                 "/collections/" + arvadostest.FooBarDirCollection + "/dir1/",
74                 // Non-existent file in collection
75                 "/collections/" + arvadostest.FooCollection + "/theperthcountyconspiracy",
76                 "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/theperthcountyconspiracy",
77                 // Non-existent collection
78                 "/collections/" + arvadostest.NonexistentCollection,
79                 "/collections/" + arvadostest.NonexistentCollection + "/",
80                 "/collections/" + arvadostest.NonexistentCollection + "/theperthcountyconspiracy",
81                 "/collections/download/" + arvadostest.NonexistentCollection + "/" + arvadostest.ActiveToken + "/theperthcountyconspiracy",
82         } {
83                 hdr, body, _ := s.runCurl(c, arvadostest.ActiveToken, "collections.example.com", uri)
84                 c.Check(hdr, check.Matches, "(?s)HTTP/1.1 404 Not Found\r\n.*")
85                 if len(body) > 0 {
86                         c.Check(body, check.Equals, "404 page not found\n")
87                 }
88         }
89 }
90
91 func (s *IntegrationSuite) Test1GBFile(c *check.C) {
92         if testing.Short() {
93                 c.Skip("skipping 1GB integration test in short mode")
94         }
95         s.test100BlockFile(c, 10000000)
96 }
97
98 func (s *IntegrationSuite) Test100BlockFile(c *check.C) {
99         if testing.Short() {
100                 // 3 MB
101                 s.test100BlockFile(c, 30000)
102         } else {
103                 // 300 MB
104                 s.test100BlockFile(c, 3000000)
105         }
106 }
107
108 func (s *IntegrationSuite) test100BlockFile(c *check.C, blocksize int) {
109         testdata := make([]byte, blocksize)
110         for i := 0; i < blocksize; i++ {
111                 testdata[i] = byte(' ')
112         }
113         arv, err := arvadosclient.MakeArvadosClient()
114         c.Assert(err, check.Equals, nil)
115         arv.ApiToken = arvadostest.ActiveToken
116         kc, err := keepclient.MakeKeepClient(arv)
117         c.Assert(err, check.Equals, nil)
118         loc, _, err := kc.PutB(testdata[:])
119         c.Assert(err, check.Equals, nil)
120         mtext := "."
121         for i := 0; i < 100; i++ {
122                 mtext = mtext + " " + loc
123         }
124         mtext = mtext + fmt.Sprintf(" 0:%d00:testdata.bin\n", blocksize)
125         coll := map[string]interface{}{}
126         err = arv.Create("collections",
127                 map[string]interface{}{
128                         "collection": map[string]interface{}{
129                                 "name":          fmt.Sprintf("testdata blocksize=%d", blocksize),
130                                 "manifest_text": mtext,
131                         },
132                 }, &coll)
133         c.Assert(err, check.Equals, nil)
134         uuid := coll["uuid"].(string)
135
136         hdr, body, size := s.runCurl(c, arv.ApiToken, uuid+".collections.example.com", "/testdata.bin")
137         c.Check(hdr, check.Matches, `(?s)HTTP/1.1 200 OK\r\n.*`)
138         c.Check(hdr, check.Matches, `(?si).*Content-length: `+fmt.Sprintf("%d00", blocksize)+`\r\n.*`)
139         c.Check([]byte(body)[:1234], check.DeepEquals, testdata[:1234])
140         c.Check(size, check.Equals, int64(blocksize)*100)
141 }
142
// curlCase describes one request made by Test200 and the content
// expected in the response body.
type curlCase struct {
	// auth is the token to send in the Authorization header. If
	// empty, no Authorization header is sent.
	auth string
	// host is the virtual host to request. If empty, Test200
	// uses "collections.example.com".
	host string
	// path is the request URI.
	path string
	// dataMD5 is the hex-encoded MD5 digest of the expected
	// response body.
	dataMD5 string
}
149
150 func (s *IntegrationSuite) Test200(c *check.C) {
151         s.testServer.Config.AnonymousTokens = []string{arvadostest.AnonymousToken}
152         for _, spec := range []curlCase{
153                 // My collection
154                 {
155                         auth:    arvadostest.ActiveToken,
156                         host:    arvadostest.FooCollection + "--collections.example.com",
157                         path:    "/foo",
158                         dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
159                 },
160                 {
161                         auth:    arvadostest.ActiveToken,
162                         host:    arvadostest.FooCollection + ".collections.example.com",
163                         path:    "/foo",
164                         dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
165                 },
166                 {
167                         host:    strings.Replace(arvadostest.FooCollectionPDH, "+", "-", 1) + ".collections.example.com",
168                         path:    "/t=" + arvadostest.ActiveToken + "/foo",
169                         dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
170                 },
171                 {
172                         path:    "/c=" + arvadostest.FooCollectionPDH + "/t=" + arvadostest.ActiveToken + "/foo",
173                         dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
174                 },
175                 {
176                         path:    "/c=" + strings.Replace(arvadostest.FooCollectionPDH, "+", "-", 1) + "/t=" + arvadostest.ActiveToken + "/_/foo",
177                         dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
178                 },
179                 {
180                         path:    "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
181                         dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
182                 },
183                 {
184                         auth:    "tokensobogus",
185                         path:    "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
186                         dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
187                 },
188                 {
189                         auth:    arvadostest.ActiveToken,
190                         path:    "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
191                         dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
192                 },
193                 {
194                         auth:    arvadostest.AnonymousToken,
195                         path:    "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
196                         dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
197                 },
198
199                 // Anonymously accessible data
200                 {
201                         path:    "/c=" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
202                         dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
203                 },
204                 {
205                         host:    arvadostest.HelloWorldCollection + ".collections.example.com",
206                         path:    "/Hello%20world.txt",
207                         dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
208                 },
209                 {
210                         host:    arvadostest.HelloWorldCollection + ".collections.example.com",
211                         path:    "/_/Hello%20world.txt",
212                         dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
213                 },
214                 {
215                         path:    "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
216                         dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
217                 },
218                 {
219                         auth:    arvadostest.ActiveToken,
220                         path:    "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
221                         dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
222                 },
223                 {
224                         auth:    arvadostest.SpectatorToken,
225                         path:    "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
226                         dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
227                 },
228                 {
229                         auth:    arvadostest.SpectatorToken,
230                         host:    arvadostest.HelloWorldCollection + "--collections.example.com",
231                         path:    "/Hello%20world.txt",
232                         dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
233                 },
234                 {
235                         auth:    arvadostest.SpectatorToken,
236                         path:    "/collections/download/" + arvadostest.HelloWorldCollection + "/" + arvadostest.SpectatorToken + "/Hello%20world.txt",
237                         dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
238                 },
239         } {
240                 host := spec.host
241                 if host == "" {
242                         host = "collections.example.com"
243                 }
244                 hdr, body, _ := s.runCurl(c, spec.auth, host, spec.path)
245                 c.Check(hdr, check.Matches, `(?s)HTTP/1.1 200 OK\r\n.*`)
246                 if strings.HasSuffix(spec.path, ".txt") {
247                         c.Check(hdr, check.Matches, `(?s).*\r\nContent-Type: text/plain.*`)
248                         // TODO: Check some types that aren't
249                         // automatically detected by Go's http server
250                         // by sniffing the content.
251                 }
252                 c.Check(fmt.Sprintf("%x", md5.Sum([]byte(body))), check.Equals, spec.dataMD5)
253         }
254 }
255
256 // Return header block and body.
257 func (s *IntegrationSuite) runCurl(c *check.C, token, host, uri string, args ...string) (hdr, bodyPart string, bodySize int64) {
258         curlArgs := []string{"--silent", "--show-error", "--include"}
259         testHost, testPort, _ := net.SplitHostPort(s.testServer.Addr)
260         curlArgs = append(curlArgs, "--resolve", host+":"+testPort+":"+testHost)
261         if token != "" {
262                 curlArgs = append(curlArgs, "-H", "Authorization: OAuth2 "+token)
263         }
264         curlArgs = append(curlArgs, args...)
265         curlArgs = append(curlArgs, "http://"+host+":"+testPort+uri)
266         c.Log(fmt.Sprintf("curlArgs == %#v", curlArgs))
267         cmd := exec.Command("curl", curlArgs...)
268         stdout, err := cmd.StdoutPipe()
269         c.Assert(err, check.IsNil)
270         cmd.Stderr = os.Stderr
271         err = cmd.Start()
272         c.Assert(err, check.IsNil)
273         buf := make([]byte, 2<<27)
274         n, err := io.ReadFull(stdout, buf)
275         // Discard (but measure size of) anything past 128 MiB.
276         var discarded int64
277         if err == io.ErrUnexpectedEOF {
278                 buf = buf[:n]
279         } else {
280                 c.Assert(err, check.IsNil)
281                 discarded, err = io.Copy(ioutil.Discard, stdout)
282                 c.Assert(err, check.IsNil)
283         }
284         err = cmd.Wait()
285         // Without "-f", curl exits 0 as long as it gets a valid HTTP
286         // response from the server, even if the response status
287         // indicates that the request failed. In our test suite, we
288         // always expect a valid HTTP response, and we parse the
289         // headers ourselves. If curl exits non-zero, our testing
290         // environment is broken.
291         c.Assert(err, check.Equals, nil)
292         hdrsAndBody := strings.SplitN(string(buf), "\r\n\r\n", 2)
293         c.Assert(len(hdrsAndBody), check.Equals, 2)
294         hdr = hdrsAndBody[0]
295         bodyPart = hdrsAndBody[1]
296         bodySize = int64(len(bodyPart)) + discarded
297         return
298 }
299
300 func (s *IntegrationSuite) TestMetrics(c *check.C) {
301         origin := "http://" + s.testServer.Addr
302         req, _ := http.NewRequest("GET", origin+"/notfound", nil)
303         _, err := http.DefaultClient.Do(req)
304         c.Assert(err, check.IsNil)
305         req, _ = http.NewRequest("GET", origin+"/by_id/", nil)
306         req.Header.Set("Authorization", "Bearer "+arvadostest.ActiveToken)
307         resp, err := http.DefaultClient.Do(req)
308         c.Assert(err, check.IsNil)
309         c.Check(resp.StatusCode, check.Equals, http.StatusOK)
310         for i := 0; i < 2; i++ {
311                 req, _ = http.NewRequest("GET", origin+"/foo", nil)
312                 req.Host = arvadostest.FooCollection + ".example.com"
313                 req.Header.Set("Authorization", "Bearer "+arvadostest.ActiveToken)
314                 resp, err = http.DefaultClient.Do(req)
315                 c.Assert(err, check.IsNil)
316                 c.Check(resp.StatusCode, check.Equals, http.StatusOK)
317                 buf, _ := ioutil.ReadAll(resp.Body)
318                 c.Check(buf, check.DeepEquals, []byte("foo"))
319                 resp.Body.Close()
320         }
321
322         s.testServer.Config.Cache.updateGauges()
323
324         req, _ = http.NewRequest("GET", origin+"/metrics.json", nil)
325         resp, err = http.DefaultClient.Do(req)
326         c.Assert(err, check.IsNil)
327         c.Check(resp.StatusCode, check.Equals, http.StatusUnauthorized)
328
329         req, _ = http.NewRequest("GET", origin+"/metrics.json", nil)
330         req.Header.Set("Authorization", "Bearer badtoken")
331         resp, err = http.DefaultClient.Do(req)
332         c.Assert(err, check.IsNil)
333         c.Check(resp.StatusCode, check.Equals, http.StatusForbidden)
334
335         req, _ = http.NewRequest("GET", origin+"/metrics.json", nil)
336         req.Header.Set("Authorization", "Bearer "+arvadostest.ManagementToken)
337         resp, err = http.DefaultClient.Do(req)
338         c.Assert(err, check.IsNil)
339         c.Check(resp.StatusCode, check.Equals, http.StatusOK)
340         type summary struct {
341                 SampleCount string  `json:"sample_count"`
342                 SampleSum   float64 `json:"sample_sum"`
343                 Quantile    []struct {
344                         Quantile float64
345                         Value    float64
346                 }
347         }
348         type counter struct {
349                 Value int64
350         }
351         type gauge struct {
352                 Value float64
353         }
354         var ents []struct {
355                 Name   string
356                 Help   string
357                 Type   string
358                 Metric []struct {
359                         Label []struct {
360                                 Name  string
361                                 Value string
362                         }
363                         Counter counter
364                         Gauge   gauge
365                         Summary summary
366                 }
367         }
368         json.NewDecoder(resp.Body).Decode(&ents)
369         summaries := map[string]summary{}
370         gauges := map[string]gauge{}
371         counters := map[string]counter{}
372         for _, e := range ents {
373                 for _, m := range e.Metric {
374                         labels := map[string]string{}
375                         for _, lbl := range m.Label {
376                                 labels[lbl.Name] = lbl.Value
377                         }
378                         summaries[e.Name+"/"+labels["method"]+"/"+labels["code"]] = m.Summary
379                         counters[e.Name+"/"+labels["method"]+"/"+labels["code"]] = m.Counter
380                         gauges[e.Name+"/"+labels["method"]+"/"+labels["code"]] = m.Gauge
381                 }
382         }
383         c.Check(summaries["request_duration_seconds/get/200"].SampleSum, check.Not(check.Equals), 0)
384         c.Check(summaries["request_duration_seconds/get/200"].SampleCount, check.Equals, "3")
385         c.Check(summaries["request_duration_seconds/get/404"].SampleCount, check.Equals, "1")
386         c.Check(summaries["time_to_status_seconds/get/404"].SampleCount, check.Equals, "1")
387         c.Check(counters["arvados_keepweb_collectioncache_requests//"].Value, check.Equals, int64(2))
388         c.Check(counters["arvados_keepweb_collectioncache_api_calls//"].Value, check.Equals, int64(1))
389         c.Check(counters["arvados_keepweb_collectioncache_hits//"].Value, check.Equals, int64(1))
390         c.Check(counters["arvados_keepweb_collectioncache_pdh_hits//"].Value, check.Equals, int64(1))
391         c.Check(counters["arvados_keepweb_collectioncache_permission_hits//"].Value, check.Equals, int64(1))
392         c.Check(gauges["arvados_keepweb_collectioncache_cached_manifests//"].Value, check.Equals, float64(1))
393         // FooCollection's cached manifest size is 45 ("1f4b0....+45") plus one 51-byte blob signature
394         c.Check(gauges["arvados_keepweb_collectioncache_cached_manifest_bytes//"].Value, check.Equals, float64(45+51))
395
396         // If the Host header indicates a collection, /metrics.json
397         // refers to a file in the collection -- the metrics handler
398         // must not intercept that route.
399         req, _ = http.NewRequest("GET", origin+"/metrics.json", nil)
400         req.Host = strings.Replace(arvadostest.FooCollectionPDH, "+", "-", -1) + ".example.com"
401         req.Header.Set("Authorization", "Bearer "+arvadostest.ActiveToken)
402         resp, err = http.DefaultClient.Do(req)
403         c.Assert(err, check.IsNil)
404         c.Check(resp.StatusCode, check.Equals, http.StatusNotFound)
405 }
406
407 func (s *IntegrationSuite) SetUpSuite(c *check.C) {
408         arvadostest.StartAPI()
409         arvadostest.StartKeep(2, true)
410
411         arv, err := arvadosclient.MakeArvadosClient()
412         c.Assert(err, check.Equals, nil)
413         arv.ApiToken = arvadostest.ActiveToken
414         kc, err := keepclient.MakeKeepClient(arv)
415         c.Assert(err, check.Equals, nil)
416         kc.PutB([]byte("Hello world\n"))
417         kc.PutB([]byte("foo"))
418         kc.PutB([]byte("foobar"))
419         kc.PutB([]byte("waz"))
420 }
421
422 func (s *IntegrationSuite) TearDownSuite(c *check.C) {
423         arvadostest.StopKeep(2)
424         arvadostest.StopAPI()
425 }
426
427 func (s *IntegrationSuite) SetUpTest(c *check.C) {
428         arvadostest.ResetEnv()
429         cfg := DefaultConfig()
430         cfg.Client = arvados.Client{
431                 APIHost:  testAPIHost,
432                 Insecure: true,
433         }
434         cfg.Listen = "127.0.0.1:0"
435         cfg.ManagementToken = arvadostest.ManagementToken
436         s.testServer = &server{Config: cfg}
437         err := s.testServer.Start()
438         c.Assert(err, check.Equals, nil)
439 }
440
441 func (s *IntegrationSuite) TearDownTest(c *check.C) {
442         var err error
443         if s.testServer != nil {
444                 err = s.testServer.Close()
445         }
446         c.Check(err, check.Equals, nil)
447 }
448
449 // Gocheck boilerplate
450 func Test(t *testing.T) {
451         check.TestingT(t)
452 }