1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
19 "git.curoverse.com/arvados.git/lib/config"
20 "git.curoverse.com/arvados.git/sdk/go/arvados"
21 "git.curoverse.com/arvados.git/sdk/go/arvadostest"
22 "git.curoverse.com/arvados.git/sdk/go/ctxlog"
23 "github.com/prometheus/client_golang/prometheus"
24 check "gopkg.in/check.v1"
// Register runSuite with the gocheck framework.
27 var _ = check.Suite(&runSuite{})
// reqTracker keeps copies of the HTTP requests passed to Add in its
// reqs field. Its other fields are not visible in this excerpt.
29 type reqTracker struct {
// Count returns an int; its body is not visible in this excerpt.
// NOTE(review): presumably the number of requests recorded so far --
// the tests compare it against expected request counts. Confirm.
34 func (rt *reqTracker) Count() int {
// Add appends a copy of *req to rt.reqs and returns an int.
// NOTE(review): the return statement is not visible here; presumably
// it is the updated request count. Confirm.
40 func (rt *reqTracker) Add(req *http.Request) int {
43 rt.reqs = append(rt.reqs, *req)
// stubServices is the keep service list used as a fixture by the
// tests below: four entries on keep0..keep3.zzzzz.arvadosapi.com with
// ServiceSSLFlag false, followed by a fifth entry on
// keep.zzzzz.arvadosapi.com. Fields such as port and service type are
// not visible in this excerpt.
47 var stubServices = []arvados.KeepService{
49 UUID: "zzzzz-bi6l4-000000000000000",
50 ServiceHost: "keep0.zzzzz.arvadosapi.com",
52 ServiceSSLFlag: false,
56 UUID: "zzzzz-bi6l4-000000000000001",
57 ServiceHost: "keep1.zzzzz.arvadosapi.com",
59 ServiceSSLFlag: false,
63 UUID: "zzzzz-bi6l4-000000000000002",
64 ServiceHost: "keep2.zzzzz.arvadosapi.com",
66 ServiceSSLFlag: false,
70 UUID: "zzzzz-bi6l4-000000000000003",
71 ServiceHost: "keep3.zzzzz.arvadosapi.com",
73 ServiceSSLFlag: false,
77 UUID: "zzzzz-bi6l4-h0a0xwut9qa6g3a",
78 ServiceHost: "keep.zzzzz.arvadosapi.com",
// stubMounts maps a keepstore "host:port" address to the mounts that
// the stub reports for that address (the /mounts handler looks up
// entries by the request's Host). The visible entries give each of
// the four addresses one mount with its own UUID and DeviceID.
85 var stubMounts = map[string][]arvados.KeepMount{
86 "keep0.zzzzz.arvadosapi.com:25107": {{
87 UUID: "zzzzz-ivpuk-000000000000000",
88 DeviceID: "keep0-vol0",
90 "keep1.zzzzz.arvadosapi.com:25107": {{
91 UUID: "zzzzz-ivpuk-100000000000000",
92 DeviceID: "keep1-vol0",
94 "keep2.zzzzz.arvadosapi.com:25107": {{
95 UUID: "zzzzz-ivpuk-200000000000000",
96 DeviceID: "keep2-vol0",
98 "keep3.zzzzz.arvadosapi.com:25107": {{
99 UUID: "zzzzz-ivpuk-300000000000000",
100 DeviceID: "keep3-vol0",
104 // stubServer is an HTTP transport that intercepts and processes all
105 // requests using its own handlers.
//
// Its methods also use mux and srv fields, whose declarations are
// not visible in this excerpt.
106 type stubServer struct {
// logf is a printf-style logging callback.
111 logf func(string, ...interface{})
114 // Start initializes the stub server and returns an *http.Client that
115 // uses the stub server to handle all requests.
117 // A stubServer that has been started should eventually be shut down
// by calling Close.
119 func (s *stubServer) Start() *http.Client {
120 // Set up a config.Client that forwards all requests to s.mux
121 // via s.srv. Test cases will attach handlers to s.mux to get
122 // the desired responses.
123 s.mux = http.NewServeMux()
124 s.srv = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
// Label every response as JSON before handing the request to the
// mux.
128 w.Header().Set("Content-Type", "application/json")
129 s.mux.ServeHTTP(w, r)
// The returned client uses s itself as its Transport, so its
// requests are handled by s.RoundTrip.
131 return &http.Client{Transport: s}
// RoundTrip implements http.RoundTripper. It serves req in-process by
// passing it to s.mux with an httptest.ResponseRecorder, then builds
// the *http.Response from what was recorded: the status text and body
// are visible here, other fields are set on lines not shown. The
// visible return statement always yields a nil error.
134 func (s *stubServer) RoundTrip(req *http.Request) (*http.Response, error) {
135 w := httptest.NewRecorder()
136 s.mux.ServeHTTP(w, req)
137 return &http.Response{
139 Status: fmt.Sprintf("%d %s", w.Code, http.StatusText(w.Code)),
141 Body: ioutil.NopCloser(w.Body)}, nil
144 // Close releases resources used by the server.
// NOTE(review): the body is not visible in this excerpt; presumably it
// shuts down the s.srv created by Start. Confirm.
145 func (s *stubServer) Close() {
// serveStatic registers a handler on s.mux for path that reads the
// request body to the end and then writes data as the response body.
// It returns a *reqTracker. NOTE(review): presumably the tracker
// records each request handled; the recording call is not visible
// here. Confirm.
149 func (s *stubServer) serveStatic(path, data string) *reqTracker {
151 s.mux.HandleFunc(path, func(w http.ResponseWriter, r *http.Request) {
// Drain the request body; both the data and any error are discarded.
154 ioutil.ReadAll(r.Body)
157 io.WriteString(w, data)
// serveCurrentUserAdmin serves a fixed current-user record at
// /arvados/v1/users/current with is_admin and is_active both true.
162 func (s *stubServer) serveCurrentUserAdmin() *reqTracker {
163 return s.serveStatic("/arvados/v1/users/current",
164 `{"uuid":"zzzzz-tpzed-000000000000000","is_admin":true,"is_active":true}`)
// serveCurrentUserNotAdmin serves a fixed current-user record at
// /arvados/v1/users/current for an active user with is_admin false.
167 func (s *stubServer) serveCurrentUserNotAdmin() *reqTracker {
168 return s.serveStatic("/arvados/v1/users/current",
169 `{"uuid":"zzzzz-tpzed-000000000000000","is_admin":false,"is_active":true}`)
// serveDiscoveryDoc serves a minimal discovery document whose only
// field is defaultCollectionReplication, set to 2.
172 func (s *stubServer) serveDiscoveryDoc() *reqTracker {
173 return s.serveStatic("/discovery/v1/apis/arvados/v1/rest",
174 `{"defaultCollectionReplication":2}`)
// serveZeroCollections serves an empty collection list
// (items_available 0) at /arvados/v1/collections.
177 func (s *stubServer) serveZeroCollections() *reqTracker {
178 return s.serveStatic("/arvados/v1/collections",
179 `{"items":[],"items_available":0}`)
// serveFooBarFileCollections registers a /arvados/v1/collections
// handler. A request whose "filters" form value mentions modified_at
// gets an empty list. The other visible response lists three
// collections: two with the same portable data hash whose manifest
// holds a file named "bar", and one whose manifest holds a file named
// "foo". The line separating the two responses (presumably an else)
// is not visible in this excerpt.
182 func (s *stubServer) serveFooBarFileCollections() *reqTracker {
184 s.mux.HandleFunc("/arvados/v1/collections", func(w http.ResponseWriter, r *http.Request) {
187 if strings.Contains(r.Form.Get("filters"), `modified_at`) {
188 io.WriteString(w, `{"items_available":0,"items":[]}`)
190 io.WriteString(w, `{"items_available":3,"items":[
191 {"uuid":"zzzzz-4zz18-aaaaaaaaaaaaaaa","portable_data_hash":"fa7aeb5140e2848d39b416daeef4ffc5+45","manifest_text":". 37b51d194a7513e45b56f6524f2d51f2+3 0:3:bar\n","modified_at":"2014-02-03T17:22:54Z"},
192 {"uuid":"zzzzz-4zz18-ehbhgtheo8909or","portable_data_hash":"fa7aeb5140e2848d39b416daeef4ffc5+45","manifest_text":". 37b51d194a7513e45b56f6524f2d51f2+3 0:3:bar\n","modified_at":"2014-02-03T17:22:54Z"},
193 {"uuid":"zzzzz-4zz18-znfnqtbbv4spc3w","portable_data_hash":"1f4b0bc7583c2a7f9102c395f4ffc5e3+45","manifest_text":". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo\n","modified_at":"2014-02-03T17:22:54Z"}]}`)
// serveCollectionsButSkipOne registers a /arvados/v1/collections
// handler whose answers are inconsistent: a query filtered on
// modified_at "<=" reports 3 items available (with no items), while
// the final visible response lists only 2 collections. The other
// modified_at filter variants get empty lists. TestDetectSkippedCollections
// uses this to check that the mismatch is reported as an error.
199 func (s *stubServer) serveCollectionsButSkipOne() *reqTracker {
201 s.mux.HandleFunc("/arvados/v1/collections", func(w http.ResponseWriter, r *http.Request) {
// \u003c and \u003e are the JSON-escaped forms of "<" and ">".
204 if strings.Contains(r.Form.Get("filters"), `"modified_at","\u003c="`) {
205 io.WriteString(w, `{"items_available":3,"items":[]}`)
206 } else if strings.Contains(r.Form.Get("filters"), `"modified_at","\u003e`) {
207 io.WriteString(w, `{"items_available":0,"items":[]}`)
208 } else if strings.Contains(r.Form.Get("filters"), `"modified_at","="`) && strings.Contains(r.Form.Get("filters"), `"uuid","\u003e"`) {
209 io.WriteString(w, `{"items_available":0,"items":[]}`)
210 } else if strings.Contains(r.Form.Get("filters"), `"modified_at","=",null`) {
211 io.WriteString(w, `{"items_available":0,"items":[]}`)
213 io.WriteString(w, `{"items_available":2,"items":[
214 {"uuid":"zzzzz-4zz18-ehbhgtheo8909or","portable_data_hash":"fa7aeb5140e2848d39b416daeef4ffc5+45","manifest_text":". 37b51d194a7513e45b56f6524f2d51f2+3 0:3:bar\n","modified_at":"2014-02-03T17:22:54Z"},
215 {"uuid":"zzzzz-4zz18-znfnqtbbv4spc3w","portable_data_hash":"1f4b0bc7583c2a7f9102c395f4ffc5e3+45","manifest_text":". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo\n","modified_at":"2014-02-03T17:22:54Z"}]}`)
// serveZeroKeepServices serves a zero-value arvados.KeepServiceList
// at /arvados/v1/keep_services.
221 func (s *stubServer) serveZeroKeepServices() *reqTracker {
222 return s.serveJSON("/arvados/v1/keep_services", arvados.KeepServiceList{})
// serveKeepServices serves a KeepServiceList at
// /arvados/v1/keep_services with ItemsAvailable set to len(svcs).
// NOTE(review): the remaining fields are set on lines not shown here;
// presumably Items is svcs. Confirm.
225 func (s *stubServer) serveKeepServices(svcs []arvados.KeepService) *reqTracker {
226 return s.serveJSON("/arvados/v1/keep_services", arvados.KeepServiceList{
227 ItemsAvailable: len(svcs),
// serveJSON registers a handler on s.mux for path that writes resp to
// the response as JSON. The encoder's error return is not checked.
232 func (s *stubServer) serveJSON(path string, resp interface{}) *reqTracker {
234 s.mux.HandleFunc(path, func(w http.ResponseWriter, r *http.Request) {
236 json.NewEncoder(w).Encode(resp)
// serveKeepstoreMounts registers a /mounts handler that responds with
// the JSON encoding of the stubMounts entry for the request's Host.
241 func (s *stubServer) serveKeepstoreMounts() *reqTracker {
243 s.mux.HandleFunc("/mounts", func(w http.ResponseWriter, r *http.Request) {
245 json.NewEncoder(w).Encode(stubMounts[r.Host])
// serveKeepstoreIndexFoo4Bar1 registers block index handlers: one for
// "/index/" and one "/mounts/<UUID>/blocks" handler per mount in
// stubMounts. In the visible code, a request addressed to keep0 gets
// a line for block 37b51d19...+3, and an index line for block
// acbd18db...+3 is written with timestamp 12345678+count. The
// definition of count is not visible in this excerpt.
250 func (s *stubServer) serveKeepstoreIndexFoo4Bar1() *reqTracker {
252 s.mux.HandleFunc("/index/", func(w http.ResponseWriter, r *http.Request) {
254 if r.Host == "keep0.zzzzz.arvadosapi.com:25107" {
255 io.WriteString(w, "37b51d194a7513e45b56f6524f2d51f2+3 12345678\n")
// The extra trailing newline leaves an empty line at the end of the
// response.
257 fmt.Fprintf(w, "acbd18db4cc2f85cedef654fccc4a4d8+3 %d\n\n", 12345678+count)
259 for _, mounts := range stubMounts {
260 for i, mnt := range mounts {
// NOTE(review): the handler closure below reads loop variable i. With
// Go versions before 1.22 that needs a per-iteration copy such as
// `i := i`, which may be on a line not shown here. Confirm.
262 s.mux.HandleFunc(fmt.Sprintf("/mounts/%s/blocks", mnt.UUID), func(w http.ResponseWriter, r *http.Request) {
264 if i == 0 && r.Host == "keep0.zzzzz.arvadosapi.com:25107" {
265 io.WriteString(w, "37b51d194a7513e45b56f6524f2d51f2+3 12345678\n")
268 fmt.Fprintf(w, "acbd18db4cc2f85cedef654fccc4a4d8+3 %d\n", 12345678+count)
// serveKeepstoreIndexFoo1 registers block index handlers: one for
// "/index/" and one "/mounts/<UUID>/blocks" handler per mount in
// stubMounts. The "/index/" handler writes an index line for block
// acbd18db...+3 followed by an empty line. The per-mount handlers
// write either that same content or just "\n"; the condition that
// chooses between them is not visible in this excerpt.
277 func (s *stubServer) serveKeepstoreIndexFoo1() *reqTracker {
279 s.mux.HandleFunc("/index/", func(w http.ResponseWriter, r *http.Request) {
281 io.WriteString(w, "acbd18db4cc2f85cedef654fccc4a4d8+3 12345678\n\n")
283 for _, mounts := range stubMounts {
284 for i, mnt := range mounts {
286 s.mux.HandleFunc(fmt.Sprintf("/mounts/%s/blocks", mnt.UUID), func(w http.ResponseWriter, r *http.Request) {
289 io.WriteString(w, "acbd18db4cc2f85cedef654fccc4a4d8+3 12345678\n\n")
291 io.WriteString(w, "\n")
// serveKeepstoreTrash serves the static body `{}` at /trash and
// returns the tracker from serveStatic.
299 func (s *stubServer) serveKeepstoreTrash() *reqTracker {
300 return s.serveStatic("/trash", `{}`)
// serveKeepstorePull serves the static body `{}` at /pull and returns
// the tracker from serveStatic.
303 func (s *stubServer) serveKeepstorePull() *reqTracker {
304 return s.serveStatic("/pull", `{}`)
// runSuite is the gocheck suite holding the tests below. Its methods
// also use a stub field whose declaration is not visible in this
// excerpt.
307 type runSuite struct {
// config is the cluster configuration loaded in SetUpTest.
309 config *arvados.Cluster
// client is the API client built in SetUpTest; its HTTP client is the
// one returned by s.stub.Start().
310 client *arvados.Client
// newServer builds a *Server for a test from options. The visible
// fields copy *options into RunOptions, create Metrics on a fresh
// Prometheus registry, and take Logger and Dumper from options. Other
// fields are set on lines not shown here.
313 func (s *runSuite) newServer(options *RunOptions) *Server {
317 RunOptions: *options,
318 Metrics: newMetrics(prometheus.NewRegistry()),
319 Logger: options.Logger,
320 Dumper: options.Dumper,
// SetUpTest prepares the suite before each test: it loads the cluster
// configuration, sets a one-second balance period and a Keepbalance
// service URL, builds an API client on top of the stub server, and
// registers the discovery document handler.
326 func (s *runSuite) SetUpTest(c *check.C) {
327 cfg, err := config.NewLoader(nil, ctxlog.TestLogger(c)).Load()
328 c.Assert(err, check.Equals, nil)
329 s.config, err = cfg.GetCluster("")
330 c.Assert(err, check.Equals, nil)
332 s.config.Collections.BalancePeriod = arvados.Duration(time.Second)
333 arvadostest.SetServiceURL(&s.config.Services.Keepbalance, "http://localhost:/")
335 s.client = &arvados.Client{
337 APIHost: "zzzzz.arvadosapi.com",
// Starting the stub yields the HTTP client used for API requests.
338 Client: s.stub.Start()}
340 s.stub.serveDiscoveryDoc()
// TearDownTest is the gocheck hook that runs after each test.
// NOTE(review): the body is not visible in this excerpt; presumably
// it closes s.stub. Confirm.
344 func (s *runSuite) TearDownTest(c *check.C) {
// TestRefuseZeroCollections checks that runOnce fails with "received
// zero collections" when the API returns an empty collection list,
// and that no pull requests are sent.
348 func (s *runSuite) TestRefuseZeroCollections(c *check.C) {
352 Logger: ctxlog.TestLogger(c),
354 s.stub.serveCurrentUserAdmin()
355 s.stub.serveZeroCollections()
356 s.stub.serveKeepServices(stubServices)
357 s.stub.serveKeepstoreMounts()
358 s.stub.serveKeepstoreIndexFoo4Bar1()
359 trashReqs := s.stub.serveKeepstoreTrash()
360 pullReqs := s.stub.serveKeepstorePull()
361 srv := s.newServer(&opts)
362 _, err := srv.runOnce()
363 c.Check(err, check.ErrorMatches, "received zero collections")
// NOTE(review): presumably these 4 requests are the empty trash lists
// sent at startup (see the comment in TestRunForever). Confirm.
364 c.Check(trashReqs.Count(), check.Equals, 4)
365 c.Check(pullReqs.Count(), check.Equals, 0)
// TestRefuseNonAdmin checks that runOnce fails with a "not admin"
// error when the current user has is_admin false, and that no trash
// or pull requests are sent.
368 func (s *runSuite) TestRefuseNonAdmin(c *check.C) {
372 Logger: ctxlog.TestLogger(c),
374 s.stub.serveCurrentUserNotAdmin()
375 s.stub.serveZeroCollections()
376 s.stub.serveKeepServices(stubServices)
377 s.stub.serveKeepstoreMounts()
378 trashReqs := s.stub.serveKeepstoreTrash()
379 pullReqs := s.stub.serveKeepstorePull()
380 srv := s.newServer(&opts)
381 _, err := srv.runOnce()
382 c.Check(err, check.ErrorMatches, "current user .* is not .* admin user")
383 c.Check(trashReqs.Count(), check.Equals, 0)
384 c.Check(pullReqs.Count(), check.Equals, 0)
// TestDetectSkippedCollections checks that runOnce returns an error
// when it retrieves 2 collections but the server reports 3 (the
// inconsistency staged by serveCollectionsButSkipOne), and that no
// pull requests are sent.
387 func (s *runSuite) TestDetectSkippedCollections(c *check.C) {
391 Logger: ctxlog.TestLogger(c),
393 s.stub.serveCurrentUserAdmin()
394 s.stub.serveCollectionsButSkipOne()
395 s.stub.serveKeepServices(stubServices)
396 s.stub.serveKeepstoreMounts()
397 s.stub.serveKeepstoreIndexFoo4Bar1()
398 trashReqs := s.stub.serveKeepstoreTrash()
399 pullReqs := s.stub.serveKeepstorePull()
400 srv := s.newServer(&opts)
401 _, err := srv.runOnce()
402 c.Check(err, check.ErrorMatches, `Retrieved 2 collections with modtime <= .* but server now reports there are 3 collections.*`)
// NOTE(review): presumably these 4 requests are the empty trash lists
// sent at startup (see the comment in TestRunForever). Confirm.
403 c.Check(trashReqs.Count(), check.Equals, 4)
404 c.Check(pullReqs.Count(), check.Equals, 0)
// TestWriteLostBlocks points Collections.BlobMissingReport at a
// temporary file and checks what runOnce writes there. The stub index
// (serveKeepstoreIndexFoo1) lists only block acbd18db..., while the
// collections also reference block 37b51d19...; the report is
// expected to contain exactly one line pairing that block's hash with
// the portable data hash of the collections referencing it.
407 func (s *runSuite) TestWriteLostBlocks(c *check.C) {
408 lostf, err := ioutil.TempFile("", "keep-balance-lost-blocks-test-")
409 c.Assert(err, check.IsNil)
410 s.config.Collections.BlobMissingReport = lostf.Name()
// Remove the temporary report file when the test finishes.
411 defer os.Remove(lostf.Name())
415 Logger: ctxlog.TestLogger(c),
417 s.stub.serveCurrentUserAdmin()
418 s.stub.serveFooBarFileCollections()
419 s.stub.serveKeepServices(stubServices)
420 s.stub.serveKeepstoreMounts()
421 s.stub.serveKeepstoreIndexFoo1()
422 s.stub.serveKeepstoreTrash()
423 s.stub.serveKeepstorePull()
424 srv := s.newServer(&opts)
// NOTE(review): in the visible lines err has not been reassigned since
// the TempFile check above, so this assertion looks redundant.
425 c.Assert(err, check.IsNil)
426 _, err = srv.runOnce()
427 c.Check(err, check.IsNil)
428 lost, err := ioutil.ReadFile(lostf.Name())
429 c.Assert(err, check.IsNil)
430 c.Check(string(lost), check.Equals, "37b51d194a7513e45b56f6524f2d51f2 fa7aeb5140e2848d39b416daeef4ffc5+45\n")
// TestDryRun checks a run in which no trash or pull requests reach
// the keepstore stubs, while the balancer's stats still report
// non-zero pulls and under-/over-replicated replicas. It also checks
// that every collection request asked for include_trash and
// include_old_versions. The options that make this a dry run are set
// on lines not visible in this excerpt.
433 func (s *runSuite) TestDryRun(c *check.C) {
437 Logger: ctxlog.TestLogger(c),
439 s.stub.serveCurrentUserAdmin()
440 collReqs := s.stub.serveFooBarFileCollections()
441 s.stub.serveKeepServices(stubServices)
442 s.stub.serveKeepstoreMounts()
443 s.stub.serveKeepstoreIndexFoo4Bar1()
444 trashReqs := s.stub.serveKeepstoreTrash()
445 pullReqs := s.stub.serveKeepstorePull()
446 srv := s.newServer(&opts)
447 bal, err := srv.runOnce()
448 c.Check(err, check.IsNil)
// Inspect the form values of each recorded collection request.
449 for _, req := range collReqs.reqs {
450 c.Check(req.Form.Get("include_trash"), check.Equals, "true")
451 c.Check(req.Form.Get("include_old_versions"), check.Equals, "true")
453 c.Check(trashReqs.Count(), check.Equals, 0)
454 c.Check(pullReqs.Count(), check.Equals, 0)
455 c.Check(bal.stats.pulls, check.Not(check.Equals), 0)
456 c.Check(bal.stats.underrep.replicas, check.Not(check.Equals), 0)
457 c.Check(bal.stats.overrep.replicas, check.Not(check.Equals), 0)
// TestCommit checks a single runOnce in which changes are sent to the
// keepstore stubs: it expects 8 trash and 4 pull requests, 2 trashes
// and 2 pulls in the balancer stats, an empty lost-blocks report, and
// specific values in the exported metrics. The management token is
// set to "xyzzy" so that getMetrics can authenticate.
460 func (s *runSuite) TestCommit(c *check.C) {
461 lostf, err := ioutil.TempFile("", "keep-balance-lost-blocks-test-")
462 c.Assert(err, check.IsNil)
463 s.config.Collections.BlobMissingReport = lostf.Name()
// Remove the temporary report file when the test finishes.
464 defer os.Remove(lostf.Name())
466 s.config.ManagementToken = "xyzzy"
470 Logger: ctxlog.TestLogger(c),
471 Dumper: ctxlog.TestLogger(c),
473 s.stub.serveCurrentUserAdmin()
474 s.stub.serveFooBarFileCollections()
475 s.stub.serveKeepServices(stubServices)
476 s.stub.serveKeepstoreMounts()
477 s.stub.serveKeepstoreIndexFoo4Bar1()
478 trashReqs := s.stub.serveKeepstoreTrash()
479 pullReqs := s.stub.serveKeepstorePull()
480 srv := s.newServer(&opts)
481 bal, err := srv.runOnce()
482 c.Check(err, check.IsNil)
483 c.Check(trashReqs.Count(), check.Equals, 8)
484 c.Check(pullReqs.Count(), check.Equals, 4)
485 // "foo" block is overreplicated by 2
486 c.Check(bal.stats.trashes, check.Equals, 2)
487 // "bar" block is underreplicated by 1, and its only copy is
488 // in a poor rendezvous position
489 c.Check(bal.stats.pulls, check.Equals, 2)
// No block is lost in this scenario, so the report must be empty.
491 lost, err := ioutil.ReadFile(lostf.Name())
492 c.Assert(err, check.IsNil)
493 c.Check(string(lost), check.Equals, "")
495 metrics := s.getMetrics(c, srv)
496 c.Check(metrics, check.Matches, `(?ms).*\narvados_keep_total_bytes 15\n.*`)
497 c.Check(metrics, check.Matches, `(?ms).*\narvados_keepbalance_changeset_compute_seconds_sum [0-9\.]+\n.*`)
498 c.Check(metrics, check.Matches, `(?ms).*\narvados_keepbalance_changeset_compute_seconds_count 1\n.*`)
499 c.Check(metrics, check.Matches, `(?ms).*\narvados_keep_dedup_byte_ratio 1\.5\n.*`)
500 c.Check(metrics, check.Matches, `(?ms).*\narvados_keep_dedup_block_ratio 1\.5\n.*`)
// TestRunForever runs the server repeatedly with a one-millisecond
// balance period and waits until at least 16 pull requests have been
// seen (or 10 seconds have passed). It then checks the trash request
// count and the changeset_compute_seconds_count metric against the
// number of pull requests. The code that starts and stops the run
// loop is on lines not visible in this excerpt.
503 func (s *runSuite) TestRunForever(c *check.C) {
504 s.config.ManagementToken = "xyzzy"
508 Logger: ctxlog.TestLogger(c),
509 Dumper: ctxlog.TestLogger(c),
511 s.stub.serveCurrentUserAdmin()
512 s.stub.serveFooBarFileCollections()
513 s.stub.serveKeepServices(stubServices)
514 s.stub.serveKeepstoreMounts()
515 s.stub.serveKeepstoreIndexFoo4Bar1()
516 trashReqs := s.stub.serveKeepstoreTrash()
517 pullReqs := s.stub.serveKeepstorePull()
519 stop := make(chan interface{})
520 s.config.Collections.BalancePeriod = arvados.Duration(time.Millisecond)
521 srv := s.newServer(&opts)
523 done := make(chan bool)
529 // Each run should send 4 pull lists + 4 trash lists. The
530 // first run should also send 4 empty trash lists at
531 // startup. We should complete all four runs in much less than
// the 10-second limit enforced by the polling loop below.
533 for t0 := time.Now(); pullReqs.Count() < 16 && time.Since(t0) < 10*time.Second; {
534 time.Sleep(time.Millisecond)
538 c.Check(pullReqs.Count() >= 16, check.Equals, true)
// The extra 4 trash requests are the empty lists sent at startup.
539 c.Check(trashReqs.Count(), check.Equals, pullReqs.Count()+4)
// With 4 pull lists per run, pullReqs.Count()/4 is the number of runs.
540 c.Check(s.getMetrics(c, srv), check.Matches, `(?ms).*\narvados_keepbalance_changeset_compute_seconds_count `+fmt.Sprintf("%d", pullReqs.Count()/4)+`\n.*`)
// getMetrics requests /metrics from srv twice: first without a token,
// expecting 401 Unauthorized, then with api_token=xyzzy, expecting
// 200 OK. It reads the body of the second response.
// NOTE(review): the return statement is not visible in this excerpt;
// presumably the body is returned as a string. Confirm.
543 func (s *runSuite) getMetrics(c *check.C, srv *Server) string {
544 req := httptest.NewRequest("GET", "/metrics", nil)
545 resp := httptest.NewRecorder()
546 srv.ServeHTTP(resp, req)
547 c.Check(resp.Code, check.Equals, http.StatusUnauthorized)
// "xyzzy" is the ManagementToken value that the calling tests set.
549 req = httptest.NewRequest("GET", "/metrics?api_token=xyzzy", nil)
550 resp = httptest.NewRecorder()
551 srv.ServeHTTP(resp, req)
552 c.Check(resp.Code, check.Equals, http.StatusOK)
554 buf, err := ioutil.ReadAll(resp.Body)
555 c.Check(err, check.IsNil)