1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
20 "git.arvados.org/arvados.git/lib/config"
21 "git.arvados.org/arvados.git/sdk/go/arvados"
22 "git.arvados.org/arvados.git/sdk/go/arvadostest"
23 "git.arvados.org/arvados.git/sdk/go/ctxlog"
24 "github.com/prometheus/client_golang/prometheus"
25 "github.com/prometheus/common/expfmt"
26 check "gopkg.in/check.v1"
29 var _ = check.Suite(&runSuite{})
31 type reqTracker struct {
36 func (rt *reqTracker) Count() int {
42 func (rt *reqTracker) Add(req *http.Request) int {
45 rt.reqs = append(rt.reqs, *req)
49 var stubServices = []arvados.KeepService{
51 UUID: "zzzzz-bi6l4-000000000000000",
52 ServiceHost: "keep0.zzzzz.arvadosapi.com",
54 ServiceSSLFlag: false,
58 UUID: "zzzzz-bi6l4-000000000000001",
59 ServiceHost: "keep1.zzzzz.arvadosapi.com",
61 ServiceSSLFlag: false,
65 UUID: "zzzzz-bi6l4-000000000000002",
66 ServiceHost: "keep2.zzzzz.arvadosapi.com",
68 ServiceSSLFlag: false,
72 UUID: "zzzzz-bi6l4-000000000000003",
73 ServiceHost: "keep3.zzzzz.arvadosapi.com",
75 ServiceSSLFlag: false,
79 UUID: "zzzzz-bi6l4-h0a0xwut9qa6g3a",
80 ServiceHost: "keep.zzzzz.arvadosapi.com",
87 var stubMounts = map[string][]arvados.KeepMount{
88 "keep0.zzzzz.arvadosapi.com:25107": {{
89 UUID: "zzzzz-ivpuk-000000000000000",
90 DeviceID: "keep0-vol0",
92 "keep1.zzzzz.arvadosapi.com:25107": {{
93 UUID: "zzzzz-ivpuk-100000000000000",
94 DeviceID: "keep1-vol0",
96 "keep2.zzzzz.arvadosapi.com:25107": {{
97 UUID: "zzzzz-ivpuk-200000000000000",
98 DeviceID: "keep2-vol0",
100 "keep3.zzzzz.arvadosapi.com:25107": {{
101 UUID: "zzzzz-ivpuk-300000000000000",
102 DeviceID: "keep3-vol0",
106 // stubServer is an HTTP transport that intercepts and processes all
107 // requests using its own handlers.
108 type stubServer struct {
113 logf func(string, ...interface{})
116 // Start initializes the stub server and returns an *http.Client that
117 // uses the stub server to handle all requests.
119 // A stubServer that has been started should eventually be shut down
121 func (s *stubServer) Start() *http.Client {
// Set up s.mux and s.srv, and return an http.Client that uses s
// itself as its transport: RoundTrip dispatches every request to
// s.mux, and s.srv forwards to s.mux as well. Test cases will
// attach handlers to s.mux to get the desired responses.
125 s.mux = http.NewServeMux()
126 s.srv = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
130 w.Header().Set("Content-Type", "application/json")
131 s.mux.ServeHTTP(w, r)
133 return &http.Client{Transport: s}
136 func (s *stubServer) RoundTrip(req *http.Request) (*http.Response, error) {
137 w := httptest.NewRecorder()
138 s.mux.ServeHTTP(w, req)
139 return &http.Response{
141 Status: fmt.Sprintf("%d %s", w.Code, http.StatusText(w.Code)),
143 Body: ioutil.NopCloser(w.Body)}, nil
146 // Close releases resources used by the server.
147 func (s *stubServer) Close() {
151 func (s *stubServer) serveStatic(path, data string) *reqTracker {
153 s.mux.HandleFunc(path, func(w http.ResponseWriter, r *http.Request) {
156 ioutil.ReadAll(r.Body)
159 io.WriteString(w, data)
164 func (s *stubServer) serveCurrentUserAdmin() *reqTracker {
165 return s.serveStatic("/arvados/v1/users/current",
166 `{"uuid":"zzzzz-tpzed-000000000000000","is_admin":true,"is_active":true}`)
169 func (s *stubServer) serveCurrentUserNotAdmin() *reqTracker {
170 return s.serveStatic("/arvados/v1/users/current",
171 `{"uuid":"zzzzz-tpzed-000000000000000","is_admin":false,"is_active":true}`)
174 func (s *stubServer) serveDiscoveryDoc() *reqTracker {
175 return s.serveStatic("/discovery/v1/apis/arvados/v1/rest",
176 `{"defaultCollectionReplication":2}`)
179 func (s *stubServer) serveZeroCollections() *reqTracker {
180 return s.serveStatic("/arvados/v1/collections",
181 `{"items":[],"items_available":0}`)
184 func (s *stubServer) serveFooBarFileCollections() *reqTracker {
186 s.mux.HandleFunc("/arvados/v1/collections", func(w http.ResponseWriter, r *http.Request) {
189 if strings.Contains(r.Form.Get("filters"), `modified_at`) {
190 io.WriteString(w, `{"items_available":0,"items":[]}`)
192 io.WriteString(w, `{"items_available":3,"items":[
193 {"uuid":"zzzzz-4zz18-aaaaaaaaaaaaaaa","portable_data_hash":"fa7aeb5140e2848d39b416daeef4ffc5+45","manifest_text":". 37b51d194a7513e45b56f6524f2d51f2+3 0:3:bar\n","modified_at":"2014-02-03T17:22:54Z"},
194 {"uuid":"zzzzz-4zz18-ehbhgtheo8909or","portable_data_hash":"fa7aeb5140e2848d39b416daeef4ffc5+45","manifest_text":". 37b51d194a7513e45b56f6524f2d51f2+3 0:3:bar\n","modified_at":"2014-02-03T17:22:54Z"},
195 {"uuid":"zzzzz-4zz18-znfnqtbbv4spc3w","portable_data_hash":"1f4b0bc7583c2a7f9102c395f4ffc5e3+45","manifest_text":". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo\n","modified_at":"2014-02-03T17:22:54Z"}]}`)
201 func (s *stubServer) serveCollectionsButSkipOne() *reqTracker {
203 s.mux.HandleFunc("/arvados/v1/collections", func(w http.ResponseWriter, r *http.Request) {
206 if strings.Contains(r.Form.Get("filters"), `"modified_at","\u003c="`) {
207 io.WriteString(w, `{"items_available":3,"items":[]}`)
208 } else if strings.Contains(r.Form.Get("filters"), `"modified_at","\u003e`) {
209 io.WriteString(w, `{"items_available":0,"items":[]}`)
210 } else if strings.Contains(r.Form.Get("filters"), `"modified_at","="`) && strings.Contains(r.Form.Get("filters"), `"uuid","\u003e"`) {
211 io.WriteString(w, `{"items_available":0,"items":[]}`)
212 } else if strings.Contains(r.Form.Get("filters"), `"modified_at","=",null`) {
213 io.WriteString(w, `{"items_available":0,"items":[]}`)
215 io.WriteString(w, `{"items_available":2,"items":[
216 {"uuid":"zzzzz-4zz18-ehbhgtheo8909or","portable_data_hash":"fa7aeb5140e2848d39b416daeef4ffc5+45","manifest_text":". 37b51d194a7513e45b56f6524f2d51f2+3 0:3:bar\n","modified_at":"2014-02-03T17:22:54Z"},
217 {"uuid":"zzzzz-4zz18-znfnqtbbv4spc3w","portable_data_hash":"1f4b0bc7583c2a7f9102c395f4ffc5e3+45","manifest_text":". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo\n","modified_at":"2014-02-03T17:22:54Z"}]}`)
223 func (s *stubServer) serveZeroKeepServices() *reqTracker {
224 return s.serveJSON("/arvados/v1/keep_services", arvados.KeepServiceList{})
227 func (s *stubServer) serveKeepServices(svcs []arvados.KeepService) *reqTracker {
228 return s.serveJSON("/arvados/v1/keep_services", arvados.KeepServiceList{
229 ItemsAvailable: len(svcs),
234 func (s *stubServer) serveJSON(path string, resp interface{}) *reqTracker {
236 s.mux.HandleFunc(path, func(w http.ResponseWriter, r *http.Request) {
238 json.NewEncoder(w).Encode(resp)
243 func (s *stubServer) serveKeepstoreMounts() *reqTracker {
245 s.mux.HandleFunc("/mounts", func(w http.ResponseWriter, r *http.Request) {
247 json.NewEncoder(w).Encode(stubMounts[r.Host])
252 func (s *stubServer) serveKeepstoreIndexFoo4Bar1() *reqTracker {
254 s.mux.HandleFunc("/index/", func(w http.ResponseWriter, r *http.Request) {
256 if r.Host == "keep0.zzzzz.arvadosapi.com:25107" {
257 io.WriteString(w, "37b51d194a7513e45b56f6524f2d51f2+3 12345678\n")
259 fmt.Fprintf(w, "acbd18db4cc2f85cedef654fccc4a4d8+3 %d\n\n", 12345678+count)
261 for _, mounts := range stubMounts {
262 for i, mnt := range mounts {
264 s.mux.HandleFunc(fmt.Sprintf("/mounts/%s/blocks", mnt.UUID), func(w http.ResponseWriter, r *http.Request) {
266 if i == 0 && r.Host == "keep0.zzzzz.arvadosapi.com:25107" {
267 io.WriteString(w, "37b51d194a7513e45b56f6524f2d51f2+3 12345678\n")
270 fmt.Fprintf(w, "acbd18db4cc2f85cedef654fccc4a4d8+3 %d\n", 12345678+count)
279 func (s *stubServer) serveKeepstoreIndexFoo1() *reqTracker {
281 s.mux.HandleFunc("/index/", func(w http.ResponseWriter, r *http.Request) {
283 io.WriteString(w, "acbd18db4cc2f85cedef654fccc4a4d8+3 12345678\n\n")
285 for _, mounts := range stubMounts {
286 for i, mnt := range mounts {
288 s.mux.HandleFunc(fmt.Sprintf("/mounts/%s/blocks", mnt.UUID), func(w http.ResponseWriter, r *http.Request) {
291 io.WriteString(w, "acbd18db4cc2f85cedef654fccc4a4d8+3 12345678\n\n")
293 io.WriteString(w, "\n")
301 func (s *stubServer) serveKeepstoreTrash() *reqTracker {
302 return s.serveStatic("/trash", `{}`)
305 func (s *stubServer) serveKeepstorePull() *reqTracker {
306 return s.serveStatic("/pull", `{}`)
309 type runSuite struct {
311 config *arvados.Cluster
312 client *arvados.Client
315 func (s *runSuite) newServer(options *RunOptions) *Server {
319 RunOptions: *options,
320 Metrics: newMetrics(prometheus.NewRegistry()),
321 Logger: options.Logger,
322 Dumper: options.Dumper,
327 func (s *runSuite) SetUpTest(c *check.C) {
328 cfg, err := config.NewLoader(nil, ctxlog.TestLogger(c)).Load()
329 c.Assert(err, check.Equals, nil)
330 s.config, err = cfg.GetCluster("")
331 c.Assert(err, check.Equals, nil)
333 s.config.Collections.BalancePeriod = arvados.Duration(time.Second)
334 arvadostest.SetServiceURL(&s.config.Services.Keepbalance, "http://localhost:/")
336 s.client = &arvados.Client{
338 APIHost: "zzzzz.arvadosapi.com",
339 Client: s.stub.Start()}
341 s.stub.serveDiscoveryDoc()
345 func (s *runSuite) TearDownTest(c *check.C) {
349 func (s *runSuite) TestRefuseZeroCollections(c *check.C) {
353 Logger: ctxlog.TestLogger(c),
355 s.stub.serveCurrentUserAdmin()
356 s.stub.serveZeroCollections()
357 s.stub.serveKeepServices(stubServices)
358 s.stub.serveKeepstoreMounts()
359 s.stub.serveKeepstoreIndexFoo4Bar1()
360 trashReqs := s.stub.serveKeepstoreTrash()
361 pullReqs := s.stub.serveKeepstorePull()
362 srv := s.newServer(&opts)
363 _, err := srv.runOnce()
364 c.Check(err, check.ErrorMatches, "received zero collections")
365 c.Check(trashReqs.Count(), check.Equals, 4)
366 c.Check(pullReqs.Count(), check.Equals, 0)
369 func (s *runSuite) TestRefuseNonAdmin(c *check.C) {
373 Logger: ctxlog.TestLogger(c),
375 s.stub.serveCurrentUserNotAdmin()
376 s.stub.serveZeroCollections()
377 s.stub.serveKeepServices(stubServices)
378 s.stub.serveKeepstoreMounts()
379 trashReqs := s.stub.serveKeepstoreTrash()
380 pullReqs := s.stub.serveKeepstorePull()
381 srv := s.newServer(&opts)
382 _, err := srv.runOnce()
383 c.Check(err, check.ErrorMatches, "current user .* is not .* admin user")
384 c.Check(trashReqs.Count(), check.Equals, 0)
385 c.Check(pullReqs.Count(), check.Equals, 0)
388 func (s *runSuite) TestDetectSkippedCollections(c *check.C) {
392 Logger: ctxlog.TestLogger(c),
394 s.stub.serveCurrentUserAdmin()
395 s.stub.serveCollectionsButSkipOne()
396 s.stub.serveKeepServices(stubServices)
397 s.stub.serveKeepstoreMounts()
398 s.stub.serveKeepstoreIndexFoo4Bar1()
399 trashReqs := s.stub.serveKeepstoreTrash()
400 pullReqs := s.stub.serveKeepstorePull()
401 srv := s.newServer(&opts)
402 _, err := srv.runOnce()
403 c.Check(err, check.ErrorMatches, `Retrieved 2 collections with modtime <= .* but server now reports there are 3 collections.*`)
404 c.Check(trashReqs.Count(), check.Equals, 4)
405 c.Check(pullReqs.Count(), check.Equals, 0)
408 func (s *runSuite) TestWriteLostBlocks(c *check.C) {
409 lostf, err := ioutil.TempFile("", "keep-balance-lost-blocks-test-")
410 c.Assert(err, check.IsNil)
411 s.config.Collections.BlobMissingReport = lostf.Name()
412 defer os.Remove(lostf.Name())
416 Logger: ctxlog.TestLogger(c),
418 s.stub.serveCurrentUserAdmin()
419 s.stub.serveFooBarFileCollections()
420 s.stub.serveKeepServices(stubServices)
421 s.stub.serveKeepstoreMounts()
422 s.stub.serveKeepstoreIndexFoo1()
423 s.stub.serveKeepstoreTrash()
424 s.stub.serveKeepstorePull()
425 srv := s.newServer(&opts)
426 c.Assert(err, check.IsNil)
427 _, err = srv.runOnce()
428 c.Check(err, check.IsNil)
429 lost, err := ioutil.ReadFile(lostf.Name())
430 c.Assert(err, check.IsNil)
431 c.Check(string(lost), check.Equals, "37b51d194a7513e45b56f6524f2d51f2 fa7aeb5140e2848d39b416daeef4ffc5+45\n")
434 func (s *runSuite) TestDryRun(c *check.C) {
438 Logger: ctxlog.TestLogger(c),
440 s.stub.serveCurrentUserAdmin()
441 collReqs := s.stub.serveFooBarFileCollections()
442 s.stub.serveKeepServices(stubServices)
443 s.stub.serveKeepstoreMounts()
444 s.stub.serveKeepstoreIndexFoo4Bar1()
445 trashReqs := s.stub.serveKeepstoreTrash()
446 pullReqs := s.stub.serveKeepstorePull()
447 srv := s.newServer(&opts)
448 bal, err := srv.runOnce()
449 c.Check(err, check.IsNil)
450 for _, req := range collReqs.reqs {
451 c.Check(req.Form.Get("include_trash"), check.Equals, "true")
452 c.Check(req.Form.Get("include_old_versions"), check.Equals, "true")
454 c.Check(trashReqs.Count(), check.Equals, 0)
455 c.Check(pullReqs.Count(), check.Equals, 0)
456 c.Check(bal.stats.pulls, check.Not(check.Equals), 0)
457 c.Check(bal.stats.underrep.replicas, check.Not(check.Equals), 0)
458 c.Check(bal.stats.overrep.replicas, check.Not(check.Equals), 0)
461 func (s *runSuite) TestCommit(c *check.C) {
462 lostf, err := ioutil.TempFile("", "keep-balance-lost-blocks-test-")
463 c.Assert(err, check.IsNil)
464 s.config.Collections.BlobMissingReport = lostf.Name()
465 defer os.Remove(lostf.Name())
467 s.config.ManagementToken = "xyzzy"
471 Logger: ctxlog.TestLogger(c),
472 Dumper: ctxlog.TestLogger(c),
474 s.stub.serveCurrentUserAdmin()
475 s.stub.serveFooBarFileCollections()
476 s.stub.serveKeepServices(stubServices)
477 s.stub.serveKeepstoreMounts()
478 s.stub.serveKeepstoreIndexFoo4Bar1()
479 trashReqs := s.stub.serveKeepstoreTrash()
480 pullReqs := s.stub.serveKeepstorePull()
481 srv := s.newServer(&opts)
482 bal, err := srv.runOnce()
483 c.Check(err, check.IsNil)
484 c.Check(trashReqs.Count(), check.Equals, 8)
485 c.Check(pullReqs.Count(), check.Equals, 4)
486 // "foo" block is overreplicated by 2
487 c.Check(bal.stats.trashes, check.Equals, 2)
488 // "bar" block is underreplicated by 1, and its only copy is
489 // in a poor rendezvous position
490 c.Check(bal.stats.pulls, check.Equals, 2)
492 lost, err := ioutil.ReadFile(lostf.Name())
493 c.Assert(err, check.IsNil)
494 c.Check(string(lost), check.Equals, "")
496 buf, err := s.getMetrics(c, srv)
497 c.Check(err, check.IsNil)
498 c.Check(buf, check.Matches, `(?ms).*\narvados_keep_total_bytes 15\n.*`)
499 c.Check(buf, check.Matches, `(?ms).*\narvados_keepbalance_changeset_compute_seconds_sum [0-9\.]+\n.*`)
500 c.Check(buf, check.Matches, `(?ms).*\narvados_keepbalance_changeset_compute_seconds_count 1\n.*`)
501 c.Check(buf, check.Matches, `(?ms).*\narvados_keep_dedup_byte_ratio 1\.5\n.*`)
502 c.Check(buf, check.Matches, `(?ms).*\narvados_keep_dedup_block_ratio 1\.5\n.*`)
505 func (s *runSuite) TestRunForever(c *check.C) {
506 s.config.ManagementToken = "xyzzy"
510 Logger: ctxlog.TestLogger(c),
511 Dumper: ctxlog.TestLogger(c),
513 s.stub.serveCurrentUserAdmin()
514 s.stub.serveFooBarFileCollections()
515 s.stub.serveKeepServices(stubServices)
516 s.stub.serveKeepstoreMounts()
517 s.stub.serveKeepstoreIndexFoo4Bar1()
518 trashReqs := s.stub.serveKeepstoreTrash()
519 pullReqs := s.stub.serveKeepstorePull()
521 stop := make(chan interface{})
522 s.config.Collections.BalancePeriod = arvados.Duration(time.Millisecond)
523 srv := s.newServer(&opts)
525 done := make(chan bool)
531 // Each run should send 4 pull lists + 4 trash lists. The
532 // first run should also send 4 empty trash lists at
533 // startup. We should complete all four runs in much less than
535 for t0 := time.Now(); pullReqs.Count() < 16 && time.Since(t0) < 10*time.Second; {
536 time.Sleep(time.Millisecond)
540 c.Check(pullReqs.Count() >= 16, check.Equals, true)
541 c.Check(trashReqs.Count(), check.Equals, pullReqs.Count()+4)
543 buf, err := s.getMetrics(c, srv)
544 c.Check(err, check.IsNil)
545 c.Check(buf, check.Matches, `(?ms).*\narvados_keepbalance_changeset_compute_seconds_count `+fmt.Sprintf("%d", pullReqs.Count()/4)+`\n.*`)
548 func (s *runSuite) getMetrics(c *check.C, srv *Server) (*bytes.Buffer, error) {
549 mfs, err := srv.Metrics.reg.Gather()
555 for _, mf := range mfs {
556 if _, err := expfmt.MetricFamilyToText(&buf, mf); err != nil {