X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/fd38b59aac9b4453cf04fb7d6e1b8ad51549d6c3..9f4f850c2818cbf5eea45e30d27b13c20bd2be0c:/services/keep-balance/balance_run_test.go diff --git a/services/keep-balance/balance_run_test.go b/services/keep-balance/balance_run_test.go index a3abc9f96a..aeed517d09 100644 --- a/services/keep-balance/balance_run_test.go +++ b/services/keep-balance/balance_run_test.go @@ -2,10 +2,11 @@ // // SPDX-License-Identifier: AGPL-3.0 -package main +package keepbalance import ( "bytes" + "context" "encoding/json" "fmt" "io" @@ -17,10 +18,11 @@ import ( "sync" "time" - "git.curoverse.com/arvados.git/lib/config" - "git.curoverse.com/arvados.git/sdk/go/arvados" - "git.curoverse.com/arvados.git/sdk/go/arvadostest" - "git.curoverse.com/arvados.git/sdk/go/ctxlog" + "git.arvados.org/arvados.git/lib/config" + "git.arvados.org/arvados.git/sdk/go/arvados" + "git.arvados.org/arvados.git/sdk/go/arvadostest" + "git.arvados.org/arvados.git/sdk/go/ctxlog" + "github.com/jmoiron/sqlx" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/expfmt" check "gopkg.in/check.v1" @@ -86,20 +88,24 @@ var stubServices = []arvados.KeepService{ var stubMounts = map[string][]arvados.KeepMount{ "keep0.zzzzz.arvadosapi.com:25107": {{ - UUID: "zzzzz-ivpuk-000000000000000", - DeviceID: "keep0-vol0", + UUID: "zzzzz-ivpuk-000000000000000", + DeviceID: "keep0-vol0", + StorageClasses: map[string]bool{"default": true}, }}, "keep1.zzzzz.arvadosapi.com:25107": {{ - UUID: "zzzzz-ivpuk-100000000000000", - DeviceID: "keep1-vol0", + UUID: "zzzzz-ivpuk-100000000000000", + DeviceID: "keep1-vol0", + StorageClasses: map[string]bool{"default": true}, }}, "keep2.zzzzz.arvadosapi.com:25107": {{ - UUID: "zzzzz-ivpuk-200000000000000", - DeviceID: "keep2-vol0", + UUID: "zzzzz-ivpuk-200000000000000", + DeviceID: "keep2-vol0", + StorageClasses: map[string]bool{"default": true}, }}, "keep3.zzzzz.arvadosapi.com:25107": {{ - UUID: "zzzzz-ivpuk-300000000000000", - DeviceID: "keep3-vol0", + UUID: "zzzzz-ivpuk-300000000000000", + DeviceID: "keep3-vol0", + StorageClasses: map[string]bool{"default": true}, }}, } @@ -250,26 +256,32 @@ func (s *stubServer) serveKeepstoreMounts() *reqTracker { } func (s *stubServer) serveKeepstoreIndexFoo4Bar1() *reqTracker { + fooLine := func(mt int) string { return fmt.Sprintf("acbd18db4cc2f85cedef654fccc4a4d8+3 %d\n", 12345678+mt) } + barLine := "37b51d194a7513e45b56f6524f2d51f2+3 12345678\n" rt := &reqTracker{} s.mux.HandleFunc("/index/", func(w http.ResponseWriter, r *http.Request) { count := rt.Add(r) - if r.Host == "keep0.zzzzz.arvadosapi.com:25107" { - io.WriteString(w, "37b51d194a7513e45b56f6524f2d51f2+3 12345678\n") + if r.Host == "keep0.zzzzz.arvadosapi.com:25107" && strings.HasPrefix(barLine, r.URL.Path[7:]) { + io.WriteString(w, barLine) } - fmt.Fprintf(w, "acbd18db4cc2f85cedef654fccc4a4d8+3 %d\n\n", 12345678+count) + if strings.HasPrefix(fooLine(count), r.URL.Path[7:]) { + io.WriteString(w, fooLine(count)) + } + io.WriteString(w, "\n") }) for _, mounts := range stubMounts { for i, mnt := range mounts { i := i s.mux.HandleFunc(fmt.Sprintf("/mounts/%s/blocks", mnt.UUID), func(w http.ResponseWriter, r *http.Request) { count := rt.Add(r) - if i == 0 && r.Host == "keep0.zzzzz.arvadosapi.com:25107" { - io.WriteString(w, "37b51d194a7513e45b56f6524f2d51f2+3 12345678\n") + r.ParseForm() + if i == 0 && r.Host == "keep0.zzzzz.arvadosapi.com:25107" && strings.HasPrefix(barLine, r.Form.Get("prefix")) { + io.WriteString(w, barLine) } - if i == 0 { - fmt.Fprintf(w, "acbd18db4cc2f85cedef654fccc4a4d8+3 %d\n", 12345678+count) + if i == 0 && strings.HasPrefix(fooLine(count), r.Form.Get("prefix")) { + io.WriteString(w, fooLine(count)) } - fmt.Fprintf(w, "\n") + io.WriteString(w, "\n") }) } } @@ -277,21 +289,44 @@ func (s *stubServer) serveKeepstoreIndexFoo4Bar1() *reqTracker { } func (s *stubServer) serveKeepstoreIndexFoo1() *reqTracker { + fooLine := "acbd18db4cc2f85cedef654fccc4a4d8+3 12345678\n" rt := &reqTracker{} s.mux.HandleFunc("/index/", func(w http.ResponseWriter, r *http.Request) { rt.Add(r) - io.WriteString(w, "acbd18db4cc2f85cedef654fccc4a4d8+3 12345678\n\n") + if r.Host == "keep0.zzzzz.arvadosapi.com:25107" && strings.HasPrefix(fooLine, r.URL.Path[7:]) { + io.WriteString(w, fooLine) + } + io.WriteString(w, "\n") }) for _, mounts := range stubMounts { for i, mnt := range mounts { i := i s.mux.HandleFunc(fmt.Sprintf("/mounts/%s/blocks", mnt.UUID), func(w http.ResponseWriter, r *http.Request) { rt.Add(r) - if i == 0 { - io.WriteString(w, "acbd18db4cc2f85cedef654fccc4a4d8+3 12345678\n\n") - } else { - io.WriteString(w, "\n") + if i == 0 && strings.HasPrefix(fooLine, r.Form.Get("prefix")) { + io.WriteString(w, fooLine) } + io.WriteString(w, "\n") + }) + } + } + return rt +} + +func (s *stubServer) serveKeepstoreIndexIgnoringPrefix() *reqTracker { + fooLine := "acbd18db4cc2f85cedef654fccc4a4d8+3 12345678\n" + rt := &reqTracker{} + s.mux.HandleFunc("/index/", func(w http.ResponseWriter, r *http.Request) { + rt.Add(r) + io.WriteString(w, fooLine) + io.WriteString(w, "\n") + }) + for _, mounts := range stubMounts { + for _, mnt := range mounts { + s.mux.HandleFunc(fmt.Sprintf("/mounts/%s/blocks", mnt.UUID), func(w http.ResponseWriter, r *http.Request) { + rt.Add(r) + io.WriteString(w, fooLine) + io.WriteString(w, "\n") }) } } @@ -309,6 +344,7 @@ func (s *stubServer) serveKeepstorePull() *reqTracker { type runSuite struct { stub stubServer config *arvados.Cluster + db *sqlx.DB client *arvados.Client } @@ -320,6 +356,7 @@ func (s *runSuite) newServer(options *RunOptions) *Server { Metrics: newMetrics(prometheus.NewRegistry()), Logger: options.Logger, Dumper: options.Dumper, + DB: s.db, } return srv } @@ -329,6 +366,8 @@ func (s *runSuite) SetUpTest(c *check.C) { c.Assert(err, check.Equals, nil) s.config, err = cfg.GetCluster("") c.Assert(err, check.Equals, nil) + s.db, err = sqlx.Open("postgres", s.config.PostgreSQL.Connection.String()) + c.Assert(err, check.IsNil) s.config.Collections.BalancePeriod = arvados.Duration(time.Second) arvadostest.SetServiceURL(&s.config.Services.Keepbalance, "http://localhost:/") @@ -347,6 +386,9 @@ func (s *runSuite) TearDownTest(c *check.C) { } func (s *runSuite) TestRefuseZeroCollections(c *check.C) { + defer arvados.NewClientFromEnv().RequestAndDecode(nil, "POST", "database/reset", nil, nil) + _, err := s.db.Exec(`delete from collections`) + c.Assert(err, check.IsNil) opts := RunOptions{ CommitPulls: true, CommitTrash: true, @@ -360,12 +402,35 @@ func (s *runSuite) TestRefuseZeroCollections(c *check.C) { trashReqs := s.stub.serveKeepstoreTrash() pullReqs := s.stub.serveKeepstorePull() srv := s.newServer(&opts) - _, err := srv.runOnce() + _, err = srv.runOnce(context.Background()) c.Check(err, check.ErrorMatches, "received zero collections") c.Check(trashReqs.Count(), check.Equals, 4) c.Check(pullReqs.Count(), check.Equals, 0) } +func (s *runSuite) TestRefuseBadIndex(c *check.C) { + opts := RunOptions{ + CommitPulls: true, + CommitTrash: true, + ChunkPrefix: "abc", + Logger: ctxlog.TestLogger(c), + } + s.stub.serveCurrentUserAdmin() + s.stub.serveFooBarFileCollections() + s.stub.serveKeepServices(stubServices) + s.stub.serveKeepstoreMounts() + s.stub.serveKeepstoreIndexIgnoringPrefix() + trashReqs := s.stub.serveKeepstoreTrash() + pullReqs := s.stub.serveKeepstorePull() + srv := s.newServer(&opts) + bal, err := srv.runOnce(context.Background()) + c.Check(err, check.ErrorMatches, ".*Index response included block .* despite asking for prefix \"abc\"") + c.Check(trashReqs.Count(), check.Equals, 4) + c.Check(pullReqs.Count(), check.Equals, 0) + c.Check(bal.stats.trashes, check.Equals, 0) + c.Check(bal.stats.pulls, check.Equals, 0) +} + func (s *runSuite) TestRefuseNonAdmin(c *check.C) { opts := RunOptions{ CommitPulls: true, @@ -379,29 +444,66 @@ func (s *runSuite) TestRefuseNonAdmin(c *check.C) { trashReqs := s.stub.serveKeepstoreTrash() pullReqs := s.stub.serveKeepstorePull() srv := s.newServer(&opts) - _, err := srv.runOnce() + _, err := srv.runOnce(context.Background()) c.Check(err, check.ErrorMatches, "current user .* is not .* admin user") c.Check(trashReqs.Count(), check.Equals, 0) c.Check(pullReqs.Count(), check.Equals, 0) } -func (s *runSuite) TestDetectSkippedCollections(c *check.C) { +func (s *runSuite) TestInvalidChunkPrefix(c *check.C) { + for _, trial := range []struct { + prefix string + errRe string + }{ + {"123ABC", "invalid char \"A\" in chunk prefix.*"}, + {"123xyz", "invalid char \"x\" in chunk prefix.*"}, + {"123456789012345678901234567890123", "invalid chunk prefix .* longer than a block hash"}, + } { + s.SetUpTest(c) + c.Logf("trying invalid prefix %q", trial.prefix) + opts := RunOptions{ + CommitPulls: true, + CommitTrash: true, + ChunkPrefix: trial.prefix, + Logger: ctxlog.TestLogger(c), + } + s.stub.serveCurrentUserAdmin() + s.stub.serveFooBarFileCollections() + s.stub.serveKeepServices(stubServices) + s.stub.serveKeepstoreMounts() + trashReqs := s.stub.serveKeepstoreTrash() + pullReqs := s.stub.serveKeepstorePull() + srv := s.newServer(&opts) + _, err := srv.runOnce(context.Background()) + c.Check(err, check.ErrorMatches, trial.errRe) + c.Check(trashReqs.Count(), check.Equals, 0) + c.Check(pullReqs.Count(), check.Equals, 0) + } +} + +func (s *runSuite) TestRefuseSameDeviceDifferentVolumes(c *check.C) { opts := RunOptions{ CommitPulls: true, CommitTrash: true, Logger: ctxlog.TestLogger(c), } s.stub.serveCurrentUserAdmin() - s.stub.serveCollectionsButSkipOne() + s.stub.serveZeroCollections() s.stub.serveKeepServices(stubServices) - s.stub.serveKeepstoreMounts() - s.stub.serveKeepstoreIndexFoo4Bar1() + s.stub.mux.HandleFunc("/mounts", func(w http.ResponseWriter, r *http.Request) { + hostid := r.Host[:5] // "keep0.zzzzz.arvadosapi.com:25107" => "keep0" + json.NewEncoder(w).Encode([]arvados.KeepMount{{ + UUID: "zzzzz-ivpuk-0000000000" + hostid, + DeviceID: "keep0-vol0", + StorageClasses: map[string]bool{"default": true}, + }}) + }) trashReqs := s.stub.serveKeepstoreTrash() pullReqs := s.stub.serveKeepstorePull() srv := s.newServer(&opts) - _, err := srv.runOnce() - c.Check(err, check.ErrorMatches, `Retrieved 2 collections with modtime <= .* but server now reports there are 3 collections.*`) - c.Check(trashReqs.Count(), check.Equals, 4) + _, err := srv.runOnce(context.Background()) + c.Check(err, check.ErrorMatches, "cannot continue with config errors.*") + c.Check(trashReqs.Count(), check.Equals, 0) c.Check(pullReqs.Count(), check.Equals, 0) } @@ -424,11 +526,11 @@ func (s *runSuite) TestWriteLostBlocks(c *check.C) { s.stub.serveKeepstorePull() srv := s.newServer(&opts) c.Assert(err, check.IsNil) - _, err = srv.runOnce() + _, err = srv.runOnce(context.Background()) c.Check(err, check.IsNil) lost, err := ioutil.ReadFile(lostf.Name()) c.Assert(err, check.IsNil) - c.Check(string(lost), check.Equals, "37b51d194a7513e45b56f6524f2d51f2 fa7aeb5140e2848d39b416daeef4ffc5+45\n") + c.Check(string(lost), check.Matches, `(?ms).*37b51d194a7513e45b56f6524f2d51f2.* fa7aeb5140e2848d39b416daeef4ffc5\+45.*`) } func (s *runSuite) TestDryRun(c *check.C) { @@ -445,7 +547,7 @@ func (s *runSuite) TestDryRun(c *check.C) { trashReqs := s.stub.serveKeepstoreTrash() pullReqs := s.stub.serveKeepstorePull() srv := s.newServer(&opts) - bal, err := srv.runOnce() + bal, err := srv.runOnce(context.Background()) c.Check(err, check.IsNil) for _, req := range collReqs.reqs { c.Check(req.Form.Get("include_trash"), check.Equals, "true") @@ -459,11 +561,7 @@ func (s *runSuite) TestDryRun(c *check.C) { } func (s *runSuite) TestCommit(c *check.C) { - lostf, err := ioutil.TempFile("", "keep-balance-lost-blocks-test-") - c.Assert(err, check.IsNil) - s.config.Collections.BlobMissingReport = lostf.Name() - defer os.Remove(lostf.Name()) - + s.config.Collections.BlobMissingReport = c.MkDir() + "/keep-balance-lost-blocks-test-" s.config.ManagementToken = "xyzzy" opts := RunOptions{ CommitPulls: true, @@ -479,7 +577,7 @@ func (s *runSuite) TestCommit(c *check.C) { trashReqs := s.stub.serveKeepstoreTrash() pullReqs := s.stub.serveKeepstorePull() srv := s.newServer(&opts) - bal, err := srv.runOnce() + bal, err := srv.runOnce(context.Background()) c.Check(err, check.IsNil) c.Check(trashReqs.Count(), check.Equals, 8) c.Check(pullReqs.Count(), check.Equals, 4) @@ -489,17 +587,49 @@ func (s *runSuite) TestCommit(c *check.C) { // in a poor rendezvous position c.Check(bal.stats.pulls, check.Equals, 2) - lost, err := ioutil.ReadFile(lostf.Name()) + lost, err := ioutil.ReadFile(s.config.Collections.BlobMissingReport) c.Assert(err, check.IsNil) - c.Check(string(lost), check.Equals, "") + c.Check(string(lost), check.Not(check.Matches), `(?ms).*acbd18db4cc2f85cedef654fccc4a4d8.*`) buf, err := s.getMetrics(c, srv) c.Check(err, check.IsNil) - c.Check(buf, check.Matches, `(?ms).*\narvados_keep_total_bytes 15\n.*`) - c.Check(buf, check.Matches, `(?ms).*\narvados_keepbalance_changeset_compute_seconds_sum [0-9\.]+\n.*`) - c.Check(buf, check.Matches, `(?ms).*\narvados_keepbalance_changeset_compute_seconds_count 1\n.*`) - c.Check(buf, check.Matches, `(?ms).*\narvados_keep_dedup_byte_ratio 1\.5\n.*`) - c.Check(buf, check.Matches, `(?ms).*\narvados_keep_dedup_block_ratio 1\.5\n.*`) + bufstr := buf.String() + c.Check(bufstr, check.Matches, `(?ms).*\narvados_keep_total_bytes 15\n.*`) + c.Check(bufstr, check.Matches, `(?ms).*\narvados_keepbalance_changeset_compute_seconds_sum [0-9\.]+\n.*`) + c.Check(bufstr, check.Matches, `(?ms).*\narvados_keepbalance_changeset_compute_seconds_count 1\n.*`) + c.Check(bufstr, check.Matches, `(?ms).*\narvados_keep_dedup_byte_ratio [1-9].*`) + c.Check(bufstr, check.Matches, `(?ms).*\narvados_keep_dedup_block_ratio [1-9].*`) +} + +func (s *runSuite) TestChunkPrefix(c *check.C) { + s.config.Collections.BlobMissingReport = c.MkDir() + "/keep-balance-lost-blocks-test-" + opts := RunOptions{ + CommitPulls: true, + CommitTrash: true, + ChunkPrefix: "ac", // catch "foo" but not "bar" + Logger: ctxlog.TestLogger(c), + Dumper: ctxlog.TestLogger(c), + } + s.stub.serveCurrentUserAdmin() + s.stub.serveFooBarFileCollections() + s.stub.serveKeepServices(stubServices) + s.stub.serveKeepstoreMounts() + s.stub.serveKeepstoreIndexFoo4Bar1() + trashReqs := s.stub.serveKeepstoreTrash() + pullReqs := s.stub.serveKeepstorePull() + srv := s.newServer(&opts) + bal, err := srv.runOnce(context.Background()) + c.Check(err, check.IsNil) + c.Check(trashReqs.Count(), check.Equals, 8) + c.Check(pullReqs.Count(), check.Equals, 4) + // "foo" block is overreplicated by 2 + c.Check(bal.stats.trashes, check.Equals, 2) + // "bar" block is underreplicated but does not match prefix + c.Check(bal.stats.pulls, check.Equals, 0) + + lost, err := ioutil.ReadFile(s.config.Collections.BlobMissingReport) + c.Assert(err, check.IsNil) + c.Check(string(lost), check.Equals, "") } func (s *runSuite) TestRunForever(c *check.C) { @@ -518,13 +648,14 @@ func (s *runSuite) TestRunForever(c *check.C) { trashReqs := s.stub.serveKeepstoreTrash() pullReqs := s.stub.serveKeepstorePull() - stop := make(chan interface{}) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() s.config.Collections.BalancePeriod = arvados.Duration(time.Millisecond) srv := s.newServer(&opts) done := make(chan bool) go func() { - srv.runForever(stop) + srv.runForever(ctx) close(done) }() @@ -532,10 +663,13 @@ func (s *runSuite) TestRunForever(c *check.C) { // first run should also send 4 empty trash lists at // startup. We should complete all four runs in much less than // a second. - for t0 := time.Now(); pullReqs.Count() < 16 && time.Since(t0) < 10*time.Second; { + for t0 := time.Now(); time.Since(t0) < 10*time.Second; { + if pullReqs.Count() >= 16 && trashReqs.Count() == pullReqs.Count()+4 { + break + } time.Sleep(time.Millisecond) } - stop <- true + cancel() <-done c.Check(pullReqs.Count() >= 16, check.Equals, true) c.Check(trashReqs.Count(), check.Equals, pullReqs.Count()+4)