//
// SPDX-License-Identifier: AGPL-3.0
-package main
+package keepbalance
import (
+ "bytes"
+ "context"
"encoding/json"
"fmt"
"io"
"sync"
"time"
- "git.curoverse.com/arvados.git/lib/config"
- "git.curoverse.com/arvados.git/sdk/go/arvados"
- "git.curoverse.com/arvados.git/sdk/go/arvadostest"
- "git.curoverse.com/arvados.git/sdk/go/ctxlog"
+ "git.arvados.org/arvados.git/lib/config"
+ "git.arvados.org/arvados.git/sdk/go/arvados"
+ "git.arvados.org/arvados.git/sdk/go/arvadostest"
+ "git.arvados.org/arvados.git/sdk/go/ctxlog"
+ "github.com/jmoiron/sqlx"
"github.com/prometheus/client_golang/prometheus"
+ "github.com/prometheus/common/expfmt"
check "gopkg.in/check.v1"
)
// stubMounts lists, for each stub keepstore host:port, the mounts that
// host reports. Every host has a single mount with its own UUID and
// DeviceID; the new side of this change also marks each mount with the
// "default" storage class.
var stubMounts = map[string][]arvados.KeepMount{
"keep0.zzzzz.arvadosapi.com:25107": {{
- UUID: "zzzzz-ivpuk-000000000000000",
- DeviceID: "keep0-vol0",
+ UUID: "zzzzz-ivpuk-000000000000000",
+ DeviceID: "keep0-vol0",
+ StorageClasses: map[string]bool{"default": true},
}},
"keep1.zzzzz.arvadosapi.com:25107": {{
- UUID: "zzzzz-ivpuk-100000000000000",
- DeviceID: "keep1-vol0",
+ UUID: "zzzzz-ivpuk-100000000000000",
+ DeviceID: "keep1-vol0",
+ StorageClasses: map[string]bool{"default": true},
}},
"keep2.zzzzz.arvadosapi.com:25107": {{
- UUID: "zzzzz-ivpuk-200000000000000",
- DeviceID: "keep2-vol0",
+ UUID: "zzzzz-ivpuk-200000000000000",
+ DeviceID: "keep2-vol0",
+ StorageClasses: map[string]bool{"default": true},
}},
"keep3.zzzzz.arvadosapi.com:25107": {{
- UUID: "zzzzz-ivpuk-300000000000000",
- DeviceID: "keep3-vol0",
+ UUID: "zzzzz-ivpuk-300000000000000",
+ DeviceID: "keep3-vol0",
+ StorageClasses: map[string]bool{"default": true},
}},
}
}
// serveKeepstoreIndexFoo4Bar1 registers stub block-index handlers on
// s.mux, both the "/index/" form and the per-mount
// "/mounts/<uuid>/blocks" form, and records each request with rt.Add.
//
// On the new side of this change a block line is written only when it
// starts with the prefix the client asked for. The "bar" line is
// offered by keep0 only; the "foo" line is offered by every "/index/"
// request and by the first mount of each host. Every response ends
// with a blank line.
//
// NOTE(review): the return statement falls outside this hunk;
// presumably rt is returned, as the signature suggests.
func (s *stubServer) serveKeepstoreIndexFoo4Bar1() *reqTracker {
// fooLine's trailing number is offset by mt, which callers below set
// to the value returned by rt.Add for the current request.
+ fooLine := func(mt int) string { return fmt.Sprintf("acbd18db4cc2f85cedef654fccc4a4d8+3 %d\n", 12345678+mt) }
+ barLine := "37b51d194a7513e45b56f6524f2d51f2+3 12345678\n"
rt := &reqTracker{}
s.mux.HandleFunc("/index/", func(w http.ResponseWriter, r *http.Request) {
count := rt.Add(r)
- if r.Host == "keep0.zzzzz.arvadosapi.com:25107" {
- io.WriteString(w, "37b51d194a7513e45b56f6524f2d51f2+3 12345678\n")
// r.URL.Path[7:] is whatever follows the 7-byte "/index/" pattern,
// i.e. the requested hash prefix (empty when none was given).
+ if r.Host == "keep0.zzzzz.arvadosapi.com:25107" && strings.HasPrefix(barLine, r.URL.Path[7:]) {
+ io.WriteString(w, barLine)
}
- fmt.Fprintf(w, "acbd18db4cc2f85cedef654fccc4a4d8+3 %d\n\n", 12345678+count)
+ if strings.HasPrefix(fooLine(count), r.URL.Path[7:]) {
+ io.WriteString(w, fooLine(count))
+ }
+ io.WriteString(w, "\n")
})
for _, mounts := range stubMounts {
for i, mnt := range mounts {
// Copy the loop variable so each handler closure keeps its own i.
i := i
s.mux.HandleFunc(fmt.Sprintf("/mounts/%s/blocks", mnt.UUID), func(w http.ResponseWriter, r *http.Request) {
count := rt.Add(r)
- if i == 0 && r.Host == "keep0.zzzzz.arvadosapi.com:25107" {
- io.WriteString(w, "37b51d194a7513e45b56f6524f2d51f2+3 12345678\n")
// The form is parsed before r.Form is read for the "prefix" parameter.
+ r.ParseForm()
+ if i == 0 && r.Host == "keep0.zzzzz.arvadosapi.com:25107" && strings.HasPrefix(barLine, r.Form.Get("prefix")) {
+ io.WriteString(w, barLine)
}
- if i == 0 {
- fmt.Fprintf(w, "acbd18db4cc2f85cedef654fccc4a4d8+3 %d\n", 12345678+count)
+ if i == 0 && strings.HasPrefix(fooLine(count), r.Form.Get("prefix")) {
+ io.WriteString(w, fooLine(count))
}
- fmt.Fprintf(w, "\n")
+ io.WriteString(w, "\n")
})
}
}
}
// serveKeepstoreIndexFoo1 registers stub block-index handlers on s.mux
// that offer a single "foo" block line, and returns the reqTracker
// that records every index request.
//
// On the new side of this change the "/index/" handler writes the
// line only for keep0, and the "/mounts/<uuid>/blocks" handler writes
// it only for the first mount of a host; in both cases the line must
// start with the requested prefix. Every response ends with a blank
// line.
func (s *stubServer) serveKeepstoreIndexFoo1() *reqTracker {
+ fooLine := "acbd18db4cc2f85cedef654fccc4a4d8+3 12345678\n"
rt := &reqTracker{}
s.mux.HandleFunc("/index/", func(w http.ResponseWriter, r *http.Request) {
rt.Add(r)
- io.WriteString(w, "acbd18db4cc2f85cedef654fccc4a4d8+3 12345678\n\n")
// r.URL.Path[7:] is whatever follows the 7-byte "/index/" pattern,
// i.e. the requested hash prefix (empty when none was given).
+ if r.Host == "keep0.zzzzz.arvadosapi.com:25107" && strings.HasPrefix(fooLine, r.URL.Path[7:]) {
+ io.WriteString(w, fooLine)
+ }
+ io.WriteString(w, "\n")
})
for _, mounts := range stubMounts {
for i, mnt := range mounts {
// Copy the loop variable so each handler closure keeps its own i.
i := i
s.mux.HandleFunc(fmt.Sprintf("/mounts/%s/blocks", mnt.UUID), func(w http.ResponseWriter, r *http.Request) {
rt.Add(r)
- if i == 0 {
- io.WriteString(w, "acbd18db4cc2f85cedef654fccc4a4d8+3 12345678\n\n")
- } else {
- io.WriteString(w, "\n")
// r.Form stays nil until the form is parsed, in which case
// Get("prefix") would always return "" and the filter below would
// never filter. Parse it first, as serveKeepstoreIndexFoo4Bar1 does.
+ r.ParseForm()
+ if i == 0 && strings.HasPrefix(fooLine, r.Form.Get("prefix")) {
+ io.WriteString(w, fooLine)
}
+ io.WriteString(w, "\n")
+ })
+ }
+ }
+ return rt
+}
+
// serveKeepstoreIndexIgnoringPrefix registers stub block-index
// handlers on s.mux that always write the "foo" block line followed by
// a blank line, without looking at any prefix the client requested.
// Both the "/index/" form and every mount's "/mounts/<uuid>/blocks"
// form behave this way, and each request is recorded with rt.Add.
//
// NOTE(review): the end of this function falls outside this hunk;
// presumably rt is returned, as the signature suggests.
+func (s *stubServer) serveKeepstoreIndexIgnoringPrefix() *reqTracker {
+ fooLine := "acbd18db4cc2f85cedef654fccc4a4d8+3 12345678\n"
+ rt := &reqTracker{}
+ s.mux.HandleFunc("/index/", func(w http.ResponseWriter, r *http.Request) {
+ rt.Add(r)
+ io.WriteString(w, fooLine)
+ io.WriteString(w, "\n")
+ })
+ for _, mounts := range stubMounts {
+ for _, mnt := range mounts {
+ s.mux.HandleFunc(fmt.Sprintf("/mounts/%s/blocks", mnt.UUID), func(w http.ResponseWriter, r *http.Request) {
+ rt.Add(r)
+ io.WriteString(w, fooLine)
+ io.WriteString(w, "\n")
})
}
}
// runSuite is the test suite whose methods appear below. It carries
// the stub HTTP server, the cluster configuration, a database handle
// (added by this change) and an API client.
type runSuite struct {
stub stubServer
config *arvados.Cluster
+ db *sqlx.DB
client *arvados.Client
}
Metrics: newMetrics(prometheus.NewRegistry()),
Logger: options.Logger,
Dumper: options.Dumper,
+ DB: s.db,
}
- srv.setup()
return srv
}
c.Assert(err, check.Equals, nil)
s.config, err = cfg.GetCluster("")
c.Assert(err, check.Equals, nil)
+ s.db, err = sqlx.Open("postgres", s.config.PostgreSQL.Connection.String())
+ c.Assert(err, check.IsNil)
s.config.Collections.BalancePeriod = arvados.Duration(time.Second)
arvadostest.SetServiceURL(&s.config.Services.Keepbalance, "http://localhost:/")
}
func (s *runSuite) TestRefuseZeroCollections(c *check.C) {
+ defer arvados.NewClientFromEnv().RequestAndDecode(nil, "POST", "database/reset", nil, nil)
+ _, err := s.db.Exec(`delete from collections`)
+ c.Assert(err, check.IsNil)
opts := RunOptions{
CommitPulls: true,
CommitTrash: true,
trashReqs := s.stub.serveKeepstoreTrash()
pullReqs := s.stub.serveKeepstorePull()
srv := s.newServer(&opts)
- _, err := srv.runOnce()
+ _, err = srv.runOnce(context.Background())
c.Check(err, check.ErrorMatches, "received zero collections")
c.Check(trashReqs.Count(), check.Equals, 4)
c.Check(pullReqs.Count(), check.Equals, 0)
}
// TestRefuseBadIndex runs one balancing pass with ChunkPrefix "abc"
// against index stubs that ignore the requested prefix. It expects
// runOnce to fail with an error naming the unexpected block, to send
// no pull requests, and to count zero trashes and zero pulls.
+func (s *runSuite) TestRefuseBadIndex(c *check.C) {
+ opts := RunOptions{
+ CommitPulls: true,
+ CommitTrash: true,
+ ChunkPrefix: "abc",
+ Logger: ctxlog.TestLogger(c),
+ }
+ s.stub.serveCurrentUserAdmin()
+ s.stub.serveFooBarFileCollections()
+ s.stub.serveKeepServices(stubServices)
+ s.stub.serveKeepstoreMounts()
+ s.stub.serveKeepstoreIndexIgnoringPrefix()
+ trashReqs := s.stub.serveKeepstoreTrash()
+ pullReqs := s.stub.serveKeepstorePull()
+ srv := s.newServer(&opts)
+ bal, err := srv.runOnce(context.Background())
+ c.Check(err, check.ErrorMatches, ".*Index response included block .* despite asking for prefix \"abc\"")
// NOTE(review): four trash requests are still expected even though
// the run fails; presumably one initial trash list per stub service.
// Confirm against runOnce.
+ c.Check(trashReqs.Count(), check.Equals, 4)
+ c.Check(pullReqs.Count(), check.Equals, 0)
+ c.Check(bal.stats.trashes, check.Equals, 0)
+ c.Check(bal.stats.pulls, check.Equals, 0)
+}
+
func (s *runSuite) TestRefuseNonAdmin(c *check.C) {
opts := RunOptions{
CommitPulls: true,
trashReqs := s.stub.serveKeepstoreTrash()
pullReqs := s.stub.serveKeepstorePull()
srv := s.newServer(&opts)
- _, err := srv.runOnce()
+ _, err := srv.runOnce(context.Background())
c.Check(err, check.ErrorMatches, "current user .* is not .* admin user")
c.Check(trashReqs.Count(), check.Equals, 0)
c.Check(pullReqs.Count(), check.Equals, 0)
}
-func (s *runSuite) TestDetectSkippedCollections(c *check.C) {
// TestInvalidChunkPrefix is table-driven: each trial supplies a
// ChunkPrefix that should be rejected, plus a pattern the resulting
// error must match. For every trial runOnce is expected to fail
// without sending any trash or pull requests.
+func (s *runSuite) TestInvalidChunkPrefix(c *check.C) {
+ for _, trial := range []struct {
+ prefix string
+ errRe string
+ }{
// Upper-case hex digit, non-hex letter, and a 33-character prefix.
+ {"123ABC", "invalid char \"A\" in chunk prefix.*"},
+ {"123xyz", "invalid char \"x\" in chunk prefix.*"},
+ {"123456789012345678901234567890123", "invalid chunk prefix .* longer than a block hash"},
+ } {
// NOTE(review): SetUpTest is called again on every iteration,
// presumably so each trial starts from fresh fixtures. Confirm that
// repeating it without a matching teardown is intended.
+ s.SetUpTest(c)
+ c.Logf("trying invalid prefix %q", trial.prefix)
+ opts := RunOptions{
+ CommitPulls: true,
+ CommitTrash: true,
+ ChunkPrefix: trial.prefix,
+ Logger: ctxlog.TestLogger(c),
+ }
+ s.stub.serveCurrentUserAdmin()
+ s.stub.serveFooBarFileCollections()
+ s.stub.serveKeepServices(stubServices)
+ s.stub.serveKeepstoreMounts()
+ trashReqs := s.stub.serveKeepstoreTrash()
+ pullReqs := s.stub.serveKeepstorePull()
+ srv := s.newServer(&opts)
+ _, err := srv.runOnce(context.Background())
+ c.Check(err, check.ErrorMatches, trial.errRe)
+ c.Check(trashReqs.Count(), check.Equals, 0)
+ c.Check(pullReqs.Count(), check.Equals, 0)
+ }
+}
+
// TestRefuseSameDeviceDifferentVolumes serves a "/mounts" response in
// which every host reports a mount with its own UUID but the same
// DeviceID ("keep0-vol0"). It expects runOnce to refuse to continue
// with a config error, and to send no trash or pull requests.
+func (s *runSuite) TestRefuseSameDeviceDifferentVolumes(c *check.C) {
opts := RunOptions{
CommitPulls: true,
CommitTrash: true,
Logger: ctxlog.TestLogger(c),
}
s.stub.serveCurrentUserAdmin()
- s.stub.serveCollectionsButSkipOne()
+ s.stub.serveZeroCollections()
s.stub.serveKeepServices(stubServices)
- s.stub.serveKeepstoreMounts()
- s.stub.serveKeepstoreIndexFoo4Bar1()
+ s.stub.mux.HandleFunc("/mounts", func(w http.ResponseWriter, r *http.Request) {
+ hostid := r.Host[:5] // "keep0.zzzzz.arvadosapi.com:25107" => "keep0"
// The UUID is made unique per host by appending hostid, while
// DeviceID is the same literal for every host.
+ json.NewEncoder(w).Encode([]arvados.KeepMount{{
+ UUID: "zzzzz-ivpuk-0000000000" + hostid,
+ DeviceID: "keep0-vol0",
+ StorageClasses: map[string]bool{"default": true},
+ }})
+ })
trashReqs := s.stub.serveKeepstoreTrash()
pullReqs := s.stub.serveKeepstorePull()
srv := s.newServer(&opts)
- _, err := srv.runOnce()
- c.Check(err, check.ErrorMatches, `Retrieved 2 collections with modtime <= .* but server now reports there are 3 collections.*`)
- c.Check(trashReqs.Count(), check.Equals, 4)
+ _, err := srv.runOnce(context.Background())
+ c.Check(err, check.ErrorMatches, "cannot continue with config errors.*")
+ c.Check(trashReqs.Count(), check.Equals, 0)
c.Check(pullReqs.Count(), check.Equals, 0)
}
s.stub.serveKeepstorePull()
srv := s.newServer(&opts)
c.Assert(err, check.IsNil)
- _, err = srv.runOnce()
+ _, err = srv.runOnce(context.Background())
c.Check(err, check.IsNil)
lost, err := ioutil.ReadFile(lostf.Name())
c.Assert(err, check.IsNil)
- c.Check(string(lost), check.Equals, "37b51d194a7513e45b56f6524f2d51f2 fa7aeb5140e2848d39b416daeef4ffc5+45\n")
+ c.Check(string(lost), check.Matches, `(?ms).*37b51d194a7513e45b56f6524f2d51f2.* fa7aeb5140e2848d39b416daeef4ffc5\+45.*`)
}
func (s *runSuite) TestDryRun(c *check.C) {
trashReqs := s.stub.serveKeepstoreTrash()
pullReqs := s.stub.serveKeepstorePull()
srv := s.newServer(&opts)
- bal, err := srv.runOnce()
+ bal, err := srv.runOnce(context.Background())
c.Check(err, check.IsNil)
for _, req := range collReqs.reqs {
c.Check(req.Form.Get("include_trash"), check.Equals, "true")
}
func (s *runSuite) TestCommit(c *check.C) {
- lostf, err := ioutil.TempFile("", "keep-balance-lost-blocks-test-")
- c.Assert(err, check.IsNil)
- s.config.Collections.BlobMissingReport = lostf.Name()
- defer os.Remove(lostf.Name())
-
+ s.config.Collections.BlobMissingReport = c.MkDir() + "/keep-balance-lost-blocks-test-"
s.config.ManagementToken = "xyzzy"
opts := RunOptions{
CommitPulls: true,
trashReqs := s.stub.serveKeepstoreTrash()
pullReqs := s.stub.serveKeepstorePull()
srv := s.newServer(&opts)
- bal, err := srv.runOnce()
+ bal, err := srv.runOnce(context.Background())
c.Check(err, check.IsNil)
c.Check(trashReqs.Count(), check.Equals, 8)
c.Check(pullReqs.Count(), check.Equals, 4)
// in a poor rendezvous position
c.Check(bal.stats.pulls, check.Equals, 2)
- lost, err := ioutil.ReadFile(lostf.Name())
+ lost, err := ioutil.ReadFile(s.config.Collections.BlobMissingReport)
c.Assert(err, check.IsNil)
- c.Check(string(lost), check.Equals, "")
+ c.Check(string(lost), check.Not(check.Matches), `(?ms).*acbd18db4cc2f85cedef654fccc4a4d8.*`)
- metrics := s.getMetrics(c, srv)
- c.Check(metrics, check.Matches, `(?ms).*\narvados_keep_total_bytes 15\n.*`)
- c.Check(metrics, check.Matches, `(?ms).*\narvados_keepbalance_changeset_compute_seconds_sum [0-9\.]+\n.*`)
- c.Check(metrics, check.Matches, `(?ms).*\narvados_keepbalance_changeset_compute_seconds_count 1\n.*`)
- c.Check(metrics, check.Matches, `(?ms).*\narvados_keep_dedup_byte_ratio 1\.5\n.*`)
- c.Check(metrics, check.Matches, `(?ms).*\narvados_keep_dedup_block_ratio 1\.5\n.*`)
+ buf, err := s.getMetrics(c, srv)
+ c.Check(err, check.IsNil)
+ bufstr := buf.String()
+ c.Check(bufstr, check.Matches, `(?ms).*\narvados_keep_total_bytes 15\n.*`)
+ c.Check(bufstr, check.Matches, `(?ms).*\narvados_keepbalance_changeset_compute_seconds_sum [0-9\.]+\n.*`)
+ c.Check(bufstr, check.Matches, `(?ms).*\narvados_keepbalance_changeset_compute_seconds_count 1\n.*`)
+ c.Check(bufstr, check.Matches, `(?ms).*\narvados_keep_dedup_byte_ratio [1-9].*`)
+ c.Check(bufstr, check.Matches, `(?ms).*\narvados_keep_dedup_block_ratio [1-9].*`)
+}
+
// TestChunkPrefix runs one committed balancing pass restricted to
// ChunkPrefix "ac" against the Foo4Bar1 index stubs. It checks that
// the run succeeds, that only the "foo" block (whose hash starts with
// "ac") produces changes, and that the lost-blocks report written
// under a fresh temp directory is empty.
+func (s *runSuite) TestChunkPrefix(c *check.C) {
+ s.config.Collections.BlobMissingReport = c.MkDir() + "/keep-balance-lost-blocks-test-"
+ opts := RunOptions{
+ CommitPulls: true,
+ CommitTrash: true,
+ ChunkPrefix: "ac", // catch "foo" but not "bar"
+ Logger: ctxlog.TestLogger(c),
+ Dumper: ctxlog.TestLogger(c),
+ }
+ s.stub.serveCurrentUserAdmin()
+ s.stub.serveFooBarFileCollections()
+ s.stub.serveKeepServices(stubServices)
+ s.stub.serveKeepstoreMounts()
+ s.stub.serveKeepstoreIndexFoo4Bar1()
+ trashReqs := s.stub.serveKeepstoreTrash()
+ pullReqs := s.stub.serveKeepstorePull()
+ srv := s.newServer(&opts)
+ bal, err := srv.runOnce(context.Background())
+ c.Check(err, check.IsNil)
+ c.Check(trashReqs.Count(), check.Equals, 8)
+ c.Check(pullReqs.Count(), check.Equals, 4)
+ // "foo" block is overreplicated by 2
+ c.Check(bal.stats.trashes, check.Equals, 2)
+ // "bar" block is underreplicated but does not match prefix
+ c.Check(bal.stats.pulls, check.Equals, 0)
+
+ lost, err := ioutil.ReadFile(s.config.Collections.BlobMissingReport)
+ c.Assert(err, check.IsNil)
+ c.Check(string(lost), check.Equals, "")
}
func (s *runSuite) TestRunForever(c *check.C) {
trashReqs := s.stub.serveKeepstoreTrash()
pullReqs := s.stub.serveKeepstorePull()
- stop := make(chan interface{})
+ ctx, cancel := context.WithCancel(context.Background())
+ defer cancel()
s.config.Collections.BalancePeriod = arvados.Duration(time.Millisecond)
srv := s.newServer(&opts)
done := make(chan bool)
go func() {
- srv.runForever(stop)
+ srv.runForever(ctx)
close(done)
}()
// first run should also send 4 empty trash lists at
// startup. We should complete all four runs in much less than
// a second.
- for t0 := time.Now(); pullReqs.Count() < 16 && time.Since(t0) < 10*time.Second; {
+ for t0 := time.Now(); time.Since(t0) < 10*time.Second; {
+ if pullReqs.Count() >= 16 && trashReqs.Count() == pullReqs.Count()+4 {
+ break
+ }
time.Sleep(time.Millisecond)
}
- stop <- true
+ cancel()
<-done
c.Check(pullReqs.Count() >= 16, check.Equals, true)
c.Check(trashReqs.Count(), check.Equals, pullReqs.Count()+4)
- c.Check(s.getMetrics(c, srv), check.Matches, `(?ms).*\narvados_keepbalance_changeset_compute_seconds_count `+fmt.Sprintf("%d", pullReqs.Count()/4)+`\n.*`)
+
+ buf, err := s.getMetrics(c, srv)
+ c.Check(err, check.IsNil)
+ c.Check(buf, check.Matches, `(?ms).*\narvados_keepbalance_changeset_compute_seconds_count `+fmt.Sprintf("%d", pullReqs.Count()/4)+`\n.*`)
}
// getMetrics returns the server's current metrics rendered as text.
//
// The old side fetched "/metrics" through srv.ServeHTTP, once without
// a token (expecting 401) and once with api_token=xyzzy (expecting
// 200). The new side gathers every metric family from srv.Metrics.reg
// and appends each to a buffer with expfmt.MetricFamilyToText,
// returning the first error from either step. The new side does not
// use c.
-func (s *runSuite) getMetrics(c *check.C, srv *Server) string {
- req := httptest.NewRequest("GET", "/metrics", nil)
- resp := httptest.NewRecorder()
- srv.ServeHTTP(resp, req)
- c.Check(resp.Code, check.Equals, http.StatusUnauthorized)
+func (s *runSuite) getMetrics(c *check.C, srv *Server) (*bytes.Buffer, error) {
+ mfs, err := srv.Metrics.reg.Gather()
+ if err != nil {
+ return nil, err
+ }
- req = httptest.NewRequest("GET", "/metrics?api_token=xyzzy", nil)
- resp = httptest.NewRecorder()
- srv.ServeHTTP(resp, req)
- c.Check(resp.Code, check.Equals, http.StatusOK)
+ var buf bytes.Buffer
+ for _, mf := range mfs {
+ if _, err := expfmt.MetricFamilyToText(&buf, mf); err != nil {
+ return nil, err
+ }
+ }
- buf, err := ioutil.ReadAll(resp.Body)
- c.Check(err, check.IsNil)
- return string(buf)
+ return &buf, nil
}