// Copyright (C) The Arvados Authors. All rights reserved.
//
// SPDX-License-Identifier: AGPL-3.0

package keepbalance

import (
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"net/http/httptest"
	"os"
	"strings"
	"sync"
	"syscall"
	"time"

	"git.arvados.org/arvados.git/lib/config"
	"git.arvados.org/arvados.git/sdk/go/arvados"
	"git.arvados.org/arvados.git/sdk/go/arvadostest"
	"git.arvados.org/arvados.git/sdk/go/ctxlog"
	"github.com/jmoiron/sqlx"
	"github.com/prometheus/client_golang/prometheus"
	check "gopkg.in/check.v1"
)

var _ = check.Suite(&runSuite{})

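// reqTracker records the HTTP requests received by a stub handler so
// tests can count and inspect them. It is safe for concurrent use.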
type reqTracker struct {
	reqs []http.Request
	sync.Mutex
}

func (rt *reqTracker) Count() int {
	rt.Lock()
	defer rt.Unlock()
	return len(rt.reqs)
}

func (rt *reqTracker) Add(req *http.Request) int {
	rt.Lock()
	defer rt.Unlock()
	rt.reqs = append(rt.reqs, *req)
	return len(rt.reqs)
}

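// stubServices is the keep_services list most tests serve via
// serveKeepServices: four "disk" keepstores plus one "proxy" service.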
var stubServices = []arvados.KeepService{
	{
		UUID:           "zzzzz-bi6l4-000000000000000",
		ServiceHost:    "keep0.zzzzz.arvadosapi.com",
		ServicePort:    25107,
		ServiceSSLFlag: false,
		ServiceType:    "disk",
	},
	{
		UUID:           "zzzzz-bi6l4-000000000000001",
		ServiceHost:    "keep1.zzzzz.arvadosapi.com",
		ServicePort:    25107,
		ServiceSSLFlag: false,
		ServiceType:    "disk",
	},
	{
		UUID:           "zzzzz-bi6l4-000000000000002",
		ServiceHost:    "keep2.zzzzz.arvadosapi.com",
		ServicePort:    25107,
		ServiceSSLFlag: false,
		ServiceType:    "disk",
	},
	{
		UUID:           "zzzzz-bi6l4-000000000000003",
		ServiceHost:    "keep3.zzzzz.arvadosapi.com",
		ServicePort:    25107,
		ServiceSSLFlag: false,
		ServiceType:    "disk",
	},
	{
		UUID:           "zzzzz-bi6l4-h0a0xwut9qa6g3a",
		ServiceHost:    "keep.zzzzz.arvadosapi.com",
		ServicePort:    25333,
		ServiceSSLFlag: true,
		ServiceType:    "proxy",
	},
}

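// stubMounts maps each stub keepstore's host:port to the mounts it
// reports: one writable, trashable, default-storage-class volume per
// server.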
var stubMounts = map[string][]arvados.KeepMount{
	"keep0.zzzzz.arvadosapi.com:25107": {{
		UUID:           "zzzzz-ivpuk-000000000000000",
		DeviceID:       "keep0-vol0",
		StorageClasses: map[string]bool{"default": true},
		AllowWrite:     true,
		AllowTrash:     true,
	}},
	"keep1.zzzzz.arvadosapi.com:25107": {{
		UUID:           "zzzzz-ivpuk-100000000000000",
		DeviceID:       "keep1-vol0",
		StorageClasses: map[string]bool{"default": true},
		AllowWrite:     true,
		AllowTrash:     true,
	}},
	"keep2.zzzzz.arvadosapi.com:25107": {{
		UUID:           "zzzzz-ivpuk-200000000000000",
		DeviceID:       "keep2-vol0",
		StorageClasses: map[string]bool{"default": true},
		AllowWrite:     true,
		AllowTrash:     true,
	}},
	"keep3.zzzzz.arvadosapi.com:25107": {{
		UUID:           "zzzzz-ivpuk-300000000000000",
		DeviceID:       "keep3-vol0",
		StorageClasses: map[string]bool{"default": true},
		AllowWrite:     true,
		AllowTrash:     true,
	}},
}

// stubServer is an HTTP transport that intercepts and processes all
// requests using its own handlers.
type stubServer struct {
	mux      *http.ServeMux
	srv      *httptest.Server
	mutex    sync.Mutex
	Requests reqTracker
	logf     func(string, ...interface{})
}

// Start initializes the stub server and returns an *http.Client that
// uses the stub server to handle all requests.
//
// A stubServer that has been started should eventually be shut down
// with Close().
func (s *stubServer) Start() *http.Client {
	// Set up an http.Client whose transport dispatches all
	// requests to s.mux. Test cases will attach handlers to
	// s.mux to get the desired responses.
	s.mux = http.NewServeMux()
	s.srv = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		s.mutex.Lock()
		s.Requests.Add(r)
		s.mutex.Unlock()
		w.Header().Set("Content-Type", "application/json")
		s.mux.ServeHTTP(w, r)
	}))
	return &http.Client{Transport: s}
}

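// RoundTrip implements http.RoundTripper by serving the request
// directly from s.mux and returning the recorded response.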
func (s *stubServer) RoundTrip(req *http.Request) (*http.Response, error) {
	w := httptest.NewRecorder()
	s.mux.ServeHTTP(w, req)
	return w.Result(), nil
}

// Close releases resources used by the server.
func (s *stubServer) Close() {
	s.srv.Close()
}

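// serveStatic registers a handler on s.mux that tracks requests to
// path and always responds with data.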
func (s *stubServer) serveStatic(path, data string) *reqTracker {
	rt := &reqTracker{}
	s.mux.HandleFunc(path, func(w http.ResponseWriter, r *http.Request) {
		rt.Add(r)
		if r.Body != nil {
			io.ReadAll(r.Body)
			r.Body.Close()
		}
		io.WriteString(w, data)
	})
	return rt
}

func (s *stubServer) serveCurrentUserAdmin() *reqTracker {
	return s.serveStatic("/arvados/v1/users/current",
		`{"uuid":"zzzzz-tpzed-000000000000000","is_admin":true,"is_active":true}`)
}

func (s *stubServer) serveCurrentUserNotAdmin() *reqTracker {
	return s.serveStatic("/arvados/v1/users/current",
		`{"uuid":"zzzzz-tpzed-000000000000000","is_admin":false,"is_active":true}`)
}

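// serveDiscoveryDoc stubs the discovery document with a default
// collection replication of 2.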
func (s *stubServer) serveDiscoveryDoc() *reqTracker {
	return s.serveStatic("/discovery/v1/apis/arvados/v1/rest",
		`{"defaultCollectionReplication":2}`)
}

func (s *stubServer) serveZeroCollections() *reqTracker {
	return s.serveStatic("/arvados/v1/collections",
		`{"items":[],"items_available":0}`)
}

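// serveFooBarFileCollections stubs the collections index with three
// small collections whose manifests reference the "foo" and "bar"
// blocks, and an empty result set for any request whose filters
// mention modified_at.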
func (s *stubServer) serveFooBarFileCollections() *reqTracker {
	rt := &reqTracker{}
	s.mux.HandleFunc("/arvados/v1/collections", func(w http.ResponseWriter, r *http.Request) {
		r.ParseForm()
		rt.Add(r)
		if strings.Contains(r.Form.Get("filters"), `modified_at`) {
			io.WriteString(w, `{"items_available":0,"items":[]}`)
		} else {
			io.WriteString(w, `{"items_available":3,"items":[
				{"uuid":"zzzzz-4zz18-aaaaaaaaaaaaaaa","portable_data_hash":"fa7aeb5140e2848d39b416daeef4ffc5+45","manifest_text":". 37b51d194a7513e45b56f6524f2d51f2+3 0:3:bar\n","modified_at":"2014-02-03T17:22:54Z"},
				{"uuid":"zzzzz-4zz18-ehbhgtheo8909or","portable_data_hash":"fa7aeb5140e2848d39b416daeef4ffc5+45","manifest_text":". 37b51d194a7513e45b56f6524f2d51f2+3 0:3:bar\n","modified_at":"2014-02-03T17:22:54Z"},
				{"uuid":"zzzzz-4zz18-znfnqtbbv4spc3w","portable_data_hash":"1f4b0bc7583c2a7f9102c395f4ffc5e3+45","manifest_text":". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo\n","modified_at":"2014-02-03T17:22:54Z"}]}`)
		}
	})
	return rt
}

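// serveCollectionsButSkipOne stubs a collections index that claims
// three collections are available but only ever returns two of them,
// so one collection is effectively skipped by the modified_at/uuid
// paging queries.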
func (s *stubServer) serveCollectionsButSkipOne() *reqTracker {
	rt := &reqTracker{}
	s.mux.HandleFunc("/arvados/v1/collections", func(w http.ResponseWriter, r *http.Request) {
		r.ParseForm()
		rt.Add(r)
		if strings.Contains(r.Form.Get("filters"), `"modified_at","\u003c="`) {
			io.WriteString(w, `{"items_available":3,"items":[]}`)
		} else if strings.Contains(r.Form.Get("filters"), `"modified_at","\u003e`) {
			io.WriteString(w, `{"items_available":0,"items":[]}`)
		} else if strings.Contains(r.Form.Get("filters"), `"modified_at","="`) && strings.Contains(r.Form.Get("filters"), `"uuid","\u003e"`) {
			io.WriteString(w, `{"items_available":0,"items":[]}`)
		} else if strings.Contains(r.Form.Get("filters"), `"modified_at","=",null`) {
			io.WriteString(w, `{"items_available":0,"items":[]}`)
		} else {
			io.WriteString(w, `{"items_available":2,"items":[
				{"uuid":"zzzzz-4zz18-ehbhgtheo8909or","portable_data_hash":"fa7aeb5140e2848d39b416daeef4ffc5+45","manifest_text":". 37b51d194a7513e45b56f6524f2d51f2+3 0:3:bar\n","modified_at":"2014-02-03T17:22:54Z"},
				{"uuid":"zzzzz-4zz18-znfnqtbbv4spc3w","portable_data_hash":"1f4b0bc7583c2a7f9102c395f4ffc5e3+45","manifest_text":". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo\n","modified_at":"2014-02-03T17:22:54Z"}]}`)
		}
	})
	return rt
}

func (s *stubServer) serveZeroKeepServices() *reqTracker {
	return s.serveJSON("/arvados/v1/keep_services", arvados.KeepServiceList{})
}

func (s *stubServer) serveKeepServices(svcs []arvados.KeepService) *reqTracker {
	return s.serveJSON("/arvados/v1/keep_services", arvados.KeepServiceList{
		ItemsAvailable: len(svcs),
		Items:          svcs,
	})
}

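// serveJSON registers a handler on s.mux that tracks requests to
// path and responds with resp encoded as JSON.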
func (s *stubServer) serveJSON(path string, resp interface{}) *reqTracker {
	rt := &reqTracker{}
	s.mux.HandleFunc(path, func(w http.ResponseWriter, r *http.Request) {
		rt.Add(r)
		json.NewEncoder(w).Encode(resp)
	})
	return rt
}

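// serveKeepstoreMounts responds to each keepstore's /mounts endpoint
// with the stubMounts entry matching the request's Host header.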
func (s *stubServer) serveKeepstoreMounts() *reqTracker {
	rt := &reqTracker{}
	s.mux.HandleFunc("/mounts", func(w http.ResponseWriter, r *http.Request) {
		rt.Add(r)
		json.NewEncoder(w).Encode(stubMounts[r.Host])
	})
	return rt
}

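// serveKeepstoreIndexFoo4Bar1 stubs keepstore index responses in
// which the "foo" block is reported by every server (with a
// different Mtime in each response) and the "bar" block only by
// keep0 -- i.e., foo has 4 replicas and bar has 1.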
func (s *stubServer) serveKeepstoreIndexFoo4Bar1() *reqTracker {
	fooLine := func(mt int) string { return fmt.Sprintf("acbd18db4cc2f85cedef654fccc4a4d8+3 %d\n", 12345678+mt) }
	barLine := "37b51d194a7513e45b56f6524f2d51f2+3 12345678\n"
	rt := &reqTracker{}
	s.mux.HandleFunc("/index/", func(w http.ResponseWriter, r *http.Request) {
		count := rt.Add(r)
		if r.Host == "keep0.zzzzz.arvadosapi.com:25107" && strings.HasPrefix(barLine, r.URL.Path[7:]) {
			io.WriteString(w, barLine)
		}
		if strings.HasPrefix(fooLine(count), r.URL.Path[7:]) {
			io.WriteString(w, fooLine(count))
		}
		io.WriteString(w, "\n")
	})
	for _, mounts := range stubMounts {
		for i, mnt := range mounts {
			i := i
			s.mux.HandleFunc(fmt.Sprintf("/mounts/%s/blocks", mnt.UUID), func(w http.ResponseWriter, r *http.Request) {
				count := rt.Add(r)
				r.ParseForm()
				if i == 0 && r.Host == "keep0.zzzzz.arvadosapi.com:25107" && strings.HasPrefix(barLine, r.Form.Get("prefix")) {
					io.WriteString(w, barLine)
				}
				if i == 0 && strings.HasPrefix(fooLine(count), r.Form.Get("prefix")) {
					io.WriteString(w, fooLine(count))
				}
				io.WriteString(w, "\n")
			})
		}
	}
	return rt
}

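// serveKeepstoreIndexFoo1 stubs keepstore index responses that
// report only the "foo" block; "bar" does not appear anywhere, so
// the balancer should report it as lost.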
func (s *stubServer) serveKeepstoreIndexFoo1() *reqTracker {
	fooLine := "acbd18db4cc2f85cedef654fccc4a4d8+3 12345678\n"
	rt := &reqTracker{}
	s.mux.HandleFunc("/index/", func(w http.ResponseWriter, r *http.Request) {
		rt.Add(r)
		if r.Host == "keep0.zzzzz.arvadosapi.com:25107" && strings.HasPrefix(fooLine, r.URL.Path[7:]) {
			io.WriteString(w, fooLine)
		}
		io.WriteString(w, "\n")
	})
	for _, mounts := range stubMounts {
		for i, mnt := range mounts {
			i := i
			s.mux.HandleFunc(fmt.Sprintf("/mounts/%s/blocks", mnt.UUID), func(w http.ResponseWriter, r *http.Request) {
				rt.Add(r)
				r.ParseForm()
				if i == 0 && strings.HasPrefix(fooLine, r.Form.Get("prefix")) {
					io.WriteString(w, fooLine)
				}
				io.WriteString(w, "\n")
			})
		}
	}
	return rt
}

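// serveKeepstoreIndexIgnoringPrefix stubs keepstore index responses
// that always report the "foo" block, even when the request asks for
// a non-matching prefix. Used to exercise the balancer's bad-index
// detection (see TestRefuseBadIndex).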
func (s *stubServer) serveKeepstoreIndexIgnoringPrefix() *reqTracker {
	fooLine := "acbd18db4cc2f85cedef654fccc4a4d8+3 12345678\n"
	rt := &reqTracker{}
	s.mux.HandleFunc("/index/", func(w http.ResponseWriter, r *http.Request) {
		rt.Add(r)
		io.WriteString(w, fooLine)
		io.WriteString(w, "\n")
	})
	for _, mounts := range stubMounts {
		for _, mnt := range mounts {
			s.mux.HandleFunc(fmt.Sprintf("/mounts/%s/blocks", mnt.UUID), func(w http.ResponseWriter, r *http.Request) {
				rt.Add(r)
				io.WriteString(w, fooLine)
				io.WriteString(w, "\n")
			})
		}
	}
	return rt
}

func (s *stubServer) serveKeepstoreTrash() *reqTracker {
	return s.serveStatic("/trash", `{}`)
}

func (s *stubServer) serveKeepstorePull() *reqTracker {
	return s.serveStatic("/pull", `{}`)
}

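// runSuite tests Server.runOnce and Server.runForever against a
// stubbed API server and stubbed keepstore services.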
type runSuite struct {
	stub   stubServer
	config *arvados.Cluster
	db     *sqlx.DB
	client *arvados.Client
}

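// newServer returns a keep-balance Server wired to the suite's
// cluster config, stub API client, test database, and a fresh
// prometheus registry.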
func (s *runSuite) newServer(options *RunOptions) *Server {
	srv := &Server{
		Cluster:    s.config,
		ArvClient:  s.client,
		RunOptions: *options,
		Metrics:    newMetrics(prometheus.NewRegistry()),
		Logger:     options.Logger,
		Dumper:     options.Dumper,
		DB:         s.db,
	}
	return srv
}

func (s *runSuite) SetUpTest(c *check.C) {
	cfg, err := config.NewLoader(nil, ctxlog.TestLogger(c)).Load()
	c.Assert(err, check.IsNil)
	s.config, err = cfg.GetCluster("")
	c.Assert(err, check.IsNil)
	s.db, err = sqlx.Open("postgres", s.config.PostgreSQL.Connection.String())
	c.Assert(err, check.IsNil)

	s.config.Collections.BalancePeriod = arvados.Duration(time.Second)
	arvadostest.SetServiceURL(&s.config.Services.Keepbalance, "http://localhost:/")

	s.client = &arvados.Client{
		AuthToken: "xyzzy",
		APIHost:   "zzzzz.arvadosapi.com",
		Client:    s.stub.Start()}

	s.stub.serveDiscoveryDoc()
	s.stub.logf = c.Logf
}

func (s *runSuite) TearDownTest(c *check.C) {
	s.stub.Close()
}

func (s *runSuite) TestRefuseZeroCollections(c *check.C) {
	defer arvados.NewClientFromEnv().RequestAndDecode(nil, "POST", "database/reset", nil, nil)
	_, err := s.db.Exec(`delete from collections`)
	c.Assert(err, check.IsNil)
	opts := RunOptions{
		Logger: ctxlog.TestLogger(c),
	}
	s.stub.serveCurrentUserAdmin()
	s.stub.serveZeroCollections()
	s.stub.serveKeepServices(stubServices)
	s.stub.serveKeepstoreMounts()
	s.stub.serveKeepstoreIndexFoo4Bar1()
	trashReqs := s.stub.serveKeepstoreTrash()
	pullReqs := s.stub.serveKeepstorePull()
	srv := s.newServer(&opts)
	_, err = srv.runOnce(context.Background())
	c.Check(err, check.ErrorMatches, "received zero collections")
	c.Check(trashReqs.Count(), check.Equals, 4)
	c.Check(pullReqs.Count(), check.Equals, 0)
}

func (s *runSuite) TestRefuseBadIndex(c *check.C) {
	opts := RunOptions{
		ChunkPrefix: "abc",
		Logger:      ctxlog.TestLogger(c),
	}
	s.stub.serveCurrentUserAdmin()
	s.stub.serveFooBarFileCollections()
	s.stub.serveKeepServices(stubServices)
	s.stub.serveKeepstoreMounts()
	s.stub.serveKeepstoreIndexIgnoringPrefix()
	trashReqs := s.stub.serveKeepstoreTrash()
	pullReqs := s.stub.serveKeepstorePull()
	srv := s.newServer(&opts)
	bal, err := srv.runOnce(context.Background())
	c.Check(err, check.ErrorMatches, ".*Index response included block .* despite asking for prefix \"abc\"")
	c.Check(trashReqs.Count(), check.Equals, 4)
	c.Check(pullReqs.Count(), check.Equals, 0)
	c.Check(bal.stats.trashes, check.Equals, 0)
	c.Check(bal.stats.pulls, check.Equals, 0)
}

func (s *runSuite) TestRefuseNonAdmin(c *check.C) {
	opts := RunOptions{
		Logger: ctxlog.TestLogger(c),
	}
	s.stub.serveCurrentUserNotAdmin()
	s.stub.serveZeroCollections()
	s.stub.serveKeepServices(stubServices)
	s.stub.serveKeepstoreMounts()
	trashReqs := s.stub.serveKeepstoreTrash()
	pullReqs := s.stub.serveKeepstorePull()
	srv := s.newServer(&opts)
	_, err := srv.runOnce(context.Background())
	c.Check(err, check.ErrorMatches, "current user .* is not .* admin user")
	c.Check(trashReqs.Count(), check.Equals, 0)
	c.Check(pullReqs.Count(), check.Equals, 0)
}

func (s *runSuite) TestInvalidChunkPrefix(c *check.C) {
	for _, trial := range []struct {
		prefix string
		errRe  string
	}{
		{"123ABC", "invalid char \"A\" in chunk prefix.*"},
		{"123xyz", "invalid char \"x\" in chunk prefix.*"},
		{"123456789012345678901234567890123", "invalid chunk prefix .* longer than a block hash"},
	} {
		s.SetUpTest(c)
		c.Logf("trying invalid prefix %q", trial.prefix)
		opts := RunOptions{
			ChunkPrefix: trial.prefix,
			Logger:      ctxlog.TestLogger(c),
		}
		s.stub.serveCurrentUserAdmin()
		s.stub.serveFooBarFileCollections()
		s.stub.serveKeepServices(stubServices)
		s.stub.serveKeepstoreMounts()
		trashReqs := s.stub.serveKeepstoreTrash()
		pullReqs := s.stub.serveKeepstorePull()
		srv := s.newServer(&opts)
		_, err := srv.runOnce(context.Background())
		c.Check(err, check.ErrorMatches, trial.errRe)
		c.Check(trashReqs.Count(), check.Equals, 0)
		c.Check(pullReqs.Count(), check.Equals, 0)
	}
}

func (s *runSuite) TestRefuseSameDeviceDifferentVolumes(c *check.C) {
	opts := RunOptions{
		Logger: ctxlog.TestLogger(c),
	}
	s.stub.serveCurrentUserAdmin()
	s.stub.serveZeroCollections()
	s.stub.serveKeepServices(stubServices)
	s.stub.mux.HandleFunc("/mounts", func(w http.ResponseWriter, r *http.Request) {
		hostid := r.Host[:5] // "keep0.zzzzz.arvadosapi.com:25107" => "keep0"
		json.NewEncoder(w).Encode([]arvados.KeepMount{{
			UUID:           "zzzzz-ivpuk-0000000000" + hostid,
			DeviceID:       "keep0-vol0",
			StorageClasses: map[string]bool{"default": true},
		}})
	})
	trashReqs := s.stub.serveKeepstoreTrash()
	pullReqs := s.stub.serveKeepstorePull()
	srv := s.newServer(&opts)
	_, err := srv.runOnce(context.Background())
	c.Check(err, check.ErrorMatches, "cannot continue with config errors.*")
	c.Check(trashReqs.Count(), check.Equals, 0)
	c.Check(pullReqs.Count(), check.Equals, 0)
}

func (s *runSuite) TestWriteLostBlocks(c *check.C) {
	lostf, err := os.CreateTemp("", "keep-balance-lost-blocks-test-")
	c.Assert(err, check.IsNil)
	s.config.Collections.BlobMissingReport = lostf.Name()
	defer os.Remove(lostf.Name())
	opts := RunOptions{
		Logger: ctxlog.TestLogger(c),
	}
	s.stub.serveCurrentUserAdmin()
	s.stub.serveFooBarFileCollections()
	s.stub.serveKeepServices(stubServices)
	s.stub.serveKeepstoreMounts()
	s.stub.serveKeepstoreIndexFoo1()
	s.stub.serveKeepstoreTrash()
	s.stub.serveKeepstorePull()
	srv := s.newServer(&opts)
	_, err = srv.runOnce(context.Background())
	c.Check(err, check.IsNil)
	lost, err := os.ReadFile(lostf.Name())
	c.Assert(err, check.IsNil)
	c.Check(string(lost), check.Matches, `(?ms).*37b51d194a7513e45b56f6524f2d51f2.* fa7aeb5140e2848d39b416daeef4ffc5\+45.*`)
}

func (s *runSuite) TestDryRun(c *check.C) {
	s.config.Collections.BalanceTrashLimit = 0
	s.config.Collections.BalancePullLimit = 0
	opts := RunOptions{
		Logger: ctxlog.TestLogger(c),
	}
	s.stub.serveCurrentUserAdmin()
	collReqs := s.stub.serveFooBarFileCollections()
	s.stub.serveKeepServices(stubServices)
	s.stub.serveKeepstoreMounts()
	s.stub.serveKeepstoreIndexFoo4Bar1()
	trashReqs := s.stub.serveKeepstoreTrash()
	pullReqs := s.stub.serveKeepstorePull()
	srv := s.newServer(&opts)
	bal, err := srv.runOnce(context.Background())
	c.Check(err, check.IsNil)
	for _, req := range collReqs.reqs {
		c.Check(req.Form.Get("include_trash"), check.Equals, "true")
		c.Check(req.Form.Get("include_old_versions"), check.Equals, "true")
	}
	c.Check(trashReqs.Count(), check.Equals, 0)
	c.Check(pullReqs.Count(), check.Equals, 0)
	c.Check(bal.stats.pulls, check.Equals, 0)
	c.Check(bal.stats.pullsDeferred, check.Not(check.Equals), 0)
	c.Check(bal.stats.trashes, check.Equals, 0)
	c.Check(bal.stats.trashesDeferred, check.Not(check.Equals), 0)
	c.Check(bal.stats.underrep.replicas, check.Not(check.Equals), 0)
	c.Check(bal.stats.overrep.replicas, check.Not(check.Equals), 0)

	metrics := arvadostest.GatherMetricsAsString(srv.Metrics.reg)
	c.Check(metrics, check.Matches, `(?ms).*\narvados_keep_trash_entries_deferred_count [1-9].*`)
	c.Check(metrics, check.Matches, `(?ms).*\narvados_keep_pull_entries_deferred_count [1-9].*`)
}

func (s *runSuite) TestCommit(c *check.C) {
	s.config.Collections.BlobMissingReport = c.MkDir() + "/keep-balance-lost-blocks-test-"
	s.config.ManagementToken = "xyzzy"
	opts := RunOptions{
		Logger: ctxlog.TestLogger(c),
		Dumper: ctxlog.TestLogger(c),
	}
	s.stub.serveCurrentUserAdmin()
	s.stub.serveFooBarFileCollections()
	s.stub.serveKeepServices(stubServices)
	s.stub.serveKeepstoreMounts()
	s.stub.serveKeepstoreIndexFoo4Bar1()
	trashReqs := s.stub.serveKeepstoreTrash()
	pullReqs := s.stub.serveKeepstorePull()
	srv := s.newServer(&opts)
	bal, err := srv.runOnce(context.Background())
	c.Check(err, check.IsNil)
	c.Check(trashReqs.Count(), check.Equals, 8)
	c.Check(pullReqs.Count(), check.Equals, 4)
	// "foo" block is overreplicated by 2
	c.Check(bal.stats.trashes, check.Equals, 2)
	// "bar" block is underreplicated by 1, and its only copy is
	// in a poor rendezvous position
	c.Check(bal.stats.pulls, check.Equals, 2)

	lost, err := os.ReadFile(s.config.Collections.BlobMissingReport)
	c.Assert(err, check.IsNil)
	c.Check(string(lost), check.Not(check.Matches), `(?ms).*acbd18db4cc2f85cedef654fccc4a4d8.*`)

	metrics := arvadostest.GatherMetricsAsString(srv.Metrics.reg)
	c.Check(metrics, check.Matches, `(?ms).*\narvados_keep_total_bytes 15\n.*`)
	c.Check(metrics, check.Matches, `(?ms).*\narvados_keepbalance_changeset_compute_seconds_sum [0-9\.]+\n.*`)
	c.Check(metrics, check.Matches, `(?ms).*\narvados_keepbalance_changeset_compute_seconds_count 1\n.*`)
	c.Check(metrics, check.Matches, `(?ms).*\narvados_keep_dedup_byte_ratio [1-9].*`)
	c.Check(metrics, check.Matches, `(?ms).*\narvados_keep_dedup_block_ratio [1-9].*`)

	for _, cat := range []string{
		"dedup_byte_ratio", "dedup_block_ratio", "collection_bytes",
		"referenced_bytes", "referenced_blocks", "reference_count",
		"pull_entries_sent_count",
		"trash_entries_sent_count",
	} {
		c.Check(metrics, check.Matches, `(?ms).*\narvados_keep_`+cat+` [1-9].*`)
	}

	for _, cat := range []string{
		"pull_entries_deferred_count",
		"trash_entries_deferred_count",
	} {
		c.Check(metrics, check.Matches, `(?ms).*\narvados_keep_`+cat+` 0\n.*`)
	}

	c.Check(metrics, check.Matches, `(?ms).*\narvados_keep_replicated_block_count{replicas="0"} [1-9].*`)
	c.Check(metrics, check.Matches, `(?ms).*\narvados_keep_replicated_block_count{replicas="1"} [1-9].*`)
	c.Check(metrics, check.Matches, `(?ms).*\narvados_keep_replicated_block_count{replicas="9"} 0\n.*`)

	for _, sub := range []string{"replicas", "blocks", "bytes"} {
		for _, cat := range []string{"needed", "unneeded", "unachievable", "pulling"} {
			c.Check(metrics, check.Matches, `(?ms).*\narvados_keep_usage_`+sub+`{status="`+cat+`",storage_class="default"} [1-9].*`)
		}
		for _, cat := range []string{"total", "garbage", "transient", "overreplicated", "underreplicated", "unachievable", "balanced", "desired", "lost"} {
			c.Check(metrics, check.Matches, `(?ms).*\narvados_keep_`+cat+`_`+sub+` [0-9].*`)
		}
	}
	c.Logf("%s", metrics)
}

func (s *runSuite) TestChunkPrefix(c *check.C) {
	s.config.Collections.BlobMissingReport = c.MkDir() + "/keep-balance-lost-blocks-test-"
	opts := RunOptions{
		ChunkPrefix: "ac", // catch "foo" but not "bar"
		Logger:      ctxlog.TestLogger(c),
		Dumper:      ctxlog.TestLogger(c),
	}
	s.stub.serveCurrentUserAdmin()
	s.stub.serveFooBarFileCollections()
	s.stub.serveKeepServices(stubServices)
	s.stub.serveKeepstoreMounts()
	s.stub.serveKeepstoreIndexFoo4Bar1()
	trashReqs := s.stub.serveKeepstoreTrash()
	pullReqs := s.stub.serveKeepstorePull()
	srv := s.newServer(&opts)
	bal, err := srv.runOnce(context.Background())
	c.Check(err, check.IsNil)
	c.Check(trashReqs.Count(), check.Equals, 8)
	c.Check(pullReqs.Count(), check.Equals, 4)
	// "foo" block is overreplicated by 2
	c.Check(bal.stats.trashes, check.Equals, 2)
	// "bar" block is underreplicated but does not match prefix
	c.Check(bal.stats.pulls, check.Equals, 0)

	lost, err := os.ReadFile(s.config.Collections.BlobMissingReport)
	c.Assert(err, check.IsNil)
	c.Check(string(lost), check.Equals, "")
}

func (s *runSuite) TestRunForever_TriggeredByTimer(c *check.C) {
	s.config.ManagementToken = "xyzzy"
	opts := RunOptions{
		Logger: ctxlog.TestLogger(c),
		Dumper: ctxlog.TestLogger(c),
	}
	s.stub.serveCurrentUserAdmin()
	s.stub.serveFooBarFileCollections()
	s.stub.serveKeepServices(stubServices)
	s.stub.serveKeepstoreMounts()
	s.stub.serveKeepstoreIndexFoo4Bar1()
	trashReqs := s.stub.serveKeepstoreTrash()
	pullReqs := s.stub.serveKeepstorePull()

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	s.config.Collections.BalancePeriod = arvados.Duration(10 * time.Millisecond)
	srv := s.newServer(&opts)

	done := make(chan bool)
	go func() {
		srv.runForever(ctx)
		close(done)
	}()

	// Each run should send 4 pull lists + 4 trash lists. The
	// first run should also send 4 empty trash lists at
	// startup. We should complete at least four runs in much less
	// than 10s.
	for t0 := time.Now(); time.Since(t0) < 10*time.Second; {
		pulls := pullReqs.Count()
		if pulls >= 16 && trashReqs.Count() == pulls+4 {
			break
		}
		time.Sleep(time.Millisecond)
	}
	cancel()
	<-done
	c.Check(pullReqs.Count() >= 16, check.Equals, true)
	c.Check(trashReqs.Count() >= 20, check.Equals, true)

	// We should have completed 4 runs before calling cancel().
	// But the next run might also have started before we called
	// cancel(), in which case the extra run will be included in
	// the changeset_compute_seconds_count metric.
	completed := pullReqs.Count() / 4
	metrics := arvadostest.GatherMetricsAsString(srv.Metrics.reg)
	c.Check(metrics, check.Matches, fmt.Sprintf(`(?ms).*\narvados_keepbalance_changeset_compute_seconds_count (%d|%d)\n.*`, completed, completed+1))
}

func (s *runSuite) TestRunForever_TriggeredBySignal(c *check.C) {
	s.config.ManagementToken = "xyzzy"
	opts := RunOptions{
		Logger: ctxlog.TestLogger(c),
		Dumper: ctxlog.TestLogger(c),
	}
	s.stub.serveCurrentUserAdmin()
	s.stub.serveFooBarFileCollections()
	s.stub.serveKeepServices(stubServices)
	s.stub.serveKeepstoreMounts()
	s.stub.serveKeepstoreIndexFoo4Bar1()
	trashReqs := s.stub.serveKeepstoreTrash()
	pullReqs := s.stub.serveKeepstorePull()

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	s.config.Collections.BalancePeriod = arvados.Duration(time.Minute)
	srv := s.newServer(&opts)

	done := make(chan bool)
	go func() {
		srv.runForever(ctx)
		close(done)
	}()

	procself, err := os.FindProcess(os.Getpid())
	c.Assert(err, check.IsNil)

	// Each run should send 4 pull lists + 4 trash lists. The
	// first run should also send 4 empty trash lists at
	// startup. We should be able to complete four runs in much
	// less than 10s.
	completedRuns := 0
	for t0 := time.Now(); time.Since(t0) < 10*time.Second; {
		pulls := pullReqs.Count()
		if pulls >= 16 && trashReqs.Count() == pulls+4 {
			break
		}
		// Once the first run has started automatically, we
		// send one SIGUSR1 at the end of each run to trigger
		// the next, so we get exactly 4 runs in total.
		if pulls > 0 && pulls%4 == 0 && pulls <= 12 && pulls/4 > completedRuns {
			completedRuns = pulls / 4
			c.Logf("completed run %d, sending SIGUSR1 to trigger next run", completedRuns)
			procself.Signal(syscall.SIGUSR1)
		}
		time.Sleep(time.Millisecond)
	}
	cancel()
	<-done
	c.Check(pullReqs.Count(), check.Equals, 16)
	c.Check(trashReqs.Count(), check.Equals, 20)

	metrics := arvadostest.GatherMetricsAsString(srv.Metrics.reg)
	c.Check(metrics, check.Matches, `(?ms).*\narvados_keepbalance_changeset_compute_seconds_count 4\n.*`)
}