14285: Export stats as prometheus metrics.
[arvados.git] / services / keep-balance / balance_run_test.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package main
6
7 import (
8         "encoding/json"
9         "fmt"
10         "io"
11         "io/ioutil"
12         "net/http"
13         "net/http/httptest"
14         "strings"
15         "sync"
16         "time"
17
18         "git.curoverse.com/arvados.git/sdk/go/arvados"
19         "github.com/Sirupsen/logrus"
20
21         check "gopkg.in/check.v1"
22 )
23
24 var _ = check.Suite(&runSuite{})
25
// reqTracker accumulates copies of the HTTP requests handed to Add.
// The embedded mutex is held by Count and Add while they touch reqs.
type reqTracker struct {
	sync.Mutex
	// reqs holds one copied request per Add call, in call order.
	reqs []http.Request
}
30
31 func (rt *reqTracker) Count() int {
32         rt.Lock()
33         defer rt.Unlock()
34         return len(rt.reqs)
35 }
36
37 func (rt *reqTracker) Add(req *http.Request) int {
38         rt.Lock()
39         defer rt.Unlock()
40         rt.reqs = append(rt.reqs, *req)
41         return len(rt.reqs)
42 }
43
44 var stubServices = []arvados.KeepService{
45         {
46                 UUID:           "zzzzz-bi6l4-000000000000000",
47                 ServiceHost:    "keep0.zzzzz.arvadosapi.com",
48                 ServicePort:    25107,
49                 ServiceSSLFlag: false,
50                 ServiceType:    "disk",
51         },
52         {
53                 UUID:           "zzzzz-bi6l4-000000000000001",
54                 ServiceHost:    "keep1.zzzzz.arvadosapi.com",
55                 ServicePort:    25107,
56                 ServiceSSLFlag: false,
57                 ServiceType:    "disk",
58         },
59         {
60                 UUID:           "zzzzz-bi6l4-000000000000002",
61                 ServiceHost:    "keep2.zzzzz.arvadosapi.com",
62                 ServicePort:    25107,
63                 ServiceSSLFlag: false,
64                 ServiceType:    "disk",
65         },
66         {
67                 UUID:           "zzzzz-bi6l4-000000000000003",
68                 ServiceHost:    "keep3.zzzzz.arvadosapi.com",
69                 ServicePort:    25107,
70                 ServiceSSLFlag: false,
71                 ServiceType:    "disk",
72         },
73         {
74                 UUID:           "zzzzz-bi6l4-h0a0xwut9qa6g3a",
75                 ServiceHost:    "keep.zzzzz.arvadosapi.com",
76                 ServicePort:    25333,
77                 ServiceSSLFlag: true,
78                 ServiceType:    "proxy",
79         },
80 }
81
82 var stubMounts = map[string][]arvados.KeepMount{
83         "keep0.zzzzz.arvadosapi.com:25107": {{
84                 UUID:     "zzzzz-ivpuk-000000000000000",
85                 DeviceID: "keep0-vol0",
86         }},
87         "keep1.zzzzz.arvadosapi.com:25107": {{
88                 UUID:     "zzzzz-ivpuk-100000000000000",
89                 DeviceID: "keep1-vol0",
90         }},
91         "keep2.zzzzz.arvadosapi.com:25107": {{
92                 UUID:     "zzzzz-ivpuk-200000000000000",
93                 DeviceID: "keep2-vol0",
94         }},
95         "keep3.zzzzz.arvadosapi.com:25107": {{
96                 UUID:     "zzzzz-ivpuk-300000000000000",
97                 DeviceID: "keep3-vol0",
98         }},
99 }
100
101 // stubServer is an HTTP transport that intercepts and processes all
102 // requests using its own handlers.
103 type stubServer struct {
104         mux      *http.ServeMux
105         srv      *httptest.Server
106         mutex    sync.Mutex
107         Requests reqTracker
108         logf     func(string, ...interface{})
109 }
110
111 // Start initializes the stub server and returns an *http.Client that
112 // uses the stub server to handle all requests.
113 //
114 // A stubServer that has been started should eventually be shut down
115 // with Close().
116 func (s *stubServer) Start() *http.Client {
117         // Set up a config.Client that forwards all requests to s.mux
118         // via s.srv. Test cases will attach handlers to s.mux to get
119         // the desired responses.
120         s.mux = http.NewServeMux()
121         s.srv = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
122                 s.mutex.Lock()
123                 s.Requests.Add(r)
124                 s.mutex.Unlock()
125                 w.Header().Set("Content-Type", "application/json")
126                 s.mux.ServeHTTP(w, r)
127         }))
128         return &http.Client{Transport: s}
129 }
130
131 func (s *stubServer) RoundTrip(req *http.Request) (*http.Response, error) {
132         w := httptest.NewRecorder()
133         s.mux.ServeHTTP(w, req)
134         return &http.Response{
135                 StatusCode: w.Code,
136                 Status:     fmt.Sprintf("%d %s", w.Code, http.StatusText(w.Code)),
137                 Header:     w.HeaderMap,
138                 Body:       ioutil.NopCloser(w.Body)}, nil
139 }
140
141 // Close releases resources used by the server.
142 func (s *stubServer) Close() {
143         s.srv.Close()
144 }
145
146 func (s *stubServer) serveStatic(path, data string) *reqTracker {
147         rt := &reqTracker{}
148         s.mux.HandleFunc(path, func(w http.ResponseWriter, r *http.Request) {
149                 rt.Add(r)
150                 if r.Body != nil {
151                         ioutil.ReadAll(r.Body)
152                         r.Body.Close()
153                 }
154                 io.WriteString(w, data)
155         })
156         return rt
157 }
158
159 func (s *stubServer) serveCurrentUserAdmin() *reqTracker {
160         return s.serveStatic("/arvados/v1/users/current",
161                 `{"uuid":"zzzzz-tpzed-000000000000000","is_admin":true,"is_active":true}`)
162 }
163
164 func (s *stubServer) serveCurrentUserNotAdmin() *reqTracker {
165         return s.serveStatic("/arvados/v1/users/current",
166                 `{"uuid":"zzzzz-tpzed-000000000000000","is_admin":false,"is_active":true}`)
167 }
168
169 func (s *stubServer) serveDiscoveryDoc() *reqTracker {
170         return s.serveStatic("/discovery/v1/apis/arvados/v1/rest",
171                 `{"defaultCollectionReplication":2}`)
172 }
173
174 func (s *stubServer) serveZeroCollections() *reqTracker {
175         return s.serveStatic("/arvados/v1/collections",
176                 `{"items":[],"items_available":0}`)
177 }
178
179 func (s *stubServer) serveFooBarFileCollections() *reqTracker {
180         rt := &reqTracker{}
181         s.mux.HandleFunc("/arvados/v1/collections", func(w http.ResponseWriter, r *http.Request) {
182                 r.ParseForm()
183                 rt.Add(r)
184                 if strings.Contains(r.Form.Get("filters"), `modified_at`) {
185                         io.WriteString(w, `{"items_available":0,"items":[]}`)
186                 } else {
187                         io.WriteString(w, `{"items_available":2,"items":[
188                                 {"uuid":"zzzzz-4zz18-ehbhgtheo8909or","portable_data_hash":"fa7aeb5140e2848d39b416daeef4ffc5+45","manifest_text":". 37b51d194a7513e45b56f6524f2d51f2+3 0:3:bar\n","modified_at":"2014-02-03T17:22:54Z"},
189                                 {"uuid":"zzzzz-4zz18-znfnqtbbv4spc3w","portable_data_hash":"1f4b0bc7583c2a7f9102c395f4ffc5e3+45","manifest_text":". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo\n","modified_at":"2014-02-03T17:22:54Z"}]}`)
190                 }
191         })
192         return rt
193 }
194
195 func (s *stubServer) serveCollectionsButSkipOne() *reqTracker {
196         rt := &reqTracker{}
197         s.mux.HandleFunc("/arvados/v1/collections", func(w http.ResponseWriter, r *http.Request) {
198                 r.ParseForm()
199                 rt.Add(r)
200                 if strings.Contains(r.Form.Get("filters"), `"modified_at","\u003c="`) {
201                         io.WriteString(w, `{"items_available":3,"items":[]}`)
202                 } else if strings.Contains(r.Form.Get("filters"), `"modified_at","\u003e`) {
203                         io.WriteString(w, `{"items_available":0,"items":[]}`)
204                 } else if strings.Contains(r.Form.Get("filters"), `"modified_at","="`) && strings.Contains(r.Form.Get("filters"), `"uuid","\u003e"`) {
205                         io.WriteString(w, `{"items_available":0,"items":[]}`)
206                 } else {
207                         io.WriteString(w, `{"items_available":2,"items":[
208                                 {"uuid":"zzzzz-4zz18-ehbhgtheo8909or","portable_data_hash":"fa7aeb5140e2848d39b416daeef4ffc5+45","manifest_text":". 37b51d194a7513e45b56f6524f2d51f2+3 0:3:bar\n","modified_at":"2014-02-03T17:22:54Z"},
209                                 {"uuid":"zzzzz-4zz18-znfnqtbbv4spc3w","portable_data_hash":"1f4b0bc7583c2a7f9102c395f4ffc5e3+45","manifest_text":". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo\n","modified_at":"2014-02-03T17:22:54Z"}]}`)
210                 }
211         })
212         return rt
213 }
214
215 func (s *stubServer) serveZeroKeepServices() *reqTracker {
216         return s.serveJSON("/arvados/v1/keep_services", arvados.KeepServiceList{})
217 }
218
219 func (s *stubServer) serveKeepServices(svcs []arvados.KeepService) *reqTracker {
220         return s.serveJSON("/arvados/v1/keep_services", arvados.KeepServiceList{
221                 ItemsAvailable: len(svcs),
222                 Items:          svcs,
223         })
224 }
225
226 func (s *stubServer) serveJSON(path string, resp interface{}) *reqTracker {
227         rt := &reqTracker{}
228         s.mux.HandleFunc(path, func(w http.ResponseWriter, r *http.Request) {
229                 rt.Add(r)
230                 json.NewEncoder(w).Encode(resp)
231         })
232         return rt
233 }
234
235 func (s *stubServer) serveKeepstoreMounts() *reqTracker {
236         rt := &reqTracker{}
237         s.mux.HandleFunc("/mounts", func(w http.ResponseWriter, r *http.Request) {
238                 rt.Add(r)
239                 json.NewEncoder(w).Encode(stubMounts[r.Host])
240         })
241         return rt
242 }
243
244 func (s *stubServer) serveKeepstoreIndexFoo4Bar1() *reqTracker {
245         rt := &reqTracker{}
246         s.mux.HandleFunc("/index/", func(w http.ResponseWriter, r *http.Request) {
247                 count := rt.Add(r)
248                 if r.Host == "keep0.zzzzz.arvadosapi.com:25107" {
249                         io.WriteString(w, "37b51d194a7513e45b56f6524f2d51f2+3 12345678\n")
250                 }
251                 fmt.Fprintf(w, "acbd18db4cc2f85cedef654fccc4a4d8+3 %d\n\n", 12345678+count)
252         })
253         for _, mounts := range stubMounts {
254                 for i, mnt := range mounts {
255                         i := i
256                         s.mux.HandleFunc(fmt.Sprintf("/mounts/%s/blocks", mnt.UUID), func(w http.ResponseWriter, r *http.Request) {
257                                 count := rt.Add(r)
258                                 if i == 0 && r.Host == "keep0.zzzzz.arvadosapi.com:25107" {
259                                         io.WriteString(w, "37b51d194a7513e45b56f6524f2d51f2+3 12345678\n")
260                                 }
261                                 if i == 0 {
262                                         fmt.Fprintf(w, "acbd18db4cc2f85cedef654fccc4a4d8+3 %d\n", 12345678+count)
263                                 }
264                                 fmt.Fprintf(w, "\n")
265                         })
266                 }
267         }
268         return rt
269 }
270
271 func (s *stubServer) serveKeepstoreTrash() *reqTracker {
272         return s.serveStatic("/trash", `{}`)
273 }
274
275 func (s *stubServer) serveKeepstorePull() *reqTracker {
276         return s.serveStatic("/pull", `{}`)
277 }
278
279 type runSuite struct {
280         stub   stubServer
281         config Config
282 }
283
284 // make a log.Logger that writes to the current test's c.Log().
285 func (s *runSuite) logger(c *check.C) *logrus.Logger {
286         r, w := io.Pipe()
287         go func() {
288                 buf := make([]byte, 10000)
289                 for {
290                         n, err := r.Read(buf)
291                         if n > 0 {
292                                 if buf[n-1] == '\n' {
293                                         n--
294                                 }
295                                 c.Log(string(buf[:n]))
296                         }
297                         if err != nil {
298                                 break
299                         }
300                 }
301         }()
302         logger := logrus.New()
303         logger.Out = w
304         return logger
305 }
306
307 func (s *runSuite) SetUpTest(c *check.C) {
308         s.config = Config{
309                 Client: arvados.Client{
310                         AuthToken: "xyzzy",
311                         APIHost:   "zzzzz.arvadosapi.com",
312                         Client:    s.stub.Start()},
313                 KeepServiceTypes: []string{"disk"},
314                 RunPeriod:        arvados.Duration(time.Second),
315         }
316         s.stub.serveDiscoveryDoc()
317         s.stub.logf = c.Logf
318 }
319
320 func (s *runSuite) TearDownTest(c *check.C) {
321         s.stub.Close()
322 }
323
324 func (s *runSuite) TestRefuseZeroCollections(c *check.C) {
325         opts := RunOptions{
326                 CommitPulls: true,
327                 CommitTrash: true,
328                 Logger:      s.logger(c),
329         }
330         s.stub.serveCurrentUserAdmin()
331         s.stub.serveZeroCollections()
332         s.stub.serveKeepServices(stubServices)
333         s.stub.serveKeepstoreMounts()
334         s.stub.serveKeepstoreIndexFoo4Bar1()
335         trashReqs := s.stub.serveKeepstoreTrash()
336         pullReqs := s.stub.serveKeepstorePull()
337         srv, err := NewServer(s.config, opts)
338         c.Assert(err, check.IsNil)
339         _, err = srv.Run()
340         c.Check(err, check.ErrorMatches, "received zero collections")
341         c.Check(trashReqs.Count(), check.Equals, 4)
342         c.Check(pullReqs.Count(), check.Equals, 0)
343 }
344
345 func (s *runSuite) TestServiceTypes(c *check.C) {
346         opts := RunOptions{
347                 CommitPulls: true,
348                 CommitTrash: true,
349                 Logger:      s.logger(c),
350         }
351         s.config.KeepServiceTypes = []string{"unlisted-type"}
352         s.stub.serveCurrentUserAdmin()
353         s.stub.serveFooBarFileCollections()
354         s.stub.serveKeepServices(stubServices)
355         s.stub.serveKeepstoreMounts()
356         indexReqs := s.stub.serveKeepstoreIndexFoo4Bar1()
357         trashReqs := s.stub.serveKeepstoreTrash()
358         srv, err := NewServer(s.config, opts)
359         c.Assert(err, check.IsNil)
360         _, err = srv.Run()
361         c.Check(err, check.IsNil)
362         c.Check(indexReqs.Count(), check.Equals, 0)
363         c.Check(trashReqs.Count(), check.Equals, 0)
364 }
365
366 func (s *runSuite) TestRefuseNonAdmin(c *check.C) {
367         opts := RunOptions{
368                 CommitPulls: true,
369                 CommitTrash: true,
370                 Logger:      s.logger(c),
371         }
372         s.stub.serveCurrentUserNotAdmin()
373         s.stub.serveZeroCollections()
374         s.stub.serveKeepServices(stubServices)
375         s.stub.serveKeepstoreMounts()
376         trashReqs := s.stub.serveKeepstoreTrash()
377         pullReqs := s.stub.serveKeepstorePull()
378         srv, err := NewServer(s.config, opts)
379         c.Assert(err, check.IsNil)
380         _, err = srv.Run()
381         c.Check(err, check.ErrorMatches, "current user .* is not .* admin user")
382         c.Check(trashReqs.Count(), check.Equals, 0)
383         c.Check(pullReqs.Count(), check.Equals, 0)
384 }
385
386 func (s *runSuite) TestDetectSkippedCollections(c *check.C) {
387         opts := RunOptions{
388                 CommitPulls: true,
389                 CommitTrash: true,
390                 Logger:      s.logger(c),
391         }
392         s.stub.serveCurrentUserAdmin()
393         s.stub.serveCollectionsButSkipOne()
394         s.stub.serveKeepServices(stubServices)
395         s.stub.serveKeepstoreMounts()
396         s.stub.serveKeepstoreIndexFoo4Bar1()
397         trashReqs := s.stub.serveKeepstoreTrash()
398         pullReqs := s.stub.serveKeepstorePull()
399         srv, err := NewServer(s.config, opts)
400         c.Assert(err, check.IsNil)
401         _, err = srv.Run()
402         c.Check(err, check.ErrorMatches, `Retrieved 2 collections with modtime <= .* but server now reports there are 3 collections.*`)
403         c.Check(trashReqs.Count(), check.Equals, 4)
404         c.Check(pullReqs.Count(), check.Equals, 0)
405 }
406
407 func (s *runSuite) TestDryRun(c *check.C) {
408         opts := RunOptions{
409                 CommitPulls: false,
410                 CommitTrash: false,
411                 Logger:      s.logger(c),
412         }
413         s.stub.serveCurrentUserAdmin()
414         collReqs := s.stub.serveFooBarFileCollections()
415         s.stub.serveKeepServices(stubServices)
416         s.stub.serveKeepstoreMounts()
417         s.stub.serveKeepstoreIndexFoo4Bar1()
418         trashReqs := s.stub.serveKeepstoreTrash()
419         pullReqs := s.stub.serveKeepstorePull()
420         srv, err := NewServer(s.config, opts)
421         c.Assert(err, check.IsNil)
422         bal, err := srv.Run()
423         c.Check(err, check.IsNil)
424         for _, req := range collReqs.reqs {
425                 c.Check(req.Form.Get("include_trash"), check.Equals, "true")
426         }
427         c.Check(trashReqs.Count(), check.Equals, 0)
428         c.Check(pullReqs.Count(), check.Equals, 0)
429         c.Check(bal.stats.pulls, check.Not(check.Equals), 0)
430         c.Check(bal.stats.underrep.replicas, check.Not(check.Equals), 0)
431         c.Check(bal.stats.overrep.replicas, check.Not(check.Equals), 0)
432 }
433
434 func (s *runSuite) TestCommit(c *check.C) {
435         s.config.Listen = ":"
436         opts := RunOptions{
437                 CommitPulls: true,
438                 CommitTrash: true,
439                 Logger:      s.logger(c),
440                 Dumper:      s.logger(c),
441         }
442         s.stub.serveCurrentUserAdmin()
443         s.stub.serveFooBarFileCollections()
444         s.stub.serveKeepServices(stubServices)
445         s.stub.serveKeepstoreMounts()
446         s.stub.serveKeepstoreIndexFoo4Bar1()
447         trashReqs := s.stub.serveKeepstoreTrash()
448         pullReqs := s.stub.serveKeepstorePull()
449         srv, err := NewServer(s.config, opts)
450         c.Assert(err, check.IsNil)
451         bal, err := srv.Run()
452         c.Check(err, check.IsNil)
453         c.Check(trashReqs.Count(), check.Equals, 8)
454         c.Check(pullReqs.Count(), check.Equals, 4)
455         // "foo" block is overreplicated by 2
456         c.Check(bal.stats.trashes, check.Equals, 2)
457         // "bar" block is underreplicated by 1, and its only copy is
458         // in a poor rendezvous position
459         c.Check(bal.stats.pulls, check.Equals, 2)
460
461         metrics := s.getMetrics(c, srv)
462         c.Check(metrics, check.Matches, `(?ms).*\nkeep_total_bytes 15\n.*`)
463         c.Check(metrics, check.Matches, `(?ms).*\nkeepbalance_changeset_compute_seconds_sum [0-9\.]+\n.*`)
464         c.Check(metrics, check.Matches, `(?ms).*\nkeepbalance_changeset_compute_seconds_count 1\n.*`)
465 }
466
467 func (s *runSuite) TestRunForever(c *check.C) {
468         s.config.Listen = ":"
469         opts := RunOptions{
470                 CommitPulls: true,
471                 CommitTrash: true,
472                 Logger:      s.logger(c),
473                 Dumper:      s.logger(c),
474         }
475         s.stub.serveCurrentUserAdmin()
476         s.stub.serveFooBarFileCollections()
477         s.stub.serveKeepServices(stubServices)
478         s.stub.serveKeepstoreMounts()
479         s.stub.serveKeepstoreIndexFoo4Bar1()
480         trashReqs := s.stub.serveKeepstoreTrash()
481         pullReqs := s.stub.serveKeepstorePull()
482
483         stop := make(chan interface{})
484         s.config.RunPeriod = arvados.Duration(time.Millisecond)
485         srv, err := NewServer(s.config, opts)
486         c.Assert(err, check.IsNil)
487
488         done := make(chan bool)
489         go func() {
490                 srv.RunForever(stop)
491                 close(done)
492         }()
493
494         // Each run should send 4 pull lists + 4 trash lists. The
495         // first run should also send 4 empty trash lists at
496         // startup. We should complete all four runs in much less than
497         // a second.
498         for t0 := time.Now(); pullReqs.Count() < 16 && time.Since(t0) < 10*time.Second; {
499                 time.Sleep(time.Millisecond)
500         }
501         stop <- true
502         <-done
503         c.Check(pullReqs.Count() >= 16, check.Equals, true)
504         c.Check(trashReqs.Count(), check.Equals, pullReqs.Count()+4)
505         c.Check(s.getMetrics(c, srv), check.Matches, `(?ms).*\nkeepbalance_changeset_compute_seconds_count `+fmt.Sprintf("%d", pullReqs.Count()/4)+`\n.*`)
506 }
507
508 func (s *runSuite) getMetrics(c *check.C, srv *Server) string {
509         resp, err := http.Get("http://" + srv.listening + "/metrics")
510         c.Assert(err, check.IsNil)
511         buf, err := ioutil.ReadAll(resp.Body)
512         c.Check(err, check.IsNil)
513         return string(buf)
514 }