14285: Merge branch 'master' into 14285-keep-balance-metrics
[arvados.git] / services / keep-balance / balance_run_test.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package main
6
7 import (
8         "encoding/json"
9         "fmt"
10         "io"
11         "io/ioutil"
12         "net/http"
13         "net/http/httptest"
14         "strings"
15         "sync"
16         "time"
17
18         "git.curoverse.com/arvados.git/sdk/go/arvados"
19         "github.com/Sirupsen/logrus"
20
21         check "gopkg.in/check.v1"
22 )
23
24 var _ = check.Suite(&runSuite{})
25
// reqTracker accumulates a copy of every HTTP request handed to
// Add. The embedded mutex guards reqs, so Add and Count can be called
// from handlers running concurrently.
type reqTracker struct {
	reqs []http.Request
	sync.Mutex
}

// Count reports how many requests have been recorded so far.
func (rt *reqTracker) Count() int {
	rt.Lock()
	n := len(rt.reqs)
	rt.Unlock()
	return n
}

// Add records a shallow copy of req and returns the number of
// requests recorded so far, including this one.
func (rt *reqTracker) Add(req *http.Request) int {
	snapshot := *req
	rt.Lock()
	rt.reqs = append(rt.reqs, snapshot)
	total := len(rt.reqs)
	rt.Unlock()
	return total
}
43
44 var stubServices = []arvados.KeepService{
45         {
46                 UUID:           "zzzzz-bi6l4-000000000000000",
47                 ServiceHost:    "keep0.zzzzz.arvadosapi.com",
48                 ServicePort:    25107,
49                 ServiceSSLFlag: false,
50                 ServiceType:    "disk",
51         },
52         {
53                 UUID:           "zzzzz-bi6l4-000000000000001",
54                 ServiceHost:    "keep1.zzzzz.arvadosapi.com",
55                 ServicePort:    25107,
56                 ServiceSSLFlag: false,
57                 ServiceType:    "disk",
58         },
59         {
60                 UUID:           "zzzzz-bi6l4-000000000000002",
61                 ServiceHost:    "keep2.zzzzz.arvadosapi.com",
62                 ServicePort:    25107,
63                 ServiceSSLFlag: false,
64                 ServiceType:    "disk",
65         },
66         {
67                 UUID:           "zzzzz-bi6l4-000000000000003",
68                 ServiceHost:    "keep3.zzzzz.arvadosapi.com",
69                 ServicePort:    25107,
70                 ServiceSSLFlag: false,
71                 ServiceType:    "disk",
72         },
73         {
74                 UUID:           "zzzzz-bi6l4-h0a0xwut9qa6g3a",
75                 ServiceHost:    "keep.zzzzz.arvadosapi.com",
76                 ServicePort:    25333,
77                 ServiceSSLFlag: true,
78                 ServiceType:    "proxy",
79         },
80 }
81
82 var stubMounts = map[string][]arvados.KeepMount{
83         "keep0.zzzzz.arvadosapi.com:25107": {{
84                 UUID:     "zzzzz-ivpuk-000000000000000",
85                 DeviceID: "keep0-vol0",
86         }},
87         "keep1.zzzzz.arvadosapi.com:25107": {{
88                 UUID:     "zzzzz-ivpuk-100000000000000",
89                 DeviceID: "keep1-vol0",
90         }},
91         "keep2.zzzzz.arvadosapi.com:25107": {{
92                 UUID:     "zzzzz-ivpuk-200000000000000",
93                 DeviceID: "keep2-vol0",
94         }},
95         "keep3.zzzzz.arvadosapi.com:25107": {{
96                 UUID:     "zzzzz-ivpuk-300000000000000",
97                 DeviceID: "keep3-vol0",
98         }},
99 }
100
101 // stubServer is an HTTP transport that intercepts and processes all
102 // requests using its own handlers.
103 type stubServer struct {
104         mux      *http.ServeMux
105         srv      *httptest.Server
106         mutex    sync.Mutex
107         Requests reqTracker
108         logf     func(string, ...interface{})
109 }
110
111 // Start initializes the stub server and returns an *http.Client that
112 // uses the stub server to handle all requests.
113 //
114 // A stubServer that has been started should eventually be shut down
115 // with Close().
116 func (s *stubServer) Start() *http.Client {
117         // Set up a config.Client that forwards all requests to s.mux
118         // via s.srv. Test cases will attach handlers to s.mux to get
119         // the desired responses.
120         s.mux = http.NewServeMux()
121         s.srv = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
122                 s.mutex.Lock()
123                 s.Requests.Add(r)
124                 s.mutex.Unlock()
125                 w.Header().Set("Content-Type", "application/json")
126                 s.mux.ServeHTTP(w, r)
127         }))
128         return &http.Client{Transport: s}
129 }
130
131 func (s *stubServer) RoundTrip(req *http.Request) (*http.Response, error) {
132         w := httptest.NewRecorder()
133         s.mux.ServeHTTP(w, req)
134         return &http.Response{
135                 StatusCode: w.Code,
136                 Status:     fmt.Sprintf("%d %s", w.Code, http.StatusText(w.Code)),
137                 Header:     w.HeaderMap,
138                 Body:       ioutil.NopCloser(w.Body)}, nil
139 }
140
141 // Close releases resources used by the server.
142 func (s *stubServer) Close() {
143         s.srv.Close()
144 }
145
146 func (s *stubServer) serveStatic(path, data string) *reqTracker {
147         rt := &reqTracker{}
148         s.mux.HandleFunc(path, func(w http.ResponseWriter, r *http.Request) {
149                 rt.Add(r)
150                 if r.Body != nil {
151                         ioutil.ReadAll(r.Body)
152                         r.Body.Close()
153                 }
154                 io.WriteString(w, data)
155         })
156         return rt
157 }
158
159 func (s *stubServer) serveCurrentUserAdmin() *reqTracker {
160         return s.serveStatic("/arvados/v1/users/current",
161                 `{"uuid":"zzzzz-tpzed-000000000000000","is_admin":true,"is_active":true}`)
162 }
163
164 func (s *stubServer) serveCurrentUserNotAdmin() *reqTracker {
165         return s.serveStatic("/arvados/v1/users/current",
166                 `{"uuid":"zzzzz-tpzed-000000000000000","is_admin":false,"is_active":true}`)
167 }
168
169 func (s *stubServer) serveDiscoveryDoc() *reqTracker {
170         return s.serveStatic("/discovery/v1/apis/arvados/v1/rest",
171                 `{"defaultCollectionReplication":2}`)
172 }
173
174 func (s *stubServer) serveZeroCollections() *reqTracker {
175         return s.serveStatic("/arvados/v1/collections",
176                 `{"items":[],"items_available":0}`)
177 }
178
179 func (s *stubServer) serveFooBarFileCollections() *reqTracker {
180         rt := &reqTracker{}
181         s.mux.HandleFunc("/arvados/v1/collections", func(w http.ResponseWriter, r *http.Request) {
182                 r.ParseForm()
183                 rt.Add(r)
184                 if strings.Contains(r.Form.Get("filters"), `modified_at`) {
185                         io.WriteString(w, `{"items_available":0,"items":[]}`)
186                 } else {
187                         io.WriteString(w, `{"items_available":2,"items":[
188                                 {"uuid":"zzzzz-4zz18-ehbhgtheo8909or","portable_data_hash":"fa7aeb5140e2848d39b416daeef4ffc5+45","manifest_text":". 37b51d194a7513e45b56f6524f2d51f2+3 0:3:bar\n","modified_at":"2014-02-03T17:22:54Z"},
189                                 {"uuid":"zzzzz-4zz18-znfnqtbbv4spc3w","portable_data_hash":"1f4b0bc7583c2a7f9102c395f4ffc5e3+45","manifest_text":". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo\n","modified_at":"2014-02-03T17:22:54Z"}]}`)
190                 }
191         })
192         return rt
193 }
194
195 func (s *stubServer) serveCollectionsButSkipOne() *reqTracker {
196         rt := &reqTracker{}
197         s.mux.HandleFunc("/arvados/v1/collections", func(w http.ResponseWriter, r *http.Request) {
198                 r.ParseForm()
199                 rt.Add(r)
200                 if strings.Contains(r.Form.Get("filters"), `"modified_at","\u003c="`) {
201                         io.WriteString(w, `{"items_available":3,"items":[]}`)
202                 } else if strings.Contains(r.Form.Get("filters"), `"modified_at","\u003e`) {
203                         io.WriteString(w, `{"items_available":0,"items":[]}`)
204                 } else if strings.Contains(r.Form.Get("filters"), `"modified_at","="`) && strings.Contains(r.Form.Get("filters"), `"uuid","\u003e"`) {
205                         io.WriteString(w, `{"items_available":0,"items":[]}`)
206                 } else {
207                         io.WriteString(w, `{"items_available":2,"items":[
208                                 {"uuid":"zzzzz-4zz18-ehbhgtheo8909or","portable_data_hash":"fa7aeb5140e2848d39b416daeef4ffc5+45","manifest_text":". 37b51d194a7513e45b56f6524f2d51f2+3 0:3:bar\n","modified_at":"2014-02-03T17:22:54Z"},
209                                 {"uuid":"zzzzz-4zz18-znfnqtbbv4spc3w","portable_data_hash":"1f4b0bc7583c2a7f9102c395f4ffc5e3+45","manifest_text":". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo\n","modified_at":"2014-02-03T17:22:54Z"}]}`)
210                 }
211         })
212         return rt
213 }
214
215 func (s *stubServer) serveZeroKeepServices() *reqTracker {
216         return s.serveJSON("/arvados/v1/keep_services", arvados.KeepServiceList{})
217 }
218
219 func (s *stubServer) serveKeepServices(svcs []arvados.KeepService) *reqTracker {
220         return s.serveJSON("/arvados/v1/keep_services", arvados.KeepServiceList{
221                 ItemsAvailable: len(svcs),
222                 Items:          svcs,
223         })
224 }
225
226 func (s *stubServer) serveJSON(path string, resp interface{}) *reqTracker {
227         rt := &reqTracker{}
228         s.mux.HandleFunc(path, func(w http.ResponseWriter, r *http.Request) {
229                 rt.Add(r)
230                 json.NewEncoder(w).Encode(resp)
231         })
232         return rt
233 }
234
235 func (s *stubServer) serveKeepstoreMounts() *reqTracker {
236         rt := &reqTracker{}
237         s.mux.HandleFunc("/mounts", func(w http.ResponseWriter, r *http.Request) {
238                 rt.Add(r)
239                 json.NewEncoder(w).Encode(stubMounts[r.Host])
240         })
241         return rt
242 }
243
244 func (s *stubServer) serveKeepstoreIndexFoo4Bar1() *reqTracker {
245         rt := &reqTracker{}
246         s.mux.HandleFunc("/index/", func(w http.ResponseWriter, r *http.Request) {
247                 count := rt.Add(r)
248                 if r.Host == "keep0.zzzzz.arvadosapi.com:25107" {
249                         io.WriteString(w, "37b51d194a7513e45b56f6524f2d51f2+3 12345678\n")
250                 }
251                 fmt.Fprintf(w, "acbd18db4cc2f85cedef654fccc4a4d8+3 %d\n\n", 12345678+count)
252         })
253         for _, mounts := range stubMounts {
254                 for i, mnt := range mounts {
255                         i := i
256                         s.mux.HandleFunc(fmt.Sprintf("/mounts/%s/blocks", mnt.UUID), func(w http.ResponseWriter, r *http.Request) {
257                                 count := rt.Add(r)
258                                 if i == 0 && r.Host == "keep0.zzzzz.arvadosapi.com:25107" {
259                                         io.WriteString(w, "37b51d194a7513e45b56f6524f2d51f2+3 12345678\n")
260                                 }
261                                 if i == 0 {
262                                         fmt.Fprintf(w, "acbd18db4cc2f85cedef654fccc4a4d8+3 %d\n", 12345678+count)
263                                 }
264                                 fmt.Fprintf(w, "\n")
265                         })
266                 }
267         }
268         return rt
269 }
270
271 func (s *stubServer) serveKeepstoreTrash() *reqTracker {
272         return s.serveStatic("/trash", `{}`)
273 }
274
275 func (s *stubServer) serveKeepstorePull() *reqTracker {
276         return s.serveStatic("/pull", `{}`)
277 }
278
279 type runSuite struct {
280         stub   stubServer
281         config Config
282 }
283
284 // make a log.Logger that writes to the current test's c.Log().
285 func (s *runSuite) logger(c *check.C) *logrus.Logger {
286         r, w := io.Pipe()
287         go func() {
288                 buf := make([]byte, 10000)
289                 for {
290                         n, err := r.Read(buf)
291                         if n > 0 {
292                                 if buf[n-1] == '\n' {
293                                         n--
294                                 }
295                                 c.Log(string(buf[:n]))
296                         }
297                         if err != nil {
298                                 break
299                         }
300                 }
301         }()
302         logger := logrus.New()
303         logger.Out = w
304         return logger
305 }
306
307 func (s *runSuite) SetUpTest(c *check.C) {
308         s.config = Config{
309                 Client: arvados.Client{
310                         AuthToken: "xyzzy",
311                         APIHost:   "zzzzz.arvadosapi.com",
312                         Client:    s.stub.Start()},
313                 KeepServiceTypes: []string{"disk"},
314                 RunPeriod:        arvados.Duration(time.Second),
315         }
316         s.stub.serveDiscoveryDoc()
317         s.stub.logf = c.Logf
318 }
319
320 func (s *runSuite) TearDownTest(c *check.C) {
321         s.stub.Close()
322 }
323
324 func (s *runSuite) TestRefuseZeroCollections(c *check.C) {
325         opts := RunOptions{
326                 CommitPulls: true,
327                 CommitTrash: true,
328                 Logger:      s.logger(c),
329         }
330         s.stub.serveCurrentUserAdmin()
331         s.stub.serveZeroCollections()
332         s.stub.serveKeepServices(stubServices)
333         s.stub.serveKeepstoreMounts()
334         s.stub.serveKeepstoreIndexFoo4Bar1()
335         trashReqs := s.stub.serveKeepstoreTrash()
336         pullReqs := s.stub.serveKeepstorePull()
337         srv, err := NewServer(s.config, opts)
338         c.Assert(err, check.IsNil)
339         _, err = srv.Run()
340         c.Check(err, check.ErrorMatches, "received zero collections")
341         c.Check(trashReqs.Count(), check.Equals, 4)
342         c.Check(pullReqs.Count(), check.Equals, 0)
343 }
344
345 func (s *runSuite) TestServiceTypes(c *check.C) {
346         opts := RunOptions{
347                 CommitPulls: true,
348                 CommitTrash: true,
349                 Logger:      s.logger(c),
350         }
351         s.config.KeepServiceTypes = []string{"unlisted-type"}
352         s.stub.serveCurrentUserAdmin()
353         s.stub.serveFooBarFileCollections()
354         s.stub.serveKeepServices(stubServices)
355         s.stub.serveKeepstoreMounts()
356         indexReqs := s.stub.serveKeepstoreIndexFoo4Bar1()
357         trashReqs := s.stub.serveKeepstoreTrash()
358         srv, err := NewServer(s.config, opts)
359         c.Assert(err, check.IsNil)
360         _, err = srv.Run()
361         c.Check(err, check.IsNil)
362         c.Check(indexReqs.Count(), check.Equals, 0)
363         c.Check(trashReqs.Count(), check.Equals, 0)
364 }
365
366 func (s *runSuite) TestRefuseNonAdmin(c *check.C) {
367         opts := RunOptions{
368                 CommitPulls: true,
369                 CommitTrash: true,
370                 Logger:      s.logger(c),
371         }
372         s.stub.serveCurrentUserNotAdmin()
373         s.stub.serveZeroCollections()
374         s.stub.serveKeepServices(stubServices)
375         s.stub.serveKeepstoreMounts()
376         trashReqs := s.stub.serveKeepstoreTrash()
377         pullReqs := s.stub.serveKeepstorePull()
378         srv, err := NewServer(s.config, opts)
379         c.Assert(err, check.IsNil)
380         _, err = srv.Run()
381         c.Check(err, check.ErrorMatches, "current user .* is not .* admin user")
382         c.Check(trashReqs.Count(), check.Equals, 0)
383         c.Check(pullReqs.Count(), check.Equals, 0)
384 }
385
386 func (s *runSuite) TestDetectSkippedCollections(c *check.C) {
387         opts := RunOptions{
388                 CommitPulls: true,
389                 CommitTrash: true,
390                 Logger:      s.logger(c),
391         }
392         s.stub.serveCurrentUserAdmin()
393         s.stub.serveCollectionsButSkipOne()
394         s.stub.serveKeepServices(stubServices)
395         s.stub.serveKeepstoreMounts()
396         s.stub.serveKeepstoreIndexFoo4Bar1()
397         trashReqs := s.stub.serveKeepstoreTrash()
398         pullReqs := s.stub.serveKeepstorePull()
399         srv, err := NewServer(s.config, opts)
400         c.Assert(err, check.IsNil)
401         _, err = srv.Run()
402         c.Check(err, check.ErrorMatches, `Retrieved 2 collections with modtime <= .* but server now reports there are 3 collections.*`)
403         c.Check(trashReqs.Count(), check.Equals, 4)
404         c.Check(pullReqs.Count(), check.Equals, 0)
405 }
406
407 func (s *runSuite) TestDryRun(c *check.C) {
408         opts := RunOptions{
409                 CommitPulls: false,
410                 CommitTrash: false,
411                 Logger:      s.logger(c),
412         }
413         s.stub.serveCurrentUserAdmin()
414         collReqs := s.stub.serveFooBarFileCollections()
415         s.stub.serveKeepServices(stubServices)
416         s.stub.serveKeepstoreMounts()
417         s.stub.serveKeepstoreIndexFoo4Bar1()
418         trashReqs := s.stub.serveKeepstoreTrash()
419         pullReqs := s.stub.serveKeepstorePull()
420         srv, err := NewServer(s.config, opts)
421         c.Assert(err, check.IsNil)
422         bal, err := srv.Run()
423         c.Check(err, check.IsNil)
424         for _, req := range collReqs.reqs {
425                 c.Check(req.Form.Get("include_trash"), check.Equals, "true")
426         }
427         c.Check(trashReqs.Count(), check.Equals, 0)
428         c.Check(pullReqs.Count(), check.Equals, 0)
429         c.Check(bal.stats.pulls, check.Not(check.Equals), 0)
430         c.Check(bal.stats.underrep.replicas, check.Not(check.Equals), 0)
431         c.Check(bal.stats.overrep.replicas, check.Not(check.Equals), 0)
432 }
433
434 func (s *runSuite) TestCommit(c *check.C) {
435         s.config.Listen = ":"
436         s.config.ManagementToken = "xyzzy"
437         opts := RunOptions{
438                 CommitPulls: true,
439                 CommitTrash: true,
440                 Logger:      s.logger(c),
441                 Dumper:      s.logger(c),
442         }
443         s.stub.serveCurrentUserAdmin()
444         s.stub.serveFooBarFileCollections()
445         s.stub.serveKeepServices(stubServices)
446         s.stub.serveKeepstoreMounts()
447         s.stub.serveKeepstoreIndexFoo4Bar1()
448         trashReqs := s.stub.serveKeepstoreTrash()
449         pullReqs := s.stub.serveKeepstorePull()
450         srv, err := NewServer(s.config, opts)
451         c.Assert(err, check.IsNil)
452         bal, err := srv.Run()
453         c.Check(err, check.IsNil)
454         c.Check(trashReqs.Count(), check.Equals, 8)
455         c.Check(pullReqs.Count(), check.Equals, 4)
456         // "foo" block is overreplicated by 2
457         c.Check(bal.stats.trashes, check.Equals, 2)
458         // "bar" block is underreplicated by 1, and its only copy is
459         // in a poor rendezvous position
460         c.Check(bal.stats.pulls, check.Equals, 2)
461
462         metrics := s.getMetrics(c, srv)
463         c.Check(metrics, check.Matches, `(?ms).*\nkeep_total_bytes 15\n.*`)
464         c.Check(metrics, check.Matches, `(?ms).*\nkeepbalance_changeset_compute_seconds_sum [0-9\.]+\n.*`)
465         c.Check(metrics, check.Matches, `(?ms).*\nkeepbalance_changeset_compute_seconds_count 1\n.*`)
466 }
467
468 func (s *runSuite) TestRunForever(c *check.C) {
469         s.config.Listen = ":"
470         s.config.ManagementToken = "xyzzy"
471         opts := RunOptions{
472                 CommitPulls: true,
473                 CommitTrash: true,
474                 Logger:      s.logger(c),
475                 Dumper:      s.logger(c),
476         }
477         s.stub.serveCurrentUserAdmin()
478         s.stub.serveFooBarFileCollections()
479         s.stub.serveKeepServices(stubServices)
480         s.stub.serveKeepstoreMounts()
481         s.stub.serveKeepstoreIndexFoo4Bar1()
482         trashReqs := s.stub.serveKeepstoreTrash()
483         pullReqs := s.stub.serveKeepstorePull()
484
485         stop := make(chan interface{})
486         s.config.RunPeriod = arvados.Duration(time.Millisecond)
487         srv, err := NewServer(s.config, opts)
488         c.Assert(err, check.IsNil)
489
490         done := make(chan bool)
491         go func() {
492                 srv.RunForever(stop)
493                 close(done)
494         }()
495
496         // Each run should send 4 pull lists + 4 trash lists. The
497         // first run should also send 4 empty trash lists at
498         // startup. We should complete all four runs in much less than
499         // a second.
500         for t0 := time.Now(); pullReqs.Count() < 16 && time.Since(t0) < 10*time.Second; {
501                 time.Sleep(time.Millisecond)
502         }
503         stop <- true
504         <-done
505         c.Check(pullReqs.Count() >= 16, check.Equals, true)
506         c.Check(trashReqs.Count(), check.Equals, pullReqs.Count()+4)
507         c.Check(s.getMetrics(c, srv), check.Matches, `(?ms).*\nkeepbalance_changeset_compute_seconds_count `+fmt.Sprintf("%d", pullReqs.Count()/4)+`\n.*`)
508 }
509
510 func (s *runSuite) getMetrics(c *check.C, srv *Server) string {
511         resp, err := http.Get("http://" + srv.listening + "/metrics")
512         c.Assert(err, check.IsNil)
513         c.Check(resp.StatusCode, check.Equals, http.StatusUnauthorized)
514
515         resp, err = http.Get("http://" + srv.listening + "/metrics?api_token=xyzzy")
516         c.Assert(err, check.IsNil)
517         c.Check(resp.StatusCode, check.Equals, http.StatusOK)
518         buf, err := ioutil.ReadAll(resp.Body)
519         c.Check(err, check.IsNil)
520         return string(buf)
521 }