Merge branch '14596-no-requeue'
[arvados.git] / services / keep-balance / balance_run_test.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package main
6
7 import (
8         "encoding/json"
9         "fmt"
10         "io"
11         "io/ioutil"
12         "net/http"
13         "net/http/httptest"
14         "strings"
15         "sync"
16         "time"
17
18         "git.curoverse.com/arvados.git/sdk/go/arvados"
19         "github.com/Sirupsen/logrus"
20         check "gopkg.in/check.v1"
21 )
22
23 var _ = check.Suite(&runSuite{})
24
25 type reqTracker struct {
26         reqs []http.Request
27         sync.Mutex
28 }
29
30 func (rt *reqTracker) Count() int {
31         rt.Lock()
32         defer rt.Unlock()
33         return len(rt.reqs)
34 }
35
36 func (rt *reqTracker) Add(req *http.Request) int {
37         rt.Lock()
38         defer rt.Unlock()
39         rt.reqs = append(rt.reqs, *req)
40         return len(rt.reqs)
41 }
42
43 var stubServices = []arvados.KeepService{
44         {
45                 UUID:           "zzzzz-bi6l4-000000000000000",
46                 ServiceHost:    "keep0.zzzzz.arvadosapi.com",
47                 ServicePort:    25107,
48                 ServiceSSLFlag: false,
49                 ServiceType:    "disk",
50         },
51         {
52                 UUID:           "zzzzz-bi6l4-000000000000001",
53                 ServiceHost:    "keep1.zzzzz.arvadosapi.com",
54                 ServicePort:    25107,
55                 ServiceSSLFlag: false,
56                 ServiceType:    "disk",
57         },
58         {
59                 UUID:           "zzzzz-bi6l4-000000000000002",
60                 ServiceHost:    "keep2.zzzzz.arvadosapi.com",
61                 ServicePort:    25107,
62                 ServiceSSLFlag: false,
63                 ServiceType:    "disk",
64         },
65         {
66                 UUID:           "zzzzz-bi6l4-000000000000003",
67                 ServiceHost:    "keep3.zzzzz.arvadosapi.com",
68                 ServicePort:    25107,
69                 ServiceSSLFlag: false,
70                 ServiceType:    "disk",
71         },
72         {
73                 UUID:           "zzzzz-bi6l4-h0a0xwut9qa6g3a",
74                 ServiceHost:    "keep.zzzzz.arvadosapi.com",
75                 ServicePort:    25333,
76                 ServiceSSLFlag: true,
77                 ServiceType:    "proxy",
78         },
79 }
80
81 var stubMounts = map[string][]arvados.KeepMount{
82         "keep0.zzzzz.arvadosapi.com:25107": {{
83                 UUID:     "zzzzz-ivpuk-000000000000000",
84                 DeviceID: "keep0-vol0",
85         }},
86         "keep1.zzzzz.arvadosapi.com:25107": {{
87                 UUID:     "zzzzz-ivpuk-100000000000000",
88                 DeviceID: "keep1-vol0",
89         }},
90         "keep2.zzzzz.arvadosapi.com:25107": {{
91                 UUID:     "zzzzz-ivpuk-200000000000000",
92                 DeviceID: "keep2-vol0",
93         }},
94         "keep3.zzzzz.arvadosapi.com:25107": {{
95                 UUID:     "zzzzz-ivpuk-300000000000000",
96                 DeviceID: "keep3-vol0",
97         }},
98 }
99
100 // stubServer is an HTTP transport that intercepts and processes all
101 // requests using its own handlers.
102 type stubServer struct {
103         mux      *http.ServeMux
104         srv      *httptest.Server
105         mutex    sync.Mutex
106         Requests reqTracker
107         logf     func(string, ...interface{})
108 }
109
110 // Start initializes the stub server and returns an *http.Client that
111 // uses the stub server to handle all requests.
112 //
113 // A stubServer that has been started should eventually be shut down
114 // with Close().
115 func (s *stubServer) Start() *http.Client {
116         // Set up a config.Client that forwards all requests to s.mux
117         // via s.srv. Test cases will attach handlers to s.mux to get
118         // the desired responses.
119         s.mux = http.NewServeMux()
120         s.srv = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
121                 s.mutex.Lock()
122                 s.Requests.Add(r)
123                 s.mutex.Unlock()
124                 w.Header().Set("Content-Type", "application/json")
125                 s.mux.ServeHTTP(w, r)
126         }))
127         return &http.Client{Transport: s}
128 }
129
130 func (s *stubServer) RoundTrip(req *http.Request) (*http.Response, error) {
131         w := httptest.NewRecorder()
132         s.mux.ServeHTTP(w, req)
133         return &http.Response{
134                 StatusCode: w.Code,
135                 Status:     fmt.Sprintf("%d %s", w.Code, http.StatusText(w.Code)),
136                 Header:     w.HeaderMap,
137                 Body:       ioutil.NopCloser(w.Body)}, nil
138 }
139
140 // Close releases resources used by the server.
141 func (s *stubServer) Close() {
142         s.srv.Close()
143 }
144
145 func (s *stubServer) serveStatic(path, data string) *reqTracker {
146         rt := &reqTracker{}
147         s.mux.HandleFunc(path, func(w http.ResponseWriter, r *http.Request) {
148                 rt.Add(r)
149                 if r.Body != nil {
150                         ioutil.ReadAll(r.Body)
151                         r.Body.Close()
152                 }
153                 io.WriteString(w, data)
154         })
155         return rt
156 }
157
158 func (s *stubServer) serveCurrentUserAdmin() *reqTracker {
159         return s.serveStatic("/arvados/v1/users/current",
160                 `{"uuid":"zzzzz-tpzed-000000000000000","is_admin":true,"is_active":true}`)
161 }
162
163 func (s *stubServer) serveCurrentUserNotAdmin() *reqTracker {
164         return s.serveStatic("/arvados/v1/users/current",
165                 `{"uuid":"zzzzz-tpzed-000000000000000","is_admin":false,"is_active":true}`)
166 }
167
168 func (s *stubServer) serveDiscoveryDoc() *reqTracker {
169         return s.serveStatic("/discovery/v1/apis/arvados/v1/rest",
170                 `{"defaultCollectionReplication":2}`)
171 }
172
173 func (s *stubServer) serveZeroCollections() *reqTracker {
174         return s.serveStatic("/arvados/v1/collections",
175                 `{"items":[],"items_available":0}`)
176 }
177
178 func (s *stubServer) serveFooBarFileCollections() *reqTracker {
179         rt := &reqTracker{}
180         s.mux.HandleFunc("/arvados/v1/collections", func(w http.ResponseWriter, r *http.Request) {
181                 r.ParseForm()
182                 rt.Add(r)
183                 if strings.Contains(r.Form.Get("filters"), `modified_at`) {
184                         io.WriteString(w, `{"items_available":0,"items":[]}`)
185                 } else {
186                         io.WriteString(w, `{"items_available":3,"items":[
187                                 {"uuid":"zzzzz-4zz18-aaaaaaaaaaaaaaa","portable_data_hash":"fa7aeb5140e2848d39b416daeef4ffc5+45","manifest_text":". 37b51d194a7513e45b56f6524f2d51f2+3 0:3:bar\n","modified_at":"2014-02-03T17:22:54Z"},
188                                 {"uuid":"zzzzz-4zz18-ehbhgtheo8909or","portable_data_hash":"fa7aeb5140e2848d39b416daeef4ffc5+45","manifest_text":". 37b51d194a7513e45b56f6524f2d51f2+3 0:3:bar\n","modified_at":"2014-02-03T17:22:54Z"},
189                                 {"uuid":"zzzzz-4zz18-znfnqtbbv4spc3w","portable_data_hash":"1f4b0bc7583c2a7f9102c395f4ffc5e3+45","manifest_text":". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo\n","modified_at":"2014-02-03T17:22:54Z"}]}`)
190                 }
191         })
192         return rt
193 }
194
195 func (s *stubServer) serveCollectionsButSkipOne() *reqTracker {
196         rt := &reqTracker{}
197         s.mux.HandleFunc("/arvados/v1/collections", func(w http.ResponseWriter, r *http.Request) {
198                 r.ParseForm()
199                 rt.Add(r)
200                 if strings.Contains(r.Form.Get("filters"), `"modified_at","\u003c="`) {
201                         io.WriteString(w, `{"items_available":3,"items":[]}`)
202                 } else if strings.Contains(r.Form.Get("filters"), `"modified_at","\u003e`) {
203                         io.WriteString(w, `{"items_available":0,"items":[]}`)
204                 } else if strings.Contains(r.Form.Get("filters"), `"modified_at","="`) && strings.Contains(r.Form.Get("filters"), `"uuid","\u003e"`) {
205                         io.WriteString(w, `{"items_available":0,"items":[]}`)
206                 } else {
207                         io.WriteString(w, `{"items_available":2,"items":[
208                                 {"uuid":"zzzzz-4zz18-ehbhgtheo8909or","portable_data_hash":"fa7aeb5140e2848d39b416daeef4ffc5+45","manifest_text":". 37b51d194a7513e45b56f6524f2d51f2+3 0:3:bar\n","modified_at":"2014-02-03T17:22:54Z"},
209                                 {"uuid":"zzzzz-4zz18-znfnqtbbv4spc3w","portable_data_hash":"1f4b0bc7583c2a7f9102c395f4ffc5e3+45","manifest_text":". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo\n","modified_at":"2014-02-03T17:22:54Z"}]}`)
210                 }
211         })
212         return rt
213 }
214
215 func (s *stubServer) serveZeroKeepServices() *reqTracker {
216         return s.serveJSON("/arvados/v1/keep_services", arvados.KeepServiceList{})
217 }
218
219 func (s *stubServer) serveKeepServices(svcs []arvados.KeepService) *reqTracker {
220         return s.serveJSON("/arvados/v1/keep_services", arvados.KeepServiceList{
221                 ItemsAvailable: len(svcs),
222                 Items:          svcs,
223         })
224 }
225
226 func (s *stubServer) serveJSON(path string, resp interface{}) *reqTracker {
227         rt := &reqTracker{}
228         s.mux.HandleFunc(path, func(w http.ResponseWriter, r *http.Request) {
229                 rt.Add(r)
230                 json.NewEncoder(w).Encode(resp)
231         })
232         return rt
233 }
234
235 func (s *stubServer) serveKeepstoreMounts() *reqTracker {
236         rt := &reqTracker{}
237         s.mux.HandleFunc("/mounts", func(w http.ResponseWriter, r *http.Request) {
238                 rt.Add(r)
239                 json.NewEncoder(w).Encode(stubMounts[r.Host])
240         })
241         return rt
242 }
243
244 func (s *stubServer) serveKeepstoreIndexFoo4Bar1() *reqTracker {
245         rt := &reqTracker{}
246         s.mux.HandleFunc("/index/", func(w http.ResponseWriter, r *http.Request) {
247                 count := rt.Add(r)
248                 if r.Host == "keep0.zzzzz.arvadosapi.com:25107" {
249                         io.WriteString(w, "37b51d194a7513e45b56f6524f2d51f2+3 12345678\n")
250                 }
251                 fmt.Fprintf(w, "acbd18db4cc2f85cedef654fccc4a4d8+3 %d\n\n", 12345678+count)
252         })
253         for _, mounts := range stubMounts {
254                 for i, mnt := range mounts {
255                         i := i
256                         s.mux.HandleFunc(fmt.Sprintf("/mounts/%s/blocks", mnt.UUID), func(w http.ResponseWriter, r *http.Request) {
257                                 count := rt.Add(r)
258                                 if i == 0 && r.Host == "keep0.zzzzz.arvadosapi.com:25107" {
259                                         io.WriteString(w, "37b51d194a7513e45b56f6524f2d51f2+3 12345678\n")
260                                 }
261                                 if i == 0 {
262                                         fmt.Fprintf(w, "acbd18db4cc2f85cedef654fccc4a4d8+3 %d\n", 12345678+count)
263                                 }
264                                 fmt.Fprintf(w, "\n")
265                         })
266                 }
267         }
268         return rt
269 }
270
271 func (s *stubServer) serveKeepstoreTrash() *reqTracker {
272         return s.serveStatic("/trash", `{}`)
273 }
274
275 func (s *stubServer) serveKeepstorePull() *reqTracker {
276         return s.serveStatic("/pull", `{}`)
277 }
278
279 type runSuite struct {
280         stub   stubServer
281         config Config
282 }
283
284 // make a log.Logger that writes to the current test's c.Log().
285 func (s *runSuite) logger(c *check.C) *logrus.Logger {
286         r, w := io.Pipe()
287         go func() {
288                 buf := make([]byte, 10000)
289                 for {
290                         n, err := r.Read(buf)
291                         if n > 0 {
292                                 if buf[n-1] == '\n' {
293                                         n--
294                                 }
295                                 c.Log(string(buf[:n]))
296                         }
297                         if err != nil {
298                                 break
299                         }
300                 }
301         }()
302         logger := logrus.New()
303         logger.Out = w
304         return logger
305 }
306
307 func (s *runSuite) SetUpTest(c *check.C) {
308         s.config = Config{
309                 Client: arvados.Client{
310                         AuthToken: "xyzzy",
311                         APIHost:   "zzzzz.arvadosapi.com",
312                         Client:    s.stub.Start()},
313                 KeepServiceTypes: []string{"disk"},
314                 RunPeriod:        arvados.Duration(time.Second),
315         }
316         s.stub.serveDiscoveryDoc()
317         s.stub.logf = c.Logf
318 }
319
320 func (s *runSuite) TearDownTest(c *check.C) {
321         s.stub.Close()
322 }
323
324 func (s *runSuite) TestRefuseZeroCollections(c *check.C) {
325         opts := RunOptions{
326                 CommitPulls: true,
327                 CommitTrash: true,
328                 Logger:      s.logger(c),
329         }
330         s.stub.serveCurrentUserAdmin()
331         s.stub.serveZeroCollections()
332         s.stub.serveKeepServices(stubServices)
333         s.stub.serveKeepstoreMounts()
334         s.stub.serveKeepstoreIndexFoo4Bar1()
335         trashReqs := s.stub.serveKeepstoreTrash()
336         pullReqs := s.stub.serveKeepstorePull()
337         srv, err := NewServer(s.config, opts)
338         c.Assert(err, check.IsNil)
339         _, err = srv.Run()
340         c.Check(err, check.ErrorMatches, "received zero collections")
341         c.Check(trashReqs.Count(), check.Equals, 4)
342         c.Check(pullReqs.Count(), check.Equals, 0)
343 }
344
345 func (s *runSuite) TestServiceTypes(c *check.C) {
346         opts := RunOptions{
347                 CommitPulls: true,
348                 CommitTrash: true,
349                 Logger:      s.logger(c),
350         }
351         s.config.KeepServiceTypes = []string{"unlisted-type"}
352         s.stub.serveCurrentUserAdmin()
353         s.stub.serveFooBarFileCollections()
354         s.stub.serveKeepServices(stubServices)
355         s.stub.serveKeepstoreMounts()
356         indexReqs := s.stub.serveKeepstoreIndexFoo4Bar1()
357         trashReqs := s.stub.serveKeepstoreTrash()
358         srv, err := NewServer(s.config, opts)
359         c.Assert(err, check.IsNil)
360         _, err = srv.Run()
361         c.Check(err, check.IsNil)
362         c.Check(indexReqs.Count(), check.Equals, 0)
363         c.Check(trashReqs.Count(), check.Equals, 0)
364 }
365
366 func (s *runSuite) TestRefuseNonAdmin(c *check.C) {
367         opts := RunOptions{
368                 CommitPulls: true,
369                 CommitTrash: true,
370                 Logger:      s.logger(c),
371         }
372         s.stub.serveCurrentUserNotAdmin()
373         s.stub.serveZeroCollections()
374         s.stub.serveKeepServices(stubServices)
375         s.stub.serveKeepstoreMounts()
376         trashReqs := s.stub.serveKeepstoreTrash()
377         pullReqs := s.stub.serveKeepstorePull()
378         srv, err := NewServer(s.config, opts)
379         c.Assert(err, check.IsNil)
380         _, err = srv.Run()
381         c.Check(err, check.ErrorMatches, "current user .* is not .* admin user")
382         c.Check(trashReqs.Count(), check.Equals, 0)
383         c.Check(pullReqs.Count(), check.Equals, 0)
384 }
385
386 func (s *runSuite) TestDetectSkippedCollections(c *check.C) {
387         opts := RunOptions{
388                 CommitPulls: true,
389                 CommitTrash: true,
390                 Logger:      s.logger(c),
391         }
392         s.stub.serveCurrentUserAdmin()
393         s.stub.serveCollectionsButSkipOne()
394         s.stub.serveKeepServices(stubServices)
395         s.stub.serveKeepstoreMounts()
396         s.stub.serveKeepstoreIndexFoo4Bar1()
397         trashReqs := s.stub.serveKeepstoreTrash()
398         pullReqs := s.stub.serveKeepstorePull()
399         srv, err := NewServer(s.config, opts)
400         c.Assert(err, check.IsNil)
401         _, err = srv.Run()
402         c.Check(err, check.ErrorMatches, `Retrieved 2 collections with modtime <= .* but server now reports there are 3 collections.*`)
403         c.Check(trashReqs.Count(), check.Equals, 4)
404         c.Check(pullReqs.Count(), check.Equals, 0)
405 }
406
407 func (s *runSuite) TestDryRun(c *check.C) {
408         opts := RunOptions{
409                 CommitPulls: false,
410                 CommitTrash: false,
411                 Logger:      s.logger(c),
412         }
413         s.stub.serveCurrentUserAdmin()
414         collReqs := s.stub.serveFooBarFileCollections()
415         s.stub.serveKeepServices(stubServices)
416         s.stub.serveKeepstoreMounts()
417         s.stub.serveKeepstoreIndexFoo4Bar1()
418         trashReqs := s.stub.serveKeepstoreTrash()
419         pullReqs := s.stub.serveKeepstorePull()
420         srv, err := NewServer(s.config, opts)
421         c.Assert(err, check.IsNil)
422         bal, err := srv.Run()
423         c.Check(err, check.IsNil)
424         for _, req := range collReqs.reqs {
425                 c.Check(req.Form.Get("include_trash"), check.Equals, "true")
426                 c.Check(req.Form.Get("include_old_versions"), check.Equals, "true")
427         }
428         c.Check(trashReqs.Count(), check.Equals, 0)
429         c.Check(pullReqs.Count(), check.Equals, 0)
430         c.Check(bal.stats.pulls, check.Not(check.Equals), 0)
431         c.Check(bal.stats.underrep.replicas, check.Not(check.Equals), 0)
432         c.Check(bal.stats.overrep.replicas, check.Not(check.Equals), 0)
433 }
434
435 func (s *runSuite) TestCommit(c *check.C) {
436         s.config.Listen = ":"
437         s.config.ManagementToken = "xyzzy"
438         opts := RunOptions{
439                 CommitPulls: true,
440                 CommitTrash: true,
441                 Logger:      s.logger(c),
442                 Dumper:      s.logger(c),
443         }
444         s.stub.serveCurrentUserAdmin()
445         s.stub.serveFooBarFileCollections()
446         s.stub.serveKeepServices(stubServices)
447         s.stub.serveKeepstoreMounts()
448         s.stub.serveKeepstoreIndexFoo4Bar1()
449         trashReqs := s.stub.serveKeepstoreTrash()
450         pullReqs := s.stub.serveKeepstorePull()
451         srv, err := NewServer(s.config, opts)
452         c.Assert(err, check.IsNil)
453         bal, err := srv.Run()
454         c.Check(err, check.IsNil)
455         c.Check(trashReqs.Count(), check.Equals, 8)
456         c.Check(pullReqs.Count(), check.Equals, 4)
457         // "foo" block is overreplicated by 2
458         c.Check(bal.stats.trashes, check.Equals, 2)
459         // "bar" block is underreplicated by 1, and its only copy is
460         // in a poor rendezvous position
461         c.Check(bal.stats.pulls, check.Equals, 2)
462
463         metrics := s.getMetrics(c, srv)
464         c.Check(metrics, check.Matches, `(?ms).*\narvados_keep_total_bytes 15\n.*`)
465         c.Check(metrics, check.Matches, `(?ms).*\narvados_keepbalance_changeset_compute_seconds_sum [0-9\.]+\n.*`)
466         c.Check(metrics, check.Matches, `(?ms).*\narvados_keepbalance_changeset_compute_seconds_count 1\n.*`)
467         c.Check(metrics, check.Matches, `(?ms).*\narvados_keep_dedup_byte_ratio 1\.5\n.*`)
468         c.Check(metrics, check.Matches, `(?ms).*\narvados_keep_dedup_block_ratio 1\.5\n.*`)
469 }
470
471 func (s *runSuite) TestRunForever(c *check.C) {
472         s.config.Listen = ":"
473         s.config.ManagementToken = "xyzzy"
474         opts := RunOptions{
475                 CommitPulls: true,
476                 CommitTrash: true,
477                 Logger:      s.logger(c),
478                 Dumper:      s.logger(c),
479         }
480         s.stub.serveCurrentUserAdmin()
481         s.stub.serveFooBarFileCollections()
482         s.stub.serveKeepServices(stubServices)
483         s.stub.serveKeepstoreMounts()
484         s.stub.serveKeepstoreIndexFoo4Bar1()
485         trashReqs := s.stub.serveKeepstoreTrash()
486         pullReqs := s.stub.serveKeepstorePull()
487
488         stop := make(chan interface{})
489         s.config.RunPeriod = arvados.Duration(time.Millisecond)
490         srv, err := NewServer(s.config, opts)
491         c.Assert(err, check.IsNil)
492
493         done := make(chan bool)
494         go func() {
495                 srv.RunForever(stop)
496                 close(done)
497         }()
498
499         // Each run should send 4 pull lists + 4 trash lists. The
500         // first run should also send 4 empty trash lists at
501         // startup. We should complete all four runs in much less than
502         // a second.
503         for t0 := time.Now(); pullReqs.Count() < 16 && time.Since(t0) < 10*time.Second; {
504                 time.Sleep(time.Millisecond)
505         }
506         stop <- true
507         <-done
508         c.Check(pullReqs.Count() >= 16, check.Equals, true)
509         c.Check(trashReqs.Count(), check.Equals, pullReqs.Count()+4)
510         c.Check(s.getMetrics(c, srv), check.Matches, `(?ms).*\narvados_keepbalance_changeset_compute_seconds_count `+fmt.Sprintf("%d", pullReqs.Count()/4)+`\n.*`)
511 }
512
513 func (s *runSuite) getMetrics(c *check.C, srv *Server) string {
514         resp, err := http.Get("http://" + srv.listening + "/metrics")
515         c.Assert(err, check.IsNil)
516         c.Check(resp.StatusCode, check.Equals, http.StatusUnauthorized)
517
518         resp, err = http.Get("http://" + srv.listening + "/metrics?api_token=xyzzy")
519         c.Assert(err, check.IsNil)
520         c.Check(resp.StatusCode, check.Equals, http.StatusOK)
521         buf, err := ioutil.ReadAll(resp.Body)
522         c.Check(err, check.IsNil)
523         return string(buf)
524 }