16585: simplify the code: use a context instead of a channel for the
[arvados.git] / tools / keep-exercise / keep-exercise.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 // Testing tool for Keep services.
6 //
7 // keepexercise helps measure throughput and test reliability under
8 // various usage patterns.
9 //
10 // By default, it reads and writes blocks containing 2^26 NUL
11 // bytes. This generates network traffic without consuming much disk
12 // space.
13 //
14 // For a more realistic test, enable -vary-request. Warning: this will
15 // fill your storage volumes with random data if you leave it running,
16 // which can cost you money or leave you with too little room for
17 // useful data.
18 //
19 package main
20
21 import (
22         "context"
23         "crypto/rand"
24         "encoding/binary"
25         "flag"
26         "fmt"
27         "io"
28         "io/ioutil"
29         "log"
30         "net/http"
31         "os"
32         "os/signal"
33         "sync/atomic"
34         "syscall"
35         "time"
36
37         "git.arvados.org/arvados.git/sdk/go/arvadosclient"
38         "git.arvados.org/arvados.git/sdk/go/keepclient"
39 )
40
// version is the string printed by the -version flag. It defaults to
// "dev".
// NOTE(review): presumably overridden at build time (e.g. with
// -ldflags "-X main.version=..."); confirm against the build scripts.
var version = "dev"
42
43 // Command line config knobs
44 var (
45         BlockSize     = flag.Int("block-size", keepclient.BLOCKSIZE, "bytes per read/write op")
46         ReadThreads   = flag.Int("rthreads", 1, "number of concurrent readers")
47         WriteThreads  = flag.Int("wthreads", 1, "number of concurrent writers")
48         VaryRequest   = flag.Bool("vary-request", false, "vary the data for each request: consumes disk space, exercises write behavior")
49         VaryThread    = flag.Bool("vary-thread", false, "use -wthreads different data blocks")
50         Replicas      = flag.Int("replicas", 1, "replication level for writing")
51         StatsInterval = flag.Duration("stats-interval", time.Second, "time interval between IO stats reports, or 0 to disable")
52         ServiceURL    = flag.String("url", "", "specify scheme://host of a single keep service to exercise (instead of using all advertised services like normal clients)")
53         ServiceUUID   = flag.String("uuid", "", "specify UUID of a single advertised keep service to exercise")
54         getVersion    = flag.Bool("version", false, "Print version information and exit.")
55         RunTime       = flag.Duration("run-time", 0, "time to run (e.g. 60s), or 0 to run indefinitely (default)")
56         Repeat        = flag.Int("repeat", 1, "number of times to repeat the experiment (default 1)")
57 )
58
// Send 1234 to bytesInChan when we receive 1234 bytes from keepstore.
// doReads sends one value per complete block read; the stats loop in
// runExperiment is the only receiver. The channel is unbuffered, so a
// send blocks until that loop picks it up.
var bytesInChan = make(chan uint64)
// Send 1234 to bytesOutChan when 1234 bytes have been written
// successfully (sent by doWrites, received by runExperiment). Also
// unbuffered.
var bytesOutChan = make(chan uint64)

// Send struct{}{} to errorsChan when an error happens. Each value
// increments the error counter kept by runExperiment's stats loop.
// Unbuffered, like the byte counters above.
var errorsChan = make(chan struct{})
65
66 func main() {
67         flag.Parse()
68
69         // Print version information if requested
70         if *getVersion {
71                 fmt.Printf("keep-exercise %s\n", version)
72                 os.Exit(0)
73         }
74
75         stderr := log.New(os.Stderr, "", log.LstdFlags)
76
77         if *ReadThreads > 0 && *WriteThreads == 0 {
78                 stderr.Fatal("At least one write thread is required if rthreads is non-zero")
79         }
80
81         if *ReadThreads == 0 && *WriteThreads == 0 {
82                 stderr.Fatal("Nothing to do!")
83         }
84
85         arv, err := arvadosclient.MakeArvadosClient()
86         if err != nil {
87                 stderr.Fatal(err)
88         }
89         kc, err := keepclient.MakeKeepClient(arv)
90         if err != nil {
91                 stderr.Fatal(err)
92         }
93         kc.Want_replicas = *Replicas
94
95         kc.HTTPClient = &http.Client{
96                 Timeout: 10 * time.Minute,
97                 // It's not safe to copy *http.DefaultTransport
98                 // because it has a mutex (which might be locked)
99                 // protecting a private map (which might not be nil).
100                 // So we build our own, using the Go 1.12 default
101                 // values.
102                 Transport: &http.Transport{
103                         TLSClientConfig: arvadosclient.MakeTLSConfig(arv.ApiInsecure),
104                 },
105         }
106
107         ctx, cancel := context.WithCancel(context.Background())
108         defer cancel()
109         sigChan := make(chan os.Signal, 1)
110         signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
111         go func() {
112                 <-sigChan
113                 fmt.Print("\r") // Suppress the ^C print
114                 cancel()
115         }()
116
117         overrideServices(kc, stderr)
118         csvHeader := "Timestamp,Elapsed,Read (bytes),Avg Read Speed (MiB/s),Peak Read Speed (MiB/s),Written (bytes),Avg Write Speed (MiB/s),Peak Write Speed (MiB/s),Errors,ReadThreads,WriteThreads,VaryRequest,VaryThread,BlockSize,Replicas,StatsInterval,ServiceURL,ServiceUUID,RunTime,Repeat"
119         var summary string
120
121         for i := 0; i < *Repeat; i++ {
122                 if ctx.Err() == nil {
123                         summary = runExperiment(ctx, kc, summary, csvHeader, stderr)
124                         stderr.Printf("*************************** experiment %d complete ******************************\n", i)
125                         summary += fmt.Sprintf(",%d\n", i)
126                 }
127         }
128         stderr.Println("Summary:")
129         stderr.Println()
130         fmt.Println()
131         fmt.Println(csvHeader + ",Experiment")
132         fmt.Println(summary)
133 }
134
135 func runExperiment(ctx context.Context, kc *keepclient.KeepClient, summary string, csvHeader string, stderr *log.Logger) (newSummary string) {
136         newSummary = summary
137         var nextLocator atomic.Value
138
139         // Start warmup
140         ready := make(chan struct{})
141         var warmup bool
142         if *ReadThreads > 0 {
143                 warmup = true
144                 stderr.Printf("Start warmup phase, waiting for 1 available block before reading starts\n")
145         }
146         nextBuf := make(chan []byte, 1)
147         go makeBufs(nextBuf, 0, stderr)
148         if warmup {
149                 go func() {
150                         locator, _, err := kc.PutB(<-nextBuf)
151                         if err != nil {
152                                 stderr.Print(err)
153                                 errorsChan <- struct{}{}
154                         }
155                         nextLocator.Store(locator)
156                         stderr.Println("Warmup complete!")
157                         close(ready)
158                 }()
159         } else {
160                 close(ready)
161         }
162         select {
163         case <-ctx.Done():
164                 return
165         case <-ready:
166         }
167
168         // Warmup complete
169         ctx, cancel := context.WithDeadline(ctx, time.Now().Add(*RunTime))
170         defer cancel()
171
172         for i := 0; i < *WriteThreads; i++ {
173                 if i > 0 {
174                         // the makeBufs goroutine with index 0 was already started for the warmup phase, above
175                         nextBuf := make(chan []byte, 1)
176                         go makeBufs(nextBuf, i, stderr)
177                 }
178                 go doWrites(ctx, kc, nextBuf, &nextLocator, stderr)
179         }
180         for i := 0; i < *ReadThreads; i++ {
181                 go doReads(ctx, kc, &nextLocator, stderr)
182         }
183
184         t0 := time.Now()
185         var tickChan <-chan time.Time
186         if *StatsInterval > 0 {
187                 tickChan = time.NewTicker(*StatsInterval).C
188         }
189         var bytesIn uint64
190         var bytesOut uint64
191         var errors uint64
192         var rateIn, rateOut float64
193         var maxRateIn, maxRateOut float64
194         var exit, printCsv bool
195         csv := log.New(os.Stdout, "", 0)
196         csv.Println()
197         csv.Println(csvHeader)
198         for {
199                 select {
200                 case <-ctx.Done():
201                         printCsv = true
202                         exit = true
203                 case <-tickChan:
204                         printCsv = true
205                 case i := <-bytesInChan:
206                         bytesIn += i
207                 case o := <-bytesOutChan:
208                         bytesOut += o
209                 case <-errorsChan:
210                         errors++
211                 }
212                 if printCsv {
213                         elapsed := time.Since(t0)
214                         rateIn = float64(bytesIn) / elapsed.Seconds() / 1048576
215                         if rateIn > maxRateIn {
216                                 maxRateIn = rateIn
217                         }
218                         rateOut = float64(bytesOut) / elapsed.Seconds() / 1048576
219                         if rateOut > maxRateOut {
220                                 maxRateOut = rateOut
221                         }
222                         line := fmt.Sprintf("%v,%v,%v,%.1f,%.1f,%v,%.1f,%.1f,%d,%d,%d,%t,%t,%d,%d,%s,%s,%s,%s,%d",
223                                 time.Now().Format("2006/01/02 15:04:05"),
224                                 elapsed,
225                                 bytesIn, rateIn, maxRateIn,
226                                 bytesOut, rateOut, maxRateOut,
227                                 errors,
228                                 *ReadThreads,
229                                 *WriteThreads,
230                                 *VaryRequest,
231                                 *VaryThread,
232                                 *BlockSize,
233                                 *Replicas,
234                                 *StatsInterval,
235                                 *ServiceURL,
236                                 *ServiceUUID,
237                                 *RunTime,
238                                 *Repeat,
239                         )
240                         csv.Println(line)
241                         if exit {
242                                 newSummary += line
243                                 return
244                         }
245                         printCsv = false
246                 }
247         }
248         return
249 }
250
251 func makeBufs(nextBuf chan<- []byte, threadID int, stderr *log.Logger) {
252         buf := make([]byte, *BlockSize)
253         if *VaryThread {
254                 binary.PutVarint(buf, int64(threadID))
255         }
256         randSize := 524288
257         if randSize > *BlockSize {
258                 randSize = *BlockSize
259         }
260         for {
261                 if *VaryRequest {
262                         rnd := make([]byte, randSize)
263                         if _, err := io.ReadFull(rand.Reader, rnd); err != nil {
264                                 stderr.Fatal(err)
265                         }
266                         buf = append(rnd, buf[randSize:]...)
267                 }
268                 nextBuf <- buf
269         }
270 }
271
272 func doWrites(ctx context.Context, kc *keepclient.KeepClient, nextBuf <-chan []byte, nextLocator *atomic.Value, stderr *log.Logger) {
273         for ctx.Err() == nil {
274                 buf := <-nextBuf
275                 locator, _, err := kc.PutB(buf)
276                 if err != nil {
277                         stderr.Print(err)
278                         errorsChan <- struct{}{}
279                         continue
280                 }
281                 bytesOutChan <- uint64(len(buf))
282                 nextLocator.Store(locator)
283         }
284 }
285
286 func doReads(ctx context.Context, kc *keepclient.KeepClient, nextLocator *atomic.Value, stderr *log.Logger) {
287         var locator string
288         for ctx.Err() == nil {
289                 locator = nextLocator.Load().(string)
290                 rdr, size, url, err := kc.Get(locator)
291                 if err != nil {
292                         stderr.Print(err)
293                         errorsChan <- struct{}{}
294                         continue
295                 }
296                 n, err := io.Copy(ioutil.Discard, rdr)
297                 rdr.Close()
298                 if n != size || err != nil {
299                         stderr.Printf("Got %d bytes (expected %d) from %s: %v", n, size, url, err)
300                         errorsChan <- struct{}{}
301                         continue
302                         // Note we don't count the bytes received in
303                         // partial/corrupt responses: we are measuring
304                         // throughput, not resource consumption.
305                 }
306                 bytesInChan <- uint64(n)
307         }
308 }
309
310 func overrideServices(kc *keepclient.KeepClient, stderr *log.Logger) {
311         roots := make(map[string]string)
312         if *ServiceURL != "" {
313                 roots["zzzzz-bi6l4-000000000000000"] = *ServiceURL
314         } else if *ServiceUUID != "" {
315                 for uuid, url := range kc.GatewayRoots() {
316                         if uuid == *ServiceUUID {
317                                 roots[uuid] = url
318                                 break
319                         }
320                 }
321                 if len(roots) == 0 {
322                         stderr.Fatalf("Service %q was not in list advertised by API %+q", *ServiceUUID, kc.GatewayRoots())
323                 }
324         } else {
325                 return
326         }
327         kc.SetServiceRoots(roots, roots, roots)
328 }