16513: add an option to keep-exercise for a timed run, with a CSV output
[arvados.git] / tools / keep-exercise / keep-exercise.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 // Testing tool for Keep services.
6 //
7 // keepexercise helps measure throughput and test reliability under
8 // various usage patterns.
9 //
10 // By default, it reads and writes blocks containing 2^26 NUL
11 // bytes. This generates network traffic without consuming much disk
12 // space.
13 //
14 // For a more realistic test, enable -vary-request. Warning: this will
15 // fill your storage volumes with random data if you leave it running,
16 // which can cost you money or leave you with too little room for
17 // useful data.
18 //
19 package main
20
21 import (
22         "crypto/rand"
23         "encoding/binary"
24         "flag"
25         "fmt"
26         "io"
27         "io/ioutil"
28         "log"
29         "net/http"
30         "os"
31         "time"
32
33         "git.arvados.org/arvados.git/sdk/go/arvadosclient"
34         "git.arvados.org/arvados.git/sdk/go/keepclient"
35 )
36
37 var version = "dev"
38
39 // Command line config knobs
40 var (
41         BlockSize     = flag.Int("block-size", keepclient.BLOCKSIZE, "bytes per read/write op")
42         ReadThreads   = flag.Int("rthreads", 1, "number of concurrent readers")
43         WriteThreads  = flag.Int("wthreads", 1, "number of concurrent writers")
44         VaryRequest   = flag.Bool("vary-request", false, "vary the data for each request: consumes disk space, exercises write behavior")
45         VaryThread    = flag.Bool("vary-thread", false, "use -wthreads different data blocks")
46         Replicas      = flag.Int("replicas", 1, "replication level for writing")
47         StatsInterval = flag.Duration("stats-interval", time.Second, "time interval between IO stats reports, or 0 to disable")
48         ServiceURL    = flag.String("url", "", "specify scheme://host of a single keep service to exercise (instead of using all advertised services like normal clients)")
49         ServiceUUID   = flag.String("uuid", "", "specify UUID of a single advertised keep service to exercise")
50         getVersion    = flag.Bool("version", false, "Print version information and exit.")
51         RunTime       = flag.Duration("run-time", 0, "time to run (e.g. 60s), or 0 to run indefinitely (default)")
52 )
53
54 func main() {
55         flag.Parse()
56
57         // Print version information if requested
58         if *getVersion {
59                 fmt.Printf("keep-exercise %s\n", version)
60                 os.Exit(0)
61         }
62
63         log.Printf("keep-exercise %s started", version)
64
65         arv, err := arvadosclient.MakeArvadosClient()
66         if err != nil {
67                 log.Fatal(err)
68         }
69         kc, err := keepclient.MakeKeepClient(arv)
70         if err != nil {
71                 log.Fatal(err)
72         }
73         kc.Want_replicas = *Replicas
74
75         transport := *(http.DefaultTransport.(*http.Transport))
76         transport.TLSClientConfig = arvadosclient.MakeTLSConfig(arv.ApiInsecure)
77         kc.HTTPClient = &http.Client{
78                 Timeout:   10 * time.Minute,
79                 Transport: &transport,
80         }
81
82         overrideServices(kc)
83
84         nextLocator := make(chan string, *ReadThreads+*WriteThreads)
85
86         go countBeans(nextLocator)
87         for i := 0; i < *WriteThreads; i++ {
88                 nextBuf := make(chan []byte, 1)
89                 go makeBufs(nextBuf, i)
90                 go doWrites(kc, nextBuf, nextLocator)
91         }
92         for i := 0; i < *ReadThreads; i++ {
93                 go doReads(kc, nextLocator)
94         }
95         <-make(chan struct{})
96 }
97
// Unbuffered channels on which the worker goroutines report progress to
// countBeans.
var (
	// bytesInChan carries byte counts read back from keepstore: after
	// receiving 1234 bytes, send 1234.
	bytesInChan = make(chan uint64)

	// bytesOutChan carries byte counts written to keepstore.
	bytesOutChan = make(chan uint64)

	// errorsChan gets one struct{}{} for every error that happens.
	errorsChan = make(chan struct{})
)
104
105 func countBeans(nextLocator chan string) {
106         t0 := time.Now()
107         var tickChan <-chan time.Time
108         var endChan <-chan time.Time
109         if *StatsInterval > 0 {
110                 tickChan = time.NewTicker(*StatsInterval).C
111         }
112         if *RunTime > 0 {
113                 endChan = time.NewTicker(*RunTime).C
114         }
115         var bytesIn uint64
116         var bytesOut uint64
117         var errors uint64
118         var maxRateIn, maxRateOut float64
119         for {
120                 select {
121                 case <-tickChan:
122                         elapsed := time.Since(t0)
123                         if float64(bytesIn)/elapsed.Seconds()/1048576 > maxRateIn {
124                                 maxRateIn = float64(bytesIn) / elapsed.Seconds() / 1048576
125                         }
126                         if float64(bytesOut)/elapsed.Seconds()/1048576 > maxRateOut {
127                                 maxRateOut = float64(bytesOut) / elapsed.Seconds() / 1048576
128                         }
129                         log.Printf("%v elapsed: read %v bytes (%.1f MiB/s), wrote %v bytes (%.1f MiB/s), errors %d",
130                                 elapsed,
131                                 bytesIn, (float64(bytesIn) / elapsed.Seconds() / 1048576),
132                                 bytesOut, (float64(bytesOut) / elapsed.Seconds() / 1048576),
133                                 errors,
134                         )
135                 case <-endChan:
136                         elapsed := time.Since(t0)
137                         log.Println("\nSummary:")
138                         log.Println("Elapsed,Read (bytes),Avg Read Speed (MiB/s),Peak Read Speed (MiB/s),Written (bytes),Avg Write Speed (MiB/s),Peak Write Speed (MiB/s),Errors,ReadThreads,WriteThreads,VaryRequest,VaryThread,BlockSize,Replicas,StatsInterval,ServiceURL,ServiceUUID,RunTime\n")
139                         log.Printf("%v,%v,%.1f,%.1f,%v,%.1f,%.1f,%d,%d,%d,%t,%t,%d,%d,%s,%s,%s,%s",
140                                 elapsed,
141                                 bytesIn, (float64(bytesIn) / elapsed.Seconds() / 1048576), maxRateIn,
142                                 bytesOut, (float64(bytesOut) / elapsed.Seconds() / 1048576), maxRateOut,
143                                 errors,
144                                 *ReadThreads,
145                                 *WriteThreads,
146                                 *VaryRequest,
147                                 *VaryThread,
148                                 *BlockSize,
149                                 *Replicas,
150                                 *StatsInterval,
151                                 *ServiceURL,
152                                 *ServiceUUID,
153                                 *RunTime,
154                         )
155                         os.Exit(0)
156                 case i := <-bytesInChan:
157                         bytesIn += i
158                 case o := <-bytesOutChan:
159                         bytesOut += o
160                 case <-errorsChan:
161                         errors++
162                 }
163         }
164 }
165
166 func makeBufs(nextBuf chan<- []byte, threadID int) {
167         buf := make([]byte, *BlockSize)
168         if *VaryThread {
169                 binary.PutVarint(buf, int64(threadID))
170         }
171         randSize := 524288
172         if randSize > *BlockSize {
173                 randSize = *BlockSize
174         }
175         for {
176                 if *VaryRequest {
177                         rnd := make([]byte, randSize)
178                         if _, err := io.ReadFull(rand.Reader, rnd); err != nil {
179                                 log.Fatal(err)
180                         }
181                         buf = append(rnd, buf[randSize:]...)
182                 }
183                 nextBuf <- buf
184         }
185 }
186
187 func doWrites(kc *keepclient.KeepClient, nextBuf <-chan []byte, nextLocator chan<- string) {
188         for buf := range nextBuf {
189                 locator, _, err := kc.PutB(buf)
190                 if err != nil {
191                         log.Print(err)
192                         errorsChan <- struct{}{}
193                         continue
194                 }
195                 bytesOutChan <- uint64(len(buf))
196                 for cap(nextLocator) > len(nextLocator)+*WriteThreads {
197                         // Give the readers something to do, unless
198                         // they have lots queued up already.
199                         nextLocator <- locator
200                 }
201         }
202 }
203
204 func doReads(kc *keepclient.KeepClient, nextLocator <-chan string) {
205         for locator := range nextLocator {
206                 rdr, size, url, err := kc.Get(locator)
207                 if err != nil {
208                         log.Print(err)
209                         errorsChan <- struct{}{}
210                         continue
211                 }
212                 n, err := io.Copy(ioutil.Discard, rdr)
213                 rdr.Close()
214                 if n != size || err != nil {
215                         log.Printf("Got %d bytes (expected %d) from %s: %v", n, size, url, err)
216                         errorsChan <- struct{}{}
217                         continue
218                         // Note we don't count the bytes received in
219                         // partial/corrupt responses: we are measuring
220                         // throughput, not resource consumption.
221                 }
222                 bytesInChan <- uint64(n)
223         }
224 }
225
226 func overrideServices(kc *keepclient.KeepClient) {
227         roots := make(map[string]string)
228         if *ServiceURL != "" {
229                 roots["zzzzz-bi6l4-000000000000000"] = *ServiceURL
230         } else if *ServiceUUID != "" {
231                 for uuid, url := range kc.GatewayRoots() {
232                         if uuid == *ServiceUUID {
233                                 roots[uuid] = url
234                                 break
235                         }
236                 }
237                 if len(roots) == 0 {
238                         log.Fatalf("Service %q was not in list advertised by API %+q", *ServiceUUID, kc.GatewayRoots())
239                 }
240         } else {
241                 return
242         }
243         kc.SetServiceRoots(roots, roots, roots)
244 }