1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
5 // Testing tool for Keep services.
7 // keepexercise helps measure throughput and test reliability under
8 // various usage patterns.
10 // By default, it reads and writes blocks containing 2^26 NUL
11 // bytes. This generates network traffic without consuming much disk
14 // For a more realistic test, enable -vary-request. Warning: this will
15 // fill your storage volumes with random data if you leave it running,
16 // which can cost you money or leave you with too little room for
40 "git.arvados.org/arvados.git/lib/config"
41 "git.arvados.org/arvados.git/sdk/go/arvados"
42 "git.arvados.org/arvados.git/sdk/go/arvadosclient"
43 "git.arvados.org/arvados.git/sdk/go/keepclient"
48 // Command line config knobs
// Every knob below is a pointer returned by the flag package, so the
// rest of the program dereferences it (for example *Replicas or
// *BlockSize) at the point of use.
// NOTE(review): the values are only meaningful once the command line
// has been parsed -- confirm flag.Parse() runs before the first
// dereference.
50 BlockSize = flag.Int("block-size", keepclient.BLOCKSIZE, "bytes per read/write op")
51 ReadThreads = flag.Int("rthreads", 1, "number of concurrent readers")
52 WriteThreads = flag.Int("wthreads", 1, "number of concurrent writers")
53 VaryRequest = flag.Bool("vary-request", false, "vary the data for each request: consumes disk space, exercises write behavior")
54 VaryThread = flag.Bool("vary-thread", false, "use -wthreads different data blocks")
55 Replicas = flag.Int("replicas", 1, "replication level for writing")
56 StatsInterval = flag.Duration("stats-interval", time.Second, "time interval between IO stats reports, or 0 to disable")
57 ServiceURL = flag.String("url", "", "specify scheme://host of a single keep service to exercise (instead of using all advertised services like normal clients)")
58 ServiceUUID = flag.String("uuid", "", "specify UUID of a single advertised keep service to exercise")
59 getVersion = flag.Bool("version", false, "Print version information and exit.")
60 RunTime = flag.Duration("run-time", 0, "time to run (e.g. 60s), or 0 to run indefinitely (default)")
61 Repeat = flag.Int("repeat", 1, "number of times to repeat the experiment (default 1)")
62 UseIndex = flag.Bool("useIndex", false, "use the GetIndex call to get a list of blocks to read. Requires the SystemRoot token. Use this to rule out caching effects when reading.")
// createKeepClient builds an Arvados API client and a KeepClient on top
// of it, applies the -replicas setting, installs an HTTP client with a
// 10 minute timeout and its own transport, and finally calls
// overrideServices with the new client. The stderr logger is passed on
// to overrideServices.
65 func createKeepClient(stderr *log.Logger) (kc *keepclient.KeepClient) {
66 arv, err := arvadosclient.MakeArvadosClient()
70 kc, err = keepclient.MakeKeepClient(arv)
// Write with the replication level requested via -replicas.
74 kc.Want_replicas = *Replicas
76 kc.HTTPClient = &http.Client{
77 Timeout: 10 * time.Minute,
78 // It's not safe to copy *http.DefaultTransport
79 // because it has a mutex (which might be locked)
80 // protecting a private map (which might not be nil).
81 // So we build our own, using the Go 1.12 default
83 Transport: &http.Transport{
// TLS settings come from the API client's ApiInsecure field.
84 TLSClientConfig: arvadosclient.MakeTLSConfig(arv.ApiInsecure),
// Narrow the client to a single service when -url or -uuid was given.
87 overrideServices(kc, stderr)
94 // Print version information if requested
96 fmt.Printf("keep-exercise %s\n", version)
// Diagnostics go to os.Stderr with the standard date/time prefix.
100 stderr := log.New(os.Stderr, "", log.LstdFlags)
// Readers that do not use the index take their locators from blocks
// written during this run (see doReads and doWrites), so they need at
// least one writer.
102 if *ReadThreads > 0 && *WriteThreads == 0 && !*UseIndex {
103 stderr.Fatal("At least one write thread is required if rthreads is non-zero and useIndex is not enabled")
106 if *ReadThreads == 0 && *WriteThreads == 0 {
107 stderr.Fatal("Nothing to do!")
110 kc := createKeepClient(stderr)
112 // When UseIndex is set, we need a KeepClient with SystemRoot powers to get
113 // the block index from the Keepstore. We use the SystemRootToken from
114 // the Arvados config.yml for that.
// cluster stays nil unless index-based reads were requested.
115 var cluster *arvados.Cluster
116 if *ReadThreads > 0 && *UseIndex {
117 cluster = loadConfig(stderr)
118 kc.Arvados.ApiToken = cluster.SystemRootToken
121 ctx, cancel := context.WithCancel(context.Background())
// Ask to be notified of interrupt and SIGTERM on sigChan.
// NOTE(review): presumably cancel is invoked once a signal arrives --
// confirm.
123 sigChan := make(chan os.Signal, 1)
124 signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
127 fmt.Print("\r") // Suppress the ^C print
// One column name per value written on each CSV statistics line.
131 csvHeader := "Timestamp,Elapsed,Read (bytes),Avg Read Speed (MiB/s),Peak Read Speed (MiB/s),Written (bytes),Avg Write Speed (MiB/s),Peak Write Speed (MiB/s),Errors,ReadThreads,WriteThreads,VaryRequest,VaryThread,BlockSize,Replicas,StatsInterval,ServiceURL,ServiceUUID,UseIndex,RunTime,Repeat"
// Start one buffer-producing goroutine per write thread. Each gets its
// own one-slot channel and its thread index; the channels are kept in
// nextBufs and handed to every experiment.
134 var nextBufs []chan []byte
135 for i := 0; i < *WriteThreads; i++ {
136 nextBuf := make(chan []byte, 1)
137 nextBufs = append(nextBufs, nextBuf)
138 go makeBufs(nextBuf, i, stderr)
// Run the experiment up to -repeat times; an iteration does nothing
// once ctx reports an error. The summary returned by each run gets the
// experiment index appended as a final CSV field.
141 for i := 0; i < *Repeat; i++ {
142 if ctx.Err() == nil {
143 summary = runExperiment(ctx, cluster, kc, nextBufs, summary, csvHeader, stderr)
144 stderr.Printf("*************************** experiment %d complete ******************************\n", i)
145 summary += fmt.Sprintf(",%d\n", i)
// The "Summary:" line is logged only when ctx reports no error.
148 if ctx.Err() == nil {
149 stderr.Println("Summary:")
// The summary header is the per-line header plus the extra
// "Experiment" column appended in the loop above.
152 fmt.Println(csvHeader + ",Experiment")
// runExperiment performs one run: a warmup step when readers are
// configured, then it starts the writer and reader goroutines under a
// context with a deadline, and formats CSV statistics lines built from
// the byte counts the workers report on bytesInChan and bytesOutChan.
//
// nextBufs holds one buffer channel per write thread. summary is the
// summary text accumulated by the caller so far; the result is returned
// as newSummary. csvHeader is printed to standard output through the
// csv logger.
157 func runExperiment(ctx context.Context, cluster *arvados.Cluster, kc *keepclient.KeepClient, nextBufs []chan []byte, summary string, csvHeader string, stderr *log.Logger) (newSummary string) {
158 // Send 1234 to bytesInChan when we receive 1234 bytes from keepstore.
159 var bytesInChan = make(chan uint64)
160 var bytesOutChan = make(chan uint64)
161 // Send struct{}{} to errorsChan when an error happens.
162 var errorsChan = make(chan struct{})
// Holds the most recently stored locator; doReads type-asserts the
// loaded value to string.
164 var nextLocator atomic.Value
165 // when UseIndex is set, this channel is used instead of nextLocator
166 var indexLocatorChan = make(chan string, 2)
171 ready := make(chan struct{})
173 if *ReadThreads > 0 {
176 stderr.Printf("Start warmup phase, waiting for 1 available block before reading starts\n")
178 stderr.Printf("Start warmup phase, waiting for block index before reading starts\n")
181 if warmup && !*UseIndex {
// Write one block taken from the first writer's buffer channel so that
// readers have a locator to start from.
183 locator, _, err := kc.PutB(<-nextBufs[0])
186 errorsChan <- struct{}{}
188 nextLocator.Store(locator)
189 stderr.Println("Warmup complete!")
192 } else if warmup && *UseIndex {
193 // Get list of blocks to read
194 go getIndexLocators(ctx, cluster, kc, indexLocatorChan, stderr)
// The first locator delivered is received without being bound to a
// variable, so it only serves as the "index is ready" signal.
198 case <-indexLocatorChan:
199 stderr.Println("Warmup complete!")
// The deadline is measured from this point, i.e. after the warmup code
// above. This ctx shadows the parameter for the rest of the function.
// NOTE(review): with the default -run-time of 0 this deadline is "now";
// confirm that case is handled so that 0 really means "indefinitely".
212 ctx, cancel := context.WithDeadline(ctx, time.Now().Add(*RunTime))
// Each writer gets its own buffer channel.
215 for i := 0; i < *WriteThreads; i++ {
216 go doWrites(ctx, kc, nextBufs[i], &nextLocator, bytesOutChan, errorsChan, stderr)
// Readers fed from the shuffled block index.
219 for i := 0; i < *ReadThreads; i++ {
220 go doIndexReads(ctx, kc, cluster, indexLocatorChan, bytesInChan, errorsChan, stderr)
// Readers that re-read the most recently written block.
223 for i := 0; i < *ReadThreads; i++ {
224 go doReads(ctx, kc, &nextLocator, bytesInChan, errorsChan, stderr)
// tickChan stays nil when -stats-interval is 0.
// NOTE(review): only the ticker's channel is kept, so the Ticker itself
// cannot be stopped when this run ends -- consider retaining it and
// calling Stop.
229 var tickChan <-chan time.Time
230 if *StatsInterval > 0 {
231 tickChan = time.NewTicker(*StatsInterval).C
236 var rateIn, rateOut float64
237 var maxRateIn, maxRateOut float64
238 var exit, printCsv bool
// CSV output goes to standard output without any log prefix.
239 csv := log.New(os.Stdout, "", 0)
241 csv.Println(csvHeader)
249 case i := <-bytesInChan:
251 case o := <-bytesOutChan:
// Rates are the cumulative byte counts divided by the time elapsed
// since t0, scaled by 1048576 (bytes per MiB). The max* values are
// compared against these running averages, so "peak" here means the
// highest running average rather than an instantaneous rate.
257 elapsed := time.Since(t0)
258 rateIn = float64(bytesIn) / elapsed.Seconds() / 1048576
259 if rateIn > maxRateIn {
262 rateOut = float64(bytesOut) / elapsed.Seconds() / 1048576
263 if rateOut > maxRateOut {
// 21 fields, matching the 21 column names the caller passes in
// csvHeader.
266 line := fmt.Sprintf("%v,%v,%v,%.1f,%.1f,%v,%.1f,%.1f,%d,%d,%d,%t,%t,%d,%d,%s,%s,%s,%t,%s,%d",
267 time.Now().Format("2006/01/02 15:04:05"),
269 bytesIn, rateIn, maxRateIn,
270 bytesOut, rateOut, maxRateOut,
// makeBufs prepares write payloads of *BlockSize bytes for one writer.
// threadID can be stamped into the start of the buffer as a varint, and
// a random prefix of randSize bytes (capped at *BlockSize) can replace
// the head of the buffer.
// NOTE(review): presumably the stamp and the random prefix are gated by
// -vary-thread and -vary-request, and the finished buffers are sent on
// nextBuf -- confirm.
295 func makeBufs(nextBuf chan<- []byte, threadID int, stderr *log.Logger) {
296 buf := make([]byte, *BlockSize)
// Writes the varint encoding of threadID at the start of buf.
298 binary.PutVarint(buf, int64(threadID))
// Never ask for more random bytes than one block holds.
301 if randSize > *BlockSize {
302 randSize = *BlockSize
306 rnd := make([]byte, randSize)
307 if _, err := io.ReadFull(rand.Reader, rnd); err != nil {
// New buffer: randSize random bytes followed by the tail of the
// previous buffer, so the length stays *BlockSize. The random bytes
// cover the region where the varint stamp was written.
310 buf = append(rnd, buf[randSize:]...)
// doWrites stores blocks with kc.PutB in a loop that ends once ctx
// reports an error. It sends len(buf) on bytesOutChan, publishes the
// locator returned by PutB through nextLocator, and sends struct{}{} on
// errorsChan on the error path.
// NOTE(review): presumably buf is received from nextBuf on every
// iteration -- confirm.
316 func doWrites(ctx context.Context, kc *keepclient.KeepClient, nextBuf <-chan []byte, nextLocator *atomic.Value, bytesOutChan chan<- uint64, errorsChan chan<- struct{}, stderr *log.Logger) {
317 for ctx.Err() == nil {
319 locator, _, err := kc.PutB(buf)
322 errorsChan <- struct{}{}
// Report the payload size, then make the new block the one readers
// fetch next.
325 bytesOutChan <- uint64(len(buf))
326 nextLocator.Store(locator)
// getIndexLocators collects block locators from the index of every
// service in kc.LocalRoots(), shuffles them, signs each one and sends
// the signed locators on indexLocatorChan. It terminates the program
// through stderr.Fatal/Fatalf when an index cannot be fetched, when no
// locators are found, and when it reaches the "ran out of locators"
// call at the end.
330 func getIndexLocators(ctx context.Context, cluster *arvados.Cluster, kc *keepclient.KeepClient, indexLocatorChan chan<- string, stderr *log.Logger) {
331 if ctx.Err() == nil {
332 var locators []string
333 for uuid := range kc.LocalRoots() {
// The second argument is empty.
// NOTE(review): presumably that is a prefix filter meaning "all
// blocks" -- confirm against the keepclient API.
334 reader, err := kc.GetIndex(uuid, "")
336 stderr.Fatalf("Error getting index: %s\n", err)
// NOTE(review): bufio.Scanner stops silently on a read error; confirm
// scanner.Err() is checked after the scan loop.
338 scanner := bufio.NewScanner(reader)
// Keep only the text before the first space of each index line.
340 locators = append(locators, strings.Split(scanner.Text(), " ")[0])
343 stderr.Printf("Found %d locators\n", len(locators))
344 if len(locators) < 1 {
345 stderr.Fatal("Error: no locators found. The keepstores do not seem to contain any data. Remove the useIndex cli argument.")
// Shuffle so blocks are not read in index order.
// NOTE(review): if mathRand is math/rand, Seed is deprecated since
// Go 1.20.
348 mathRand.Seed(time.Now().UnixNano())
349 mathRand.Shuffle(len(locators), func(i, j int) { locators[i], locators[j] = locators[j], locators[i] })
351 for _, locator := range locators {
352 // We need the Collections.BlobSigningKey to sign our block requests. This requires access to /etc/arvados/config.yml
// Signed with the client's API token, a time one hour from now, and the
// cluster's BlobSigningTTL and BlobSigningKey.
353 signedLocator := arvados.SignLocator(locator, kc.Arvados.ApiToken, time.Now().Local().Add(1*time.Hour), cluster.Collections.BlobSigningTTL.Duration(), []byte(cluster.Collections.BlobSigningKey))
357 case indexLocatorChan <- signedLocator:
360 stderr.Fatal("Error: ran out of locators to read!")
// loadConfig loads the Arvados configuration with a config loader
// created from os.Stdin and a nil second argument, with SkipLegacy
// enabled, and then asks the loaded configuration for a cluster using
// an empty cluster ID.
// NOTE(review): presumably the empty ID selects the default (only)
// cluster, and errors are reported through stderr -- confirm.
364 func loadConfig(stderr *log.Logger) (cluster *arvados.Cluster) {
365 loader := config.NewLoader(os.Stdin, nil)
366 loader.SkipLegacy = true
368 cfg, err := loader.Load()
372 cluster, err = cfg.GetCluster("")
// doIndexReads fetches blocks whose locators arrive on
// indexLocatorChan, in a loop that ends once ctx reports an error. Each
// response body is drained and the number of bytes copied is compared
// with the size returned by kc.Get. The function sends struct{}{} on
// errorsChan on its error paths and sends the byte count on
// bytesInChan.
// NOTE(review): cluster does not appear to be used here -- confirm
// whether the parameter is still needed.
379 func doIndexReads(ctx context.Context, kc *keepclient.KeepClient, cluster *arvados.Cluster, indexLocatorChan <-chan string, bytesInChan chan<- uint64, errorsChan chan<- struct{}, stderr *log.Logger) {
380 for ctx.Err() == nil {
384 case locator := <-indexLocatorChan:
385 rdr, size, url, err := kc.Get(locator)
388 errorsChan <- struct{}{}
// Drain the body without keeping the data; only the byte count matters.
// NOTE(review): ioutil.Discard is deprecated since Go 1.16 in favour of
// io.Discard.
391 n, err := io.Copy(ioutil.Discard, rdr)
// A short, long or failed copy is logged and counted as an error.
393 if n != size || err != nil {
394 stderr.Printf("Got %d bytes (expected %d) from %s: %v", n, size, url, err)
395 errorsChan <- struct{}{}
397 // Note we don't count the bytes received in
398 // partial/corrupt responses: we are measuring
399 // throughput, not resource consumption.
401 bytesInChan <- uint64(n)
// doReads fetches the block named by the locator currently held in
// nextLocator, in a loop that ends once ctx reports an error. Each
// response body is drained and the number of bytes copied is compared
// with the size returned by kc.Get. The function sends struct{}{} on
// errorsChan on its error paths and sends the byte count on
// bytesInChan.
406 func doReads(ctx context.Context, kc *keepclient.KeepClient, nextLocator *atomic.Value, bytesInChan chan<- uint64, errorsChan chan<- struct{}, stderr *log.Logger) {
408 for ctx.Err() == nil {
// The type assertion panics if nothing has been stored yet, so a
// locator must be stored before the first reader runs (the warmup in
// runExperiment stores one).
409 locator = nextLocator.Load().(string)
410 rdr, size, url, err := kc.Get(locator)
413 errorsChan <- struct{}{}
// Drain the body without keeping the data; only the byte count matters.
// NOTE(review): ioutil.Discard is deprecated since Go 1.16 in favour of
// io.Discard.
416 n, err := io.Copy(ioutil.Discard, rdr)
// A short, long or failed copy is logged and counted as an error.
418 if n != size || err != nil {
419 stderr.Printf("Got %d bytes (expected %d) from %s: %v", n, size, url, err)
420 errorsChan <- struct{}{}
422 // Note we don't count the bytes received in
423 // partial/corrupt responses: we are measuring
424 // throughput, not resource consumption.
426 bytesInChan <- uint64(n)
// overrideServices points kc at a single Keep service when -url or
// -uuid was given: -url is registered under a placeholder UUID, while
// -uuid is looked up among the services returned by kc.GatewayRoots().
// The resulting map is installed with kc.SetServiceRoots.
430 func overrideServices(kc *keepclient.KeepClient, stderr *log.Logger) {
431 roots := make(map[string]string)
432 if *ServiceURL != "" {
// A URL given on the command line has no UUID of its own, so it is
// stored under a fixed placeholder key.
433 roots["zzzzz-bi6l4-000000000000000"] = *ServiceURL
434 } else if *ServiceUUID != "" {
435 for uuid, url := range kc.GatewayRoots() {
436 if uuid == *ServiceUUID {
// NOTE(review): presumably reached only when no advertised service
// matched the requested UUID -- confirm.
442 stderr.Fatalf("Service %q was not in list advertised by API %+q", *ServiceUUID, kc.GatewayRoots())
// The same map is passed for all three arguments.
447 kc.SetServiceRoots(roots, roots, roots)