X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/e710f1b2da3095d6152ac7f6ed1ffab8bfc2c0c7..1bc7e16eb168274baea108092184925b7172ce02:/tools/keep-exercise/keep-exercise.go

diff --git a/tools/keep-exercise/keep-exercise.go b/tools/keep-exercise/keep-exercise.go
index d06b1eb181..2aa0b44fc4 100644
--- a/tools/keep-exercise/keep-exercise.go
+++ b/tools/keep-exercise/keep-exercise.go
@@ -19,6 +19,8 @@ package main
 
 import (
+	"bufio"
+	"context"
 	"crypto/rand"
 	"encoding/binary"
 	"flag"
@@ -26,10 +28,17 @@ import (
 	"io"
 	"io/ioutil"
 	"log"
+	mathRand "math/rand"
 	"net/http"
 	"os"
+	"os/signal"
+	"strings"
+	"sync/atomic"
+	"syscall"
 	"time"
 
+	"git.arvados.org/arvados.git/lib/config"
+	"git.arvados.org/arvados.git/sdk/go/arvados"
 	"git.arvados.org/arvados.git/sdk/go/arvadosclient"
 	"git.arvados.org/arvados.git/sdk/go/keepclient"
 )
@@ -49,8 +58,36 @@ var (
 	ServiceUUID   = flag.String("uuid", "", "specify UUID of a single advertised keep service to exercise")
 	getVersion    = flag.Bool("version", false, "Print version information and exit.")
 	RunTime       = flag.Duration("run-time", 0, "time to run (e.g. 60s), or 0 to run indefinitely (default)")
+	Repeat        = flag.Int("repeat", 1, "number of times to repeat the experiment (default 1)")
+	UseIndex      = flag.Bool("useIndex", false, "use the GetIndex call to get a list of blocks to read. Requires the SystemRoot token. Use this to rule out caching effects when reading.")
 )
 
+func createKeepClient(stderr *log.Logger) (kc *keepclient.KeepClient) {
+	arv, err := arvadosclient.MakeArvadosClient()
+	if err != nil {
+		stderr.Fatal(err)
+	}
+	kc, err = keepclient.MakeKeepClient(arv)
+	if err != nil {
+		stderr.Fatal(err)
+	}
+	kc.Want_replicas = *Replicas
+
+	kc.HTTPClient = &http.Client{
+		Timeout: 10 * time.Minute,
+		// It's not safe to copy *http.DefaultTransport
+		// because it has a mutex (which might be locked)
+		// protecting a private map (which might not be nil).
+		// So we build our own, using the Go 1.12 default
+		// values.
+		Transport: &http.Transport{
+			TLSClientConfig: arvadosclient.MakeTLSConfig(arv.ApiInsecure),
+		},
+	}
+	overrideServices(kc, stderr)
+	return kc
+}
+
 func main() {
 	flag.Parse()
 
@@ -60,86 +97,177 @@ func main() {
 		os.Exit(0)
 	}
 
-	log.Printf("keep-exercise %s started", version)
+	stderr := log.New(os.Stderr, "", log.LstdFlags)
 
-	arv, err := arvadosclient.MakeArvadosClient()
-	if err != nil {
-		log.Fatal(err)
+	if *ReadThreads > 0 && *WriteThreads == 0 && !*UseIndex {
+		stderr.Fatal("At least one write thread is required if rthreads is non-zero and useIndex is not enabled")
 	}
-	kc, err := keepclient.MakeKeepClient(arv)
-	if err != nil {
-		log.Fatal(err)
+
+	if *ReadThreads == 0 && *WriteThreads == 0 {
+		stderr.Fatal("Nothing to do!")
 	}
-	kc.Want_replicas = *Replicas
 
-	transport := *(http.DefaultTransport.(*http.Transport))
-	transport.TLSClientConfig = arvadosclient.MakeTLSConfig(arv.ApiInsecure)
-	kc.HTTPClient = &http.Client{
-		Timeout:   10 * time.Minute,
-		Transport: &transport,
+	kc := createKeepClient(stderr)
+
+	// When UseIndex is set, we need a KeepClient with SystemRoot powers to get
+	// the block index from the Keepstore. We use the SystemRootToken from
+	// the Arvados config.yml for that.
+	var cluster *arvados.Cluster
+	if *ReadThreads > 0 && *UseIndex {
+		cluster = loadConfig(stderr)
+		kc.Arvados.ApiToken = cluster.SystemRootToken
 	}
 
-	overrideServices(kc)
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+	sigChan := make(chan os.Signal, 1)
+	signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
+	go func() {
+		<-sigChan
+		fmt.Print("\r") // Suppress the ^C print
+		cancel()
+	}()
 
-	nextLocator := make(chan string, *ReadThreads+*WriteThreads)
+	csvHeader := "Timestamp,Elapsed,Read (bytes),Avg Read Speed (MiB/s),Peak Read Speed (MiB/s),Written (bytes),Avg Write Speed (MiB/s),Peak Write Speed (MiB/s),Errors,ReadThreads,WriteThreads,VaryRequest,VaryThread,BlockSize,Replicas,StatsInterval,ServiceURL,ServiceUUID,UseIndex,RunTime,Repeat"
+	var summary string
 
-	go countBeans(nextLocator)
+	var nextBufs []chan []byte
 	for i := 0; i < *WriteThreads; i++ {
 		nextBuf := make(chan []byte, 1)
-		go makeBufs(nextBuf, i)
-		go doWrites(kc, nextBuf, nextLocator)
+		nextBufs = append(nextBufs, nextBuf)
+		go makeBufs(nextBuf, i, stderr)
+	}
+
+	for i := 0; i < *Repeat; i++ {
+		if ctx.Err() == nil {
+			summary = runExperiment(ctx, cluster, kc, nextBufs, summary, csvHeader, stderr)
+			stderr.Printf("*************************** experiment %d complete ******************************\n", i)
+			summary += fmt.Sprintf(",%d\n", i)
+		}
 	}
-	for i := 0; i < *ReadThreads; i++ {
-		go doReads(kc, nextLocator)
+
+	if ctx.Err() == nil {
+		stderr.Println("Summary:")
+		stderr.Println()
+		fmt.Println()
+		fmt.Println(csvHeader + ",Experiment")
+		fmt.Println(summary)
 	}
-	<-make(chan struct{})
 }
 
-// Send 1234 to bytesInChan when we receive 1234 bytes from keepstore.
-var bytesInChan = make(chan uint64)
-var bytesOutChan = make(chan uint64)
+func runExperiment(ctx context.Context, cluster *arvados.Cluster, kc *keepclient.KeepClient, nextBufs []chan []byte, summary string, csvHeader string, stderr *log.Logger) (newSummary string) {
+	// Send 1234 to bytesInChan when we receive 1234 bytes from keepstore.
+	var bytesInChan = make(chan uint64)
+	var bytesOutChan = make(chan uint64)
+	// Send struct{}{} to errorsChan when an error happens.
+	var errorsChan = make(chan struct{})
 
-// Send struct{}{} to errorsChan when an error happens.
-var errorsChan = make(chan struct{})
+	var nextLocator atomic.Value
+	// when UseIndex is set, this channel is used instead of nextLocator
+	var indexLocatorChan = make(chan string, 2)
+
+	newSummary = summary
+
+	// Start warmup
+	ready := make(chan struct{})
+	var warmup bool
+	if *ReadThreads > 0 {
+		warmup = true
+		if !*UseIndex {
+			stderr.Printf("Start warmup phase, waiting for 1 available block before reading starts\n")
+		} else {
+			stderr.Printf("Start warmup phase, waiting for block index before reading starts\n")
+		}
+	}
+	if warmup && !*UseIndex {
+		go func() {
+			locator, _, err := kc.PutB(<-nextBufs[0])
+			if err != nil {
+				stderr.Print(err)
+				errorsChan <- struct{}{}
+			}
+			nextLocator.Store(locator)
+			stderr.Println("Warmup complete!")
+			close(ready)
+		}()
+	} else if warmup && *UseIndex {
+		// Get list of blocks to read
+		go getIndexLocators(ctx, cluster, kc, indexLocatorChan, stderr)
+		select {
+		case <-ctx.Done():
+			return
+		case <-indexLocatorChan:
+			stderr.Println("Warmup complete!")
+			close(ready)
+		}
+	} else {
+		close(ready)
+	}
+	select {
+	case <-ctx.Done():
+		return
+	case <-ready:
+	}
+
+	// Warmup complete
+	ctx, cancel := context.WithDeadline(ctx, time.Now().Add(*RunTime))
+	defer cancel()
+
+	for i := 0; i < *WriteThreads; i++ {
+		go doWrites(ctx, kc, nextBufs[i], &nextLocator, bytesOutChan, errorsChan, stderr)
+	}
+	if *UseIndex {
+		for i := 0; i < *ReadThreads; i++ {
+			go doIndexReads(ctx, kc, cluster, indexLocatorChan, bytesInChan, errorsChan, stderr)
+		}
+	} else {
+		for i := 0; i < *ReadThreads; i++ {
+			go doReads(ctx, kc, &nextLocator, bytesInChan, errorsChan, stderr)
+		}
+	}
 
-func countBeans(nextLocator chan string) {
 	t0 := time.Now()
 	var tickChan <-chan time.Time
-	var endChan <-chan time.Time
 	if *StatsInterval > 0 {
 		tickChan = time.NewTicker(*StatsInterval).C
 	}
-	if *RunTime > 0 {
-		endChan = time.NewTicker(*RunTime).C
-	}
 	var bytesIn uint64
 	var bytesOut uint64
 	var errors uint64
+	var rateIn, rateOut float64
 	var maxRateIn, maxRateOut float64
+	var exit, printCsv bool
+	csv := log.New(os.Stdout, "", 0)
+	csv.Println()
+	csv.Println(csvHeader)
 	for {
 		select {
+		case <-ctx.Done():
+			printCsv = true
+			exit = true
 		case <-tickChan:
+			printCsv = true
+		case i := <-bytesInChan:
+			bytesIn += i
+		case o := <-bytesOutChan:
+			bytesOut += o
+		case <-errorsChan:
+			errors++
+		}
+		if printCsv {
 			elapsed := time.Since(t0)
-			if float64(bytesIn)/elapsed.Seconds()/1048576 > maxRateIn {
-				maxRateIn = float64(bytesIn) / elapsed.Seconds() / 1048576
+			rateIn = float64(bytesIn) / elapsed.Seconds() / 1048576
+			if rateIn > maxRateIn {
+				maxRateIn = rateIn
 			}
-			if float64(bytesOut)/elapsed.Seconds()/1048576 > maxRateOut {
-				maxRateOut = float64(bytesOut) / elapsed.Seconds() / 1048576
+			rateOut = float64(bytesOut) / elapsed.Seconds() / 1048576
+			if rateOut > maxRateOut {
+				maxRateOut = rateOut
 			}
-			log.Printf("%v elapsed: read %v bytes (%.1f MiB/s), wrote %v bytes (%.1f MiB/s), errors %d",
+			line := fmt.Sprintf("%v,%v,%v,%.1f,%.1f,%v,%.1f,%.1f,%d,%d,%d,%t,%t,%d,%d,%s,%s,%s,%t,%s,%d",
+				time.Now().Format("2006/01/02 15:04:05"),
 				elapsed,
-				bytesIn, (float64(bytesIn) / elapsed.Seconds() / 1048576),
-				bytesOut, (float64(bytesOut) / elapsed.Seconds() / 1048576),
-				errors,
-			)
-		case <-endChan:
-			elapsed := time.Since(t0)
-			log.Println("\nSummary:")
-			log.Println("Elapsed,Read (bytes),Avg Read Speed (MiB/s),Peak Read Speed (MiB/s),Written (bytes),Avg Write Speed (MiB/s),Peak Write Speed (MiB/s),Errors,ReadThreads,WriteThreads,VaryRequest,VaryThread,BlockSize,Replicas,StatsInterval,ServiceURL,ServiceUUID,RunTime\n")
-			log.Printf("%v,%v,%.1f,%.1f,%v,%.1f,%.1f,%d,%d,%d,%t,%t,%d,%d,%s,%s,%s,%s",
-				elapsed,
-				bytesIn, (float64(bytesIn) / elapsed.Seconds() / 1048576), maxRateIn,
-				bytesOut, (float64(bytesOut) / elapsed.Seconds() / 1048576), maxRateOut,
+				bytesIn, rateIn, maxRateIn,
+				bytesOut, rateOut, maxRateOut,
 				errors,
 				*ReadThreads,
 				*WriteThreads,
@@ -150,20 +278,21 @@ func countBeans(nextLocator chan string) {
 				*StatsInterval,
 				*ServiceURL,
 				*ServiceUUID,
+				*UseIndex,
 				*RunTime,
+				*Repeat,
 			)
-			os.Exit(0)
-		case i := <-bytesInChan:
-			bytesIn += i
-		case o := <-bytesOutChan:
-			bytesOut += o
-		case <-errorsChan:
-			errors++
+			csv.Println(line)
+			if exit {
+				newSummary += line
+				return
+			}
+			printCsv = false
 		}
 	}
 }
 
-func makeBufs(nextBuf chan<- []byte, threadID int) {
+func makeBufs(nextBuf chan<- []byte, threadID int, stderr *log.Logger) {
 	buf := make([]byte, *BlockSize)
 	if *VaryThread {
 		binary.PutVarint(buf, int64(threadID))
@@ -176,7 +305,7 @@ func makeBufs(nextBuf chan<- []byte, threadID int) {
 		if *VaryRequest {
 			rnd := make([]byte, randSize)
 			if _, err := io.ReadFull(rand.Reader, rnd); err != nil {
-				log.Fatal(err)
+				stderr.Fatal(err)
 			}
 			buf = append(rnd, buf[randSize:]...)
 		}
@@ -184,35 +313,110 @@ func makeBufs(nextBuf chan<- []byte, threadID int) {
 	}
 }
 
-func doWrites(kc *keepclient.KeepClient, nextBuf <-chan []byte, nextLocator chan<- string) {
-	for buf := range nextBuf {
+func doWrites(ctx context.Context, kc *keepclient.KeepClient, nextBuf <-chan []byte, nextLocator *atomic.Value, bytesOutChan chan<- uint64, errorsChan chan<- struct{}, stderr *log.Logger) {
+	for ctx.Err() == nil {
+		buf := <-nextBuf
 		locator, _, err := kc.PutB(buf)
 		if err != nil {
-			log.Print(err)
+			stderr.Print(err)
 			errorsChan <- struct{}{}
 			continue
 		}
 		bytesOutChan <- uint64(len(buf))
-		for cap(nextLocator) > len(nextLocator)+*WriteThreads {
-			// Give the readers something to do, unless
-			// they have lots queued up already.
-			nextLocator <- locator
+		nextLocator.Store(locator)
+	}
+}
+
+func getIndexLocators(ctx context.Context, cluster *arvados.Cluster, kc *keepclient.KeepClient, indexLocatorChan chan<- string, stderr *log.Logger) {
+	if ctx.Err() == nil {
+		var locators []string
+		for uuid := range kc.LocalRoots() {
+			reader, err := kc.GetIndex(uuid, "")
+			if err != nil {
+				stderr.Fatalf("Error getting index: %s\n", err)
+			}
+			scanner := bufio.NewScanner(reader)
+			for scanner.Scan() {
+				locators = append(locators, strings.Split(scanner.Text(), " ")[0])
+			}
+		}
+		stderr.Printf("Found %d locators\n", len(locators))
+		if len(locators) < 1 {
+			stderr.Fatal("Error: no locators found. The keepstores do not seem to contain any data. Remove the useIndex cli argument.")
+		}
+
+		mathRand.Seed(time.Now().UnixNano())
+		mathRand.Shuffle(len(locators), func(i, j int) { locators[i], locators[j] = locators[j], locators[i] })
+
+		for _, locator := range locators {
+			// We need the Collections.BlobSigningKey to sign our block requests. This requires access to /etc/arvados/config.yml
+			signedLocator := arvados.SignLocator(locator, kc.Arvados.ApiToken, time.Now().Local().Add(1*time.Hour), cluster.Collections.BlobSigningTTL.Duration(), []byte(cluster.Collections.BlobSigningKey))
+			select {
+			case <-ctx.Done():
+				return
+			case indexLocatorChan <- signedLocator:
+			}
+		}
+		stderr.Fatal("Error: ran out of locators to read!")
+	}
+}
+
+func loadConfig(stderr *log.Logger) (cluster *arvados.Cluster) {
+	loader := config.NewLoader(os.Stdin, nil)
+	loader.SkipLegacy = true
+
+	cfg, err := loader.Load()
+	if err != nil {
+		stderr.Fatal(err)
+	}
+	cluster, err = cfg.GetCluster("")
+	if err != nil {
+		stderr.Fatal(err)
+	}
+	return
+}
+
+func doIndexReads(ctx context.Context, kc *keepclient.KeepClient, cluster *arvados.Cluster, indexLocatorChan <-chan string, bytesInChan chan<- uint64, errorsChan chan<- struct{}, stderr *log.Logger) {
+	for ctx.Err() == nil {
+		select {
+		case <-ctx.Done():
+			return
+		case locator := <-indexLocatorChan:
+			rdr, size, url, err := kc.Get(locator)
+			if err != nil {
+				stderr.Print(err)
+				errorsChan <- struct{}{}
+				continue
+			}
+			n, err := io.Copy(ioutil.Discard, rdr)
+			rdr.Close()
+			if n != size || err != nil {
+				stderr.Printf("Got %d bytes (expected %d) from %s: %v", n, size, url, err)
+				errorsChan <- struct{}{}
+				continue
+				// Note we don't count the bytes received in
+				// partial/corrupt responses: we are measuring
+				// throughput, not resource consumption.
+			}
+			bytesInChan <- uint64(n)
 		}
 	}
 }
 
-func doReads(kc *keepclient.KeepClient, nextLocator <-chan string) {
-	for locator := range nextLocator {
+func doReads(ctx context.Context, kc *keepclient.KeepClient, nextLocator *atomic.Value, bytesInChan chan<- uint64, errorsChan chan<- struct{}, stderr *log.Logger) {
+	var locator string
+	for ctx.Err() == nil {
+		locator = nextLocator.Load().(string)
 		rdr, size, url, err := kc.Get(locator)
 		if err != nil {
-			log.Print(err)
+			stderr.Print(err)
 			errorsChan <- struct{}{}
 			continue
 		}
 		n, err := io.Copy(ioutil.Discard, rdr)
 		rdr.Close()
 		if n != size || err != nil {
-			log.Printf("Got %d bytes (expected %d) from %s: %v", n, size, url, err)
+			stderr.Printf("Got %d bytes (expected %d) from %s: %v", n, size, url, err)
 			errorsChan <- struct{}{}
 			continue
 			// Note we don't count the bytes received in
@@ -223,7 +427,7 @@ func doReads(kc *keepclient.KeepClient, nextLocator <-chan string) {
 	}
 }
 
-func overrideServices(kc *keepclient.KeepClient) {
+func overrideServices(kc *keepclient.KeepClient, stderr *log.Logger) {
 	roots := make(map[string]string)
 	if *ServiceURL != "" {
 		roots["zzzzz-bi6l4-000000000000000"] = *ServiceURL
@@ -235,7 +439,7 @@
 		}
 	}
 	if len(roots) == 0 {
-		log.Fatalf("Service %q was not in list advertised by API %+q", *ServiceUUID, kc.GatewayRoots())
+		stderr.Fatalf("Service %q was not in list advertised by API %+q", *ServiceUUID, kc.GatewayRoots())
 		}
 	} else {
 		return
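
Note: the commit replaces the shared nextLocator channel with a sync/atomic.Value. Writers overwrite a single slot with the newest locator, and readers re-read whatever is current, so slow readers can never stall writers and fast readers simply re-fetch the same block. A minimal, runnable sketch of that handoff pattern (the name "latest" and the fake locator strings are illustrative, not from the commit):

package main

import (
	"fmt"
	"sync/atomic"
	"time"
)

func main() {
	var latest atomic.Value // single slot holding the newest locator

	// Writer: overwrite the slot after each successful write, the way
	// doWrites does after kc.PutB succeeds.
	go func() {
		for i := 0; ; i++ {
			latest.Store(fmt.Sprintf("locator-%d", i))
			time.Sleep(100 * time.Millisecond)
		}
	}()

	// Reader: wait until the slot is populated (keep-exercise gates this
	// behind its warmup write), then repeatedly read the newest value.
	for latest.Load() == nil {
		time.Sleep(10 * time.Millisecond)
	}
	for i := 0; i < 5; i++ {
		fmt.Println("reading", latest.Load().(string))
		time.Sleep(150 * time.Millisecond)
	}
}

The nil check above is why the warmup phase matters: doReads type-asserts nextLocator.Load().(string), which would panic if a reader ever ran before the first Store.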
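Note: the endless countBeans loop and its endChan ticker are gone; run duration and Ctrl-C handling both flow through a context now. A self-contained sketch of the same shutdown wiring, with a hard-coded 5-second stand-in for *RunTime:

package main

import (
	"context"
	"fmt"
	"os"
	"os/signal"
	"syscall"
	"time"
)

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// Cancel on Ctrl-C or SIGTERM, as main() does in the commit.
	sigChan := make(chan os.Signal, 1)
	signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
	go func() {
		<-sigChan
		fmt.Print("\r") // suppress the ^C echo
		cancel()
	}()

	// Layer the run-time limit on top, as runExperiment does after warmup.
	ctx, cancelDeadline := context.WithDeadline(ctx, time.Now().Add(5*time.Second))
	defer cancelDeadline()

	<-ctx.Done() // the worker loops instead poll ctx.Err() == nil
	fmt.Println("stopped:", ctx.Err())
}

Because every goroutine watches the same context, one cancel() tears down writers, readers, and the stats loop together, and the final CSV line still gets emitted via the exit flag.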
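Note: with -useIndex, the block list comes from each keepstore's index rather than from fresh writes, and the commit keeps only the first space-separated field of every index line. Assuming lines of the form "<locator> <mtime>" (only the first field is used; the sample lines below are invented), the parsing step reduces to:

package main

import (
	"bufio"
	"fmt"
	"strings"
)

func main() {
	// Two invented index lines standing in for a kc.GetIndex stream.
	index := "acbd18db4cc2f85cedef654fccc4a4d8+3 1609459200\n" +
		"37b51d194a7513e45b56f6524f2d51f2+3 1609459300\n"

	var locators []string
	scanner := bufio.NewScanner(strings.NewReader(index))
	for scanner.Scan() {
		locators = append(locators, strings.Split(scanner.Text(), " ")[0])
	}
	fmt.Println(locators) // the commit shuffles these, then signs each one
}

The locators are then shuffled and signed with Collections.BlobSigningKey before being fed to the read threads, which is why -useIndex needs the cluster config and the SystemRootToken.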