X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/2e0648fb2b8a006664e6225826d78916f682eff5..HEAD:/lib/deduplicationreport/report.go diff --git a/lib/deduplicationreport/report.go b/lib/deduplicationreport/report.go index b7699fcb25..2f9521c65d 100644 --- a/lib/deduplicationreport/report.go +++ b/lib/deduplicationreport/report.go @@ -10,7 +10,7 @@ import ( "io" "strings" - "git.arvados.org/arvados.git/lib/config" + "git.arvados.org/arvados.git/lib/cmd" "git.arvados.org/arvados.git/sdk/go/arvados" "git.arvados.org/arvados.git/sdk/go/arvadosclient" "git.arvados.org/arvados.git/sdk/go/manifest" @@ -22,7 +22,7 @@ import ( func deDuplicate(inputs []string) (trimmed []string) { seen := make(map[string]bool) for _, uuid := range inputs { - if _, ok := seen[uuid]; !ok { + if !seen[uuid] { seen[uuid] = true trimmed = append(trimmed, uuid) } @@ -30,21 +30,22 @@ func deDuplicate(inputs []string) (trimmed []string) { return } -func parseFlags(prog string, args []string, loader *config.Loader, logger *logrus.Logger, stderr io.Writer) (exitcode int, inputs []string) { - flags := flag.NewFlagSet("", flag.ContinueOnError) - flags.SetOutput(stderr) +// parseFlags returns either some inputs to process, or (if there are +// no inputs to process) a nil slice and a suitable exit code. +func parseFlags(prog string, args []string, logger *logrus.Logger, stderr io.Writer) (inputs []string, exitcode int) { + flags := flag.NewFlagSet(prog, flag.ContinueOnError) flags.Usage = func() { fmt.Fprintf(flags.Output(), ` Usage: %s [options ...] ... - %s [options ...] , \ - , ... + %s [options ...] , \ + , ... This program analyzes the overlap in blocks used by 2 or more collections. It - prints a deduplication report that shows the nominal space used by the list - of collection, as well as the actual size and the amount of space that is - saved by Keep's deduplication. + prints a deduplication report that shows the nominal space used by the + collections, as well as the actual size and the amount of space that is saved + by Keep's deduplication. The list of collections may be provided in two ways. A list of collection uuids is sufficient. Alternatively, the PDH for each collection may also be @@ -58,40 +59,34 @@ Example: Use the 'arv' and 'jq' commands to get the list of the 100 largest collections and generate the deduplication report: - arv collection list --order 'file_size_total desc' | \ + arv collection list --order 'file_size_total desc' --limit 100 | \ jq -r '.items[] | [.portable_data_hash,.uuid] |@csv' | \ - tail -n100 |sed -e 's/"//g'|tr '\n' ' ' | \ + sed -e 's/"//g'|tr '\n' ' ' | \ xargs %s Options: `, prog, prog, prog) flags.PrintDefaults() } - loader.SetupFlags(flags) loglevel := flags.String("log-level", "info", "logging level (debug, info, ...)") - err := flags.Parse(args) - if err == flag.ErrHelp { - return 0, inputs - } else if err != nil { - return 2, inputs + if ok, code := cmd.ParseFlags(flags, prog, args, "collection-uuid [...]", stderr); !ok { + return nil, code } - inputs = flags.Args() + inputs = deDuplicate(flags.Args()) - inputs = deDuplicate(inputs) - - if len(inputs) < 2 { - logger.Error("Error: at least 2 different collections UUIDs required") - flags.Usage() - return 2, inputs + if len(inputs) < 1 { + fmt.Fprintf(stderr, "Error: no collections provided\n") + return nil, 2 } lvl, err := logrus.ParseLevel(*loglevel) if err != nil { - return 2, inputs + fmt.Fprintf(stderr, "Error: cannot parse log level: %s\n", err) + return nil, 2 } logger.SetLevel(lvl) - return + return inputs, 0 } func blockList(collection arvados.Collection) (blocks map[string]int) { @@ -104,18 +99,18 @@ func blockList(collection arvados.Collection) (blocks map[string]int) { return } -func report(prog string, args []string, loader *config.Loader, logger *logrus.Logger, stdout, stderr io.Writer) (exitcode int) { - +func report(prog string, args []string, logger *logrus.Logger, stdout, stderr io.Writer) (exitcode int) { var inputs []string - exitcode, inputs = parseFlags(prog, args, loader, logger, stderr) - if exitcode != 0 { + + inputs, exitcode = parseFlags(prog, args, logger, stderr) + if inputs == nil { return } // Arvados Client setup arv, err := arvadosclient.MakeArvadosClient() if err != nil { - logger.Errorf("error creating Arvados object: %s", err) + logger.Errorf("Error creating Arvados object: %s", err) exitcode = 1 return } @@ -129,7 +124,6 @@ func report(prog string, args []string, loader *config.Loader, logger *logrus.Lo pdhs := make(map[string]Col) var nominalSize int64 - fmt.Println() for _, input := range inputs { var uuid string var pdh string @@ -143,7 +137,7 @@ func report(prog string, args []string, loader *config.Loader, logger *logrus.Lo uuid = input } if !strings.Contains(uuid, "-4zz18-") { - logger.Error("uuid must refer to collection object") + logger.Errorf("Error: uuid must refer to collection object") exitcode = 1 return } @@ -158,14 +152,14 @@ func report(prog string, args []string, loader *config.Loader, logger *logrus.Lo var collection arvados.Collection err = arv.Get("collections", uuid, nil, &collection) if err != nil { - logger.Errorf("Error: unable to retrieve collection: %s\n", err) + logger.Errorf("Error: unable to retrieve collection: %s", err) exitcode = 1 return } blocks[uuid] = make(map[string]int) blocks[uuid] = blockList(collection) if pdh != "" && collection.PortableDataHash != pdh { - logger.Errorf("Error: the collection with UUID %s has PDH %s, but a different PDH was provided in the arguments: %s\n", uuid, collection.PortableDataHash, pdh) + logger.Errorf("Error: the collection with UUID %s has PDH %s, but a different PDH was provided in the arguments: %s", uuid, collection.PortableDataHash, pdh) exitcode = 1 return } @@ -201,7 +195,7 @@ func report(prog string, args []string, loader *config.Loader, logger *logrus.Lo seen := make(map[string]bool) for _, v := range blocks { for pdh, size := range v { - if _, ok := seen[pdh]; !ok { + if !seen[pdh] { seen[pdh] = true totalSize += int64(size) }