X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/ccefd86d022a00e89af37c6c7f9e71d4d68178ef..29e7c39df5bbe3eae5007b0e56eb93aace7728bd:/lib/deduplicationreport/report.go diff --git a/lib/deduplicationreport/report.go b/lib/deduplicationreport/report.go index b7699fcb25..8bb3fc4e57 100644 --- a/lib/deduplicationreport/report.go +++ b/lib/deduplicationreport/report.go @@ -22,7 +22,7 @@ import ( func deDuplicate(inputs []string) (trimmed []string) { seen := make(map[string]bool) for _, uuid := range inputs { - if _, ok := seen[uuid]; !ok { + if !seen[uuid] { seen[uuid] = true trimmed = append(trimmed, uuid) } @@ -42,9 +42,9 @@ Usage: , ... This program analyzes the overlap in blocks used by 2 or more collections. It - prints a deduplication report that shows the nominal space used by the list - of collection, as well as the actual size and the amount of space that is - saved by Keep's deduplication. + prints a deduplication report that shows the nominal space used by the + collections, as well as the actual size and the amount of space that is saved + by Keep's deduplication. The list of collections may be provided in two ways. A list of collection uuids is sufficient. Alternatively, the PDH for each collection may also be @@ -58,9 +58,9 @@ Example: Use the 'arv' and 'jq' commands to get the list of the 100 largest collections and generate the deduplication report: - arv collection list --order 'file_size_total desc' | \ + arv collection list --order 'file_size_total desc' --limit 100 | \ jq -r '.items[] | [.portable_data_hash,.uuid] |@csv' | \ - tail -n100 |sed -e 's/"//g'|tr '\n' ' ' | \ + tail -n+2 |sed -e 's/"//g'|tr '\n' ' ' | \ xargs %s Options: @@ -80,8 +80,8 @@ Options: inputs = deDuplicate(inputs) - if len(inputs) < 2 { - logger.Error("Error: at least 2 different collections UUIDs required") + if len(inputs) < 1 { + logger.Errorf("Error: no collections provided") flags.Usage() return 2, inputs } @@ -115,7 +115,7 @@ func report(prog string, args []string, loader *config.Loader, logger *logrus.Lo // Arvados Client setup arv, err := arvadosclient.MakeArvadosClient() if err != nil { - logger.Errorf("error creating Arvados object: %s", err) + logger.Errorf("Error creating Arvados object: %s", err) exitcode = 1 return } @@ -129,7 +129,6 @@ func report(prog string, args []string, loader *config.Loader, logger *logrus.Lo pdhs := make(map[string]Col) var nominalSize int64 - fmt.Println() for _, input := range inputs { var uuid string var pdh string @@ -143,7 +142,7 @@ func report(prog string, args []string, loader *config.Loader, logger *logrus.Lo uuid = input } if !strings.Contains(uuid, "-4zz18-") { - logger.Error("uuid must refer to collection object") + logger.Errorf("Error: uuid must refer to collection object") exitcode = 1 return } @@ -158,14 +157,14 @@ func report(prog string, args []string, loader *config.Loader, logger *logrus.Lo var collection arvados.Collection err = arv.Get("collections", uuid, nil, &collection) if err != nil { - logger.Errorf("Error: unable to retrieve collection: %s\n", err) + logger.Errorf("Error: unable to retrieve collection: %s", err) exitcode = 1 return } blocks[uuid] = make(map[string]int) blocks[uuid] = blockList(collection) if pdh != "" && collection.PortableDataHash != pdh { - logger.Errorf("Error: the collection with UUID %s has PDH %s, but a different PDH was provided in the arguments: %s\n", uuid, collection.PortableDataHash, pdh) + logger.Errorf("Error: the collection with UUID %s has PDH %s, but a different PDH was provided in the arguments: %s", uuid, collection.PortableDataHash, pdh) exitcode = 1 return } @@ -201,7 +200,7 @@ func report(prog string, args []string, loader *config.Loader, logger *logrus.Lo seen := make(map[string]bool) for _, v := range blocks { for pdh, size := range v { - if _, ok := seen[pdh]; !ok { + if !seen[pdh] { seen[pdh] = true totalSize += int64(size) }