X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/8da2f73faeeecd81f52949451fe07a674ba47df6..HEAD:/lib/costanalyzer/costanalyzer.go diff --git a/lib/costanalyzer/costanalyzer.go b/lib/costanalyzer/costanalyzer.go index e8dd186050..e68e2cb8c1 100644 --- a/lib/costanalyzer/costanalyzer.go +++ b/lib/costanalyzer/costanalyzer.go @@ -9,10 +9,6 @@ import ( "errors" "flag" "fmt" - "git.arvados.org/arvados.git/lib/config" - "git.arvados.org/arvados.git/sdk/go/arvados" - "git.arvados.org/arvados.git/sdk/go/arvadosclient" - "git.arvados.org/arvados.git/sdk/go/keepclient" "io" "io/ioutil" "net/http" @@ -21,9 +17,17 @@ import ( "strings" "time" + "git.arvados.org/arvados.git/lib/cmd" + "git.arvados.org/arvados.git/sdk/go/arvados" + "git.arvados.org/arvados.git/sdk/go/arvadosclient" + "git.arvados.org/arvados.git/sdk/go/keepclient" "github.com/sirupsen/logrus" ) +const timestampFormat = "2006-01-02T15:04:05" + +var pagesize = 1000 + type nodeInfo struct { // Legacy (records created by Arvados Node Manager with Arvados <= 1.4.3) Properties struct { @@ -35,6 +39,17 @@ type nodeInfo struct { // Modern ProviderType string Price float64 + Preemptible bool +} + +type consumption struct { + cost float64 + duration float64 +} + +func (c *consumption) Add(n consumption) { + c.cost += n.cost + c.duration += n.duration } type arrayFlags []string @@ -50,91 +65,113 @@ func (i *arrayFlags) Set(value string) error { return nil } -func parseFlags(prog string, args []string, loader *config.Loader, logger *logrus.Logger, stderr io.Writer) (exitCode int, uuids arrayFlags, resultsDir string, cache bool, err error) { +func (c *command) parseFlags(prog string, args []string, logger *logrus.Logger, stderr io.Writer) (ok bool, exitCode int) { + var beginStr, endStr string flags := flag.NewFlagSet("", flag.ContinueOnError) - flags.SetOutput(stderr) flags.Usage = func() { fmt.Fprintf(flags.Output(), ` Usage: - %s [options ...] ... + %s [options ...] [UUID ...] - This program analyzes the cost of Arvados container requests. For each uuid - supplied, it creates a CSV report that lists all the containers used to - fulfill the container request, together with the machine type and cost of - each container. At least one uuid must be specified. + This program analyzes the cost of Arvados container requests and calculates + the total cost across all requests. At least one UUID or a timestamp range + must be specified. + + When the '-output' option is specified, a set of CSV files with cost details + will be written to the provided directory. Each file is a CSV report that lists + all the containers used to fulfill the container request, together with the + machine type and cost of each container. - When supplied with the uuid of a container request, it will calculate the + When supplied with the UUID of a container request, it will calculate the cost of that container request and all its children. - When supplied with the uuid of a collection, it will see if there is a - container_request uuid in the properties of the collection, and if so, it + When supplied with the UUID of a collection, it will see if there is a + container_request UUID in the properties of the collection, and if so, it will calculate the cost of that container request and all its children. - When supplied with a project uuid or when supplied with multiple container - request or collection uuids, it will create a CSV report for each supplied - uuid, as well as a CSV file with aggregate cost accounting for all supplied - uuids. The aggregate cost report takes container reuse into account: if a - container was reused between several container requests, its cost will only - be counted once. + When supplied with a project UUID or when supplied with multiple container + request or collection UUIDs, it will calculate the total cost for all + supplied UUIDs. - To get the node costs, the progam queries the Arvados API for current cost - data for each node type used. This means that the reported cost always - reflects the cost data as currently defined in the Arvados API configuration - file. + When supplied with a 'begin' and 'end' timestamp (format: + %s), it will calculate the cost for all top-level container + requests whose containers finished during the specified interval. + + The total cost calculation takes container reuse into account: if a container + was reused between several container requests, its cost will only be counted + once. Caveats: - - the Arvados API configuration cost data may be out of sync with the cloud - provider. - - when generating reports for older container requests, the cost data in the - Arvados API configuration file may have changed since the container request - was fulfilled. This program uses the cost data stored at the time of the + + - This program uses the cost data from config.yml at the time of the execution of the container, stored in the 'node.json' file in its log - collection. + collection. If the cost data was not correctly configured at the time the + container was executed, the output from this program will be incorrect. + + - If a container was run on a preemptible ("spot") instance, the cost data + reported by this program may be wildly inaccurate, because it does not have + access to the spot pricing in effect for the node then the container ran. The + UUID report file that is generated when the '-output' option is specified has + a column that indicates the preemptible state of the instance that ran the + container. + + - This program does not take into account overhead costs like the time spent + starting and stopping compute nodes that run containers, the cost of the + permanent cloud nodes that provide the Arvados services, the cost of data + stored in Arvados, etc. - In order to get the data for the uuids supplied, the ARVADOS_API_HOST and + - When provided with a project UUID, subprojects will not be considered. + + In order to get the data for the UUIDs supplied, the ARVADOS_API_HOST and ARVADOS_API_TOKEN environment variables must be set. This program prints the total dollar amount from the aggregate cost - accounting across all provided uuids on stdout. - - When the '-output' option is specified, a set of CSV files with cost details - will be written to the provided directory. + accounting across all provided UUIDs on stdout. Options: -`, prog) +`, prog, timestampFormat) flags.PrintDefaults() } loglevel := flags.String("log-level", "info", "logging `level` (debug, info, ...)") - flags.StringVar(&resultsDir, "output", "", "output `directory` for the CSV reports") - flags.BoolVar(&cache, "cache", true, "create and use a local disk cache of Arvados objects") - err = flags.Parse(args) - if err == flag.ErrHelp { - err = nil - exitCode = 1 - return - } else if err != nil { - exitCode = 2 - return + flags.StringVar(&c.resultsDir, "output", "", "output `directory` for the CSV reports") + flags.StringVar(&beginStr, "begin", "", fmt.Sprintf("timestamp `begin` for date range operation (format: %s)", timestampFormat)) + flags.StringVar(&endStr, "end", "", fmt.Sprintf("timestamp `end` for date range operation (format: %s)", timestampFormat)) + flags.BoolVar(&c.cache, "cache", true, "create and use a local disk cache of Arvados objects") + if ok, code := cmd.ParseFlags(flags, prog, args, "[uuid ...]", stderr); !ok { + return false, code + } + c.uuids = flags.Args() + + if (len(beginStr) != 0 && len(endStr) == 0) || (len(beginStr) == 0 && len(endStr) != 0) { + fmt.Fprintf(stderr, "When specifying a date range, both begin and end must be specified (try -help)\n") + return false, 2 + } + + if len(beginStr) != 0 { + var errB, errE error + c.begin, errB = time.Parse(timestampFormat, beginStr) + c.end, errE = time.Parse(timestampFormat, endStr) + if (errB != nil) || (errE != nil) { + fmt.Fprintf(stderr, "When specifying a date range, both begin and end must be of the format %s %+v, %+v\n", timestampFormat, errB, errE) + return false, 2 + } } - uuids = flags.Args() - if len(uuids) < 1 { - flags.Usage() - err = fmt.Errorf("error: no uuid(s) provided") - exitCode = 2 - return + if (len(c.uuids) < 1) && (len(beginStr) == 0) { + fmt.Fprintf(stderr, "error: no uuid(s) provided (try -help)\n") + return false, 2 } lvl, err := logrus.ParseLevel(*loglevel) if err != nil { - exitCode = 2 - return + fmt.Fprintf(stderr, "invalid argument to -log-level: %s\n", err) + return false, 2 } logger.SetLevel(lvl) - if !cache { - logger.Debug("Caching disabled\n") + if !c.cache { + logger.Debug("Caching disabled") } - return + return true, 0 } func ensureDirectory(logger *logrus.Logger, dir string) (err error) { @@ -152,7 +189,9 @@ func ensureDirectory(logger *logrus.Logger, dir string) (err error) { return } -func addContainerLine(logger *logrus.Logger, node nodeInfo, cr arvados.ContainerRequest, container arvados.Container) (csv string, cost float64) { +func addContainerLine(logger *logrus.Logger, node nodeInfo, cr arvados.ContainerRequest, container arvados.Container) (string, consumption) { + var csv string + var containerConsumption consumption csv = cr.UUID + "," csv += cr.Name + "," csv += container.UUID + "," @@ -167,7 +206,7 @@ func addContainerLine(logger *logrus.Logger, node nodeInfo, cr arvados.Container if container.FinishedAt != nil { csv += container.FinishedAt.String() + "," delta = container.FinishedAt.Sub(*container.StartedAt) - csv += strconv.FormatFloat(delta.Seconds(), 'f', 0, 64) + "," + csv += strconv.FormatFloat(delta.Seconds(), 'f', 3, 64) + "," } else { csv += ",," } @@ -180,9 +219,10 @@ func addContainerLine(logger *logrus.Logger, node nodeInfo, cr arvados.Container price = node.Price size = node.ProviderType } - cost = delta.Seconds() / 3600 * price - csv += size + "," + strconv.FormatFloat(price, 'f', 8, 64) + "," + strconv.FormatFloat(cost, 'f', 8, 64) + "\n" - return + containerConsumption.cost = delta.Seconds() / 3600 * price + containerConsumption.duration = delta.Seconds() + csv += size + "," + fmt.Sprintf("%+v", node.Preemptible) + "," + strconv.FormatFloat(price, 'f', 8, 64) + "," + strconv.FormatFloat(containerConsumption.cost, 'f', 8, 64) + "\n" + return csv, containerConsumption } func loadCachedObject(logger *logrus.Logger, file string, uuid string, object interface{}) (reload bool) { @@ -212,12 +252,12 @@ func loadCachedObject(logger *logrus.Logger, file string, uuid string, object in case *arvados.ContainerRequest: if v.State == arvados.ContainerRequestStateFinal { reload = false - logger.Debugf("Loaded object %s from local cache (%s)\n", uuid, file) + logger.Debugf("Loaded object %s from local cache (%s)", uuid, file) } case *arvados.Container: if v.State == arvados.ContainerStateComplete || v.State == arvados.ContainerStateCancelled { reload = false - logger.Debugf("Loaded object %s from local cache (%s)\n", uuid, file) + logger.Debugf("Loaded object %s from local cache (%s)", uuid, file) } } return @@ -317,17 +357,44 @@ func getNode(arv *arvadosclient.ArvadosClient, ac *arvados.Client, kc *keepclien return } -func handleProject(logger *logrus.Logger, uuid string, arv *arvadosclient.ArvadosClient, ac *arvados.Client, kc *keepclient.KeepClient, resultsDir string, cache bool) (cost map[string]float64, err error) { - cost = make(map[string]float64) +func getContainerRequests(ac *arvados.Client, filters []arvados.Filter) ([]arvados.ContainerRequest, error) { + var allItems []arvados.ContainerRequest + for { + pagefilters := append([]arvados.Filter(nil), filters...) + if len(allItems) > 0 { + pagefilters = append(pagefilters, arvados.Filter{ + Attr: "uuid", + Operator: ">", + Operand: allItems[len(allItems)-1].UUID, + }) + } + var resp arvados.ContainerRequestList + err := ac.RequestAndDecode(&resp, "GET", "arvados/v1/container_requests", nil, arvados.ResourceListParams{ + Filters: pagefilters, + Limit: &pagesize, + Order: "uuid", + Count: "none", + }) + if err != nil { + return nil, fmt.Errorf("error querying container_requests: %w", err) + } + if len(resp.Items) == 0 { + // no more pages + return allItems, nil + } + allItems = append(allItems, resp.Items...) + } +} + +func handleProject(logger *logrus.Logger, uuid string, arv *arvadosclient.ArvadosClient, ac *arvados.Client, kc *keepclient.KeepClient, resultsDir string, cache bool) (cost map[string]consumption, err error) { + cost = make(map[string]consumption) var project arvados.Group err = loadObject(logger, ac, uuid, uuid, cache, &project) if err != nil { return nil, fmt.Errorf("error loading object %s: %s", uuid, err.Error()) } - - var childCrs map[string]interface{} - filterset := []arvados.Filter{ + allItems, err := getContainerRequests(ac, []arvados.Filter{ { Attr: "owner_uuid", Operator: "=", @@ -338,41 +405,35 @@ func handleProject(logger *logrus.Logger, uuid string, arv *arvadosclient.Arvado Operator: "=", Operand: nil, }, - } - err = ac.RequestAndDecode(&childCrs, "GET", "arvados/v1/container_requests", nil, map[string]interface{}{ - "filters": filterset, - "limit": 10000, }) if err != nil { return nil, fmt.Errorf("error querying container_requests: %s", err.Error()) } - if value, ok := childCrs["items"]; ok { - logger.Infof("Collecting top level container requests in project %s\n", uuid) - items := value.([]interface{}) - for _, item := range items { - itemMap := item.(map[string]interface{}) - crCsv, err := generateCrCsv(logger, itemMap["uuid"].(string), arv, ac, kc, resultsDir, cache) - if err != nil { - return nil, fmt.Errorf("error generating container_request CSV: %s", err.Error()) - } - for k, v := range crCsv { - cost[k] = v - } + if len(allItems) == 0 { + logger.Infof("No top level container requests found in project %s", uuid) + return + } + logger.Infof("Collecting top level container requests in project %s", uuid) + for _, cr := range allItems { + crInfo, err := generateCrInfo(logger, cr.UUID, arv, ac, kc, resultsDir, cache) + if err != nil { + return nil, fmt.Errorf("error generating container_request CSV for %s: %s", cr.UUID, err) + } + for k, v := range crInfo { + cost[k] = v } - } else { - logger.Infof("No top level container requests found in project %s\n", uuid) } return } -func generateCrCsv(logger *logrus.Logger, uuid string, arv *arvadosclient.ArvadosClient, ac *arvados.Client, kc *keepclient.KeepClient, resultsDir string, cache bool) (cost map[string]float64, err error) { +func generateCrInfo(logger *logrus.Logger, uuid string, arv *arvadosclient.ArvadosClient, ac *arvados.Client, kc *keepclient.KeepClient, resultsDir string, cache bool) (cost map[string]consumption, err error) { - cost = make(map[string]float64) + cost = make(map[string]consumption) - csv := "CR UUID,CR name,Container UUID,State,Started At,Finished At,Duration in seconds,Compute node type,Hourly node cost,Total cost\n" + csv := "CR UUID,CR name,Container UUID,State,Started At,Finished At,Duration in seconds,Compute node type,Preemptible,Hourly node cost,Total cost\n" var tmpCsv string - var tmpTotalCost float64 - var totalCost float64 + var total, tmpTotal consumption + logger.Debugf("Processing %s", uuid) var crUUID = uuid if strings.Contains(uuid, "-4zz18-") { @@ -398,6 +459,11 @@ func generateCrCsv(logger *logrus.Logger, uuid string, arv *arvadosclient.Arvado if err != nil { return nil, fmt.Errorf("error loading cr object %s: %s", uuid, err) } + if len(cr.ContainerUUID) == 0 { + // Nothing to do! E.g. a CR in 'Uncommitted' state. + logger.Infof("No container associated with container request %s, skipping", crUUID) + return nil, nil + } var container arvados.Container err = loadObject(logger, ac, crUUID, cr.ContainerUUID, cache, &container) if err != nil { @@ -406,49 +472,48 @@ func generateCrCsv(logger *logrus.Logger, uuid string, arv *arvadosclient.Arvado topNode, err := getNode(arv, ac, kc, cr) if err != nil { - return nil, fmt.Errorf("error getting node %s: %s", cr.UUID, err) + logger.Errorf("Skipping container request %s: error getting node %s: %s", cr.UUID, cr.UUID, err) + return nil, nil } - tmpCsv, totalCost = addContainerLine(logger, topNode, cr, container) + tmpCsv, total = addContainerLine(logger, topNode, cr, container) csv += tmpCsv - totalCost += tmpTotalCost - cost[container.UUID] = totalCost - - // Find all container requests that have the container we found above as requesting_container_uuid - var childCrs arvados.ContainerRequestList - filterset := []arvados.Filter{ - { - Attr: "requesting_container_uuid", - Operator: "=", - Operand: container.UUID, - }} - err = ac.RequestAndDecode(&childCrs, "GET", "arvados/v1/container_requests", nil, map[string]interface{}{ - "filters": filterset, - "limit": 10000, - }) - if err != nil { - return nil, fmt.Errorf("error querying container_requests: %s", err.Error()) - } - logger.Infof("Collecting child containers for container request %s", crUUID) - for _, cr2 := range childCrs.Items { - logger.Info(".") + cost[container.UUID] = total + + // Find all container requests that have the container we + // found above as requesting_container_uuid. + allItems, err := getContainerRequests(ac, []arvados.Filter{{ + Attr: "requesting_container_uuid", + Operator: "=", + Operand: container.UUID, + }}) + logger.Infof("Looking up %d child containers for container %s (%s)", len(allItems), container.UUID, container.FinishedAt) + progressTicker := time.NewTicker(5 * time.Second) + defer progressTicker.Stop() + for i, cr2 := range allItems { + select { + case <-progressTicker.C: + logger.Infof("... %d of %d", i+1, len(allItems)) + default: + } node, err := getNode(arv, ac, kc, cr2) if err != nil { - return nil, fmt.Errorf("error getting node %s: %s", cr2.UUID, err) + logger.Errorf("Skipping container request %s: error getting node %s: %s", cr2.UUID, cr2.UUID, err) + continue } - logger.Debug("\nChild container: " + cr2.ContainerUUID + "\n") + logger.Debug("Child container: " + cr2.ContainerUUID) var c2 arvados.Container err = loadObject(logger, ac, cr.UUID, cr2.ContainerUUID, cache, &c2) if err != nil { return nil, fmt.Errorf("error loading object %s: %s", cr2.ContainerUUID, err) } - tmpCsv, tmpTotalCost = addContainerLine(logger, node, cr2, c2) - cost[cr2.ContainerUUID] = tmpTotalCost + tmpCsv, tmpTotal = addContainerLine(logger, node, cr2, c2) + cost[cr2.ContainerUUID] = tmpTotal csv += tmpCsv - totalCost += tmpTotalCost + total.Add(tmpTotal) } - logger.Info(" done\n") + logger.Debug("Done collecting child containers") - csv += "TOTAL,,,,,,,,," + strconv.FormatFloat(totalCost, 'f', 8, 64) + "\n" + csv += "TOTAL,,,,,," + strconv.FormatFloat(total.duration, 'f', 3, 64) + ",,,," + strconv.FormatFloat(total.cost, 'f', 2, 64) + "\n" if resultsDir != "" { // Write the resulting CSV file @@ -457,25 +522,28 @@ func generateCrCsv(logger *logrus.Logger, uuid string, arv *arvadosclient.Arvado if err != nil { return nil, fmt.Errorf("error writing file with path %s: %s", fName, err.Error()) } - logger.Infof("\nUUID report in %s\n\n", fName) + logger.Infof("\nUUID report in %s", fName) } return } -func costanalyzer(prog string, args []string, loader *config.Loader, logger *logrus.Logger, stdout, stderr io.Writer) (exitcode int, err error) { - exitcode, uuids, resultsDir, cache, err := parseFlags(prog, args, loader, logger, stderr) - if exitcode != 0 { +func (c *command) costAnalyzer(prog string, args []string, logger *logrus.Logger, stdout, stderr io.Writer) (exitcode int, err error) { + var ok bool + ok, exitcode = c.parseFlags(prog, args, logger, stderr) + if !ok { return } - if resultsDir != "" { - err = ensureDirectory(logger, resultsDir) + if c.resultsDir != "" { + err = ensureDirectory(logger, c.resultsDir) if err != nil { exitcode = 3 return } } + uuidChannel := make(chan string) + // Arvados Client setup arv, err := arvadosclient.MakeArvadosClient() if err != nil { @@ -492,11 +560,51 @@ func costanalyzer(prog string, args []string, loader *config.Loader, logger *log ac := arvados.NewClientFromEnv() - cost := make(map[string]float64) - for _, uuid := range uuids { + // Populate uuidChannel with the requested uuid list + go func() { + defer close(uuidChannel) + for _, uuid := range c.uuids { + uuidChannel <- uuid + } + + if !c.begin.IsZero() { + initialParams := arvados.ResourceListParams{ + Filters: []arvados.Filter{{"container.finished_at", ">=", c.begin}, {"container.finished_at", "<", c.end}, {"requesting_container_uuid", "=", nil}}, + Order: "created_at", + } + params := initialParams + for { + // This list variable must be a new one declared + // inside the loop: otherwise, items in the API + // response would get deep-merged into the items + // loaded in previous iterations. + var list arvados.ContainerRequestList + + err := ac.RequestAndDecode(&list, "GET", "arvados/v1/container_requests", nil, params) + if err != nil { + logger.Errorf("Error getting container request list from Arvados API: %s", err) + break + } + if len(list.Items) == 0 { + break + } + + for _, i := range list.Items { + uuidChannel <- i.UUID + } + params.Offset += len(list.Items) + } + + } + }() + + cost := make(map[string]consumption) + + for uuid := range uuidChannel { + logger.Debugf("Considering %s", uuid) if strings.Contains(uuid, "-j7d0g-") { // This is a project (group) - cost, err = handleProject(logger, uuid, arv, ac, kc, resultsDir, cache) + cost, err = handleProject(logger, uuid, arv, ac, kc, c.resultsDir, c.cache) if err != nil { exitcode = 1 return @@ -505,15 +613,15 @@ func costanalyzer(prog string, args []string, loader *config.Loader, logger *log cost[k] = v } } else if strings.Contains(uuid, "-xvhdp-") || strings.Contains(uuid, "-4zz18-") { - // This is a container request - var crCsv map[string]float64 - crCsv, err = generateCrCsv(logger, uuid, arv, ac, kc, resultsDir, cache) + // This is a container request or collection + var crInfo map[string]consumption + crInfo, err = generateCrInfo(logger, uuid, arv, ac, kc, c.resultsDir, c.cache) if err != nil { err = fmt.Errorf("error generating CSV for uuid %s: %s", uuid, err.Error()) exitcode = 2 return } - for k, v := range crCsv { + for k, v := range crInfo { cost[k] = v } } else if strings.Contains(uuid, "-tpzed-") { @@ -523,46 +631,46 @@ func costanalyzer(prog string, args []string, loader *config.Loader, logger *log // keep going. logger.Errorf("cost analysis is not supported for the 'Home' project: %s", uuid) } else { - logger.Errorf("this argument does not look like a uuid: %s\n", uuid) + logger.Errorf("this argument does not look like a uuid: %s", uuid) exitcode = 3 return } } if len(cost) == 0 { - logger.Info("Nothing to do!\n") + logger.Info("Nothing to do!") return } var csv string - csv = "# Aggregate cost accounting for uuids:\n" - for _, uuid := range uuids { + csv = "# Aggregate cost accounting for uuids:\n# UUID, Duration in seconds, Total cost\n" + for _, uuid := range c.uuids { csv += "# " + uuid + "\n" } - var total float64 + var total consumption for k, v := range cost { - csv += k + "," + strconv.FormatFloat(v, 'f', 8, 64) + "\n" - total += v + csv += k + "," + strconv.FormatFloat(v.duration, 'f', 3, 64) + "," + strconv.FormatFloat(v.cost, 'f', 8, 64) + "\n" + total.Add(v) } - csv += "TOTAL," + strconv.FormatFloat(total, 'f', 8, 64) + "\n" + csv += "TOTAL," + strconv.FormatFloat(total.duration, 'f', 3, 64) + "," + strconv.FormatFloat(total.cost, 'f', 2, 64) + "\n" - if resultsDir != "" { + if c.resultsDir != "" { // Write the resulting CSV file - aFile := resultsDir + "/" + time.Now().Format("2006-01-02-15-04-05") + "-aggregate-costaccounting.csv" + aFile := c.resultsDir + "/" + time.Now().Format("2006-01-02-15-04-05") + "-aggregate-costaccounting.csv" err = ioutil.WriteFile(aFile, []byte(csv), 0644) if err != nil { err = fmt.Errorf("error writing file with path %s: %s", aFile, err.Error()) exitcode = 1 return } - logger.Infof("Aggregate cost accounting for all supplied uuids in %s\n", aFile) + logger.Infof("Aggregate cost accounting for all supplied uuids in %s", aFile) } // Output the total dollar amount on stdout - fmt.Fprintf(stdout, "%s\n", strconv.FormatFloat(total, 'f', 8, 64)) + fmt.Fprintf(stdout, "%s\n", strconv.FormatFloat(total.cost, 'f', 2, 64)) return }