1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
20 "git.arvados.org/arvados.git/sdk/go/arvados"
21 "git.arvados.org/arvados.git/sdk/go/arvadosclient"
22 "git.arvados.org/arvados.git/sdk/go/keepclient"
23 "github.com/sirupsen/logrus"
// timestampFormat is the time layout that the -begin and -end flag values
// must follow; it is handed to time.Parse and quoted in the help text.
26 const timestampFormat = "2006-01-02T15:04:05"
// nodeInfo describes the compute node that ran a container. Values of this
// type are JSON-decoded from the 'node.json' file of a container request's
// log collection. Code in this file reads Properties.CloudNode.Price,
// Properties.CloudNode.Size, ProviderType and Preemptible from it.
28 type nodeInfo struct {
29 // Legacy (records created by Arvados Node Manager with Arvados <= 1.4.3)
// consumption records what a container (or a set of containers) used:
// duration is a number of seconds and cost is the matching amount of money,
// computed elsewhere in this file as duration/3600 times the hourly price.
42 type consumption struct {
// Add accumulates n into the receiver, modifying c in place.
47 func (c *consumption) Add(n consumption) {
// Durations are expressed in seconds, so they can simply be summed.
49 c.duration += n.duration
// arrayFlags is a string slice with String and Set methods.
// NOTE(review): that method pair matches flag.Value, so this is presumably
// meant for list-valued command line options — confirm where it is
// registered.
52 type arrayFlags []string
// String returns the string form of the receiver.
// NOTE(review): presumably the String half of flag.Value — TODO confirm.
54 func (i *arrayFlags) String() string {
// Set handles one value given for the option. The value is split on commas
// and the resulting pieces are processed one by one, so a single argument
// may carry several entries.
// NOTE(review): presumably the Set half of flag.Value — TODO confirm.
58 func (i *arrayFlags) Set(value string) error {
// Iterate over the comma-separated pieces of value.
59 for _, s := range strings.Split(value, ",") {
// parseFlags parses the command line arguments in args into c.
//
// It registers the -log-level, -output, -begin, -end and -cache options on a
// fresh FlagSet whose output goes to stderr, installs a usage function that
// prints the help text (with prog and timestampFormat substituted in)
// followed by the flag defaults, and stores the remaining positional
// arguments in c.uuids.
//
// After parsing it checks that -begin and -end are either both given or both
// omitted, that both values parse with timestampFormat (the results go to
// c.begin and c.end), and that at least one UUID or a date range was
// supplied. The -log-level value is parsed with logrus.ParseLevel.
//
// It returns an exit code together with an error describing what was wrong
// with the arguments.
//
// NOTE(review): the help text says "for the node then the container ran";
// this looks like a typo for "when the container ran".
// NOTE(review): two of the error strings below start with a capital letter,
// unlike the other error strings in this file.
65 func (c *command) parseFlags(prog string, args []string, logger *logrus.Logger, stderr io.Writer) (exitCode int, err error) {
66 var beginStr, endStr string
// ContinueOnError makes Parse hand the error back instead of exiting.
67 flags := flag.NewFlagSet("", flag.ContinueOnError)
68 flags.SetOutput(stderr)
69 flags.Usage = func() {
70 fmt.Fprintf(flags.Output(), `
72 %s [options ...] [UUID ...]
74 This program analyzes the cost of Arvados container requests and calculates
75 the total cost across all requests. At least one UUID or a timestamp range
78 When the '-output' option is specified, a set of CSV files with cost details
79 will be written to the provided directory. Each file is a CSV report that lists
80 all the containers used to fulfill the container request, together with the
81 machine type and cost of each container.
83 When supplied with the UUID of a container request, it will calculate the
84 cost of that container request and all its children.
86 When supplied with the UUID of a collection, it will see if there is a
87 container_request UUID in the properties of the collection, and if so, it
88 will calculate the cost of that container request and all its children.
90 When supplied with a project UUID or when supplied with multiple container
91 request or collection UUIDs, it will calculate the total cost for all
94 When supplied with a 'begin' and 'end' timestamp (format:
95 %s), it will calculate the cost for all top-level container
96 requests whose containers finished during the specified interval.
98 The total cost calculation takes container reuse into account: if a container
99 was reused between several container requests, its cost will only be counted
104 - This program uses the cost data from config.yml at the time of the
105 execution of the container, stored in the 'node.json' file in its log
106 collection. If the cost data was not correctly configured at the time the
107 container was executed, the output from this program will be incorrect.
109 - If a container was run on a preemptible ("spot") instance, the cost data
110 reported by this program may be wildly inaccurate, because it does not have
111 access to the spot pricing in effect for the node then the container ran. The
112 UUID report file that is generated when the '-output' option is specified has
113 a column that indicates the preemptible state of the instance that ran the
116 - This program does not take into account overhead costs like the time spent
117 starting and stopping compute nodes that run containers, the cost of the
118 permanent cloud nodes that provide the Arvados services, the cost of data
119 stored in Arvados, etc.
121 - When provided with a project UUID, subprojects will not be considered.
123 In order to get the data for the UUIDs supplied, the ARVADOS_API_HOST and
124 ARVADOS_API_TOKEN environment variables must be set.
126 This program prints the total dollar amount from the aggregate cost
127 accounting across all provided UUIDs on stdout.
130 `, prog, timestampFormat)
131 flags.PrintDefaults()
// Option definitions; the back-quoted word in each usage string is the
// placeholder name shown by PrintDefaults.
133 loglevel := flags.String("log-level", "info", "logging `level` (debug, info, ...)")
134 flags.StringVar(&c.resultsDir, "output", "", "output `directory` for the CSV reports")
135 flags.StringVar(&beginStr, "begin", "", fmt.Sprintf("timestamp `begin` for date range operation (format: %s)", timestampFormat))
136 flags.StringVar(&endStr, "end", "", fmt.Sprintf("timestamp `end` for date range operation (format: %s)", timestampFormat))
137 flags.BoolVar(&c.cache, "cache", true, "create and use a local disk cache of Arvados objects")
138 err = flags.Parse(args)
// flag.ErrHelp means help was requested rather than a real parse failure.
139 if err == flag.ErrHelp {
143 } else if err != nil {
// Everything left after the options is treated as a UUID to analyze.
147 c.uuids = flags.Args()
// -begin and -end only make sense together: reject exactly one of them.
149 if (len(beginStr) != 0 && len(endStr) == 0) || (len(beginStr) == 0 && len(endStr) != 0) {
151 err = fmt.Errorf("When specifying a date range, both begin and end must be specified")
156 if len(beginStr) != 0 {
// Both timestamps are parsed before either error is inspected, so the
// message below can report both problems at once.
158 c.begin, errB = time.Parse(timestampFormat, beginStr)
159 c.end, errE = time.Parse(timestampFormat, endStr)
160 if (errB != nil) || (errE != nil) {
162 err = fmt.Errorf("When specifying a date range, both begin and end must be of the format %s %+v, %+v", timestampFormat, errB, errE)
// Neither UUIDs nor a date range were given: there is nothing to analyze.
168 if (len(c.uuids) < 1) && (len(beginStr) == 0) {
170 err = fmt.Errorf("error: no uuid(s) provided")
175 lvl, err := logrus.ParseLevel(*loglevel)
182 logger.Debug("Caching disabled")
// ensureDirectory makes sure that dir exists and is a directory. When
// os.Stat reports that dir does not exist it is created, parents included,
// with mode 0700. A path that exists but is not a directory is reported as
// an error, as is a failure to create the directory.
187 func ensureDirectory(logger *logrus.Logger, dir string) (err error) {
188 statData, err := os.Stat(dir)
189 if os.IsNotExist(err) {
// 0700: the directory is only accessible to the current user.
190 err = os.MkdirAll(dir, 0700)
192 return fmt.Errorf("error creating directory %s: %s", dir, err.Error())
195 if !statData.IsDir() {
196 return fmt.Errorf("the path %s is not a directory", dir)
// addContainerLine builds the CSV text describing one container and computes
// that container's consumption.
//
// The columns produced by the lines below are the container UUID, its state,
// start time, finish time, duration in seconds (3 decimals), node size,
// preemptible flag, hourly price and total cost (8 decimals each); the text
// ends with a newline. The cost is the duration in hours multiplied by the
// hourly price, and the duration is the time between StartedAt and
// FinishedAt.
//
// Price and size are taken from the legacy node.Properties.CloudNode record
// when its Price is non-zero.
//
// It returns the CSV text and the consumption of the container.
//
// NOTE(review): container.StartedAt is nil-checked before it is printed, but
// it is dereferenced without such a check when FinishedAt is set — confirm
// that a finished container can never have a nil StartedAt.
202 func addContainerLine(logger *logrus.Logger, node nodeInfo, cr arvados.ContainerRequest, container arvados.Container) (string, consumption) {
204 var containerConsumption consumption
207 csv += container.UUID + ","
208 csv += string(container.State) + ","
209 if container.StartedAt != nil {
210 csv += container.StartedAt.String() + ","
// delta keeps its zero value unless the container has a finish time.
215 var delta time.Duration
216 if container.FinishedAt != nil {
217 csv += container.FinishedAt.String() + ","
218 delta = container.FinishedAt.Sub(*container.StartedAt)
219 csv += strconv.FormatFloat(delta.Seconds(), 'f', 3, 64) + ","
// A non-zero legacy price means the record uses the legacy layout.
225 if node.Properties.CloudNode.Price != 0 {
226 price = node.Properties.CloudNode.Price
227 size = node.Properties.CloudNode.Size
230 size = node.ProviderType
// price is per hour, hence the division of the seconds by 3600.
232 containerConsumption.cost = delta.Seconds() / 3600 * price
233 containerConsumption.duration = delta.Seconds()
234 csv += size + "," + fmt.Sprintf("%+v", node.Preemptible) + "," + strconv.FormatFloat(price, 'f', 8, 64) + "," + strconv.FormatFloat(containerConsumption.cost, 'f', 8, 64) + "\n"
235 return csv, containerConsumption
// loadCachedObject tries to fill object from the JSON cache file at path
// file.
//
// Projects (-j7d0g-) and collections (-4zz18-) are never served from the
// cache. For other UUIDs the file is read and unmarshalled into object; a
// read or unmarshal failure is logged as an error. A container request
// counts as loaded from the cache when its state is Final, a container when
// its state is Complete or Cancelled.
//
// NOTE(review): reload presumably tells the caller whether the object still
// has to be fetched from the API — confirm against loadObject.
238 func loadCachedObject(logger *logrus.Logger, file string, uuid string, object interface{}) (reload bool) {
240 if strings.Contains(uuid, "-j7d0g-") || strings.Contains(uuid, "-4zz18-") {
241 // We do not cache projects or collections, they have no final state
244 // See if we have a cached copy of this object
245 _, err := os.Stat(file)
249 data, err := ioutil.ReadFile(file)
251 logger.Errorf("error reading %q: %s", file, err)
// NOTE(review): callers in this file pass a pointer as object, so taking the
// address of the interface value here is probably unnecessary — confirm.
254 err = json.Unmarshal(data, &object)
256 logger.Errorf("failed to unmarshal json: %s: %s", data, err)
260 // See if it is in a final state, if that makes sense
261 switch v := object.(type) {
262 case *arvados.ContainerRequest:
263 if v.State == arvados.ContainerRequestStateFinal {
265 logger.Debugf("Loaded object %s from local cache (%s)", uuid, file)
267 case *arvados.Container:
268 if v.State == arvados.ContainerStateComplete || v.State == arvados.ContainerStateCancelled {
270 logger.Debugf("Loaded object %s from local cache (%s)", uuid, file)
276 // Load an Arvados object.
//
// loadObject fills object with the Arvados record identified by uuid.
//
// Cached copies live in "<home>/.cache/arvados/costanalyzer/<uuid>.json".
// If the home directory cannot be determined, or the cache directory cannot
// be created, an Info message is logged and the cache is not used.
// Otherwise loadCachedObject is consulted first.
//
// When the record is fetched from the API, the endpoint is chosen from the
// UUID infix: -j7d0g- groups, -xvhdp- container_requests, -dz642-
// containers, -4zz18- collections. Any other UUID yields an "unsupported
// object type" error. The fetched object is then marshalled as indented
// JSON and written to the cache file with mode 0644.
//
// NOTE(review): the path parameter appears to be unused — confirm.
// NOTE(review): the "unsupported object type" message formats err; check
// whether err can be anything but nil at that point.
277 func loadObject(logger *logrus.Logger, ac *arvados.Client, path string, uuid string, cache bool, object interface{}) (err error) {
// Name of the cache file for this object, relative to the cache directory.
278 file := uuid + ".json"
286 homeDir, err := os.UserHomeDir()
289 logger.Info("Unable to determine current user home directory, not using cache")
291 cacheDir = homeDir + "/.cache/arvados/costanalyzer/"
292 err = ensureDirectory(logger, cacheDir)
295 logger.Infof("Unable to create cache directory at %s, not using cache: %s", cacheDir, err.Error())
297 reload = loadCachedObject(logger, cacheDir+file, uuid, object)
// The infix of an Arvados UUID identifies the object type.
305 if strings.Contains(uuid, "-j7d0g-") {
306 err = ac.RequestAndDecode(&object, "GET", "arvados/v1/groups/"+uuid, nil, nil)
307 } else if strings.Contains(uuid, "-xvhdp-") {
308 err = ac.RequestAndDecode(&object, "GET", "arvados/v1/container_requests/"+uuid, nil, nil)
309 } else if strings.Contains(uuid, "-dz642-") {
310 err = ac.RequestAndDecode(&object, "GET", "arvados/v1/containers/"+uuid, nil, nil)
311 } else if strings.Contains(uuid, "-4zz18-") {
312 err = ac.RequestAndDecode(&object, "GET", "arvados/v1/collections/"+uuid, nil, nil)
314 err = fmt.Errorf("unsupported object type with UUID %q:\n %s", uuid, err)
318 err = fmt.Errorf("error loading object with UUID %q:\n %s", uuid, err)
// Serialize the freshly loaded object so it can be stored in the cache.
321 encoded, err := json.MarshalIndent(object, "", " ")
323 err = fmt.Errorf("error marshaling object with UUID %q:\n %s", uuid, err)
327 err = ioutil.WriteFile(cacheDir+file, encoded, 0644)
329 err = fmt.Errorf("error writing file %s:\n %s", file, err)
// getNode returns the node information recorded for container request cr.
//
// It fetches the log collection cr.LogUUID from the API, opens it as a
// collection filesystem and JSON-decodes its 'node.json' file into node.
// An empty cr.LogUUID results in a "no log collection" error; every other
// failure is wrapped in an error naming the step that failed.
336 func getNode(arv *arvadosclient.ArvadosClient, ac *arvados.Client, kc *keepclient.KeepClient, cr arvados.ContainerRequest) (node nodeInfo, err error) {
337 if cr.LogUUID == "" {
338 err = errors.New("no log collection")
342 var collection arvados.Collection
343 err = ac.RequestAndDecode(&collection, "GET", "arvados/v1/collections/"+cr.LogUUID, nil, nil)
345 err = fmt.Errorf("error getting collection: %s", err)
349 var fs arvados.CollectionFileSystem
350 fs, err = collection.FileSystem(ac, kc)
352 err = fmt.Errorf("error opening collection as filesystem: %s", err)
356 f, err = fs.Open("node.json")
358 err = fmt.Errorf("error opening file 'node.json' in collection %s: %s", cr.LogUUID, err)
// Decode straight from the open file into the result.
362 err = json.NewDecoder(f).Decode(&node)
364 err = fmt.Errorf("error reading file 'node.json' in collection %s: %s", cr.LogUUID, err)
// handleProject computes the consumption for the container requests found
// in the project identified by uuid.
//
// It loads the project, queries the container_requests endpoint with a
// filter set that refers to the project's UUID and to
// requesting_container_uuid (the log messages call the result "top level
// container requests"), and runs generateCrInfo on the uuid of every
// returned item. When the response has no "items" key an Info message is
// logged instead.
//
// It returns a map of consumption values, or nil and an error as soon as
// loading the project, the query, or one generateCrInfo call fails.
//
// NOTE(review): the type assertions on the decoded response (items slice,
// item map, "uuid" string) are unchecked and panic if the response has a
// different shape.
370 func handleProject(logger *logrus.Logger, uuid string, arv *arvadosclient.ArvadosClient, ac *arvados.Client, kc *keepclient.KeepClient, resultsDir string, cache bool) (cost map[string]consumption, err error) {
371 cost = make(map[string]consumption)
373 var project arvados.Group
374 err = loadObject(logger, ac, uuid, uuid, cache, &project)
376 return nil, fmt.Errorf("error loading object %s: %s", uuid, err.Error())
// The response is decoded generically instead of into a typed list.
379 var childCrs map[string]interface{}
380 filterset := []arvados.Filter{
384 Operand: project.UUID,
387 Attr: "requesting_container_uuid",
392 err = ac.RequestAndDecode(&childCrs, "GET", "arvados/v1/container_requests", nil, map[string]interface{}{
393 "filters": filterset,
397 return nil, fmt.Errorf("error querying container_requests: %s", err.Error())
399 if value, ok := childCrs["items"]; ok {
400 logger.Infof("Collecting top level container requests in project %s", uuid)
401 items := value.([]interface{})
402 for _, item := range items {
403 itemMap := item.(map[string]interface{})
404 crInfo, err := generateCrInfo(logger, itemMap["uuid"].(string), arv, ac, kc, resultsDir, cache)
406 return nil, fmt.Errorf("error generating container_request CSV: %s", err.Error())
408 for k, v := range crInfo {
413 logger.Infof("No top level container requests found in project %s", uuid)
// generateCrInfo computes the consumption of the container request
// identified by uuid and of its child container requests. When resultsDir
// is not empty it also writes a CSV report to "<resultsDir>/<crUUID>.csv".
//
// uuid may also be a collection UUID (-4zz18-). In that case the container
// request UUID is read from the collection's 'container_request' property,
// which must exist and be a string.
//
// The returned map is keyed by container UUID. It holds an entry for the
// request's own container and one for the container of every request whose
// requesting_container_uuid is that container. A request without a
// container is reported with an Info message. A failure to obtain the node
// information of a request is logged as an error ("Skipping container
// request ...").
//
// NOTE(review): the "error loading cr object" message prints uuid although
// the object being loaded is crUUID; the two differ when uuid is a
// collection.
// NOTE(review): both "Skipping container request" messages pass the same
// UUID twice.
418 func generateCrInfo(logger *logrus.Logger, uuid string, arv *arvadosclient.ArvadosClient, ac *arvados.Client, kc *keepclient.KeepClient, resultsDir string, cache bool) (cost map[string]consumption, err error) {
420 cost = make(map[string]consumption)
// Header row of the per-request CSV report.
422 csv := "CR UUID,CR name,Container UUID,State,Started At,Finished At,Duration in seconds,Compute node type,Preemptible,Hourly node cost,Total cost\n"
424 var total, tmpTotal consumption
425 logger.Debugf("Processing %s", uuid)
428 if strings.Contains(uuid, "-4zz18-") {
429 // This is a collection, find the associated container request (if any)
430 var c arvados.Collection
431 err = loadObject(logger, ac, uuid, uuid, cache, &c)
433 return nil, fmt.Errorf("error loading collection object %s: %s", uuid, err)
435 value, ok := c.Properties["container_request"]
437 return nil, fmt.Errorf("error: collection %s does not have a 'container_request' property", uuid)
439 crUUID, ok = value.(string)
441 return nil, fmt.Errorf("error: collection %s does not have a 'container_request' property of the string type", uuid)
445 // This is a container request, find the container
446 var cr arvados.ContainerRequest
447 err = loadObject(logger, ac, crUUID, crUUID, cache, &cr)
449 return nil, fmt.Errorf("error loading cr object %s: %s", uuid, err)
451 if len(cr.ContainerUUID) == 0 {
452 // Nothing to do! E.g. a CR in 'Uncommitted' state.
453 logger.Infof("No container associated with container request %s, skipping", crUUID)
456 var container arvados.Container
457 err = loadObject(logger, ac, crUUID, cr.ContainerUUID, cache, &container)
459 return nil, fmt.Errorf("error loading container object %s: %s", cr.ContainerUUID, err)
462 topNode, err := getNode(arv, ac, kc, cr)
464 logger.Errorf("Skipping container request %s: error getting node %s: %s", cr.UUID, cr.UUID, err)
// The request's own container starts off the running total.
467 tmpCsv, total = addContainerLine(logger, topNode, cr, container)
469 cost[container.UUID] = total
471 // Find all container requests that have the container we found above as requesting_container_uuid
472 var childCrs arvados.ContainerRequestList
473 filterset := []arvados.Filter{
475 Attr: "requesting_container_uuid",
477 Operand: container.UUID,
479 err = ac.RequestAndDecode(&childCrs, "GET", "arvados/v1/container_requests", nil, map[string]interface{}{
480 "filters": filterset,
484 return nil, fmt.Errorf("error querying container_requests: %s", err.Error())
486 logger.Infof("Collecting child containers for container request %s (%s)", crUUID, container.FinishedAt)
// The ticker fires every 5 seconds and drives the "... i of n" progress
// message below; it is stopped when this function returns.
487 progressTicker := time.NewTicker(5 * time.Second)
488 defer progressTicker.Stop()
489 for i, cr2 := range childCrs.Items {
491 case <-progressTicker.C:
492 logger.Infof("... %d of %d", i+1, len(childCrs.Items))
495 node, err := getNode(arv, ac, kc, cr2)
497 logger.Errorf("Skipping container request %s: error getting node %s: %s", cr2.UUID, cr2.UUID, err)
500 logger.Debug("Child container: " + cr2.ContainerUUID)
501 var c2 arvados.Container
502 err = loadObject(logger, ac, cr.UUID, cr2.ContainerUUID, cache, &c2)
504 return nil, fmt.Errorf("error loading object %s: %s", cr2.ContainerUUID, err)
506 tmpCsv, tmpTotal = addContainerLine(logger, node, cr2, c2)
507 cost[cr2.ContainerUUID] = tmpTotal
511 logger.Debug("Done collecting child containers")
// Summary row: duration with 3 decimals, cost rounded to 2 decimals.
513 csv += "TOTAL,,,,,," + strconv.FormatFloat(total.duration, 'f', 3, 64) + ",,,," + strconv.FormatFloat(total.cost, 'f', 2, 64) + "\n"
515 if resultsDir != "" {
516 // Write the resulting CSV file
517 fName := resultsDir + "/" + crUUID + ".csv"
518 err = ioutil.WriteFile(fName, []byte(csv), 0644)
520 return nil, fmt.Errorf("error writing file with path %s: %s", fName, err.Error())
522 logger.Infof("\nUUID report in %s", fName)
// costAnalyzer is the top-level routine of the cost analyzer command.
//
// It parses the arguments with parseFlags, makes sure the results directory
// exists when one was requested, and creates the Arvados and Keep clients.
// UUIDs are then fed through uuidChannel: first the ones given on the
// command line, then, when a begin time is set, the UUIDs of the container
// requests listed by the API whose container finished at or after c.begin
// and before c.end and whose requesting_container_uuid is null. The listing
// is paged by advancing params.Offset.
//
// Every UUID read from the channel is dispatched on its infix: projects
// (-j7d0g-) go to handleProject, container requests (-xvhdp-) and
// collections (-4zz18-) go to generateCrInfo, user UUIDs (-tpzed-) are
// rejected with an error log, and anything else is logged as not looking
// like a UUID.
//
// Finally an aggregate CSV is built from the collected consumption map. It
// is written to a timestamped "-aggregate-costaccounting.csv" file in
// c.resultsDir when that is set, and the total cost, rounded to 2 decimals,
// is printed on stdout.
//
// NOTE(review): the result of handleProject is assigned to the aggregate
// map cost itself, which would drop whatever earlier UUIDs contributed —
// confirm this is intended.
// NOTE(review): the aggregate rows are produced by ranging over a map, so
// their order differs from run to run.
528 func (c *command) costAnalyzer(prog string, args []string, logger *logrus.Logger, stdout, stderr io.Writer) (exitcode int, err error) {
529 exitcode, err = c.parseFlags(prog, args, logger, stderr)
534 if c.resultsDir != "" {
535 err = ensureDirectory(logger, c.resultsDir)
// Unbuffered: each send waits for the consuming loop further down.
542 uuidChannel := make(chan string)
544 // Arvados Client setup
545 arv, err := arvadosclient.MakeArvadosClient()
547 err = fmt.Errorf("error creating Arvados object: %s", err)
551 kc, err := keepclient.MakeKeepClient(arv)
553 err = fmt.Errorf("error creating Keep object: %s", err)
558 ac := arvados.NewClientFromEnv()
560 // Populate uuidChannel with the requested uuid list
// Closing the channel is what ends the consuming range loop below.
562 defer close(uuidChannel)
563 for _, uuid := range c.uuids {
// A non-zero begin time means a date range was given on the command line.
567 if !c.begin.IsZero() {
568 initialParams := arvados.ResourceListParams{
569 Filters: []arvados.Filter{{"container.finished_at", ">=", c.begin}, {"container.finished_at", "<", c.end}, {"requesting_container_uuid", "=", nil}},
572 params := initialParams
574 // This list variable must be a new one declared
575 // inside the loop: otherwise, items in the API
576 // response would get deep-merged into the items
577 // loaded in previous iterations.
578 var list arvados.ContainerRequestList
580 err := ac.RequestAndDecode(&list, "GET", "arvados/v1/container_requests", nil, params)
582 logger.Errorf("Error getting container request list from Arvados API: %s", err)
// An empty page presumably marks the end of the listing.
585 if len(list.Items) == 0 {
589 for _, i := range list.Items {
590 uuidChannel <- i.UUID
// Move on to the next page of results.
592 params.Offset += len(list.Items)
// Aggregate consumption over all processed UUIDs.
598 cost := make(map[string]consumption)
600 for uuid := range uuidChannel {
601 logger.Debugf("Considering %s", uuid)
602 if strings.Contains(uuid, "-j7d0g-") {
603 // This is a project (group)
604 cost, err = handleProject(logger, uuid, arv, ac, kc, c.resultsDir, c.cache)
609 for k, v := range cost {
612 } else if strings.Contains(uuid, "-xvhdp-") || strings.Contains(uuid, "-4zz18-") {
613 // This is a container request
614 var crInfo map[string]consumption
615 crInfo, err = generateCrInfo(logger, uuid, arv, ac, kc, c.resultsDir, c.cache)
617 err = fmt.Errorf("error generating CSV for uuid %s: %s", uuid, err.Error())
621 for k, v := range crInfo {
624 } else if strings.Contains(uuid, "-tpzed-") {
625 // This is a user. The "Home" project for a user is not a real project.
626 // It is identified by the user uuid. As such, cost analysis for the
627 // "Home" project is not supported by this program. Skip this uuid, but
629 logger.Errorf("cost analysis is not supported for the 'Home' project: %s", uuid)
631 logger.Errorf("this argument does not look like a uuid: %s", uuid)
638 logger.Info("Nothing to do!")
// The aggregate report begins with comment lines naming the UUIDs that
// were given on the command line.
644 csv = "# Aggregate cost accounting for uuids:\n# UUID, Duration in seconds, Total cost\n"
645 for _, uuid := range c.uuids {
646 csv += "# " + uuid + "\n"
649 var total consumption
650 for k, v := range cost {
651 csv += k + "," + strconv.FormatFloat(v.duration, 'f', 3, 64) + "," + strconv.FormatFloat(v.cost, 'f', 8, 64) + "\n"
655 csv += "TOTAL," + strconv.FormatFloat(total.duration, 'f', 3, 64) + "," + strconv.FormatFloat(total.cost, 'f', 2, 64) + "\n"
657 if c.resultsDir != "" {
658 // Write the resulting CSV file
659 aFile := c.resultsDir + "/" + time.Now().Format("2006-01-02-15-04-05") + "-aggregate-costaccounting.csv"
660 err = ioutil.WriteFile(aFile, []byte(csv), 0644)
662 err = fmt.Errorf("error writing file with path %s: %s", aFile, err.Error())
666 logger.Infof("Aggregate cost accounting for all supplied uuids in %s", aFile)
669 // Output the total dollar amount on stdout
670 fmt.Fprintf(stdout, "%s\n", strconv.FormatFloat(total.cost, 'f', 2, 64))