1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
20 "git.arvados.org/arvados.git/lib/cmd"
21 "git.arvados.org/arvados.git/sdk/go/arvados"
22 "git.arvados.org/arvados.git/sdk/go/arvadosclient"
23 "git.arvados.org/arvados.git/sdk/go/keepclient"
24 "github.com/sirupsen/logrus"
// timestampFormat is the time layout used to parse the -begin and -end flag
// values; it is also shown to the user in the usage and error messages.
27 const timestampFormat = "2006-01-02T15:04:05"
// nodeInfo describes the compute node a container ran on. It is decoded from
// the 'node.json' file found in a container request's log collection, and
// supplies the instance size, hourly price and preemptible flag used in the
// cost report.
31 type nodeInfo struct {
32 // Legacy (records created by Arvados Node Manager with Arvados <= 1.4.3)
// consumption holds the cost and the duration (in seconds) attributed to one
// container, or a running total over several containers.
45 type consumption struct {
// Add accumulates n into c.
50 func (c *consumption) Add(n consumption) {
// Durations are summed; presumably the cost is summed the same way — confirm.
52 c.duration += n.duration
// arrayFlags is a string slice with String and Set methods; presumably it is
// meant to be used as a flag.Value for repeatable flags — confirm.
55 type arrayFlags []string
// String returns a string representation of i. Presumably it exists, together
// with Set, to satisfy the flag.Value interface — confirm.
57 func (i *arrayFlags) String() string {
// Set treats value as a comma-separated list and processes each element in
// turn.
61 func (i *arrayFlags) Set(value string) error {
// A single flag occurrence may carry several comma-separated entries.
62 for _, s := range strings.Split(value, ",") {
// parseFlags parses the command line in args and stores the result in c: the
// CSV output directory (-output), the cache switch (-cache, on by default),
// the optional -begin/-end date range, and the remaining positional arguments
// as the list of UUIDs to analyze. Usage text and validation errors are
// written to stderr. The results ok and exitCode tell the caller whether to
// continue and which exit code applies.
68 func (c *command) parseFlags(prog string, args []string, logger *logrus.Logger, stderr io.Writer) (ok bool, exitCode int) {
69 var beginStr, endStr string
// ContinueOnError makes parse failures come back as errors instead of exiting
// the process.
70 flags := flag.NewFlagSet("", flag.ContinueOnError)
// NOTE(review): the help text below says "for the node then the container
// ran"; "then" looks like a typo for "when" — confirm and fix the string.
71 flags.Usage = func() {
72 fmt.Fprintf(flags.Output(), `
74 %s [options ...] [UUID ...]
76 This program analyzes the cost of Arvados container requests and calculates
77 the total cost across all requests. At least one UUID or a timestamp range
80 When the '-output' option is specified, a set of CSV files with cost details
81 will be written to the provided directory. Each file is a CSV report that lists
82 all the containers used to fulfill the container request, together with the
83 machine type and cost of each container.
85 When supplied with the UUID of a container request, it will calculate the
86 cost of that container request and all its children.
88 When supplied with the UUID of a collection, it will see if there is a
89 container_request UUID in the properties of the collection, and if so, it
90 will calculate the cost of that container request and all its children.
92 When supplied with a project UUID or when supplied with multiple container
93 request or collection UUIDs, it will calculate the total cost for all
96 When supplied with a 'begin' and 'end' timestamp (format:
97 %s), it will calculate the cost for all top-level container
98 requests whose containers finished during the specified interval.
100 The total cost calculation takes container reuse into account: if a container
101 was reused between several container requests, its cost will only be counted
106 - This program uses the cost data from config.yml at the time of the
107 execution of the container, stored in the 'node.json' file in its log
108 collection. If the cost data was not correctly configured at the time the
109 container was executed, the output from this program will be incorrect.
111 - If a container was run on a preemptible ("spot") instance, the cost data
112 reported by this program may be wildly inaccurate, because it does not have
113 access to the spot pricing in effect for the node then the container ran. The
114 UUID report file that is generated when the '-output' option is specified has
115 a column that indicates the preemptible state of the instance that ran the
118 - This program does not take into account overhead costs like the time spent
119 starting and stopping compute nodes that run containers, the cost of the
120 permanent cloud nodes that provide the Arvados services, the cost of data
121 stored in Arvados, etc.
123 - When provided with a project UUID, subprojects will not be considered.
125 In order to get the data for the UUIDs supplied, the ARVADOS_API_HOST and
126 ARVADOS_API_TOKEN environment variables must be set.
128 This program prints the total dollar amount from the aggregate cost
129 accounting across all provided UUIDs on stdout.
132 `, prog, timestampFormat)
133 flags.PrintDefaults()
135 loglevel := flags.String("log-level", "info", "logging `level` (debug, info, ...)")
136 flags.StringVar(&c.resultsDir, "output", "", "output `directory` for the CSV reports")
137 flags.StringVar(&beginStr, "begin", "", fmt.Sprintf("timestamp `begin` for date range operation (format: %s)", timestampFormat))
138 flags.StringVar(&endStr, "end", "", fmt.Sprintf("timestamp `end` for date range operation (format: %s)", timestampFormat))
139 flags.BoolVar(&c.cache, "cache", true, "create and use a local disk cache of Arvados objects")
140 if ok, code := cmd.ParseFlags(flags, prog, args, "[uuid ...]", stderr); !ok {
// Everything left after the flags is treated as a UUID to analyze.
143 c.uuids = flags.Args()
// -begin and -end are only meaningful as a pair.
145 if (len(beginStr) != 0 && len(endStr) == 0) || (len(beginStr) == 0 && len(endStr) != 0) {
146 fmt.Fprintf(stderr, "When specifying a date range, both begin and end must be specified (try -help)\n")
// Both timestamps must parse with timestampFormat.
150 if len(beginStr) != 0 {
152 c.begin, errB = time.Parse(timestampFormat, beginStr)
153 c.end, errE = time.Parse(timestampFormat, endStr)
154 if (errB != nil) || (errE != nil) {
155 fmt.Fprintf(stderr, "When specifying a date range, both begin and end must be of the format %s %+v, %+v\n", timestampFormat, errB, errE)
// Without a date range, at least one UUID argument is required.
160 if (len(c.uuids) < 1) && (len(beginStr) == 0) {
161 fmt.Fprintf(stderr, "error: no uuid(s) provided (try -help)\n")
// Validate the -log-level value before it is used.
165 lvl, err := logrus.ParseLevel(*loglevel)
167 fmt.Fprintf(stderr, "invalid argument to -log-level: %s\n", err)
172 logger.Debug("Caching disabled")
// ensureDirectory makes sure dir exists and is a directory. A missing path is
// created, together with any missing parents, with mode 0700. An error is
// returned when creation fails or when the path exists but is not a
// directory.
177 func ensureDirectory(logger *logrus.Logger, dir string) (err error) {
178 statData, err := os.Stat(dir)
// The path does not exist yet: create it, accessible to the owner only.
179 if os.IsNotExist(err) {
180 err = os.MkdirAll(dir, 0700)
182 return fmt.Errorf("error creating directory %s: %s", dir, err.Error())
// Something exists at the path; it must be a directory.
185 if !statData.IsDir() {
186 return fmt.Errorf("the path %s is not a directory", dir)
// addContainerLine builds one CSV report line for container and computes its
// consumption. The duration is the time between StartedAt and FinishedAt; the
// cost is that duration in hours multiplied by the node's hourly price.
//
// It returns the CSV line (terminated by a newline) and the consumption.
192 func addContainerLine(logger *logrus.Logger, node nodeInfo, cr arvados.ContainerRequest, container arvados.Container) (string, consumption) {
194 var containerConsumption consumption
197 csv += container.UUID + ","
198 csv += string(container.State) + ","
199 if container.StartedAt != nil {
200 csv += container.StartedAt.String() + ","
// delta stays zero unless the container has a finish time.
205 var delta time.Duration
206 if container.FinishedAt != nil {
207 csv += container.FinishedAt.String() + ","
// NOTE(review): StartedAt is dereferenced under the FinishedAt check only —
// confirm it can never be nil when FinishedAt is set.
208 delta = container.FinishedAt.Sub(*container.StartedAt)
209 csv += strconv.FormatFloat(delta.Seconds(), 'f', 3, 64) + ","
// Use the price and size recorded under Properties.CloudNode when a price is
// present; otherwise the size is taken from ProviderType (presumably the
// legacy node record layout — confirm).
215 if node.Properties.CloudNode.Price != 0 {
216 price = node.Properties.CloudNode.Price
217 size = node.Properties.CloudNode.Size
220 size = node.ProviderType
// price is per hour, so the duration is converted from seconds to hours.
222 containerConsumption.cost = delta.Seconds() / 3600 * price
223 containerConsumption.duration = delta.Seconds()
224 csv += size + "," + fmt.Sprintf("%+v", node.Preemptible) + "," + strconv.FormatFloat(price, 'f', 8, 64) + "," + strconv.FormatFloat(containerConsumption.cost, 'f', 8, 64) + "\n"
225 return csv, containerConsumption
// loadCachedObject tries to populate object from the JSON cache file at file.
// Projects and collections are never served from the cache. Container
// requests and containers are logged as loaded from the cache when the stored
// copy is in a terminal state.
//
// Presumably reload is true when the caller still has to fetch the object
// from the API — confirm against the return statements.
228 func loadCachedObject(logger *logrus.Logger, file string, uuid string, object interface{}) (reload bool) {
230 if strings.Contains(uuid, "-j7d0g-") || strings.Contains(uuid, "-4zz18-") {
231 // We do not cache projects or collections, they have no final state
234 // See if we have a cached copy of this object
235 _, err := os.Stat(file)
239 data, err := ioutil.ReadFile(file)
241 logger.Errorf("error reading %q: %s", file, err)
// NOTE(review): &object is a pointer to an interface value; this relies on
// encoding/json decoding into the pointer held by the interface — confirm
// callers always pass a non-nil pointer.
244 err = json.Unmarshal(data, &object)
246 logger.Errorf("failed to unmarshal json: %s: %s", data, err)
250 // See if it is in a final state, if that makes sense
251 switch v := object.(type) {
252 case *arvados.ContainerRequest:
253 if v.State == arvados.ContainerRequestStateFinal {
255 logger.Debugf("Loaded object %s from local cache (%s)", uuid, file)
257 case *arvados.Container:
// Both Complete and Cancelled count as terminal states for a container.
258 if v.State == arvados.ContainerStateComplete || v.State == arvados.ContainerStateCancelled {
260 logger.Debugf("Loaded object %s from local cache (%s)", uuid, file)
266 // Load an Arvados object.
//
// loadObject fills object with the Arvados record identified by uuid. It
// consults a per-user disk cache under ~/.cache/arvados/costanalyzer/ via
// loadCachedObject (presumably only when cache is true — confirm) and fetches
// the record from the API, choosing the endpoint from the type infix of the
// UUID (groups, container requests, containers or collections). The object is
// also marshaled as indented JSON and written to the cache file
// <uuid>.json.
//
// NOTE(review): the path parameter is not used in the lines reviewed here —
// confirm whether it is still needed.
267 func loadObject(logger *logrus.Logger, ac *arvados.Client, path string, uuid string, cache bool, object interface{}) (err error) {
268 file := uuid + ".json"
// Caching is best effort: problems locating or creating the cache directory
// are logged and the cache is not used.
276 homeDir, err := os.UserHomeDir()
279 logger.Info("Unable to determine current user home directory, not using cache")
281 cacheDir = homeDir + "/.cache/arvados/costanalyzer/"
282 err = ensureDirectory(logger, cacheDir)
285 logger.Infof("Unable to create cache directory at %s, not using cache: %s", cacheDir, err.Error())
287 reload = loadCachedObject(logger, cacheDir+file, uuid, object)
// Select the API endpoint from the object-type infix of the UUID.
295 if strings.Contains(uuid, "-j7d0g-") {
296 err = ac.RequestAndDecode(&object, "GET", "arvados/v1/groups/"+uuid, nil, nil)
297 } else if strings.Contains(uuid, "-xvhdp-") {
298 err = ac.RequestAndDecode(&object, "GET", "arvados/v1/container_requests/"+uuid, nil, nil)
299 } else if strings.Contains(uuid, "-dz642-") {
300 err = ac.RequestAndDecode(&object, "GET", "arvados/v1/containers/"+uuid, nil, nil)
301 } else if strings.Contains(uuid, "-4zz18-") {
302 err = ac.RequestAndDecode(&object, "GET", "arvados/v1/collections/"+uuid, nil, nil)
// NOTE(review): no request has been made on this branch, so err is not
// assigned by any of the branches above; formatting it with %s may print a
// nil placeholder — confirm.
304 err = fmt.Errorf("unsupported object type with UUID %q:\n %s", uuid, err)
308 err = fmt.Errorf("error loading object with UUID %q:\n %s", uuid, err)
// Serialize the object and write it to the cache file.
311 encoded, err := json.MarshalIndent(object, "", " ")
313 err = fmt.Errorf("error marshaling object with UUID %q:\n %s", uuid, err)
317 err = ioutil.WriteFile(cacheDir+file, encoded, 0644)
319 err = fmt.Errorf("error writing file %s:\n %s", file, err)
// getNode returns the compute node information for the container request cr
// by reading and decoding the 'node.json' file from the request's log
// collection. It fails when cr has no log collection, or when the collection
// cannot be fetched, opened as a filesystem, or the file cannot be opened or
// decoded.
326 func getNode(arv *arvadosclient.ArvadosClient, ac *arvados.Client, kc *keepclient.KeepClient, cr arvados.ContainerRequest) (node nodeInfo, err error) {
// Without a log collection there is nowhere to read node.json from.
327 if cr.LogUUID == "" {
328 err = errors.New("no log collection")
332 var collection arvados.Collection
333 err = ac.RequestAndDecode(&collection, "GET", "arvados/v1/collections/"+cr.LogUUID, nil, nil)
335 err = fmt.Errorf("error getting collection: %s", err)
339 var fs arvados.CollectionFileSystem
340 fs, err = collection.FileSystem(ac, kc)
342 err = fmt.Errorf("error opening collection as filesystem: %s", err)
// NOTE(review): confirm that f is closed once decoding is done.
346 f, err = fs.Open("node.json")
348 err = fmt.Errorf("error opening file 'node.json' in collection %s: %s", cr.LogUUID, err)
352 err = json.NewDecoder(f).Decode(&node)
354 err = fmt.Errorf("error reading file 'node.json' in collection %s: %s", cr.LogUUID, err)
// getContainerRequests returns every container request matching filters,
// fetching the results page by page. From the second page on, an extra filter
// on the UUID of the last item received so far is added (presumably
// "uuid > last", with results ordered by uuid — confirm). An empty page is
// handled separately from the accumulation step, presumably ending the loop.
360 func getContainerRequests(ac *arvados.Client, filters []arvados.Filter) ([]arvados.ContainerRequest, error) {
361 var allItems []arvados.ContainerRequest
// Copy the caller's filters so the paging filter appended below never
// modifies the slice that was passed in.
363 pagefilters := append([]arvados.Filter(nil), filters...)
364 if len(allItems) > 0 {
365 pagefilters = append(pagefilters, arvados.Filter{
368 Operand: allItems[len(allItems)-1].UUID,
// A fresh response value is used for each request.
371 var resp arvados.ContainerRequestList
372 err := ac.RequestAndDecode(&resp, "GET", "arvados/v1/container_requests", nil, arvados.ResourceListParams{
373 Filters: pagefilters,
379 return nil, fmt.Errorf("error querying container_requests: %w", err)
381 if len(resp.Items) == 0 {
385 allItems = append(allItems, resp.Items...)
// handleProject computes the cost of the top-level container requests found
// in the project identified by uuid. Each request is processed with
// generateCrInfo and its per-container results are folded into the returned
// map (presumably keyed by container UUID, as produced by generateCrInfo —
// confirm). The first failure aborts the whole project.
389 func handleProject(logger *logrus.Logger, uuid string, arv *arvadosclient.ArvadosClient, ac *arvados.Client, kc *keepclient.KeepClient, resultsDir string, cache bool) (cost map[string]consumption, err error) {
390 cost = make(map[string]consumption)
392 var project arvados.Group
393 err = loadObject(logger, ac, uuid, uuid, cache, &project)
395 return nil, fmt.Errorf("error loading object %s: %s", uuid, err.Error())
// Select the container requests of this project; the filter on
// requesting_container_uuid presumably restricts them to top-level ones.
397 allItems, err := getContainerRequests(ac, []arvados.Filter{
401 Operand: project.UUID,
404 Attr: "requesting_container_uuid",
410 return nil, fmt.Errorf("error querying container_requests: %s", err.Error())
412 if len(allItems) == 0 {
413 logger.Infof("No top level container requests found in project %s", uuid)
416 logger.Infof("Collecting top level container requests in project %s", uuid)
417 for _, cr := range allItems {
418 crInfo, err := generateCrInfo(logger, cr.UUID, arv, ac, kc, resultsDir, cache)
420 return nil, fmt.Errorf("error generating container_request CSV for %s: %s", cr.UUID, err)
// Merge this request's results into the project-wide map.
422 for k, v := range crInfo {
// generateCrInfo calculates the cost of one container request and of the
// child container requests spawned by its container. uuid is either a
// container request UUID or a collection UUID; for a collection, the
// container request is taken from the collection's 'container_request'
// property.
//
// The returned map holds one consumption entry per container UUID. When
// resultsDir is not empty, a CSV report listing every container is written to
// <resultsDir>/<container request UUID>.csv.
429 func generateCrInfo(logger *logrus.Logger, uuid string, arv *arvadosclient.ArvadosClient, ac *arvados.Client, kc *keepclient.KeepClient, resultsDir string, cache bool) (cost map[string]consumption, err error) {
431 cost = make(map[string]consumption)
// Header row of the per-request CSV report.
433 csv := "CR UUID,CR name,Container UUID,State,Started At,Finished At,Duration in seconds,Compute node type,Preemptible,Hourly node cost,Total cost\n"
435 var total, tmpTotal consumption
436 logger.Debugf("Processing %s", uuid)
439 if strings.Contains(uuid, "-4zz18-") {
440 // This is a collection, find the associated container request (if any)
441 var c arvados.Collection
442 err = loadObject(logger, ac, uuid, uuid, cache, &c)
444 return nil, fmt.Errorf("error loading collection object %s: %s", uuid, err)
446 value, ok := c.Properties["container_request"]
448 return nil, fmt.Errorf("error: collection %s does not have a 'container_request' property", uuid)
// The property value must be a string holding the container request UUID.
450 crUUID, ok = value.(string)
452 return nil, fmt.Errorf("error: collection %s does not have a 'container_request' property of the string type", uuid)
456 // This is a container request, find the container
457 var cr arvados.ContainerRequest
458 err = loadObject(logger, ac, crUUID, crUUID, cache, &cr)
// NOTE(review): this message reports uuid although the object loaded is
// crUUID; they differ when uuid is a collection — confirm which is intended.
460 return nil, fmt.Errorf("error loading cr object %s: %s", uuid, err)
462 if len(cr.ContainerUUID) == 0 {
463 // Nothing to do! E.g. a CR in 'Uncommitted' state.
464 logger.Infof("No container associated with container request %s, skipping", crUUID)
467 var container arvados.Container
468 err = loadObject(logger, ac, crUUID, cr.ContainerUUID, cache, &container)
470 return nil, fmt.Errorf("error loading container object %s: %s", cr.ContainerUUID, err)
473 topNode, err := getNode(arv, ac, kc, cr)
// NOTE(review): cr.UUID is passed for both %s placeholders — confirm the
// second one is intended.
475 logger.Errorf("Skipping container request %s: error getting node %s: %s", cr.UUID, cr.UUID, err)
478 tmpCsv, total = addContainerLine(logger, topNode, cr, container)
480 cost[container.UUID] = total
482 // Find all container requests that have the container we
483 // found above as requesting_container_uuid.
484 allItems, err := getContainerRequests(ac, []arvados.Filter{{
485 Attr: "requesting_container_uuid",
487 Operand: container.UUID,
489 logger.Infof("Looking up %d child containers for container %s (%s)", len(allItems), container.UUID, container.FinishedAt)
// Progress is reported on a 5 second ticker while the children are processed.
490 progressTicker := time.NewTicker(5 * time.Second)
491 defer progressTicker.Stop()
492 for i, cr2 := range allItems {
494 case <-progressTicker.C:
495 logger.Infof("... %d of %d", i+1, len(allItems))
498 node, err := getNode(arv, ac, kc, cr2)
500 logger.Errorf("Skipping container request %s: error getting node %s: %s", cr2.UUID, cr2.UUID, err)
503 logger.Debug("Child container: " + cr2.ContainerUUID)
504 var c2 arvados.Container
// NOTE(review): the path argument here is cr.UUID, while the earlier calls
// pass crUUID — confirm whether the difference matters.
505 err = loadObject(logger, ac, cr.UUID, cr2.ContainerUUID, cache, &c2)
507 return nil, fmt.Errorf("error loading object %s: %s", cr2.ContainerUUID, err)
509 tmpCsv, tmpTotal = addContainerLine(logger, node, cr2, c2)
// Keyed by container UUID, so a container shared by several requests yields a
// single entry.
510 cost[cr2.ContainerUUID] = tmpTotal
514 logger.Debug("Done collecting child containers")
// Closing row with the summed duration and cost.
516 csv += "TOTAL,,,,,," + strconv.FormatFloat(total.duration, 'f', 3, 64) + ",,,," + strconv.FormatFloat(total.cost, 'f', 2, 64) + "\n"
518 if resultsDir != "" {
519 // Write the resulting CSV file
520 fName := resultsDir + "/" + crUUID + ".csv"
521 err = ioutil.WriteFile(fName, []byte(csv), 0644)
523 return nil, fmt.Errorf("error writing file with path %s: %s", fName, err.Error())
525 logger.Infof("\nUUID report in %s", fName)
// costAnalyzer runs the cost analysis: it parses the flags, makes sure the
// output directory exists when one was requested, and sets up the Arvados and
// Keep clients. The UUIDs to analyze (those given on the command line plus,
// when a date range was given, the container requests returned by the API for
// that range) are sent through uuidChannel and processed one by one into a
// per-UUID consumption map. An aggregate CSV is written to the output
// directory when one is set, and the total cost is printed on stdout.
531 func (c *command) costAnalyzer(prog string, args []string, logger *logrus.Logger, stdout, stderr io.Writer) (exitcode int, err error) {
533 ok, exitcode = c.parseFlags(prog, args, logger, stderr)
537 if c.resultsDir != "" {
538 err = ensureDirectory(logger, c.resultsDir)
// The channel is unbuffered, so each send waits for the consuming loop below.
545 uuidChannel := make(chan string)
547 // Arvados Client setup
548 arv, err := arvadosclient.MakeArvadosClient()
550 err = fmt.Errorf("error creating Arvados object: %s", err)
554 kc, err := keepclient.MakeKeepClient(arv)
556 err = fmt.Errorf("error creating Keep object: %s", err)
561 ac := arvados.NewClientFromEnv()
563 // Populate uuidChannel with the requested uuid list
// The producer closes the channel when it is done, which ends the consuming
// range loop below. Presumably this runs in its own goroutine — confirm.
565 defer close(uuidChannel)
566 for _, uuid := range c.uuids {
570 if !c.begin.IsZero() {
571 initialParams := arvados.ResourceListParams{
// Requests without a requesting container whose container finished at or
// after begin and before end.
572 Filters: []arvados.Filter{{"container.finished_at", ">=", c.begin}, {"container.finished_at", "<", c.end}, {"requesting_container_uuid", "=", nil}},
575 params := initialParams
577 // This list variable must be a new one declared
578 // inside the loop: otherwise, items in the API
579 // response would get deep-merged into the items
580 // loaded in previous iterations.
581 var list arvados.ContainerRequestList
583 err := ac.RequestAndDecode(&list, "GET", "arvados/v1/container_requests", nil, params)
585 logger.Errorf("Error getting container request list from Arvados API: %s", err)
588 if len(list.Items) == 0 {
592 for _, i := range list.Items {
593 uuidChannel <- i.UUID
// Advance the offset past the items just received to request the next page.
595 params.Offset += len(list.Items)
601 cost := make(map[string]consumption)
603 for uuid := range uuidChannel {
604 logger.Debugf("Considering %s", uuid)
605 if strings.Contains(uuid, "-j7d0g-") {
606 // This is a project (group)
// NOTE(review): this assignment replaces the aggregate cost map with the
// project's own map, so entries accumulated from earlier UUIDs appear to be
// dropped, and the loop below then ranges over the map just assigned —
// confirm and consider a separate variable.
607 cost, err = handleProject(logger, uuid, arv, ac, kc, c.resultsDir, c.cache)
612 for k, v := range cost {
615 } else if strings.Contains(uuid, "-xvhdp-") || strings.Contains(uuid, "-4zz18-") {
616 // This is a container request or collection
617 var crInfo map[string]consumption
618 crInfo, err = generateCrInfo(logger, uuid, arv, ac, kc, c.resultsDir, c.cache)
620 err = fmt.Errorf("error generating CSV for uuid %s: %s", uuid, err.Error())
624 for k, v := range crInfo {
627 } else if strings.Contains(uuid, "-tpzed-") {
628 // This is a user. The "Home" project for a user is not a real project.
629 // It is identified by the user uuid. As such, cost analysis for the
630 // "Home" project is not supported by this program. Skip this uuid, but
632 logger.Errorf("cost analysis is not supported for the 'Home' project: %s", uuid)
634 logger.Errorf("this argument does not look like a uuid: %s", uuid)
641 logger.Info("Nothing to do!")
// Aggregate report: comment header listing the requested UUIDs, one row per
// entry of the cost map, then a TOTAL row.
647 csv = "# Aggregate cost accounting for uuids:\n# UUID, Duration in seconds, Total cost\n"
648 for _, uuid := range c.uuids {
649 csv += "# " + uuid + "\n"
652 var total consumption
// Map iteration order is unspecified in Go, so the row order of this report
// can differ between runs.
653 for k, v := range cost {
654 csv += k + "," + strconv.FormatFloat(v.duration, 'f', 3, 64) + "," + strconv.FormatFloat(v.cost, 'f', 8, 64) + "\n"
658 csv += "TOTAL," + strconv.FormatFloat(total.duration, 'f', 3, 64) + "," + strconv.FormatFloat(total.cost, 'f', 2, 64) + "\n"
660 if c.resultsDir != "" {
661 // Write the resulting CSV file
// The file name carries the current time, so successive runs do not overwrite
// each other's aggregate report (at one-second resolution).
662 aFile := c.resultsDir + "/" + time.Now().Format("2006-01-02-15-04-05") + "-aggregate-costaccounting.csv"
663 err = ioutil.WriteFile(aFile, []byte(csv), 0644)
665 err = fmt.Errorf("error writing file with path %s: %s", aFile, err.Error())
669 logger.Infof("Aggregate cost accounting for all supplied uuids in %s", aFile)
672 // Output the total dollar amount on stdout
673 fmt.Fprintf(stdout, "%s\n", strconv.FormatFloat(total.cost, 'f', 2, 64))