1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
12 "git.arvados.org/arvados.git/sdk/go/arvados"
13 "git.arvados.org/arvados.git/sdk/go/arvadosclient"
14 "git.arvados.org/arvados.git/sdk/go/keepclient"
23 "github.com/sirupsen/logrus"
// timestampFormat is the time layout used to parse the -begin and -end
// flag values; it is also shown in the usage text and in the flag
// descriptions.
26 const timestampFormat = "2006-01-02T15:04:05"
// nodeInfo holds the compute node details that getNode decodes from the
// 'node.json' file of a container's log collection.
28 type nodeInfo struct {
29 // Legacy (records created by Arvados Node Manager with Arvados <= 1.4.3)
// arrayFlags is a list of strings. Its String and Set methods match the
// method set of flag.Value.
42 type arrayFlags []string
// String returns a string form of the list.
44 func (i *arrayFlags) String() string {
// Set takes a comma-separated value and walks over each piece
// (presumably adding it to the list — confirm).
48 func (i *arrayFlags) Set(value string) error {
49 for _, s := range strings.Split(value, ",") {
// parseFlags parses the command line in args into c.
//
// It fills in the CSV output directory (c.resultsDir), the cache switch
// (c.cache), the optional date range (c.begin, c.end) and the remaining
// positional arguments (c.uuids). Usage text and flag errors are written
// to stderr. The -begin and -end options must be given together and
// must parse with timestampFormat, and at least one UUID or a date
// range is required. The -log-level value is parsed with
// logrus.ParseLevel. It returns a process exit code together with any
// error.
55 func (c *command) parseFlags(prog string, args []string, logger *logrus.Logger, stderr io.Writer) (exitCode int, err error) {
56 var beginStr, endStr string
57 flags := flag.NewFlagSet("", flag.ContinueOnError)
58 flags.SetOutput(stderr)
// The usage function prints the long help text, with the program name
// and the timestamp layout filled in, followed by the flag defaults.
59 flags.Usage = func() {
60 fmt.Fprintf(flags.Output(), `
62 %s [options ...] [UUID ...]
64 This program analyzes the cost of Arvados container requests and calculates
65 the total cost across all requests. At least one UUID or a timestamp range
68 When the '-output' option is specified, a set of CSV files with cost details
69 will be written to the provided directory. Each file is a CSV report that lists
70 all the containers used to fulfill the container request, together with the
71 machine type and cost of each container.
73 When supplied with the UUID of a container request, it will calculate the
74 cost of that container request and all its children.
76 When supplied with the UUID of a collection, it will see if there is a
77 container_request UUID in the properties of the collection, and if so, it
78 will calculate the cost of that container request and all its children.
80 When supplied with a project UUID or when supplied with multiple container
81 request or collection UUIDs, it will calculate the total cost for all
84 When supplied with a 'begin' and 'end' timestamp (format:
85 %s), it will calculate the cost for all top-level container
86 requests whose containers finished during the specified interval.
88 The total cost calculation takes container reuse into account: if a container
89 was reused between several container requests, its cost will only be counted
94 - This program uses the cost data from config.yml at the time of the
95 execution of the container, stored in the 'node.json' file in its log
96 collection. If the cost data was not correctly configured at the time the
97 container was executed, the output from this program will be incorrect.
99 - If a container was run on a preemptible ("spot") instance, the cost data
100 reported by this program may be wildly inaccurate, because it does not have
101 access to the spot pricing in effect for the node when the container ran. The
102 UUID report file that is generated when the '-output' option is specified has
103 a column that indicates the preemptible state of the instance that ran the
106 - This program does not take into account overhead costs like the time spent
107 starting and stopping compute nodes that run containers, the cost of the
108 permanent cloud nodes that provide the Arvados services, the cost of data
109 stored in Arvados, etc.
111 - When provided with a project UUID, subprojects will not be considered.
113 In order to get the data for the UUIDs supplied, the ARVADOS_API_HOST and
114 ARVADOS_API_TOKEN environment variables must be set.
116 This program prints the total dollar amount from the aggregate cost
117 accounting across all provided UUIDs on stdout.
120 `, prog, timestampFormat)
121 flags.PrintDefaults()
123 loglevel := flags.String("log-level", "info", "logging `level` (debug, info, ...)")
124 flags.StringVar(&c.resultsDir, "output", "", "output `directory` for the CSV reports")
125 flags.StringVar(&beginStr, "begin", "", fmt.Sprintf("timestamp `begin` for date range operation (format: %s)", timestampFormat))
126 flags.StringVar(&endStr, "end", "", fmt.Sprintf("timestamp `end` for date range operation (format: %s)", timestampFormat))
127 flags.BoolVar(&c.cache, "cache", true, "create and use a local disk cache of Arvados objects")
128 err = flags.Parse(args)
// A help request (flag.ErrHelp) is told apart from a real parse error.
129 if err == flag.ErrHelp {
133 } else if err != nil {
// Whatever is left after the flags is treated as the list of UUIDs.
137 c.uuids = flags.Args()
// A date range needs both ends: exactly one of -begin/-end is an error.
139 if (len(beginStr) != 0 && len(endStr) == 0) || (len(beginStr) == 0 && len(endStr) != 0) {
141 err = fmt.Errorf("When specifying a date range, both begin and end must be specified")
146 if len(beginStr) != 0 {
148 c.begin, errB = time.Parse(timestampFormat, beginStr)
149 c.end, errE = time.Parse(timestampFormat, endStr)
150 if (errB != nil) || (errE != nil) {
152 err = fmt.Errorf("When specifying a date range, both begin and end must be of the format %s %+v, %+v", timestampFormat, errB, errE)
// Without any UUID and without a date range there is nothing to analyze.
158 if (len(c.uuids) < 1) && (len(beginStr) == 0) {
160 err = fmt.Errorf("error: no uuid(s) provided")
165 lvl, err := logrus.ParseLevel(*loglevel)
172 logger.Debug("Caching disabled\n")
// ensureDirectory makes sure that dir exists and is a directory.
//
// When os.Stat reports that dir does not exist, it is created, together
// with any missing parents, with mode 0700. When the path exists but is
// not a directory an error is returned.
177 func ensureDirectory(logger *logrus.Logger, dir string) (err error) {
178 statData, err := os.Stat(dir)
179 if os.IsNotExist(err) {
180 err = os.MkdirAll(dir, 0700)
182 return fmt.Errorf("error creating directory %s: %s", dir, err.Error())
// NOTE(review): if os.Stat failed with an error other than "does not
// exist" (e.g. permission denied), statData may be nil here — confirm
// this call is guarded against that case.
185 if !statData.IsDir() {
186 return fmt.Errorf("the path %s is not a directory", dir)
// addContainerLine appends the fields of one container to a CSV row and
// computes what the container cost.
//
// The row carries the container UUID, its state, the start and finish
// times, the run time in whole seconds, the node size, the preemptible
// flag, the hourly price and the cost, and ends with a newline. The
// cost is the run time in hours multiplied by the hourly price. Price
// and size come from node.Properties.CloudNode when a non-zero price is
// recorded there; otherwise the size is taken from node.ProviderType.
192 func addContainerLine(logger *logrus.Logger, node nodeInfo, cr arvados.ContainerRequest, container arvados.Container) (csv string, cost float64) {
195 csv += container.UUID + ","
196 csv += string(container.State) + ","
197 if container.StartedAt != nil {
198 csv += container.StartedAt.String() + ","
// delta stays zero unless a finish time is known, which makes the cost
// computed below zero as well.
203 var delta time.Duration
204 if container.FinishedAt != nil {
205 csv += container.FinishedAt.String() + ","
// NOTE(review): StartedAt is dereferenced here under a check of
// FinishedAt only; a container with FinishedAt set but StartedAt unset
// would panic — confirm that cannot happen.
206 delta = container.FinishedAt.Sub(*container.StartedAt)
207 csv += strconv.FormatFloat(delta.Seconds(), 'f', 0, 64) + ","
213 if node.Properties.CloudNode.Price != 0 {
214 price = node.Properties.CloudNode.Price
215 size = node.Properties.CloudNode.Size
218 size = node.ProviderType
// Run time in hours times the hourly price.
220 cost = delta.Seconds() / 3600 * price
221 csv += size + "," + fmt.Sprintf("%+v", node.Preemptible) + "," + strconv.FormatFloat(price, 'f', 8, 64) + "," + strconv.FormatFloat(cost, 'f', 8, 64) + "\n"
// loadCachedObject tries to load the object with the given uuid from the
// JSON cache file at path file into object.
//
// Project (-j7d0g-) and collection (-4zz18-) UUIDs are not cached
// because they have no final state. For container requests and
// containers the cached copy is checked for a final state: Final for a
// container request, Complete or Cancelled for a container. The reload
// result presumably tells the caller whether the object still has to be
// fetched from the API — confirm against the return statements.
225 func loadCachedObject(logger *logrus.Logger, file string, uuid string, object interface{}) (reload bool) {
227 if strings.Contains(uuid, "-j7d0g-") || strings.Contains(uuid, "-4zz18-") {
228 // We do not cache projects or collections, they have no final state
231 // See if we have a cached copy of this object
232 _, err := os.Stat(file)
236 data, err := ioutil.ReadFile(file)
238 logger.Errorf("error reading %q: %s", file, err)
241 err = json.Unmarshal(data, &object)
243 logger.Errorf("failed to unmarshal json: %s: %s", data, err)
247 // See if it is in a final state, if that makes sense
248 switch v := object.(type) {
249 case *arvados.ContainerRequest:
250 if v.State == arvados.ContainerRequestStateFinal {
252 logger.Debugf("Loaded object %s from local cache (%s)\n", uuid, file)
254 case *arvados.Container:
255 if v.State == arvados.ContainerStateComplete || v.State == arvados.ContainerStateCancelled {
257 logger.Debugf("Loaded object %s from local cache (%s)\n", uuid, file)
263 // Load an Arvados object.
//
// loadObject fills object with the Arvados object identified by uuid.
// The type infix of the UUID selects the API endpoint: -j7d0g- for
// groups, -xvhdp- for container requests, -dz642- for containers and
// -4zz18- for collections; any other UUID is reported as unsupported.
//
// The local cache lives in ~/.cache/arvados/costanalyzer/, one file per
// object named <uuid>.json. When the home directory cannot be
// determined or the cache directory cannot be created, this is logged
// and the cache is not used. A fetched object is marshaled as indented
// JSON and written to the cache file with mode 0644.
264 func loadObject(logger *logrus.Logger, ac *arvados.Client, path string, uuid string, cache bool, object interface{}) (err error) {
265 file := uuid + ".json"
273 homeDir, err := os.UserHomeDir()
276 logger.Info("Unable to determine current user home directory, not using cache")
278 cacheDir = homeDir + "/.cache/arvados/costanalyzer/"
279 err = ensureDirectory(logger, cacheDir)
282 logger.Infof("Unable to create cache directory at %s, not using cache: %s", cacheDir, err.Error())
284 reload = loadCachedObject(logger, cacheDir+file, uuid, object)
292 if strings.Contains(uuid, "-j7d0g-") {
293 err = ac.RequestAndDecode(&object, "GET", "arvados/v1/groups/"+uuid, nil, nil)
294 } else if strings.Contains(uuid, "-xvhdp-") {
295 err = ac.RequestAndDecode(&object, "GET", "arvados/v1/container_requests/"+uuid, nil, nil)
296 } else if strings.Contains(uuid, "-dz642-") {
297 err = ac.RequestAndDecode(&object, "GET", "arvados/v1/containers/"+uuid, nil, nil)
298 } else if strings.Contains(uuid, "-4zz18-") {
299 err = ac.RequestAndDecode(&object, "GET", "arvados/v1/collections/"+uuid, nil, nil)
// NOTE(review): no request has been made on this branch, so err looks
// like it is nil (or unrelated) when it is formatted into this message
// — confirm, and consider dropping it from the format string.
301 err = fmt.Errorf("unsupported object type with UUID %q:\n %s", uuid, err)
305 err = fmt.Errorf("error loading object with UUID %q:\n %s", uuid, err)
308 encoded, err := json.MarshalIndent(object, "", " ")
310 err = fmt.Errorf("error marshaling object with UUID %q:\n %s", uuid, err)
// NOTE(review): cacheDir is only assigned on the cache path above —
// confirm this write is skipped when the cache is not in use.
314 err = ioutil.WriteFile(cacheDir+file, encoded, 0644)
316 err = fmt.Errorf("error writing file %s:\n %s", file, err)
// getNode reads the 'node.json' file from the log collection of cr and
// decodes it into a nodeInfo.
//
// An error is returned when cr has no log collection (empty LogUUID),
// when the collection cannot be fetched or opened as a filesystem, or
// when 'node.json' cannot be opened or decoded.
323 func getNode(arv *arvadosclient.ArvadosClient, ac *arvados.Client, kc *keepclient.KeepClient, cr arvados.ContainerRequest) (node nodeInfo, err error) {
324 if cr.LogUUID == "" {
325 err = errors.New("no log collection")
329 var collection arvados.Collection
330 err = ac.RequestAndDecode(&collection, "GET", "arvados/v1/collections/"+cr.LogUUID, nil, nil)
332 err = fmt.Errorf("error getting collection: %s", err)
336 var fs arvados.CollectionFileSystem
337 fs, err = collection.FileSystem(ac, kc)
339 err = fmt.Errorf("error opening collection as filesystem: %s", err)
// NOTE(review): no Close call is visible for the file opened here —
// confirm it is released.
343 f, err = fs.Open("node.json")
345 err = fmt.Errorf("error opening file 'node.json' in collection %s: %s", cr.LogUUID, err)
349 err = json.NewDecoder(f).Decode(&node)
351 err = fmt.Errorf("error reading file 'node.json' in collection %s: %s", cr.LogUUID, err)
// handleProject computes the cost of the container requests found in the
// project with the given uuid.
//
// The project is loaded as an arvados.Group, then container requests
// are listed with a filter whose operand is the project UUID and a
// filter on requesting_container_uuid. generateCrCsv is run for every
// item of the response and its per-container costs are collected. When
// the response has no "items" key this is logged. The first error from
// loading, querying or generateCrCsv aborts the run and is returned
// with a nil map.
357 func handleProject(logger *logrus.Logger, uuid string, arv *arvadosclient.ArvadosClient, ac *arvados.Client, kc *keepclient.KeepClient, resultsDir string, cache bool) (cost map[string]float64, err error) {
358 cost = make(map[string]float64)
360 var project arvados.Group
361 err = loadObject(logger, ac, uuid, uuid, cache, &project)
363 return nil, fmt.Errorf("error loading object %s: %s", uuid, err.Error())
// The response is decoded into a generic map rather than a typed list.
366 var childCrs map[string]interface{}
367 filterset := []arvados.Filter{
371 Operand: project.UUID,
374 Attr: "requesting_container_uuid",
379 err = ac.RequestAndDecode(&childCrs, "GET", "arvados/v1/container_requests", nil, map[string]interface{}{
380 "filters": filterset,
384 return nil, fmt.Errorf("error querying container_requests: %s", err.Error())
386 if value, ok := childCrs["items"]; ok {
387 logger.Infof("Collecting top level container requests in project %s\n", uuid)
// NOTE(review): the type assertions below are the single-value form and
// panic if the response does not have the expected shape.
388 items := value.([]interface{})
389 for _, item := range items {
390 itemMap := item.(map[string]interface{})
391 crCsv, err := generateCrCsv(logger, itemMap["uuid"].(string), arv, ac, kc, resultsDir, cache)
393 return nil, fmt.Errorf("error generating container_request CSV: %s", err.Error())
395 for k, v := range crCsv {
400 logger.Infof("No top level container requests found in project %s\n", uuid)
// generateCrCsv computes the cost of one container request and of the
// container requests whose requesting_container_uuid refers to its
// container.
//
// uuid is either a container request UUID or a collection UUID
// (-4zz18-). For a collection, the container request is taken from the
// collection's 'container_request' property, which must be a string.
// A container request without a container is logged and skipped.
//
// The result maps container UUIDs to their cost. When resultsDir is not
// empty, a CSV report with one row per container and a final TOTAL row
// is written to <resultsDir>/<container request UUID>.csv.
405 func generateCrCsv(logger *logrus.Logger, uuid string, arv *arvadosclient.ArvadosClient, ac *arvados.Client, kc *keepclient.KeepClient, resultsDir string, cache bool) (cost map[string]float64, err error) {
407 cost = make(map[string]float64)
409 csv := "CR UUID,CR name,Container UUID,State,Started At,Finished At,Duration in seconds,Compute node type,Preemptible,Hourly node cost,Total cost\n"
411 var tmpTotalCost float64
412 var totalCost float64
// NOTE(review): fmt.Printf writes to the process's standard output, not
// to a writer supplied by the caller.
413 fmt.Printf("Processing %s\n", uuid)
416 if strings.Contains(uuid, "-4zz18-") {
417 // This is a collection, find the associated container request (if any)
418 var c arvados.Collection
419 err = loadObject(logger, ac, uuid, uuid, cache, &c)
421 return nil, fmt.Errorf("error loading collection object %s: %s", uuid, err)
423 value, ok := c.Properties["container_request"]
425 return nil, fmt.Errorf("error: collection %s does not have a 'container_request' property", uuid)
427 crUUID, ok = value.(string)
429 return nil, fmt.Errorf("error: collection %s does not have a 'container_request' property of the string type", uuid)
433 // This is a container request, find the container
434 var cr arvados.ContainerRequest
435 err = loadObject(logger, ac, crUUID, crUUID, cache, &cr)
// NOTE(review): the message reports uuid, which is the collection UUID
// when a collection was supplied; the object that failed to load is
// crUUID.
437 return nil, fmt.Errorf("error loading cr object %s: %s", uuid, err)
439 if len(cr.ContainerUUID) == 0 {
440 // Nothing to do! E.g. a CR in 'Uncommitted' state.
441 logger.Infof("No container associated with container request %s, skipping\n", crUUID)
444 var container arvados.Container
445 err = loadObject(logger, ac, crUUID, cr.ContainerUUID, cache, &container)
447 return nil, fmt.Errorf("error loading container object %s: %s", cr.ContainerUUID, err)
// The node details of the top-level container supply its price.
450 topNode, err := getNode(arv, ac, kc, cr)
452 logger.Errorf("Skipping container request %s: error getting node %s: %s", cr.UUID, cr.UUID, err)
455 tmpCsv, totalCost = addContainerLine(logger, topNode, cr, container)
457 totalCost += tmpTotalCost
458 cost[container.UUID] = totalCost
460 // Find all container requests that have the container we found above as requesting_container_uuid
461 var childCrs arvados.ContainerRequestList
462 filterset := []arvados.Filter{
464 Attr: "requesting_container_uuid",
466 Operand: container.UUID,
468 err = ac.RequestAndDecode(&childCrs, "GET", "arvados/v1/container_requests", nil, map[string]interface{}{
469 "filters": filterset,
473 return nil, fmt.Errorf("error querying container_requests: %s", err.Error())
475 logger.Infof("Collecting child containers for container request %s (%s)", crUUID, container.FinishedAt)
// Each child contributes one CSV row and one entry in the cost map,
// keyed by its container UUID.
476 for _, cr2 := range childCrs.Items {
478 node, err := getNode(arv, ac, kc, cr2)
480 logger.Errorf("Skipping container request %s: error getting node %s: %s", cr2.UUID, cr2.UUID, err)
483 logger.Debug("\nChild container: " + cr2.ContainerUUID + "\n")
484 var c2 arvados.Container
485 err = loadObject(logger, ac, cr.UUID, cr2.ContainerUUID, cache, &c2)
487 return nil, fmt.Errorf("error loading object %s: %s", cr2.ContainerUUID, err)
489 tmpCsv, tmpTotalCost = addContainerLine(logger, node, cr2, c2)
490 cost[cr2.ContainerUUID] = tmpTotalCost
492 totalCost += tmpTotalCost
494 logger.Info(" done\n")
496 csv += "TOTAL,,,,,,,,," + strconv.FormatFloat(totalCost, 'f', 8, 64) + "\n"
498 if resultsDir != "" {
499 // Write the resulting CSV file
500 fName := resultsDir + "/" + crUUID + ".csv"
501 err = ioutil.WriteFile(fName, []byte(csv), 0644)
503 return nil, fmt.Errorf("error writing file with path %s: %s", fName, err.Error())
505 logger.Infof("\nUUID report in %s\n\n", fName)
// costAnalyzer runs the cost analysis for the command line in args.
//
// It parses the flags, makes sure the -output directory exists when one
// was given, and sets up the Arvados and Keep clients. The UUIDs to
// analyze are fed through uuidChannel: first the UUIDs from the command
// line, then, when a begin time was given, the top-level container
// requests whose container finished in [begin, end), fetched page by
// page. Each UUID is dispatched on its type infix: projects (-j7d0g-)
// go to handleProject, container requests (-xvhdp-) and collections
// (-4zz18-) go to generateCrCsv, user UUIDs (-tpzed-) are rejected
// because a "Home" project cannot be analyzed, and anything else is
// logged as not looking like a UUID. The per-container costs are
// accumulated in one map, written as an aggregate CSV file when an
// output directory is set, and the total is printed on stdout.
511 func (c *command) costAnalyzer(prog string, args []string, logger *logrus.Logger, stdout, stderr io.Writer) (exitcode int, err error) {
512 exitcode, err = c.parseFlags(prog, args, logger, stderr)
517 if c.resultsDir != "" {
518 err = ensureDirectory(logger, c.resultsDir)
525 uuidChannel := make(chan string)
527 // Arvados Client setup
528 arv, err := arvadosclient.MakeArvadosClient()
530 err = fmt.Errorf("error creating Arvados object: %s", err)
534 kc, err := keepclient.MakeKeepClient(arv)
536 err = fmt.Errorf("error creating Keep object: %s", err)
541 ac := arvados.NewClientFromEnv()
543 // Populate uuidChannel with the requested uuid list
545 defer close(uuidChannel)
546 for _, uuid := range c.uuids {
// A date range adds the top-level container requests (no requesting
// container) whose container finished inside the range.
550 if !c.begin.IsZero() {
551 initialParams := arvados.ResourceListParams{
552 Filters: []arvados.Filter{{"container.finished_at", ">=", c.begin}, {"container.finished_at", "<", c.end}, {"requesting_container_uuid", "=", nil}},
555 params := initialParams
557 // This list variable must be a new one declared
558 // inside the loop: otherwise, items in the API
559 // response would get deep-merged into the items
560 // loaded in previous iterations.
561 var list arvados.ContainerRequestList
563 err := ac.RequestAndDecode(&list, "GET", "arvados/v1/container_requests", nil, params)
565 logger.Errorf("Error getting container request list from Arvados API: %s\n", err)
// An empty page means every matching request has been seen.
568 if len(list.Items) == 0 {
572 for _, i := range list.Items {
573 uuidChannel <- i.UUID
// Move on to the next page.
575 params.Offset += len(list.Items)
// cost is the aggregate: container UUID -> cost, across all UUIDs.
581 cost := make(map[string]float64)
583 for uuid := range uuidChannel {
// NOTE(review): fmt.Printf writes to the process's standard output, not
// to the stdout writer passed to this method.
584 fmt.Printf("Considering %s\n", uuid)
585 if strings.Contains(uuid, "-j7d0g-") {
586 // This is a project (group)
// Keep the project's result in its own variable: assigning it to cost
// would replace the aggregate map, dropping the costs collected for
// earlier UUIDs and turning the merge below into a self-copy.
var projectCost map[string]float64
587 projectCost, err = handleProject(logger, uuid, arv, ac, kc, c.resultsDir, c.cache)
592 for k, v := range projectCost {
595 } else if strings.Contains(uuid, "-xvhdp-") || strings.Contains(uuid, "-4zz18-") {
596 // This is a container request
597 var crCsv map[string]float64
598 crCsv, err = generateCrCsv(logger, uuid, arv, ac, kc, c.resultsDir, c.cache)
600 err = fmt.Errorf("error generating CSV for uuid %s: %s", uuid, err.Error())
604 for k, v := range crCsv {
607 } else if strings.Contains(uuid, "-tpzed-") {
608 // This is a user. The "Home" project for a user is not a real project.
609 // It is identified by the user uuid. As such, cost analysis for the
610 // "Home" project is not supported by this program. Skip this uuid, but
612 logger.Errorf("cost analysis is not supported for the 'Home' project: %s", uuid)
614 logger.Errorf("this argument does not look like a uuid: %s\n", uuid)
621 logger.Info("Nothing to do!\n")
// The aggregate report starts with the UUIDs given on the command line
// as comment lines, followed by one row per container and a TOTAL row.
627 csv = "# Aggregate cost accounting for uuids:\n"
628 for _, uuid := range c.uuids {
629 csv += "# " + uuid + "\n"
633 for k, v := range cost {
634 csv += k + "," + strconv.FormatFloat(v, 'f', 8, 64) + "\n"
638 csv += "TOTAL," + strconv.FormatFloat(total, 'f', 8, 64) + "\n"
640 if c.resultsDir != "" {
641 // Write the resulting CSV file
642 aFile := c.resultsDir + "/" + time.Now().Format("2006-01-02-15-04-05") + "-aggregate-costaccounting.csv"
643 err = ioutil.WriteFile(aFile, []byte(csv), 0644)
645 err = fmt.Errorf("error writing file with path %s: %s", aFile, err.Error())
649 logger.Infof("Aggregate cost accounting for all supplied uuids in %s\n", aFile)
652 // Output the total dollar amount on stdout
653 fmt.Fprintf(stdout, "%s\n", strconv.FormatFloat(total, 'f', 8, 64))