1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
20 "git.arvados.org/arvados.git/sdk/go/arvados"
21 "git.arvados.org/arvados.git/sdk/go/arvadosclient"
22 "git.arvados.org/arvados.git/sdk/go/keepclient"
23 "github.com/sirupsen/logrus"
// timestampFormat is the time layout (it carries no zone offset) that the
// -begin and -end flag values must follow; it is handed to time.Parse and
// printed in the usage text.
26 const timestampFormat = "2006-01-02T15:04:05"
// nodeInfo receives the decoded contents of the 'node.json' file found in a
// container request's log collection (see getNode). addContainerLine reads
// its Properties.CloudNode.Price, Properties.CloudNode.Size, ProviderType
// and Preemptible fields.
28 type nodeInfo struct {
29 // Legacy (records created by Arvados Node Manager with Arvados <= 1.4.3)
// containerInfo is the value type of the per-container cost maps, which are
// keyed by container UUID. The code in this file fills in its cost field and
// its duration field (seconds, as returned by addContainerLine).
42 type containerInfo struct {
// arrayFlags is a list of strings that comes with String and Set methods.
// NOTE(review): presumably meant to be registered as a flag.Value — confirm.
47 type arrayFlags []string
// String returns the list as a string.
// NOTE(review): presumably one half of the flag.Value interface, together
// with Set — confirm.
49 func (i *arrayFlags) String() string {
// Set splits value on commas and handles each piece in turn, so that one
// argument may carry several entries.
// NOTE(review): presumably every piece is appended to *i — confirm.
53 func (i *arrayFlags) Set(value string) error {
54 for _, s := range strings.Split(value, ",") {
// parseFlags parses the command line in args and stores the result in c.
//
// It creates a FlagSet in ContinueOnError mode whose output goes to stderr,
// installs a usage function that prints the long help text followed by the
// flag defaults, and defines the -log-level, -output, -begin, -end and
// -cache options. The remaining positional arguments become c.uuids.
//
// Checks performed after parsing:
//   - -begin and -end must be given together;
//   - when given, both must parse with timestampFormat (the parsed values
//     are stored in c.begin and c.end);
//   - at least one UUID or a date range must be supplied.
//
// The -log-level value is interpreted with logrus.ParseLevel.
//
// prog is interpolated into the usage text as the program name. The function
// returns an exit code together with an error; flag.ErrHelp coming back from
// Parse is treated as a case of its own, separate from other parse errors.
60 func (c *command) parseFlags(prog string, args []string, logger *logrus.Logger, stderr io.Writer) (exitCode int, err error) {
61 var beginStr, endStr string
62 flags := flag.NewFlagSet("", flag.ContinueOnError)
63 flags.SetOutput(stderr)
64 flags.Usage = func() {
65 fmt.Fprintf(flags.Output(), `
67 %s [options ...] [UUID ...]
69 This program analyzes the cost of Arvados container requests and calculates
70 the total cost across all requests. At least one UUID or a timestamp range
73 When the '-output' option is specified, a set of CSV files with cost details
74 will be written to the provided directory. Each file is a CSV report that lists
75 all the containers used to fulfill the container request, together with the
76 machine type and cost of each container.
78 When supplied with the UUID of a container request, it will calculate the
79 cost of that container request and all its children.
81 When supplied with the UUID of a collection, it will see if there is a
82 container_request UUID in the properties of the collection, and if so, it
83 will calculate the cost of that container request and all its children.
85 When supplied with a project UUID or when supplied with multiple container
86 request or collection UUIDs, it will calculate the total cost for all
89 When supplied with a 'begin' and 'end' timestamp (format:
90 %s), it will calculate the cost for all top-level container
91 requests whose containers finished during the specified interval.
93 The total cost calculation takes container reuse into account: if a container
94 was reused between several container requests, its cost will only be counted
99 - This program uses the cost data from config.yml at the time of the
100 execution of the container, stored in the 'node.json' file in its log
101 collection. If the cost data was not correctly configured at the time the
102 container was executed, the output from this program will be incorrect.
104 - If a container was run on a preemptible ("spot") instance, the cost data
105 reported by this program may be wildly inaccurate, because it does not have
106 access to the spot pricing in effect for the node then the container ran. The
107 UUID report file that is generated when the '-output' option is specified has
108 a column that indicates the preemptible state of the instance that ran the
111 - This program does not take into account overhead costs like the time spent
112 starting and stopping compute nodes that run containers, the cost of the
113 permanent cloud nodes that provide the Arvados services, the cost of data
114 stored in Arvados, etc.
116 - When provided with a project UUID, subprojects will not be considered.
118 In order to get the data for the UUIDs supplied, the ARVADOS_API_HOST and
119 ARVADOS_API_TOKEN environment variables must be set.
121 This program prints the total dollar amount from the aggregate cost
122 accounting across all provided UUIDs on stdout.
125 `, prog, timestampFormat)
// The two %s verbs in the text above are filled with prog and
// timestampFormat, in that order.
// NOTE(review): the help text reads "in effect for the node then the
// container ran"; "when" looks intended — confirm before changing the
// user-facing string.
126 flags.PrintDefaults()
// Option definitions. -begin/-end are collected as raw strings first and
// only converted to time values once both are known to be present.
128 loglevel := flags.String("log-level", "info", "logging `level` (debug, info, ...)")
129 flags.StringVar(&c.resultsDir, "output", "", "output `directory` for the CSV reports")
130 flags.StringVar(&beginStr, "begin", "", fmt.Sprintf("timestamp `begin` for date range operation (format: %s)", timestampFormat))
131 flags.StringVar(&endStr, "end", "", fmt.Sprintf("timestamp `end` for date range operation (format: %s)", timestampFormat))
132 flags.BoolVar(&c.cache, "cache", true, "create and use a local disk cache of Arvados objects")
133 err = flags.Parse(args)
134 if err == flag.ErrHelp {
138 } else if err != nil {
// Everything that is not an option is taken to be a UUID to analyze.
142 c.uuids = flags.Args()
// Exactly one of -begin / -end was supplied: a range needs both ends.
// NOTE(review): the error strings below start with a capital letter, which
// Go linters usually flag; changing them would alter user-visible output.
144 if (len(beginStr) != 0 && len(endStr) == 0) || (len(beginStr) == 0 && len(endStr) != 0) {
146 err = fmt.Errorf("When specifying a date range, both begin and end must be specified")
// Both ends are present at this point (the mixed case was rejected above),
// so testing beginStr alone is sufficient.
151 if len(beginStr) != 0 {
153 c.begin, errB = time.Parse(timestampFormat, beginStr)
154 c.end, errE = time.Parse(timestampFormat, endStr)
155 if (errB != nil) || (errE != nil) {
157 err = fmt.Errorf("When specifying a date range, both begin and end must be of the format %s %+v, %+v", timestampFormat, errB, errE)
// Neither positional UUIDs nor a date range: there is nothing to analyze.
163 if (len(c.uuids) < 1) && (len(beginStr) == 0) {
165 err = fmt.Errorf("error: no uuid(s) provided")
170 lvl, err := logrus.ParseLevel(*loglevel)
// NOTE(review): presumably logged only when -cache=false — confirm.
177 logger.Debug("Caching disabled")
// ensureDirectory makes sure that dir exists and is a directory.
//
// When os.Stat reports that dir does not exist, it is created (including
// parents) with mode 0700, and a failure to do so is returned as an error.
// A path that exists but is not a directory is reported as an error too.
182 func ensureDirectory(logger *logrus.Logger, dir string) (err error) {
183 statData, err := os.Stat(dir)
184 if os.IsNotExist(err) {
185 err = os.MkdirAll(dir, 0700)
187 return fmt.Errorf("error creating directory %s: %s", dir, err.Error())
// NOTE(review): statData is nil whenever os.Stat fails. Only the not-exist
// case is visibly handled above, so another Stat error (e.g. permission
// denied) could reach this call with a nil statData — confirm.
190 if !statData.IsDir() {
191 return fmt.Errorf("the path %s is not a directory", dir)
// addContainerLine builds one CSV line for container and computes what it
// cost to run.
//
// The line contains, among other columns, the container UUID, its state, the
// start and finish timestamps, the run time in seconds (3 decimals), the node
// size, the preemptible flag, the hourly node price and the resulting cost
// (8 decimals each), terminated by a newline.
//
// Returned values:
//   - csv: the CSV line;
//   - cost: run time in hours multiplied by the hourly node price;
//   - duration: run time in seconds.
//
// The run time stays zero when container.FinishedAt is nil, which makes cost
// and duration zero as well.
197 func addContainerLine(logger *logrus.Logger, node nodeInfo, cr arvados.ContainerRequest, container arvados.Container) (csv string, cost float64, duration float64) {
200 csv += container.UUID + ","
201 csv += string(container.State) + ","
202 if container.StartedAt != nil {
203 csv += container.StartedAt.String() + ","
208 var delta time.Duration
209 if container.FinishedAt != nil {
210 csv += container.FinishedAt.String() + ","
// NOTE(review): StartedAt is dereferenced here, but the only visible nil
// check on it guards the "Started At" column above, not this branch. A
// container with FinishedAt set and StartedAt nil would panic — confirm
// whether that combination can occur.
211 delta = container.FinishedAt.Sub(*container.StartedAt)
212 csv += strconv.FormatFloat(delta.Seconds(), 'f', 3, 64) + ","
// Price and size are taken from node.Properties.CloudNode when that record
// carries a non-zero price; otherwise the size comes from
// node.ProviderType.
218 if node.Properties.CloudNode.Price != 0 {
219 price = node.Properties.CloudNode.Price
220 size = node.Properties.CloudNode.Size
223 size = node.ProviderType
// price is an hourly rate, hence the conversion from seconds to hours.
225 cost = delta.Seconds() / 3600 * price
226 duration = delta.Seconds()
227 csv += size + "," + fmt.Sprintf("%+v", node.Preemptible) + "," + strconv.FormatFloat(price, 'f', 8, 64) + "," + strconv.FormatFloat(cost, 'f', 8, 64) + "\n"
// loadCachedObject tries to fill object from the JSON cache file named by
// file.
//
// UUIDs containing "-j7d0g-" or "-4zz18-" are never served from the cache.
// For other UUIDs the file is stat'ed, read and unmarshalled into object,
// after which the concrete type decides whether the cached copy is logged as
// loaded: a *arvados.ContainerRequest must be in the Final state, a
// *arvados.Container must be Complete or Cancelled.
//
// NOTE(review): presumably reload == true tells the caller that the object
// has to be fetched from the API again — confirm.
231 func loadCachedObject(logger *logrus.Logger, file string, uuid string, object interface{}) (reload bool) {
233 if strings.Contains(uuid, "-j7d0g-") || strings.Contains(uuid, "-4zz18-") {
234 // We do not cache projects or collections, they have no final state
237 // See if we have a cached copy of this object
238 _, err := os.Stat(file)
242 data, err := ioutil.ReadFile(file)
244 logger.Errorf("error reading %q: %s", file, err)
247 err = json.Unmarshal(data, &object)
// NOTE(review): this log line embeds the complete file contents (data).
249 logger.Errorf("failed to unmarshal json: %s: %s", data, err)
253 // See if it is in a final state, if that makes sense
254 switch v := object.(type) {
255 case *arvados.ContainerRequest:
256 if v.State == arvados.ContainerRequestStateFinal {
258 logger.Debugf("Loaded object %s from local cache (%s)", uuid, file)
260 case *arvados.Container:
261 if v.State == arvados.ContainerStateComplete || v.State == arvados.ContainerStateCancelled {
263 logger.Debugf("Loaded object %s from local cache (%s)", uuid, file)
269 // Load an Arvados object.
//
// loadObject fills object with the Arvados record identified by uuid.
//
// The on-disk cache lives in "<home>/.cache/arvados/costanalyzer/" with one
// "<uuid>.json" file per object. If the home directory cannot be determined
// or the cache directory cannot be created, this is logged at info level and
// the cache is not used. Otherwise loadCachedObject is consulted.
//
// The API endpoint is picked from the UUID infix:
//   - "-j7d0g-": arvados/v1/groups
//   - "-xvhdp-": arvados/v1/container_requests
//   - "-dz642-": arvados/v1/containers
//   - "-4zz18-": arvados/v1/collections
//
// Any other UUID produces an "unsupported object type" error. The object is
// also marshalled as indented JSON and written to its cache file with mode
// 0644; marshal and write failures are returned as errors.
//
// NOTE(review): no use of the path parameter is visible in this function —
// confirm whether it is still needed.
270 func loadObject(logger *logrus.Logger, ac *arvados.Client, path string, uuid string, cache bool, object interface{}) (err error) {
271 file := uuid + ".json"
279 homeDir, err := os.UserHomeDir()
282 logger.Info("Unable to determine current user home directory, not using cache")
284 cacheDir = homeDir + "/.cache/arvados/costanalyzer/"
285 err = ensureDirectory(logger, cacheDir)
288 logger.Infof("Unable to create cache directory at %s, not using cache: %s", cacheDir, err.Error())
290 reload = loadCachedObject(logger, cacheDir+file, uuid, object)
298 if strings.Contains(uuid, "-j7d0g-") {
299 err = ac.RequestAndDecode(&object, "GET", "arvados/v1/groups/"+uuid, nil, nil)
300 } else if strings.Contains(uuid, "-xvhdp-") {
301 err = ac.RequestAndDecode(&object, "GET", "arvados/v1/container_requests/"+uuid, nil, nil)
302 } else if strings.Contains(uuid, "-dz642-") {
303 err = ac.RequestAndDecode(&object, "GET", "arvados/v1/containers/"+uuid, nil, nil)
304 } else if strings.Contains(uuid, "-4zz18-") {
305 err = ac.RequestAndDecode(&object, "GET", "arvados/v1/collections/"+uuid, nil, nil)
// NOTE(review): err is formatted into this message, yet no request has been
// issued on this branch, so err is presumably nil here and would print as
// "%!s(<nil>)" — confirm.
307 err = fmt.Errorf("unsupported object type with UUID %q:\n %s", uuid, err)
311 err = fmt.Errorf("error loading object with UUID %q:\n %s", uuid, err)
314 encoded, err := json.MarshalIndent(object, "", " ")
316 err = fmt.Errorf("error marshaling object with UUID %q:\n %s", uuid, err)
320 err = ioutil.WriteFile(cacheDir+file, encoded, 0644)
// NOTE(review): the message names only the bare file name, not the full
// cacheDir+file path that was written.
322 err = fmt.Errorf("error writing file %s:\n %s", file, err)
// getNode returns the node information recorded for container request cr.
//
// It fetches the collection named by cr.LogUUID, opens it as a collection
// filesystem and JSON-decodes the file 'node.json' into node. A container
// request without a LogUUID yields the error "no log collection"; failures
// to fetch the collection, open the filesystem, open the file or decode it
// are each returned with a message saying which step failed.
//
// NOTE(review): no use of the arv parameter is visible in this function.
329 func getNode(arv *arvadosclient.ArvadosClient, ac *arvados.Client, kc *keepclient.KeepClient, cr arvados.ContainerRequest) (node nodeInfo, err error) {
330 if cr.LogUUID == "" {
331 err = errors.New("no log collection")
335 var collection arvados.Collection
336 err = ac.RequestAndDecode(&collection, "GET", "arvados/v1/collections/"+cr.LogUUID, nil, nil)
338 err = fmt.Errorf("error getting collection: %s", err)
342 var fs arvados.CollectionFileSystem
343 fs, err = collection.FileSystem(ac, kc)
345 err = fmt.Errorf("error opening collection as filesystem: %s", err)
// NOTE(review): no matching f.Close() is visible — confirm that the file is
// closed once it has been decoded.
349 f, err = fs.Open("node.json")
351 err = fmt.Errorf("error opening file 'node.json' in collection %s: %s", cr.LogUUID, err)
355 err = json.NewDecoder(f).Decode(&node)
357 err = fmt.Errorf("error reading file 'node.json' in collection %s: %s", cr.LogUUID, err)
// handleProject computes the per-container cost map for the project (group)
// identified by uuid.
//
// It loads the project through loadObject, lists container requests with a
// filter set that refers to project.UUID and to requesting_container_uuid,
// and calls generateCrCsv for every item returned, iterating over each
// result map. If no "items" key is present in the response, this is logged
// and nothing else happens.
//
// Errors from loading the project, from the list request and from
// generateCrCsv are returned with a nil map.
//
// NOTE(review): presumably the filters select container requests owned by
// the project that have no requesting container (i.e. top-level ones), and
// the inner loop copies each entry into cost — confirm.
363 func handleProject(logger *logrus.Logger, uuid string, arv *arvadosclient.ArvadosClient, ac *arvados.Client, kc *keepclient.KeepClient, resultsDir string, cache bool) (cost map[string]containerInfo, err error) {
364 cost = make(map[string]containerInfo)
366 var project arvados.Group
367 err = loadObject(logger, ac, uuid, uuid, cache, &project)
369 return nil, fmt.Errorf("error loading object %s: %s", uuid, err.Error())
// The response is decoded into an untyped map instead of a typed list.
372 var childCrs map[string]interface{}
373 filterset := []arvados.Filter{
377 Operand: project.UUID,
380 Attr: "requesting_container_uuid",
// NOTE(review): no paging over the result set is visible here; confirm that
// one response is enough for projects with many container requests.
385 err = ac.RequestAndDecode(&childCrs, "GET", "arvados/v1/container_requests", nil, map[string]interface{}{
386 "filters": filterset,
390 return nil, fmt.Errorf("error querying container_requests: %s", err.Error())
392 if value, ok := childCrs["items"]; ok {
393 logger.Infof("Collecting top level container requests in project %s", uuid)
// NOTE(review): the type assertions below are unchecked and panic if the
// response does not have the expected shape.
394 items := value.([]interface{})
395 for _, item := range items {
396 itemMap := item.(map[string]interface{})
397 crCsv, err := generateCrCsv(logger, itemMap["uuid"].(string), arv, ac, kc, resultsDir, cache)
399 return nil, fmt.Errorf("error generating container_request CSV: %s", err.Error())
401 for k, v := range crCsv {
406 logger.Infof("No top level container requests found in project %s", uuid)
// generateCrCsv computes the cost of one container request together with its
// child container requests and, when resultsDir is set, writes a CSV report
// for it.
//
// uuid may name a collection ("-4zz18-"): the container request UUID is then
// taken from the collection's 'container_request' property, which must exist
// and be a string. The container request and its container are loaded via
// loadObject; a request without a container is logged and skipped.
//
// Node information comes from getNode; a failure there is logged with a
// "Skipping container request" message. The child container requests are
// those whose requesting_container_uuid filter matches the top-level
// container. While they are processed, a ticker makes a progress line
// available every five seconds.
//
// The returned map is keyed by container UUID and holds cost and duration
// per container. The report ends with a TOTAL line and, when resultsDir is
// not empty, is written to "<resultsDir>/<crUUID>.csv" with mode 0644.
411 func generateCrCsv(logger *logrus.Logger, uuid string, arv *arvadosclient.ArvadosClient, ac *arvados.Client, kc *keepclient.KeepClient, resultsDir string, cache bool) (cost map[string]containerInfo, err error) {
413 cost = make(map[string]containerInfo)
// Header row of the per-request report.
415 csv := "CR UUID,CR name,Container UUID,State,Started At,Finished At,Duration in seconds,Compute node type,Preemptible,Hourly node cost,Total cost\n"
417 var tmpTotalCost, tmpTotalDuration float64
418 var totalCost, totalDuration float64
419 logger.Debugf("Processing %s", uuid)
422 if strings.Contains(uuid, "-4zz18-") {
423 // This is a collection, find the associated container request (if any)
424 var c arvados.Collection
425 err = loadObject(logger, ac, uuid, uuid, cache, &c)
427 return nil, fmt.Errorf("error loading collection object %s: %s", uuid, err)
429 value, ok := c.Properties["container_request"]
431 return nil, fmt.Errorf("error: collection %s does not have a 'container_request' property", uuid)
433 crUUID, ok = value.(string)
435 return nil, fmt.Errorf("error: collection %s does not have a 'container_request' property of the string type", uuid)
439 // This is a container request, find the container
440 var cr arvados.ContainerRequest
441 err = loadObject(logger, ac, crUUID, crUUID, cache, &cr)
// NOTE(review): the object loaded above is crUUID, but this message prints
// uuid; the two differ when uuid names a collection.
443 return nil, fmt.Errorf("error loading cr object %s: %s", uuid, err)
445 if len(cr.ContainerUUID) == 0 {
446 // Nothing to do! E.g. a CR in 'Uncommitted' state.
447 logger.Infof("No container associated with container request %s, skipping", crUUID)
450 var container arvados.Container
451 err = loadObject(logger, ac, crUUID, cr.ContainerUUID, cache, &container)
453 return nil, fmt.Errorf("error loading container object %s: %s", cr.ContainerUUID, err)
456 topNode, err := getNode(arv, ac, kc, cr)
// NOTE(review): cr.UUID is passed for both %s verbs, so the same UUID is
// printed twice.
458 logger.Errorf("Skipping container request %s: error getting node %s: %s", cr.UUID, cr.UUID, err)
461 tmpCsv, totalCost, totalDuration = addContainerLine(logger, topNode, cr, container)
// NOTE(review): no assignment to tmpTotalCost is visible before this point,
// so this addition looks like a no-op — confirm.
463 totalCost += tmpTotalCost
464 cost[container.UUID] = containerInfo{cost: totalCost, duration: totalDuration}
466 // Find all container requests that have the container we found above as requesting_container_uuid
467 var childCrs arvados.ContainerRequestList
468 filterset := []arvados.Filter{
470 Attr: "requesting_container_uuid",
472 Operand: container.UUID,
474 err = ac.RequestAndDecode(&childCrs, "GET", "arvados/v1/container_requests", nil, map[string]interface{}{
475 "filters": filterset,
479 return nil, fmt.Errorf("error querying container_requests: %s", err.Error())
481 logger.Infof("Collecting child containers for container request %s (%s)", crUUID, container.FinishedAt)
// The ticker drives the periodic "... i of n" progress message and is
// stopped when the function returns.
482 progressTicker := time.NewTicker(5 * time.Second)
483 defer progressTicker.Stop()
484 for i, cr2 := range childCrs.Items {
486 case <-progressTicker.C:
487 logger.Infof("... %d of %d", i+1, len(childCrs.Items))
490 node, err := getNode(arv, ac, kc, cr2)
// NOTE(review): as above, the same UUID fills both %s verbs.
492 logger.Errorf("Skipping container request %s: error getting node %s: %s", cr2.UUID, cr2.UUID, err)
495 logger.Debug("Child container: " + cr2.ContainerUUID)
496 var c2 arvados.Container
497 err = loadObject(logger, ac, cr.UUID, cr2.ContainerUUID, cache, &c2)
499 return nil, fmt.Errorf("error loading object %s: %s", cr2.ContainerUUID, err)
501 tmpCsv, tmpTotalCost, tmpTotalDuration = addContainerLine(logger, node, cr2, c2)
// Entries are keyed by container UUID.
// NOTE(review): presumably this keying is what keeps a reused container
// from being counted twice in the aggregate — confirm.
502 cost[cr2.ContainerUUID] = containerInfo{cost: tmpTotalCost, duration: tmpTotalDuration}
504 totalCost += tmpTotalCost
506 logger.Debug("Done collecting child containers")
// Summary row: total duration with 3 decimals, total cost with 2.
508 csv += "TOTAL,,,,," + strconv.FormatFloat(totalDuration, 'f', 3, 64) + ",,,," + strconv.FormatFloat(totalCost, 'f', 2, 64) + "\n"
510 if resultsDir != "" {
511 // Write the resulting CSV file
512 fName := resultsDir + "/" + crUUID + ".csv"
513 err = ioutil.WriteFile(fName, []byte(csv), 0644)
515 return nil, fmt.Errorf("error writing file with path %s: %s", fName, err.Error())
517 logger.Infof("\nUUID report in %s", fName)
// costAnalyzer is the entry point of the cost analysis: it parses the
// command line, gathers the UUIDs to analyze, computes the per-container
// costs and reports the total.
//
// Steps visible in this function:
//   - parse flags with parseFlags and, when -output was given, make sure the
//     results directory exists;
//   - create the Arvados, Keep and API clients;
//   - feed uuidChannel with the UUIDs from the command line and, when a date
//     range was given, with the UUIDs of the container requests matched by
//     the finished_at / requesting_container_uuid filters, fetching the list
//     in batches by advancing params.Offset until a batch comes back empty;
//   - dispatch every UUID by its infix: "-j7d0g-" to handleProject,
//     "-xvhdp-" and "-4zz18-" to generateCrCsv; "-tpzed-" (a user's "Home"
//     project) and anything else are logged as errors;
//   - build the aggregate CSV, write it to
//     "<resultsDir>/<timestamp>-aggregate-costaccounting.csv" when
//     -output was given, and print the total cost (2 decimals) on stdout.
//
// It returns an exit code and an error.
523 func (c *command) costAnalyzer(prog string, args []string, logger *logrus.Logger, stdout, stderr io.Writer) (exitcode int, err error) {
524 exitcode, err = c.parseFlags(prog, args, logger, stderr)
529 if c.resultsDir != "" {
530 err = ensureDirectory(logger, c.resultsDir)
// Unbuffered: each send blocks until the consuming loop below receives it.
537 uuidChannel := make(chan string)
539 // Arvados Client setup
540 arv, err := arvadosclient.MakeArvadosClient()
542 err = fmt.Errorf("error creating Arvados object: %s", err)
546 kc, err := keepclient.MakeKeepClient(arv)
548 err = fmt.Errorf("error creating Keep object: %s", err)
553 ac := arvados.NewClientFromEnv()
555 // Populate uuidChannel with the requested uuid list
// NOTE(review): presumably this runs in its own goroutine; closing the
// channel is what ends the range loop over uuidChannel below — confirm.
557 defer close(uuidChannel)
558 for _, uuid := range c.uuids {
// A non-zero c.begin means that a -begin/-end range was supplied.
562 if !c.begin.IsZero() {
563 initialParams := arvados.ResourceListParams{
564 Filters: []arvados.Filter{{"container.finished_at", ">=", c.begin}, {"container.finished_at", "<", c.end}, {"requesting_container_uuid", "=", nil}},
567 params := initialParams
569 // This list variable must be a new one declared
570 // inside the loop: otherwise, items in the API
571 // response would get deep-merged into the items
572 // loaded in previous iterations.
573 var list arvados.ContainerRequestList
575 err := ac.RequestAndDecode(&list, "GET", "arvados/v1/container_requests", nil, params)
577 logger.Errorf("Error getting container request list from Arvados API: %s", err)
// An empty batch means that every matching request has been seen.
580 if len(list.Items) == 0 {
584 for _, i := range list.Items {
585 uuidChannel <- i.UUID
// Advance to the next batch.
587 params.Offset += len(list.Items)
// Aggregate result, keyed by container UUID.
593 cost := make(map[string]containerInfo)
595 for uuid := range uuidChannel {
596 logger.Debugf("Considering %s", uuid)
597 if strings.Contains(uuid, "-j7d0g-") {
598 // This is a project (group)
// NOTE(review): this assigns handleProject's result to cost, the aggregate
// map declared above, replacing whatever earlier UUIDs contributed; the
// loop that follows then ranges over that same map. The container request
// branch uses a separate crCsv variable instead — confirm whether a
// separate variable was intended here too.
599 cost, err = handleProject(logger, uuid, arv, ac, kc, c.resultsDir, c.cache)
604 for k, v := range cost {
607 } else if strings.Contains(uuid, "-xvhdp-") || strings.Contains(uuid, "-4zz18-") {
608 // This is a container request
609 var crCsv map[string]containerInfo
610 crCsv, err = generateCrCsv(logger, uuid, arv, ac, kc, c.resultsDir, c.cache)
612 err = fmt.Errorf("error generating CSV for uuid %s: %s", uuid, err.Error())
616 for k, v := range crCsv {
619 } else if strings.Contains(uuid, "-tpzed-") {
620 // This is a user. The "Home" project for a user is not a real project.
621 // It is identified by the user uuid. As such, cost analysis for the
622 // "Home" project is not supported by this program. Skip this uuid, but
624 logger.Errorf("cost analysis is not supported for the 'Home' project: %s", uuid)
626 logger.Errorf("this argument does not look like a uuid: %s", uuid)
633 logger.Info("Nothing to do!")
// Aggregate report: a commented preamble that lists the UUIDs given on the
// command line, then one row per container, then a TOTAL row.
639 csv = "# Aggregate cost accounting for uuids:\n# UUID, Duration in seconds, Total cost\n"
640 for _, uuid := range c.uuids {
641 csv += "# " + uuid + "\n"
644 var totalCost, totalDuration float64
// Map iteration order is not defined in Go, so the row order may differ
// between runs.
645 for k, v := range cost {
646 csv += k + "," + strconv.FormatFloat(v.duration, 'f', 3, 64) + "," + strconv.FormatFloat(v.cost, 'f', 8, 64) + "\n"
648 totalDuration += v.duration
651 csv += "TOTAL," + strconv.FormatFloat(totalDuration, 'f', 3, 64) + "," + strconv.FormatFloat(totalCost, 'f', 2, 64) + "\n"
653 if c.resultsDir != "" {
654 // Write the resulting CSV file
655 aFile := c.resultsDir + "/" + time.Now().Format("2006-01-02-15-04-05") + "-aggregate-costaccounting.csv"
656 err = ioutil.WriteFile(aFile, []byte(csv), 0644)
658 err = fmt.Errorf("error writing file with path %s: %s", aFile, err.Error())
662 logger.Infof("Aggregate cost accounting for all supplied uuids in %s", aFile)
665 // Output the total dollar amount on stdout
666 fmt.Fprintf(stdout, "%s\n", strconv.FormatFloat(totalCost, 'f', 2, 64))