1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
12 "git.arvados.org/arvados.git/sdk/go/arvados"
13 "git.arvados.org/arvados.git/sdk/go/arvadosclient"
14 "git.arvados.org/arvados.git/sdk/go/keepclient"
23 "github.com/sirupsen/logrus"
// timestampFormat is the time layout used to parse the -begin and -end
// flag values; it is also printed verbatim in the usage text and in the
// flag descriptions.
26 const timestampFormat = "2006-01-02T15:04:05"
// nodeInfo describes the compute node that ran a container. It is the
// decode target for the 'node.json' file read from a container request's
// log collection.
// NOTE(review): the field declarations are not visible in this excerpt;
// code elsewhere in the file reads Properties.CloudNode.Price,
// Properties.CloudNode.Size, ProviderType and Preemptible.
28 type nodeInfo struct {
29 // Legacy (records created by Arvados Node Manager with Arvados <= 1.4.3)
// arrayFlags is a list of strings that carries String and Set methods,
// i.e. the method pair required by flag.Value.
// NOTE(review): no use of this type is visible in this excerpt.
42 type arrayFlags []string
// String is the string-rendering half of the flag.Value method pair.
// NOTE(review): the method body is not visible in this excerpt.
44 func (i *arrayFlags) String() string {
// Set splits value on commas and visits each piece in order.
// NOTE(review): the loop body is not visible in this excerpt; presumably
// each piece is appended to *i -- confirm.
48 func (i *arrayFlags) Set(value string) error {
49 for _, s := range strings.Split(value, ",") {
// parseFlags registers and parses the command line options for prog and
// records the results on c: the CSV output directory (-output), the cache
// switch (-cache), the optional -begin/-end time range, and the remaining
// positional arguments as UUIDs. Usage text and flag parse errors are
// written to stderr.
//
// It rejects a -begin without -end (and vice versa), timestamps that do
// not parse with timestampFormat, and an invocation that supplies neither
// a UUID nor a time range. The -log-level value is parsed with
// logrus.ParseLevel.
//
// NOTE(review): the exitCode assigned on each path is not visible in this
// excerpt.
// NOTE(review): the usage text reads "for the node then the container
// ran"; "then" looks like a typo for "when".
// NOTE(review): the fmt.Printf("UUIDS: ...") call writes to stdout, which
// the usage text describes as carrying the total dollar amount; confirm
// that this extra output is intended.
55 func (c *command) parseFlags(prog string, args []string, logger *logrus.Logger, stderr io.Writer) (exitCode int, err error) {
56 var beginStr, endStr string
// ContinueOnError makes Parse hand the error back instead of exiting.
57 flags := flag.NewFlagSet("", flag.ContinueOnError)
58 flags.SetOutput(stderr)
59 flags.Usage = func() {
60 fmt.Fprintf(flags.Output(), `
62 %s [options ...] [UUID ...]
64 This program analyzes the cost of Arvados container requests and calculates
65 the total cost across all requests. At least one UUID or a timestamp range
68 When the '-output' option is specified, a set of CSV files with cost details
69 will be written to the provided directory. Each file is a CSV report that lists
70 all the containers used to fulfill the container request, together with the
71 machine type and cost of each container.
73 When supplied with the UUID of a container request, it will calculate the
74 cost of that container request and all its children.
76 When supplied with the UUID of a collection, it will see if there is a
77 container_request UUID in the properties of the collection, and if so, it
78 will calculate the cost of that container request and all its children.
80 When supplied with a project UUID or when supplied with multiple container
81 request or collection UUIDs, it will calculate the total cost for all
84 When supplied with a 'begin' and 'end' timestamp (format:
85 %s), it will calculate the cost for all top-level container
86 requests whose containers finished during the specified interval.
88 The total cost calculation takes container reuse into account: if a container
89 was reused between several container requests, its cost will only be counted
94 - This program uses the cost data from config.yml at the time of the
95 execution of the container, stored in the 'node.json' file in its log
96 collection. If the cost data was not correctly configured at the time the
97 container was executed, the output from this program will be incorrect.
99 - If a container was run on a preemptible ("spot") instance, the cost data
100 reported by this program may be wildly inaccurate, because it does not have
101 access to the spot pricing in effect for the node then the container ran. The
102 UUID report file that is generated when the '-output' option is specified has
103 a column that indicates the preemptible state of the instance that ran the
106 - This program does not take into account overhead costs like the time spent
107 starting and stopping compute nodes that run containers, the cost of the
108 permanent cloud nodes that provide the Arvados services, the cost of data
109 stored in Arvados, etc.
111 - When provided with a project UUID, subprojects will not be considered.
113 In order to get the data for the UUIDs supplied, the ARVADOS_API_HOST and
114 ARVADOS_API_TOKEN environment variables must be set.
116 This program prints the total dollar amount from the aggregate cost
117 accounting across all provided UUIDs on stdout.
120 `, prog, timestampFormat)
121 flags.PrintDefaults()
123 loglevel := flags.String("log-level", "info", "logging `level` (debug, info, ...)")
124 flags.StringVar(&c.resultsDir, "output", "", "output `directory` for the CSV reports")
125 flags.StringVar(&beginStr, "begin", "", fmt.Sprintf("timestamp `begin` for date range operation (format: %s)", timestampFormat))
126 flags.StringVar(&endStr, "end", "", fmt.Sprintf("timestamp `end` for date range operation (format: %s)", timestampFormat))
127 flags.BoolVar(&c.cache, "cache", true, "create and use a local disk cache of Arvados objects")
128 err = flags.Parse(args)
129 if err == flag.ErrHelp {
133 } else if err != nil {
// Whatever is left after the flags is treated as the UUID list.
137 c.uuids = flags.Args()
// -begin and -end are only accepted as a pair.
139 if (len(beginStr) != 0 && len(endStr) == 0) || (len(beginStr) == 0 && len(endStr) != 0) {
141 err = fmt.Errorf("When specifying a date range, both begin and end must be specified")
146 if len(beginStr) != 0 {
148 c.begin, errB = time.Parse(timestampFormat, beginStr)
149 c.end, errE = time.Parse(timestampFormat, endStr)
150 if (errB != nil) || (errE != nil) {
152 err = fmt.Errorf("When specifying a date range, both begin and end must be of the format %s %+v, %+v", timestampFormat, errB, errE)
// There must be something to analyze: at least one UUID or a time range.
158 if (len(c.uuids) < 1) && (len(beginStr) == 0) {
160 err = fmt.Errorf("error: no uuid(s) provided")
164 fmt.Printf("UUIDS: %s\n", c.uuids)
166 lvl, err := logrus.ParseLevel(*loglevel)
173 logger.Debug("Caching disabled\n")
// ensureDirectory makes sure dir exists and is a directory. When os.Stat
// reports that dir does not exist, it is created together with any
// missing parents using mode 0700; an "error creating directory" error is
// returned on that path (presumably when MkdirAll fails). A path that
// exists but is not a directory is reported as an error.
// NOTE(review): how Stat failures other than "does not exist" are handled
// is not visible in this excerpt; statData would be nil in that case, so
// confirm the IsDir call is guarded.
// NOTE(review): logger is not referenced by the lines visible here.
178 func ensureDirectory(logger *logrus.Logger, dir string) (err error) {
179 statData, err := os.Stat(dir)
180 if os.IsNotExist(err) {
181 err = os.MkdirAll(dir, 0700)
183 return fmt.Errorf("error creating directory %s: %s", dir, err.Error())
186 if !statData.IsDir() {
187 return fmt.Errorf("the path %s is not a directory", dir)
// addContainerLine renders one CSV row for container and returns it
// together with the container's cost. The columns built by the visible
// lines are the container UUID, state, start time, finish time, run time
// in whole seconds, node size, preemptible flag, hourly price and cost
// (the last two with 8 decimals). cost is the run time expressed in hours
// multiplied by the hourly price.
//
// When the legacy node.Properties.CloudNode.Price is non-zero, price and
// size are taken from the legacy record; size is otherwise set from
// node.ProviderType.
// NOTE(review): the non-legacy price assignment and the leading columns
// built from cr are not visible in this excerpt.
// NOTE(review): container.StartedAt is dereferenced inside the
// FinishedAt != nil branch without a visible nil check of its own; a
// container with FinishedAt set and StartedAt unset would panic there --
// confirm whether that combination can occur.
193 func addContainerLine(logger *logrus.Logger, node nodeInfo, cr arvados.ContainerRequest, container arvados.Container) (csv string, cost float64) {
196 csv += container.UUID + ","
197 csv += string(container.State) + ","
198 if container.StartedAt != nil {
199 csv += container.StartedAt.String() + ","
// delta starts at zero and is only assigned in the FinishedAt branch
// below (as far as the visible lines show).
204 var delta time.Duration
205 if container.FinishedAt != nil {
206 csv += container.FinishedAt.String() + ","
207 delta = container.FinishedAt.Sub(*container.StartedAt)
208 csv += strconv.FormatFloat(delta.Seconds(), 'f', 0, 64) + ","
214 if node.Properties.CloudNode.Price != 0 {
215 price = node.Properties.CloudNode.Price
216 size = node.Properties.CloudNode.Size
219 size = node.ProviderType
// price is an hourly rate, so convert the run time from seconds to hours.
221 cost = delta.Seconds() / 3600 * price
222 csv += size + "," + fmt.Sprintf("%+v", node.Preemptible) + "," + strconv.FormatFloat(price, 'f', 8, 64) + "," + strconv.FormatFloat(cost, 'f', 8, 64) + "\n"
// loadCachedObject tries to fill object from the JSON cache file at file
// and reports through reload whether the caller should fetch the object
// again.
//
// Projects ("-j7d0g-") and collections ("-4zz18-") are never served from
// the cache. For other UUIDs the file is stat'ed, read and unmarshalled
// into object; read and unmarshal failures are logged. Container requests
// in the Final state and containers in the Complete or Cancelled state
// are logged as loaded from the local cache.
// NOTE(review): the assignments to reload are not visible in this
// excerpt; presumably it is false only on the final-state paths --
// confirm.
226 func loadCachedObject(logger *logrus.Logger, file string, uuid string, object interface{}) (reload bool) {
228 if strings.Contains(uuid, "-j7d0g-") || strings.Contains(uuid, "-4zz18-") {
229 // We do not cache projects or collections, they have no final state
232 // See if we have a cached copy of this object
233 _, err := os.Stat(file)
237 data, err := ioutil.ReadFile(file)
239 logger.Errorf("error reading %q: %s", file, err)
242 err = json.Unmarshal(data, &object)
244 logger.Errorf("failed to unmarshal json: %s: %s", data, err)
248 // See if it is in a final state, if that makes sense
249 switch v := object.(type) {
250 case *arvados.ContainerRequest:
251 if v.State == arvados.ContainerRequestStateFinal {
253 logger.Debugf("Loaded object %s from local cache (%s)\n", uuid, file)
255 case *arvados.Container:
256 if v.State == arvados.ContainerStateComplete || v.State == arvados.ContainerStateCancelled {
258 logger.Debugf("Loaded object %s from local cache (%s)\n", uuid, file)
264 // Load an Arvados object.
//
// loadObject fills object with the Arvados record identified by uuid.
// The visible cache path looks in
// <home>/.cache/arvados/costanalyzer/<uuid>.json through
// loadCachedObject; when the home directory cannot be determined or the
// cache directory cannot be created, that is logged at info level with a
// "not using cache" message.
//
// The API endpoint is picked from the UUID infix: "-j7d0g-" groups,
// "-xvhdp-" container_requests, "-dz642-" containers, "-4zz18-"
// collections. The object is then marshalled as indented JSON and written
// to the cache file with mode 0644.
// NOTE(review): the conditions that skip the API request or the cache
// write (cache disabled, cache hit) are not visible in this excerpt.
// NOTE(review): the path parameter is not referenced by the lines visible
// here.
// NOTE(review): the "unsupported object type" message formats err, but no
// request has been issued on that branch, so err is likely nil there --
// confirm.
265 func loadObject(logger *logrus.Logger, ac *arvados.Client, path string, uuid string, cache bool, object interface{}) (err error) {
266 file := uuid + ".json"
274 homeDir, err := os.UserHomeDir()
277 logger.Info("Unable to determine current user home directory, not using cache")
279 cacheDir = homeDir + "/.cache/arvados/costanalyzer/"
280 err = ensureDirectory(logger, cacheDir)
283 logger.Infof("Unable to create cache directory at %s, not using cache: %s", cacheDir, err.Error())
285 reload = loadCachedObject(logger, cacheDir+file, uuid, object)
// Dispatch on the object type encoded in the UUID.
293 if strings.Contains(uuid, "-j7d0g-") {
294 err = ac.RequestAndDecode(&object, "GET", "arvados/v1/groups/"+uuid, nil, nil)
295 } else if strings.Contains(uuid, "-xvhdp-") {
296 err = ac.RequestAndDecode(&object, "GET", "arvados/v1/container_requests/"+uuid, nil, nil)
297 } else if strings.Contains(uuid, "-dz642-") {
298 err = ac.RequestAndDecode(&object, "GET", "arvados/v1/containers/"+uuid, nil, nil)
299 } else if strings.Contains(uuid, "-4zz18-") {
300 err = ac.RequestAndDecode(&object, "GET", "arvados/v1/collections/"+uuid, nil, nil)
302 err = fmt.Errorf("unsupported object type with UUID %q:\n %s", uuid, err)
306 err = fmt.Errorf("error loading object with UUID %q:\n %s", uuid, err)
309 encoded, err := json.MarshalIndent(object, "", " ")
311 err = fmt.Errorf("error marshaling object with UUID %q:\n %s", uuid, err)
315 err = ioutil.WriteFile(cacheDir+file, encoded, 0644)
317 err = fmt.Errorf("error writing file %s:\n %s", file, err)
// getNode reads the 'node.json' file from the log collection of cr and
// decodes it into node. It reports an error when cr has no log collection
// (cr.LogUUID is empty), when the collection cannot be fetched or opened
// as a filesystem, or when node.json cannot be opened or decoded.
// NOTE(review): arv is not referenced by the lines visible here.
// NOTE(review): no Close call on the opened file is visible in this
// excerpt -- confirm the handle is released.
324 func getNode(arv *arvadosclient.ArvadosClient, ac *arvados.Client, kc *keepclient.KeepClient, cr arvados.ContainerRequest) (node nodeInfo, err error) {
325 if cr.LogUUID == "" {
326 err = errors.New("no log collection")
330 var collection arvados.Collection
331 err = ac.RequestAndDecode(&collection, "GET", "arvados/v1/collections/"+cr.LogUUID, nil, nil)
333 err = fmt.Errorf("error getting collection: %s", err)
337 var fs arvados.CollectionFileSystem
338 fs, err = collection.FileSystem(ac, kc)
340 err = fmt.Errorf("error opening collection as filesystem: %s", err)
344 f, err = fs.Open("node.json")
346 err = fmt.Errorf("error opening file 'node.json' in collection %s: %s", cr.LogUUID, err)
350 err = json.NewDecoder(f).Decode(&node)
352 err = fmt.Errorf("error reading file 'node.json' in collection %s: %s", cr.LogUUID, err)
// handleProject computes costs for the container requests found in the
// project identified by uuid. It loads the project, lists container
// requests using a filter set that references project.UUID and
// requesting_container_uuid, and runs generateCrCsv on every returned
// item. The result is a map of float64 costs keyed by string.
// NOTE(review): the filter attributes/operators and the body of the loop
// that merges each generateCrCsv result are only partly visible in this
// excerpt.
// NOTE(review): the list response is decoded into a generic map and the
// type assertions on "items", on each item and on item["uuid"] are the
// single-value form, which panics on a mismatch; confirm the response
// shape is guaranteed or decode into a typed list instead.
358 func handleProject(logger *logrus.Logger, uuid string, arv *arvadosclient.ArvadosClient, ac *arvados.Client, kc *keepclient.KeepClient, resultsDir string, cache bool) (cost map[string]float64, err error) {
359 cost = make(map[string]float64)
361 var project arvados.Group
362 err = loadObject(logger, ac, uuid, uuid, cache, &project)
364 return nil, fmt.Errorf("error loading object %s: %s", uuid, err.Error())
367 var childCrs map[string]interface{}
368 filterset := []arvados.Filter{
372 Operand: project.UUID,
375 Attr: "requesting_container_uuid",
380 err = ac.RequestAndDecode(&childCrs, "GET", "arvados/v1/container_requests", nil, map[string]interface{}{
381 "filters": filterset,
385 return nil, fmt.Errorf("error querying container_requests: %s", err.Error())
387 if value, ok := childCrs["items"]; ok {
388 logger.Infof("Collecting top level container requests in project %s\n", uuid)
389 items := value.([]interface{})
390 for _, item := range items {
391 itemMap := item.(map[string]interface{})
392 crCsv, err := generateCrCsv(logger, itemMap["uuid"].(string), arv, ac, kc, resultsDir, cache)
394 return nil, fmt.Errorf("error generating container_request CSV: %s", err.Error())
396 for k, v := range crCsv {
401 logger.Infof("No top level container requests found in project %s\n", uuid)
// generateCrCsv computes the cost of one container request and of the
// container requests spawned by its container, and returns the costs as a
// map keyed by container UUID.
//
// uuid may name a collection ("-4zz18-"), in which case the container
// request UUID is taken from the collection's 'container_request'
// property, which must exist and be a string. The container request and
// its container are loaded, the node information is read with getNode,
// and a CSV row is produced with addContainerLine. Child container
// requests are then listed with a filter on requesting_container_uuid
// using the container's UUID, and each child gets its own row and cost
// entry. A TOTAL row is appended, and when resultsDir is non-empty the
// CSV is written to <resultsDir>/<crUUID>.csv with mode 0644.
//
// A container request without a container is logged and skipped; a
// getNode failure is logged with a "Skipping container request" message.
// NOTE(review): the statements following those log calls, and the lines
// that append tmpCsv to csv, are not visible in this excerpt.
// NOTE(review): both "error getting node %s" log calls pass the same UUID
// for both %s verbs.
// NOTE(review): the child loadObject call passes cr.UUID as its path
// argument while the sibling calls pass crUUID; confirm which is meant.
// NOTE(review): the "error loading cr object" message reports uuid, which
// differs from the crUUID actually loaded when a collection was given.
// NOTE(review): fmt.Printf("Processing ...") writes to stdout.
406 func generateCrCsv(logger *logrus.Logger, uuid string, arv *arvadosclient.ArvadosClient, ac *arvados.Client, kc *keepclient.KeepClient, resultsDir string, cache bool) (cost map[string]float64, err error) {
408 cost = make(map[string]float64)
410 csv := "CR UUID,CR name,Container UUID,State,Started At,Finished At,Duration in seconds,Compute node type,Preemptible,Hourly node cost,Total cost\n"
412 var tmpTotalCost float64
413 var totalCost float64
414 fmt.Printf("Processing %s\n", uuid)
417 if strings.Contains(uuid, "-4zz18-") {
418 // This is a collection, find the associated container request (if any)
419 var c arvados.Collection
420 err = loadObject(logger, ac, uuid, uuid, cache, &c)
422 return nil, fmt.Errorf("error loading collection object %s: %s", uuid, err)
424 value, ok := c.Properties["container_request"]
426 return nil, fmt.Errorf("error: collection %s does not have a 'container_request' property", uuid)
428 crUUID, ok = value.(string)
430 return nil, fmt.Errorf("error: collection %s does not have a 'container_request' property of the string type", uuid)
434 // This is a container request, find the container
435 var cr arvados.ContainerRequest
436 err = loadObject(logger, ac, crUUID, crUUID, cache, &cr)
438 return nil, fmt.Errorf("error loading cr object %s: %s", uuid, err)
440 if len(cr.ContainerUUID) == 0 {
441 // Nothing to do! E.g. a CR in 'Uncommitted' state.
442 logger.Infof("No container associated with container request %s, skipping\n", crUUID)
445 var container arvados.Container
446 err = loadObject(logger, ac, crUUID, cr.ContainerUUID, cache, &container)
448 return nil, fmt.Errorf("error loading container object %s: %s", cr.ContainerUUID, err)
451 topNode, err := getNode(arv, ac, kc, cr)
453 logger.Errorf("Skipping container request %s: error getting node %s: %s", cr.UUID, cr.UUID, err)
456 tmpCsv, totalCost = addContainerLine(logger, topNode, cr, container)
458 totalCost += tmpTotalCost
459 cost[container.UUID] = totalCost
461 // Find all container requests that have the container we found above as requesting_container_uuid
462 var childCrs arvados.ContainerRequestList
463 filterset := []arvados.Filter{
465 Attr: "requesting_container_uuid",
467 Operand: container.UUID,
469 err = ac.RequestAndDecode(&childCrs, "GET", "arvados/v1/container_requests", nil, map[string]interface{}{
470 "filters": filterset,
474 return nil, fmt.Errorf("error querying container_requests: %s", err.Error())
476 logger.Infof("Collecting child containers for container request %s (%s)", crUUID, container.FinishedAt)
477 for _, cr2 := range childCrs.Items {
479 node, err := getNode(arv, ac, kc, cr2)
481 logger.Errorf("Skipping container request %s: error getting node %s: %s", cr2.UUID, cr2.UUID, err)
484 logger.Debug("\nChild container: " + cr2.ContainerUUID + "\n")
485 var c2 arvados.Container
486 err = loadObject(logger, ac, cr.UUID, cr2.ContainerUUID, cache, &c2)
488 return nil, fmt.Errorf("error loading object %s: %s", cr2.ContainerUUID, err)
490 tmpCsv, tmpTotalCost = addContainerLine(logger, node, cr2, c2)
491 cost[cr2.ContainerUUID] = tmpTotalCost
493 totalCost += tmpTotalCost
495 logger.Info(" done\n")
497 csv += "TOTAL,,,,,,,,," + strconv.FormatFloat(totalCost, 'f', 8, 64) + "\n"
499 if resultsDir != "" {
500 // Write the resulting CSV file
501 fName := resultsDir + "/" + crUUID + ".csv"
502 err = ioutil.WriteFile(fName, []byte(csv), 0644)
504 return nil, fmt.Errorf("error writing file with path %s: %s", fName, err.Error())
506 logger.Infof("\nUUID report in %s\n\n", fName)
// costAnalyzer is the entry point of a cost analysis run. It parses the
// flags, makes sure the -output directory exists when one was given, sets
// up the Arvados and Keep clients, and then processes every UUID
// delivered on uuidChannel: the UUIDs from the command line plus, when a
// begin time was set, the container requests returned page by page for
// the begin/end filter.
//
// Projects ("-j7d0g-") go through handleProject; container requests
// ("-xvhdp-") and collections ("-4zz18-") go through generateCrCsv; user
// UUIDs ("-tpzed-") and anything else are logged as errors. The collected
// costs are rendered as an aggregate CSV that is written to a timestamped
// file under the -output directory when one was given, and the total is
// printed on stdout with 8 decimals.
// NOTE(review): the producer's enclosing function literal, the bodies of
// the cost-merging loops, the accumulation of total and the exit code
// assignments are not visible in this excerpt.
// NOTE(review): fmt.Printf("Considering ...") also writes to stdout.
512 func (c *command) costAnalyzer(prog string, args []string, logger *logrus.Logger, stdout, stderr io.Writer) (exitcode int, err error) {
513 exitcode, err = c.parseFlags(prog, args, logger, stderr)
518 if c.resultsDir != "" {
519 err = ensureDirectory(logger, c.resultsDir)
526 uuidChannel := make(chan string)
528 // Arvados Client setup
529 arv, err := arvadosclient.MakeArvadosClient()
531 err = fmt.Errorf("error creating Arvados object: %s", err)
535 kc, err := keepclient.MakeKeepClient(arv)
537 err = fmt.Errorf("error creating Keep object: %s", err)
542 ac := arvados.NewClientFromEnv()
544 // Populate uuidChannel with the requested uuid list
// Closing the channel is what ends the "range uuidChannel" loop below.
546 defer close(uuidChannel)
547 for _, uuid := range c.uuids {
551 if !c.begin.IsZero() {
552 initialParams := arvados.ResourceListParams{
// Top-level requests (no requesting container) whose container finished
// at or after begin and before end.
553 Filters: []arvados.Filter{{"container.finished_at", ">=", c.begin}, {"container.finished_at", "<", c.end}, {"requesting_container_uuid", "=", nil}},
556 params := initialParams
558 // This list variable must be a new one declared
559 // inside the loop: otherwise, items in the API
560 // response would get deep-merged into the items
561 // loaded in previous iterations.
562 var list arvados.ContainerRequestList
564 err := ac.RequestAndDecode(&list, "GET", "arvados/v1/container_requests", nil, params)
566 logger.Errorf("Error getting container request list from Arvados API: %s\n", err)
569 if len(list.Items) == 0 {
573 for _, i := range list.Items {
574 uuidChannel <- i.UUID
// Advance to the next page of results.
576 params.Offset += len(list.Items)
582 cost := make(map[string]float64)
584 for uuid := range uuidChannel {
585 fmt.Printf("Considering %s\n", uuid)
586 if strings.Contains(uuid, "-j7d0g-") {
587 // This is a project (group)
588 cost, err = handleProject(logger, uuid, arv, ac, kc, c.resultsDir, c.cache)
593 for k, v := range cost {
596 } else if strings.Contains(uuid, "-xvhdp-") || strings.Contains(uuid, "-4zz18-") {
597 // This is a container request or a collection
598 var crCsv map[string]float64
599 crCsv, err = generateCrCsv(logger, uuid, arv, ac, kc, c.resultsDir, c.cache)
601 err = fmt.Errorf("error generating CSV for uuid %s: %s", uuid, err.Error())
605 for k, v := range crCsv {
608 } else if strings.Contains(uuid, "-tpzed-") {
609 // This is a user. The "Home" project for a user is not a real project.
610 // It is identified by the user uuid. As such, cost analysis for the
611 // "Home" project is not supported by this program. Skip this uuid, but
613 logger.Errorf("cost analysis is not supported for the 'Home' project: %s", uuid)
615 logger.Errorf("this argument does not look like a uuid: %s\n", uuid)
622 logger.Info("Nothing to do!\n")
628 csv = "# Aggregate cost accounting for uuids:\n"
629 for _, uuid := range c.uuids {
630 csv += "# " + uuid + "\n"
634 for k, v := range cost {
635 csv += k + "," + strconv.FormatFloat(v, 'f', 8, 64) + "\n"
639 csv += "TOTAL," + strconv.FormatFloat(total, 'f', 8, 64) + "\n"
641 if c.resultsDir != "" {
642 // Write the resulting CSV file
643 aFile := c.resultsDir + "/" + time.Now().Format("2006-01-02-15-04-05") + "-aggregate-costaccounting.csv"
644 err = ioutil.WriteFile(aFile, []byte(csv), 0644)
646 err = fmt.Errorf("error writing file with path %s: %s", aFile, err.Error())
650 logger.Infof("Aggregate cost accounting for all supplied uuids in %s\n", aFile)
653 // Output the total dollar amount on stdout
654 fmt.Fprintf(stdout, "%s\n", strconv.FormatFloat(total, 'f', 8, 64))