1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
20 "git.arvados.org/arvados.git/lib/cmd"
21 "git.arvados.org/arvados.git/sdk/go/arvados"
22 "git.arvados.org/arvados.git/sdk/go/arvadosclient"
23 "git.arvados.org/arvados.git/sdk/go/keepclient"
24 "github.com/sirupsen/logrus"
// timestampFormat is the time layout used to parse the -begin and -end
// flag values; it is also shown to the user in the usage text.
27 const timestampFormat = "2006-01-02T15:04:05"
// nodeInfo describes the node a container ran on. It is the structure
// that getNode decodes from the 'node.json' file of a container
// request's log collection.
29 type nodeInfo struct {
30 // Legacy (records created by Arvados Node Manager with Arvados <= 1.4.3)
// consumption is a cost/duration pair. addContainerLine fills it with
// the run time in seconds and the cost derived from the hourly node
// price.
43 type consumption struct {
// Add accumulates n into c.
48 func (c *consumption) Add(n consumption) {
50 c.duration += n.duration
// arrayFlags is a list of strings. Its String and Set methods have the
// signatures required of a command-line flag value.
53 type arrayFlags []string
// String returns i in string form. Together with Set it lets
// arrayFlags be used as a flag value.
55 func (i *arrayFlags) String() string {
// Set treats value as a comma-separated list and handles each element
// in turn.
59 func (i *arrayFlags) Set(value string) error {
60 for _, s := range strings.Split(value, ",") {
// parseFlags registers the command line flags (-log-level, -output,
// -begin, -end, -cache), parses args, and validates the result. The
// positional arguments left after flag parsing are stored in c.uuids.
//
// Validation failures are reported on stderr:
//   - -begin and -end must be given together, and both must match
//     timestampFormat;
//   - at least one UUID or a date range must be supplied;
//   - -log-level must be a level name that logrus can parse.
//
// NOTE(review): the usage text below reads "for the node then the
// container ran"; "then" looks like a typo for "when".
66 func (c *command) parseFlags(prog string, args []string, logger *logrus.Logger, stderr io.Writer) (ok bool, exitCode int) {
67 var beginStr, endStr string
68 flags := flag.NewFlagSet("", flag.ContinueOnError)
// Custom usage text; prog and timestampFormat are substituted into it,
// followed by the generated flag defaults.
69 flags.Usage = func() {
70 fmt.Fprintf(flags.Output(), `
72 %s [options ...] [UUID ...]
74 This program analyzes the cost of Arvados container requests and calculates
75 the total cost across all requests. At least one UUID or a timestamp range
78 When the '-output' option is specified, a set of CSV files with cost details
79 will be written to the provided directory. Each file is a CSV report that lists
80 all the containers used to fulfill the container request, together with the
81 machine type and cost of each container.
83 When supplied with the UUID of a container request, it will calculate the
84 cost of that container request and all its children.
86 When supplied with the UUID of a collection, it will see if there is a
87 container_request UUID in the properties of the collection, and if so, it
88 will calculate the cost of that container request and all its children.
90 When supplied with a project UUID or when supplied with multiple container
91 request or collection UUIDs, it will calculate the total cost for all
94 When supplied with a 'begin' and 'end' timestamp (format:
95 %s), it will calculate the cost for all top-level container
96 requests whose containers finished during the specified interval.
98 The total cost calculation takes container reuse into account: if a container
99 was reused between several container requests, its cost will only be counted
104 - This program uses the cost data from config.yml at the time of the
105 execution of the container, stored in the 'node.json' file in its log
106 collection. If the cost data was not correctly configured at the time the
107 container was executed, the output from this program will be incorrect.
109 - If a container was run on a preemptible ("spot") instance, the cost data
110 reported by this program may be wildly inaccurate, because it does not have
111 access to the spot pricing in effect for the node then the container ran. The
112 UUID report file that is generated when the '-output' option is specified has
113 a column that indicates the preemptible state of the instance that ran the
116 - This program does not take into account overhead costs like the time spent
117 starting and stopping compute nodes that run containers, the cost of the
118 permanent cloud nodes that provide the Arvados services, the cost of data
119 stored in Arvados, etc.
121 - When provided with a project UUID, subprojects will not be considered.
123 In order to get the data for the UUIDs supplied, the ARVADOS_API_HOST and
124 ARVADOS_API_TOKEN environment variables must be set.
126 This program prints the total dollar amount from the aggregate cost
127 accounting across all provided UUIDs on stdout.
130 `, prog, timestampFormat)
131 flags.PrintDefaults()
133 loglevel := flags.String("log-level", "info", "logging `level` (debug, info, ...)")
134 flags.StringVar(&c.resultsDir, "output", "", "output `directory` for the CSV reports")
135 flags.StringVar(&beginStr, "begin", "", fmt.Sprintf("timestamp `begin` for date range operation (format: %s)", timestampFormat))
136 flags.StringVar(&endStr, "end", "", fmt.Sprintf("timestamp `end` for date range operation (format: %s)", timestampFormat))
137 flags.BoolVar(&c.cache, "cache", true, "create and use a local disk cache of Arvados objects")
138 if ok, code := cmd.ParseFlags(flags, prog, args, "[uuid ...]", stderr); !ok {
// Everything left after the flags is treated as a UUID to analyze.
141 c.uuids = flags.Args()
// A date range needs both ends: reject exactly one of -begin / -end.
143 if (len(beginStr) != 0 && len(endStr) == 0) || (len(beginStr) == 0 && len(endStr) != 0) {
144 fmt.Fprintf(stderr, "When specifying a date range, both begin and end must be specified (try -help)\n")
148 if len(beginStr) != 0 {
// Both timestamps are parsed before either error is checked, so the
// message can report both parse errors at once.
150 c.begin, errB = time.Parse(timestampFormat, beginStr)
151 c.end, errE = time.Parse(timestampFormat, endStr)
152 if (errB != nil) || (errE != nil) {
153 fmt.Fprintf(stderr, "When specifying a date range, both begin and end must be of the format %s %+v, %+v\n", timestampFormat, errB, errE)
// With no date range, at least one UUID argument is required.
158 if (len(c.uuids) < 1) && (len(beginStr) == 0) {
159 fmt.Fprintf(stderr, "error: no uuid(s) provided (try -help)\n")
163 lvl, err := logrus.ParseLevel(*loglevel)
165 fmt.Fprintf(stderr, "invalid argument to -log-level: %s\n", err)
170 logger.Debug("Caching disabled")
// ensureDirectory makes sure dir exists and is a directory. When
// os.Stat reports that dir does not exist, it is created (including
// any missing parents) with mode 0700. An error is returned when
// creation fails or when the path is not a directory.
//
// NOTE(review): statData is nil whenever os.Stat fails; confirm that a
// Stat error other than "not exist" (e.g. permission denied) cannot
// reach the IsDir call below.
175 func ensureDirectory(logger *logrus.Logger, dir string) (err error) {
176 statData, err := os.Stat(dir)
177 if os.IsNotExist(err) {
178 err = os.MkdirAll(dir, 0700)
180 return fmt.Errorf("error creating directory %s: %s", dir, err.Error())
183 if !statData.IsDir() {
184 return fmt.Errorf("the path %s is not a directory", dir)
// addContainerLine builds one CSV row describing container and returns
// it together with the container's consumption.
//
// The row includes the container UUID, state, start and finish times,
// the run time in seconds, the node size, the preemptible flag, the
// hourly price and the resulting cost. The cost is the run time in
// hours multiplied by the hourly price; the duration is in seconds.
//
// NOTE(review): the run time is computed inside the FinishedAt != nil
// check by dereferencing container.StartedAt, which is nil-checked
// separately above. Confirm a container cannot have FinishedAt set
// while StartedAt is nil, otherwise this dereference panics.
190 func addContainerLine(logger *logrus.Logger, node nodeInfo, cr arvados.ContainerRequest, container arvados.Container) (string, consumption) {
192 var containerConsumption consumption
195 csv += container.UUID + ","
196 csv += string(container.State) + ","
197 if container.StartedAt != nil {
198 csv += container.StartedAt.String() + ","
// delta stays zero unless the container has a finish time.
203 var delta time.Duration
204 if container.FinishedAt != nil {
205 csv += container.FinishedAt.String() + ","
206 delta = container.FinishedAt.Sub(*container.StartedAt)
207 csv += strconv.FormatFloat(delta.Seconds(), 'f', 3, 64) + ","
// Prefer the price and size recorded under Properties.CloudNode when a
// non-zero price is present there.
213 if node.Properties.CloudNode.Price != 0 {
214 price = node.Properties.CloudNode.Price
215 size = node.Properties.CloudNode.Size
218 size = node.ProviderType
// price is per hour, so convert the run time from seconds to hours.
220 containerConsumption.cost = delta.Seconds() / 3600 * price
221 containerConsumption.duration = delta.Seconds()
222 csv += size + "," + fmt.Sprintf("%+v", node.Preemptible) + "," + strconv.FormatFloat(price, 'f', 8, 64) + "," + strconv.FormatFloat(containerConsumption.cost, 'f', 8, 64) + "\n"
223 return csv, containerConsumption
// loadCachedObject tries to fill object from the JSON stored in file.
//
// Projects (UUIDs containing "-j7d0g-") and collections ("-4zz18-")
// are never served from the cache. For the other types the file is
// read and unmarshaled into object; a container request is then
// checked for the Final state, and a container for the Complete or
// Cancelled state, before it is reported as loaded from the cache.
//
// NOTE(review): the result is named reload; presumably true means the
// caller still has to fetch the object from the API. Confirm the
// polarity against the return statements.
226 func loadCachedObject(logger *logrus.Logger, file string, uuid string, object interface{}) (reload bool) {
228 if strings.Contains(uuid, "-j7d0g-") || strings.Contains(uuid, "-4zz18-") {
229 // We do not cache projects or collections, they have no final state
232 // See if we have a cached copy of this object
233 _, err := os.Stat(file)
237 data, err := ioutil.ReadFile(file)
239 logger.Errorf("error reading %q: %s", file, err)
242 err = json.Unmarshal(data, &object)
244 logger.Errorf("failed to unmarshal json: %s: %s", data, err)
248 // See if it is in a final state, if that makes sense
249 switch v := object.(type) {
250 case *arvados.ContainerRequest:
251 if v.State == arvados.ContainerRequestStateFinal {
253 logger.Debugf("Loaded object %s from local cache (%s)", uuid, file)
255 case *arvados.Container:
256 if v.State == arvados.ContainerStateComplete || v.State == arvados.ContainerStateCancelled {
258 logger.Debugf("Loaded object %s from local cache (%s)", uuid, file)
264 // Load an Arvados object.
//
// The object identified by uuid is decoded into object. The local
// cache lives in ".cache/arvados/costanalyzer/" under the user's home
// directory, one "<uuid>.json" file per object. If the home directory
// cannot be determined or the cache directory cannot be created, a
// message is logged and the cache is not used.
//
// The API endpoint is chosen from the UUID infix: "-j7d0g-" (groups),
// "-xvhdp-" (container_requests), "-dz642-" (containers) or "-4zz18-"
// (collections). The object is marshaled as indented JSON and written
// to its cache file with mode 0644.
//
// NOTE(review): path looks unused — confirm. Also, in the "unsupported
// object type" message err is probably still nil, which would print as
// %!s(<nil>) — confirm.
265 func loadObject(logger *logrus.Logger, ac *arvados.Client, path string, uuid string, cache bool, object interface{}) (err error) {
266 file := uuid + ".json"
274 homeDir, err := os.UserHomeDir()
277 logger.Info("Unable to determine current user home directory, not using cache")
279 cacheDir = homeDir + "/.cache/arvados/costanalyzer/"
280 err = ensureDirectory(logger, cacheDir)
283 logger.Infof("Unable to create cache directory at %s, not using cache: %s", cacheDir, err.Error())
285 reload = loadCachedObject(logger, cacheDir+file, uuid, object)
// Pick the API endpoint that matches the object type encoded in the UUID.
293 if strings.Contains(uuid, "-j7d0g-") {
294 err = ac.RequestAndDecode(&object, "GET", "arvados/v1/groups/"+uuid, nil, nil)
295 } else if strings.Contains(uuid, "-xvhdp-") {
296 err = ac.RequestAndDecode(&object, "GET", "arvados/v1/container_requests/"+uuid, nil, nil)
297 } else if strings.Contains(uuid, "-dz642-") {
298 err = ac.RequestAndDecode(&object, "GET", "arvados/v1/containers/"+uuid, nil, nil)
299 } else if strings.Contains(uuid, "-4zz18-") {
300 err = ac.RequestAndDecode(&object, "GET", "arvados/v1/collections/"+uuid, nil, nil)
302 err = fmt.Errorf("unsupported object type with UUID %q:\n %s", uuid, err)
306 err = fmt.Errorf("error loading object with UUID %q:\n %s", uuid, err)
// Serialize the object so it can be stored in the cache file.
309 encoded, err := json.MarshalIndent(object, "", " ")
311 err = fmt.Errorf("error marshaling object with UUID %q:\n %s", uuid, err)
315 err = ioutil.WriteFile(cacheDir+file, encoded, 0644)
317 err = fmt.Errorf("error writing file %s:\n %s", file, err)
// getNode returns the node information recorded for container request
// cr. It fetches the collection named by cr.LogUUID, opens it as a
// filesystem, and decodes the 'node.json' file inside it into node.
//
// An error is returned when cr has no log collection, or when any of
// the fetch, open or decode steps fails.
//
// NOTE(review): no Close of the opened 'node.json' file is visible —
// confirm the handle is released.
324 func getNode(arv *arvadosclient.ArvadosClient, ac *arvados.Client, kc *keepclient.KeepClient, cr arvados.ContainerRequest) (node nodeInfo, err error) {
325 if cr.LogUUID == "" {
326 err = errors.New("no log collection")
330 var collection arvados.Collection
331 err = ac.RequestAndDecode(&collection, "GET", "arvados/v1/collections/"+cr.LogUUID, nil, nil)
333 err = fmt.Errorf("error getting collection: %s", err)
337 var fs arvados.CollectionFileSystem
338 fs, err = collection.FileSystem(ac, kc)
340 err = fmt.Errorf("error opening collection as filesystem: %s", err)
344 f, err = fs.Open("node.json")
346 err = fmt.Errorf("error opening file 'node.json' in collection %s: %s", cr.LogUUID, err)
350 err = json.NewDecoder(f).Decode(&node)
352 err = fmt.Errorf("error reading file 'node.json' in collection %s: %s", cr.LogUUID, err)
// handleProject computes the cost of the container requests in the
// project identified by uuid. It loads the project, lists container
// requests with a filter built from the project's UUID and the
// requesting_container_uuid attribute, and runs generateCrInfo on each
// returned item. A failure in any of these steps aborts with an error.
//
// NOTE(review): the list response is decoded into a generic map and
// then unpacked with unchecked type assertions ("items" as
// []interface{}, each item as a map, "uuid" as a string). An
// unexpected response shape would panic here; generateCrInfo decodes
// the same endpoint into arvados.ContainerRequestList instead.
358 func handleProject(logger *logrus.Logger, uuid string, arv *arvadosclient.ArvadosClient, ac *arvados.Client, kc *keepclient.KeepClient, resultsDir string, cache bool) (cost map[string]consumption, err error) {
359 cost = make(map[string]consumption)
361 var project arvados.Group
362 err = loadObject(logger, ac, uuid, uuid, cache, &project)
364 return nil, fmt.Errorf("error loading object %s: %s", uuid, err.Error())
367 var childCrs map[string]interface{}
368 filterset := []arvados.Filter{
372 Operand: project.UUID,
375 Attr: "requesting_container_uuid",
380 err = ac.RequestAndDecode(&childCrs, "GET", "arvados/v1/container_requests", nil, map[string]interface{}{
381 "filters": filterset,
385 return nil, fmt.Errorf("error querying container_requests: %s", err.Error())
387 if value, ok := childCrs["items"]; ok {
388 logger.Infof("Collecting top level container requests in project %s", uuid)
389 items := value.([]interface{})
390 for _, item := range items {
391 itemMap := item.(map[string]interface{})
392 crInfo, err := generateCrInfo(logger, itemMap["uuid"].(string), arv, ac, kc, resultsDir, cache)
394 return nil, fmt.Errorf("error generating container_request CSV: %s", err.Error())
396 for k, v := range crInfo {
401 logger.Infof("No top level container requests found in project %s", uuid)
// generateCrInfo computes the cost of one container request and of the
// container requests its container spawned.
//
// uuid may name a collection ("-4zz18-"), in which case the container
// request UUID is taken from the collection's 'container_request'
// property, which must be a string.
//
// The container request and its container are loaded, a CSV row is
// produced for that container, and then every container request whose
// requesting_container_uuid filter matches the container's UUID is
// processed the same way. Results are stored in cost, keyed by
// container UUID. A failure to read node information for a request is
// logged as "Skipping container request ...".
//
// When resultsDir is non-empty, the CSV (header, rows, and a TOTAL
// row) is written to "<resultsDir>/<crUUID>.csv".
//
// NOTE(review): the "error loading cr object" message prints uuid even
// though the object being loaded is crUUID; the two differ when uuid
// names a collection.
406 func generateCrInfo(logger *logrus.Logger, uuid string, arv *arvadosclient.ArvadosClient, ac *arvados.Client, kc *keepclient.KeepClient, resultsDir string, cache bool) (cost map[string]consumption, err error) {
408 cost = make(map[string]consumption)
410 csv := "CR UUID,CR name,Container UUID,State,Started At,Finished At,Duration in seconds,Compute node type,Preemptible,Hourly node cost,Total cost\n"
412 var total, tmpTotal consumption
413 logger.Debugf("Processing %s", uuid)
416 if strings.Contains(uuid, "-4zz18-") {
417 // This is a collection, find the associated container request (if any)
418 var c arvados.Collection
419 err = loadObject(logger, ac, uuid, uuid, cache, &c)
421 return nil, fmt.Errorf("error loading collection object %s: %s", uuid, err)
423 value, ok := c.Properties["container_request"]
425 return nil, fmt.Errorf("error: collection %s does not have a 'container_request' property", uuid)
427 crUUID, ok = value.(string)
429 return nil, fmt.Errorf("error: collection %s does not have a 'container_request' property of the string type", uuid)
433 // This is a container request, find the container
434 var cr arvados.ContainerRequest
435 err = loadObject(logger, ac, crUUID, crUUID, cache, &cr)
437 return nil, fmt.Errorf("error loading cr object %s: %s", uuid, err)
439 if len(cr.ContainerUUID) == 0 {
440 // Nothing to do! E.g. a CR in 'Uncommitted' state.
441 logger.Infof("No container associated with container request %s, skipping", crUUID)
444 var container arvados.Container
445 err = loadObject(logger, ac, crUUID, cr.ContainerUUID, cache, &container)
447 return nil, fmt.Errorf("error loading container object %s: %s", cr.ContainerUUID, err)
450 topNode, err := getNode(arv, ac, kc, cr)
452 logger.Errorf("Skipping container request %s: error getting node %s: %s", cr.UUID, cr.UUID, err)
// The running total starts with the top-level container's consumption.
455 tmpCsv, total = addContainerLine(logger, topNode, cr, container)
457 cost[container.UUID] = total
459 // Find all container requests that have the container we found above as requesting_container_uuid
460 var childCrs arvados.ContainerRequestList
461 filterset := []arvados.Filter{
463 Attr: "requesting_container_uuid",
465 Operand: container.UUID,
467 err = ac.RequestAndDecode(&childCrs, "GET", "arvados/v1/container_requests", nil, map[string]interface{}{
468 "filters": filterset,
472 return nil, fmt.Errorf("error querying container_requests: %s", err.Error())
474 logger.Infof("Collecting child containers for container request %s (%s)", crUUID, container.FinishedAt)
// A 5-second ticker drives the "... i of n" progress messages while the
// child requests are processed.
475 progressTicker := time.NewTicker(5 * time.Second)
476 defer progressTicker.Stop()
477 for i, cr2 := range childCrs.Items {
479 case <-progressTicker.C:
480 logger.Infof("... %d of %d", i+1, len(childCrs.Items))
483 node, err := getNode(arv, ac, kc, cr2)
485 logger.Errorf("Skipping container request %s: error getting node %s: %s", cr2.UUID, cr2.UUID, err)
488 logger.Debug("Child container: " + cr2.ContainerUUID)
489 var c2 arvados.Container
490 err = loadObject(logger, ac, cr.UUID, cr2.ContainerUUID, cache, &c2)
492 return nil, fmt.Errorf("error loading object %s: %s", cr2.ContainerUUID, err)
494 tmpCsv, tmpTotal = addContainerLine(logger, node, cr2, c2)
495 cost[cr2.ContainerUUID] = tmpTotal
499 logger.Debug("Done collecting child containers")
// The TOTAL row leaves every column empty except duration and cost.
501 csv += "TOTAL,,,,,," + strconv.FormatFloat(total.duration, 'f', 3, 64) + ",,,," + strconv.FormatFloat(total.cost, 'f', 2, 64) + "\n"
503 if resultsDir != "" {
504 // Write the resulting CSV file
505 fName := resultsDir + "/" + crUUID + ".csv"
506 err = ioutil.WriteFile(fName, []byte(csv), 0644)
508 return nil, fmt.Errorf("error writing file with path %s: %s", fName, err.Error())
510 logger.Infof("\nUUID report in %s", fName)
// costAnalyzer is the command's main routine. It parses the flags,
// makes sure the -output directory exists when one was given, creates
// the Arvados and Keep clients, and then processes every UUID received
// on uuidChannel:
//
//   - project UUIDs ("-j7d0g-") go to handleProject;
//   - container request ("-xvhdp-") and collection ("-4zz18-") UUIDs
//     go to generateCrInfo;
//   - user UUIDs ("-tpzed-") are logged as unsupported;
//   - anything else is logged as not looking like a UUID.
//
// The channel is fed with the UUIDs from the command line and, when a
// begin time is set, with the container requests that have no
// requesting container and whose container finished at or after
// c.begin and before c.end, fetched page by page.
//
// Finally an aggregate CSV with one row per cost entry and a TOTAL row
// is built; it is written to a timestamped file in c.resultsDir when
// that is set, and the total cost is printed on stdout with two
// decimals.
//
// NOTE(review): in the project branch the handleProject result is
// assigned to cost, the aggregate map declared just before the loop,
// and the loop that follows ranges over cost itself. That looks like it
// discards entries gathered for earlier UUIDs — confirm, and consider
// a separate variable as the container request branch does with
// crInfo.
516 func (c *command) costAnalyzer(prog string, args []string, logger *logrus.Logger, stdout, stderr io.Writer) (exitcode int, err error) {
518 ok, exitcode = c.parseFlags(prog, args, logger, stderr)
522 if c.resultsDir != "" {
523 err = ensureDirectory(logger, c.resultsDir)
530 uuidChannel := make(chan string)
532 // Arvados Client setup
533 arv, err := arvadosclient.MakeArvadosClient()
535 err = fmt.Errorf("error creating Arvados object: %s", err)
539 kc, err := keepclient.MakeKeepClient(arv)
541 err = fmt.Errorf("error creating Keep object: %s", err)
546 ac := arvados.NewClientFromEnv()
548 // Populate uuidChannel with the requested uuid list
550 defer close(uuidChannel)
551 for _, uuid := range c.uuids {
// A non-zero begin time means a date range was requested.
555 if !c.begin.IsZero() {
556 initialParams := arvados.ResourceListParams{
557 Filters: []arvados.Filter{{"container.finished_at", ">=", c.begin}, {"container.finished_at", "<", c.end}, {"requesting_container_uuid", "=", nil}},
560 params := initialParams
562 // This list variable must be a new one declared
563 // inside the loop: otherwise, items in the API
564 // response would get deep-merged into the items
565 // loaded in previous iterations.
566 var list arvados.ContainerRequestList
568 err := ac.RequestAndDecode(&list, "GET", "arvados/v1/container_requests", nil, params)
570 logger.Errorf("Error getting container request list from Arvados API: %s", err)
573 if len(list.Items) == 0 {
577 for _, i := range list.Items {
578 uuidChannel <- i.UUID
// Advance to the next page of results.
580 params.Offset += len(list.Items)
// cost collects the consumption entries for all processed UUIDs.
586 cost := make(map[string]consumption)
588 for uuid := range uuidChannel {
589 logger.Debugf("Considering %s", uuid)
590 if strings.Contains(uuid, "-j7d0g-") {
591 // This is a project (group)
592 cost, err = handleProject(logger, uuid, arv, ac, kc, c.resultsDir, c.cache)
597 for k, v := range cost {
600 } else if strings.Contains(uuid, "-xvhdp-") || strings.Contains(uuid, "-4zz18-") {
601 // This is a container request or collection
602 var crInfo map[string]consumption
603 crInfo, err = generateCrInfo(logger, uuid, arv, ac, kc, c.resultsDir, c.cache)
605 err = fmt.Errorf("error generating CSV for uuid %s: %s", uuid, err.Error())
609 for k, v := range crInfo {
612 } else if strings.Contains(uuid, "-tpzed-") {
613 // This is a user. The "Home" project for a user is not a real project.
614 // It is identified by the user uuid. As such, cost analysis for the
615 // "Home" project is not supported by this program. Skip this uuid, but
617 logger.Errorf("cost analysis is not supported for the 'Home' project: %s", uuid)
619 logger.Errorf("this argument does not look like a uuid: %s", uuid)
626 logger.Info("Nothing to do!")
// The aggregate report starts with comment lines listing the UUIDs given
// on the command line.
632 csv = "# Aggregate cost accounting for uuids:\n# UUID, Duration in seconds, Total cost\n"
633 for _, uuid := range c.uuids {
634 csv += "# " + uuid + "\n"
637 var total consumption
638 for k, v := range cost {
639 csv += k + "," + strconv.FormatFloat(v.duration, 'f', 3, 64) + "," + strconv.FormatFloat(v.cost, 'f', 8, 64) + "\n"
643 csv += "TOTAL," + strconv.FormatFloat(total.duration, 'f', 3, 64) + "," + strconv.FormatFloat(total.cost, 'f', 2, 64) + "\n"
645 if c.resultsDir != "" {
646 // Write the resulting CSV file
647 aFile := c.resultsDir + "/" + time.Now().Format("2006-01-02-15-04-05") + "-aggregate-costaccounting.csv"
648 err = ioutil.WriteFile(aFile, []byte(csv), 0644)
650 err = fmt.Errorf("error writing file with path %s: %s", aFile, err.Error())
654 logger.Infof("Aggregate cost accounting for all supplied uuids in %s", aFile)
657 // Output the total dollar amount on stdout
658 fmt.Fprintf(stdout, "%s\n", strconv.FormatFloat(total.cost, 'f', 2, 64))