1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
12 "git.arvados.org/arvados.git/lib/config"
13 "git.arvados.org/arvados.git/sdk/go/arvados"
14 "git.arvados.org/arvados.git/sdk/go/arvadosclient"
15 "git.arvados.org/arvados.git/sdk/go/keepclient"
24 "github.com/sirupsen/logrus"
// nodeInfo describes the compute node a container ran on. It is the
// decode target for the 'node.json' file that getNode reads from a
// container request's log collection. addContainerLine reads
// Properties.CloudNode.Price, Properties.CloudNode.Size, ProviderType and
// Preemptible from it.
27 type nodeInfo struct {
28 // Legacy (records created by Arvados Node Manager with Arvados <= 1.4.3)
// arrayFlags is a list of strings; parseFlags returns the requested uuids
// in this type.
// NOTE(review): its String and Set methods have the signatures of the
// flag.Value interface - confirm that is how it is used.
41 type arrayFlags []string
// String returns a string representation of the list.
// NOTE(review): the method body is not visible in this excerpt - confirm
// the exact format it produces.
43 func (i *arrayFlags) String() string {
// Set splits value on commas and handles each piece in turn, so a single
// argument may carry several comma-separated entries.
// NOTE(review): presumably each piece is appended to the list; the loop
// body is not visible in this excerpt - confirm.
47 func (i *arrayFlags) Set(value string) error {
48 for _, s := range strings.Split(value, ",") {
// parseFlags parses the command line in args.
//
// It builds a flag.FlagSet in ContinueOnError mode that writes to stderr,
// installs a usage function that prints the long help text followed by the
// flag defaults, and registers these options:
//
//	-log-level  logging level, default "info"
//	-output     directory for the CSV reports (resultsDir)
//	-begin      start of a date range, layout 2006-01-02T15:04:05
//	-end        end of a date range, layout 2006-01-02T15:04:05
//	-cache      use a local disk cache of Arvados objects, default true
//
// Validation visible here: -begin and -end must be given together, both
// must parse with the layout above, and at least one uuid or a date range
// is required. The log level string is parsed with logrus.ParseLevel.
//
// Results: exitCode, the uuid list, resultsDir, cache, the parsed begin and
// end times, and err describing the first problem found.
//
// NOTE(review): the usage text reads "in effect for the node then the
// container ran"; "then" looks like a typo for "when".
// NOTE(review): how uuids is filled from the remaining arguments, and which
// exitCode each failure maps to, is not visible in this excerpt.
54 func parseFlags(prog string, args []string, loader *config.Loader, logger *logrus.Logger, stderr io.Writer) (exitCode int, uuids arrayFlags, resultsDir string, cache bool, begin time.Time, end time.Time, err error) {
55 var beginStr, endStr string
56 flags := flag.NewFlagSet("", flag.ContinueOnError)
57 flags.SetOutput(stderr)
58 flags.Usage = func() {
59 fmt.Fprintf(flags.Output(), `
61 %s [options ...] [uuid ...]
63 This program analyzes the cost of Arvados container requests and calculates
64 the total cost across all requests. At least one uuid or a timestamp range
67 When the '-output' option is specified, a set of CSV files with cost details
68 will be written to the provided directory. Each file is a CSV report that lists
69 all the containers used to fulfill the container request, together with the
70 machine type and cost of each container.
72 When supplied with the uuid of a container request, it will calculate the
73 cost of that container request and all its children.
75 When supplied with the uuid of a collection, it will see if there is a
76 container_request uuid in the properties of the collection, and if so, it
77 will calculate the cost of that container request and all its children.
79 When supplied with a project uuid or when supplied with multiple container
80 request or collection uuids, it will calculate the total cost for all
83 When supplied with a 'begin' and 'end' timestamp (format:
84 2006-01-02T15:04:05), it will calculate the cost for the UUIDs of all the
85 container requests with an associated container whose "Finished at" timestamp
86 is greater than or equal to the "begin" timestamp and smaller than the "end"
89 The total cost calculation takes container reuse into account: if a container
90 was reused between several container requests, its cost will only be counted
95 - This program uses the cost data from config.yml at the time of the
96 execution of the container, stored in the 'node.json' file in its log
97 collection. If the cost data was not correctly configured at the time the
98 container was executed, the output from this program will be incorrect.
100 - If a container was run on a preemptible ("spot") instance, the cost data
101 reported by this program may be wildly inaccurate, because it does not have
102 access to the spot pricing in effect for the node then the container ran. The
103 UUID report file that is generated when the '-output' option is specified has
104 a column that indicates the preemptible state of the instance that ran the
107 - This program does not take into account overhead costs like the time spent
108 starting and stopping compute nodes that run containers, the cost of the
109 permanent cloud nodes that provide the Arvados services, the cost of data
110 stored in Arvados, etc.
112 - When provided with a project uuid, subprojects will not be considered.
114 In order to get the data for the uuids supplied, the ARVADOS_API_HOST and
115 ARVADOS_API_TOKEN environment variables must be set.
117 This program prints the total dollar amount from the aggregate cost
118 accounting across all provided uuids on stdout.
122 flags.PrintDefaults()
124 loglevel := flags.String("log-level", "info", "logging `level` (debug, info, ...)")
125 flags.StringVar(&resultsDir, "output", "", "output `directory` for the CSV reports")
126 flags.StringVar(&beginStr, "begin", "", "timestamp `begin` for date range operation (format: 2006-01-02T15:04:05)")
127 flags.StringVar(&endStr, "end", "", "timestamp `end` for date range operation (format: 2006-01-02T15:04:05)")
128 flags.BoolVar(&cache, "cache", true, "create and use a local disk cache of Arvados objects")
129 err = flags.Parse(args)
130 if err == flag.ErrHelp {
134 } else if err != nil {
140 if (len(beginStr) != 0 && len(endStr) == 0) || (len(beginStr) == 0 && len(endStr) != 0) {
142 err = fmt.Errorf("When specifying a date range, both begin and end must be specified")
147 if len(beginStr) != 0 {
149 begin, errB = time.Parse("2006-01-02T15:04:05", beginStr)
150 end, errE = time.Parse("2006-01-02T15:04:05", endStr)
151 if (errB != nil) || (errE != nil) {
153 err = fmt.Errorf("When specifying a date range, both begin and end must be of the format 2006-01-02T15:04:05 %+v, %+v", errB, errE)
159 if (len(uuids) < 1) && (len(beginStr) == 0) {
161 err = fmt.Errorf("error: no uuid(s) provided")
166 lvl, err := logrus.ParseLevel(*loglevel)
173 logger.Debug("Caching disabled\n")
// ensureDirectory makes sure that dir exists and is a directory.
//
// When os.Stat reports that dir does not exist, it is created together with
// any missing parents using mode 0700. An error is returned when creation
// fails or when the path exists but is not a directory.
178 func ensureDirectory(logger *logrus.Logger, dir string) (err error) {
179 statData, err := os.Stat(dir)
180 if os.IsNotExist(err) {
181 err = os.MkdirAll(dir, 0700)
183 return fmt.Errorf("error creating directory %s: %s", dir, err.Error())
// NOTE(review): statData is nil whenever os.Stat failed. Confirm that a
// Stat error other than "does not exist" (e.g. permission denied) cannot
// reach this check, otherwise IsDir is called on a nil FileInfo.
186 if !statData.IsDir() {
187 return fmt.Errorf("the path %s is not a directory", dir)
// addContainerLine builds one CSV row for container and computes its cost.
//
// The visible columns are, in order: container UUID, state, started-at,
// finished-at, duration in whole seconds, node size, preemptible flag,
// hourly price and cost (price and cost with 8 decimals). The cost is the
// run duration in hours multiplied by the hourly price.
//
// The price and size come from node.Properties.CloudNode when its Price is
// non-zero; otherwise the size is taken from node.ProviderType.
// NOTE(review): the leading "CR UUID,CR name" columns and the price used in
// the ProviderType branch are set in lines not visible in this excerpt.
//
// Returns the CSV row (newline terminated) and the cost.
193 func addContainerLine(logger *logrus.Logger, node nodeInfo, cr arvados.ContainerRequest, container arvados.Container) (csv string, cost float64) {
196 csv += container.UUID + ","
197 csv += string(container.State) + ","
198 if container.StartedAt != nil {
199 csv += container.StartedAt.String() + ","
// delta stays zero unless a finish time is known, which makes the cost zero.
204 var delta time.Duration
205 if container.FinishedAt != nil {
206 csv += container.FinishedAt.String() + ","
// NOTE(review): StartedAt is dereferenced here under a FinishedAt check
// only. Confirm a container can never have FinishedAt set while StartedAt
// is nil (e.g. cancelled before it started), or this panics.
207 delta = container.FinishedAt.Sub(*container.StartedAt)
208 csv += strconv.FormatFloat(delta.Seconds(), 'f', 0, 64) + ","
214 if node.Properties.CloudNode.Price != 0 {
215 price = node.Properties.CloudNode.Price
216 size = node.Properties.CloudNode.Size
219 size = node.ProviderType
// Seconds are converted to hours before applying the hourly price.
221 cost = delta.Seconds() / 3600 * price
222 csv += size + "," + fmt.Sprintf("%+v", node.Preemptible) + "," + strconv.FormatFloat(price, 'f', 8, 64) + "," + strconv.FormatFloat(cost, 'f', 8, 64) + "\n"
// loadCachedObject tries to fill object from the JSON cache file at file.
//
// Project (-j7d0g-) and collection (-4zz18-) uuids are never served from the
// cache. For other uuids the file is read and unmarshalled into object, and
// the result is then checked for a final state: a container request must be
// in state Final, a container in state Complete or Cancelled.
//
// The reload result tells the caller whether the object still has to be
// fetched. NOTE(review): the value returned on each path is set in lines not
// visible in this excerpt - confirm.
226 func loadCachedObject(logger *logrus.Logger, file string, uuid string, object interface{}) (reload bool) {
228 if strings.Contains(uuid, "-j7d0g-") || strings.Contains(uuid, "-4zz18-") {
229 // We do not cache projects or collections, they have no final state
232 // See if we have a cached copy of this object
233 _, err := os.Stat(file)
237 data, err := ioutil.ReadFile(file)
239 logger.Errorf("error reading %q: %s", file, err)
242 err = json.Unmarshal(data, &object)
// NOTE(review): on failure this logs the entire cache file contents.
244 logger.Errorf("failed to unmarshal json: %s: %s", data, err)
248 // See if it is in a final state, if that makes sense
249 switch v := object.(type) {
250 case *arvados.ContainerRequest:
251 if v.State == arvados.ContainerRequestStateFinal {
253 logger.Debugf("Loaded object %s from local cache (%s)\n", uuid, file)
255 case *arvados.Container:
256 if v.State == arvados.ContainerStateComplete || v.State == arvados.ContainerStateCancelled {
258 logger.Debugf("Loaded object %s from local cache (%s)\n", uuid, file)
264 // Load an Arvados object.
//
// loadObject fills object with the Arvados record identified by uuid. The
// cache file name is uuid + ".json" inside ~/.cache/arvados/costanalyzer/.
// The cache is consulted through loadCachedObject; if the home directory
// cannot be determined or the cache directory cannot be created, this is
// logged and the cache is not used.
//
// When fetching, the API endpoint is chosen from the uuid infix: -j7d0g-
// groups, -xvhdp- container_requests, -dz642- containers, -4zz18-
// collections. Any other uuid is reported as an unsupported object type.
// The fetched object is then marshalled as indented JSON and written to the
// cache file with mode 0644.
//
// NOTE(review): the path parameter is not used in any line visible here.
// NOTE(review): the conditions guarding the fetch and the cache write are
// not visible in this excerpt.
265 func loadObject(logger *logrus.Logger, ac *arvados.Client, path string, uuid string, cache bool, object interface{}) (err error) {
266 file := uuid + ".json"
274 homeDir, err := os.UserHomeDir()
277 logger.Info("Unable to determine current user home directory, not using cache")
279 cacheDir = homeDir + "/.cache/arvados/costanalyzer/"
280 err = ensureDirectory(logger, cacheDir)
283 logger.Infof("Unable to create cache directory at %s, not using cache: %s", cacheDir, err.Error())
285 reload = loadCachedObject(logger, cacheDir+file, uuid, object)
293 if strings.Contains(uuid, "-j7d0g-") {
294 err = ac.RequestAndDecode(&object, "GET", "arvados/v1/groups/"+uuid, nil, nil)
295 } else if strings.Contains(uuid, "-xvhdp-") {
296 err = ac.RequestAndDecode(&object, "GET", "arvados/v1/container_requests/"+uuid, nil, nil)
297 } else if strings.Contains(uuid, "-dz642-") {
298 err = ac.RequestAndDecode(&object, "GET", "arvados/v1/containers/"+uuid, nil, nil)
299 } else if strings.Contains(uuid, "-4zz18-") {
300 err = ac.RequestAndDecode(&object, "GET", "arvados/v1/collections/"+uuid, nil, nil)
// NOTE(review): no request was made on this path, so err presumably holds
// no useful value when it is formatted into the message - confirm.
302 err = fmt.Errorf("unsupported object type with UUID %q:\n %s", uuid, err)
306 err = fmt.Errorf("error loading object with UUID %q:\n %s", uuid, err)
309 encoded, err := json.MarshalIndent(object, "", " ")
311 err = fmt.Errorf("error marshaling object with UUID %q:\n %s", uuid, err)
315 err = ioutil.WriteFile(cacheDir+file, encoded, 0644)
317 err = fmt.Errorf("error writing file %s:\n %s", file, err)
// getNode reads the node description for the container request cr.
//
// It fetches the collection named by cr.LogUUID, opens it as a collection
// filesystem, opens the file 'node.json' in it and JSON-decodes that file
// into node. A container request without a log collection yields the error
// "no log collection"; every later failure is wrapped with a message naming
// the step that failed.
//
// NOTE(review): arv is not used in any line visible here.
// NOTE(review): no Close of the opened 'node.json' file is visible in this
// excerpt - confirm it is released.
324 func getNode(arv *arvadosclient.ArvadosClient, ac *arvados.Client, kc *keepclient.KeepClient, cr arvados.ContainerRequest) (node nodeInfo, err error) {
325 if cr.LogUUID == "" {
326 err = errors.New("no log collection")
330 var collection arvados.Collection
331 err = ac.RequestAndDecode(&collection, "GET", "arvados/v1/collections/"+cr.LogUUID, nil, nil)
333 err = fmt.Errorf("error getting collection: %s", err)
337 var fs arvados.CollectionFileSystem
338 fs, err = collection.FileSystem(ac, kc)
340 err = fmt.Errorf("error opening collection as filesystem: %s", err)
344 f, err = fs.Open("node.json")
346 err = fmt.Errorf("error opening file 'node.json' in collection %s: %s", cr.LogUUID, err)
350 err = json.NewDecoder(f).Decode(&node)
352 err = fmt.Errorf("error reading file 'node.json' in collection %s: %s", cr.LogUUID, err)
// handleProject computes the costs for the container requests in the
// project identified by uuid.
//
// It loads the project group, lists container requests with a filter set
// that references project.UUID and the requesting_container_uuid attribute,
// and runs generateCrCsv on the uuid of every returned item. When the
// response has no "items" key it logs that no top level container requests
// were found.
//
// Returns a map of costs, or nil and an error as soon as loading, querying
// or a per-request report fails.
//
// NOTE(review): the filter attributes/operators and the way each crCsv
// entry is merged into cost are in lines not visible in this excerpt.
358 func handleProject(logger *logrus.Logger, uuid string, arv *arvadosclient.ArvadosClient, ac *arvados.Client, kc *keepclient.KeepClient, resultsDir string, cache bool) (cost map[string]float64, err error) {
359 cost = make(map[string]float64)
361 var project arvados.Group
362 err = loadObject(logger, ac, uuid, uuid, cache, &project)
364 return nil, fmt.Errorf("error loading object %s: %s", uuid, err.Error())
// The list response is decoded into a generic map rather than a typed list.
367 var childCrs map[string]interface{}
368 filterset := []arvados.Filter{
372 Operand: project.UUID,
375 Attr: "requesting_container_uuid",
380 err = ac.RequestAndDecode(&childCrs, "GET", "arvados/v1/container_requests", nil, map[string]interface{}{
381 "filters": filterset,
385 return nil, fmt.Errorf("error querying container_requests: %s", err.Error())
387 if value, ok := childCrs["items"]; ok {
388 logger.Infof("Collecting top level container requests in project %s\n", uuid)
// NOTE(review): the three type assertions below are unchecked and panic if
// the response does not have the expected shape.
389 items := value.([]interface{})
390 for _, item := range items {
391 itemMap := item.(map[string]interface{})
392 crCsv, err := generateCrCsv(logger, itemMap["uuid"].(string), arv, ac, kc, resultsDir, cache)
394 return nil, fmt.Errorf("error generating container_request CSV: %s", err.Error())
396 for k, v := range crCsv {
401 logger.Infof("No top level container requests found in project %s\n", uuid)
// generateCrCsv computes the cost of one container request and of its child
// container requests, and optionally writes a per-request CSV report.
//
// uuid may name a container request or a collection (-4zz18-). For a
// collection, the container request uuid is taken from the collection's
// 'container_request' property, which must exist and be a string.
//
// Steps visible here: load the container request; if it has no container,
// log that and skip it; load its container and node information and add a
// CSV row; list the container requests whose requesting_container_uuid
// filter refers to that container and add a row for each of them; append a
// TOTAL row. When resultsDir is non-empty the CSV is written to
// resultsDir/<container request uuid>.csv with mode 0644.
//
// Returns a map from container uuid to cost, so a container shared by
// several requests occupies a single key.
//
// NOTE(review): fmt.Printf below writes progress to the process stdout
// rather than to logger.
406 func generateCrCsv(logger *logrus.Logger, uuid string, arv *arvadosclient.ArvadosClient, ac *arvados.Client, kc *keepclient.KeepClient, resultsDir string, cache bool) (cost map[string]float64, err error) {
408 cost = make(map[string]float64)
410 csv := "CR UUID,CR name,Container UUID,State,Started At,Finished At,Duration in seconds,Compute node type,Preemptible,Hourly node cost,Total cost\n"
412 var tmpTotalCost float64
413 var totalCost float64
414 fmt.Printf("Processing %s\n", uuid)
417 if strings.Contains(uuid, "-4zz18-") {
418 // This is a collection, find the associated container request (if any)
419 var c arvados.Collection
420 err = loadObject(logger, ac, uuid, uuid, cache, &c)
422 return nil, fmt.Errorf("error loading collection object %s: %s", uuid, err)
424 value, ok := c.Properties["container_request"]
426 return nil, fmt.Errorf("error: collection %s does not have a 'container_request' property", uuid)
428 crUUID, ok = value.(string)
430 return nil, fmt.Errorf("error: collection %s does not have a 'container_request' property of the string type", uuid)
434 // This is a container request, find the container
435 var cr arvados.ContainerRequest
436 err = loadObject(logger, ac, crUUID, crUUID, cache, &cr)
// NOTE(review): the message reports uuid although crUUID was loaded; the
// two differ when a collection uuid was supplied.
438 return nil, fmt.Errorf("error loading cr object %s: %s", uuid, err)
440 if len(cr.ContainerUUID) == 0 {
441 // Nothing to do! E.g. a CR in 'Uncommitted' state.
442 logger.Infof("No container associated with container request %s, skipping\n", crUUID)
445 var container arvados.Container
446 err = loadObject(logger, ac, crUUID, cr.ContainerUUID, cache, &container)
448 return nil, fmt.Errorf("error loading container object %s: %s", cr.ContainerUUID, err)
451 topNode, err := getNode(arv, ac, kc, cr)
// NOTE(review): cr.UUID is printed twice in this message.
453 logger.Errorf("Skipping container request %s: error getting node %s: %s", cr.UUID, cr.UUID, err)
456 tmpCsv, totalCost = addContainerLine(logger, topNode, cr, container)
// NOTE(review): no assignment to tmpTotalCost is visible before this
// point, so this addition looks like a no-op - confirm.
458 totalCost += tmpTotalCost
459 cost[container.UUID] = totalCost
461 // Find all container requests that have the container we found above as requesting_container_uuid
462 var childCrs arvados.ContainerRequestList
463 filterset := []arvados.Filter{
465 Attr: "requesting_container_uuid",
467 Operand: container.UUID,
469 err = ac.RequestAndDecode(&childCrs, "GET", "arvados/v1/container_requests", nil, map[string]interface{}{
470 "filters": filterset,
474 return nil, fmt.Errorf("error querying container_requests: %s", err.Error())
476 logger.Infof("Collecting child containers for container request %s", crUUID)
477 for _, cr2 := range childCrs.Items {
479 node, err := getNode(arv, ac, kc, cr2)
// NOTE(review): cr2.UUID is printed twice in this message.
481 logger.Errorf("Skipping container request %s: error getting node %s: %s", cr2.UUID, cr2.UUID, err)
484 logger.Debug("\nChild container: " + cr2.ContainerUUID + "\n")
485 var c2 arvados.Container
486 err = loadObject(logger, ac, cr.UUID, cr2.ContainerUUID, cache, &c2)
488 return nil, fmt.Errorf("error loading object %s: %s", cr2.ContainerUUID, err)
490 tmpCsv, tmpTotalCost = addContainerLine(logger, node, cr2, c2)
491 cost[cr2.ContainerUUID] = tmpTotalCost
493 totalCost += tmpTotalCost
495 logger.Info(" done\n")
497 csv += "TOTAL,,,,,,,,," + strconv.FormatFloat(totalCost, 'f', 8, 64) + "\n"
499 if resultsDir != "" {
500 // Write the resulting CSV file
501 fName := resultsDir + "/" + crUUID + ".csv"
502 err = ioutil.WriteFile(fName, []byte(csv), 0644)
504 return nil, fmt.Errorf("error writing file with path %s: %s", fName, err.Error())
506 logger.Infof("\nUUID report in %s\n\n", fName)
// costanalyzer is the entry point of the cost analysis.
//
// It parses the flags, makes sure the results directory exists when one was
// requested, and creates the Arvados and Keep clients. The uuids to analyze
// are sent over uuidChannel: first the uuids given on the command line,
// then the container requests returned by listing container_requests with
// filters container.finished_at >= begin, container.finished_at < end and
// requesting_container_uuid = nil, advancing the offset one page at a time.
//
// Each uuid received is dispatched on its infix: -j7d0g- (project) goes to
// handleProject, -xvhdp- (container request) and -4zz18- (collection) go to
// generateCrCsv, -tpzed- (user) is rejected because the 'Home' project is
// not supported, and anything else is logged as not looking like a uuid.
//
// Finally an aggregate CSV is assembled (a comment header listing the
// uuids, one row per cost entry and a TOTAL row). When resultsDir is set it
// is written to a timestamped "-aggregate-costaccounting.csv" file there.
// The total is printed on stdout with 8 decimals.
//
// NOTE(review): the producer side of uuidChannel, the channel close and the
// computation of total are in lines not visible in this excerpt.
512 func costanalyzer(prog string, args []string, loader *config.Loader, logger *logrus.Logger, stdout, stderr io.Writer) (exitcode int, err error) {
513 exitcode, uuids, resultsDir, cache, begin, end, err := parseFlags(prog, args, loader, logger, stderr)
517 if resultsDir != "" {
518 err = ensureDirectory(logger, resultsDir)
// Unbuffered: every send blocks until the consumer loop below receives it.
525 uuidChannel := make(chan string)
527 // Arvados Client setup
528 arv, err := arvadosclient.MakeArvadosClient()
530 err = fmt.Errorf("error creating Arvados object: %s", err)
534 kc, err := keepclient.MakeKeepClient(arv)
536 err = fmt.Errorf("error creating Keep object: %s", err)
541 ac := arvados.NewClientFromEnv()
543 // Populate uuidChannel with the requested uuid list
545 for _, uuid := range uuids {
550 initialParams := arvados.ResourceListParams{
551 Filters: []arvados.Filter{{"container.finished_at", ">=", begin}, {"container.finished_at", "<", end}, {"requesting_container_uuid", "=", nil}},
553 params := initialParams
555 // This list variable must be a new one declared
556 // inside the loop: otherwise, items in the API
557 // response would get deep-merged into the items
558 // loaded in previous iterations.
559 var list arvados.ContainerRequestList
561 err := ac.RequestAndDecode(&list, "GET", "arvados/v1/container_requests", nil, params)
563 logger.Errorf("Error getting container request list from Arvados API: %s\n", err)
566 if len(list.Items) == 0 {
570 for _, i := range list.Items {
571 uuidChannel <- i.UUID
// Advance to the next page of results.
573 params.Offset += len(list.Items)
580 cost := make(map[string]float64)
582 for uuid := range uuidChannel {
// NOTE(review): fmt.Printf writes to the process stdout, not to the stdout
// parameter that receives the final total.
583 fmt.Printf("Considering %s\n", uuid)
584 if strings.Contains(uuid, "-j7d0g-") {
585 // This is a project (group)
// NOTE(review): this assigns the project result to the aggregate cost map
// itself, which replaces the entries accumulated from earlier uuids; the
// range over cost that follows then iterates the same map. Confirm whether
// a separate variable was intended.
586 cost, err = handleProject(logger, uuid, arv, ac, kc, resultsDir, cache)
591 for k, v := range cost {
594 } else if strings.Contains(uuid, "-xvhdp-") || strings.Contains(uuid, "-4zz18-") {
595 // This is a container request
596 var crCsv map[string]float64
597 crCsv, err = generateCrCsv(logger, uuid, arv, ac, kc, resultsDir, cache)
599 err = fmt.Errorf("error generating CSV for uuid %s: %s", uuid, err.Error())
603 for k, v := range crCsv {
606 } else if strings.Contains(uuid, "-tpzed-") {
607 // This is a user. The "Home" project for a user is not a real project.
608 // It is identified by the user uuid. As such, cost analysis for the
609 // "Home" project is not supported by this program. Skip this uuid, but
611 logger.Errorf("cost analysis is not supported for the 'Home' project: %s", uuid)
613 logger.Errorf("this argument does not look like a uuid: %s\n", uuid)
620 logger.Info("Nothing to do!\n")
626 csv = "# Aggregate cost accounting for uuids:\n"
627 for _, uuid := range uuids {
628 csv += "# " + uuid + "\n"
// Map iteration order is random, so the row order differs between runs.
632 for k, v := range cost {
633 csv += k + "," + strconv.FormatFloat(v, 'f', 8, 64) + "\n"
637 csv += "TOTAL," + strconv.FormatFloat(total, 'f', 8, 64) + "\n"
639 if resultsDir != "" {
640 // Write the resulting CSV file
641 aFile := resultsDir + "/" + time.Now().Format("2006-01-02-15-04-05") + "-aggregate-costaccounting.csv"
642 err = ioutil.WriteFile(aFile, []byte(csv), 0644)
644 err = fmt.Errorf("error writing file with path %s: %s", aFile, err.Error())
648 logger.Infof("Aggregate cost accounting for all supplied uuids in %s\n", aFile)
651 // Output the total dollar amount on stdout
652 fmt.Fprintf(stdout, "%s\n", strconv.FormatFloat(total, 'f', 8, 64))