1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
12 "git.arvados.org/arvados.git/lib/config"
13 "git.arvados.org/arvados.git/sdk/go/arvados"
14 "git.arvados.org/arvados.git/sdk/go/arvadosclient"
15 "git.arvados.org/arvados.git/sdk/go/keepclient"
24 "github.com/sirupsen/logrus"
// nodeInfo describes the compute node a container ran on. getNode fills it by
// JSON-decoding the 'node.json' file found in a container request's log
// collection, and addContainerLine reads ProviderType, Preemptible and
// Properties.CloudNode.Price / Properties.CloudNode.Size from it.
// NOTE(review): the field declarations are not visible in this excerpt;
// confirm field names and JSON tags against a real node.json.
27 type nodeInfo struct {
28 // Legacy (records created by Arvados Node Manager with Arvados <= 1.4.3)
// arrayFlags is a list of strings. Its String and Set methods have the
// signatures that flag.Value requires, and parseFlags returns the uuids it
// collected as an arrayFlags.
41 type arrayFlags []string
// String is the string-rendering method of arrayFlags (the String half of the
// flag.Value method set).
// NOTE(review): the body is not visible in this excerpt; confirm what it returns.
43 func (i *arrayFlags) String() string {
// Set splits value on commas and handles each piece in turn, so a single
// argument may carry several comma-separated entries.
// NOTE(review): presumably each piece is appended to the receiver and nil is
// returned; the loop body is not visible here, confirm.
47 func (i *arrayFlags) Set(value string) error {
48 for _, s := range strings.Split(value, ",") {
// parseFlags parses the cost analyzer command line.
//
// It defines three options on a fresh flag set: -log-level (default "info"),
// -output (default "", stored in resultsDir) and -cache (default true, stored
// in cache). After flags.Parse it distinguishes flag.ErrHelp from any other
// parse error, can fail with "error: no uuid(s) provided", converts the
// -log-level value with logrus.ParseLevel, and can emit the debug message
// "Caching disabled".
//
// Results: exitCode is the process exit status to use, uuids the objects to
// analyze, resultsDir the CSV output directory ("" when -output was not
// given), cache whether the local disk cache is wanted, and err any failure.
//
// NOTE(review): the bodies of the error branches and the conditions guarding
// the "no uuid(s)" error and the "Caching disabled" message are not visible in
// this excerpt; presumably they test the positional argument count and the
// -cache value respectively. Confirm, and confirm which exit codes are set.
// NOTE(review): the help text below contains two typos, "progam" and "for the
// node then the container ran" (probably "when"). They are runtime output and
// are deliberately left untouched here.
54 func parseFlags(prog string, args []string, loader *config.Loader, logger *logrus.Logger, stderr io.Writer) (exitCode int, uuids arrayFlags, resultsDir string, cache bool, err error) {
// ContinueOnError makes Parse return its error instead of exiting, and all
// flag package output (usage, parse errors) is redirected to stderr.
55 flags := flag.NewFlagSet("", flag.ContinueOnError)
56 flags.SetOutput(stderr)
// Custom usage: the long description that follows, then the flag defaults.
57 flags.Usage = func() {
58 fmt.Fprintf(flags.Output(), `
60 %s [options ...] <uuid> ...
62 This program analyzes the cost of Arvados container requests. For each uuid
63 supplied, it creates a CSV report that lists all the containers used to
64 fulfill the container request, together with the machine type and cost of
65 each container. At least one uuid must be specified.
67 When supplied with the uuid of a container request, it will calculate the
68 cost of that container request and all its children.
70 When supplied with the uuid of a collection, it will see if there is a
71 container_request uuid in the properties of the collection, and if so, it
72 will calculate the cost of that container request and all its children.
74 When supplied with a project uuid or when supplied with multiple container
75 request or collection uuids, it will create a CSV report for each supplied
76 uuid, as well as a CSV file with aggregate cost accounting for all supplied
77 uuids. The aggregate cost report takes container reuse into account: if a
78 container was reused between several container requests, its cost will only
81 To get the node costs, the progam queries the Arvados API for current cost
82 data for each node type used. This means that the reported cost always
83 reflects the cost data as currently defined in the Arvados API configuration
87 - the Arvados API configuration cost data may be out of sync with the cloud
89 - when generating reports for older container requests, the cost data in the
90 Arvados API configuration file may have changed since the container request
91 was fulfilled. This program uses the cost data stored at the time of the
92 execution of the container, stored in the 'node.json' file in its log
94 - if a container was run on a preemptible ("spot") instance, the cost data
95 reported by this program may be wildly inaccurate, because it does not have
96 access to the spot pricing in effect for the node then the container ran. The
97 UUID report file that is generated when the '-output' option is specified has
98 a column that indicates the preemptible state of the instance that ran the
101 In order to get the data for the uuids supplied, the ARVADOS_API_HOST and
102 ARVADOS_API_TOKEN environment variables must be set.
104 This program prints the total dollar amount from the aggregate cost
105 accounting across all provided uuids on stdout.
107 When the '-output' option is specified, a set of CSV files with cost details
108 will be written to the provided directory.
112 flags.PrintDefaults()
114 loglevel := flags.String("log-level", "info", "logging `level` (debug, info, ...)")
115 flags.StringVar(&resultsDir, "output", "", "output `directory` for the CSV reports")
116 flags.BoolVar(&cache, "cache", true, "create and use a local disk cache of Arvados objects")
117 err = flags.Parse(args)
118 if err == flag.ErrHelp {
122 } else if err != nil {
130 err = fmt.Errorf("error: no uuid(s) provided")
135 lvl, err := logrus.ParseLevel(*loglevel)
142 logger.Debug("Caching disabled\n")
// ensureDirectory makes sure dir exists and is a directory.
//
// When os.Stat reports that dir does not exist, it is created (including
// missing parents) with mode 0700; a creation failure is returned as
// "error creating directory ...". A path that exists but is not a directory
// yields "the path ... is not a directory".
//
// NOTE(review): only the not-exist case of the Stat error is tested in the
// visible lines. If Stat fails for another reason (for example a permission
// error) statData may be nil when IsDir is called. The enclosing control flow
// is not visible here; confirm and guard if needed.
147 func ensureDirectory(logger *logrus.Logger, dir string) (err error) {
148 statData, err := os.Stat(dir)
149 if os.IsNotExist(err) {
// 0700: the directory is private to the current user.
150 err = os.MkdirAll(dir, 0700)
152 return fmt.Errorf("error creating directory %s: %s", dir, err.Error())
155 if !statData.IsDir() {
156 return fmt.Errorf("the path %s is not a directory", dir)
// addContainerLine builds one CSV row for a container and computes its cost.
//
// The visible part of the row holds, in order: container UUID, container
// state, start time, finish time, duration in whole seconds, node size,
// preemptible flag, hourly price and cost (price and cost with 8 decimals),
// terminated by a newline. cost is the run time in hours multiplied by the
// hourly price.
//
// NOTE(review): the statements that start the row (presumably the container
// request UUID and name, given the cr parameter) and the ones that handle a
// nil StartedAt / FinishedAt are not visible in this excerpt; confirm.
162 func addContainerLine(logger *logrus.Logger, node nodeInfo, cr arvados.ContainerRequest, container arvados.Container) (csv string, cost float64) {
165 csv += container.UUID + ","
166 csv += string(container.State) + ","
167 if container.StartedAt != nil {
168 csv += container.StartedAt.String() + ","
// delta stays zero unless a finish time is known, which makes cost zero too.
173 var delta time.Duration
174 if container.FinishedAt != nil {
175 csv += container.FinishedAt.String() + ","
// NOTE(review): StartedAt is dereferenced here; whether this statement sits
// inside the StartedAt nil check above is not visible. Confirm a finished
// container can never have a nil StartedAt.
176 delta = container.FinishedAt.Sub(*container.StartedAt)
177 csv += strconv.FormatFloat(delta.Seconds(), 'f', 0, 64) + ","
// A non-zero CloudNode price selects the CloudNode price and size; the
// other branch takes the size from ProviderType.
// NOTE(review): the price used in that other branch is set on a line not
// visible here; confirm.
183 if node.Properties.CloudNode.Price != 0 {
184 price = node.Properties.CloudNode.Price
185 size = node.Properties.CloudNode.Size
188 size = node.ProviderType
// Seconds / 3600 converts the run time to hours; price is per hour.
190 cost = delta.Seconds() / 3600 * price
191 csv += size + "," + fmt.Sprintf("%+v", node.Preemptible) + "," + strconv.FormatFloat(price, 'f', 8, 64) + "," + strconv.FormatFloat(cost, 'f', 8, 64) + "\n"
// loadCachedObject tries to fill object from the cache file at path file and
// reports through reload whether the caller still has to fetch it.
//
// Projects ("-j7d0g-") and collections ("-4zz18-") are never served from the
// cache. For other uuids the file is stat'ed, read and JSON-decoded into
// object. The decoded value is then inspected: a container request must be in
// the Final state, a container must be Complete or Cancelled, and in those
// cases a "Loaded object ... from local cache" debug message is logged.
// Read and decode failures are logged at error level.
//
// object is expected to hold a pointer; the type switch matches
// *arvados.ContainerRequest and *arvados.Container.
//
// NOTE(review): the return statements are not visible in this excerpt.
// Presumably reload is true on every path except a successful load of an
// object in a final state; confirm.
195 func loadCachedObject(logger *logrus.Logger, file string, uuid string, object interface{}) (reload bool) {
197 if strings.Contains(uuid, "-j7d0g-") || strings.Contains(uuid, "-4zz18-") {
198 // We do not cache projects or collections, they have no final state
201 // See if we have a cached copy of this object
202 _, err := os.Stat(file)
206 data, err := ioutil.ReadFile(file)
208 logger.Errorf("error reading %q: %s", file, err)
211 err = json.Unmarshal(data, &object)
213 logger.Errorf("failed to unmarshal json: %s: %s", data, err)
217 // See if it is in a final state, if that makes sense
218 switch v := object.(type) {
219 case *arvados.ContainerRequest:
220 if v.State == arvados.ContainerRequestStateFinal {
222 logger.Debugf("Loaded object %s from local cache (%s)\n", uuid, file)
224 case *arvados.Container:
225 if v.State == arvados.ContainerStateComplete || v.State == arvados.ContainerStateCancelled {
227 logger.Debugf("Loaded object %s from local cache (%s)\n", uuid, file)
// loadObject fills object with the Arvados record identified by uuid, using
// an on-disk JSON cache where possible.
//
// The cache lives in <home>/.cache/arvados/costanalyzer/ with one file per
// object named <uuid>.json. If the home directory cannot be determined or the
// cache directory cannot be created, an info message is logged and the cache
// is not used. Otherwise loadCachedObject decides whether a reload is needed.
//
// A fetch picks the API endpoint from the uuid infix: "-j7d0g-" groups,
// "-xvhdp-" container_requests, "-dz642-" containers, "-4zz18-" collections;
// anything else is an "unsupported object type" error. The fetched object is
// marshalled as indented JSON and written to the cache file with mode 0644.
//
// NOTE(review): the conditions around the cache lookup, the fetch and the
// cache write are not visible in this excerpt, and the path parameter is not
// used in any visible line; confirm both.
233 // Load an Arvados object.
234 func loadObject(logger *logrus.Logger, ac *arvados.Client, path string, uuid string, cache bool, object interface{}) (err error) {
235 file := uuid + ".json"
243 homeDir, err := os.UserHomeDir()
246 logger.Info("Unable to determine current user home directory, not using cache")
248 cacheDir = homeDir + "/.cache/arvados/costanalyzer/"
249 err = ensureDirectory(logger, cacheDir)
252 logger.Infof("Unable to create cache directory at %s, not using cache: %s", cacheDir, err.Error())
254 reload = loadCachedObject(logger, cacheDir+file, uuid, object)
// Dispatch on the type infix embedded in the uuid.
262 if strings.Contains(uuid, "-j7d0g-") {
263 err = ac.RequestAndDecode(&object, "GET", "arvados/v1/groups/"+uuid, nil, nil)
264 } else if strings.Contains(uuid, "-xvhdp-") {
265 err = ac.RequestAndDecode(&object, "GET", "arvados/v1/container_requests/"+uuid, nil, nil)
266 } else if strings.Contains(uuid, "-dz642-") {
267 err = ac.RequestAndDecode(&object, "GET", "arvados/v1/containers/"+uuid, nil, nil)
268 } else if strings.Contains(uuid, "-4zz18-") {
269 err = ac.RequestAndDecode(&object, "GET", "arvados/v1/collections/"+uuid, nil, nil)
// NOTE(review): no request has been issued on this path, so the err that is
// formatted into the message is presumably nil or stale; confirm and consider
// dropping it from the message.
271 err = fmt.Errorf("unsupported object type with UUID %q:\n %s", uuid, err)
275 err = fmt.Errorf("error loading object with UUID %q:\n %s", uuid, err)
278 encoded, err := json.MarshalIndent(object, "", " ")
280 err = fmt.Errorf("error marshaling object with UUID %q:\n %s", uuid, err)
284 err = ioutil.WriteFile(cacheDir+file, encoded, 0644)
286 err = fmt.Errorf("error writing file %s:\n %s", file, err)
// getNode returns the node information recorded for container request cr.
//
// It fails with "no log collection" when cr.LogUUID is empty. Otherwise it
// fetches the log collection from the API, opens it as a collection
// filesystem, opens the file 'node.json' in it and JSON-decodes that file
// into node. Each step wraps its error with a message naming the step.
//
// NOTE(review): no Close call on the opened file is visible in this excerpt;
// confirm the file is closed. The arv parameter is not used in any visible
// line.
293 func getNode(arv *arvadosclient.ArvadosClient, ac *arvados.Client, kc *keepclient.KeepClient, cr arvados.ContainerRequest) (node nodeInfo, err error) {
294 if cr.LogUUID == "" {
295 err = errors.New("no log collection")
299 var collection arvados.Collection
300 err = ac.RequestAndDecode(&collection, "GET", "arvados/v1/collections/"+cr.LogUUID, nil, nil)
302 err = fmt.Errorf("error getting collection: %s", err)
306 var fs arvados.CollectionFileSystem
307 fs, err = collection.FileSystem(ac, kc)
309 err = fmt.Errorf("error opening collection as filesystem: %s", err)
313 f, err = fs.Open("node.json")
315 err = fmt.Errorf("error opening file 'node.json' in collection %s: %s", cr.LogUUID, err)
319 err = json.NewDecoder(f).Decode(&node)
321 err = fmt.Errorf("error reading file 'node.json' in collection %s: %s", cr.LogUUID, err)
// handleProject computes the costs for the container requests of a project.
//
// It loads the project (group) record for uuid, queries the
// container_requests endpoint with a filter set that involves the project
// UUID and the requesting_container_uuid attribute, and calls generateCrCsv
// for the uuid of every returned item. The per-container costs returned by
// those calls are collected into the cost map that is returned. When the
// response has no "items" key an info message saying that no top level
// container requests were found is logged.
//
// NOTE(review): the operators and remaining fields of the filters and the
// body of the merge loop are not visible in this excerpt. Presumably the
// filters select requests owned by the project that have no requesting
// container, and the loop copies each entry into cost; confirm.
// NOTE(review): the response is decoded into a generic map and then converted
// with single-value type assertions, which panic if the response shape is not
// the expected one. Decoding into arvados.ContainerRequestList, as
// generateCrCsv does, would avoid that.
327 func handleProject(logger *logrus.Logger, uuid string, arv *arvadosclient.ArvadosClient, ac *arvados.Client, kc *keepclient.KeepClient, resultsDir string, cache bool) (cost map[string]float64, err error) {
328 cost = make(map[string]float64)
330 var project arvados.Group
331 err = loadObject(logger, ac, uuid, uuid, cache, &project)
333 return nil, fmt.Errorf("error loading object %s: %s", uuid, err.Error())
336 var childCrs map[string]interface{}
337 filterset := []arvados.Filter{
341 Operand: project.UUID,
344 Attr: "requesting_container_uuid",
349 err = ac.RequestAndDecode(&childCrs, "GET", "arvados/v1/container_requests", nil, map[string]interface{}{
350 "filters": filterset,
354 return nil, fmt.Errorf("error querying container_requests: %s", err.Error())
356 if value, ok := childCrs["items"]; ok {
357 logger.Infof("Collecting top level container requests in project %s\n", uuid)
358 items := value.([]interface{})
359 for _, item := range items {
360 itemMap := item.(map[string]interface{})
361 crCsv, err := generateCrCsv(logger, itemMap["uuid"].(string), arv, ac, kc, resultsDir, cache)
363 return nil, fmt.Errorf("error generating container_request CSV: %s", err.Error())
365 for k, v := range crCsv {
370 logger.Infof("No top level container requests found in project %s\n", uuid)
// generateCrCsv produces the cost report for one container request and
// returns the cost of every container involved, keyed by container UUID.
//
// uuid may name a collection ("-4zz18-"); in that case the container request
// is taken from the collection's 'container_request' property, which must
// exist and be a string. The container request and its container are loaded,
// the node information is read via getNode and a CSV row is produced by
// addContainerLine. Then every container request whose
// requesting_container_uuid filter matches the top container is processed the
// same way. A TOTAL row closes the report. When resultsDir is not empty the
// report is written to <resultsDir>/<crUUID>.csv with mode 0644.
//
// Because the map is keyed by container UUID, a container reused by several
// requests occupies a single entry.
//
// NOTE(review): the declaration of crUUID and the statements appending tmpCsv
// to csv are not visible in this excerpt; confirm.
375 func generateCrCsv(logger *logrus.Logger, uuid string, arv *arvadosclient.ArvadosClient, ac *arvados.Client, kc *keepclient.KeepClient, resultsDir string, cache bool) (cost map[string]float64, err error) {
377 cost = make(map[string]float64)
379 csv := "CR UUID,CR name,Container UUID,State,Started At,Finished At,Duration in seconds,Compute node type,Preemptible,Hourly node cost,Total cost\n"
381 var tmpTotalCost float64
382 var totalCost float64
385 if strings.Contains(uuid, "-4zz18-") {
386 // This is a collection, find the associated container request (if any)
387 var c arvados.Collection
388 err = loadObject(logger, ac, uuid, uuid, cache, &c)
390 return nil, fmt.Errorf("error loading collection object %s: %s", uuid, err)
392 value, ok := c.Properties["container_request"]
394 return nil, fmt.Errorf("error: collection %s does not have a 'container_request' property", uuid)
396 crUUID, ok = value.(string)
398 return nil, fmt.Errorf("error: collection %s does not have a 'container_request' property of the string type", uuid)
402 // This is a container request, find the container
403 var cr arvados.ContainerRequest
404 err = loadObject(logger, ac, crUUID, crUUID, cache, &cr)
// NOTE(review): the message reports uuid although crUUID is what was loaded;
// the two differ when uuid names a collection.
406 return nil, fmt.Errorf("error loading cr object %s: %s", uuid, err)
408 var container arvados.Container
409 err = loadObject(logger, ac, crUUID, cr.ContainerUUID, cache, &container)
411 return nil, fmt.Errorf("error loading container object %s: %s", cr.ContainerUUID, err)
414 topNode, err := getNode(arv, ac, kc, cr)
416 return nil, fmt.Errorf("error getting node %s: %s", cr.UUID, err)
418 tmpCsv, totalCost = addContainerLine(logger, topNode, cr, container)
// NOTE(review): tmpTotalCost has not been assigned in any visible line before
// this point, so this addition looks like a no-op; confirm.
420 totalCost += tmpTotalCost
421 cost[container.UUID] = totalCost
423 // Find all container requests that have the container we found above as requesting_container_uuid
424 var childCrs arvados.ContainerRequestList
425 filterset := []arvados.Filter{
427 Attr: "requesting_container_uuid",
429 Operand: container.UUID,
431 err = ac.RequestAndDecode(&childCrs, "GET", "arvados/v1/container_requests", nil, map[string]interface{}{
432 "filters": filterset,
436 return nil, fmt.Errorf("error querying container_requests: %s", err.Error())
438 logger.Infof("Collecting child containers for container request %s", crUUID)
439 for _, cr2 := range childCrs.Items {
441 node, err := getNode(arv, ac, kc, cr2)
443 return nil, fmt.Errorf("error getting node %s: %s", cr2.UUID, err)
445 logger.Debug("\nChild container: " + cr2.ContainerUUID + "\n")
446 var c2 arvados.Container
447 err = loadObject(logger, ac, cr.UUID, cr2.ContainerUUID, cache, &c2)
449 return nil, fmt.Errorf("error loading object %s: %s", cr2.ContainerUUID, err)
451 tmpCsv, tmpTotalCost = addContainerLine(logger, node, cr2, c2)
452 cost[cr2.ContainerUUID] = tmpTotalCost
454 totalCost += tmpTotalCost
456 logger.Info(" done\n")
// NOTE(review): the TOTAL row appears to have one field fewer than the
// 11-column header, which would put the amount under "Hourly node cost";
// count the commas and confirm.
458 csv += "TOTAL,,,,,,,,," + strconv.FormatFloat(totalCost, 'f', 8, 64) + "\n"
460 if resultsDir != "" {
461 // Write the resulting CSV file
462 fName := resultsDir + "/" + crUUID + ".csv"
463 err = ioutil.WriteFile(fName, []byte(csv), 0644)
465 return nil, fmt.Errorf("error writing file with path %s: %s", fName, err.Error())
467 logger.Infof("\nUUID report in %s\n\n", fName)
// costanalyzer is the entry point of the cost analysis command.
//
// It parses the arguments with parseFlags, makes sure the output directory
// exists when one was requested, and sets up the Arvados and Keep clients
// (arvados.NewClientFromEnv reads the connection settings from the
// environment). Each uuid is then dispatched on its type infix:
//   - "-j7d0g-": project, handled by handleProject
//   - "-xvhdp-" or "-4zz18-": container request or collection, handled by
//     generateCrCsv
//   - "-tpzed-": user uuid; an error is logged because the "Home" project is
//     not supported
//   - anything else: an error is logged saying the argument does not look
//     like a uuid
//
// The per-container costs are gathered in the cost map and rendered as an
// aggregate CSV: a comment header listing the supplied uuids, one
// "uuid,cost" row per entry (8 decimals) and a TOTAL row. When resultsDir is
// not empty the CSV is written to
// <resultsDir>/<timestamp>-aggregate-costaccounting.csv with mode 0644. The
// total is always printed on stdout.
//
// NOTE(review): the exit codes set on each failure, the condition for the
// "Nothing to do!" message and the summation of total are not visible in this
// excerpt; confirm.
// NOTE(review): the aggregate rows are emitted in map iteration order, which
// Go does not keep stable between runs; sort the keys if reproducible output
// matters.
473 func costanalyzer(prog string, args []string, loader *config.Loader, logger *logrus.Logger, stdout, stderr io.Writer) (exitcode int, err error) {
474 exitcode, uuids, resultsDir, cache, err := parseFlags(prog, args, loader, logger, stderr)
478 if resultsDir != "" {
479 err = ensureDirectory(logger, resultsDir)
486 // Arvados Client setup
487 arv, err := arvadosclient.MakeArvadosClient()
489 err = fmt.Errorf("error creating Arvados object: %s", err)
493 kc, err := keepclient.MakeKeepClient(arv)
495 err = fmt.Errorf("error creating Keep object: %s", err)
500 ac := arvados.NewClientFromEnv()
502 cost := make(map[string]float64)
503 for _, uuid := range uuids {
504 if strings.Contains(uuid, "-j7d0g-") {
505 // This is a project (group)
// NOTE(review): the result is assigned straight to the aggregate map, which
// replaces whatever earlier uuids contributed; the container request branch
// uses a separate variable and merges. Looks like a bug; confirm.
506 cost, err = handleProject(logger, uuid, arv, ac, kc, resultsDir, cache)
511 for k, v := range cost {
514 } else if strings.Contains(uuid, "-xvhdp-") || strings.Contains(uuid, "-4zz18-") {
515 // This is a container request
516 var crCsv map[string]float64
517 crCsv, err = generateCrCsv(logger, uuid, arv, ac, kc, resultsDir, cache)
519 err = fmt.Errorf("error generating CSV for uuid %s: %s", uuid, err.Error())
523 for k, v := range crCsv {
526 } else if strings.Contains(uuid, "-tpzed-") {
527 // This is a user. The "Home" project for a user is not a real project.
528 // It is identified by the user uuid. As such, cost analysis for the
529 // "Home" project is not supported by this program. Skip this uuid, but
531 logger.Errorf("cost analysis is not supported for the 'Home' project: %s", uuid)
533 logger.Errorf("this argument does not look like a uuid: %s\n", uuid)
540 logger.Info("Nothing to do!\n")
546 csv = "# Aggregate cost accounting for uuids:\n"
547 for _, uuid := range uuids {
548 csv += "# " + uuid + "\n"
552 for k, v := range cost {
553 csv += k + "," + strconv.FormatFloat(v, 'f', 8, 64) + "\n"
557 csv += "TOTAL," + strconv.FormatFloat(total, 'f', 8, 64) + "\n"
559 if resultsDir != "" {
560 // Write the resulting CSV file
561 aFile := resultsDir + "/" + time.Now().Format("2006-01-02-15-04-05") + "-aggregate-costaccounting.csv"
562 err = ioutil.WriteFile(aFile, []byte(csv), 0644)
564 err = fmt.Errorf("error writing file with path %s: %s", aFile, err.Error())
568 logger.Infof("Aggregate cost accounting for all supplied uuids in %s\n", aFile)
571 // Output the total dollar amount on stdout
572 fmt.Fprintf(stdout, "%s\n", strconv.FormatFloat(total, 'f', 8, 64))