22273: Remove built-in type subscript

[arvados.git] / tools / cluster-activity / arvados_cluster_activity / report.py
diff --git a/tools/cluster-activity/arvados_cluster_activity/report.py b/tools/cluster-activity/arvados_cluster_activity/report.py

index c0c49dea4523a4a4e02f070f2b608876f5d645c2..5737a2e20e9b36f998c77a80fdc62730458e159e 100644 (file)
--- a/tools/cluster-activity/arvados_cluster_activity/report.py
+++ b/tools/cluster-activity/arvados_cluster_activity/report.py
@@ -12,8 +12,7 @@ import math
  import collections
  import json
  from datetime import date, datetime, timedelta
-import pkg_resources
-from typing import List
+from typing import Dict, List
  import statistics
  
  from dataclasses import dataclass
@@ -34,7 +33,7 @@ class WorkflowRunSummary:
  class ProjectSummary:
      users: set
      uuid: str
-    runs: dict[str, WorkflowRunSummary]
+    runs: Dict[str, WorkflowRunSummary]
      earliest: datetime = datetime(year=9999, month=1, day=1)
      latest: datetime = datetime(year=1900, month=1, day=1)
      name: str = ""
@@ -139,9 +138,16 @@ class ClusterActivityReport(object):
  
          self.label = "Cluster report for %s from %s to %s" % (self.cluster, since.date(), to.date())
  
-        # If we already produced a CSV report we have summary stats
-        # and don't need to fetch everything all over again.
          if not self.summary_fetched:
+            # If we haven't done it already, need to fetch everything
+            # from the API to collect summary stats (report_from_api
+            # calls collect_summary_stats on each row).
+            #
+            # Because it is a Python generator, we need to call it in
+            # a loop to process all the rows.  This method also yields
+            # each row, which is used by a different function to create
+            # the CSV report, but for the HTML report we just discard
+            # them.
              for row in self.report_from_api(since, to, include_workflow_steps, exclude):
                  pass
  
@@ -190,7 +196,6 @@ class ClusterActivityReport(object):
          if workbench.endswith("/"):
              workbench = workbench[:-1]
  
-        print(to.date(), self.today())
          if to.date() == self.today():
              # The deduplication ratio overstates things a bit, you can
              # have collections which reference a small slice of a large
@@ -230,6 +235,24 @@ class ClusterActivityReport(object):
                         workbench=workbench,
                         data_rows=data_rows))
  
+        # We have a couple of options for getting total container hours:
+        #
+        # total_hours=container_cumulative_hours
+        #
+        # calculates the sum from Prometheus metrics.
+        #
+        # total_hours=self.total_hours
+        #
+        # calculates the sum over the containers that were fetched.
+        #
+        # The problem is these numbers tend not to match, especially
+        # if the report generation was not called with "include
+        # workflow steps".
+        #
+        # I decided to use the sum from containers fetched, because it
+        # will match the sum of compute time for each project listed
+        # in the report.
+
          cards.append("""<h2>Activity and cost over the {reporting_days} day period {since} to {to}</h2>
          <table class='aggtable'><tbody>
          <tr><th>Active users</th> <td>{active_users}</td></tr>
@@ -242,7 +265,6 @@ class ClusterActivityReport(object):
          <p>See <a href="#prices">note on usage and cost calculations</a> for details on how costs are calculated.</p>
          """.format(active_users=len(self.active_users),
                     total_users=self.total_users,
-                   #total_hours=container_cumulative_hours,
                     total_hours=self.total_hours,
                     total_cost=self.total_cost,
                     total_workflows=self.total_workflows,
@@ -309,14 +331,7 @@ class ClusterActivityReport(object):
                  {wfsum}
                  </tbody></table>
                  """.format(name=prj.name,
-                           users=", ".join(prj.users),
-                           cost=prj.cost,
-                           hours=prj.hours,
                             wfsum=" ".join(wfsum),
-                           earliest=prj.earliest.date(),
-                           latest=prj.latest.date(),
-                           activity=prj.activityspan,
-                           userplural='s' if len(prj.users) > 1 else '',
                             projectrow=prj.tablerow,
                             workbench=workbench,
                             uuid=prj.uuid)