Graph temp disk usage
author Tom Morris <tfmorris@veritasgenetics.com>
Tue, 23 Apr 2019 00:48:53 +0000 (20:48 -0400)
committer Tom Morris <tfmorris@veritasgenetics.com>
Tue, 9 Jul 2019 18:24:35 +0000 (14:24 -0400)
Plots temp disk space used as well as total available.

Also adds support for multiple metrics per graph and
groups CPU and network stats by category.

refs #13913

Arvados-DCO-1.1-Signed-off-by: Tom Morris <tfmorris@veritasgenetics.com>

tools/crunchstat-summary/crunchstat_summary/dygraphs.py
tools/crunchstat-summary/crunchstat_summary/summarizer.py
tools/crunchstat-summary/crunchstat_summary/webchart.py

index 6df440a14e37f87f8fcea5cac7c57ca1269915b4..10f1f26f46610dac96ed2b9f1c5cc5e8db0ba528 100644 (file)
@@ -13,21 +13,34 @@ class DygraphsChart(crunchstat_summary.webchart.WebChart):
     def headHTML(self):
         return '<link rel="stylesheet" href="{}">\n'.format(self.CSS)
 
-    def chartdata(self, label, tasks, stat):
+    def chartdata(self, label, tasks, stats):
+        '''For Crunch2, label is the name of container request,
+        tasks is the top level container and
+        stats is a tuple of (category, list of metric names).
+        '''
         return {
-            'data': self._collate_data(tasks, stat),
+            'data': self._collate_data(tasks, stats),
             'options': {
+                'legend': 'always',
                 'connectSeparatedPoints': True,
-                'labels': ['elapsed']+[uuid for uuid, _ in tasks.items()],
-                'title': '{}: {} {}'.format(label, stat[0], stat[1]),
+                'labels': ['elapsed'] +  stats[1],
+                'title': '{}: {}'.format(label, stats[0]),
             },
         }
 
-    def _collate_data(self, tasks, stat):
+    def _collate_data(self, tasks, stats):
         data = []
         nulls = []
+        # uuid is category for crunch2
         for uuid, task in tasks.items():
-            for pt in task.series[stat]:
-                data.append([pt[0].total_seconds()] + nulls + [pt[1]])
+            # All stats in a category are assumed to have the same time base and same number of samples
+            category = stats[0]
+            series_names = stats[1]
+            sn0 = series_names[0]
+            series = task.series[(category,sn0)]
+            for i in range(len(series)):
+                pt = series[i]
+                vals = [task.series[(category,stat)][i][1] for stat in series_names[1:]]
+                data.append([pt[0].total_seconds()] + nulls + [pt[1]] + vals)
             nulls.append(None)
         return sorted(data)
index 884f16b4a7db36f64d95bc256938352837f91412..a86702ed70acebffe3abc776f26a13b75208ad72 100644 (file)
@@ -129,13 +129,14 @@ class Summarizer(object):
                 try:
                     self.label = m.group('job_uuid')
                 except IndexError:
-                    self.label = 'container'
-            if m.group('category').endswith(':'):
+                    self.label = 'label #1'
+            category = m.group('category')
+            if category.endswith(':'):
                 # "stderr crunchstat: notice: ..."
                 continue
-            elif m.group('category') in ('error', 'caught'):
+            elif category in ('error', 'caught'):
                 continue
-            elif m.group('category') in ('read', 'open', 'cgroup', 'CID', 'Running'):
+            elif category in ('read', 'open', 'cgroup', 'CID', 'Running'):
                 # "stderr crunchstat: read /proc/1234/net/dev: ..."
                 # (old logs are less careful with unprefixed error messages)
                 continue
@@ -221,11 +222,11 @@ class Summarizer(object):
                     if group == 'interval' and this_interval_s:
                             stat = stat + '__rate'
                             val = val / this_interval_s
-                            if stat in ['user+sys__rate', 'tx+rx__rate']:
+                            if stat in ['user+sys__rate', 'user__rate', 'sys__rate', 'tx+rx__rate', 'rx__rate', 'tx__rate']:
                                 task.series[category, stat].append(
                                     (timestamp - self.starttime, val))
                     else:
-                        if stat in ['rss']:
+                        if stat in ['rss','used','total']:
                             task.series[category, stat].append(
                                 (timestamp - self.starttime, val))
                         self.task_stats[task_id][category][stat] = val
@@ -315,7 +316,13 @@ class Summarizer(object):
                  (float(self.job_tot['blkio:0:0']['read']) /
                  float(self.job_tot['net:keep0']['rx']))
                  if self.job_tot['net:keep0']['rx'] > 0 else 0,
-                 lambda x: x * 100.0)):
+                 lambda x: x * 100.0),
+               ('Temp disk utilization {}%',
+                 (float(self.job_tot['statfs']['used']) /
+                 float(self.job_tot['statfs']['total']))
+                 if self.job_tot['statfs']['total'] > 0 else 0,
+                 lambda x: x * 100.0),
+                ):
             format_string, val, transform = args
             if val == float('-Inf'):
                 continue
index cf0c1e67aa1ffdcf7853b2b1271bb2f03b16bae2..31afcf64e906166788bf06b9caa4ed191ead13c9 100644 (file)
@@ -45,10 +45,13 @@ class WebChart(object):
                 'label': s.long_label(),
                 'charts': [
                     self.chartdata(s.label, s.tasks, stat)
-                    for stat in (('cpu', 'user+sys__rate'),
-                                 ('mem', 'rss'),
-                                 ('net:eth0', 'tx+rx__rate'),
-                                 ('net:keep0', 'tx+rx__rate'))],
+                    for stat in (('cpu', ['user+sys__rate', 'user__rate', 'sys__rate']),
+                                 ('mem', ['rss']),
+                                 ('net:eth0', ['tx+rx__rate','rx__rate','tx__rate']),
+                                 ('net:keep0', ['tx+rx__rate','rx__rate','tx__rate']),
+                                 ('statfs', ['used', 'total']),
+                                 )
+                    ],
             }
             for s in self.summarizers]