Plots temp disk space used as well as total available.
Also adds support for multiple metrics per graph and
groups CPU and network stats by category.
refs #13913
Arvados-DCO-1.1-Signed-off-by: Tom Morris <tfmorris@veritasgenetics.com>
def headHTML(self):
return '<link rel="stylesheet" href="{}">\n'.format(self.CSS)
- def chartdata(self, label, tasks, stat):
+ def chartdata(self, label, tasks, stats):
+ '''For Crunch2, label is the name of the container request,
+ tasks is the top-level container, and
+ stats is a tuple of (category, list of metric names).
+ '''
return {
- 'data': self._collate_data(tasks, stat),
+ 'data': self._collate_data(tasks, stats),
'options': {
+ 'legend': 'always',
'connectSeparatedPoints': True,
- 'labels': ['elapsed']+[uuid for uuid, _ in tasks.items()],
- 'title': '{}: {} {}'.format(label, stat[0], stat[1]),
+ 'labels': ['elapsed'] + stats[1],
+ 'title': '{}: {}'.format(label, stats[0]),
},
}
- def _collate_data(self, tasks, stat):
+ def _collate_data(self, tasks, stats):
data = []
nulls = []
+ # uuid is category for crunch2
for uuid, task in tasks.items():
- for pt in task.series[stat]:
- data.append([pt[0].total_seconds()] + nulls + [pt[1]])
+ # All stats in a category are assumed to have the same time base and same number of samples
+ category = stats[0]
+ series_names = stats[1]
+ sn0 = series_names[0]
+ series = task.series[(category,sn0)]
+ for i in range(len(series)):
+ pt = series[i]
+ vals = [task.series[(category,stat)][i][1] for stat in series_names[1:]]
+ data.append([pt[0].total_seconds()] + nulls + [pt[1]] + vals)
nulls.append(None)
return sorted(data)
try:
self.label = m.group('job_uuid')
except IndexError:
- self.label = 'container'
- if m.group('category').endswith(':'):
+ self.label = 'label #1'
+ category = m.group('category')
+ if category.endswith(':'):
# "stderr crunchstat: notice: ..."
continue
- elif m.group('category') in ('error', 'caught'):
+ elif category in ('error', 'caught'):
continue
- elif m.group('category') in ('read', 'open', 'cgroup', 'CID', 'Running'):
+ elif category in ('read', 'open', 'cgroup', 'CID', 'Running'):
# "stderr crunchstat: read /proc/1234/net/dev: ..."
# (old logs are less careful with unprefixed error messages)
continue
if group == 'interval' and this_interval_s:
stat = stat + '__rate'
val = val / this_interval_s
- if stat in ['user+sys__rate', 'tx+rx__rate']:
+ if stat in ['user+sys__rate', 'user__rate', 'sys__rate', 'tx+rx__rate', 'rx__rate', 'tx__rate']:
task.series[category, stat].append(
(timestamp - self.starttime, val))
else:
- if stat in ['rss']:
+ if stat in ['rss','used','total']:
task.series[category, stat].append(
(timestamp - self.starttime, val))
self.task_stats[task_id][category][stat] = val
(float(self.job_tot['blkio:0:0']['read']) /
float(self.job_tot['net:keep0']['rx']))
if self.job_tot['net:keep0']['rx'] > 0 else 0,
- lambda x: x * 100.0)):
+ lambda x: x * 100.0),
+ ('Temp disk utilization {}%',
+ (float(self.job_tot['statfs']['used']) /
+ float(self.job_tot['statfs']['total']))
+ if self.job_tot['statfs']['total'] > 0 else 0,
+ lambda x: x * 100.0),
+ ):
format_string, val, transform = args
if val == float('-Inf'):
continue
'label': s.long_label(),
'charts': [
self.chartdata(s.label, s.tasks, stat)
- for stat in (('cpu', 'user+sys__rate'),
- ('mem', 'rss'),
- ('net:eth0', 'tx+rx__rate'),
- ('net:keep0', 'tx+rx__rate'))],
+ for stat in (('cpu', ['user+sys__rate', 'user__rate', 'sys__rate']),
+ ('mem', ['rss']),
+ ('net:eth0', ['tx+rx__rate','rx__rate','tx__rate']),
+ ('net:keep0', ['tx+rx__rate','rx__rate','tx__rate']),
+ ('statfs', ['used', 'total']),
+ )
+ ],
}
for s in self.summarizers]