From 4d72990a86b6f1c136a8fc5c644767af68f75eb1 Mon Sep 17 00:00:00 2001 From: Tom Clegg Date: Thu, 17 Dec 2015 10:05:30 -0500 Subject: [PATCH] 7901: Add job stats, elapsed time, summed user+sys and tx+rx, and some human-readable highlights. --- .../crunchstat_summary/summarizer.py | 98 +++++++++++++++---- .../logfile_20151204190335.txt.gz.report | 50 ++++++---- .../logfile_20151210063411.txt.gz.report | 27 +++-- .../logfile_20151210063439.txt.gz.report | 27 +++-- 4 files changed, 144 insertions(+), 58 deletions(-) diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py index 65160184d6..ac0964b30e 100644 --- a/tools/crunchstat-summary/crunchstat_summary/summarizer.py +++ b/tools/crunchstat-summary/crunchstat_summary/summarizer.py @@ -1,6 +1,8 @@ from __future__ import print_function import arvados +import collections +import functools import gzip import re import sys @@ -11,27 +13,46 @@ class Summarizer(object): self.args = args def run(self): - stats_max = {} + # stats_max: {category: {stat: val}} + self.stats_max = collections.defaultdict( + functools.partial(collections.defaultdict, + lambda: float('-Inf'))) + # task_stats: {task_id: {category: {stat: val}}} + self.task_stats = collections.defaultdict( + functools.partial(collections.defaultdict, dict)) for line in self._logdata(): + m = re.search(r'^\S+ \S+ \d+ (?P\d+) success in (?P\d+) seconds', line) + if m: + task_id = m.group('seq') + elapsed = int(m.group('elapsed')) + self.task_stats[task_id]['time'] = {'elapsed': elapsed} + if elapsed > self.stats_max['time']['elapsed']: + self.stats_max['time']['elapsed'] = elapsed + continue m = re.search(r'^\S+ \S+ \d+ (?P\d+) stderr crunchstat: (?P\S+) (?P.*?)( -- interval (?P.*))?\n', line) if not m: continue if m.group('category').endswith(':'): # "notice:" etc. continue + task_id = m.group('seq') this_interval_s = None for group in ['current', 'interval']: if not m.group(group): continue category = m.group('category') - if category not in stats_max: - stats_max[category] = {} words = m.group(group).split(' ') + stats = {} for val, stat in zip(words[::2], words[1::2]): if '.' in val: - val = float(val) + stats[stat] = float(val) else: - val = int(val) + stats[stat] = int(val) + if 'user' in stats or 'sys' in stats: + stats['user+sys'] = stats.get('user', 0) + stats.get('sys', 0) + if 'tx' in stats or 'rx' in stats: + stats['tx+rx'] = stats.get('tx', 0) + stats.get('rx', 0) + for stat, val in stats.iteritems(): if group == 'interval': if stat == 'seconds': this_interval_s = val @@ -44,26 +65,67 @@ class Summarizer(object): else: stat = stat + '__rate' val = val / this_interval_s - if val > stats_max[category].get(stat, float('-Inf')): - stats_max[category][stat] = val - self.stats_max = stats_max + else: + self.task_stats[task_id][category][stat] = val + if val > self.stats_max[category][stat]: + self.stats_max[category][stat] = val def report(self): return "\n".join(self._report_gen()) + "\n" def _report_gen(self): - yield "\t".join(['category', 'metric', 'max', 'max_rate']) - for category, stat_max in self.stats_max.iteritems(): - for stat, val in stat_max.iteritems(): + job_tot = collections.defaultdict( + functools.partial(collections.defaultdict, int)) + for task_id, task_stat in self.task_stats.iteritems(): + for category, stat_last in task_stat.iteritems(): + for stat, val in stat_last.iteritems(): + if stat in ['cpus', 'cache', 'swap', 'rss']: + # meaningless stats like 16 cpu cores x 5 tasks = 80 + continue + job_tot[category][stat] += val + yield "\t".join(['category', 'metric', 'task_max', 'task_max_rate', 'job_total']) + for category, stat_max in sorted(self.stats_max.iteritems()): + for stat, val in sorted(stat_max.iteritems()): if stat.endswith('__rate'): continue - if stat+'__rate' in stat_max: - max_rate = '{:.2f}'.format(stat_max[stat+'__rate']) - else: - max_rate = '-' - if isinstance(val, float): - val = '{:.2f}'.format(val) - yield "\t".join([category, stat, str(val), max_rate]) + max_rate = self._format(stat_max.get(stat+'__rate', '-')) + val = self._format(val) + tot = self._format(job_tot[category].get(stat, '-')) + yield "\t".join([category, stat, str(val), max_rate, tot]) + for args in ( + ('Max CPU time spent by a single task: {}s', + self.stats_max['cpu']['user+sys'], + None), + ('Max CPU usage in a single interval: {}%', + self.stats_max['cpu']['user+sys__rate'], + lambda x: x * 100), + ('Overall CPU usage: {}%', + job_tot['cpu']['user+sys'] / job_tot['time']['elapsed'], + lambda x: x * 100), + ('Max memory used by a single task: {}GB', + self.stats_max['mem']['rss'], + lambda x: x / 1e9), + ('Max network traffic in a single task: {}GB', + self.stats_max['net:eth0']['tx+rx'], + lambda x: x / 1e9), + ('Max network speed in a single interval: {}MB/s', + self.stats_max['net:eth0']['tx+rx__rate'], + lambda x: x / 1e6)): + format_string, val, transform = args + if val == float('-Inf'): + continue + if transform: + val = transform(val) + yield "# "+format_string.format(self._format(val)) + + def _format(self, val): + """Return a string representation of a stat. + + {:.2f} for floats, default format for everything else.""" + if isinstance(val, float): + return '{:.2f}'.format(val) + else: + return '{}'.format(val) def _logdata(self): if self.args.log_file: diff --git a/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz.report b/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz.report index 5ac1b28c01..ef7beb11c0 100644 --- a/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz.report +++ b/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz.report @@ -1,20 +1,30 @@ -category metric max max_rate -net:eth0 rx 1754364530 41658344.87 -net:eth0 tx 38837956 920817.97 -mem rss 349814784 - -mem cache 1678139392 - -mem swap 0 - -mem pgmajfault 0 - -keepcalls put 0 0.00 -keepcalls get 0 0.00 -keepcache miss 0 0.00 -keepcache hit 0 0.00 -blkio:0:0 write 0 0.00 -blkio:0:0 read 0 0.00 -net:keep0 rx 0 0.00 -net:keep0 tx 0 0.00 -cpu sys 1.92 0.04 -cpu user 3.83 0.09 -cpu cpus 8 - -fuseops write 0 0.00 -fuseops read 0 0.00 +category metric task_max task_max_rate job_total +blkio:0:0 read 0 0.00 0 +blkio:0:0 write 0 0.00 0 +cpu cpus 8 - - +cpu sys 1.92 0.04 1.92 +cpu user 3.83 0.09 3.83 +cpu user+sys 5.75 0.13 5.75 +fuseops read 0 0.00 0 +fuseops write 0 0.00 0 +keepcache hit 0 0.00 0 +keepcache miss 0 0.00 0 +keepcalls get 0 0.00 0 +keepcalls put 0 0.00 0 +mem cache 1678139392 - - +mem pgmajfault 0 - 0 +mem rss 349814784 - - +mem swap 0 - - +net:eth0 rx 1754364530 41658344.87 1754364530 +net:eth0 tx 38837956 920817.97 38837956 +net:eth0 tx+rx 1793202486 42579162.83 1793202486 +net:keep0 rx 0 0.00 0 +net:keep0 tx 0 0.00 0 +net:keep0 tx+rx 0 0.00 0 +time elapsed 80 - 80 +# Max CPU time spent by a single task: 5.75s +# Max CPU usage in a single interval: 13.00% +# Overall CPU usage: 7.19% +# Max memory used by a single task: 0.35GB +# Max network traffic in a single task: 1.79GB +# Max network speed in a single interval: 42.58MB/s diff --git a/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz.report b/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz.report index d056317827..38af3e7e8c 100644 --- a/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz.report +++ b/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz.report @@ -1,10 +1,17 @@ -category metric max max_rate -mem rss 856064 - -mem cache 12288 - -mem swap 0 - -mem pgmajfault 0 - -cpu sys 0.00 - -cpu user 0.00 - -cpu cpus 8 - -net:eth0 rx 90 - -net:eth0 tx 90 - +category metric task_max task_max_rate job_total +cpu cpus 8 - - +cpu sys 0.00 - 0.00 +cpu user 0.00 - 0.00 +cpu user+sys 0.00 - 0.00 +mem cache 12288 - - +mem pgmajfault 0 - 0 +mem rss 856064 - - +mem swap 0 - - +net:eth0 rx 90 - 90 +net:eth0 tx 90 - 90 +net:eth0 tx+rx 180 - 180 +time elapsed 2 - 4 +# Max CPU time spent by a single task: 0.00s +# Overall CPU usage: 0.00% +# Max memory used by a single task: 0.00GB +# Max network traffic in a single task: 0.00GB diff --git a/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz.report b/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz.report index d3939dc971..7e42d612b7 100644 --- a/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz.report +++ b/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz.report @@ -1,10 +1,17 @@ -category metric max max_rate -mem rss 450560 - -mem cache 8192 - -mem swap 0 - -mem pgmajfault 0 - -cpu sys 0.00 - -cpu user 0.00 - -cpu cpus 8 - -net:eth0 rx 90 - -net:eth0 tx 90 - +category metric task_max task_max_rate job_total +cpu cpus 8 - - +cpu sys 0.00 - 0.00 +cpu user 0.00 - 0.00 +cpu user+sys 0.00 - 0.00 +mem cache 8192 - - +mem pgmajfault 0 - 0 +mem rss 450560 - - +mem swap 0 - - +net:eth0 rx 90 - 90 +net:eth0 tx 90 - 90 +net:eth0 tx+rx 180 - 180 +time elapsed 2 - 3 +# Max CPU time spent by a single task: 0.00s +# Overall CPU usage: 0.00% +# Max memory used by a single task: 0.00GB +# Max network traffic in a single task: 0.00GB -- 2.39.5