From b03a6a8b2c20a0579cd724baeb9283bd5f0d1f08 Mon Sep 17 00:00:00 2001
From: Tim Pierce
Date: Fri, 12 Dec 2014 11:22:07 -0500
Subject: [PATCH] 4598: correct search criteria for Crunch job output

Crunch stderr output is stored in the properties/text field of logs with
event_type 'stderr'. Limited the filter criteria to event_type = 'stderr'
and modified text pattern filtering appropriately. Now uses
arvados.util.list_all to retrieve all stderr rows within the specified
time frame.
---
 services/api/script/crunch-failure-report.py | 38 +++++++++++---------
 1 file changed, 21 insertions(+), 17 deletions(-)

diff --git a/services/api/script/crunch-failure-report.py b/services/api/script/crunch-failure-report.py
index 5ba34c3c22..e2b5d2de49 100755
--- a/services/api/script/crunch-failure-report.py
+++ b/services/api/script/crunch-failure-report.py
@@ -3,7 +3,7 @@
 import argparse
 import datetime
 import json
-import pprint
+import re
 import sys
 
 import arvados
@@ -19,10 +19,9 @@ def parse_arguments(arguments):
         '--end',
         help='End date and time')
     arg_parser.add_argument(
-        '--summary',
-        action='append',
-        default=[],
-        help='SQL pattern (ILIKE syntax) to match on summary lines')
+        '--match',
+        default='fail',
+        help='Regular expression to match on Crunch error output lines.')
     return arg_parser.parse_args(arguments)
 
 
@@ -43,21 +42,26 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
     now = datetime.datetime.utcnow()
     start_time = args.start or api_timestamp(now - datetime.timedelta(days=1))
     end_time = args.end or api_timestamp(now)
+    match_re = re.compile(args.match)
 
-    summary_patterns = args.summary or ['%fail%']
-    logfilters = [['summary', 'ilike', pattern] for pattern in summary_patterns ]
-    logfilters.append( ['created_at', '>=', start_time] )
-    logfilters.append( ['created_at', '<=', end_time] )
-
-    logs = api.logs().list(
-        filters=json.dumps(logfilters)
-    ).execute()
+    logs = arvados.util.list_all(
+        api.logs().list,
+        filters=json.dumps([ ['created_at', '>=', start_time],
+                             ['created_at', '<=', end_time],
+                             ['event_type', '=', 'stderr'] ]))
 
     log_stats = {}
-    for log in logs['items']:
-        summary = log['summary']
-        log_uuid = log['uuid']
-        log_stats.setdefault(summary, []).append(log_uuid)
+    for log in logs:
+        for logline in log['properties']['text'].splitlines():
+            # Remove timestamp at beginning of log lines
+            # Mon Dec 1 23:59:55 2014
+            stderr_msg = re.sub(
+                '\w{3} \w{3} +\d+ \d{2}:\d{2}:\d{2} \d{4} +',
+                '',
+                logline)
+            if match_re.search(stderr_msg):
+                log_uuid = log['uuid']
+                log_stats.setdefault(stderr_msg, []).append(log_uuid)
 
     # Sort the keys of log stats in decreasing order of frequency.
     for k in sorted(log_stats.keys(), cmp=lambda a,b: cmp(len(log_stats[b]), len(log_stats[a]))):
-- 
2.39.5