Do not pipe into `grep -q`, because that stops reading as soon as a
[arvados.git] / tools / crunchstat-summary / crunchstat_summary / command.py
1 # Copyright (C) The Arvados Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: AGPL-3.0
4
5 import argparse
6 import gzip
7 from io import open
8 import logging
9 import sys
10
11 from crunchstat_summary import logger, summarizer
12
13
class ArgumentParser(argparse.ArgumentParser):
    """Command-line argument parser for the crunchstat summary command.

    The log source options (``--job`` / ``--container-request``,
    ``--container``, ``--pipeline-instance`` and ``--log-file``) are
    registered in one mutually exclusive group, so at most one of them
    may be given.  The remaining options (``--skip-child-jobs``,
    ``--format``, ``--threads`` and ``--verbose``) are independent of the
    chosen source.
    """

    def __init__(self):
        super(ArgumentParser, self).__init__(
            description='Summarize resource usage of an Arvados Crunch job')

        # Tail shared verbatim by the --job and --container help texts.
        keep_log_tail = (
            'and read its log data from Keep (or from the Arvados event log, '
            'if the job is still running)')

        # Where the log data comes from: at most one of these is accepted.
        source = self.add_mutually_exclusive_group()
        source.add_argument(
            '--job', '--container-request',
            type=str,
            metavar='UUID',
            help='Look up the specified job or container request '
                 + keep_log_tail)
        source.add_argument(
            '--container',
            type=str,
            metavar='UUID',
            help='[Deprecated] Look up the specified container find its container request '
                 + keep_log_tail)
        source.add_argument(
            '--pipeline-instance',
            type=str,
            metavar='UUID',
            help='[Deprecated] Summarize each component of the given pipeline instance (historical pre-1.4)')
        source.add_argument(
            '--log-file',
            type=str,
            help='Read log data from a regular file')

        # Options that apply regardless of the selected source.
        self.add_argument(
            '--skip-child-jobs',
            action='store_true',
            help='Do not include stats from child jobs/containers')
        self.add_argument(
            '--format',
            type=str,
            choices=('html', 'text'),
            default='text',
            help='Report format')
        self.add_argument(
            '--threads',
            type=int,
            default=8,
            help='Maximum worker threads to run')
        self.add_argument(
            '--verbose', '-v',
            action='count',
            default=0,
            help='Log more information (once for progress, twice for debug)')
49
50
class UTF8Decode(object):
    """Iterator adapter that decodes UTF-8 bytes into text.

    ``fh`` must be an iterator (something ``next()`` can be called on)
    that yields byte strings.  Each item is decoded as UTF-8 when it is
    pulled.  The wrapper can also be used as a context manager; leaving
    the ``with`` block calls ``close()``, which does nothing.
    """

    def __init__(self, fh):
        self.fh = fh

    def __iter__(self):
        return self

    def __next__(self):
        # StopIteration from the wrapped iterator propagates unchanged.
        raw = next(self.fh)
        return raw.decode('utf-8')

    # Alias so the object also answers to a plain ``.next()`` call.
    next = __next__

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def close(self):
        # mimic Gzip behavior and don't close underlying object
        return None
74
75
class Command(object):
    """Build, run and report on the summarizer selected by parsed arguments.

    ``args`` is the parsed argument namespace; this class reads its
    ``verbose``, ``skip_child_jobs``, ``threads``, ``pipeline_instance``,
    ``job``, ``container``, ``log_file`` and ``format`` attributes.
    """

    def __init__(self, args):
        self.args = args
        logger.setLevel(self._log_level(args.verbose))

    @staticmethod
    def _log_level(verbose):
        """Translate a ``--verbose`` count into a logging level.

        Each repetition lowers the level by 10 starting from WARNING,
        but never below DEBUG.
        """
        # Without the floor, a count of 3 would give level 0, which is
        # logging.NOTSET: the logger would then defer to its ancestors'
        # level instead of emitting everything, so -vvv could be quieter
        # than -vv.
        return max(logging.DEBUG, logging.WARNING - 10 * verbose)

    def run(self):
        """Create the summarizer for the chosen source and run it.

        Source precedence is: pipeline instance, job / container request,
        container, log file, and finally standard input.  The summarizer
        is stored on ``self.summer`` and the result of its ``run()`` is
        returned.
        """
        kwargs = {
            'skip_child_jobs': self.args.skip_child_jobs,
            'threads': self.args.threads,
        }
        # All three UUID-style sources go through the same factory; pick
        # the first one that was supplied, in the documented order.
        uuid = (self.args.pipeline_instance
                or self.args.job
                or self.args.container)
        if uuid:
            self.summer = summarizer.NewSummarizer(uuid, **kwargs)
        elif self.args.log_file:
            if self.args.log_file.endswith('.gz'):
                # gzip.open() yields bytes by default, so decode on the fly.
                fh = UTF8Decode(gzip.open(self.args.log_file))
            else:
                fh = open(self.args.log_file, mode='r', encoding='utf-8')
            # The handle is handed over to the summarizer; it is not
            # closed here.
            self.summer = summarizer.Summarizer(fh, **kwargs)
        else:
            self.summer = summarizer.Summarizer(sys.stdin, **kwargs)
        return self.summer.run()

    def report(self):
        """Return the report in the format named by ``args.format``.

        Must be called after ``run()``, which sets ``self.summer``.
        Returns ``None`` if the format is neither ``'html'`` nor
        ``'text'``.
        """
        if self.args.format == 'html':
            return self.summer.html_report()
        elif self.args.format == 'text':
            return self.summer.text_report()