From: Peter Amstutz Date: Fri, 23 Feb 2024 15:05:01 +0000 (-0500) Subject: 19744: Remove jobs/pipeline templates from crunchstat-summary X-Git-Url: https://git.arvados.org/arvados.git/commitdiff_plain/2c3c89fef054c10fad443fb549d99b199a4aa452 19744: Remove jobs/pipeline templates from crunchstat-summary Arvados-DCO-1.1-Signed-off-by: Peter Amstutz --- diff --git a/.licenseignore b/.licenseignore index d7faa0c3f1..1e1c12a53a 100644 --- a/.licenseignore +++ b/.licenseignore @@ -53,6 +53,8 @@ sdk/cwl/tests/tool/blub.txt sdk/cwl/tests/19109-upload-secondary/* sdk/cwl/tests/federation/data/* sdk/cwl/tests/fake-keep-mount/fake_collection_dir/.arvados#collection +sdk/cwl/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-arv-mount.txt +sdk/cwl/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-crunchstat.txt sdk/go/manifest/testdata/*_manifest sdk/java/.classpath sdk/java/pom.xml diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py index 63e04a157e..70202743c4 100644 --- a/sdk/cwl/arvados_cwl/arvcontainer.py +++ b/sdk/cwl/arvados_cwl/arvcontainer.py @@ -536,8 +536,11 @@ class ArvadosContainer(JobBase): if logc is not None: try: - summerizer = crunchstat_summary.summarizer.Summarizer(crunchstat_summary.reader.CollectionReader(logc.manifest_locator(), collection_object=logc), - label=self.name, arv=self.arvrunner.api) + summerizer = crunchstat_summary.summarizer.ContainerRequestSummarizer( + record, + collection_object=logc, + label=self.name, + arv=self.arvrunner.api) summerizer.run() with logc.open("usage_report.html", "wt") as mr: mr.write(summerizer.html_report()) diff --git a/tools/crunchstat-summary/crunchstat_summary/command.py b/tools/crunchstat-summary/crunchstat_summary/command.py index 4ece5c3b2e..c5a1068eff 100644 --- a/tools/crunchstat-summary/crunchstat_summary/command.py +++ b/tools/crunchstat-summary/crunchstat_summary/command.py @@ -9,7 +9,7 @@ import logging import sys import arvados -from crunchstat_summary import logger, summarizer +from crunchstat_summary import logger, summarizer, reader from crunchstat_summary._version import __version__ @@ -30,9 +30,6 @@ class ArgumentParser(argparse.ArgumentParser): help='[Deprecated] Look up the specified container find its container request ' 'and read its log data from Keep (or from the Arvados event log, ' 'if the job is still running)') - src.add_argument( - '--pipeline-instance', type=str, metavar='UUID', - help='[Deprecated] Summarize each component of the given pipeline instance (historical pre-1.4)') src.add_argument( '--log-file', type=str, help='Read log data from a regular file') @@ -89,9 +86,7 @@ class Command(object): 'threads': self.args.threads, 'arv': arvados.api('v1') } - if self.args.pipeline_instance: - self.summer = summarizer.NewSummarizer(self.args.pipeline_instance, **kwargs) - elif self.args.job: + if self.args.job: self.summer = summarizer.NewSummarizer(self.args.job, **kwargs) elif self.args.container: self.summer = summarizer.NewSummarizer(self.args.container, **kwargs) @@ -100,9 +95,9 @@ class Command(object): fh = UTF8Decode(gzip.open(self.args.log_file)) else: fh = open(self.args.log_file, mode = 'r', encoding = 'utf-8') - self.summer = summarizer.Summarizer(fh, **kwargs) + self.summer = summarizer.Summarizer(reader.StubReader(fh), **kwargs) else: - self.summer = summarizer.Summarizer(sys.stdin, **kwargs) + self.summer = summarizer.Summarizer(reader.StubReader(sys.stdin), **kwargs) return self.summer.run() def report(self): diff --git a/tools/crunchstat-summary/crunchstat_summary/reader.py b/tools/crunchstat-summary/crunchstat_summary/reader.py index e79c4ef5b3..0198d765c3 100644 --- a/tools/crunchstat-summary/crunchstat_summary/reader.py +++ b/tools/crunchstat-summary/crunchstat_summary/reader.py @@ -74,7 +74,7 @@ class LiveLogReader(object): ['event_type', 'in', self.event_types]] try: while True: - page = arvados.api().logs().index( + page = arvados.api().logs().list( limit=1000, order=['id asc'], filters=filters + [['id','>',str(last_id)]], @@ -116,3 +116,25 @@ class LiveLogReader(object): def __exit__(self, exc_type, exc_val, exc_tb): pass + + def node_info(self): + return {} + +class StubReader(object): + def __init__(self, fh): + self.fh = fh + + def __str__(self): + return "" + + def __iter__(self): + return iter(self.fh) + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + pass + + def node_info(self): + return {} diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py index 9b6e5f1690..65cee6c176 100644 --- a/tools/crunchstat-summary/crunchstat_summary/summarizer.py +++ b/tools/crunchstat-summary/crunchstat_summary/summarizer.py @@ -66,6 +66,8 @@ class Summarizer(object): # constructor will overwrite this with something useful. self.existing_constraints = {} self.node_info = {} + self.cost = 0 + self.arv_config = {} logger.info("%s: logdata %s", self.label, logdata) @@ -75,82 +77,23 @@ class Summarizer(object): self._run(logdata) def _run(self, logdata): - self.detected_crunch1 = False - if not self.node_info: self.node_info = logdata.node_info() for line in logdata: - if not self.detected_crunch1 and '-8i9sb-' in line: - self.detected_crunch1 = True - - if self.detected_crunch1: - m = re.search(r'^\S+ \S+ \d+ (?P\d+) job_task (?P\S+)$', line) - if m: - seq = int(m.group('seq')) - uuid = m.group('task_uuid') - self.seq_to_uuid[seq] = uuid - logger.debug('%s: seq %d is task %s', self.label, seq, uuid) - continue - - m = re.search(r'^\S+ \S+ \d+ (?P\d+) (success in|failure \(#., permanent\) after) (?P\d+) seconds', line) - if m: - task_id = self.seq_to_uuid[int(m.group('seq'))] - elapsed = int(m.group('elapsed')) - self.task_stats[task_id]['time'] = {'elapsed': elapsed} - if elapsed > self.stats_max['time']['elapsed']: - self.stats_max['time']['elapsed'] = elapsed - continue - - m = re.search(r'^\S+ \S+ \d+ (?P\d+) stderr Queued job (?P\S+)$', line) - if m: - uuid = m.group('uuid') - if self._skip_child_jobs: - logger.warning('%s: omitting stats from child job %s' - ' because --skip-child-jobs flag is on', - self.label, uuid) - continue - logger.debug('%s: follow %s', self.label, uuid) - child_summarizer = NewSummarizer(uuid) - child_summarizer.stats_max = self.stats_max - child_summarizer.task_stats = self.task_stats - child_summarizer.tasks = self.tasks - child_summarizer.starttime = self.starttime - child_summarizer.run() - logger.debug('%s: done %s', self.label, uuid) - continue - - # 2017-12-02_17:15:08 e51c5-8i9sb-mfp68stkxnqdd6m 63676 0 stderr crunchstat: keepcalls 0 put 2576 get -- interval 10.0000 seconds 0 put 2576 get - m = re.search(r'^(?P[^\s.]+)(\.\d+)? (?P\S+) \d+ (?P\d+) stderr (?Pcrunchstat: )(?P\S+) (?P.*?)( -- interval (?P.*))?\n$', line) - if not m: - continue - else: - # crunch2 - # 2017-12-01T16:56:24.723509200Z crunchstat: keepcalls 0 put 3 get -- interval 10.0000 seconds 0 put 3 get - m = re.search(r'^(?P\S+) (?Pcrunchstat: )?(?P\S+) (?P.*?)( -- interval (?P.*))?\n$', line) - if not m: - continue + # crunch2 + # 2017-12-01T16:56:24.723509200Z crunchstat: keepcalls 0 put 3 get -- interval 10.0000 seconds 0 put 3 get + m = re.search(r'^(?P\S+) (?Pcrunchstat: )?(?P\S+) (?P.*?)( -- interval (?P.*))?\n$', line) + if not m: + continue if self.label is None: try: self.label = m.group('job_uuid') except IndexError: self.label = 'label #1' - category = m.group('category') - if category.endswith(':'): - # "stderr crunchstat: notice: ..." - continue - elif category in ('error', 'caught'): - continue - elif category in ('read', 'open', 'cgroup', 'CID', 'Running'): - # "stderr crunchstat: read /proc/1234/net/dev: ..." - # (old logs are less careful with unprefixed error messages) - continue - if self.detected_crunch1: - task_id = self.seq_to_uuid[int(m.group('seq'))] - else: - task_id = 'container' + task_id = 'container' task = self.tasks[task_id] # Use the first and last crunchstat timestamps as @@ -179,12 +122,23 @@ class Summarizer(object): if self.finishtime is None or timestamp > self.finishtime: self.finishtime = timestamp - if (not self.detected_crunch1) and task.starttime is not None and task.finishtime is not None: + if task.starttime is not None and task.finishtime is not None: elapsed = (task.finishtime - task.starttime).seconds self.task_stats[task_id]['time'] = {'elapsed': elapsed} if elapsed > self.stats_max['time']['elapsed']: self.stats_max['time']['elapsed'] = elapsed + category = m.group('category') + if category.endswith(':'): + # "stderr crunchstat: notice: ..." + continue + elif category in ('error', 'caught'): + continue + elif category in ('read', 'open', 'cgroup', 'CID', 'Running'): + # "stderr crunchstat: read /proc/1234/net/dev: ..." + # (old logs are less careful with unprefixed error messages) + continue + this_interval_s = None for group in ['current', 'interval']: if not m.group(group): @@ -359,7 +313,7 @@ class Summarizer(object): ('Requested CPU cores', self.existing_constraints.get(self._map_runtime_constraint('vcpus')), None, - ''), + '') if self.existing_constraints.get(self._map_runtime_constraint('vcpus')) else None, ('Instance VCPUs', self.node_info.get('VCPUs'), @@ -374,12 +328,12 @@ class Summarizer(object): ('Requested RAM', self.existing_constraints.get(self._map_runtime_constraint('ram')), lambda x: x / 2**20, - 'MB'), + 'MB') if self.existing_constraints.get(self._map_runtime_constraint('ram')) else None, ('Maximum RAM request for this instance type', - (self.node_info.get('RAM') - self.arv_config.get('Containers', {}).get('ReserveExtraRAM', {}))*.95, + (self.node_info.get('RAM') - self.arv_config.get('Containers', {}).get('ReserveExtraRAM', 0))*.95, lambda x: x / 2**20, - 'MB'), + 'MB') if self.node_info.get('RAM') else None, ('Max network traffic{}'.format(by_single_task), self.stats_max['net:eth0']['tx+rx'] + @@ -509,6 +463,8 @@ class Summarizer(object): if used_bytes == float('-Inf'): logger.warning('%s: no memory usage data', self.label) return + if not self.existing_constraints.get(constraint_key): + return used_mib = math.ceil(float(used_bytes) / MB) asked_mib = self.existing_constraints.get(constraint_key) / MB @@ -574,18 +530,11 @@ class Summarizer(object): def _runtime_constraint_mem_unit(self): if hasattr(self, 'runtime_constraint_mem_unit'): return self.runtime_constraint_mem_unit - elif self.detected_crunch1: - return JobSummarizer.runtime_constraint_mem_unit else: return ContainerRequestSummarizer.runtime_constraint_mem_unit def _map_runtime_constraint(self, key): - if hasattr(self, 'map_runtime_constraint'): - return self.map_runtime_constraint[key] - elif self.detected_crunch1: - return JobSummarizer.map_runtime_constraint[key] - else: - return key + return key class CollectionSummarizer(Summarizer): @@ -617,14 +566,6 @@ def NewSummarizer(process_or_uuid, **kwargs): if process is None: process = arv.container_requests().get(uuid=uuid).execute() klass = ContainerRequestTreeSummarizer - elif '-8i9sb-' in uuid: - if process is None: - process = arv.jobs().get(uuid=uuid).execute() - klass = JobTreeSummarizer - elif '-d1hrv-' in uuid: - if process is None: - process = arv.pipeline_instances().get(uuid=uuid).execute() - klass = PipelineSummarizer elif '-4zz18-' in uuid: return CollectionSummarizer(collection_id=uuid) else: @@ -646,7 +587,10 @@ class ProcessSummarizer(Summarizer): log_collection = self.process.get('log', self.process.get('log_uuid')) if log_collection and self.process.get('state') != 'Uncommitted': # arvados.util.CR_UNCOMMITTED: try: - rdr = crunchstat_summary.reader.CollectionReader(log_collection, api_client=arv) + rdr = crunchstat_summary.reader.CollectionReader( + log_collection, + api_client=arv, + collection_object=kwargs.get("collection_object")) except arvados.errors.NotFoundError as e: logger.warning("Trying event logs after failing to read " "log collection %s: %s", self.process['log'], e) @@ -661,16 +605,6 @@ class ProcessSummarizer(Summarizer): self.cost = self.process.get('cost', 0) - -class JobSummarizer(ProcessSummarizer): - runtime_constraint_mem_unit = MB - map_runtime_constraint = { - 'keep_cache_ram': 'keep_cache_mb_per_task', - 'ram': 'min_ram_mb_per_node', - 'vcpus': 'min_cores_per_node', - } - - class ContainerRequestSummarizer(ProcessSummarizer): runtime_constraint_mem_unit = 1 @@ -741,51 +675,6 @@ class MultiSummarizer(object): return WEBCHART_CLASS(label, iter(self._descendants().values())).html(tophtml, bottomhtml) -class JobTreeSummarizer(MultiSummarizer): - """Summarizes a job and all children listed in its components field.""" - def __init__(self, job, label=None, **kwargs): - arv = kwargs.get("arv") or arvados.api('v1') - label = label or job.get('name', job['uuid']) - children = collections.OrderedDict() - children[job['uuid']] = JobSummarizer(job, label=label, **kwargs) - if job.get('components', None): - preloaded = {} - for j in arv.jobs().index( - limit=len(job['components']), - filters=[['uuid','in',list(job['components'].values())]]).execute()['items']: - preloaded[j['uuid']] = j - for cname in sorted(job['components'].keys()): - child_uuid = job['components'][cname] - j = (preloaded.get(child_uuid) or - arv.jobs().get(uuid=child_uuid).execute()) - children[child_uuid] = JobTreeSummarizer(job=j, label=cname, **kwargs) - - super(JobTreeSummarizer, self).__init__( - children=children, - label=label, - **kwargs) - - -class PipelineSummarizer(MultiSummarizer): - def __init__(self, instance, **kwargs): - children = collections.OrderedDict() - for cname, component in instance['components'].items(): - if 'job' not in component: - logger.warning( - "%s: skipping component with no job assigned", cname) - else: - logger.info( - "%s: job %s", cname, component['job']['uuid']) - summarizer = JobTreeSummarizer(component['job'], label=cname, **kwargs) - summarizer.label = '{} {}'.format( - cname, component['job']['uuid']) - children[cname] = summarizer - super(PipelineSummarizer, self).__init__( - children=children, - label=instance['uuid'], - **kwargs) - - class ContainerRequestTreeSummarizer(MultiSummarizer): def __init__(self, root, skip_child_jobs=False, **kwargs): arv = kwargs.get("arv") or arvados.api('v1') diff --git a/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk.txt.gz.report b/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk.txt.gz.report index 868f07b684..e00faafb00 100644 --- a/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk.txt.gz.report +++ b/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk.txt.gz.report @@ -25,15 +25,14 @@ statfs available 397744787456 - 397744787456 statfs total 402611240960 - 402611240960 statfs used 4870303744 52426.18 4866453504 time elapsed 20 - 20 -# Number of tasks: 1 -# Max CPU time spent by a single task: 2.45s +# Elapsed time: 20s # Max CPU usage in a single interval: 23.70% # Overall CPU usage: 12.25% -# Max memory used by a single task: 0.07GB -# Max network traffic in a single task: 0.00GB +# Requested CPU cores: 1 +# Max memory used: 66.30MB +# Requested RAM: 2500.00MB +# Max network traffic: 0.00GB # Max network speed in a single interval: 0.00MB/s -# Keep cache miss rate 0.00% -# Keep cache utilization 0.00% -# Temp disk utilization 1.21% -#!! container max RSS was 67 MiB -- try reducing runtime_constraints to "ram":1020054732 -#!! container max temp disk utilization was 1% of 383960 MiB -- consider reducing "tmpdirMin" and/or "outdirMin" +# Keep cache miss rate: 0.00% +# Keep cache utilization: 0.00% +# Temp disk utilization: 1.21% diff --git a/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-arv-mount.txt.gz.report b/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-arv-mount.txt.gz.report index f77059b824..6afdf9aa69 100644 --- a/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-arv-mount.txt.gz.report +++ b/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-arv-mount.txt.gz.report @@ -11,13 +11,12 @@ net:keep0 rx 0 0 0 net:keep0 tx 0 0 0 net:keep0 tx+rx 0 0 0 time elapsed 10 - 10 -# Number of tasks: 1 -# Max CPU time spent by a single task: 0s +# Elapsed time: 10s # Max CPU usage in a single interval: 0% # Overall CPU usage: 0.00% -# Max memory used by a single task: 0.00GB -# Max network traffic in a single task: 0.00GB +# Max memory used: 0.00MB +# Max network traffic: 0.00GB # Max network speed in a single interval: 0.00MB/s -# Keep cache miss rate 0.00% -# Keep cache utilization 0.00% -# Temp disk utilization 0.00% +# Keep cache miss rate: 0.00% +# Keep cache utilization: 0.00% +# Temp disk utilization: 0.00% diff --git a/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-crunchstat.txt.gz.report b/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-crunchstat.txt.gz.report index 87db98bb37..fa1ad04e7b 100644 --- a/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-crunchstat.txt.gz.report +++ b/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-crunchstat.txt.gz.report @@ -14,15 +14,12 @@ statfs available 397744787456 - 397744787456 statfs total 402611240960 - 402611240960 statfs used 4870303744 52426.18 4866453504 time elapsed 20 - 20 -# Number of tasks: 1 -# Max CPU time spent by a single task: 2.45s +# Elapsed time: 20s # Max CPU usage in a single interval: 23.70% # Overall CPU usage: 12.25% -# Max memory used by a single task: 0.07GB -# Max network traffic in a single task: 0.00GB +# Max memory used: 66.30MB +# Max network traffic: 0.00GB # Max network speed in a single interval: 0.00MB/s -# Keep cache miss rate 0.00% -# Keep cache utilization 0.00% -# Temp disk utilization 1.21% -#!! label #1 max RSS was 67 MiB -- try reducing runtime_constraints to "ram":1020054732 -#!! label #1 max temp disk utilization was 1% of 383960 MiB -- consider reducing "tmpdirMin" and/or "outdirMin" +# Keep cache miss rate: 0.00% +# Keep cache utilization: 0.00% +# Temp disk utilization: 1.21% diff --git a/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y.txt.gz.report b/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y.txt.gz.report index 868f07b684..e00faafb00 100644 --- a/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y.txt.gz.report +++ b/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y.txt.gz.report @@ -25,15 +25,14 @@ statfs available 397744787456 - 397744787456 statfs total 402611240960 - 402611240960 statfs used 4870303744 52426.18 4866453504 time elapsed 20 - 20 -# Number of tasks: 1 -# Max CPU time spent by a single task: 2.45s +# Elapsed time: 20s # Max CPU usage in a single interval: 23.70% # Overall CPU usage: 12.25% -# Max memory used by a single task: 0.07GB -# Max network traffic in a single task: 0.00GB +# Requested CPU cores: 1 +# Max memory used: 66.30MB +# Requested RAM: 2500.00MB +# Max network traffic: 0.00GB # Max network speed in a single interval: 0.00MB/s -# Keep cache miss rate 0.00% -# Keep cache utilization 0.00% -# Temp disk utilization 1.21% -#!! container max RSS was 67 MiB -- try reducing runtime_constraints to "ram":1020054732 -#!! container max temp disk utilization was 1% of 383960 MiB -- consider reducing "tmpdirMin" and/or "outdirMin" +# Keep cache miss rate: 0.00% +# Keep cache utilization: 0.00% +# Temp disk utilization: 1.21% diff --git a/tools/crunchstat-summary/tests/crunchstat_error_messages.txt b/tools/crunchstat-summary/tests/crunchstat_error_messages.txt index bf6dd5ceaf..2b93639281 100644 --- a/tools/crunchstat-summary/tests/crunchstat_error_messages.txt +++ b/tools/crunchstat-summary/tests/crunchstat_error_messages.txt @@ -1,9 +1,9 @@ -2016-01-07_00:15:33 tb05z-8i9sb-khsk5rmf4xjdcbl 20819 0 stderr +2016-01-07_00:15:33 tb05z-8i9sb-khsk5rmf4xjdcbl 20819 0 stderr 2016-01-07_00:15:33 tb05z-8i9sb-khsk5rmf4xjdcbl 20819 0 stderr old error message: 2016-01-07_00:15:33 tb05z-8i9sb-khsk5rmf4xjdcbl 20819 0 stderr crunchstat: read /proc/3305/net/dev: open /proc/3305/net/dev: no such file or directory -2016-01-07_00:15:34 tb05z-8i9sb-khsk5rmf4xjdcbl 20819 0 stderr +2016-01-07_00:15:34 tb05z-8i9sb-khsk5rmf4xjdcbl 20819 0 stderr 2016-01-07_00:15:34 tb05z-8i9sb-khsk5rmf4xjdcbl 20819 0 stderr new error message: 2016-01-07_00:15:34 tb05z-8i9sb-khsk5rmf4xjdcbl 20819 0 stderr crunchstat: error reading /proc/3305/net/dev: open /proc/3305/net/dev: no such file or directory 2016-01-07_00:15:34 tb05z-8i9sb-khsk5rmf4xjdcbl 20819 0 stderr 2016-01-07_00:15:34 tb05z-8i9sb-khsk5rmf4xjdcbl 20819 0 stderr cancelled job: -2016-01-07_00:15:34 tb05z-8i9sb-khsk5rmf4xjdcbl 20819 0 stderr crunchstat: caught signal: interrupt +2016-01-07_00:15:59 tb05z-8i9sb-khsk5rmf4xjdcbl 20819 0 stderr crunchstat: caught signal: interrupt diff --git a/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz b/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz deleted file mode 100644 index bfdcdff26f..0000000000 Binary files a/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz and /dev/null differ diff --git a/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz.report b/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz.report deleted file mode 100644 index 173e93fe25..0000000000 --- a/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz.report +++ /dev/null @@ -1,35 +0,0 @@ -category metric task_max task_max_rate job_total -blkio:0:0 read 0 0 0 -blkio:0:0 write 0 0 0 -cpu cpus 8.00 - - -cpu sys 1.92 0.04 1.92 -cpu user 3.83 0.09 3.83 -cpu user+sys 5.75 0.13 5.75 -fuseops read 0 0 0 -fuseops write 0 0 0 -keepcache hit 0 0 0 -keepcache miss 0 0 0 -keepcalls get 0 0 0 -keepcalls put 0 0 0 -mem cache 1678139392 - - -mem pgmajfault 0 - 0 -mem rss 349814784 - - -mem swap 0 - - -net:eth0 rx 1754364530 41658344.87 1754364530 -net:eth0 tx 38837956 920817.97 38837956 -net:eth0 tx+rx 1793202486 42579162.83 1793202486 -net:keep0 rx 0 0 0 -net:keep0 tx 0 0 0 -net:keep0 tx+rx 0 0 0 -time elapsed 80 - 80 -# Number of tasks: 1 -# Max CPU time spent by a single task: 5.75s -# Max CPU usage in a single interval: 13.00% -# Overall CPU usage: 7.19% -# Max memory used by a single task: 0.35GB -# Max network traffic in a single task: 1.79GB -# Max network speed in a single interval: 42.58MB/s -# Keep cache miss rate 0.00% -# Keep cache utilization 0.00% -# Temp disk utilization 0.00% -#!! 4xphq-8i9sb-jq0ekny1xou3zoh max RSS was 334 MiB -- try reducing runtime_constraints to "min_ram_mb_per_node":972 diff --git a/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz b/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz deleted file mode 100644 index 17af535108..0000000000 Binary files a/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz and /dev/null differ diff --git a/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz.report b/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz.report deleted file mode 100644 index b31a055e9f..0000000000 --- a/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz.report +++ /dev/null @@ -1,24 +0,0 @@ -category metric task_max task_max_rate job_total -cpu cpus 8.00 - - -cpu sys 0 - 0.00 -cpu user 0 - 0.00 -cpu user+sys 0 - 0.00 -mem cache 12288 - - -mem pgmajfault 0 - 0 -mem rss 856064 - - -mem swap 0 - - -net:eth0 rx 90 - 90 -net:eth0 tx 90 - 90 -net:eth0 tx+rx 180 - 180 -time elapsed 2 - 4 -# Number of tasks: 2 -# Max CPU time spent by a single task: 0s -# Max CPU usage in a single interval: 0% -# Overall CPU usage: 0.00% -# Max memory used by a single task: 0.00GB -# Max network traffic in a single task: 0.00GB -# Max network speed in a single interval: 0.00MB/s -# Keep cache miss rate 0.00% -# Keep cache utilization 0.00% -# Temp disk utilization 0.00% -#!! 4xphq-8i9sb-zvb2ocfycpomrup max RSS was 1 MiB -- try reducing runtime_constraints to "min_ram_mb_per_node":972 diff --git a/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz b/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz deleted file mode 100644 index 8826f70470..0000000000 Binary files a/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz and /dev/null differ diff --git a/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz.report b/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz.report deleted file mode 100644 index 9ddf5acc32..0000000000 --- a/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz.report +++ /dev/null @@ -1,24 +0,0 @@ -category metric task_max task_max_rate job_total -cpu cpus 8.00 - - -cpu sys 0 - 0.00 -cpu user 0 - 0.00 -cpu user+sys 0 - 0.00 -mem cache 8192 - - -mem pgmajfault 0 - 0 -mem rss 450560 - - -mem swap 0 - - -net:eth0 rx 90 - 90 -net:eth0 tx 90 - 90 -net:eth0 tx+rx 180 - 180 -time elapsed 2 - 3 -# Number of tasks: 2 -# Max CPU time spent by a single task: 0s -# Max CPU usage in a single interval: 0% -# Overall CPU usage: 0.00% -# Max memory used by a single task: 0.00GB -# Max network traffic in a single task: 0.00GB -# Max network speed in a single interval: 0.00MB/s -# Keep cache miss rate 0.00% -# Keep cache utilization 0.00% -# Temp disk utilization 0.00% -#!! 4xphq-8i9sb-v831jm2uq0g2g9x max RSS was 1 MiB -- try reducing runtime_constraints to "min_ram_mb_per_node":972 diff --git a/tools/crunchstat-summary/tests/test_examples.py b/tools/crunchstat-summary/tests/test_examples.py index 444cfe4ef8..5a20d3283f 100644 --- a/tools/crunchstat-summary/tests/test_examples.py +++ b/tools/crunchstat-summary/tests/test_examples.py @@ -16,7 +16,7 @@ import sys import unittest from crunchstat_summary.command import UTF8Decode -from crunchstat_summary import logger +from crunchstat_summary import logger, reader TESTS_DIR = os.path.dirname(os.path.abspath(__file__)) @@ -71,14 +71,13 @@ class HTMLFromFile(TestCase): class SummarizeEdgeCases(TestCase): def test_error_messages(self): logfile = io.open(os.path.join(TESTS_DIR, 'crunchstat_error_messages.txt'), encoding='utf-8') - s = crunchstat_summary.summarizer.Summarizer(logfile) + s = crunchstat_summary.summarizer.Summarizer(reader.StubReader(logfile)) s.run() self.assertRegex(self.logbuf.getvalue(), r'CPU stats are missing -- possible cluster configuration issue') self.assertRegex(self.logbuf.getvalue(), r'memory stats are missing -- possible cluster configuration issue') self.assertRegex(self.logbuf.getvalue(), r'network I/O stats are missing -- possible cluster configuration issue') self.assertRegex(self.logbuf.getvalue(), r'storage space stats are missing -- possible cluster configuration issue') - class SummarizeContainerCommon(TestCase): fake_container = { 'uuid': '9tee4-dz642-lymtndkpy39eibk', @@ -106,20 +105,19 @@ class SummarizeContainerCommon(TestCase): @mock.patch('arvados.api') def check_common(self, mock_api, mock_cr): items = [ {'items':[self.fake_request]}] + [{'items':[]}] * 100 - # Index and list mean the same thing, but are used in different places in the - # code. It's fragile, but exploit that fact to distinguish the two uses. - mock_api().container_requests().index().execute.return_value = {'items': [] } # child_crs mock_api().container_requests().list().execute.side_effect = items # parent request mock_api().container_requests().get().execute.return_value = self.fake_request mock_api().containers().get().execute.return_value = self.fake_container mock_cr().__iter__.return_value = [ 'crunch-run.txt', 'stderr.txt', 'node-info.txt', 'container.json', 'crunchstat.txt', 'arv-mount.txt'] - def _open(n): + def _open(n, mode): if n == "crunchstat.txt": return UTF8Decode(gzip.open(self.logfile)) elif n == "arv-mount.txt": return UTF8Decode(gzip.open(self.arvmountlog)) + elif n == "node.json": + return io.StringIO("{}") mock_cr().open.side_effect = _open args = crunchstat_summary.command.ArgumentParser().parse_args( self.arg_strings) @@ -147,184 +145,3 @@ class SummarizeContainerRequest(SummarizeContainerCommon): self.check_common() self.assertNotRegex(self.logbuf.getvalue(), r'stats are missing') self.assertNotRegex(self.logbuf.getvalue(), r'possible cluster configuration issue') - - -class SummarizeJob(TestCase): - fake_job_uuid = '4xphq-8i9sb-jq0ekny1xou3zoh' - fake_log_id = 'fake-log-collection-id' - fake_job = { - 'uuid': fake_job_uuid, - 'log': fake_log_id, - } - logfile = os.path.join(TESTS_DIR, 'logfile_20151204190335.txt.gz') - - @mock.patch('arvados.collection.CollectionReader') - @mock.patch('arvados.api') - def test_job_report(self, mock_api, mock_cr): - mock_api().jobs().get().execute.return_value = self.fake_job - mock_cr().__iter__.return_value = ['fake-logfile.txt'] - mock_cr().open.return_value = UTF8Decode(gzip.open(self.logfile)) - args = crunchstat_summary.command.ArgumentParser().parse_args( - ['--job', self.fake_job_uuid]) - cmd = crunchstat_summary.command.Command(args) - cmd.run() - self.diff_known_report(self.logfile, cmd) - mock_api().jobs().get.assert_called_with(uuid=self.fake_job_uuid) - mock_cr.assert_called_with(self.fake_log_id) - mock_cr().open.assert_called_with('fake-logfile.txt') - - -class SummarizePipeline(TestCase): - fake_instance = { - 'uuid': 'zzzzz-d1hrv-i3e77t9z5y8j9cc', - 'owner_uuid': 'zzzzz-tpzed-xurymjxw79nv3jz', - 'components': collections.OrderedDict([ - ['foo', { - 'job': { - 'uuid': 'zzzzz-8i9sb-000000000000000', - 'log': 'fake-log-pdh-0', - 'runtime_constraints': { - 'min_ram_mb_per_node': 900, - 'min_cores_per_node': 1, - }, - }, - }], - ['bar', { - 'job': { - 'uuid': 'zzzzz-8i9sb-000000000000001', - 'log': 'fake-log-pdh-1', - 'runtime_constraints': { - 'min_ram_mb_per_node': 900, - 'min_cores_per_node': 1, - }, - }, - }], - ['no-job-assigned', {}], - ['unfinished-job', { - 'job': { - 'uuid': 'zzzzz-8i9sb-xxxxxxxxxxxxxxx', - }, - }], - ['baz', { - 'job': { - 'uuid': 'zzzzz-8i9sb-000000000000002', - 'log': 'fake-log-pdh-2', - 'runtime_constraints': { - 'min_ram_mb_per_node': 900, - 'min_cores_per_node': 1, - }, - }, - }]]), - } - - @mock.patch('arvados.collection.CollectionReader') - @mock.patch('arvados.api') - def test_pipeline(self, mock_api, mock_cr): - logfile = os.path.join(TESTS_DIR, 'logfile_20151204190335.txt.gz') - mock_api().pipeline_instances().get().execute. \ - return_value = self.fake_instance - mock_cr().__iter__.return_value = ['fake-logfile.txt'] - mock_cr().open.side_effect = [UTF8Decode(gzip.open(logfile)) for _ in range(3)] - args = crunchstat_summary.command.ArgumentParser().parse_args( - ['--pipeline-instance', self.fake_instance['uuid']]) - cmd = crunchstat_summary.command.Command(args) - cmd.run() - - with io.open(logfile+'.report', encoding='utf-8') as f: - job_report = [line for line in f if not line.startswith('#!! ')] - expect = ( - ['### Summary for foo (zzzzz-8i9sb-000000000000000)\n'] + - job_report + ['\n'] + - ['### Summary for bar (zzzzz-8i9sb-000000000000001)\n'] + - job_report + ['\n'] + - ['### Summary for unfinished-job (partial) (zzzzz-8i9sb-xxxxxxxxxxxxxxx)\n', - '(no report generated)\n', - '\n'] + - ['### Summary for baz (zzzzz-8i9sb-000000000000002)\n'] + - job_report) - self.diff_report(cmd, expect) - mock_cr.assert_has_calls( - [ - mock.call('fake-log-pdh-0'), - mock.call('fake-log-pdh-1'), - mock.call('fake-log-pdh-2'), - ], any_order=True) - mock_cr().open.assert_called_with('fake-logfile.txt') - - -class SummarizeACRJob(TestCase): - fake_job = { - 'uuid': 'zzzzz-8i9sb-i3e77t9z5y8j9cc', - 'owner_uuid': 'zzzzz-tpzed-xurymjxw79nv3jz', - 'components': { - 'foo': 'zzzzz-8i9sb-000000000000000', - 'bar': 'zzzzz-8i9sb-000000000000001', - 'unfinished-job': 'zzzzz-8i9sb-xxxxxxxxxxxxxxx', - 'baz': 'zzzzz-8i9sb-000000000000002', - } - } - fake_jobs_index = { 'items': [ - { - 'uuid': 'zzzzz-8i9sb-000000000000000', - 'log': 'fake-log-pdh-0', - 'runtime_constraints': { - 'min_ram_mb_per_node': 900, - 'min_cores_per_node': 1, - }, - }, - { - 'uuid': 'zzzzz-8i9sb-000000000000001', - 'log': 'fake-log-pdh-1', - 'runtime_constraints': { - 'min_ram_mb_per_node': 900, - 'min_cores_per_node': 1, - }, - }, - { - 'uuid': 'zzzzz-8i9sb-xxxxxxxxxxxxxxx', - }, - { - 'uuid': 'zzzzz-8i9sb-000000000000002', - 'log': 'fake-log-pdh-2', - 'runtime_constraints': { - 'min_ram_mb_per_node': 900, - 'min_cores_per_node': 1, - }, - }, - ]} - @mock.patch('arvados.collection.CollectionReader') - @mock.patch('arvados.api') - def test_acr_job(self, mock_api, mock_cr): - logfile = os.path.join(TESTS_DIR, 'logfile_20151204190335.txt.gz') - mock_api().jobs().index().execute.return_value = self.fake_jobs_index - mock_api().jobs().get().execute.return_value = self.fake_job - mock_cr().__iter__.return_value = ['fake-logfile.txt'] - mock_cr().open.side_effect = [UTF8Decode(gzip.open(logfile)) for _ in range(3)] - args = crunchstat_summary.command.ArgumentParser().parse_args( - ['--job', self.fake_job['uuid']]) - cmd = crunchstat_summary.command.Command(args) - cmd.run() - - with io.open(logfile+'.report', encoding='utf-8') as f: - job_report = [line for line in f if not line.startswith('#!! ')] - expect = ( - ['### Summary for zzzzz-8i9sb-i3e77t9z5y8j9cc (partial) (zzzzz-8i9sb-i3e77t9z5y8j9cc)\n', - '(no report generated)\n', - '\n'] + - ['### Summary for bar (zzzzz-8i9sb-000000000000001)\n'] + - job_report + ['\n'] + - ['### Summary for baz (zzzzz-8i9sb-000000000000002)\n'] + - job_report + ['\n'] + - ['### Summary for foo (zzzzz-8i9sb-000000000000000)\n'] + - job_report + ['\n'] + - ['### Summary for unfinished-job (partial) (zzzzz-8i9sb-xxxxxxxxxxxxxxx)\n', - '(no report generated)\n'] - ) - self.diff_report(cmd, expect) - mock_cr.assert_has_calls( - [ - mock.call('fake-log-pdh-0'), - mock.call('fake-log-pdh-1'), - mock.call('fake-log-pdh-2'), - ], any_order=True) - mock_cr().open.assert_called_with('fake-logfile.txt')