From: Peter Amstutz <peter.amstutz@curii.com>
Date: Fri, 23 Feb 2024 15:05:01 +0000 (-0500)
Subject: 19744: Remove jobs/pipeline templates from crunchstat-summary
X-Git-Url: https://git.arvados.org/arvados.git/commitdiff_plain/2c3c89fef054c10fad443fb549d99b199a4aa452

19744: Remove jobs/pipeline templates from crunchstat-summary

Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz@curii.com>
---

diff --git a/.licenseignore b/.licenseignore
index d7faa0c3f1..1e1c12a53a 100644
--- a/.licenseignore
+++ b/.licenseignore
@@ -53,6 +53,8 @@ sdk/cwl/tests/tool/blub.txt
 sdk/cwl/tests/19109-upload-secondary/*
 sdk/cwl/tests/federation/data/*
 sdk/cwl/tests/fake-keep-mount/fake_collection_dir/.arvados#collection
+sdk/cwl/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-arv-mount.txt
+sdk/cwl/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-crunchstat.txt
 sdk/go/manifest/testdata/*_manifest
 sdk/java/.classpath
 sdk/java/pom.xml
diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py
index 63e04a157e..70202743c4 100644
--- a/sdk/cwl/arvados_cwl/arvcontainer.py
+++ b/sdk/cwl/arvados_cwl/arvcontainer.py
@@ -536,8 +536,11 @@ class ArvadosContainer(JobBase):
 
             if logc is not None:
                 try:
-                    summerizer = crunchstat_summary.summarizer.Summarizer(crunchstat_summary.reader.CollectionReader(logc.manifest_locator(), collection_object=logc),
-                                                                          label=self.name, arv=self.arvrunner.api)
+                    summerizer = crunchstat_summary.summarizer.ContainerRequestSummarizer(
+                        record,
+                        collection_object=logc,
+                        label=self.name,
+                        arv=self.arvrunner.api)
                     summerizer.run()
                     with logc.open("usage_report.html", "wt") as mr:
                         mr.write(summerizer.html_report())
diff --git a/tools/crunchstat-summary/crunchstat_summary/command.py b/tools/crunchstat-summary/crunchstat_summary/command.py
index 4ece5c3b2e..c5a1068eff 100644
--- a/tools/crunchstat-summary/crunchstat_summary/command.py
+++ b/tools/crunchstat-summary/crunchstat_summary/command.py
@@ -9,7 +9,7 @@ import logging
 import sys
 import arvados
 
-from crunchstat_summary import logger, summarizer
+from crunchstat_summary import logger, summarizer, reader
 from crunchstat_summary._version import __version__
 
 
@@ -30,9 +30,6 @@ class ArgumentParser(argparse.ArgumentParser):
             help='[Deprecated] Look up the specified container find its container request '
             'and read its log data from Keep (or from the Arvados event log, '
             'if the job is still running)')
-        src.add_argument(
-            '--pipeline-instance', type=str, metavar='UUID',
-            help='[Deprecated] Summarize each component of the given pipeline instance (historical pre-1.4)')
         src.add_argument(
             '--log-file', type=str,
             help='Read log data from a regular file')
@@ -89,9 +86,7 @@ class Command(object):
             'threads': self.args.threads,
             'arv': arvados.api('v1')
         }
-        if self.args.pipeline_instance:
-            self.summer = summarizer.NewSummarizer(self.args.pipeline_instance, **kwargs)
-        elif self.args.job:
+        if self.args.job:
             self.summer = summarizer.NewSummarizer(self.args.job, **kwargs)
         elif self.args.container:
             self.summer = summarizer.NewSummarizer(self.args.container, **kwargs)
@@ -100,9 +95,9 @@ class Command(object):
                 fh = UTF8Decode(gzip.open(self.args.log_file))
             else:
                 fh = open(self.args.log_file, mode = 'r', encoding = 'utf-8')
-            self.summer = summarizer.Summarizer(fh, **kwargs)
+            self.summer = summarizer.Summarizer(reader.StubReader(fh), **kwargs)
         else:
-            self.summer = summarizer.Summarizer(sys.stdin, **kwargs)
+            self.summer = summarizer.Summarizer(reader.StubReader(sys.stdin), **kwargs)
         return self.summer.run()
 
     def report(self):
diff --git a/tools/crunchstat-summary/crunchstat_summary/reader.py b/tools/crunchstat-summary/crunchstat_summary/reader.py
index e79c4ef5b3..0198d765c3 100644
--- a/tools/crunchstat-summary/crunchstat_summary/reader.py
+++ b/tools/crunchstat-summary/crunchstat_summary/reader.py
@@ -74,7 +74,7 @@ class LiveLogReader(object):
             ['event_type', 'in', self.event_types]]
         try:
             while True:
-                page = arvados.api().logs().index(
+                page = arvados.api().logs().list(
                     limit=1000,
                     order=['id asc'],
                     filters=filters + [['id','>',str(last_id)]],
@@ -116,3 +116,25 @@ class LiveLogReader(object):
 
     def __exit__(self, exc_type, exc_val, exc_tb):
         pass
+
+    def node_info(self):
+        return {}
+
+class StubReader(object):
+    def __init__(self, fh):
+        self.fh = fh
+
+    def __str__(self):
+        return ""
+
+    def __iter__(self):
+        return iter(self.fh)
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        pass
+
+    def node_info(self):
+        return {}
diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
index 9b6e5f1690..65cee6c176 100644
--- a/tools/crunchstat-summary/crunchstat_summary/summarizer.py
+++ b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
@@ -66,6 +66,8 @@ class Summarizer(object):
         # constructor will overwrite this with something useful.
         self.existing_constraints = {}
         self.node_info = {}
+        self.cost = 0
+        self.arv_config = {}
 
         logger.info("%s: logdata %s", self.label, logdata)
 
@@ -75,82 +77,23 @@ class Summarizer(object):
             self._run(logdata)
 
     def _run(self, logdata):
-        self.detected_crunch1 = False
-
         if not self.node_info:
             self.node_info = logdata.node_info()
 
         for line in logdata:
-            if not self.detected_crunch1 and '-8i9sb-' in line:
-                self.detected_crunch1 = True
-
-            if self.detected_crunch1:
-                m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) job_task (?P<task_uuid>\S+)$', line)
-                if m:
-                    seq = int(m.group('seq'))
-                    uuid = m.group('task_uuid')
-                    self.seq_to_uuid[seq] = uuid
-                    logger.debug('%s: seq %d is task %s', self.label, seq, uuid)
-                    continue
-
-                m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) (success in|failure \(#., permanent\) after) (?P<elapsed>\d+) seconds', line)
-                if m:
-                    task_id = self.seq_to_uuid[int(m.group('seq'))]
-                    elapsed = int(m.group('elapsed'))
-                    self.task_stats[task_id]['time'] = {'elapsed': elapsed}
-                    if elapsed > self.stats_max['time']['elapsed']:
-                        self.stats_max['time']['elapsed'] = elapsed
-                    continue
-
-                m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) stderr Queued job (?P<uuid>\S+)$', line)
-                if m:
-                    uuid = m.group('uuid')
-                    if self._skip_child_jobs:
-                        logger.warning('%s: omitting stats from child job %s'
-                                       ' because --skip-child-jobs flag is on',
-                                       self.label, uuid)
-                        continue
-                    logger.debug('%s: follow %s', self.label, uuid)
-                    child_summarizer = NewSummarizer(uuid)
-                    child_summarizer.stats_max = self.stats_max
-                    child_summarizer.task_stats = self.task_stats
-                    child_summarizer.tasks = self.tasks
-                    child_summarizer.starttime = self.starttime
-                    child_summarizer.run()
-                    logger.debug('%s: done %s', self.label, uuid)
-                    continue
-
-                # 2017-12-02_17:15:08 e51c5-8i9sb-mfp68stkxnqdd6m 63676 0 stderr crunchstat: keepcalls 0 put 2576 get -- interval 10.0000 seconds 0 put 2576 get
-                m = re.search(r'^(?P<timestamp>[^\s.]+)(\.\d+)? (?P<job_uuid>\S+) \d+ (?P<seq>\d+) stderr (?P<crunchstat>crunchstat: )(?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n$', line)
-                if not m:
-                    continue
-            else:
-                # crunch2
-                # 2017-12-01T16:56:24.723509200Z crunchstat: keepcalls 0 put 3 get -- interval 10.0000 seconds 0 put 3 get
-                m = re.search(r'^(?P<timestamp>\S+) (?P<crunchstat>crunchstat: )?(?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n$', line)
-                if not m:
-                    continue
+            # crunch2
+            # 2017-12-01T16:56:24.723509200Z crunchstat: keepcalls 0 put 3 get -- interval 10.0000 seconds 0 put 3 get
+            m = re.search(r'^(?P<timestamp>\S+) (?P<crunchstat>crunchstat: )?(?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n$', line)
+            if not m:
+                continue
 
             if self.label is None:
                 try:
                     self.label = m.group('job_uuid')
                 except IndexError:
                     self.label = 'label #1'
-            category = m.group('category')
-            if category.endswith(':'):
-                # "stderr crunchstat: notice: ..."
-                continue
-            elif category in ('error', 'caught'):
-                continue
-            elif category in ('read', 'open', 'cgroup', 'CID', 'Running'):
-                # "stderr crunchstat: read /proc/1234/net/dev: ..."
-                # (old logs are less careful with unprefixed error messages)
-                continue
 
-            if self.detected_crunch1:
-                task_id = self.seq_to_uuid[int(m.group('seq'))]
-            else:
-                task_id = 'container'
+            task_id = 'container'
             task = self.tasks[task_id]
 
             # Use the first and last crunchstat timestamps as
@@ -179,12 +122,23 @@ class Summarizer(object):
             if self.finishtime is None or timestamp > self.finishtime:
                 self.finishtime = timestamp
 
-            if (not self.detected_crunch1) and task.starttime is not None and task.finishtime is not None:
+            if task.starttime is not None and task.finishtime is not None:
                 elapsed = (task.finishtime - task.starttime).seconds
                 self.task_stats[task_id]['time'] = {'elapsed': elapsed}
                 if elapsed > self.stats_max['time']['elapsed']:
                     self.stats_max['time']['elapsed'] = elapsed
 
+            category = m.group('category')
+            if category.endswith(':'):
+                # "stderr crunchstat: notice: ..."
+                continue
+            elif category in ('error', 'caught'):
+                continue
+            elif category in ('read', 'open', 'cgroup', 'CID', 'Running'):
+                # "stderr crunchstat: read /proc/1234/net/dev: ..."
+                # (old logs are less careful with unprefixed error messages)
+                continue
+
             this_interval_s = None
             for group in ['current', 'interval']:
                 if not m.group(group):
@@ -359,7 +313,7 @@ class Summarizer(object):
             ('Requested CPU cores',
              self.existing_constraints.get(self._map_runtime_constraint('vcpus')),
              None,
-             ''),
+             '') if self.existing_constraints.get(self._map_runtime_constraint('vcpus')) else None,
 
             ('Instance VCPUs',
              self.node_info.get('VCPUs'),
@@ -374,12 +328,12 @@ class Summarizer(object):
             ('Requested RAM',
              self.existing_constraints.get(self._map_runtime_constraint('ram')),
              lambda x: x / 2**20,
-             'MB'),
+             'MB') if self.existing_constraints.get(self._map_runtime_constraint('ram')) else None,
 
             ('Maximum RAM request for this instance type',
-             (self.node_info.get('RAM') - self.arv_config.get('Containers', {}).get('ReserveExtraRAM', {}))*.95,
+             (self.node_info.get('RAM') - self.arv_config.get('Containers', {}).get('ReserveExtraRAM', 0))*.95,
              lambda x: x / 2**20,
-             'MB'),
+             'MB') if self.node_info.get('RAM') else None,
 
             ('Max network traffic{}'.format(by_single_task),
              self.stats_max['net:eth0']['tx+rx'] +
@@ -509,6 +463,8 @@ class Summarizer(object):
         if used_bytes == float('-Inf'):
             logger.warning('%s: no memory usage data', self.label)
             return
+        if not self.existing_constraints.get(constraint_key):
+            return
         used_mib = math.ceil(float(used_bytes) / MB)
         asked_mib = self.existing_constraints.get(constraint_key) / MB
 
@@ -574,18 +530,11 @@ class Summarizer(object):
     def _runtime_constraint_mem_unit(self):
         if hasattr(self, 'runtime_constraint_mem_unit'):
             return self.runtime_constraint_mem_unit
-        elif self.detected_crunch1:
-            return JobSummarizer.runtime_constraint_mem_unit
         else:
             return ContainerRequestSummarizer.runtime_constraint_mem_unit
 
     def _map_runtime_constraint(self, key):
-        if hasattr(self, 'map_runtime_constraint'):
-            return self.map_runtime_constraint[key]
-        elif self.detected_crunch1:
-            return JobSummarizer.map_runtime_constraint[key]
-        else:
-            return key
+        return key
 
 
 class CollectionSummarizer(Summarizer):
@@ -617,14 +566,6 @@ def NewSummarizer(process_or_uuid, **kwargs):
         if process is None:
             process = arv.container_requests().get(uuid=uuid).execute()
         klass = ContainerRequestTreeSummarizer
-    elif '-8i9sb-' in uuid:
-        if process is None:
-            process = arv.jobs().get(uuid=uuid).execute()
-        klass = JobTreeSummarizer
-    elif '-d1hrv-' in uuid:
-        if process is None:
-            process = arv.pipeline_instances().get(uuid=uuid).execute()
-        klass = PipelineSummarizer
     elif '-4zz18-' in uuid:
         return CollectionSummarizer(collection_id=uuid)
     else:
@@ -646,7 +587,10 @@ class ProcessSummarizer(Summarizer):
         log_collection = self.process.get('log', self.process.get('log_uuid'))
         if log_collection and self.process.get('state') != 'Uncommitted': # arvados.util.CR_UNCOMMITTED:
             try:
-                rdr = crunchstat_summary.reader.CollectionReader(log_collection, api_client=arv)
+                rdr = crunchstat_summary.reader.CollectionReader(
+                    log_collection,
+                    api_client=arv,
+                    collection_object=kwargs.get("collection_object"))
             except arvados.errors.NotFoundError as e:
                 logger.warning("Trying event logs after failing to read "
                                "log collection %s: %s", self.process['log'], e)
@@ -661,16 +605,6 @@ class ProcessSummarizer(Summarizer):
         self.cost = self.process.get('cost', 0)
 
 
-
-class JobSummarizer(ProcessSummarizer):
-    runtime_constraint_mem_unit = MB
-    map_runtime_constraint = {
-        'keep_cache_ram': 'keep_cache_mb_per_task',
-        'ram': 'min_ram_mb_per_node',
-        'vcpus': 'min_cores_per_node',
-    }
-
-
 class ContainerRequestSummarizer(ProcessSummarizer):
     runtime_constraint_mem_unit = 1
 
@@ -741,51 +675,6 @@ class MultiSummarizer(object):
         return WEBCHART_CLASS(label, iter(self._descendants().values())).html(tophtml, bottomhtml)
 
 
-class JobTreeSummarizer(MultiSummarizer):
-    """Summarizes a job and all children listed in its components field."""
-    def __init__(self, job, label=None, **kwargs):
-        arv = kwargs.get("arv") or arvados.api('v1')
-        label = label or job.get('name', job['uuid'])
-        children = collections.OrderedDict()
-        children[job['uuid']] = JobSummarizer(job, label=label, **kwargs)
-        if job.get('components', None):
-            preloaded = {}
-            for j in arv.jobs().index(
-                    limit=len(job['components']),
-                    filters=[['uuid','in',list(job['components'].values())]]).execute()['items']:
-                preloaded[j['uuid']] = j
-            for cname in sorted(job['components'].keys()):
-                child_uuid = job['components'][cname]
-                j = (preloaded.get(child_uuid) or
-                     arv.jobs().get(uuid=child_uuid).execute())
-                children[child_uuid] = JobTreeSummarizer(job=j, label=cname, **kwargs)
-
-        super(JobTreeSummarizer, self).__init__(
-            children=children,
-            label=label,
-            **kwargs)
-
-
-class PipelineSummarizer(MultiSummarizer):
-    def __init__(self, instance, **kwargs):
-        children = collections.OrderedDict()
-        for cname, component in instance['components'].items():
-            if 'job' not in component:
-                logger.warning(
-                    "%s: skipping component with no job assigned", cname)
-            else:
-                logger.info(
-                    "%s: job %s", cname, component['job']['uuid'])
-                summarizer = JobTreeSummarizer(component['job'], label=cname, **kwargs)
-                summarizer.label = '{} {}'.format(
-                    cname, component['job']['uuid'])
-                children[cname] = summarizer
-        super(PipelineSummarizer, self).__init__(
-            children=children,
-            label=instance['uuid'],
-            **kwargs)
-
-
 class ContainerRequestTreeSummarizer(MultiSummarizer):
     def __init__(self, root, skip_child_jobs=False, **kwargs):
         arv = kwargs.get("arv") or arvados.api('v1')
diff --git a/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk.txt.gz.report b/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk.txt.gz.report
index 868f07b684..e00faafb00 100644
--- a/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk.txt.gz.report
+++ b/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk.txt.gz.report
@@ -25,15 +25,14 @@ statfs	available	397744787456	-	397744787456
 statfs	total	402611240960	-	402611240960
 statfs	used	4870303744	52426.18	4866453504
 time	elapsed	20	-	20
-# Number of tasks: 1
-# Max CPU time spent by a single task: 2.45s
+# Elapsed time: 20s
 # Max CPU usage in a single interval: 23.70%
 # Overall CPU usage: 12.25%
-# Max memory used by a single task: 0.07GB
-# Max network traffic in a single task: 0.00GB
+# Requested CPU cores: 1
+# Max memory used: 66.30MB
+# Requested RAM: 2500.00MB
+# Max network traffic: 0.00GB
 # Max network speed in a single interval: 0.00MB/s
-# Keep cache miss rate 0.00%
-# Keep cache utilization 0.00%
-# Temp disk utilization 1.21%
-#!! container max RSS was 67 MiB -- try reducing runtime_constraints to "ram":1020054732
-#!! container max temp disk utilization was 1% of 383960 MiB -- consider reducing "tmpdirMin" and/or "outdirMin"
+# Keep cache miss rate: 0.00%
+# Keep cache utilization: 0.00%
+# Temp disk utilization: 1.21%
diff --git a/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-arv-mount.txt.gz.report b/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-arv-mount.txt.gz.report
index f77059b824..6afdf9aa69 100644
--- a/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-arv-mount.txt.gz.report
+++ b/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-arv-mount.txt.gz.report
@@ -11,13 +11,12 @@ net:keep0	rx	0	0	0
 net:keep0	tx	0	0	0
 net:keep0	tx+rx	0	0	0
 time	elapsed	10	-	10
-# Number of tasks: 1
-# Max CPU time spent by a single task: 0s
+# Elapsed time: 10s
 # Max CPU usage in a single interval: 0%
 # Overall CPU usage: 0.00%
-# Max memory used by a single task: 0.00GB
-# Max network traffic in a single task: 0.00GB
+# Max memory used: 0.00MB
+# Max network traffic: 0.00GB
 # Max network speed in a single interval: 0.00MB/s
-# Keep cache miss rate 0.00%
-# Keep cache utilization 0.00%
-# Temp disk utilization 0.00%
+# Keep cache miss rate: 0.00%
+# Keep cache utilization: 0.00%
+# Temp disk utilization: 0.00%
diff --git a/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-crunchstat.txt.gz.report b/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-crunchstat.txt.gz.report
index 87db98bb37..fa1ad04e7b 100644
--- a/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-crunchstat.txt.gz.report
+++ b/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-crunchstat.txt.gz.report
@@ -14,15 +14,12 @@ statfs	available	397744787456	-	397744787456
 statfs	total	402611240960	-	402611240960
 statfs	used	4870303744	52426.18	4866453504
 time	elapsed	20	-	20
-# Number of tasks: 1
-# Max CPU time spent by a single task: 2.45s
+# Elapsed time: 20s
 # Max CPU usage in a single interval: 23.70%
 # Overall CPU usage: 12.25%
-# Max memory used by a single task: 0.07GB
-# Max network traffic in a single task: 0.00GB
+# Max memory used: 66.30MB
+# Max network traffic: 0.00GB
 # Max network speed in a single interval: 0.00MB/s
-# Keep cache miss rate 0.00%
-# Keep cache utilization 0.00%
-# Temp disk utilization 1.21%
-#!! label #1 max RSS was 67 MiB -- try reducing runtime_constraints to "ram":1020054732
-#!! label #1 max temp disk utilization was 1% of 383960 MiB -- consider reducing "tmpdirMin" and/or "outdirMin"
+# Keep cache miss rate: 0.00%
+# Keep cache utilization: 0.00%
+# Temp disk utilization: 1.21%
diff --git a/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y.txt.gz.report b/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y.txt.gz.report
index 868f07b684..e00faafb00 100644
--- a/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y.txt.gz.report
+++ b/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y.txt.gz.report
@@ -25,15 +25,14 @@ statfs	available	397744787456	-	397744787456
 statfs	total	402611240960	-	402611240960
 statfs	used	4870303744	52426.18	4866453504
 time	elapsed	20	-	20
-# Number of tasks: 1
-# Max CPU time spent by a single task: 2.45s
+# Elapsed time: 20s
 # Max CPU usage in a single interval: 23.70%
 # Overall CPU usage: 12.25%
-# Max memory used by a single task: 0.07GB
-# Max network traffic in a single task: 0.00GB
+# Requested CPU cores: 1
+# Max memory used: 66.30MB
+# Requested RAM: 2500.00MB
+# Max network traffic: 0.00GB
 # Max network speed in a single interval: 0.00MB/s
-# Keep cache miss rate 0.00%
-# Keep cache utilization 0.00%
-# Temp disk utilization 1.21%
-#!! container max RSS was 67 MiB -- try reducing runtime_constraints to "ram":1020054732
-#!! container max temp disk utilization was 1% of 383960 MiB -- consider reducing "tmpdirMin" and/or "outdirMin"
+# Keep cache miss rate: 0.00%
+# Keep cache utilization: 0.00%
+# Temp disk utilization: 1.21%
diff --git a/tools/crunchstat-summary/tests/crunchstat_error_messages.txt b/tools/crunchstat-summary/tests/crunchstat_error_messages.txt
index bf6dd5ceaf..2b93639281 100644
--- a/tools/crunchstat-summary/tests/crunchstat_error_messages.txt
+++ b/tools/crunchstat-summary/tests/crunchstat_error_messages.txt
@@ -1,9 +1,9 @@
-2016-01-07_00:15:33 tb05z-8i9sb-khsk5rmf4xjdcbl 20819 0 stderr 
+2016-01-07_00:15:33 tb05z-8i9sb-khsk5rmf4xjdcbl 20819 0 stderr
 2016-01-07_00:15:33 tb05z-8i9sb-khsk5rmf4xjdcbl 20819 0 stderr old error message:
 2016-01-07_00:15:33 tb05z-8i9sb-khsk5rmf4xjdcbl 20819 0 stderr crunchstat: read /proc/3305/net/dev: open /proc/3305/net/dev: no such file or directory
-2016-01-07_00:15:34 tb05z-8i9sb-khsk5rmf4xjdcbl 20819 0 stderr 
+2016-01-07_00:15:34 tb05z-8i9sb-khsk5rmf4xjdcbl 20819 0 stderr
 2016-01-07_00:15:34 tb05z-8i9sb-khsk5rmf4xjdcbl 20819 0 stderr new error message:
 2016-01-07_00:15:34 tb05z-8i9sb-khsk5rmf4xjdcbl 20819 0 stderr crunchstat: error reading /proc/3305/net/dev: open /proc/3305/net/dev: no such file or directory
 2016-01-07_00:15:34 tb05z-8i9sb-khsk5rmf4xjdcbl 20819 0 stderr
 2016-01-07_00:15:34 tb05z-8i9sb-khsk5rmf4xjdcbl 20819 0 stderr cancelled job:
-2016-01-07_00:15:34 tb05z-8i9sb-khsk5rmf4xjdcbl 20819 0 stderr crunchstat: caught signal: interrupt
+2016-01-07_00:15:59 tb05z-8i9sb-khsk5rmf4xjdcbl 20819 0 stderr crunchstat: caught signal: interrupt
diff --git a/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz b/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz
deleted file mode 100644
index bfdcdff26f..0000000000
Binary files a/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz and /dev/null differ
diff --git a/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz.report b/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz.report
deleted file mode 100644
index 173e93fe25..0000000000
--- a/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz.report
+++ /dev/null
@@ -1,35 +0,0 @@
-category	metric	task_max	task_max_rate	job_total
-blkio:0:0	read	0	0	0
-blkio:0:0	write	0	0	0
-cpu	cpus	8.00	-	-
-cpu	sys	1.92	0.04	1.92
-cpu	user	3.83	0.09	3.83
-cpu	user+sys	5.75	0.13	5.75
-fuseops	read	0	0	0
-fuseops	write	0	0	0
-keepcache	hit	0	0	0
-keepcache	miss	0	0	0
-keepcalls	get	0	0	0
-keepcalls	put	0	0	0
-mem	cache	1678139392	-	-
-mem	pgmajfault	0	-	0
-mem	rss	349814784	-	-
-mem	swap	0	-	-
-net:eth0	rx	1754364530	41658344.87	1754364530
-net:eth0	tx	38837956	920817.97	38837956
-net:eth0	tx+rx	1793202486	42579162.83	1793202486
-net:keep0	rx	0	0	0
-net:keep0	tx	0	0	0
-net:keep0	tx+rx	0	0	0
-time	elapsed	80	-	80
-# Number of tasks: 1
-# Max CPU time spent by a single task: 5.75s
-# Max CPU usage in a single interval: 13.00%
-# Overall CPU usage: 7.19%
-# Max memory used by a single task: 0.35GB
-# Max network traffic in a single task: 1.79GB
-# Max network speed in a single interval: 42.58MB/s
-# Keep cache miss rate 0.00%
-# Keep cache utilization 0.00%
-# Temp disk utilization 0.00%
-#!! 4xphq-8i9sb-jq0ekny1xou3zoh max RSS was 334 MiB -- try reducing runtime_constraints to "min_ram_mb_per_node":972
diff --git a/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz b/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz
deleted file mode 100644
index 17af535108..0000000000
Binary files a/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz and /dev/null differ
diff --git a/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz.report b/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz.report
deleted file mode 100644
index b31a055e9f..0000000000
--- a/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz.report
+++ /dev/null
@@ -1,24 +0,0 @@
-category	metric	task_max	task_max_rate	job_total
-cpu	cpus	8.00	-	-
-cpu	sys	0	-	0.00
-cpu	user	0	-	0.00
-cpu	user+sys	0	-	0.00
-mem	cache	12288	-	-
-mem	pgmajfault	0	-	0
-mem	rss	856064	-	-
-mem	swap	0	-	-
-net:eth0	rx	90	-	90
-net:eth0	tx	90	-	90
-net:eth0	tx+rx	180	-	180
-time	elapsed	2	-	4
-# Number of tasks: 2
-# Max CPU time spent by a single task: 0s
-# Max CPU usage in a single interval: 0%
-# Overall CPU usage: 0.00%
-# Max memory used by a single task: 0.00GB
-# Max network traffic in a single task: 0.00GB
-# Max network speed in a single interval: 0.00MB/s
-# Keep cache miss rate 0.00%
-# Keep cache utilization 0.00%
-# Temp disk utilization 0.00%
-#!! 4xphq-8i9sb-zvb2ocfycpomrup max RSS was 1 MiB -- try reducing runtime_constraints to "min_ram_mb_per_node":972
diff --git a/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz b/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz
deleted file mode 100644
index 8826f70470..0000000000
Binary files a/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz and /dev/null differ
diff --git a/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz.report b/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz.report
deleted file mode 100644
index 9ddf5acc32..0000000000
--- a/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz.report
+++ /dev/null
@@ -1,24 +0,0 @@
-category	metric	task_max	task_max_rate	job_total
-cpu	cpus	8.00	-	-
-cpu	sys	0	-	0.00
-cpu	user	0	-	0.00
-cpu	user+sys	0	-	0.00
-mem	cache	8192	-	-
-mem	pgmajfault	0	-	0
-mem	rss	450560	-	-
-mem	swap	0	-	-
-net:eth0	rx	90	-	90
-net:eth0	tx	90	-	90
-net:eth0	tx+rx	180	-	180
-time	elapsed	2	-	3
-# Number of tasks: 2
-# Max CPU time spent by a single task: 0s
-# Max CPU usage in a single interval: 0%
-# Overall CPU usage: 0.00%
-# Max memory used by a single task: 0.00GB
-# Max network traffic in a single task: 0.00GB
-# Max network speed in a single interval: 0.00MB/s
-# Keep cache miss rate 0.00%
-# Keep cache utilization 0.00%
-# Temp disk utilization 0.00%
-#!! 4xphq-8i9sb-v831jm2uq0g2g9x max RSS was 1 MiB -- try reducing runtime_constraints to "min_ram_mb_per_node":972
diff --git a/tools/crunchstat-summary/tests/test_examples.py b/tools/crunchstat-summary/tests/test_examples.py
index 444cfe4ef8..5a20d3283f 100644
--- a/tools/crunchstat-summary/tests/test_examples.py
+++ b/tools/crunchstat-summary/tests/test_examples.py
@@ -16,7 +16,7 @@ import sys
 import unittest
 
 from crunchstat_summary.command import UTF8Decode
-from crunchstat_summary import logger
+from crunchstat_summary import logger, reader
 
 TESTS_DIR = os.path.dirname(os.path.abspath(__file__))
 
@@ -71,14 +71,13 @@ class HTMLFromFile(TestCase):
 class SummarizeEdgeCases(TestCase):
     def test_error_messages(self):
         logfile = io.open(os.path.join(TESTS_DIR, 'crunchstat_error_messages.txt'), encoding='utf-8')
-        s = crunchstat_summary.summarizer.Summarizer(logfile)
+        s = crunchstat_summary.summarizer.Summarizer(reader.StubReader(logfile))
         s.run()
         self.assertRegex(self.logbuf.getvalue(), r'CPU stats are missing -- possible cluster configuration issue')
         self.assertRegex(self.logbuf.getvalue(), r'memory stats are missing -- possible cluster configuration issue')
         self.assertRegex(self.logbuf.getvalue(), r'network I/O stats are missing -- possible cluster configuration issue')
         self.assertRegex(self.logbuf.getvalue(), r'storage space stats are missing -- possible cluster configuration issue')
 
-
 class SummarizeContainerCommon(TestCase):
     fake_container = {
         'uuid': '9tee4-dz642-lymtndkpy39eibk',
@@ -106,20 +105,19 @@ class SummarizeContainerCommon(TestCase):
     @mock.patch('arvados.api')
     def check_common(self, mock_api, mock_cr):
         items = [ {'items':[self.fake_request]}] + [{'items':[]}] * 100
-        # Index and list mean the same thing, but are used in different places in the
-        # code. It's fragile, but exploit that fact to distinguish the two uses.
-        mock_api().container_requests().index().execute.return_value = {'items': [] }  # child_crs
         mock_api().container_requests().list().execute.side_effect = items # parent request
         mock_api().container_requests().get().execute.return_value = self.fake_request
         mock_api().containers().get().execute.return_value = self.fake_container
         mock_cr().__iter__.return_value = [
             'crunch-run.txt', 'stderr.txt', 'node-info.txt',
             'container.json', 'crunchstat.txt', 'arv-mount.txt']
-        def _open(n):
+        def _open(n, mode):
             if n == "crunchstat.txt":
                 return UTF8Decode(gzip.open(self.logfile))
             elif n == "arv-mount.txt":
                 return UTF8Decode(gzip.open(self.arvmountlog))
+            elif n == "node.json":
+                return io.StringIO("{}")
         mock_cr().open.side_effect = _open
         args = crunchstat_summary.command.ArgumentParser().parse_args(
             self.arg_strings)
@@ -147,184 +145,3 @@ class SummarizeContainerRequest(SummarizeContainerCommon):
         self.check_common()
         self.assertNotRegex(self.logbuf.getvalue(), r'stats are missing')
         self.assertNotRegex(self.logbuf.getvalue(), r'possible cluster configuration issue')
-
-
-class SummarizeJob(TestCase):
-    fake_job_uuid = '4xphq-8i9sb-jq0ekny1xou3zoh'
-    fake_log_id = 'fake-log-collection-id'
-    fake_job = {
-        'uuid': fake_job_uuid,
-        'log': fake_log_id,
-    }
-    logfile = os.path.join(TESTS_DIR, 'logfile_20151204190335.txt.gz')
-
-    @mock.patch('arvados.collection.CollectionReader')
-    @mock.patch('arvados.api')
-    def test_job_report(self, mock_api, mock_cr):
-        mock_api().jobs().get().execute.return_value = self.fake_job
-        mock_cr().__iter__.return_value = ['fake-logfile.txt']
-        mock_cr().open.return_value = UTF8Decode(gzip.open(self.logfile))
-        args = crunchstat_summary.command.ArgumentParser().parse_args(
-            ['--job', self.fake_job_uuid])
-        cmd = crunchstat_summary.command.Command(args)
-        cmd.run()
-        self.diff_known_report(self.logfile, cmd)
-        mock_api().jobs().get.assert_called_with(uuid=self.fake_job_uuid)
-        mock_cr.assert_called_with(self.fake_log_id)
-        mock_cr().open.assert_called_with('fake-logfile.txt')
-
-
-class SummarizePipeline(TestCase):
-    fake_instance = {
-        'uuid': 'zzzzz-d1hrv-i3e77t9z5y8j9cc',
-        'owner_uuid': 'zzzzz-tpzed-xurymjxw79nv3jz',
-        'components': collections.OrderedDict([
-            ['foo', {
-                'job': {
-                    'uuid': 'zzzzz-8i9sb-000000000000000',
-                    'log': 'fake-log-pdh-0',
-                    'runtime_constraints': {
-                        'min_ram_mb_per_node': 900,
-                        'min_cores_per_node': 1,
-                    },
-                },
-            }],
-            ['bar', {
-                'job': {
-                    'uuid': 'zzzzz-8i9sb-000000000000001',
-                    'log': 'fake-log-pdh-1',
-                    'runtime_constraints': {
-                        'min_ram_mb_per_node': 900,
-                        'min_cores_per_node': 1,
-                    },
-                },
-            }],
-            ['no-job-assigned', {}],
-            ['unfinished-job', {
-                'job': {
-                    'uuid': 'zzzzz-8i9sb-xxxxxxxxxxxxxxx',
-                },
-            }],
-            ['baz', {
-                'job': {
-                    'uuid': 'zzzzz-8i9sb-000000000000002',
-                    'log': 'fake-log-pdh-2',
-                    'runtime_constraints': {
-                        'min_ram_mb_per_node': 900,
-                        'min_cores_per_node': 1,
-                    },
-                },
-            }]]),
-    }
-
-    @mock.patch('arvados.collection.CollectionReader')
-    @mock.patch('arvados.api')
-    def test_pipeline(self, mock_api, mock_cr):
-        logfile = os.path.join(TESTS_DIR, 'logfile_20151204190335.txt.gz')
-        mock_api().pipeline_instances().get().execute. \
-            return_value = self.fake_instance
-        mock_cr().__iter__.return_value = ['fake-logfile.txt']
-        mock_cr().open.side_effect = [UTF8Decode(gzip.open(logfile)) for _ in range(3)]
-        args = crunchstat_summary.command.ArgumentParser().parse_args(
-            ['--pipeline-instance', self.fake_instance['uuid']])
-        cmd = crunchstat_summary.command.Command(args)
-        cmd.run()
-
-        with io.open(logfile+'.report', encoding='utf-8') as f:
-            job_report = [line for line in f if not line.startswith('#!! ')]
-        expect = (
-            ['### Summary for foo (zzzzz-8i9sb-000000000000000)\n'] +
-            job_report + ['\n'] +
-            ['### Summary for bar (zzzzz-8i9sb-000000000000001)\n'] +
-            job_report + ['\n'] +
-            ['### Summary for unfinished-job (partial) (zzzzz-8i9sb-xxxxxxxxxxxxxxx)\n',
-             '(no report generated)\n',
-             '\n'] +
-            ['### Summary for baz (zzzzz-8i9sb-000000000000002)\n'] +
-            job_report)
-        self.diff_report(cmd, expect)
-        mock_cr.assert_has_calls(
-            [
-                mock.call('fake-log-pdh-0'),
-                mock.call('fake-log-pdh-1'),
-                mock.call('fake-log-pdh-2'),
-            ], any_order=True)
-        mock_cr().open.assert_called_with('fake-logfile.txt')
-
-
-class SummarizeACRJob(TestCase):
-    fake_job = {
-        'uuid': 'zzzzz-8i9sb-i3e77t9z5y8j9cc',
-        'owner_uuid': 'zzzzz-tpzed-xurymjxw79nv3jz',
-        'components': {
-            'foo': 'zzzzz-8i9sb-000000000000000',
-            'bar': 'zzzzz-8i9sb-000000000000001',
-            'unfinished-job': 'zzzzz-8i9sb-xxxxxxxxxxxxxxx',
-            'baz': 'zzzzz-8i9sb-000000000000002',
-        }
-    }
-    fake_jobs_index = { 'items': [
-        {
-            'uuid': 'zzzzz-8i9sb-000000000000000',
-            'log': 'fake-log-pdh-0',
-            'runtime_constraints': {
-                'min_ram_mb_per_node': 900,
-                'min_cores_per_node': 1,
-            },
-        },
-        {
-            'uuid': 'zzzzz-8i9sb-000000000000001',
-            'log': 'fake-log-pdh-1',
-            'runtime_constraints': {
-                'min_ram_mb_per_node': 900,
-                'min_cores_per_node': 1,
-            },
-        },
-        {
-            'uuid': 'zzzzz-8i9sb-xxxxxxxxxxxxxxx',
-        },
-        {
-            'uuid': 'zzzzz-8i9sb-000000000000002',
-            'log': 'fake-log-pdh-2',
-            'runtime_constraints': {
-                'min_ram_mb_per_node': 900,
-                'min_cores_per_node': 1,
-            },
-        },
-    ]}
-    @mock.patch('arvados.collection.CollectionReader')
-    @mock.patch('arvados.api')
-    def test_acr_job(self, mock_api, mock_cr):
-        logfile = os.path.join(TESTS_DIR, 'logfile_20151204190335.txt.gz')
-        mock_api().jobs().index().execute.return_value = self.fake_jobs_index
-        mock_api().jobs().get().execute.return_value = self.fake_job
-        mock_cr().__iter__.return_value = ['fake-logfile.txt']
-        mock_cr().open.side_effect = [UTF8Decode(gzip.open(logfile)) for _ in range(3)]
-        args = crunchstat_summary.command.ArgumentParser().parse_args(
-            ['--job', self.fake_job['uuid']])
-        cmd = crunchstat_summary.command.Command(args)
-        cmd.run()
-
-        with io.open(logfile+'.report', encoding='utf-8') as f:
-            job_report = [line for line in f if not line.startswith('#!! ')]
-        expect = (
-            ['### Summary for zzzzz-8i9sb-i3e77t9z5y8j9cc (partial) (zzzzz-8i9sb-i3e77t9z5y8j9cc)\n',
-             '(no report generated)\n',
-             '\n'] +
-            ['### Summary for bar (zzzzz-8i9sb-000000000000001)\n'] +
-            job_report + ['\n'] +
-            ['### Summary for baz (zzzzz-8i9sb-000000000000002)\n'] +
-            job_report + ['\n'] +
-            ['### Summary for foo (zzzzz-8i9sb-000000000000000)\n'] +
-            job_report + ['\n'] +
-            ['### Summary for unfinished-job (partial) (zzzzz-8i9sb-xxxxxxxxxxxxxxx)\n',
-             '(no report generated)\n']
-        )
-        self.diff_report(cmd, expect)
-        mock_cr.assert_has_calls(
-            [
-                mock.call('fake-log-pdh-0'),
-                mock.call('fake-log-pdh-1'),
-                mock.call('fake-log-pdh-2'),
-            ], any_order=True)
-        mock_cr().open.assert_called_with('fake-logfile.txt')