19744: Remove jobs/pipeline templates from crunchstat-summary
author     Peter Amstutz <peter.amstutz@curii.com>
           Fri, 23 Feb 2024 15:05:01 +0000 (10:05 -0500)
committer  Peter Amstutz <peter.amstutz@curii.com>
           Fri, 23 Feb 2024 15:11:26 +0000 (10:11 -0500)
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz@curii.com>

17 files changed:
.licenseignore
sdk/cwl/arvados_cwl/arvcontainer.py
tools/crunchstat-summary/crunchstat_summary/command.py
tools/crunchstat-summary/crunchstat_summary/reader.py
tools/crunchstat-summary/crunchstat_summary/summarizer.py
tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk.txt.gz.report
tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-arv-mount.txt.gz.report
tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-crunchstat.txt.gz.report
tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y.txt.gz.report
tools/crunchstat-summary/tests/crunchstat_error_messages.txt
tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz [deleted file]
tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz.report [deleted file]
tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz [deleted file]
tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz.report [deleted file]
tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz [deleted file]
tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz.report [deleted file]
tools/crunchstat-summary/tests/test_examples.py

diff --git a/.licenseignore b/.licenseignore
index d7faa0c3f181ce5c1e110ce5a2e1175c2c00c048..1e1c12a53a79a2a46a0865bf863f8933691e3ba6 100644 (file)
@@ -53,6 +53,8 @@ sdk/cwl/tests/tool/blub.txt
 sdk/cwl/tests/19109-upload-secondary/*
 sdk/cwl/tests/federation/data/*
 sdk/cwl/tests/fake-keep-mount/fake_collection_dir/.arvados#collection
+sdk/cwl/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-arv-mount.txt
+sdk/cwl/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-crunchstat.txt
 sdk/go/manifest/testdata/*_manifest
 sdk/java/.classpath
 sdk/java/pom.xml
diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py
index 63e04a157e29d4edb39844495e1c24e326251eb8..70202743c483f29691d1138c06c2cd120ec3fd8a 100644 (file)
@@ -536,8 +536,11 @@ class ArvadosContainer(JobBase):
 
             if logc is not None:
                 try:
-                    summerizer = crunchstat_summary.summarizer.Summarizer(crunchstat_summary.reader.CollectionReader(logc.manifest_locator(), collection_object=logc),
-                                                                          label=self.name, arv=self.arvrunner.api)
+                    summerizer = crunchstat_summary.summarizer.ContainerRequestSummarizer(
+                        record,
+                        collection_object=logc,
+                        label=self.name,
+                        arv=self.arvrunner.api)
                     summerizer.run()
                     with logc.open("usage_report.html", "wt") as mr:
                         mr.write(summerizer.html_report())
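
The rewritten call hands ContainerRequestSummarizer the container request record plus the log collection that arvados-cwl-runner has already opened, rather than re-reading the manifest from Keep. A minimal standalone sketch of the same pattern, assuming a finished container request (the UUID and label below are placeholders, not part of the patch):

    import arvados
    import arvados.collection
    import crunchstat_summary.summarizer

    api = arvados.api('v1')
    # Placeholder UUID for illustration only.
    record = api.container_requests().get(uuid='zzzzz-xvhdp-000000000000000').execute()
    # One writable collection object serves both log reading and report writing.
    logc = arvados.collection.Collection(record['log_uuid'], api_client=api)

    summerizer = crunchstat_summary.summarizer.ContainerRequestSummarizer(
        record,
        collection_object=logc,
        label='my-step',
        arv=api)
    summerizer.run()
    with logc.open("usage_report.html", "wt") as mr:
        mr.write(summerizer.html_report())
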
diff --git a/tools/crunchstat-summary/crunchstat_summary/command.py b/tools/crunchstat-summary/crunchstat_summary/command.py
index 4ece5c3b2e6d8c05d5a29946826e6e31fbdc1967..c5a1068eff9b9e54b607ff814f13734ee367e8d5 100644 (file)
@@ -9,7 +9,7 @@ import logging
 import sys
 import arvados
 
-from crunchstat_summary import logger, summarizer
+from crunchstat_summary import logger, summarizer, reader
 from crunchstat_summary._version import __version__
 
 
@@ -30,9 +30,6 @@ class ArgumentParser(argparse.ArgumentParser):
             help='[Deprecated] Look up the specified container find its container request '
             'and read its log data from Keep (or from the Arvados event log, '
             'if the job is still running)')
-        src.add_argument(
-            '--pipeline-instance', type=str, metavar='UUID',
-            help='[Deprecated] Summarize each component of the given pipeline instance (historical pre-1.4)')
         src.add_argument(
             '--log-file', type=str,
             help='Read log data from a regular file')
@@ -89,9 +86,7 @@ class Command(object):
             'threads': self.args.threads,
             'arv': arvados.api('v1')
         }
-        if self.args.pipeline_instance:
-            self.summer = summarizer.NewSummarizer(self.args.pipeline_instance, **kwargs)
-        elif self.args.job:
+        if self.args.job:
             self.summer = summarizer.NewSummarizer(self.args.job, **kwargs)
         elif self.args.container:
             self.summer = summarizer.NewSummarizer(self.args.container, **kwargs)
@@ -100,9 +95,9 @@ class Command(object):
                 fh = UTF8Decode(gzip.open(self.args.log_file))
             else:
                 fh = open(self.args.log_file, mode = 'r', encoding = 'utf-8')
-            self.summer = summarizer.Summarizer(fh, **kwargs)
+            self.summer = summarizer.Summarizer(reader.StubReader(fh), **kwargs)
         else:
-            self.summer = summarizer.Summarizer(sys.stdin, **kwargs)
+            self.summer = summarizer.Summarizer(reader.StubReader(sys.stdin), **kwargs)
         return self.summer.run()
 
     def report(self):
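
With --pipeline-instance removed, the --log-file and stdin paths both wrap the raw file handle in reader.StubReader before constructing the Summarizer. A sketch of the --log-file path driven directly from Python, assuming a gzipped crunchstat log (file name illustrative; text_report() is the method Command.report() prints):

    import gzip
    from crunchstat_summary import reader, summarizer
    from crunchstat_summary.command import UTF8Decode

    # Equivalent of: crunchstat-summary --log-file mylog.txt.gz
    fh = UTF8Decode(gzip.open('mylog.txt.gz'))
    summer = summarizer.Summarizer(reader.StubReader(fh))
    summer.run()
    print(summer.text_report())
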
diff --git a/tools/crunchstat-summary/crunchstat_summary/reader.py b/tools/crunchstat-summary/crunchstat_summary/reader.py
index e79c4ef5b36e7dfebc62ce22cbfe6a690b02aba0..0198d765c3533df4cdeb42096fedc0cd57d20051 100644 (file)
@@ -74,7 +74,7 @@ class LiveLogReader(object):
             ['event_type', 'in', self.event_types]]
         try:
             while True:
-                page = arvados.api().logs().index(
+                page = arvados.api().logs().list(
                     limit=1000,
                     order=['id asc'],
                     filters=filters + [['id','>',str(last_id)]],
@@ -116,3 +116,25 @@ class LiveLogReader(object):
 
     def __exit__(self, exc_type, exc_val, exc_tb):
         pass
+
+    def node_info(self):
+        return {}
+
+class StubReader(object):
+    def __init__(self, fh):
+        self.fh = fh
+
+    def __str__(self):
+        return ""
+
+    def __iter__(self):
+        return iter(self.fh)
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        pass
+
+    def node_info(self):
+        return {}
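
StubReader pins down the minimal interface Summarizer expects of a log source: line iteration, context-manager entry/exit, a string label, and a node_info() dict. Any iterable of crunch2 log lines therefore works; for example (illustrative, not part of the patch):

    import io
    from crunchstat_summary import reader, summarizer

    log_line = ("2017-12-01T16:56:24.723509200Z crunchstat: "
                "keepcalls 0 put 3 get -- interval 10.0000 seconds 0 put 3 get\n")
    s = summarizer.Summarizer(reader.StubReader(io.StringIO(log_line)))
    s.run()  # parses the line with the crunch2 regex in Summarizer._run()
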
diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
index 9b6e5f16905054749d34004243792b5a9e24474c..65cee6c1765e0753e634ed7c4e6aa9fe49dfc56c 100644 (file)
@@ -66,6 +66,8 @@ class Summarizer(object):
         # constructor will overwrite this with something useful.
         self.existing_constraints = {}
         self.node_info = {}
+        self.cost = 0
+        self.arv_config = {}
 
         logger.info("%s: logdata %s", self.label, logdata)
 
@@ -75,82 +77,23 @@ class Summarizer(object):
             self._run(logdata)
 
     def _run(self, logdata):
-        self.detected_crunch1 = False
-
         if not self.node_info:
             self.node_info = logdata.node_info()
 
         for line in logdata:
-            if not self.detected_crunch1 and '-8i9sb-' in line:
-                self.detected_crunch1 = True
-
-            if self.detected_crunch1:
-                m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) job_task (?P<task_uuid>\S+)$', line)
-                if m:
-                    seq = int(m.group('seq'))
-                    uuid = m.group('task_uuid')
-                    self.seq_to_uuid[seq] = uuid
-                    logger.debug('%s: seq %d is task %s', self.label, seq, uuid)
-                    continue
-
-                m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) (success in|failure \(#., permanent\) after) (?P<elapsed>\d+) seconds', line)
-                if m:
-                    task_id = self.seq_to_uuid[int(m.group('seq'))]
-                    elapsed = int(m.group('elapsed'))
-                    self.task_stats[task_id]['time'] = {'elapsed': elapsed}
-                    if elapsed > self.stats_max['time']['elapsed']:
-                        self.stats_max['time']['elapsed'] = elapsed
-                    continue
-
-                m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) stderr Queued job (?P<uuid>\S+)$', line)
-                if m:
-                    uuid = m.group('uuid')
-                    if self._skip_child_jobs:
-                        logger.warning('%s: omitting stats from child job %s'
-                                       ' because --skip-child-jobs flag is on',
-                                       self.label, uuid)
-                        continue
-                    logger.debug('%s: follow %s', self.label, uuid)
-                    child_summarizer = NewSummarizer(uuid)
-                    child_summarizer.stats_max = self.stats_max
-                    child_summarizer.task_stats = self.task_stats
-                    child_summarizer.tasks = self.tasks
-                    child_summarizer.starttime = self.starttime
-                    child_summarizer.run()
-                    logger.debug('%s: done %s', self.label, uuid)
-                    continue
-
-                # 2017-12-02_17:15:08 e51c5-8i9sb-mfp68stkxnqdd6m 63676 0 stderr crunchstat: keepcalls 0 put 2576 get -- interval 10.0000 seconds 0 put 2576 get
-                m = re.search(r'^(?P<timestamp>[^\s.]+)(\.\d+)? (?P<job_uuid>\S+) \d+ (?P<seq>\d+) stderr (?P<crunchstat>crunchstat: )(?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n$', line)
-                if not m:
-                    continue
-            else:
-                # crunch2
-                # 2017-12-01T16:56:24.723509200Z crunchstat: keepcalls 0 put 3 get -- interval 10.0000 seconds 0 put 3 get
-                m = re.search(r'^(?P<timestamp>\S+) (?P<crunchstat>crunchstat: )?(?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n$', line)
-                if not m:
-                    continue
+            # crunch2
+            # 2017-12-01T16:56:24.723509200Z crunchstat: keepcalls 0 put 3 get -- interval 10.0000 seconds 0 put 3 get
+            m = re.search(r'^(?P<timestamp>\S+) (?P<crunchstat>crunchstat: )?(?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n$', line)
+            if not m:
+                continue
 
             if self.label is None:
                 try:
                     self.label = m.group('job_uuid')
                 except IndexError:
                     self.label = 'label #1'
-            category = m.group('category')
-            if category.endswith(':'):
-                # "stderr crunchstat: notice: ..."
-                continue
-            elif category in ('error', 'caught'):
-                continue
-            elif category in ('read', 'open', 'cgroup', 'CID', 'Running'):
-                # "stderr crunchstat: read /proc/1234/net/dev: ..."
-                # (old logs are less careful with unprefixed error messages)
-                continue
 
-            if self.detected_crunch1:
-                task_id = self.seq_to_uuid[int(m.group('seq'))]
-            else:
-                task_id = 'container'
+            task_id = 'container'
             task = self.tasks[task_id]
 
             # Use the first and last crunchstat timestamps as
@@ -179,12 +122,23 @@ class Summarizer(object):
             if self.finishtime is None or timestamp > self.finishtime:
                 self.finishtime = timestamp
 
-            if (not self.detected_crunch1) and task.starttime is not None and task.finishtime is not None:
+            if task.starttime is not None and task.finishtime is not None:
                 elapsed = (task.finishtime - task.starttime).seconds
                 self.task_stats[task_id]['time'] = {'elapsed': elapsed}
                 if elapsed > self.stats_max['time']['elapsed']:
                     self.stats_max['time']['elapsed'] = elapsed
 
+            category = m.group('category')
+            if category.endswith(':'):
+                # "stderr crunchstat: notice: ..."
+                continue
+            elif category in ('error', 'caught'):
+                continue
+            elif category in ('read', 'open', 'cgroup', 'CID', 'Running'):
+                # "stderr crunchstat: read /proc/1234/net/dev: ..."
+                # (old logs are less careful with unprefixed error messages)
+                continue
+
             this_interval_s = None
             for group in ['current', 'interval']:
                 if not m.group(group):
@@ -359,7 +313,7 @@ class Summarizer(object):
             ('Requested CPU cores',
              self.existing_constraints.get(self._map_runtime_constraint('vcpus')),
              None,
-             ''),
+             '') if self.existing_constraints.get(self._map_runtime_constraint('vcpus')) else None,
 
             ('Instance VCPUs',
              self.node_info.get('VCPUs'),
@@ -374,12 +328,12 @@ class Summarizer(object):
             ('Requested RAM',
              self.existing_constraints.get(self._map_runtime_constraint('ram')),
              lambda x: x / 2**20,
-             'MB'),
+             'MB') if self.existing_constraints.get(self._map_runtime_constraint('ram')) else None,
 
             ('Maximum RAM request for this instance type',
-             (self.node_info.get('RAM') - self.arv_config.get('Containers', {}).get('ReserveExtraRAM', {}))*.95,
+             (self.node_info.get('RAM') - self.arv_config.get('Containers', {}).get('ReserveExtraRAM', 0))*.95,
              lambda x: x / 2**20,
-             'MB'),
+             'MB') if self.node_info.get('RAM') else None,
 
             ('Max network traffic{}'.format(by_single_task),
              self.stats_max['net:eth0']['tx+rx'] +
@@ -509,6 +463,8 @@ class Summarizer(object):
         if used_bytes == float('-Inf'):
             logger.warning('%s: no memory usage data', self.label)
             return
+        if not self.existing_constraints.get(constraint_key):
+            return
         used_mib = math.ceil(float(used_bytes) / MB)
         asked_mib = self.existing_constraints.get(constraint_key) / MB
 
@@ -574,18 +530,11 @@ class Summarizer(object):
     def _runtime_constraint_mem_unit(self):
         if hasattr(self, 'runtime_constraint_mem_unit'):
             return self.runtime_constraint_mem_unit
-        elif self.detected_crunch1:
-            return JobSummarizer.runtime_constraint_mem_unit
         else:
             return ContainerRequestSummarizer.runtime_constraint_mem_unit
 
     def _map_runtime_constraint(self, key):
-        if hasattr(self, 'map_runtime_constraint'):
-            return self.map_runtime_constraint[key]
-        elif self.detected_crunch1:
-            return JobSummarizer.map_runtime_constraint[key]
-        else:
-            return key
+        return key
 
 
 class CollectionSummarizer(Summarizer):
@@ -617,14 +566,6 @@ def NewSummarizer(process_or_uuid, **kwargs):
         if process is None:
             process = arv.container_requests().get(uuid=uuid).execute()
         klass = ContainerRequestTreeSummarizer
-    elif '-8i9sb-' in uuid:
-        if process is None:
-            process = arv.jobs().get(uuid=uuid).execute()
-        klass = JobTreeSummarizer
-    elif '-d1hrv-' in uuid:
-        if process is None:
-            process = arv.pipeline_instances().get(uuid=uuid).execute()
-        klass = PipelineSummarizer
     elif '-4zz18-' in uuid:
         return CollectionSummarizer(collection_id=uuid)
     else:
@@ -646,7 +587,10 @@ class ProcessSummarizer(Summarizer):
         log_collection = self.process.get('log', self.process.get('log_uuid'))
         if log_collection and self.process.get('state') != 'Uncommitted': # arvados.util.CR_UNCOMMITTED:
             try:
-                rdr = crunchstat_summary.reader.CollectionReader(log_collection, api_client=arv)
+                rdr = crunchstat_summary.reader.CollectionReader(
+                    log_collection,
+                    api_client=arv,
+                    collection_object=kwargs.get("collection_object"))
             except arvados.errors.NotFoundError as e:
                 logger.warning("Trying event logs after failing to read "
                                "log collection %s: %s", self.process['log'], e)
@@ -661,16 +605,6 @@ class ProcessSummarizer(Summarizer):
         self.cost = self.process.get('cost', 0)
 
 
-
-class JobSummarizer(ProcessSummarizer):
-    runtime_constraint_mem_unit = MB
-    map_runtime_constraint = {
-        'keep_cache_ram': 'keep_cache_mb_per_task',
-        'ram': 'min_ram_mb_per_node',
-        'vcpus': 'min_cores_per_node',
-    }
-
-
 class ContainerRequestSummarizer(ProcessSummarizer):
     runtime_constraint_mem_unit = 1
 
@@ -741,51 +675,6 @@ class MultiSummarizer(object):
         return WEBCHART_CLASS(label, iter(self._descendants().values())).html(tophtml, bottomhtml)
 
 
-class JobTreeSummarizer(MultiSummarizer):
-    """Summarizes a job and all children listed in its components field."""
-    def __init__(self, job, label=None, **kwargs):
-        arv = kwargs.get("arv") or arvados.api('v1')
-        label = label or job.get('name', job['uuid'])
-        children = collections.OrderedDict()
-        children[job['uuid']] = JobSummarizer(job, label=label, **kwargs)
-        if job.get('components', None):
-            preloaded = {}
-            for j in arv.jobs().index(
-                    limit=len(job['components']),
-                    filters=[['uuid','in',list(job['components'].values())]]).execute()['items']:
-                preloaded[j['uuid']] = j
-            for cname in sorted(job['components'].keys()):
-                child_uuid = job['components'][cname]
-                j = (preloaded.get(child_uuid) or
-                     arv.jobs().get(uuid=child_uuid).execute())
-                children[child_uuid] = JobTreeSummarizer(job=j, label=cname, **kwargs)
-
-        super(JobTreeSummarizer, self).__init__(
-            children=children,
-            label=label,
-            **kwargs)
-
-
-class PipelineSummarizer(MultiSummarizer):
-    def __init__(self, instance, **kwargs):
-        children = collections.OrderedDict()
-        for cname, component in instance['components'].items():
-            if 'job' not in component:
-                logger.warning(
-                    "%s: skipping component with no job assigned", cname)
-            else:
-                logger.info(
-                    "%s: job %s", cname, component['job']['uuid'])
-                summarizer = JobTreeSummarizer(component['job'], label=cname, **kwargs)
-                summarizer.label = '{} {}'.format(
-                    cname, component['job']['uuid'])
-                children[cname] = summarizer
-        super(PipelineSummarizer, self).__init__(
-            children=children,
-            label=instance['uuid'],
-            **kwargs)
-
-
 class ContainerRequestTreeSummarizer(MultiSummarizer):
     def __init__(self, root, skip_child_jobs=False, **kwargs):
         arv = kwargs.get("arv") or arvados.api('v1')
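
With the crunch1 branch gone, _run() keys everything off the single crunch2 pattern retained above. Checking that regex against the sample line from the code comment (group names as in the patch):

    import re

    line = ("2017-12-01T16:56:24.723509200Z crunchstat: "
            "keepcalls 0 put 3 get -- interval 10.0000 seconds 0 put 3 get\n")
    m = re.search(
        r'^(?P<timestamp>\S+) (?P<crunchstat>crunchstat: )?(?P<category>\S+) '
        r'(?P<current>.*?)( -- interval (?P<interval>.*))?\n$', line)
    print(m.group('timestamp'))  # 2017-12-01T16:56:24.723509200Z
    print(m.group('category'))   # keepcalls
    print(m.group('current'))    # 0 put 3 get
    print(m.group('interval'))   # 10.0000 seconds 0 put 3 get
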
diff --git a/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk.txt.gz.report b/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk.txt.gz.report
index 868f07b684eedad0544723ab50cf5b90a86329bd..e00faafb00f272738605b2b09201dfb61efc09ca 100644 (file)
@@ -25,15 +25,14 @@ statfs      available       397744787456    -       397744787456
 statfs total   402611240960    -       402611240960
 statfs used    4870303744      52426.18        4866453504
 time   elapsed 20      -       20
-# Number of tasks: 1
-# Max CPU time spent by a single task: 2.45s
+# Elapsed time: 20s
 # Max CPU usage in a single interval: 23.70%
 # Overall CPU usage: 12.25%
-# Max memory used by a single task: 0.07GB
-# Max network traffic in a single task: 0.00GB
+# Requested CPU cores: 1
+# Max memory used: 66.30MB
+# Requested RAM: 2500.00MB
+# Max network traffic: 0.00GB
 # Max network speed in a single interval: 0.00MB/s
-# Keep cache miss rate 0.00%
-# Keep cache utilization 0.00%
-# Temp disk utilization 1.21%
-#!! container max RSS was 67 MiB -- try reducing runtime_constraints to "ram":1020054732
-#!! container max temp disk utilization was 1% of 383960 MiB -- consider reducing "tmpdirMin" and/or "outdirMin"
+# Keep cache miss rate: 0.00%
+# Keep cache utilization: 0.00%
+# Temp disk utilization: 1.21%
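
The new "Requested CPU cores" and "Requested RAM" lines appear only when the corresponding runtime constraint is set (see the conditional tuples in the summarizer diff). The RAM figure is the byte value divided by 2**20; assuming this fixture requests 2621440000 bytes, that works out to the 2500.00MB shown:

    # Same conversion as the summarizer's lambda x: x / 2**20 formatter.
    print(2621440000 / 2**20)  # 2500.0 MB
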
diff --git a/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-arv-mount.txt.gz.report b/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-arv-mount.txt.gz.report
index f77059b82496f5825d9d634847a2b0537efaed72..6afdf9aa69d756c6edc2352b2dc37b8d997ac3c4 100644 (file)
@@ -11,13 +11,12 @@ net:keep0   rx      0       0       0
 net:keep0      tx      0       0       0
 net:keep0      tx+rx   0       0       0
 time   elapsed 10      -       10
-# Number of tasks: 1
-# Max CPU time spent by a single task: 0s
+# Elapsed time: 10s
 # Max CPU usage in a single interval: 0%
 # Overall CPU usage: 0.00%
-# Max memory used by a single task: 0.00GB
-# Max network traffic in a single task: 0.00GB
+# Max memory used: 0.00MB
+# Max network traffic: 0.00GB
 # Max network speed in a single interval: 0.00MB/s
-# Keep cache miss rate 0.00%
-# Keep cache utilization 0.00%
-# Temp disk utilization 0.00%
+# Keep cache miss rate: 0.00%
+# Keep cache utilization: 0.00%
+# Temp disk utilization: 0.00%
diff --git a/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-crunchstat.txt.gz.report b/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-crunchstat.txt.gz.report
index 87db98bb37cc468c645b3ce9af03f78e5d024b3e..fa1ad04e7b5171adf3e96d6248a7c29d506eea7b 100644 (file)
@@ -14,15 +14,12 @@ statfs      available       397744787456    -       397744787456
 statfs total   402611240960    -       402611240960
 statfs used    4870303744      52426.18        4866453504
 time   elapsed 20      -       20
-# Number of tasks: 1
-# Max CPU time spent by a single task: 2.45s
+# Elapsed time: 20s
 # Max CPU usage in a single interval: 23.70%
 # Overall CPU usage: 12.25%
-# Max memory used by a single task: 0.07GB
-# Max network traffic in a single task: 0.00GB
+# Max memory used: 66.30MB
+# Max network traffic: 0.00GB
 # Max network speed in a single interval: 0.00MB/s
-# Keep cache miss rate 0.00%
-# Keep cache utilization 0.00%
-# Temp disk utilization 1.21%
-#!! label #1 max RSS was 67 MiB -- try reducing runtime_constraints to "ram":1020054732
-#!! label #1 max temp disk utilization was 1% of 383960 MiB -- consider reducing "tmpdirMin" and/or "outdirMin"
+# Keep cache miss rate: 0.00%
+# Keep cache utilization: 0.00%
+# Temp disk utilization: 1.21%
diff --git a/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y.txt.gz.report b/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y.txt.gz.report
index 868f07b684eedad0544723ab50cf5b90a86329bd..e00faafb00f272738605b2b09201dfb61efc09ca 100644 (file)
@@ -25,15 +25,14 @@ statfs      available       397744787456    -       397744787456
 statfs total   402611240960    -       402611240960
 statfs used    4870303744      52426.18        4866453504
 time   elapsed 20      -       20
-# Number of tasks: 1
-# Max CPU time spent by a single task: 2.45s
+# Elapsed time: 20s
 # Max CPU usage in a single interval: 23.70%
 # Overall CPU usage: 12.25%
-# Max memory used by a single task: 0.07GB
-# Max network traffic in a single task: 0.00GB
+# Requested CPU cores: 1
+# Max memory used: 66.30MB
+# Requested RAM: 2500.00MB
+# Max network traffic: 0.00GB
 # Max network speed in a single interval: 0.00MB/s
-# Keep cache miss rate 0.00%
-# Keep cache utilization 0.00%
-# Temp disk utilization 1.21%
-#!! container max RSS was 67 MiB -- try reducing runtime_constraints to "ram":1020054732
-#!! container max temp disk utilization was 1% of 383960 MiB -- consider reducing "tmpdirMin" and/or "outdirMin"
+# Keep cache miss rate: 0.00%
+# Keep cache utilization: 0.00%
+# Temp disk utilization: 1.21%
diff --git a/tools/crunchstat-summary/tests/crunchstat_error_messages.txt b/tools/crunchstat-summary/tests/crunchstat_error_messages.txt
index bf6dd5ceaff9a0e689e9caa7afb6009c724261a5..2b93639281c8a659358f074a4c84f281841bfe12 100644 (file)
@@ -1,9 +1,9 @@
-2016-01-07_00:15:33 tb05z-8i9sb-khsk5rmf4xjdcbl 20819 0 stderr 
+2016-01-07_00:15:33 tb05z-8i9sb-khsk5rmf4xjdcbl 20819 0 stderr
 2016-01-07_00:15:33 tb05z-8i9sb-khsk5rmf4xjdcbl 20819 0 stderr old error message:
 2016-01-07_00:15:33 tb05z-8i9sb-khsk5rmf4xjdcbl 20819 0 stderr crunchstat: read /proc/3305/net/dev: open /proc/3305/net/dev: no such file or directory
-2016-01-07_00:15:34 tb05z-8i9sb-khsk5rmf4xjdcbl 20819 0 stderr 
+2016-01-07_00:15:34 tb05z-8i9sb-khsk5rmf4xjdcbl 20819 0 stderr
 2016-01-07_00:15:34 tb05z-8i9sb-khsk5rmf4xjdcbl 20819 0 stderr new error message:
 2016-01-07_00:15:34 tb05z-8i9sb-khsk5rmf4xjdcbl 20819 0 stderr crunchstat: error reading /proc/3305/net/dev: open /proc/3305/net/dev: no such file or directory
 2016-01-07_00:15:34 tb05z-8i9sb-khsk5rmf4xjdcbl 20819 0 stderr
 2016-01-07_00:15:34 tb05z-8i9sb-khsk5rmf4xjdcbl 20819 0 stderr cancelled job:
-2016-01-07_00:15:34 tb05z-8i9sb-khsk5rmf4xjdcbl 20819 0 stderr crunchstat: caught signal: interrupt
+2016-01-07_00:15:59 tb05z-8i9sb-khsk5rmf4xjdcbl 20819 0 stderr crunchstat: caught signal: interrupt
diff --git a/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz b/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz
deleted file mode 100644 (file)
index bfdcdff..0000000
Binary files a/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz and /dev/null differ
diff --git a/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz.report b/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz.report
deleted file mode 100644 (file)
index 173e93f..0000000
+++ /dev/null
@@ -1,35 +0,0 @@
-category       metric  task_max        task_max_rate   job_total
-blkio:0:0      read    0       0       0
-blkio:0:0      write   0       0       0
-cpu    cpus    8.00    -       -
-cpu    sys     1.92    0.04    1.92
-cpu    user    3.83    0.09    3.83
-cpu    user+sys        5.75    0.13    5.75
-fuseops        read    0       0       0
-fuseops        write   0       0       0
-keepcache      hit     0       0       0
-keepcache      miss    0       0       0
-keepcalls      get     0       0       0
-keepcalls      put     0       0       0
-mem    cache   1678139392      -       -
-mem    pgmajfault      0       -       0
-mem    rss     349814784       -       -
-mem    swap    0       -       -
-net:eth0       rx      1754364530      41658344.87     1754364530
-net:eth0       tx      38837956        920817.97       38837956
-net:eth0       tx+rx   1793202486      42579162.83     1793202486
-net:keep0      rx      0       0       0
-net:keep0      tx      0       0       0
-net:keep0      tx+rx   0       0       0
-time   elapsed 80      -       80
-# Number of tasks: 1
-# Max CPU time spent by a single task: 5.75s
-# Max CPU usage in a single interval: 13.00%
-# Overall CPU usage: 7.19%
-# Max memory used by a single task: 0.35GB
-# Max network traffic in a single task: 1.79GB
-# Max network speed in a single interval: 42.58MB/s
-# Keep cache miss rate 0.00%
-# Keep cache utilization 0.00%
-# Temp disk utilization 0.00%
-#!! 4xphq-8i9sb-jq0ekny1xou3zoh max RSS was 334 MiB -- try reducing runtime_constraints to "min_ram_mb_per_node":972
diff --git a/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz b/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz
deleted file mode 100644 (file)
index 17af535..0000000
Binary files a/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz and /dev/null differ
diff --git a/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz.report b/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz.report
deleted file mode 100644 (file)
index b31a055..0000000
+++ /dev/null
@@ -1,24 +0,0 @@
-category       metric  task_max        task_max_rate   job_total
-cpu    cpus    8.00    -       -
-cpu    sys     0       -       0.00
-cpu    user    0       -       0.00
-cpu    user+sys        0       -       0.00
-mem    cache   12288   -       -
-mem    pgmajfault      0       -       0
-mem    rss     856064  -       -
-mem    swap    0       -       -
-net:eth0       rx      90      -       90
-net:eth0       tx      90      -       90
-net:eth0       tx+rx   180     -       180
-time   elapsed 2       -       4
-# Number of tasks: 2
-# Max CPU time spent by a single task: 0s
-# Max CPU usage in a single interval: 0%
-# Overall CPU usage: 0.00%
-# Max memory used by a single task: 0.00GB
-# Max network traffic in a single task: 0.00GB
-# Max network speed in a single interval: 0.00MB/s
-# Keep cache miss rate 0.00%
-# Keep cache utilization 0.00%
-# Temp disk utilization 0.00%
-#!! 4xphq-8i9sb-zvb2ocfycpomrup max RSS was 1 MiB -- try reducing runtime_constraints to "min_ram_mb_per_node":972
diff --git a/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz b/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz
deleted file mode 100644 (file)
index 8826f70..0000000
Binary files a/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz and /dev/null differ
diff --git a/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz.report b/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz.report
deleted file mode 100644 (file)
index 9ddf5ac..0000000
+++ /dev/null
@@ -1,24 +0,0 @@
-category       metric  task_max        task_max_rate   job_total
-cpu    cpus    8.00    -       -
-cpu    sys     0       -       0.00
-cpu    user    0       -       0.00
-cpu    user+sys        0       -       0.00
-mem    cache   8192    -       -
-mem    pgmajfault      0       -       0
-mem    rss     450560  -       -
-mem    swap    0       -       -
-net:eth0       rx      90      -       90
-net:eth0       tx      90      -       90
-net:eth0       tx+rx   180     -       180
-time   elapsed 2       -       3
-# Number of tasks: 2
-# Max CPU time spent by a single task: 0s
-# Max CPU usage in a single interval: 0%
-# Overall CPU usage: 0.00%
-# Max memory used by a single task: 0.00GB
-# Max network traffic in a single task: 0.00GB
-# Max network speed in a single interval: 0.00MB/s
-# Keep cache miss rate 0.00%
-# Keep cache utilization 0.00%
-# Temp disk utilization 0.00%
-#!! 4xphq-8i9sb-v831jm2uq0g2g9x max RSS was 1 MiB -- try reducing runtime_constraints to "min_ram_mb_per_node":972
diff --git a/tools/crunchstat-summary/tests/test_examples.py b/tools/crunchstat-summary/tests/test_examples.py
index 444cfe4ef83258543f5dd8905afbd6a0b9cf4829..5a20d3283f813341cc47e51b5e46231dc92b6829 100644 (file)
@@ -16,7 +16,7 @@ import sys
 import unittest
 
 from crunchstat_summary.command import UTF8Decode
-from crunchstat_summary import logger
+from crunchstat_summary import logger, reader
 
 TESTS_DIR = os.path.dirname(os.path.abspath(__file__))
 
@@ -71,14 +71,13 @@ class HTMLFromFile(TestCase):
 class SummarizeEdgeCases(TestCase):
     def test_error_messages(self):
         logfile = io.open(os.path.join(TESTS_DIR, 'crunchstat_error_messages.txt'), encoding='utf-8')
-        s = crunchstat_summary.summarizer.Summarizer(logfile)
+        s = crunchstat_summary.summarizer.Summarizer(reader.StubReader(logfile))
         s.run()
         self.assertRegex(self.logbuf.getvalue(), r'CPU stats are missing -- possible cluster configuration issue')
         self.assertRegex(self.logbuf.getvalue(), r'memory stats are missing -- possible cluster configuration issue')
         self.assertRegex(self.logbuf.getvalue(), r'network I/O stats are missing -- possible cluster configuration issue')
         self.assertRegex(self.logbuf.getvalue(), r'storage space stats are missing -- possible cluster configuration issue')
 
-
 class SummarizeContainerCommon(TestCase):
     fake_container = {
         'uuid': '9tee4-dz642-lymtndkpy39eibk',
@@ -106,20 +105,19 @@ class SummarizeContainerCommon(TestCase):
     @mock.patch('arvados.api')
     def check_common(self, mock_api, mock_cr):
         items = [ {'items':[self.fake_request]}] + [{'items':[]}] * 100
-        # Index and list mean the same thing, but are used in different places in the
-        # code. It's fragile, but exploit that fact to distinguish the two uses.
-        mock_api().container_requests().index().execute.return_value = {'items': [] }  # child_crs
         mock_api().container_requests().list().execute.side_effect = items # parent request
         mock_api().container_requests().get().execute.return_value = self.fake_request
         mock_api().containers().get().execute.return_value = self.fake_container
         mock_cr().__iter__.return_value = [
             'crunch-run.txt', 'stderr.txt', 'node-info.txt',
             'container.json', 'crunchstat.txt', 'arv-mount.txt']
-        def _open(n):
+        def _open(n, mode):
             if n == "crunchstat.txt":
                 return UTF8Decode(gzip.open(self.logfile))
             elif n == "arv-mount.txt":
                 return UTF8Decode(gzip.open(self.arvmountlog))
+            elif n == "node.json":
+                return io.StringIO("{}")
         mock_cr().open.side_effect = _open
         args = crunchstat_summary.command.ArgumentParser().parse_args(
             self.arg_strings)
@@ -147,184 +145,3 @@ class SummarizeContainerRequest(SummarizeContainerCommon):
         self.check_common()
         self.assertNotRegex(self.logbuf.getvalue(), r'stats are missing')
         self.assertNotRegex(self.logbuf.getvalue(), r'possible cluster configuration issue')
-
-
-class SummarizeJob(TestCase):
-    fake_job_uuid = '4xphq-8i9sb-jq0ekny1xou3zoh'
-    fake_log_id = 'fake-log-collection-id'
-    fake_job = {
-        'uuid': fake_job_uuid,
-        'log': fake_log_id,
-    }
-    logfile = os.path.join(TESTS_DIR, 'logfile_20151204190335.txt.gz')
-
-    @mock.patch('arvados.collection.CollectionReader')
-    @mock.patch('arvados.api')
-    def test_job_report(self, mock_api, mock_cr):
-        mock_api().jobs().get().execute.return_value = self.fake_job
-        mock_cr().__iter__.return_value = ['fake-logfile.txt']
-        mock_cr().open.return_value = UTF8Decode(gzip.open(self.logfile))
-        args = crunchstat_summary.command.ArgumentParser().parse_args(
-            ['--job', self.fake_job_uuid])
-        cmd = crunchstat_summary.command.Command(args)
-        cmd.run()
-        self.diff_known_report(self.logfile, cmd)
-        mock_api().jobs().get.assert_called_with(uuid=self.fake_job_uuid)
-        mock_cr.assert_called_with(self.fake_log_id)
-        mock_cr().open.assert_called_with('fake-logfile.txt')
-
-
-class SummarizePipeline(TestCase):
-    fake_instance = {
-        'uuid': 'zzzzz-d1hrv-i3e77t9z5y8j9cc',
-        'owner_uuid': 'zzzzz-tpzed-xurymjxw79nv3jz',
-        'components': collections.OrderedDict([
-            ['foo', {
-                'job': {
-                    'uuid': 'zzzzz-8i9sb-000000000000000',
-                    'log': 'fake-log-pdh-0',
-                    'runtime_constraints': {
-                        'min_ram_mb_per_node': 900,
-                        'min_cores_per_node': 1,
-                    },
-                },
-            }],
-            ['bar', {
-                'job': {
-                    'uuid': 'zzzzz-8i9sb-000000000000001',
-                    'log': 'fake-log-pdh-1',
-                    'runtime_constraints': {
-                        'min_ram_mb_per_node': 900,
-                        'min_cores_per_node': 1,
-                    },
-                },
-            }],
-            ['no-job-assigned', {}],
-            ['unfinished-job', {
-                'job': {
-                    'uuid': 'zzzzz-8i9sb-xxxxxxxxxxxxxxx',
-                },
-            }],
-            ['baz', {
-                'job': {
-                    'uuid': 'zzzzz-8i9sb-000000000000002',
-                    'log': 'fake-log-pdh-2',
-                    'runtime_constraints': {
-                        'min_ram_mb_per_node': 900,
-                        'min_cores_per_node': 1,
-                    },
-                },
-            }]]),
-    }
-
-    @mock.patch('arvados.collection.CollectionReader')
-    @mock.patch('arvados.api')
-    def test_pipeline(self, mock_api, mock_cr):
-        logfile = os.path.join(TESTS_DIR, 'logfile_20151204190335.txt.gz')
-        mock_api().pipeline_instances().get().execute. \
-            return_value = self.fake_instance
-        mock_cr().__iter__.return_value = ['fake-logfile.txt']
-        mock_cr().open.side_effect = [UTF8Decode(gzip.open(logfile)) for _ in range(3)]
-        args = crunchstat_summary.command.ArgumentParser().parse_args(
-            ['--pipeline-instance', self.fake_instance['uuid']])
-        cmd = crunchstat_summary.command.Command(args)
-        cmd.run()
-
-        with io.open(logfile+'.report', encoding='utf-8') as f:
-            job_report = [line for line in f if not line.startswith('#!! ')]
-        expect = (
-            ['### Summary for foo (zzzzz-8i9sb-000000000000000)\n'] +
-            job_report + ['\n'] +
-            ['### Summary for bar (zzzzz-8i9sb-000000000000001)\n'] +
-            job_report + ['\n'] +
-            ['### Summary for unfinished-job (partial) (zzzzz-8i9sb-xxxxxxxxxxxxxxx)\n',
-             '(no report generated)\n',
-             '\n'] +
-            ['### Summary for baz (zzzzz-8i9sb-000000000000002)\n'] +
-            job_report)
-        self.diff_report(cmd, expect)
-        mock_cr.assert_has_calls(
-            [
-                mock.call('fake-log-pdh-0'),
-                mock.call('fake-log-pdh-1'),
-                mock.call('fake-log-pdh-2'),
-            ], any_order=True)
-        mock_cr().open.assert_called_with('fake-logfile.txt')
-
-
-class SummarizeACRJob(TestCase):
-    fake_job = {
-        'uuid': 'zzzzz-8i9sb-i3e77t9z5y8j9cc',
-        'owner_uuid': 'zzzzz-tpzed-xurymjxw79nv3jz',
-        'components': {
-            'foo': 'zzzzz-8i9sb-000000000000000',
-            'bar': 'zzzzz-8i9sb-000000000000001',
-            'unfinished-job': 'zzzzz-8i9sb-xxxxxxxxxxxxxxx',
-            'baz': 'zzzzz-8i9sb-000000000000002',
-        }
-    }
-    fake_jobs_index = { 'items': [
-        {
-            'uuid': 'zzzzz-8i9sb-000000000000000',
-            'log': 'fake-log-pdh-0',
-            'runtime_constraints': {
-                'min_ram_mb_per_node': 900,
-                'min_cores_per_node': 1,
-            },
-        },
-        {
-            'uuid': 'zzzzz-8i9sb-000000000000001',
-            'log': 'fake-log-pdh-1',
-            'runtime_constraints': {
-                'min_ram_mb_per_node': 900,
-                'min_cores_per_node': 1,
-            },
-        },
-        {
-            'uuid': 'zzzzz-8i9sb-xxxxxxxxxxxxxxx',
-        },
-        {
-            'uuid': 'zzzzz-8i9sb-000000000000002',
-            'log': 'fake-log-pdh-2',
-            'runtime_constraints': {
-                'min_ram_mb_per_node': 900,
-                'min_cores_per_node': 1,
-            },
-        },
-    ]}
-    @mock.patch('arvados.collection.CollectionReader')
-    @mock.patch('arvados.api')
-    def test_acr_job(self, mock_api, mock_cr):
-        logfile = os.path.join(TESTS_DIR, 'logfile_20151204190335.txt.gz')
-        mock_api().jobs().index().execute.return_value = self.fake_jobs_index
-        mock_api().jobs().get().execute.return_value = self.fake_job
-        mock_cr().__iter__.return_value = ['fake-logfile.txt']
-        mock_cr().open.side_effect = [UTF8Decode(gzip.open(logfile)) for _ in range(3)]
-        args = crunchstat_summary.command.ArgumentParser().parse_args(
-            ['--job', self.fake_job['uuid']])
-        cmd = crunchstat_summary.command.Command(args)
-        cmd.run()
-
-        with io.open(logfile+'.report', encoding='utf-8') as f:
-            job_report = [line for line in f if not line.startswith('#!! ')]
-        expect = (
-            ['### Summary for zzzzz-8i9sb-i3e77t9z5y8j9cc (partial) (zzzzz-8i9sb-i3e77t9z5y8j9cc)\n',
-             '(no report generated)\n',
-             '\n'] +
-            ['### Summary for bar (zzzzz-8i9sb-000000000000001)\n'] +
-            job_report + ['\n'] +
-            ['### Summary for baz (zzzzz-8i9sb-000000000000002)\n'] +
-            job_report + ['\n'] +
-            ['### Summary for foo (zzzzz-8i9sb-000000000000000)\n'] +
-            job_report + ['\n'] +
-            ['### Summary for unfinished-job (partial) (zzzzz-8i9sb-xxxxxxxxxxxxxxx)\n',
-             '(no report generated)\n']
-        )
-        self.diff_report(cmd, expect)
-        mock_cr.assert_has_calls(
-            [
-                mock.call('fake-log-pdh-0'),
-                mock.call('fake-log-pdh-1'),
-                mock.call('fake-log-pdh-2'),
-            ], any_order=True)
-        mock_cr().open.assert_called_with('fake-logfile.txt')