Update crunchstat-summary to Python 3. Refs #14939
author Tom Morris <tfmorris@veritasgenetics.com>
Fri, 8 Mar 2019 23:40:24 +0000 (18:40 -0500)
committer Tom Morris <tfmorris@veritasgenetics.com>
Thu, 21 Mar 2019 19:35:36 +0000 (15:35 -0400)
Arvados-DCO-1.1-Signed-off-by: Tom Morris <tfmorris@veritasgenetics.com>

tools/crunchstat-summary/crunchstat_summary/command.py
tools/crunchstat-summary/crunchstat_summary/dygraphs.py
tools/crunchstat-summary/crunchstat_summary/reader.py
tools/crunchstat-summary/crunchstat_summary/summarizer.py
tools/crunchstat-summary/crunchstat_summary/webchart.py
tools/crunchstat-summary/tests/test_examples.py

index 71bf38357b885952fd9c327317a458a81ab48984..aadc775823caf136c7f7094a0d2b55fcb50f4478 100644 (file)
@@ -4,6 +4,7 @@
 
 import argparse
 import gzip
+from io import open
 import logging
 import sys
 
@@ -41,6 +42,31 @@ class ArgumentParser(argparse.ArgumentParser):
             help='Log more information (once for progress, twice for debug)')
 
 
+class UTF8Decode(object):
+    '''Wrap a file-like iterable to decode UTF-8 bytes into strings
+    '''
+    def __init__(self, fh):
+        self.fh = fh
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.close()
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        return next(self.fh).decode('utf-8')
+
+    next = __next__
+
+    def close(self):
+        # mimic Gzip behavior and don't close underlying object
+        pass
+
+
 class Command(object):
     def __init__(self, args):
         self.args = args
@@ -57,9 +83,9 @@ class Command(object):
             self.summer = summarizer.NewSummarizer(self.args.job, **kwargs)
         elif self.args.log_file:
             if self.args.log_file.endswith('.gz'):
-                fh = gzip.open(self.args.log_file)
+                fh = UTF8Decode(gzip.open(self.args.log_file))
             else:
-                fh = open(self.args.log_file)
+                fh = open(self.args.log_file, mode = 'r', encoding = 'utf-8')
             self.summer = summarizer.Summarizer(fh, **kwargs)
         else:
             self.summer = summarizer.Summarizer(sys.stdin, **kwargs)
index 1314e9df35612817e260e6644212ef2e8a387bc3..6df440a14e37f87f8fcea5cac7c57ca1269915b4 100644 (file)
@@ -18,7 +18,7 @@ class DygraphsChart(crunchstat_summary.webchart.WebChart):
             'data': self._collate_data(tasks, stat),
             'options': {
                 'connectSeparatedPoints': True,
-                'labels': ['elapsed']+[uuid for uuid, _ in tasks.iteritems()],
+                'labels': ['elapsed']+[uuid for uuid, _ in tasks.items()],
                 'title': '{}: {} {}'.format(label, stat[0], stat[1]),
             },
         }
@@ -26,7 +26,7 @@ class DygraphsChart(crunchstat_summary.webchart.WebChart):
     def _collate_data(self, tasks, stat):
         data = []
         nulls = []
-        for uuid, task in tasks.iteritems():
+        for uuid, task in tasks.items():
             for pt in task.series[stat]:
                 data.append([pt[0].total_seconds()] + nulls + [pt[1]])
             nulls.append(None)
index 311c006c07d882a40ee5af8eaae651ba1e3c7145..8ccdbc2fcf04e45ca3ab3ec6e2270933d050ea1c 100644 (file)
@@ -2,11 +2,9 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
-from __future__ import print_function
-
 import arvados
 import itertools
-import Queue
+import queue
 import threading
 
 from crunchstat_summary import logger
@@ -87,19 +85,21 @@ class LiveLogReader(object):
             self._queue.put(self.EOF)
 
     def __iter__(self):
-        self._queue = Queue.Queue()
+        self._queue = queue.Queue()
         self._thread = threading.Thread(target=self._get_all_pages)
         self._thread.daemon = True
         self._thread.start()
         return self
 
-    def next(self):
+    def __next__(self):
         line = self._queue.get()
         if line is self.EOF:
             self._thread.join()
             raise StopIteration
         return line
 
+    next = __next__ # for Python 2
+
     def __enter__(self):
         return self
 
index b2f6f1bb700b6d5d2a04f0212c699eb1ace15435..543011ea2c743129fd25dfec8ed96f3c82c8a394 100644 (file)
@@ -2,8 +2,6 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
-from __future__ import print_function
-
 import arvados
 import collections
 import crunchstat_summary.dygraphs
@@ -209,7 +207,7 @@ class Summarizer(object):
                     stats['user+sys'] = stats.get('user', 0) + stats.get('sys', 0)
                 if 'tx' in stats or 'rx' in stats:
                     stats['tx+rx'] = stats.get('tx', 0) + stats.get('rx', 0)
-                for stat, val in stats.iteritems():
+                for stat, val in stats.items():
                     if group == 'interval':
                         if stat == 'seconds':
                             this_interval_s = val
@@ -236,9 +234,9 @@ class Summarizer(object):
 
         self.job_tot = collections.defaultdict(
             functools.partial(collections.defaultdict, int))
-        for task_id, task_stat in self.task_stats.iteritems():
-            for category, stat_last in task_stat.iteritems():
-                for stat, val in stat_last.iteritems():
+        for task_id, task_stat in self.task_stats.items():
+            for category, stat_last in task_stat.items():
+                for stat, val in stat_last.items():
                     if stat in ['cpus', 'cache', 'swap', 'rss']:
                         # meaningless stats like 16 cpu cores x 5 tasks = 80
                         continue
@@ -273,8 +271,8 @@ class Summarizer(object):
 
     def _text_report_gen(self):
         yield "\t".join(['category', 'metric', 'task_max', 'task_max_rate', 'job_total'])
-        for category, stat_max in sorted(self.stats_max.iteritems()):
-            for stat, val in sorted(stat_max.iteritems()):
+        for category, stat_max in sorted(self.stats_max.items()):
+            for stat, val in sorted(stat_max.items()):
                 if stat.endswith('__rate'):
                     continue
                 max_rate = self._format(stat_max.get(stat+'__rate', '-'))
@@ -539,7 +537,7 @@ class MultiSummarizer(object):
 
     def run(self):
         threads = []
-        for child in self.children.itervalues():
+        for child in self.children.values():
             self.throttle.acquire()
             t = threading.Thread(target=self.run_and_release, args=(child.run, ))
             t.daemon = True
@@ -551,7 +549,7 @@ class MultiSummarizer(object):
     def text_report(self):
         txt = ''
         d = self._descendants()
-        for child in d.itervalues():
+        for child in d.values():
             if len(d) > 1:
                 txt += '### Summary for {} ({})\n'.format(
                     child.label, child.process['uuid'])
@@ -566,7 +564,7 @@ class MultiSummarizer(object):
         MultiSummarizers) are omitted.
         """
         d = collections.OrderedDict()
-        for key, child in self.children.iteritems():
+        for key, child in self.children.items():
             if isinstance(child, Summarizer):
                 d[key] = child
             if isinstance(child, MultiSummarizer):
@@ -574,7 +572,7 @@ class MultiSummarizer(object):
         return d
 
     def html_report(self):
-        return WEBCHART_CLASS(self.label, self._descendants().itervalues()).html()
+        return WEBCHART_CLASS(self.label, iter(self._descendants().values())).html()
 
 
 class JobTreeSummarizer(MultiSummarizer):
@@ -588,7 +586,7 @@ class JobTreeSummarizer(MultiSummarizer):
             preloaded = {}
             for j in arv.jobs().index(
                     limit=len(job['components']),
-                    filters=[['uuid','in',job['components'].values()]]).execute()['items']:
+                    filters=[['uuid','in',list(job['components'].values())]]).execute()['items']:
                 preloaded[j['uuid']] = j
             for cname in sorted(job['components'].keys()):
                 child_uuid = job['components'][cname]
@@ -605,7 +603,7 @@ class JobTreeSummarizer(MultiSummarizer):
 class PipelineSummarizer(MultiSummarizer):
     def __init__(self, instance, **kwargs):
         children = collections.OrderedDict()
-        for cname, component in instance['components'].iteritems():
+        for cname, component in instance['components'].items():
             if 'job' not in component:
                 logger.warning(
                     "%s: skipping component with no job assigned", cname)
@@ -663,7 +661,7 @@ class ContainerTreeSummarizer(MultiSummarizer):
                         cr['name'] = cr.get('name') or cr['uuid']
                         todo.append(cr)
         sorted_children = collections.OrderedDict()
-        for uuid in sorted(children.keys(), key=lambda uuid: children[uuid].sort_key):
+        for uuid in sorted(list(children.keys()), key=lambda uuid: children[uuid].sort_key):
             sorted_children[uuid] = children[uuid]
         super(ContainerTreeSummarizer, self).__init__(
             children=sorted_children,
index 9d18883ce2506d71abe03e08abde2fee28006343..91a196ffbe660b4c9c5da0962896472ea319e76f 100644 (file)
@@ -33,7 +33,7 @@ class WebChart(object):
     def js(self):
         return 'var chartdata = {};\n{}'.format(
             json.dumps(self.sections()),
-            '\n'.join([pkg_resources.resource_string('crunchstat_summary', jsa) for jsa in self.JSASSETS]))
+            '\n'.join([pkg_resources.resource_string('crunchstat_summary', jsa).decode('utf-8') for jsa in self.JSASSETS]))
 
     def sections(self):
         return [
index af92becd80a6875d64e1d406d2b21f8bfbd6ec57..c25a908118665810cfc15c6cfc8e44445adcce2c 100644 (file)
@@ -8,20 +8,23 @@ import crunchstat_summary.command
 import difflib
 import glob
 import gzip
+from io import open
 import mock
 import os
 import unittest
 
+from crunchstat_summary.command import UTF8Decode
+
 TESTS_DIR = os.path.dirname(os.path.abspath(__file__))
 
 
 class ReportDiff(unittest.TestCase):
     def diff_known_report(self, logfile, cmd):
         expectfile = logfile+'.report'
-        expect = open(expectfile).readlines()
+        expect = open(expectfile, encoding='utf-8').readlines()
         self.diff_report(cmd, expect, expectfile=expectfile)
 
-    def diff_report(self, cmd, expect, expectfile=None):
+    def diff_report(self, cmd, expect, expectfile='(expected)'):
         got = [x+"\n" for x in cmd.report().strip("\n").split("\n")]
         self.assertEqual(got, expect, "\n"+"".join(difflib.context_diff(
             expect, got, fromfile=expectfile, tofile="(generated)")))
@@ -51,10 +54,9 @@ class HTMLFromFile(ReportDiff):
             cmd.run()
             self.assertRegexpMatches(cmd.report(), r'(?is)<html>.*</html>\s*$')
 
-
 class SummarizeEdgeCases(unittest.TestCase):
     def test_error_messages(self):
-        logfile = open(os.path.join(TESTS_DIR, 'crunchstat_error_messages.txt'))
+        logfile = open(os.path.join(TESTS_DIR, 'crunchstat_error_messages.txt'), encoding='utf-8')
         s = crunchstat_summary.summarizer.Summarizer(logfile)
         s.run()
 
@@ -89,9 +91,9 @@ class SummarizeContainer(ReportDiff):
             'container.json', 'crunchstat.txt', 'arv-mount.txt']
         def _open(n):
             if n == "crunchstat.txt":
-                return gzip.open(self.logfile)
+                return UTF8Decode(gzip.open(self.logfile))
             elif n == "arv-mount.txt":
-                return gzip.open(self.arvmountlog)
+                return UTF8Decode(gzip.open(self.arvmountlog))
         mock_cr().open.side_effect = _open
         args = crunchstat_summary.command.ArgumentParser().parse_args(
             ['--job', self.fake_request['uuid']])
@@ -114,7 +116,7 @@ class SummarizeJob(ReportDiff):
     def test_job_report(self, mock_api, mock_cr):
         mock_api().jobs().get().execute.return_value = self.fake_job
         mock_cr().__iter__.return_value = ['fake-logfile.txt']
-        mock_cr().open.return_value = gzip.open(self.logfile)
+        mock_cr().open.return_value = UTF8Decode(gzip.open(self.logfile))
         args = crunchstat_summary.command.ArgumentParser().parse_args(
             ['--job', self.fake_job_uuid])
         cmd = crunchstat_summary.command.Command(args)
@@ -175,14 +177,14 @@ class SummarizePipeline(ReportDiff):
         mock_api().pipeline_instances().get().execute. \
             return_value = self.fake_instance
         mock_cr().__iter__.return_value = ['fake-logfile.txt']
-        mock_cr().open.side_effect = [gzip.open(logfile) for _ in range(3)]
+        mock_cr().open.side_effect = [UTF8Decode(gzip.open(logfile)) for _ in range(3)]
         args = crunchstat_summary.command.ArgumentParser().parse_args(
             ['--pipeline-instance', self.fake_instance['uuid']])
         cmd = crunchstat_summary.command.Command(args)
         cmd.run()
 
         job_report = [
-            line for line in open(logfile+'.report').readlines()
+            line for line in open(logfile+'.report', encoding='utf-8').readlines()
             if not line.startswith('#!! ')]
         expect = (
             ['### Summary for foo (zzzzz-8i9sb-000000000000000)\n'] +
@@ -251,14 +253,14 @@ class SummarizeACRJob(ReportDiff):
         mock_api().jobs().index().execute.return_value = self.fake_jobs_index
         mock_api().jobs().get().execute.return_value = self.fake_job
         mock_cr().__iter__.return_value = ['fake-logfile.txt']
-        mock_cr().open.side_effect = [gzip.open(logfile) for _ in range(3)]
+        mock_cr().open.side_effect = [UTF8Decode(gzip.open(logfile)) for _ in range(3)]
         args = crunchstat_summary.command.ArgumentParser().parse_args(
             ['--job', self.fake_job['uuid']])
         cmd = crunchstat_summary.command.Command(args)
         cmd.run()
 
         job_report = [
-            line for line in open(logfile+'.report').readlines()
+            line for line in open(logfile+'.report', encoding='utf-8').readlines()
             if not line.startswith('#!! ')]
         expect = (
             ['### Summary for zzzzz-8i9sb-i3e77t9z5y8j9cc (partial) (zzzzz-8i9sb-i3e77t9z5y8j9cc)\n',