Update crunchstat-summary to Python 3. Refs #14939
author Tom Morris <tfmorris@veritasgenetics.com>
Fri, 8 Mar 2019 23:40:24 +0000 (18:40 -0500)
committer Tom Morris <tfmorris@veritasgenetics.com>
Thu, 21 Mar 2019 19:35:36 +0000 (15:35 -0400)
Arvados-DCO-1.1-Signed-off-by: Tom Morris <tfmorris@veritasgenetics.com>

tools/crunchstat-summary/crunchstat_summary/command.py
tools/crunchstat-summary/crunchstat_summary/dygraphs.py
tools/crunchstat-summary/crunchstat_summary/reader.py
tools/crunchstat-summary/crunchstat_summary/summarizer.py
tools/crunchstat-summary/crunchstat_summary/webchart.py
tools/crunchstat-summary/tests/test_examples.py

index 71bf38357b885952fd9c327317a458a81ab48984..aadc775823caf136c7f7094a0d2b55fcb50f4478 100644 (file)
@@ -4,6 +4,7 @@
 
 import argparse
 import gzip
+from io import open
 import logging
 import sys
 
@@ -41,6 +42,31 @@ class ArgumentParser(argparse.ArgumentParser):
             help='Log more information (once for progress, twice for debug)')
 
 
+class UTF8Decode(object):
+    '''Wrap a file-like iterable to decode UTF-8 bytes into strings
+    '''
+    def __init__(self, fh):
+        self.fh = fh
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.close()
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        return next(self.fh).decode('utf-8')
+
+    next = __next__
+
+    def close(self):
+        # mimic Gzip behavior and don't close underlying object
+        pass
+
+
 class Command(object):
     def __init__(self, args):
         self.args = args
@@ -57,9 +83,9 @@ class Command(object):
             self.summer = summarizer.NewSummarizer(self.args.job, **kwargs)
         elif self.args.log_file:
             if self.args.log_file.endswith('.gz'):
-                fh = gzip.open(self.args.log_file)
+                fh = UTF8Decode(gzip.open(self.args.log_file))
             else:
-                fh = open(self.args.log_file)
+                fh = open(self.args.log_file, mode = 'r', encoding = 'utf-8')
             self.summer = summarizer.Summarizer(fh, **kwargs)
         else:
             self.summer = summarizer.Summarizer(sys.stdin, **kwargs)
index 1314e9df35612817e260e6644212ef2e8a387bc3..6df440a14e37f87f8fcea5cac7c57ca1269915b4 100644 (file)
@@ -18,7 +18,7 @@ class DygraphsChart(crunchstat_summary.webchart.WebChart):
             'data': self._collate_data(tasks, stat),
             'options': {
                 'connectSeparatedPoints': True,
-                'labels': ['elapsed']+[uuid for uuid, _ in tasks.iteritems()],
+                'labels': ['elapsed']+[uuid for uuid, _ in tasks.items()],
                 'title': '{}: {} {}'.format(label, stat[0], stat[1]),
             },
         }
@@ -26,7 +26,7 @@ class DygraphsChart(crunchstat_summary.webchart.WebChart):
     def _collate_data(self, tasks, stat):
         data = []
         nulls = []
-        for uuid, task in tasks.iteritems():
+        for uuid, task in tasks.items():
             for pt in task.series[stat]:
                 data.append([pt[0].total_seconds()] + nulls + [pt[1]])
             nulls.append(None)
index 311c006c07d882a40ee5af8eaae651ba1e3c7145..8ccdbc2fcf04e45ca3ab3ec6e2270933d050ea1c 100644 (file)
@@ -2,11 +2,9 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
-from __future__ import print_function
-
 import arvados
 import itertools
-import Queue
+import queue
 import threading
 
 from crunchstat_summary import logger
@@ -87,19 +85,21 @@ class LiveLogReader(object):
             self._queue.put(self.EOF)
 
     def __iter__(self):
-        self._queue = Queue.Queue()
+        self._queue = queue.Queue()
         self._thread = threading.Thread(target=self._get_all_pages)
         self._thread.daemon = True
         self._thread.start()
         return self
 
-    def next(self):
+    def __next__(self):
         line = self._queue.get()
         if line is self.EOF:
             self._thread.join()
             raise StopIteration
         return line
 
+    next = __next__ # for Python 2
+
     def __enter__(self):
         return self
 
index b2f6f1bb700b6d5d2a04f0212c699eb1ace15435..543011ea2c743129fd25dfec8ed96f3c82c8a394 100644 (file)
@@ -2,8 +2,6 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
-from __future__ import print_function
-
 import arvados
 import collections
 import crunchstat_summary.dygraphs
@@ -209,7 +207,7 @@ class Summarizer(object):
                     stats['user+sys'] = stats.get('user', 0) + stats.get('sys', 0)
                 if 'tx' in stats or 'rx' in stats:
                     stats['tx+rx'] = stats.get('tx', 0) + stats.get('rx', 0)
-                for stat, val in stats.iteritems():
+                for stat, val in stats.items():
                     if group == 'interval':
                         if stat == 'seconds':
                             this_interval_s = val
@@ -236,9 +234,9 @@ class Summarizer(object):
 
         self.job_tot = collections.defaultdict(
             functools.partial(collections.defaultdict, int))
-        for task_id, task_stat in self.task_stats.iteritems():
-            for category, stat_last in task_stat.iteritems():
-                for stat, val in stat_last.iteritems():
+        for task_id, task_stat in self.task_stats.items():
+            for category, stat_last in task_stat.items():
+                for stat, val in stat_last.items():
                     if stat in ['cpus', 'cache', 'swap', 'rss']:
                         # meaningless stats like 16 cpu cores x 5 tasks = 80
                         continue
@@ -273,8 +271,8 @@ class Summarizer(object):
 
     def _text_report_gen(self):
         yield "\t".join(['category', 'metric', 'task_max', 'task_max_rate', 'job_total'])
-        for category, stat_max in sorted(self.stats_max.iteritems()):
-            for stat, val in sorted(stat_max.iteritems()):
+        for category, stat_max in sorted(self.stats_max.items()):
+            for stat, val in sorted(stat_max.items()):
                 if stat.endswith('__rate'):
                     continue
                 max_rate = self._format(stat_max.get(stat+'__rate', '-'))
@@ -539,7 +537,7 @@ class MultiSummarizer(object):
 
     def run(self):
         threads = []
-        for child in self.children.itervalues():
+        for child in self.children.values():
             self.throttle.acquire()
             t = threading.Thread(target=self.run_and_release, args=(child.run, ))
             t.daemon = True
@@ -551,7 +549,7 @@ class MultiSummarizer(object):
     def text_report(self):
         txt = ''
         d = self._descendants()
-        for child in d.itervalues():
+        for child in d.values():
             if len(d) > 1:
                 txt += '### Summary for {} ({})\n'.format(
                     child.label, child.process['uuid'])
@@ -566,7 +564,7 @@ class MultiSummarizer(object):
         MultiSummarizers) are omitted.
         """
         d = collections.OrderedDict()
-        for key, child in self.children.iteritems():
+        for key, child in self.children.items():
             if isinstance(child, Summarizer):
                 d[key] = child
             if isinstance(child, MultiSummarizer):
@@ -574,7 +572,7 @@ class MultiSummarizer(object):
         return d
 
     def html_report(self):
-        return WEBCHART_CLASS(self.label, self._descendants().itervalues()).html()
+        return WEBCHART_CLASS(self.label, iter(self._descendants().values())).html()
 
 
 class JobTreeSummarizer(MultiSummarizer):
@@ -588,7 +586,7 @@ class JobTreeSummarizer(MultiSummarizer):
             preloaded = {}
             for j in arv.jobs().index(
                     limit=len(job['components']),
-                    filters=[['uuid','in',job['components'].values()]]).execute()['items']:
+                    filters=[['uuid','in',list(job['components'].values())]]).execute()['items']:
                 preloaded[j['uuid']] = j
             for cname in sorted(job['components'].keys()):
                 child_uuid = job['components'][cname]
@@ -605,7 +603,7 @@ class JobTreeSummarizer(MultiSummarizer):
 class PipelineSummarizer(MultiSummarizer):
     def __init__(self, instance, **kwargs):
         children = collections.OrderedDict()
-        for cname, component in instance['components'].iteritems():
+        for cname, component in instance['components'].items():
             if 'job' not in component:
                 logger.warning(
                     "%s: skipping component with no job assigned", cname)
@@ -663,7 +661,7 @@ class ContainerTreeSummarizer(MultiSummarizer):
                         cr['name'] = cr.get('name') or cr['uuid']
                         todo.append(cr)
         sorted_children = collections.OrderedDict()
-        for uuid in sorted(children.keys(), key=lambda uuid: children[uuid].sort_key):
+        for uuid in sorted(list(children.keys()), key=lambda uuid: children[uuid].sort_key):
             sorted_children[uuid] = children[uuid]
         super(ContainerTreeSummarizer, self).__init__(
             children=sorted_children,
index 9d18883ce2506d71abe03e08abde2fee28006343..91a196ffbe660b4c9c5da0962896472ea319e76f 100644 (file)
@@ -33,7 +33,7 @@ class WebChart(object):
     def js(self):
         return 'var chartdata = {};\n{}'.format(
             json.dumps(self.sections()),
-            '\n'.join([pkg_resources.resource_string('crunchstat_summary', jsa) for jsa in self.JSASSETS]))
+            '\n'.join([pkg_resources.resource_string('crunchstat_summary', jsa).decode('utf-8') for jsa in self.JSASSETS]))
 
     def sections(self):
         return [
index af92becd80a6875d64e1d406d2b21f8bfbd6ec57..c25a908118665810cfc15c6cfc8e44445adcce2c 100644 (file)
@@ -8,20 +8,23 @@ import crunchstat_summary.command
 import difflib
 import glob
 import gzip
+from io import open
 import mock
 import os
 import unittest
 
+from crunchstat_summary.command import UTF8Decode
+
 TESTS_DIR = os.path.dirname(os.path.abspath(__file__))
 
 
 class ReportDiff(unittest.TestCase):
     def diff_known_report(self, logfile, cmd):
         expectfile = logfile+'.report'
-        expect = open(expectfile).readlines()
+        expect = open(expectfile, encoding='utf-8').readlines()
         self.diff_report(cmd, expect, expectfile=expectfile)
 
-    def diff_report(self, cmd, expect, expectfile=None):
+    def diff_report(self, cmd, expect, expectfile='(expected)'):
         got = [x+"\n" for x in cmd.report().strip("\n").split("\n")]
         self.assertEqual(got, expect, "\n"+"".join(difflib.context_diff(
             expect, got, fromfile=expectfile, tofile="(generated)")))
@@ -51,10 +54,9 @@ class HTMLFromFile(ReportDiff):
             cmd.run()
             self.assertRegexpMatches(cmd.report(), r'(?is)<html>.*</html>\s*$')
 
-
 class SummarizeEdgeCases(unittest.TestCase):
     def test_error_messages(self):
-        logfile = open(os.path.join(TESTS_DIR, 'crunchstat_error_messages.txt'))
+        logfile = open(os.path.join(TESTS_DIR, 'crunchstat_error_messages.txt'), encoding='utf-8')
         s = crunchstat_summary.summarizer.Summarizer(logfile)
         s.run()
 
@@ -89,9 +91,9 @@ class SummarizeContainer(ReportDiff):
             'container.json', 'crunchstat.txt', 'arv-mount.txt']
         def _open(n):
             if n == "crunchstat.txt":
-                return gzip.open(self.logfile)
+                return UTF8Decode(gzip.open(self.logfile))
             elif n == "arv-mount.txt":
-                return gzip.open(self.arvmountlog)
+                return UTF8Decode(gzip.open(self.arvmountlog))
         mock_cr().open.side_effect = _open
         args = crunchstat_summary.command.ArgumentParser().parse_args(
             ['--job', self.fake_request['uuid']])
@@ -114,7 +116,7 @@ class SummarizeJob(ReportDiff):
     def test_job_report(self, mock_api, mock_cr):
         mock_api().jobs().get().execute.return_value = self.fake_job
         mock_cr().__iter__.return_value = ['fake-logfile.txt']
-        mock_cr().open.return_value = gzip.open(self.logfile)
+        mock_cr().open.return_value = UTF8Decode(gzip.open(self.logfile))
         args = crunchstat_summary.command.ArgumentParser().parse_args(
             ['--job', self.fake_job_uuid])
         cmd = crunchstat_summary.command.Command(args)
@@ -175,14 +177,14 @@ class SummarizePipeline(ReportDiff):
         mock_api().pipeline_instances().get().execute. \
             return_value = self.fake_instance
         mock_cr().__iter__.return_value = ['fake-logfile.txt']
-        mock_cr().open.side_effect = [gzip.open(logfile) for _ in range(3)]
+        mock_cr().open.side_effect = [UTF8Decode(gzip.open(logfile)) for _ in range(3)]
         args = crunchstat_summary.command.ArgumentParser().parse_args(
             ['--pipeline-instance', self.fake_instance['uuid']])
         cmd = crunchstat_summary.command.Command(args)
         cmd.run()
 
         job_report = [
-            line for line in open(logfile+'.report').readlines()
+            line for line in open(logfile+'.report', encoding='utf-8').readlines()
             if not line.startswith('#!! ')]
         expect = (
             ['### Summary for foo (zzzzz-8i9sb-000000000000000)\n'] +
@@ -251,14 +253,14 @@ class SummarizeACRJob(ReportDiff):
         mock_api().jobs().index().execute.return_value = self.fake_jobs_index
         mock_api().jobs().get().execute.return_value = self.fake_job
         mock_cr().__iter__.return_value = ['fake-logfile.txt']
-        mock_cr().open.side_effect = [gzip.open(logfile) for _ in range(3)]
+        mock_cr().open.side_effect = [UTF8Decode(gzip.open(logfile)) for _ in range(3)]
         args = crunchstat_summary.command.ArgumentParser().parse_args(
             ['--job', self.fake_job['uuid']])
         cmd = crunchstat_summary.command.Command(args)
         cmd.run()
 
         job_report = [
-            line for line in open(logfile+'.report').readlines()
+            line for line in open(logfile+'.report', encoding='utf-8').readlines()
             if not line.startswith('#!! ')]
         expect = (
             ['### Summary for zzzzz-8i9sb-i3e77t9z5y8j9cc (partial) (zzzzz-8i9sb-i3e77t9z5y8j9cc)\n',