20705: Warn if some stats are missing.
author Tom Clegg <tom@curii.com>
Mon, 31 Jul 2023 17:54:24 +0000 (13:54 -0400)
committer Tom Clegg <tom@curii.com>
Mon, 31 Jul 2023 17:54:24 +0000 (13:54 -0400)
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@curii.com>

tools/crunchstat-summary/crunchstat_summary/__init__.py
tools/crunchstat-summary/crunchstat_summary/summarizer.py
tools/crunchstat-summary/tests/test_examples.py

index 9bdf3589ab6ef0589dcac19ef3f44194220f84ba..610766e198589078bfe4601f452c3088b2a73f50 100644 (file)
@@ -3,6 +3,9 @@
 # SPDX-License-Identifier: AGPL-3.0
 
 import logging
+import sys
+
 
 logger = logging.getLogger(__name__)
-logger.addHandler(logging.NullHandler())
+logger.addHandler(logging.StreamHandler(stream=sys.stderr))
+logger.setLevel(logging.WARNING)
index 463c552c4f1eb5caf0868337858197a747bc8fa8..a876257abc2a5ae00c8def17f4ae23e0219de446 100644 (file)
@@ -245,6 +245,27 @@ class Summarizer(object):
                     self.job_tot[category][stat] += val
         logger.debug('%s: done totals', self.label)
 
+        missing_category = {
+            'cpu': 'CPU',
+            'mem': 'memory',
+            'net:': 'network I/O',
+            'statfs': 'storage space',
+        }
+        for task_stat in self.task_stats.values():
+            for category in task_stat.keys():
+                for checkcat in missing_category:
+                    if checkcat.endswith(':'):
+                        if category.startswith(checkcat):
+                            missing_category.pop(checkcat)
+                            break
+                    else:
+                        if category == checkcat:
+                            missing_category.pop(checkcat)
+                            break
+        for catlabel in missing_category.values():
+            logger.warning('%s: %s stats are missing -- possible cluster configuration issue',
+                        self.label, catlabel)
+
     def long_label(self):
         label = self.label
         if hasattr(self, 'process') and self.process['uuid'] not in label:
index fb23eab39e9072f9b44ac5e3b766d25c524e5668..444cfe4ef83258543f5dd8905afbd6a0b9cf4829 100644 (file)
@@ -8,21 +8,32 @@ import crunchstat_summary.command
 import difflib
 import glob
 import gzip
-from io import open
+import io
+import logging
 import mock
 import os
 import sys
 import unittest
 
 from crunchstat_summary.command import UTF8Decode
+from crunchstat_summary import logger
 
 TESTS_DIR = os.path.dirname(os.path.abspath(__file__))
 
 
-class ReportDiff(unittest.TestCase):
+class TestCase(unittest.TestCase):
+    def setUp(self):
+        self.logbuf = io.StringIO()
+        self.loghandler = logging.StreamHandler(stream=self.logbuf)
+        logger.addHandler(self.loghandler)
+        logger.setLevel(logging.WARNING)
+
+    def tearDown(self):
+        logger.removeHandler(self.loghandler)
+
     def diff_known_report(self, logfile, cmd):
         expectfile = logfile+'.report'
-        with open(expectfile, encoding='utf-8') as f:
+        with io.open(expectfile, encoding='utf-8') as f:
             expect = f.readlines()
         self.diff_report(cmd, expect, expectfile=expectfile)
 
@@ -32,7 +43,7 @@ class ReportDiff(unittest.TestCase):
             expect, got, fromfile=expectfile, tofile="(generated)")))
 
 
-class SummarizeFile(ReportDiff):
+class SummarizeFile(TestCase):
     def test_example_files(self):
         for fnm in glob.glob(os.path.join(TESTS_DIR, '*.txt.gz')):
             logfile = os.path.join(TESTS_DIR, fnm)
@@ -43,7 +54,7 @@ class SummarizeFile(ReportDiff):
             self.diff_known_report(logfile, cmd)
 
 
-class HTMLFromFile(ReportDiff):
+class HTMLFromFile(TestCase):
     def test_example_files(self):
         # Note we don't test the output content at all yet; we're
         # mainly just verifying the --format=html option isn't ignored
@@ -54,20 +65,21 @@ class HTMLFromFile(ReportDiff):
                 ['--format=html', '--log-file', logfile])
             cmd = crunchstat_summary.command.Command(args)
             cmd.run()
-            if sys.version_info >= (3,2):
-                self.assertRegex(cmd.report(), r'(?is)<html>.*</html>\s*$')
-            else:
-                self.assertRegexpMatches(cmd.report(), r'(?is)<html>.*</html>\s*$')
+            self.assertRegex(cmd.report(), r'(?is)<html>.*</html>\s*$')
 
 
-class SummarizeEdgeCases(unittest.TestCase):
+class SummarizeEdgeCases(TestCase):
     def test_error_messages(self):
-        logfile = open(os.path.join(TESTS_DIR, 'crunchstat_error_messages.txt'), encoding='utf-8')
+        logfile = io.open(os.path.join(TESTS_DIR, 'crunchstat_error_messages.txt'), encoding='utf-8')
         s = crunchstat_summary.summarizer.Summarizer(logfile)
         s.run()
+        self.assertRegex(self.logbuf.getvalue(), r'CPU stats are missing -- possible cluster configuration issue')
+        self.assertRegex(self.logbuf.getvalue(), r'memory stats are missing -- possible cluster configuration issue')
+        self.assertRegex(self.logbuf.getvalue(), r'network I/O stats are missing -- possible cluster configuration issue')
+        self.assertRegex(self.logbuf.getvalue(), r'storage space stats are missing -- possible cluster configuration issue')
 
 
-class SummarizeContainerCommon(ReportDiff):
+class SummarizeContainerCommon(TestCase):
     fake_container = {
         'uuid': '9tee4-dz642-lymtndkpy39eibk',
         'created_at': '2017-08-18T14:27:25.371388141',
@@ -133,9 +145,11 @@ class SummarizeContainerRequest(SummarizeContainerCommon):
 
     def test_container_request(self):
         self.check_common()
+        self.assertNotRegex(self.logbuf.getvalue(), r'stats are missing')
+        self.assertNotRegex(self.logbuf.getvalue(), r'possible cluster configuration issue')
 
 
-class SummarizeJob(ReportDiff):
+class SummarizeJob(TestCase):
     fake_job_uuid = '4xphq-8i9sb-jq0ekny1xou3zoh'
     fake_log_id = 'fake-log-collection-id'
     fake_job = {
@@ -160,7 +174,7 @@ class SummarizeJob(ReportDiff):
         mock_cr().open.assert_called_with('fake-logfile.txt')
 
 
-class SummarizePipeline(ReportDiff):
+class SummarizePipeline(TestCase):
     fake_instance = {
         'uuid': 'zzzzz-d1hrv-i3e77t9z5y8j9cc',
         'owner_uuid': 'zzzzz-tpzed-xurymjxw79nv3jz',
@@ -216,7 +230,7 @@ class SummarizePipeline(ReportDiff):
         cmd = crunchstat_summary.command.Command(args)
         cmd.run()
 
-        with open(logfile+'.report', encoding='utf-8') as f:
+        with io.open(logfile+'.report', encoding='utf-8') as f:
             job_report = [line for line in f if not line.startswith('#!! ')]
         expect = (
             ['### Summary for foo (zzzzz-8i9sb-000000000000000)\n'] +
@@ -238,7 +252,7 @@ class SummarizePipeline(ReportDiff):
         mock_cr().open.assert_called_with('fake-logfile.txt')
 
 
-class SummarizeACRJob(ReportDiff):
+class SummarizeACRJob(TestCase):
     fake_job = {
         'uuid': 'zzzzz-8i9sb-i3e77t9z5y8j9cc',
         'owner_uuid': 'zzzzz-tpzed-xurymjxw79nv3jz',
@@ -291,7 +305,7 @@ class SummarizeACRJob(ReportDiff):
         cmd = crunchstat_summary.command.Command(args)
         cmd.run()
 
-        with open(logfile+'.report', encoding='utf-8') as f:
+        with io.open(logfile+'.report', encoding='utf-8') as f:
             job_report = [line for line in f if not line.startswith('#!! ')]
         expect = (
             ['### Summary for zzzzz-8i9sb-i3e77t9z5y8j9cc (partial) (zzzzz-8i9sb-i3e77t9z5y8j9cc)\n',