12690 & 12748: Read stats from arv-mount.txt
authorPeter Amstutz <pamstutz@veritasgenetics.com>
Wed, 3 Oct 2018 20:38:30 +0000 (16:38 -0400)
committerPeter Amstutz <pamstutz@veritasgenetics.com>
Wed, 3 Oct 2018 20:38:30 +0000 (16:38 -0400)
* Compute elapsed time for containers correctly
* Update tests

Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz@veritasgenetics.com>

tools/crunchstat-summary/crunchstat_summary/summarizer.py
tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-arv-mount.txt.gz [new file with mode: 0644]
tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-arv-mount.txt.gz.report [new file with mode: 0644]
tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-crunchstat.txt.gz [new file with mode: 0644]
tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-crunchstat.txt.gz.report [new file with mode: 0644]
tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk.txt.gz.report [new file with mode: 0644]
tools/crunchstat-summary/tests/container_9tee4-dz642-mjfb0i5hzojp16a-crunchstat.txt.gz [deleted file]
tools/crunchstat-summary/tests/container_9tee4-dz642-mjfb0i5hzojp16a-crunchstat.txt.gz.report [deleted file]
tools/crunchstat-summary/tests/test_examples.py

index 0148203e3ff8b0ad2b9c7ba475053d162ff3b9ce..b2f6f1bb700b6d5d2a04f0212c699eb1ace15435 100644 (file)
@@ -37,6 +37,7 @@ WEBCHART_CLASS = crunchstat_summary.dygraphs.DygraphsChart
 class Task(object):
     def __init__(self):
         self.starttime = None
+        self.finishtime = None
         self.series = collections.defaultdict(list)
 
 
@@ -116,13 +117,13 @@ class Summarizer(object):
                     continue
 
                 # 2017-12-02_17:15:08 e51c5-8i9sb-mfp68stkxnqdd6m 63676 0 stderr crunchstat: keepcalls 0 put 2576 get -- interval 10.0000 seconds 0 put 2576 get
-                m = re.search(r'^(?P<timestamp>[^\s.]+)(\.\d+)? (?P<job_uuid>\S+) \d+ (?P<seq>\d+) stderr crunchstat: (?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n$', line)
+                m = re.search(r'^(?P<timestamp>[^\s.]+)(\.\d+)? (?P<job_uuid>\S+) \d+ (?P<seq>\d+) stderr (?P<crunchstat>crunchstat: )(?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n$', line)
                 if not m:
                     continue
             else:
                 # crunch2
                 # 2017-12-01T16:56:24.723509200Z crunchstat: keepcalls 0 put 3 get -- interval 10.0000 seconds 0 put 3 get
-                m = re.search(r'^(?P<timestamp>\S+) (crunchstat: )?(?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n$', line)
+                m = re.search(r'^(?P<timestamp>\S+) (?P<crunchstat>crunchstat: )?(?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n$', line)
                 if not m:
                     continue
 
@@ -160,18 +161,21 @@ class Summarizer(object):
                 raise ValueError("Cannot parse timestamp {!r}".format(
                     timestamp))
 
-            if not task.starttime:
-                task.starttime = timestamp
+            if task.starttime is None:
                 logger.debug('%s: task %s starttime %s',
                              self.label, task_id, timestamp)
-            task.finishtime = timestamp
+            if task.starttime is None or timestamp < task.starttime:
+                task.starttime = timestamp
+            if task.finishtime is None or timestamp > task.finishtime:
+                task.finishtime = timestamp
 
-            if not self.starttime:
+            if self.starttime is None or timestamp < task.starttime:
                 self.starttime = timestamp
-            self.finishtime = timestamp
+            if self.finishtime is None or timestamp < task.finishtime:
+                self.finishtime = timestamp
 
-            if not self.detected_crunch1:
-                elapsed = (timestamp - task.starttime).seconds
+            if (not self.detected_crunch1) and task.starttime is not None and task.finishtime is not None:
+                elapsed = (task.finishtime - task.starttime).seconds
                 self.task_stats[task_id]['time'] = {'elapsed': elapsed}
                 if elapsed > self.stats_max['time']['elapsed']:
                     self.stats_max['time']['elapsed'] = elapsed
@@ -190,10 +194,16 @@ class Summarizer(object):
                         else:
                             stats[stat] = int(val)
                 except ValueError as e:
-                    logger.warning(
-                        'Error parsing value %r (stat %r, category %r): %r',
-                        val, stat, category, e)
-                    logger.warning('%s', line)
+                    # If the line doesn't start with 'crunchstat:' we
+                    # might have mistaken an error message for a
+                    # structured crunchstat line.
+                    if m.group("crunchstat") is None or m.group("category") == "crunchstat":
+                        logger.warning("%s: log contains message\n  %s", self.label, line)
+                    else:
+                        logger.warning(
+                            '%s: Error parsing value %r (stat %r, category %r): %r',
+                            self.label, val, stat, category, e)
+                        logger.warning('%s', line)
                     continue
                 if 'user' in stats or 'sys' in stats:
                     stats['user+sys'] = stats.get('user', 0) + stats.get('sys', 0)
diff --git a/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-arv-mount.txt.gz b/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-arv-mount.txt.gz
new file mode 100644 (file)
index 0000000..ff7dd30
Binary files /dev/null and b/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-arv-mount.txt.gz differ
diff --git a/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-arv-mount.txt.gz.report b/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-arv-mount.txt.gz.report
new file mode 100644 (file)
index 0000000..9819461
--- /dev/null
@@ -0,0 +1,24 @@
+category       metric  task_max        task_max_rate   job_total
+blkio:0:0      read    0       0       0
+blkio:0:0      write   0       0       0
+fuseops        read    0       0       0
+fuseops        write   0       0       0
+keepcache      hit     0       0       0
+keepcache      miss    0       0       0
+keepcalls      get     0       0       0
+keepcalls      put     0       0       0
+net:keep0      rx      0       0       0
+net:keep0      tx      0       0       0
+net:keep0      tx+rx   0       0       0
+time   elapsed 10      -       10
+# Number of tasks: 1
+# Max CPU time spent by a single task: 0s
+# Max CPU usage in a single interval: 0%
+# Overall CPU usage: 0%
+# Max memory used by a single task: 0.00GB
+# Max network traffic in a single task: 0.00GB
+# Max network speed in a single interval: 0.00MB/s
+# Keep cache miss rate 0.00%
+# Keep cache utilization 0.00%
+#!! container max CPU usage was 0% -- try runtime_constraints "vcpus":1
+#!! container max RSS was 0 MiB -- try runtime_constraints "ram":0
diff --git a/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-crunchstat.txt.gz b/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-crunchstat.txt.gz
new file mode 100644 (file)
index 0000000..249ad22
Binary files /dev/null and b/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-crunchstat.txt.gz differ
diff --git a/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-crunchstat.txt.gz.report b/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-crunchstat.txt.gz.report
new file mode 100644 (file)
index 0000000..b61da15
--- /dev/null
@@ -0,0 +1,27 @@
+category       metric  task_max        task_max_rate   job_total
+cpu    cpus    20      -       -
+cpu    sys     0.39    0.04    0.39
+cpu    user    2.06    0.20    2.06
+cpu    user+sys        2.45    0.24    2.45
+mem    cache   172032  -       -
+mem    pgmajfault      0       -       0
+mem    rss     69525504        -       -
+mem    swap    0       -       -
+net:eth0       rx      859480  1478.97 859480
+net:eth0       tx      55888   395.71  55888
+net:eth0       tx+rx   915368  1874.69 915368
+statfs available       397744787456    -       397744787456
+statfs total   402611240960    -       402611240960
+statfs used    4870303744      52426.18        4866453504
+time   elapsed 20      -       20
+# Number of tasks: 1
+# Max CPU time spent by a single task: 2.45s
+# Max CPU usage in a single interval: 23.70%
+# Overall CPU usage: 12.25%
+# Max memory used by a single task: 0.07GB
+# Max network traffic in a single task: 0.00GB
+# Max network speed in a single interval: 0.00MB/s
+# Keep cache miss rate 0.00%
+# Keep cache utilization 0.00%
+#!! container max CPU usage was 24% -- try runtime_constraints "vcpus":1
+#!! container max RSS was 67 MiB -- try runtime_constraints "ram":1020054732
diff --git a/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk.txt.gz.report b/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk.txt.gz.report
new file mode 100644 (file)
index 0000000..9d3cd78
--- /dev/null
@@ -0,0 +1,38 @@
+category       metric  task_max        task_max_rate   job_total
+blkio:0:0      read    0       0       0
+blkio:0:0      write   0       0       0
+cpu    cpus    20      -       -
+cpu    sys     0.39    0.04    0.39
+cpu    user    2.06    0.20    2.06
+cpu    user+sys        2.45    0.24    2.45
+fuseops        read    0       0       0
+fuseops        write   0       0       0
+keepcache      hit     0       0       0
+keepcache      miss    0       0       0
+keepcalls      get     0       0       0
+keepcalls      put     0       0       0
+mem    cache   172032  -       -
+mem    pgmajfault      0       -       0
+mem    rss     69525504        -       -
+mem    swap    0       -       -
+net:eth0       rx      859480  1478.97 859480
+net:eth0       tx      55888   395.71  55888
+net:eth0       tx+rx   915368  1874.69 915368
+net:keep0      rx      0       0       0
+net:keep0      tx      0       0       0
+net:keep0      tx+rx   0       0       0
+statfs available       397744787456    -       397744787456
+statfs total   402611240960    -       402611240960
+statfs used    4870303744      52426.18        4866453504
+time   elapsed 20      -       20
+# Number of tasks: 1
+# Max CPU time spent by a single task: 2.45s
+# Max CPU usage in a single interval: 23.70%
+# Overall CPU usage: 12.25%
+# Max memory used by a single task: 0.07GB
+# Max network traffic in a single task: 0.00GB
+# Max network speed in a single interval: 0.00MB/s
+# Keep cache miss rate 0.00%
+# Keep cache utilization 0.00%
+#!! container max CPU usage was 24% -- try runtime_constraints "vcpus":1
+#!! container max RSS was 67 MiB -- try runtime_constraints "ram":1020054732
diff --git a/tools/crunchstat-summary/tests/container_9tee4-dz642-mjfb0i5hzojp16a-crunchstat.txt.gz b/tools/crunchstat-summary/tests/container_9tee4-dz642-mjfb0i5hzojp16a-crunchstat.txt.gz
deleted file mode 100644 (file)
index 8b069e7..0000000
Binary files a/tools/crunchstat-summary/tests/container_9tee4-dz642-mjfb0i5hzojp16a-crunchstat.txt.gz and /dev/null differ
diff --git a/tools/crunchstat-summary/tests/container_9tee4-dz642-mjfb0i5hzojp16a-crunchstat.txt.gz.report b/tools/crunchstat-summary/tests/container_9tee4-dz642-mjfb0i5hzojp16a-crunchstat.txt.gz.report
deleted file mode 100644 (file)
index 88e06a3..0000000
+++ /dev/null
@@ -1,23 +0,0 @@
-category       metric  task_max        task_max_rate   job_total
-cpu    cpus    20      -       -
-cpu    sys     0.82    0.08    0.82
-cpu    user    2.31    0.22    2.31
-cpu    user+sys        3.13    0.30    3.13
-mem    cache   23846912        -       -
-mem    pgmajfault      121     -       121
-mem    rss     65470464        -       -
-mem    swap    0       -       -
-net:eth0       rx      500762  951.15  500762
-net:eth0       tx      36242   226.61  36242
-net:eth0       tx+rx   537004  1177.76 537004
-# Number of tasks: 1
-# Max CPU time spent by a single task: 3.13s
-# Max CPU usage in a single interval: 29.89%
-# Overall CPU usage: 0%
-# Max memory used by a single task: 0.07GB
-# Max network traffic in a single task: 0.00GB
-# Max network speed in a single interval: 0.00MB/s
-# Keep cache miss rate 0.00%
-# Keep cache utilization 0.00%
-#!! container max CPU usage was 30% -- try runtime_constraints "vcpus":1
-#!! container max RSS was 63 MiB -- try runtime_constraints "ram":1020054732
index 6271f5665c040cd45d203cf4e590b56d19d97f86..af92becd80a6875d64e1d406d2b21f8bfbd6ec57 100644 (file)
@@ -61,7 +61,7 @@ class SummarizeEdgeCases(unittest.TestCase):
 
 class SummarizeContainer(ReportDiff):
     fake_container = {
-        'uuid': '9tee4-dz642-mjfb0i5hzojp16a',
+        'uuid': '9tee4-dz642-lymtndkpy39eibk',
         'created_at': '2017-08-18T14:27:25.371388141',
         'log': '9tee4-4zz18-ihyzym9tcwjwg4r',
     }
@@ -71,8 +71,12 @@ class SummarizeContainer(ReportDiff):
         'created_at': '2017-08-18T14:27:25.242339223Z',
         'container_uuid': fake_container['uuid'],
     }
+    reportfile = os.path.join(
+        TESTS_DIR, 'container_9tee4-dz642-lymtndkpy39eibk.txt.gz')
     logfile = os.path.join(
-        TESTS_DIR, 'container_9tee4-dz642-mjfb0i5hzojp16a-crunchstat.txt.gz')
+        TESTS_DIR, 'container_9tee4-dz642-lymtndkpy39eibk-crunchstat.txt.gz')
+    arvmountlog = os.path.join(
+        TESTS_DIR, 'container_9tee4-dz642-lymtndkpy39eibk-arv-mount.txt.gz')
 
     @mock.patch('arvados.collection.CollectionReader')
     @mock.patch('arvados.api')
@@ -82,13 +86,18 @@ class SummarizeContainer(ReportDiff):
         mock_api().containers().get().execute.return_value = self.fake_container
         mock_cr().__iter__.return_value = [
             'crunch-run.txt', 'stderr.txt', 'node-info.txt',
-            'container.json', 'crunchstat.txt']
-        mock_cr().open.return_value = gzip.open(self.logfile)
+            'container.json', 'crunchstat.txt', 'arv-mount.txt']
+        def _open(n):
+            if n == "crunchstat.txt":
+                return gzip.open(self.logfile)
+            elif n == "arv-mount.txt":
+                return gzip.open(self.arvmountlog)
+        mock_cr().open.side_effect = _open
         args = crunchstat_summary.command.ArgumentParser().parse_args(
             ['--job', self.fake_request['uuid']])
         cmd = crunchstat_summary.command.Command(args)
         cmd.run()
-        self.diff_known_report(self.logfile, cmd)
+        self.diff_known_report(self.reportfile, cmd)
 
 
 class SummarizeJob(ReportDiff):