12690: Report Keep stats for Crunch2 container requests.
author Tom Morris <tfmorris@veritasgenetics.com>
Tue, 5 Dec 2017 00:00:14 +0000 (19:00 -0500)
committer Tom Morris <tfmorris@veritasgenetics.com>
Tue, 14 Aug 2018 20:21:32 +0000 (16:21 -0400)
Arvados-DCO-1.1-Signed-off-by: Tom Morris <tfmorris@veritasgenetics.com>

tools/crunchstat-summary/crunchstat_summary/reader.py
tools/crunchstat-summary/crunchstat_summary/summarizer.py

index 98dda673d5a3ab70d65ab1d3989b49f539959b69..5db7e75eb36f464d70fcc44ad43da54c1dd20d66 100644 (file)
@@ -5,6 +5,7 @@
 from __future__ import print_function
 
 import arvados
+import itertools
 import Queue
 import threading
 import _strptime
@@ -16,7 +17,7 @@ class CollectionReader(object):
     def __init__(self, collection_id):
         self._collection_id = collection_id
         self._label = collection_id
-        self._reader = None
+        self._readers = []
 
     def __str__(self):
         return self._label
@@ -25,21 +26,22 @@ class CollectionReader(object):
         logger.debug('load collection %s', self._collection_id)
         collection = arvados.collection.CollectionReader(self._collection_id)
         filenames = [filename for filename in collection]
-        if len(filenames) == 1:
-            filename = filenames[0]
-        else:
-            filename = 'crunchstat.txt'
-        self._label = "{}/{}".format(self._collection_id, filename)
-        self._reader = collection.open(filename)
-        return iter(self._reader)
+        # Crunch2 has multiple stats files
+        if len(filenames) > 1:
+            filenames = ['crunchstat.txt', 'arv-mount.txt']
+        for filename in filenames:
+            self._readers.append(collection.open(filename))
+        self._label = "{}/{}".format(self._collection_id, filenames[0])
+        return itertools.chain(*[iter(reader) for reader in self._readers])
 
     def __enter__(self):
         return self
 
     def __exit__(self, exc_type, exc_val, exc_tb):
-        if self._reader:
-            self._reader.close()
-            self._reader = None
+        if self._readers:
+            for reader in self._readers:
+                reader.close()
+            self._readers = []
 
 
 class LiveLogReader(object):
index d91161c70c3aabdaa6223063946686f437adeaa0..ea30d0c5493ee26216b29127be375d818a9a1599 100644 (file)
@@ -115,12 +115,14 @@ class Summarizer(object):
                     logger.debug('%s: done %s', self.label, uuid)
                     continue
 
+                # 2017-12-02_17:15:08 e51c5-8i9sb-mfp68stkxnqdd6m 63676 0 stderr crunchstat: keepcalls 0 put 2576 get -- interval 10.0000 seconds 0 put 2576 get
                 m = re.search(r'^(?P<timestamp>[^\s.]+)(\.\d+)? (?P<job_uuid>\S+) \d+ (?P<seq>\d+) stderr crunchstat: (?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n$', line)
                 if not m:
                     continue
             else:
                 # crunch2
-                m = re.search(r'^(?P<timestamp>\S+) (?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n$', line)
+                # 2017-12-01T16:56:24.723509200Z crunchstat: keepcalls 0 put 3 get -- interval 10.0000 seconds 0 put 3 get
+                m = re.search(r'^(?P<timestamp>\S+) (crunchstat: )?(?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n$', line)
                 if not m:
                     continue