tools/crunchstat-summary/crunchstat_summary/reader.py

   1 from __future__ import print_function
   2
   3 import arvados
   4 import collections
   5 import threading
   6
   7 from crunchstat_summary import logger
   8
   9
  10 class CollectionReader(object):
  11     def __init__(self, collection_id):
  12         logger.debug('load collection %s', collection_id)
  13         collection = arvados.collection.CollectionReader(collection_id)
  14         filenames = [filename for filename in collection]
  15         if len(filenames) != 1:
  16             raise ValueError(
  17                 "collection {} has {} files; need exactly one".format(
  18                     collection_id, len(filenames)))
  19         self._reader = collection.open(filenames[0])
  20
  21     def __iter__(self):
  22         return iter(self._reader)
  23
  24
  25 class LiveLogReader(object):
  26     def __init__(self, job_uuid):
  27         logger.debug('load stderr events for job %s', job_uuid)
  28         self._filters = [
  29             ['object_uuid', '=', job_uuid],
  30             ['event_type', '=', 'stderr']]
  31         self._buffer = collections.deque()
  32         self._got = 0
  33         self._label = job_uuid
  34         self._last_id = 0
  35         self._start_getting_next_page()
  36
  37     def _start_getting_next_page(self):
  38         self._thread = threading.Thread(target=self._get_next_page)
  39         self._thread.daemon = True
  40         self._thread.start()
  41
  42     def _get_next_page(self):
  43         page = arvados.api().logs().index(
  44             limit=1000,
  45             order=['id asc'],
  46             filters=self._filters + [['id','>',str(self._last_id)]],
  47         ).execute()
  48         self._got += len(page['items'])
  49         logger.debug(
  50             '%s: received %d of %d log events',
  51             self._label, self._got,
  52             self._got + page['items_available'] - len(page['items']))
  53         self._page = page
  54
  55     def _buffer_page(self):
  56         """Wait for current worker, copy results to _buffer, start next worker.
  57
  58         Return True if anything was added to the buffer."""
  59         if self._thread is None:
  60             return False
  61         self._thread.join()
  62         self._thread = None
  63         page = self._page
  64         if len(page['items']) == 0:
  65             return False
  66         if len(page['items']) < page['items_available']:
  67             self._start_getting_next_page()
  68         for i in page['items']:
  69             for line in i['properties']['text'].split('\n'):
  70                 self._buffer.append(line)
  71             self._last_id = i['id']
  72         return True
  73
  74     def __iter__(self):
  75         return self
  76
  77     def next(self):
  78         if len(self._buffer) == 0:
  79             if not self._buffer_page():
  80                 raise StopIteration
  81         return self._buffer.popleft() + '\n'