tools/crunchstat-summary/crunchstat_summary/reader.py

   1 # Copyright (C) The Arvados Authors. All rights reserved.
   2 #
   3 # SPDX-License-Identifier: AGPL-3.0
   4
   5 from __future__ import print_function
   6
   7 import arvados
   8 import Queue
   9 import threading
  10 import _strptime
  11
  12 from crunchstat_summary import logger
  13
  14
  15 class CollectionReader(object):
  16     def __init__(self, collection_id):
  17         logger.debug('load collection %s', collection_id)
  18         collection = arvados.collection.CollectionReader(collection_id)
  19         filenames = [filename for filename in collection]
  20         if len(filenames) != 1:
  21             raise ValueError(
  22                 "collection {} has {} files; need exactly one".format(
  23                     collection_id, len(filenames)))
  24         self._reader = collection.open(filenames[0])
  25         self._label = "{}/{}".format(collection_id, filenames[0])
  26
  27     def __str__(self):
  28         return self._label
  29
  30     def __iter__(self):
  31         return iter(self._reader)
  32
  33
  34 class LiveLogReader(object):
  35     EOF = None
  36
  37     def __init__(self, job_uuid):
  38         logger.debug('load stderr events for job %s', job_uuid)
  39         self.job_uuid = job_uuid
  40
  41     def __str__(self):
  42         return self.job_uuid
  43
  44     def _get_all_pages(self):
  45         got = 0
  46         last_id = 0
  47         filters = [
  48             ['object_uuid', '=', self.job_uuid],
  49             ['event_type', '=', 'stderr']]
  50         try:
  51             while True:
  52                 page = arvados.api().logs().index(
  53                     limit=1000,
  54                     order=['id asc'],
  55                     filters=filters + [['id','>',str(last_id)]],
  56                     select=['id', 'properties'],
  57                 ).execute(num_retries=2)
  58                 got += len(page['items'])
  59                 logger.debug(
  60                     '%s: received %d of %d log events',
  61                     self.job_uuid, got,
  62                     got + page['items_available'] - len(page['items']))
  63                 for i in page['items']:
  64                     for line in i['properties']['text'].split('\n'):
  65                         self._queue.put(line+'\n')
  66                     last_id = i['id']
  67                 if (len(page['items']) == 0 or
  68                     len(page['items']) >= page['items_available']):
  69                     break
  70         finally:
  71             self._queue.put(self.EOF)
  72
  73     def __iter__(self):
  74         self._queue = Queue.Queue()
  75         self._thread = threading.Thread(target=self._get_all_pages)
  76         self._thread.daemon = True
  77         self._thread.start()
  78         return self
  79
  80     def next(self):
  81         line = self._queue.get()
  82         if line is self.EOF:
  83             self._thread.join()
  84             raise StopIteration
  85         return line