Use default of 1 if no cores requested. refs #12026
[arvados.git] tools/crunchstat-summary/crunchstat_summary/summarizer.py
# Copyright (C) The Arvados Authors. All rights reserved.
#
# SPDX-License-Identifier: AGPL-3.0

import arvados
import collections
import crunchstat_summary.dygraphs
import crunchstat_summary.reader
import datetime
import functools
import itertools
import math
import re
import sys
import threading
import _strptime

from arvados.api import OrderedJsonModel
from crunchstat_summary import logger

# Recommend memory constraints that are this multiple of an integral
# number of GiB. (Actual nodes tend to be sold in sizes like 8 GiB
# that have amounts like 7.5 GiB according to the kernel.)
AVAILABLE_RAM_RATIO = 0.95
MB = 2**20

# Workaround datetime.datetime.strptime() thread-safety bug by calling
# it once before starting threads.  https://bugs.python.org/issue7980
datetime.datetime.strptime('1999-12-31_23:59:59', '%Y-%m-%d_%H:%M:%S')


WEBCHART_CLASS = crunchstat_summary.dygraphs.DygraphsChart


class Task(object):
    def __init__(self):
        self.starttime = None
        self.finishtime = None
        self.series = collections.defaultdict(list)


class Summarizer(object):
    def __init__(self, logdata, label=None, skip_child_jobs=False, uuid=None, **kwargs):
        self._logdata = logdata

        self.uuid = uuid
        self.label = label
        self.starttime = None
        self.finishtime = None
        self._skip_child_jobs = skip_child_jobs

        # stats_max: {category: {stat: val}}
        self.stats_max = collections.defaultdict(
            functools.partial(collections.defaultdict, lambda: 0))
        # task_stats: {task_id: {category: {stat: val}}}
        self.task_stats = collections.defaultdict(
            functools.partial(collections.defaultdict, dict))

        self.seq_to_uuid = {}
        self.tasks = collections.defaultdict(Task)

        # We won't bother recommending new runtime constraints if the
        # constraints given when running the job are known to us and
        # are already suitable.  If applicable, the subclass
        # constructor will overwrite this with something useful.
        self.existing_constraints = {}

        logger.debug("%s: logdata %s", self.label, logdata)

    def run(self):
        logger.debug("%s: parsing logdata %s", self.label, self._logdata)
        with self._logdata as logdata:
            self._run(logdata)

    def _run(self, logdata):
        self.detected_crunch1 = False
        for line in logdata:
            if not self.detected_crunch1 and '-8i9sb-' in line:
                self.detected_crunch1 = True

            if self.detected_crunch1:
                m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) job_task (?P<task_uuid>\S+)$', line)
                if m:
                    seq = int(m.group('seq'))
                    uuid = m.group('task_uuid')
                    self.seq_to_uuid[seq] = uuid
                    logger.debug('%s: seq %d is task %s', self.label, seq, uuid)
                    continue

                m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) (success in|failure \(#., permanent\) after) (?P<elapsed>\d+) seconds', line)
                if m:
                    task_id = self.seq_to_uuid[int(m.group('seq'))]
                    elapsed = int(m.group('elapsed'))
                    self.task_stats[task_id]['time'] = {'elapsed': elapsed}
                    if elapsed > self.stats_max['time']['elapsed']:
                        self.stats_max['time']['elapsed'] = elapsed
                    continue

                m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) stderr Queued job (?P<uuid>\S+)$', line)
                if m:
                    uuid = m.group('uuid')
                    if self._skip_child_jobs:
                        logger.warning('%s: omitting stats from child job %s'
                                       ' because --skip-child-jobs flag is on',
                                       self.label, uuid)
                        continue
                    logger.debug('%s: follow %s', self.label, uuid)
                    child_summarizer = ProcessSummarizer(uuid)
                    child_summarizer.stats_max = self.stats_max
                    child_summarizer.task_stats = self.task_stats
                    child_summarizer.tasks = self.tasks
                    child_summarizer.starttime = self.starttime
                    child_summarizer.run()
                    logger.debug('%s: done %s', self.label, uuid)
                    continue

                # 2017-12-02_17:15:08 e51c5-8i9sb-mfp68stkxnqdd6m 63676 0 stderr crunchstat: keepcalls 0 put 2576 get -- interval 10.0000 seconds 0 put 2576 get
                m = re.search(r'^(?P<timestamp>[^\s.]+)(\.\d+)? (?P<job_uuid>\S+) \d+ (?P<seq>\d+) stderr (?P<crunchstat>crunchstat: )(?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n$', line)
                if not m:
                    continue
            else:
                # crunch2
                # 2017-12-01T16:56:24.723509200Z crunchstat: keepcalls 0 put 3 get -- interval 10.0000 seconds 0 put 3 get
                m = re.search(r'^(?P<timestamp>\S+) (?P<crunchstat>crunchstat: )?(?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n$', line)
                if not m:
                    continue

            if self.label is None:
                try:
                    self.label = m.group('job_uuid')
                except IndexError:
                    self.label = 'container'
            if m.group('category').endswith(':'):
                # "stderr crunchstat: notice: ..."
                continue
            elif m.group('category') in ('error', 'caught'):
                continue
            elif m.group('category') in ('read', 'open', 'cgroup', 'CID', 'Running'):
                # "stderr crunchstat: read /proc/1234/net/dev: ..."
                # (old logs are less careful with unprefixed error messages)
                continue

            if self.detected_crunch1:
                task_id = self.seq_to_uuid[int(m.group('seq'))]
            else:
                task_id = 'container'
            task = self.tasks[task_id]

            # Use the first and last crunchstat timestamps as
            # approximations of starttime and finishtime.
            timestamp = m.group('timestamp')
            if timestamp[10:11] == '_':
                timestamp = datetime.datetime.strptime(
                    timestamp, '%Y-%m-%d_%H:%M:%S')
            elif timestamp[10:11] == 'T':
                timestamp = datetime.datetime.strptime(
                    timestamp[:19], '%Y-%m-%dT%H:%M:%S')
            else:
                raise ValueError("Cannot parse timestamp {!r}".format(
                    timestamp))

            if task.starttime is None:
                logger.debug('%s: task %s starttime %s',
                             self.label, task_id, timestamp)
            if task.starttime is None or timestamp < task.starttime:
                task.starttime = timestamp
            if task.finishtime is None or timestamp > task.finishtime:
                task.finishtime = timestamp

            if self.starttime is None or timestamp < self.starttime:
                self.starttime = timestamp
            if self.finishtime is None or timestamp > self.finishtime:
                self.finishtime = timestamp

            if (not self.detected_crunch1) and task.starttime is not None and task.finishtime is not None:
                elapsed = (task.finishtime - task.starttime).seconds
                self.task_stats[task_id]['time'] = {'elapsed': elapsed}
                if elapsed > self.stats_max['time']['elapsed']:
                    self.stats_max['time']['elapsed'] = elapsed

            this_interval_s = None
            for group in ['current', 'interval']:
                if not m.group(group):
                    continue
                category = m.group('category')
                words = m.group(group).split(' ')
                stats = {}
                try:
                    for val, stat in zip(words[::2], words[1::2]):
                        if '.' in val:
                            stats[stat] = float(val)
                        else:
                            stats[stat] = int(val)
                except ValueError as e:
                    # If the line doesn't start with 'crunchstat:' we
                    # might have mistaken an error message for a
                    # structured crunchstat line.
                    if m.group("crunchstat") is None or m.group("category") == "crunchstat":
                        logger.warning("%s: log contains message\n  %s", self.label, line)
                    else:
                        logger.warning(
                            '%s: Error parsing value %r (stat %r, category %r): %r',
                            self.label, val, stat, category, e)
                        logger.warning('%s', line)
                    continue
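                # Synthesize combined stats so total CPU time ('user+sys') and
                # total network traffic ('tx+rx') can be reported and charted
                # as single series alongside the raw counters.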
                if 'user' in stats or 'sys' in stats:
                    stats['user+sys'] = stats.get('user', 0) + stats.get('sys', 0)
                if 'tx' in stats or 'rx' in stats:
                    stats['tx+rx'] = stats.get('tx', 0) + stats.get('rx', 0)
                for stat, val in stats.items():
                    if group == 'interval':
                        if stat == 'seconds':
                            this_interval_s = val
                            continue
                        elif not (this_interval_s > 0):
                            logger.error(
                                "BUG? interval stat given with duration {!r}".
                                format(this_interval_s))
                            continue
                        else:
                            stat = stat + '__rate'
                            val = val / this_interval_s
                            if stat in ['user+sys__rate', 'tx+rx__rate']:
                                task.series[category, stat].append(
                                    (timestamp - self.starttime, val))
                    else:
                        if stat in ['rss']:
                            task.series[category, stat].append(
                                (timestamp - self.starttime, val))
                        self.task_stats[task_id][category][stat] = val
                    if val > self.stats_max[category][stat]:
                        self.stats_max[category][stat] = val
        logger.debug('%s: done parsing', self.label)

        self.job_tot = collections.defaultdict(
            functools.partial(collections.defaultdict, int))
        for task_id, task_stat in self.task_stats.items():
            for category, stat_last in task_stat.items():
                for stat, val in stat_last.items():
                    if stat in ['cpus', 'cache', 'swap', 'rss']:
                        # meaningless stats like 16 cpu cores x 5 tasks = 80
                        continue
                    self.job_tot[category][stat] += val
        logger.debug('%s: done totals', self.label)

    def long_label(self):
        label = self.label
        if hasattr(self, 'process') and self.process['uuid'] not in label:
            label = '{} ({})'.format(label, self.process['uuid'])
        if self.finishtime:
            label += ' -- elapsed time '
            s = (self.finishtime - self.starttime).total_seconds()
            if s > 86400:
                label += '{}d'.format(int(s/86400))
            if s > 3600:
                label += '{}h'.format(int(s/3600) % 24)
            if s > 60:
                label += '{}m'.format(int(s/60) % 60)
            label += '{}s'.format(int(s) % 60)
        return label

    def text_report(self):
        if not self.tasks:
            return "(no report generated)\n"
        return "\n".join(itertools.chain(
            self._text_report_gen(),
            self._recommend_gen())) + "\n"

    def html_report(self):
        return WEBCHART_CLASS(self.label, [self]).html()

    def _text_report_gen(self):
        yield "\t".join(['category', 'metric', 'task_max', 'task_max_rate', 'job_total'])
        for category, stat_max in sorted(self.stats_max.items()):
            for stat, val in sorted(stat_max.items()):
                if stat.endswith('__rate'):
                    continue
                max_rate = self._format(stat_max.get(stat+'__rate', '-'))
                val = self._format(val)
                tot = self._format(self.job_tot[category].get(stat, '-'))
                yield "\t".join([category, stat, str(val), max_rate, tot])
        for args in (
                ('Number of tasks: {}',
                 len(self.tasks),
                 None),
                ('Max CPU time spent by a single task: {}s',
                 self.stats_max['cpu']['user+sys'],
                 None),
                ('Max CPU usage in a single interval: {}%',
                 self.stats_max['cpu']['user+sys__rate'],
                 lambda x: x * 100),
                ('Overall CPU usage: {}%',
                 float(self.job_tot['cpu']['user+sys']) /
                 self.job_tot['time']['elapsed']
                 if self.job_tot['time']['elapsed'] > 0 else 0,
                 lambda x: x * 100),
                ('Max memory used by a single task: {}GB',
                 self.stats_max['mem']['rss'],
                 lambda x: x / 1e9),
                ('Max network traffic in a single task: {}GB',
                 self.stats_max['net:eth0']['tx+rx'] +
                 self.stats_max['net:keep0']['tx+rx'],
                 lambda x: x / 1e9),
                ('Max network speed in a single interval: {}MB/s',
                 self.stats_max['net:eth0']['tx+rx__rate'] +
                 self.stats_max['net:keep0']['tx+rx__rate'],
                 lambda x: x / 1e6),
                ('Keep cache miss rate {}%',
                 (float(self.job_tot['keepcache']['miss']) /
                 float(self.job_tot['keepcalls']['get']))
                 if self.job_tot['keepcalls']['get'] > 0 else 0,
                 lambda x: x * 100.0),
                ('Keep cache utilization {}%',
                 (float(self.job_tot['blkio:0:0']['read']) /
                 float(self.job_tot['net:keep0']['rx']))
                 if self.job_tot['net:keep0']['rx'] > 0 else 0,
                 lambda x: x * 100.0)):
            format_string, val, transform = args
            if val == float('-Inf'):
                continue
            if transform:
                val = transform(val)
            yield "# "+format_string.format(self._format(val))

    def _recommend_gen(self):
        # TODO recommend fixing job granularity if elapsed time is too short
        return itertools.chain(
            self._recommend_cpu(),
            self._recommend_ram(),
            self._recommend_keep_cache())

    def _recommend_cpu(self):
        """Recommend asking for 4 cores if max CPU usage was 333%"""

        constraint_key = self._map_runtime_constraint('vcpus')
        cpu_max_rate = self.stats_max['cpu']['user+sys__rate']
        if cpu_max_rate == float('-Inf') or cpu_max_rate == 0.0:
            logger.warning('%s: no CPU usage data', self.label)
            return
        # TODO Don't necessarily want to recommend on isolated max peak
        # take average CPU usage into account as well or % time at max
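        # For example, a peak rate of 3.33 cores rounds up to a
        # recommendation of 4 cores, and never less than 1.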
        used_cores = max(1, int(math.ceil(cpu_max_rate)))
        asked_cores = self.existing_constraints.get(constraint_key)
        if asked_cores is None:
            asked_cores = 1
        if used_cores < asked_cores:
            yield (
                '#!! {} max CPU usage was {}% -- '
                'try runtime_constraints "{}":{}'
            ).format(
                self.label,
                math.ceil(cpu_max_rate*100),
                constraint_key,
                int(used_cores))

    def _recommend_ram(self):
        """Recommend an economical RAM constraint for this job.

        Nodes that are advertised as "8 gibibytes" actually have what
        we might call "8 nearlygibs" of memory available for jobs.
        Here, we calculate a whole number of nearlygibs that would
        have sufficed to run the job, then recommend requesting a node
        with that number of nearlygibs (expressed as mebibytes).

        Requesting a node with "nearly 8 gibibytes" is our best hope
        of getting a node that actually has nearly 8 gibibytes
        available.  If the node manager is smart enough to account for
        the discrepancy itself when choosing/creating a node, we'll
        get an 8 GiB node with nearly 8 GiB available.  Otherwise, the
        advertised size of the next-size-smaller node (say, 6 GiB)
        will be too low to satisfy our request, so we will effectively
        get rounded up to 8 GiB.

        For example, if we need 7500 MiB, we can ask for 7500 MiB, and
        we will generally get a node that is advertised as "8 GiB" and
        has at least 7500 MiB available.  However, asking for 8192 MiB
        would either result in an unnecessarily expensive 12 GiB node
        (if node manager knows about the discrepancy), or an 8 GiB
        node which has less than 8192 MiB available and is therefore
        considered by crunch-dispatch to be too small to meet our
        constraint.

        When node manager learns how to predict the available memory
        for each node type such that crunch-dispatch always agrees
        that a node is big enough to run the job it was brought up
        for, all this will be unnecessary.  We'll just ask for exactly
        the memory we want -- even if that happens to be 8192 MiB.
        """

        constraint_key = self._map_runtime_constraint('ram')
        used_bytes = self.stats_max['mem']['rss']
        if used_bytes == float('-Inf'):
            logger.warning('%s: no memory usage data', self.label)
            return
        used_mib = math.ceil(float(used_bytes) / MB)
        asked_mib = self.existing_constraints.get(constraint_key)

        nearlygibs = lambda mebibytes: mebibytes/AVAILABLE_RAM_RATIO/1024
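        # Worked example (a crunch1 job, where the constraint unit is MiB):
        # used_mib = 7000 gives nearlygibs(7000) ~= 7.2, which rounds up to
        # 8 nearlygibs, so the recommendation below works out to
        # int(8 * 0.95 * 1024) = 7782 MiB rather than a full 8192.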
        if used_mib > 0 and (asked_mib is None or (
                math.ceil(nearlygibs(used_mib)) < nearlygibs(asked_mib))):
            yield (
                '#!! {} max RSS was {} MiB -- '
                'try runtime_constraints "{}":{}'
            ).format(
                self.label,
                int(used_mib),
                constraint_key,
                int(math.ceil(nearlygibs(used_mib))*AVAILABLE_RAM_RATIO*1024*(MB)/self._runtime_constraint_mem_unit()))

    def _recommend_keep_cache(self):
        """Recommend increasing keep cache if utilization < 80%"""
        constraint_key = self._map_runtime_constraint('keep_cache_ram')
        if self.job_tot['net:keep0']['rx'] == 0:
            return
        utilization = (float(self.job_tot['blkio:0:0']['read']) /
                       float(self.job_tot['net:keep0']['rx']))
        # FIXME: the default on this get won't work correctly
        asked_cache = self.existing_constraints.get(constraint_key, 256) * self._runtime_constraint_mem_unit()

        if utilization < 0.8:
            yield (
                '#!! {} Keep cache utilization was {:.2f}% -- '
                'try runtime_constraints "{}":{} (or more)'
            ).format(
                self.label,
                utilization * 100.0,
                constraint_key,
                math.ceil(asked_cache * 2 / self._runtime_constraint_mem_unit()))

    def _format(self, val):
        """Return a string representation of a stat.

        {:.2f} for floats, default format for everything else."""
        if isinstance(val, float):
            return '{:.2f}'.format(val)
        else:
            return '{}'.format(val)

    def _runtime_constraint_mem_unit(self):
        if hasattr(self, 'runtime_constraint_mem_unit'):
            return self.runtime_constraint_mem_unit
        elif self.detected_crunch1:
            return JobSummarizer.runtime_constraint_mem_unit
        else:
            return ContainerSummarizer.runtime_constraint_mem_unit

    def _map_runtime_constraint(self, key):
        if hasattr(self, 'map_runtime_constraint'):
            return self.map_runtime_constraint[key]
        elif self.detected_crunch1:
            return JobSummarizer.map_runtime_constraint[key]
        else:
            return key


class CollectionSummarizer(Summarizer):
    def __init__(self, collection_id, **kwargs):
        super(CollectionSummarizer, self).__init__(
            crunchstat_summary.reader.CollectionReader(collection_id), **kwargs)
        self.label = collection_id


def NewSummarizer(process_or_uuid, **kwargs):
    """Construct with the appropriate subclass for this uuid/object."""

    if isinstance(process_or_uuid, dict):
        process = process_or_uuid
        uuid = process['uuid']
    else:
        uuid = process_or_uuid
        process = None
        arv = arvados.api('v1', model=OrderedJsonModel())

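    # Arvados uuids encode the object type in their middle segment:
    # -dz642- container, -xvhdp- container request, -8i9sb- job,
    # -d1hrv- pipeline instance, -4zz18- collection.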
    if '-dz642-' in uuid:
        if process is None:
            process = arv.containers().get(uuid=uuid).execute()
        klass = ContainerTreeSummarizer
    elif '-xvhdp-' in uuid:
        if process is None:
            process = arv.container_requests().get(uuid=uuid).execute()
        klass = ContainerTreeSummarizer
    elif '-8i9sb-' in uuid:
        if process is None:
            process = arv.jobs().get(uuid=uuid).execute()
        klass = JobTreeSummarizer
    elif '-d1hrv-' in uuid:
        if process is None:
            process = arv.pipeline_instances().get(uuid=uuid).execute()
        klass = PipelineSummarizer
    elif '-4zz18-' in uuid:
        return CollectionSummarizer(collection_id=uuid)
    else:
        raise ValueError("Unrecognized uuid %s" % uuid)
    return klass(process, uuid=uuid, **kwargs)


class ProcessSummarizer(Summarizer):
    """Process is a job, pipeline, container, or container request."""

    def __init__(self, process, label=None, **kwargs):
        rdr = None
        self.process = process
        if label is None:
            label = self.process.get('name', self.process['uuid'])
        if self.process.get('log'):
            try:
                rdr = crunchstat_summary.reader.CollectionReader(self.process['log'])
            except arvados.errors.NotFoundError as e:
                logger.warning("Trying event logs after failing to read "
                               "log collection %s: %s", self.process['log'], e)
        if rdr is None:
            rdr = crunchstat_summary.reader.LiveLogReader(self.process['uuid'])
            label = label + ' (partial)'
        super(ProcessSummarizer, self).__init__(rdr, label=label, **kwargs)
        self.existing_constraints = self.process.get('runtime_constraints', {})


class JobSummarizer(ProcessSummarizer):
    runtime_constraint_mem_unit = MB
    map_runtime_constraint = {
        'keep_cache_ram': 'keep_cache_mb_per_task',
        'ram': 'min_ram_mb_per_node',
        'vcpus': 'min_cores_per_node',
    }


class ContainerSummarizer(ProcessSummarizer):
    runtime_constraint_mem_unit = 1


class MultiSummarizer(object):
    def __init__(self, children={}, label=None, threads=1, **kwargs):
        self.throttle = threading.Semaphore(threads)
        self.children = children
        self.label = label

    def run_and_release(self, target, *args, **kwargs):
        try:
            return target(*args, **kwargs)
        finally:
            self.throttle.release()

    def run(self):
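        # Run each child summarizer in its own thread; the semaphore caps
        # the number of children parsing logs concurrently at `threads`.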
        threads = []
        for child in self.children.values():
            self.throttle.acquire()
            t = threading.Thread(target=self.run_and_release, args=(child.run, ))
            t.daemon = True
            t.start()
            threads.append(t)
        for t in threads:
            t.join()

    def text_report(self):
        txt = ''
        d = self._descendants()
        for child in d.values():
            if len(d) > 1:
                txt += '### Summary for {} ({})\n'.format(
                    child.label, child.process['uuid'])
            txt += child.text_report()
            txt += '\n'
        return txt

    def _descendants(self):
        """Dict of self and all descendants.

        Nodes with nothing of their own to report (like
        MultiSummarizers) are omitted.
        """
        d = collections.OrderedDict()
        for key, child in self.children.items():
            if isinstance(child, Summarizer):
                d[key] = child
            if isinstance(child, MultiSummarizer):
                d.update(child._descendants())
        return d

    def html_report(self):
        return WEBCHART_CLASS(self.label, iter(self._descendants().values())).html()


class JobTreeSummarizer(MultiSummarizer):
    """Summarizes a job and all children listed in its components field."""
    def __init__(self, job, label=None, **kwargs):
        arv = arvados.api('v1', model=OrderedJsonModel())
        label = label or job.get('name', job['uuid'])
        children = collections.OrderedDict()
        children[job['uuid']] = JobSummarizer(job, label=label, **kwargs)
        if job.get('components', None):
            preloaded = {}
            for j in arv.jobs().index(
                    limit=len(job['components']),
                    filters=[['uuid','in',list(job['components'].values())]]).execute()['items']:
                preloaded[j['uuid']] = j
            for cname in sorted(job['components'].keys()):
                child_uuid = job['components'][cname]
                j = (preloaded.get(child_uuid) or
                     arv.jobs().get(uuid=child_uuid).execute())
                children[child_uuid] = JobTreeSummarizer(job=j, label=cname, **kwargs)

        super(JobTreeSummarizer, self).__init__(
            children=children,
            label=label,
            **kwargs)


class PipelineSummarizer(MultiSummarizer):
    def __init__(self, instance, **kwargs):
        children = collections.OrderedDict()
        for cname, component in instance['components'].items():
            if 'job' not in component:
                logger.warning(
                    "%s: skipping component with no job assigned", cname)
            else:
                logger.info(
                    "%s: job %s", cname, component['job']['uuid'])
                summarizer = JobTreeSummarizer(component['job'], label=cname, **kwargs)
                summarizer.label = '{} {}'.format(
                    cname, component['job']['uuid'])
                children[cname] = summarizer
        super(PipelineSummarizer, self).__init__(
            children=children,
            label=instance['uuid'],
            **kwargs)


class ContainerTreeSummarizer(MultiSummarizer):
    def __init__(self, root, skip_child_jobs=False, **kwargs):
        arv = arvados.api('v1', model=OrderedJsonModel())

        label = kwargs.pop('label', None) or root.get('name') or root['uuid']
        root['name'] = label

        children = collections.OrderedDict()
        todo = collections.deque((root, ))
        while len(todo) > 0:
            current = todo.popleft()
            label = current['name']
            sort_key = current['created_at']
            if current['uuid'].find('-xvhdp-') > 0:
                current = arv.containers().get(uuid=current['container_uuid']).execute()

            summer = ContainerSummarizer(current, label=label, **kwargs)
            summer.sort_key = sort_key
            children[current['uuid']] = summer

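            # Page through this container's child container requests in uuid
            # order, using the last uuid seen as the cursor for the next page.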
            page_filters = []
            while True:
                child_crs = arv.container_requests().index(
                    order=['uuid asc'],
                    filters=page_filters+[
                        ['requesting_container_uuid', '=', current['uuid']]],
                ).execute()
                if not child_crs['items']:
                    break
                elif skip_child_jobs:
                    logger.warning('%s: omitting stats from %d child containers'
                                   ' because --skip-child-jobs flag is on',
                                   label, child_crs['items_available'])
                    break
                page_filters = [['uuid', '>', child_crs['items'][-1]['uuid']]]
                for cr in child_crs['items']:
                    if cr['container_uuid']:
                        logger.debug('%s: container req %s', current['uuid'], cr['uuid'])
                        cr['name'] = cr.get('name') or cr['uuid']
                        todo.append(cr)
        sorted_children = collections.OrderedDict()
        for uuid in sorted(list(children.keys()), key=lambda uuid: children[uuid].sort_key):
            sorted_children[uuid] = children[uuid]
        super(ContainerTreeSummarizer, self).__init__(
            children=sorted_children,
            label=root['name'],
            **kwargs)