description='Summarize resource usage of an Arvados Crunch job')
src = self.add_mutually_exclusive_group()
src.add_argument(
- '--job', '--container', '--container-request',
+ '--job', '--container-request',
type=str, metavar='UUID',
- help='Look up the specified job, container, or container request '
+ help='Look up the specified job or container request '
+ 'and read its log data from Keep (or from the Arvados event log, '
+ 'if the job is still running)')
+ src.add_argument(
+ '--container',
+ type=str, metavar='UUID',
+ help='[Deprecated] Look up the specified container find its container request '
'and read its log data from Keep (or from the Arvados event log, '
'if the job is still running)')
src.add_argument(
'--pipeline-instance', type=str, metavar='UUID',
- help='Summarize each component of the given pipeline instance')
+ help='[Deprecated] Summarize each component of the given pipeline instance (historical pre-1.4)')
src.add_argument(
'--log-file', type=str,
help='Read log data from a regular file')
self.summer = summarizer.NewSummarizer(self.args.pipeline_instance, **kwargs)
elif self.args.job:
self.summer = summarizer.NewSummarizer(self.args.job, **kwargs)
+ elif self.args.container:
+ self.summer = summarizer.NewSummarizer(self.args.container, **kwargs)
elif self.args.log_file:
if self.args.log_file.endswith('.gz'):
fh = UTF8Decode(gzip.open(self.args.log_file))
elif self.detected_crunch1:
return JobSummarizer.runtime_constraint_mem_unit
else:
- return ContainerSummarizer.runtime_constraint_mem_unit
+ return ContainerRequestSummarizer.runtime_constraint_mem_unit
def _map_runtime_constraint(self, key):
if hasattr(self, 'map_runtime_constraint'):
if '-dz642-' in uuid:
if process is None:
- process = arv.containers().get(uuid=uuid).execute()
- klass = ContainerTreeSummarizer
+ # Get the associated CR. Doesn't matter which since they all have the same logs
+ crs = arv.container_requests().list(filters=[['container_uuid','=',uuid]],limit=1).execute()['items']
+ if len(crs) > 0:
+ process = crs[0]
+ klass = ContainerRequestTreeSummarizer
elif '-xvhdp-' in uuid:
if process is None:
process = arv.container_requests().get(uuid=uuid).execute()
- klass = ContainerTreeSummarizer
+ klass = ContainerRequestTreeSummarizer
elif '-8i9sb-' in uuid:
if process is None:
process = arv.jobs().get(uuid=uuid).execute()
self.process = process
if label is None:
label = self.process.get('name', self.process['uuid'])
- if self.process.get('log'):
+ # Pre-Arvados v1.4 everything is in 'log'
+ # For 1.4+ containers have no logs and container_requests have them in 'log_uuid', not 'log'
+ log_collection = self.process.get('log')
+ if not log_collection:
+ log_collection = self.process.get('log_uuid')
+ if log_collection:
try:
- rdr = crunchstat_summary.reader.CollectionReader(self.process['log'])
+ rdr = crunchstat_summary.reader.CollectionReader(log_collection)
except arvados.errors.NotFoundError as e:
logger.warning("Trying event logs after failing to read "
"log collection %s: %s", self.process['log'], e)
}
-class ContainerSummarizer(ProcessSummarizer):
+class ContainerRequestSummarizer(ProcessSummarizer):
runtime_constraint_mem_unit = 1
**kwargs)
-class ContainerTreeSummarizer(MultiSummarizer):
+class ContainerRequestTreeSummarizer(MultiSummarizer):
def __init__(self, root, skip_child_jobs=False, **kwargs):
arv = arvados.api('v1', model=OrderedJsonModel())
current = todo.popleft()
label = current['name']
sort_key = current['created_at']
- if current['uuid'].find('-xvhdp-') > 0:
- current = arv.containers().get(uuid=current['container_uuid']).execute()
- summer = ContainerSummarizer(current, label=label, **kwargs)
+ summer = ContainerRequestSummarizer(current, label=label, **kwargs)
summer.sort_key = sort_key
children[current['uuid']] = summer
child_crs = arv.container_requests().index(
order=['uuid asc'],
filters=page_filters+[
- ['requesting_container_uuid', '=', current['uuid']]],
+ ['requesting_container_uuid', '=', current['container_uuid']]],
).execute()
if not child_crs['items']:
break
sorted_children = collections.OrderedDict()
for uuid in sorted(list(children.keys()), key=lambda uuid: children[uuid].sort_key):
sorted_children[uuid] = children[uuid]
- super(ContainerTreeSummarizer, self).__init__(
+ super(ContainerRequestTreeSummarizer, self).__init__(
children=sorted_children,
label=root['name'],
**kwargs)
--- /dev/null
+category metric task_max task_max_rate job_total
+blkio:0:0 read 0 0 0
+blkio:0:0 write 0 0 0
+cpu cpus 20 - -
+cpu sys 0.39 0.04 0.39
+cpu user 2.06 0.20 2.06
+cpu user+sys 2.45 0.24 2.45
+fuseops read 0 0 0
+fuseops write 0 0 0
+keepcache hit 0 0 0
+keepcache miss 0 0 0
+keepcalls get 0 0 0
+keepcalls put 0 0 0
+mem cache 172032 - -
+mem pgmajfault 0 - 0
+mem rss 69525504 - -
+mem swap 0 - -
+net:eth0 rx 859480 1478.97 859480
+net:eth0 tx 55888 395.71 55888
+net:eth0 tx+rx 915368 1874.69 915368
+net:keep0 rx 0 0 0
+net:keep0 tx 0 0 0
+net:keep0 tx+rx 0 0 0
+statfs available 397744787456 - 397744787456
+statfs total 402611240960 - 402611240960
+statfs used 4870303744 52426.18 4866453504
+time elapsed 20 - 20
+# Number of tasks: 1
+# Max CPU time spent by a single task: 2.45s
+# Max CPU usage in a single interval: 23.70%
+# Overall CPU usage: 12.25%
+# Max memory used by a single task: 0.07GB
+# Max network traffic in a single task: 0.00GB
+# Max network speed in a single interval: 0.00MB/s
+# Keep cache miss rate 0.00%
+# Keep cache utilization 0.00%
+# Temp disk utilization 1.21%
+#!! container max RSS was 67 MiB -- try reducing runtime_constraints to "ram":1020054732
+#!! container max temp disk utilization was 1% of 383960 MiB -- consider reducing "tmpdirMin" and/or "outdirMin"
s.run()
-class SummarizeContainer(ReportDiff):
+class SummarizeContainerCommon(ReportDiff):
fake_container = {
'uuid': '9tee4-dz642-lymtndkpy39eibk',
'created_at': '2017-08-18T14:27:25.371388141',
'log': '9tee4-4zz18-ihyzym9tcwjwg4r',
}
fake_request = {
- 'uuid': '9tee4-xvhdp-uper95jktm10d3w',
+ 'uuid': '9tee4-xvhdp-kk0ja1cl8b2kr1y',
'name': 'container',
'created_at': '2017-08-18T14:27:25.242339223Z',
'container_uuid': fake_container['uuid'],
- }
- reportfile = os.path.join(
- TESTS_DIR, 'container_9tee4-dz642-lymtndkpy39eibk.txt.gz')
+ 'runtime_constraints': {
+ 'vcpus': 1,
+ 'ram': 2621440000
+ },
+ 'log_uuid' : '9tee4-4zz18-m2swj50nk0r8b6y'
+ }
+
logfile = os.path.join(
- TESTS_DIR, 'container_9tee4-dz642-lymtndkpy39eibk-crunchstat.txt.gz')
+ TESTS_DIR, 'container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-crunchstat.txt.gz')
arvmountlog = os.path.join(
- TESTS_DIR, 'container_9tee4-dz642-lymtndkpy39eibk-arv-mount.txt.gz')
+ TESTS_DIR, 'container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-arv-mount.txt.gz')
@mock.patch('arvados.collection.CollectionReader')
@mock.patch('arvados.api')
- def test_container(self, mock_api, mock_cr):
- mock_api().container_requests().index().execute.return_value = {'items':[]}
+ def check_common(self, mock_api, mock_cr):
+ items = [ {'items':[self.fake_request]}] + [{'items':[]}] * 100
+ # Index and list mean the same thing, but are used in different places in the
+ # code. It's fragile, but exploit that fact to distinguish the two uses.
+ mock_api().container_requests().index().execute.return_value = {'items': [] } # child_crs
+ mock_api().container_requests().list().execute.side_effect = items # parent request
mock_api().container_requests().get().execute.return_value = self.fake_request
mock_api().containers().get().execute.return_value = self.fake_container
mock_cr().__iter__.return_value = [
return UTF8Decode(gzip.open(self.arvmountlog))
mock_cr().open.side_effect = _open
args = crunchstat_summary.command.ArgumentParser().parse_args(
- ['--container', self.fake_request['uuid']])
+ self.arg_strings)
cmd = crunchstat_summary.command.Command(args)
cmd.run()
self.diff_known_report(self.reportfile, cmd)
+
+class SummarizeContainer(SummarizeContainerCommon):
+ uuid = '9tee4-dz642-lymtndkpy39eibk'
+ reportfile = os.path.join(TESTS_DIR, 'container_%s.txt.gz' % uuid)
+ arg_strings = ['--container', uuid, '-v', '-v']
+
+ def test_container(self):
+ self.check_common()
+
+
+class SummarizeContainerRequest(SummarizeContainerCommon):
+ uuid = '9tee4-xvhdp-kk0ja1cl8b2kr1y'
+ reportfile = os.path.join(TESTS_DIR, 'container_request_%s.txt.gz' % uuid)
+ arg_strings = ['--container-request', uuid, '-v', '-v']
+
+ def test_container_request(self):
+ self.check_common()
+
+
class SummarizeJob(ReportDiff):
fake_job_uuid = '4xphq-8i9sb-jq0ekny1xou3zoh'
fake_log_id = 'fake-log-collection-id'