From 8dae253559240fb16564fb934fece2cbcf001866 Mon Sep 17 00:00:00 2001 From: Tom Morris Date: Thu, 25 Jul 2019 17:01:40 -0400 Subject: [PATCH] 15486: Fix crunchstat-summary to work with Arvados v1.4+ Arvados v1.4 changed things so that container logs are no longer accessible via PDH so we need to go to the container request and access the log via UUID from there. Arvados-DCO-1.1-Signed-off-by: Tom Morris --- .../crunchstat_summary/command.py | 14 ++++-- .../crunchstat_summary/summarizer.py | 32 +++++++----- ...ee4-dz642-lymtndkpy39eibk-arv-mount.txt.gz | Bin 235 -> 0 bytes ...ee4-xvhdp-kk0ja1cl8b2kr1y-arv-mount.txt.gz | Bin 0 -> 274 bytes ...p-kk0ja1cl8b2kr1y-arv-mount.txt.gz.report} | 0 ...4-xvhdp-kk0ja1cl8b2kr1y-crunchstat.txt.gz} | Bin 663 -> 724 bytes ...-kk0ja1cl8b2kr1y-crunchstat.txt.gz.report} | 0 ..._9tee4-xvhdp-kk0ja1cl8b2kr1y.txt.gz.report | 39 +++++++++++++++ .../crunchstat-summary/tests/test_examples.py | 47 ++++++++++++++---- 9 files changed, 106 insertions(+), 26 deletions(-) delete mode 100644 tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-arv-mount.txt.gz create mode 100644 tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-arv-mount.txt.gz rename tools/crunchstat-summary/tests/{container_9tee4-dz642-lymtndkpy39eibk-arv-mount.txt.gz.report => container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-arv-mount.txt.gz.report} (100%) rename tools/crunchstat-summary/tests/{container_9tee4-dz642-lymtndkpy39eibk-crunchstat.txt.gz => container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-crunchstat.txt.gz} (66%) rename tools/crunchstat-summary/tests/{container_9tee4-dz642-lymtndkpy39eibk-crunchstat.txt.gz.report => container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-crunchstat.txt.gz.report} (100%) create mode 100644 tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y.txt.gz.report diff --git a/tools/crunchstat-summary/crunchstat_summary/command.py b/tools/crunchstat-summary/crunchstat_summary/command.py index aadc775823..ec7acb8083 100644 --- a/tools/crunchstat-summary/crunchstat_summary/command.py +++ b/tools/crunchstat-summary/crunchstat_summary/command.py @@ -17,14 +17,20 @@ class ArgumentParser(argparse.ArgumentParser): description='Summarize resource usage of an Arvados Crunch job') src = self.add_mutually_exclusive_group() src.add_argument( - '--job', '--container', '--container-request', + '--job', '--container-request', type=str, metavar='UUID', - help='Look up the specified job, container, or container request ' + help='Look up the specified job or container request ' + 'and read its log data from Keep (or from the Arvados event log, ' + 'if the job is still running)') + src.add_argument( + '--container', + type=str, metavar='UUID', + help='[Deprecated] Look up the specified container find its container request ' 'and read its log data from Keep (or from the Arvados event log, ' 'if the job is still running)') src.add_argument( '--pipeline-instance', type=str, metavar='UUID', - help='Summarize each component of the given pipeline instance') + help='[Deprecated] Summarize each component of the given pipeline instance (historical pre-1.4)') src.add_argument( '--log-file', type=str, help='Read log data from a regular file') @@ -81,6 +87,8 @@ class Command(object): self.summer = summarizer.NewSummarizer(self.args.pipeline_instance, **kwargs) elif self.args.job: self.summer = summarizer.NewSummarizer(self.args.job, **kwargs) + elif self.args.container: + self.summer = summarizer.NewSummarizer(self.args.container, **kwargs) elif self.args.log_file: if self.args.log_file.endswith('.gz'): fh = UTF8Decode(gzip.open(self.args.log_file)) diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py index e962ced314..d99d3c1cf8 100644 --- a/tools/crunchstat-summary/crunchstat_summary/summarizer.py +++ b/tools/crunchstat-summary/crunchstat_summary/summarizer.py @@ -470,7 +470,7 @@ class Summarizer(object): elif self.detected_crunch1: return JobSummarizer.runtime_constraint_mem_unit else: - return ContainerSummarizer.runtime_constraint_mem_unit + return ContainerRequestSummarizer.runtime_constraint_mem_unit def _map_runtime_constraint(self, key): if hasattr(self, 'map_runtime_constraint'): @@ -501,12 +501,15 @@ def NewSummarizer(process_or_uuid, **kwargs): if '-dz642-' in uuid: if process is None: - process = arv.containers().get(uuid=uuid).execute() - klass = ContainerTreeSummarizer + # Get the associated CR. Doesn't matter which since they all have the same logs + crs = arv.container_requests().list(filters=[['container_uuid','=',uuid]],limit=1).execute()['items'] + if len(crs) > 0: + process = crs[0] + klass = ContainerRequestTreeSummarizer elif '-xvhdp-' in uuid: if process is None: process = arv.container_requests().get(uuid=uuid).execute() - klass = ContainerTreeSummarizer + klass = ContainerRequestTreeSummarizer elif '-8i9sb-' in uuid: if process is None: process = arv.jobs().get(uuid=uuid).execute() @@ -530,9 +533,14 @@ class ProcessSummarizer(Summarizer): self.process = process if label is None: label = self.process.get('name', self.process['uuid']) - if self.process.get('log'): + # Pre-Arvados v1.4 everything is in 'log' + # For 1.4+ containers have no logs and container_requests have them in 'log_uuid', not 'log' + log_collection = self.process.get('log') + if not log_collection: + log_collection = self.process.get('log_uuid') + if log_collection: try: - rdr = crunchstat_summary.reader.CollectionReader(self.process['log']) + rdr = crunchstat_summary.reader.CollectionReader(log_collection) except arvados.errors.NotFoundError as e: logger.warning("Trying event logs after failing to read " "log collection %s: %s", self.process['log'], e) @@ -552,7 +560,7 @@ class JobSummarizer(ProcessSummarizer): } -class ContainerSummarizer(ProcessSummarizer): +class ContainerRequestSummarizer(ProcessSummarizer): runtime_constraint_mem_unit = 1 @@ -653,7 +661,7 @@ class PipelineSummarizer(MultiSummarizer): **kwargs) -class ContainerTreeSummarizer(MultiSummarizer): +class ContainerRequestTreeSummarizer(MultiSummarizer): def __init__(self, root, skip_child_jobs=False, **kwargs): arv = arvados.api('v1', model=OrderedJsonModel()) @@ -666,10 +674,8 @@ class ContainerTreeSummarizer(MultiSummarizer): current = todo.popleft() label = current['name'] sort_key = current['created_at'] - if current['uuid'].find('-xvhdp-') > 0: - current = arv.containers().get(uuid=current['container_uuid']).execute() - summer = ContainerSummarizer(current, label=label, **kwargs) + summer = ContainerRequestSummarizer(current, label=label, **kwargs) summer.sort_key = sort_key children[current['uuid']] = summer @@ -678,7 +684,7 @@ class ContainerTreeSummarizer(MultiSummarizer): child_crs = arv.container_requests().index( order=['uuid asc'], filters=page_filters+[ - ['requesting_container_uuid', '=', current['uuid']]], + ['requesting_container_uuid', '=', current['container_uuid']]], ).execute() if not child_crs['items']: break @@ -696,7 +702,7 @@ class ContainerTreeSummarizer(MultiSummarizer): sorted_children = collections.OrderedDict() for uuid in sorted(list(children.keys()), key=lambda uuid: children[uuid].sort_key): sorted_children[uuid] = children[uuid] - super(ContainerTreeSummarizer, self).__init__( + super(ContainerRequestTreeSummarizer, self).__init__( children=sorted_children, label=root['name'], **kwargs) diff --git a/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-arv-mount.txt.gz b/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-arv-mount.txt.gz deleted file mode 100644 index ff7dd30b2ef583e955b9237825f6b5221b892e2f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 235 zcmVb;m_;p03 z!Xk1b1utebU(X|biK1-;Hn`r4R!gD9cJ;2*23sZM5iAeEx|B`Ua3Sig>AD06{gBK2 zqzp|1K4ju=(}BPhmIsM!4Ckl4H~D`kP}U=cC7N$#@U6o9Zp}gN*H}j->HPT1FP~Dy z`CFVreGG|WKjyFO^9VATbA@_bor9gai;o&LF3HKlZPae7)J`1X;2=84j+o$T5|{wQ l9J!WzVVS0(1sX2CK|Q!gmfG`2*UegJ)1v-4sE002qqZpr`v diff --git a/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-arv-mount.txt.gz b/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-arv-mount.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..3fe0220ca424eb9d6b5566db39dde48d8afcf634 GIT binary patch literal 274 zcmV+t0qy=DiwFqbH#=Pb17mM)bYW?3WpZC~WpQ<7b97%hbY*2UEqHclWNn7>-~w$1q(5>FSMHFGmK56t zsP_2MXreBf+Jv&0)th`w@)*!Ep^(7-NK3(}pm}=WJcE)`b|T6St*V`uUXTW{SJD_q zfb@e8^NOA%3DK=bdzXe#NFhApP<3tfliRWXp%%TEFbJ!D48x8K%saIPd0nGw44bu{ zetFZ*EjXXx8tO7Q?D}c{VYQz?i?UjvzN2ffvuSiEKtN261uo-7&UryMEyX+(u@p-tWl3tPiEc$%MoNKhcD6xQ jqG585MUqi=kzu87a#3ksaz=4UVu@Z!MG3=3u3jbpYlj?2 delta 19 acmcb@I-QkEzMF$VUvO(Q1M^1rUM2uDj|B_> diff --git a/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-crunchstat.txt.gz.report b/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-crunchstat.txt.gz.report similarity index 100% rename from tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-crunchstat.txt.gz.report rename to tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-crunchstat.txt.gz.report diff --git a/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y.txt.gz.report b/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y.txt.gz.report new file mode 100644 index 0000000000..5152e577f5 --- /dev/null +++ b/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y.txt.gz.report @@ -0,0 +1,39 @@ +category metric task_max task_max_rate job_total +blkio:0:0 read 0 0 0 +blkio:0:0 write 0 0 0 +cpu cpus 20 - - +cpu sys 0.39 0.04 0.39 +cpu user 2.06 0.20 2.06 +cpu user+sys 2.45 0.24 2.45 +fuseops read 0 0 0 +fuseops write 0 0 0 +keepcache hit 0 0 0 +keepcache miss 0 0 0 +keepcalls get 0 0 0 +keepcalls put 0 0 0 +mem cache 172032 - - +mem pgmajfault 0 - 0 +mem rss 69525504 - - +mem swap 0 - - +net:eth0 rx 859480 1478.97 859480 +net:eth0 tx 55888 395.71 55888 +net:eth0 tx+rx 915368 1874.69 915368 +net:keep0 rx 0 0 0 +net:keep0 tx 0 0 0 +net:keep0 tx+rx 0 0 0 +statfs available 397744787456 - 397744787456 +statfs total 402611240960 - 402611240960 +statfs used 4870303744 52426.18 4866453504 +time elapsed 20 - 20 +# Number of tasks: 1 +# Max CPU time spent by a single task: 2.45s +# Max CPU usage in a single interval: 23.70% +# Overall CPU usage: 12.25% +# Max memory used by a single task: 0.07GB +# Max network traffic in a single task: 0.00GB +# Max network speed in a single interval: 0.00MB/s +# Keep cache miss rate 0.00% +# Keep cache utilization 0.00% +# Temp disk utilization 1.21% +#!! container max RSS was 67 MiB -- try reducing runtime_constraints to "ram":1020054732 +#!! container max temp disk utilization was 1% of 383960 MiB -- consider reducing "tmpdirMin" and/or "outdirMin" diff --git a/tools/crunchstat-summary/tests/test_examples.py b/tools/crunchstat-summary/tests/test_examples.py index 0270eaaec0..fb23eab39e 100644 --- a/tools/crunchstat-summary/tests/test_examples.py +++ b/tools/crunchstat-summary/tests/test_examples.py @@ -67,29 +67,37 @@ class SummarizeEdgeCases(unittest.TestCase): s.run() -class SummarizeContainer(ReportDiff): +class SummarizeContainerCommon(ReportDiff): fake_container = { 'uuid': '9tee4-dz642-lymtndkpy39eibk', 'created_at': '2017-08-18T14:27:25.371388141', 'log': '9tee4-4zz18-ihyzym9tcwjwg4r', } fake_request = { - 'uuid': '9tee4-xvhdp-uper95jktm10d3w', + 'uuid': '9tee4-xvhdp-kk0ja1cl8b2kr1y', 'name': 'container', 'created_at': '2017-08-18T14:27:25.242339223Z', 'container_uuid': fake_container['uuid'], - } - reportfile = os.path.join( - TESTS_DIR, 'container_9tee4-dz642-lymtndkpy39eibk.txt.gz') + 'runtime_constraints': { + 'vcpus': 1, + 'ram': 2621440000 + }, + 'log_uuid' : '9tee4-4zz18-m2swj50nk0r8b6y' + } + logfile = os.path.join( - TESTS_DIR, 'container_9tee4-dz642-lymtndkpy39eibk-crunchstat.txt.gz') + TESTS_DIR, 'container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-crunchstat.txt.gz') arvmountlog = os.path.join( - TESTS_DIR, 'container_9tee4-dz642-lymtndkpy39eibk-arv-mount.txt.gz') + TESTS_DIR, 'container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-arv-mount.txt.gz') @mock.patch('arvados.collection.CollectionReader') @mock.patch('arvados.api') - def test_container(self, mock_api, mock_cr): - mock_api().container_requests().index().execute.return_value = {'items':[]} + def check_common(self, mock_api, mock_cr): + items = [ {'items':[self.fake_request]}] + [{'items':[]}] * 100 + # Index and list mean the same thing, but are used in different places in the + # code. It's fragile, but exploit that fact to distinguish the two uses. + mock_api().container_requests().index().execute.return_value = {'items': [] } # child_crs + mock_api().container_requests().list().execute.side_effect = items # parent request mock_api().container_requests().get().execute.return_value = self.fake_request mock_api().containers().get().execute.return_value = self.fake_container mock_cr().__iter__.return_value = [ @@ -102,12 +110,31 @@ class SummarizeContainer(ReportDiff): return UTF8Decode(gzip.open(self.arvmountlog)) mock_cr().open.side_effect = _open args = crunchstat_summary.command.ArgumentParser().parse_args( - ['--container', self.fake_request['uuid']]) + self.arg_strings) cmd = crunchstat_summary.command.Command(args) cmd.run() self.diff_known_report(self.reportfile, cmd) + +class SummarizeContainer(SummarizeContainerCommon): + uuid = '9tee4-dz642-lymtndkpy39eibk' + reportfile = os.path.join(TESTS_DIR, 'container_%s.txt.gz' % uuid) + arg_strings = ['--container', uuid, '-v', '-v'] + + def test_container(self): + self.check_common() + + +class SummarizeContainerRequest(SummarizeContainerCommon): + uuid = '9tee4-xvhdp-kk0ja1cl8b2kr1y' + reportfile = os.path.join(TESTS_DIR, 'container_request_%s.txt.gz' % uuid) + arg_strings = ['--container-request', uuid, '-v', '-v'] + + def test_container_request(self): + self.check_common() + + class SummarizeJob(ReportDiff): fake_job_uuid = '4xphq-8i9sb-jq0ekny1xou3zoh' fake_log_id = 'fake-log-collection-id' -- 2.30.2