15486: Fix crunchstat-summary to work with Arvados v1.4+
authorTom Morris <tfmorris@veritasgenetics.com>
Thu, 25 Jul 2019 21:01:40 +0000 (17:01 -0400)
committerTom Morris <tfmorris@veritasgenetics.com>
Tue, 13 Aug 2019 21:02:53 +0000 (17:02 -0400)
Arvados v1.4 changed things so that container logs are no longer
accessible via PDH so we need to go to the container request and
access the log via UUID from there.

Arvados-DCO-1.1-Signed-off-by: Tom Morris <tfmorris@veritasgenetics.com>

tools/crunchstat-summary/crunchstat_summary/command.py
tools/crunchstat-summary/crunchstat_summary/summarizer.py
tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-arv-mount.txt.gz [deleted file]
tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-arv-mount.txt.gz [new file with mode: 0644]
tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-arv-mount.txt.gz.report [moved from tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-arv-mount.txt.gz.report with 100% similarity]
tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-crunchstat.txt.gz [moved from tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-crunchstat.txt.gz with 66% similarity]
tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-crunchstat.txt.gz.report [moved from tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-crunchstat.txt.gz.report with 100% similarity]
tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y.txt.gz.report [new file with mode: 0644]
tools/crunchstat-summary/tests/test_examples.py

index aadc775823caf136c7f7094a0d2b55fcb50f4478..ec7acb8083928f6f35f2af7835ee92f8a4a895dc 100644 (file)
@@ -17,14 +17,20 @@ class ArgumentParser(argparse.ArgumentParser):
             description='Summarize resource usage of an Arvados Crunch job')
         src = self.add_mutually_exclusive_group()
         src.add_argument(
-            '--job', '--container', '--container-request',
+            '--job', '--container-request',
             type=str, metavar='UUID',
-            help='Look up the specified job, container, or container request '
+            help='Look up the specified job or container request '
+            'and read its log data from Keep (or from the Arvados event log, '
+            'if the job is still running)')
+        src.add_argument(
+            '--container',
+            type=str, metavar='UUID',
+            help='[Deprecated] Look up the specified container find its container request '
             'and read its log data from Keep (or from the Arvados event log, '
             'if the job is still running)')
         src.add_argument(
             '--pipeline-instance', type=str, metavar='UUID',
-            help='Summarize each component of the given pipeline instance')
+            help='[Deprecated] Summarize each component of the given pipeline instance (historical pre-1.4)')
         src.add_argument(
             '--log-file', type=str,
             help='Read log data from a regular file')
@@ -81,6 +87,8 @@ class Command(object):
             self.summer = summarizer.NewSummarizer(self.args.pipeline_instance, **kwargs)
         elif self.args.job:
             self.summer = summarizer.NewSummarizer(self.args.job, **kwargs)
+        elif self.args.container:
+            self.summer = summarizer.NewSummarizer(self.args.container, **kwargs)
         elif self.args.log_file:
             if self.args.log_file.endswith('.gz'):
                 fh = UTF8Decode(gzip.open(self.args.log_file))
index e962ced31404bfe26a7da36d034871941663ef23..d99d3c1cf8324c4dd33f9bcdd72a15fa4f015d7c 100644 (file)
@@ -470,7 +470,7 @@ class Summarizer(object):
         elif self.detected_crunch1:
             return JobSummarizer.runtime_constraint_mem_unit
         else:
-            return ContainerSummarizer.runtime_constraint_mem_unit
+            return ContainerRequestSummarizer.runtime_constraint_mem_unit
 
     def _map_runtime_constraint(self, key):
         if hasattr(self, 'map_runtime_constraint'):
@@ -501,12 +501,15 @@ def NewSummarizer(process_or_uuid, **kwargs):
 
     if '-dz642-' in uuid:
         if process is None:
-            process = arv.containers().get(uuid=uuid).execute()
-        klass = ContainerTreeSummarizer
+            # Get the associated CR. Doesn't matter which since they all have the same logs
+            crs = arv.container_requests().list(filters=[['container_uuid','=',uuid]],limit=1).execute()['items']
+            if len(crs) > 0:
+                process = crs[0]
+        klass = ContainerRequestTreeSummarizer
     elif '-xvhdp-' in uuid:
         if process is None:
             process = arv.container_requests().get(uuid=uuid).execute()
-        klass = ContainerTreeSummarizer
+        klass = ContainerRequestTreeSummarizer
     elif '-8i9sb-' in uuid:
         if process is None:
             process = arv.jobs().get(uuid=uuid).execute()
@@ -530,9 +533,14 @@ class ProcessSummarizer(Summarizer):
         self.process = process
         if label is None:
             label = self.process.get('name', self.process['uuid'])
-        if self.process.get('log'):
+        # Pre-Arvados v1.4 everything is in 'log'
+        # For 1.4+ containers have no logs and container_requests have them in 'log_uuid', not 'log'
+        log_collection = self.process.get('log')
+        if not log_collection:
+            log_collection = self.process.get('log_uuid')
+        if log_collection:
             try:
-                rdr = crunchstat_summary.reader.CollectionReader(self.process['log'])
+                rdr = crunchstat_summary.reader.CollectionReader(log_collection)
             except arvados.errors.NotFoundError as e:
                 logger.warning("Trying event logs after failing to read "
                                "log collection %s: %s", self.process['log'], e)
@@ -552,7 +560,7 @@ class JobSummarizer(ProcessSummarizer):
     }
 
 
-class ContainerSummarizer(ProcessSummarizer):
+class ContainerRequestSummarizer(ProcessSummarizer):
     runtime_constraint_mem_unit = 1
 
 
@@ -653,7 +661,7 @@ class PipelineSummarizer(MultiSummarizer):
             **kwargs)
 
 
-class ContainerTreeSummarizer(MultiSummarizer):
+class ContainerRequestTreeSummarizer(MultiSummarizer):
     def __init__(self, root, skip_child_jobs=False, **kwargs):
         arv = arvados.api('v1', model=OrderedJsonModel())
 
@@ -666,10 +674,8 @@ class ContainerTreeSummarizer(MultiSummarizer):
             current = todo.popleft()
             label = current['name']
             sort_key = current['created_at']
-            if current['uuid'].find('-xvhdp-') > 0:
-                current = arv.containers().get(uuid=current['container_uuid']).execute()
 
-            summer = ContainerSummarizer(current, label=label, **kwargs)
+            summer = ContainerRequestSummarizer(current, label=label, **kwargs)
             summer.sort_key = sort_key
             children[current['uuid']] = summer
 
@@ -678,7 +684,7 @@ class ContainerTreeSummarizer(MultiSummarizer):
                 child_crs = arv.container_requests().index(
                     order=['uuid asc'],
                     filters=page_filters+[
-                        ['requesting_container_uuid', '=', current['uuid']]],
+                        ['requesting_container_uuid', '=', current['container_uuid']]],
                 ).execute()
                 if not child_crs['items']:
                     break
@@ -696,7 +702,7 @@ class ContainerTreeSummarizer(MultiSummarizer):
         sorted_children = collections.OrderedDict()
         for uuid in sorted(list(children.keys()), key=lambda uuid: children[uuid].sort_key):
             sorted_children[uuid] = children[uuid]
-        super(ContainerTreeSummarizer, self).__init__(
+        super(ContainerRequestTreeSummarizer, self).__init__(
             children=sorted_children,
             label=root['name'],
             **kwargs)
diff --git a/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-arv-mount.txt.gz b/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-arv-mount.txt.gz
deleted file mode 100644 (file)
index ff7dd30..0000000
Binary files a/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-arv-mount.txt.gz and /dev/null differ
diff --git a/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-arv-mount.txt.gz b/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-arv-mount.txt.gz
new file mode 100644 (file)
index 0000000..3fe0220
Binary files /dev/null and b/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-arv-mount.txt.gz differ
similarity index 66%
rename from tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-crunchstat.txt.gz
rename to tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-crunchstat.txt.gz
index 249ad22e47157aa24a7a42304533676b150ce652..fc01ce9a8f124e2fe3d88ef20394e966700b2326 100644 (file)
Binary files a/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-crunchstat.txt.gz and b/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-crunchstat.txt.gz differ
diff --git a/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y.txt.gz.report b/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y.txt.gz.report
new file mode 100644 (file)
index 0000000..5152e57
--- /dev/null
@@ -0,0 +1,39 @@
+category       metric  task_max        task_max_rate   job_total
+blkio:0:0      read    0       0       0
+blkio:0:0      write   0       0       0
+cpu    cpus    20      -       -
+cpu    sys     0.39    0.04    0.39
+cpu    user    2.06    0.20    2.06
+cpu    user+sys        2.45    0.24    2.45
+fuseops        read    0       0       0
+fuseops        write   0       0       0
+keepcache      hit     0       0       0
+keepcache      miss    0       0       0
+keepcalls      get     0       0       0
+keepcalls      put     0       0       0
+mem    cache   172032  -       -
+mem    pgmajfault      0       -       0
+mem    rss     69525504        -       -
+mem    swap    0       -       -
+net:eth0       rx      859480  1478.97 859480
+net:eth0       tx      55888   395.71  55888
+net:eth0       tx+rx   915368  1874.69 915368
+net:keep0      rx      0       0       0
+net:keep0      tx      0       0       0
+net:keep0      tx+rx   0       0       0
+statfs available       397744787456    -       397744787456
+statfs total   402611240960    -       402611240960
+statfs used    4870303744      52426.18        4866453504
+time   elapsed 20      -       20
+# Number of tasks: 1
+# Max CPU time spent by a single task: 2.45s
+# Max CPU usage in a single interval: 23.70%
+# Overall CPU usage: 12.25%
+# Max memory used by a single task: 0.07GB
+# Max network traffic in a single task: 0.00GB
+# Max network speed in a single interval: 0.00MB/s
+# Keep cache miss rate 0.00%
+# Keep cache utilization 0.00%
+# Temp disk utilization 1.21%
+#!! container max RSS was 67 MiB -- try reducing runtime_constraints to "ram":1020054732
+#!! container max temp disk utilization was 1% of 383960 MiB -- consider reducing "tmpdirMin" and/or "outdirMin"
index 0270eaaec06d7fa521e8279022035b6a3bf5bd01..fb23eab39e9072f9b44ac5e3b766d25c524e5668 100644 (file)
@@ -67,29 +67,37 @@ class SummarizeEdgeCases(unittest.TestCase):
         s.run()
 
 
-class SummarizeContainer(ReportDiff):
+class SummarizeContainerCommon(ReportDiff):
     fake_container = {
         'uuid': '9tee4-dz642-lymtndkpy39eibk',
         'created_at': '2017-08-18T14:27:25.371388141',
         'log': '9tee4-4zz18-ihyzym9tcwjwg4r',
     }
     fake_request = {
-        'uuid': '9tee4-xvhdp-uper95jktm10d3w',
+        'uuid': '9tee4-xvhdp-kk0ja1cl8b2kr1y',
         'name': 'container',
         'created_at': '2017-08-18T14:27:25.242339223Z',
         'container_uuid': fake_container['uuid'],
-    }
-    reportfile = os.path.join(
-        TESTS_DIR, 'container_9tee4-dz642-lymtndkpy39eibk.txt.gz')
+        'runtime_constraints': {
+            'vcpus': 1,
+            'ram': 2621440000
+            },
+        'log_uuid' : '9tee4-4zz18-m2swj50nk0r8b6y'
+        }
+
     logfile = os.path.join(
-        TESTS_DIR, 'container_9tee4-dz642-lymtndkpy39eibk-crunchstat.txt.gz')
+        TESTS_DIR, 'container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-crunchstat.txt.gz')
     arvmountlog = os.path.join(
-        TESTS_DIR, 'container_9tee4-dz642-lymtndkpy39eibk-arv-mount.txt.gz')
+        TESTS_DIR, 'container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-arv-mount.txt.gz')
 
     @mock.patch('arvados.collection.CollectionReader')
     @mock.patch('arvados.api')
-    def test_container(self, mock_api, mock_cr):
-        mock_api().container_requests().index().execute.return_value = {'items':[]}
+    def check_common(self, mock_api, mock_cr):
+        items = [ {'items':[self.fake_request]}] + [{'items':[]}] * 100
+        # Index and list mean the same thing, but are used in different places in the
+        # code. It's fragile, but exploit that fact to distinguish the two uses.
+        mock_api().container_requests().index().execute.return_value = {'items': [] }  # child_crs
+        mock_api().container_requests().list().execute.side_effect = items # parent request
         mock_api().container_requests().get().execute.return_value = self.fake_request
         mock_api().containers().get().execute.return_value = self.fake_container
         mock_cr().__iter__.return_value = [
@@ -102,12 +110,31 @@ class SummarizeContainer(ReportDiff):
                 return UTF8Decode(gzip.open(self.arvmountlog))
         mock_cr().open.side_effect = _open
         args = crunchstat_summary.command.ArgumentParser().parse_args(
-            ['--container', self.fake_request['uuid']])
+            self.arg_strings)
         cmd = crunchstat_summary.command.Command(args)
         cmd.run()
         self.diff_known_report(self.reportfile, cmd)
 
 
+
+class SummarizeContainer(SummarizeContainerCommon):
+    uuid = '9tee4-dz642-lymtndkpy39eibk'
+    reportfile = os.path.join(TESTS_DIR, 'container_%s.txt.gz' % uuid)
+    arg_strings = ['--container', uuid, '-v', '-v']
+
+    def test_container(self):
+        self.check_common()
+
+
+class SummarizeContainerRequest(SummarizeContainerCommon):
+    uuid = '9tee4-xvhdp-kk0ja1cl8b2kr1y'
+    reportfile = os.path.join(TESTS_DIR, 'container_request_%s.txt.gz' % uuid)
+    arg_strings = ['--container-request', uuid, '-v', '-v']
+
+    def test_container_request(self):
+        self.check_common()
+
+
 class SummarizeJob(ReportDiff):
     fake_job_uuid = '4xphq-8i9sb-jq0ekny1xou3zoh'
     fake_log_id = 'fake-log-collection-id'