Merge branch 'master' into 8079-api-client-auth-uuid
[arvados.git] / tools / crunchstat-summary / crunchstat_summary / summarizer.py
index 6f8b74cc72caea8e412caab68751197f38116a14..f422501b10ff1858f9b636621aaaba4bad662d5b 100644 (file)
@@ -197,6 +197,8 @@ class Summarizer(object):
         return label
 
     def text_report(self):
+        if not self.tasks:
+            return "(no report generated)\n"
         return "\n".join(itertools.chain(
             self._text_report_gen(),
             self._recommend_gen())) + "\n"
@@ -226,7 +228,8 @@ class Summarizer(object):
                  lambda x: x * 100),
                 ('Overall CPU usage: {}%',
                  self.job_tot['cpu']['user+sys'] /
-                 self.job_tot['time']['elapsed'],
+                 self.job_tot['time']['elapsed']
+                 if self.job_tot['time']['elapsed'] > 0 else 0,
                  lambda x: x * 100),
                 ('Max memory used by a single task: {}GB',
                  self.stats_max['mem']['rss'],
@@ -333,19 +336,20 @@ class Summarizer(object):
                 int(math.ceil(nearlygibs(used_mib))*AVAILABLE_RAM_RATIO*1024))
 
     def _recommend_keep_cache(self):
-        """Recommend increasing keep cache if miss rate is above 0.5%"""
-        if self.job_tot['keepcalls']['get'] == 0:
+        """Recommend increasing keep cache if utilization < 80%"""
+        if self.job_tot['net:keep0']['rx'] == 0:
             return
-        miss_rate = float(self.job_tot['keepcache']['miss']) / float(self.job_tot['keepcalls']['get']) * 100.0
+        utilization = (float(self.job_tot['blkio:0:0']['read']) /
+                       float(self.job_tot['net:keep0']['rx']))
         asked_mib = self.existing_constraints.get('keep_cache_mb_per_task', 256)
 
-        if miss_rate > 0.5:
+        if utilization < 0.8:
             yield (
-                '#!! {} Keep cache miss rate was {:.2f}% -- '
-                'try runtime_constraints "keep_cache_mb_per_task":{}'
+                '#!! {} Keep cache utilization was {:.2f}% -- '
+                'try runtime_constraints "keep_cache_mb_per_task":{} (or more)'
             ).format(
                 self.label,
-                miss_rate,
+                utilization * 100.0,
                 asked_mib*2)
 
 
@@ -374,7 +378,7 @@ class JobSummarizer(Summarizer):
         else:
             self.job = job
         rdr = None
-        if self.job['log']:
+        if self.job.get('log'):
             try:
                 rdr = crunchstat_summary.reader.CollectionReader(self.job['log'])
             except arvados.errors.NotFoundError as e:
@@ -400,15 +404,12 @@ class PipelineSummarizer(object):
             if 'job' not in component:
                 logger.warning(
                     "%s: skipping component with no job assigned", cname)
-            elif component['job'].get('log') is None:
-                logger.warning(
-                    "%s: skipping job %s with no log available",
-                    cname, component['job'].get('uuid'))
             else:
                 logger.info(
-                    "%s: logdata %s", cname, component['job']['log'])
+                    "%s: job %s", cname, component['job']['uuid'])
                 summarizer = JobSummarizer(component['job'], **kwargs)
-                summarizer.label = cname
+                summarizer.label = '{} {}'.format(
+                    cname, component['job']['uuid'])
                 self.summarizers[cname] = summarizer
         self.label = pipeline_instance_uuid