8123: Aim 5% below GiB memory size boundaries.
author Tom Clegg <tom@curoverse.com>
Tue, 22 Dec 2015 21:16:15 +0000 (16:16 -0500)
committer Tom Clegg <tom@curoverse.com>
Tue, 12 Jan 2016 15:47:21 +0000 (10:47 -0500)
tools/crunchstat-summary/crunchstat_summary/summarizer.py
tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz.report
tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz.report
tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz.report
tools/crunchstat-summary/tests/test_examples.py

index 49b67ffa1f4c4cf4fccd33bec0e74c4b4e58ffef..d221363d088e61ac99d7f3217ab77962c632ce0a 100644 (file)
@@ -12,6 +12,11 @@ import sys
 logger = logging.getLogger(__name__)
 logger.addHandler(logging.NullHandler())
 
+# Recommend memory constraints that are this multiple of an integral
+# number of GiB. (Actual nodes tend to be sold in sizes like 8 GiB
+# that have amounts like 7.5 GiB according to the kernel.)
+AVAILABLE_RAM_RATIO = 0.95
+
 class Summarizer(object):
     existing_constraints = {}
 
@@ -159,7 +164,7 @@ class Summarizer(object):
                 int(used_cores))
 
     def _recommend_ram(self):
-        """Recommend asking for 2048 MiB RAM if max rss was 1248 MiB"""
+        """Recommend asking for (2048*0.95) MiB RAM if max rss was 1248 MiB"""
 
         used_ram = self.stats_max['mem']['rss']
         if used_ram == float('-Inf'):
@@ -167,14 +172,16 @@ class Summarizer(object):
             return
         used_ram = math.ceil(float(used_ram) / (1<<20))
         asked_ram = self.existing_constraints.get('min_ram_mb_per_node')
-        if asked_ram is None or math.ceil(used_ram/(1<<10)) < asked_ram/(1<<10):
+        if asked_ram is None or (
+                math.ceil((used_ram/AVAILABLE_RAM_RATIO)/(1<<10)) <
+                (asked_ram/AVAILABLE_RAM_RATIO)/(1<<10)):
             yield (
-                '#!! {} never used more than {} MiB RAM -- '
+                '#!! {} max RSS was {} MiB -- '
                 'try runtime_constraints "min_ram_mb_per_node":{}'
             ).format(
                 self.label,
                 int(used_ram),
-                int(math.ceil(used_ram/(1<<10))*(1<<10)))
+                int(math.ceil((used_ram/AVAILABLE_RAM_RATIO)/(1<<10))*(1<<10)*AVAILABLE_RAM_RATIO))
 
     def _format(self, val):
         """Return a string representation of a stat.
index c94cd24d8a102b2847fe70166d7cfab4d5d00aee..b12e93117f3e1488d6536d2b0181c27986003d8a 100644 (file)
@@ -29,4 +29,4 @@ time  elapsed 80      -       80
 # Max network traffic in a single task: 1.79GB
 # Max network speed in a single interval: 42.58MB/s
 #!! job max CPU usage was 13% -- try runtime_constraints "min_cores_per_node":1
-#!! job never used more than 334 MiB RAM -- try runtime_constraints "min_ram_mb_per_node":1024
+#!! job max RSS was 334 MiB -- try runtime_constraints "min_ram_mb_per_node":972
index e71182449374c7f90bc5a6a66166794ba42d26ac..8e1a2d893777a98e3583ba449af680d2bd1e3a57 100644 (file)
@@ -15,4 +15,4 @@ time  elapsed 2       -       4
 # Overall CPU usage: 0.00%
 # Max memory used by a single task: 0.00GB
 # Max network traffic in a single task: 0.00GB
-#!! job never used more than 1 MiB RAM -- try runtime_constraints "min_ram_mb_per_node":1024
+#!! job max RSS was 1 MiB -- try runtime_constraints "min_ram_mb_per_node":972
index 5772cb44504a5317c29075315d53735734efd953..dbe9321a040183150733a5742e70eb978d662c5e 100644 (file)
@@ -15,4 +15,4 @@ time  elapsed 2       -       3
 # Overall CPU usage: 0.00%
 # Max memory used by a single task: 0.00GB
 # Max network traffic in a single task: 0.00GB
-#!! job never used more than 1 MiB RAM -- try runtime_constraints "min_ram_mb_per_node":1024
+#!! job max RSS was 1 MiB -- try runtime_constraints "min_ram_mb_per_node":972
index a19d7ad0a4fdfa0465a2ee614a9f0d39bb6ee0b5..cf810ae87e8f9a51e44248f66c46b8bae2b2f75d 100644 (file)
@@ -70,7 +70,7 @@ class SummarizePipeline(ReportDiff):
                     'uuid': 'zzzzz-8i9sb-000000000000000',
                     'log': 'fake-log-pdh-0',
                     'runtime_constraints': {
-                        'min_ram_mb_per_node': 1024,
+                        'min_ram_mb_per_node': 900,
                         'min_cores_per_node': 1,
                     },
                 },
@@ -80,7 +80,7 @@ class SummarizePipeline(ReportDiff):
                     'uuid': 'zzzzz-8i9sb-000000000000001',
                     'log': 'fake-log-pdh-1',
                     'runtime_constraints': {
-                        'min_ram_mb_per_node': 1024,
+                        'min_ram_mb_per_node': 900,
                         'min_cores_per_node': 1,
                     },
                 },
@@ -96,7 +96,7 @@ class SummarizePipeline(ReportDiff):
                     'uuid': 'zzzzz-8i9sb-000000000000002',
                     'log': 'fake-log-pdh-2',
                     'runtime_constraints': {
-                        'min_ram_mb_per_node': 1024,
+                        'min_ram_mb_per_node': 900,
                         'min_cores_per_node': 1,
                     },
                 },