# number of GiB. (Actual nodes tend to be sold in sizes like 8 GiB
# that have amounts like 7.5 GiB according to the kernel.)
AVAILABLE_RAM_RATIO = 0.95
-
+MB=2**20
# Workaround datetime.datetime.strptime() thread-safety bug by calling
# it once before starting threads. https://bugs.python.org/issue7980
yield "# "+format_string.format(self._format(val))
def _recommend_gen(self):
+ # TODO recommend fixing job granularity if elapsed time is too short
return itertools.chain(
self._recommend_cpu(),
self._recommend_ram(),
constraint_key = self._map_runtime_constraint('vcpus')
cpu_max_rate = self.stats_max['cpu']['user+sys__rate']
- if cpu_max_rate == float('-Inf'):
+ if cpu_max_rate == float('-Inf') or cpu_max_rate == 0.0:
logger.warning('%s: no CPU usage data', self.label)
return
+ # TODO: Don't necessarily recommend based on an isolated max peak;
+ # also take average CPU usage (or the % of time spent at max) into account.
used_cores = max(1, int(math.ceil(cpu_max_rate)))
asked_cores = self.existing_constraints.get(constraint_key)
- if asked_cores is None or used_cores < asked_cores:
+ if asked_cores is None:
+ asked_cores = 1
+ # TODO: This should be more nuanced in cases where max >> avg
+ if used_cores < asked_cores:
yield (
'#!! {} max CPU usage was {}% -- '
- 'try runtime_constraints "{}":{}'
+ 'try reducing runtime_constraints to "{}":{}'
).format(
self.label,
math.ceil(cpu_max_rate*100),
constraint_key,
int(used_cores))
+ # FIXME: This needs to be updated to account for current nodemanager algorithms
def _recommend_ram(self):
"""Recommend an economical RAM constraint for this job.
if used_bytes == float('-Inf'):
logger.warning('%s: no memory usage data', self.label)
return
- used_mib = math.ceil(float(used_bytes) / 1048576)
+ used_mib = math.ceil(float(used_bytes) / MB)
asked_mib = self.existing_constraints.get(constraint_key)
nearlygibs = lambda mebibytes: mebibytes/AVAILABLE_RAM_RATIO/1024
- if asked_mib is None or (
- math.ceil(nearlygibs(used_mib)) < nearlygibs(asked_mib)):
+ if used_mib > 0 and (asked_mib is None or (
+ math.ceil(nearlygibs(used_mib)) < nearlygibs(asked_mib))):
yield (
'#!! {} max RSS was {} MiB -- '
- 'try runtime_constraints "{}":{}'
+ 'try reducing runtime_constraints to "{}":{}'
).format(
self.label,
int(used_mib),
constraint_key,
- int(math.ceil(nearlygibs(used_mib))*AVAILABLE_RAM_RATIO*1024*(2**20)/self._runtime_constraint_mem_unit()))
+ int(math.ceil(nearlygibs(used_mib))*AVAILABLE_RAM_RATIO*1024*(MB)/self._runtime_constraint_mem_unit()))
def _recommend_keep_cache(self):
"""Recommend increasing keep cache if utilization < 80%"""
return
utilization = (float(self.job_tot['blkio:0:0']['read']) /
float(self.job_tot['net:keep0']['rx']))
- asked_mib = self.existing_constraints.get(constraint_key, 256)
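+ # FIXME: the 256 default is suspect: it was previously treated as MiB, but here it is scaled by the memory unit like a configured value (TODO confirm intended unit)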
+ asked_cache = self.existing_constraints.get(constraint_key, 256) * self._runtime_constraint_mem_unit()
if utilization < 0.8:
yield (
'#!! {} Keep cache utilization was {:.2f}% -- '
- 'try runtime_constraints "{}":{} (or more)'
+ 'try doubling runtime_constraints to "{}":{} (or more)'
).format(
self.label,
utilization * 100.0,
constraint_key,
- asked_mib*2*(2**20)/self._runtime_constraint_mem_unit())
+ math.ceil(asked_cache * 2 / self._runtime_constraint_mem_unit()))
def _format(self, val):
class JobSummarizer(ProcessSummarizer):
- runtime_constraint_mem_unit = 1048576
+ runtime_constraint_mem_unit = MB
map_runtime_constraint = {
'keep_cache_ram': 'keep_cache_mb_per_task',
'ram': 'min_ram_mb_per_node',