From eef816fa8450fd3ce5388f4701ed888691ba897a Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Tue, 17 Jul 2018 16:53:31 -0400 Subject: [PATCH] 13804: Update comments for "consecutive_idle_count" Arvados-DCO-1.1-Signed-off-by: Peter Amstutz --- .../install-nodemanager.html.textile.liquid | 27 +++++++++++++++++++ .../computenode/dispatch/__init__.py | 2 +- services/nodemanager/doc/azure.example.cfg | 10 +++++++ services/nodemanager/doc/ec2.example.cfg | 9 +++++++ services/nodemanager/doc/gce.example.cfg | 9 +++++++ 5 files changed, 56 insertions(+), 1 deletion(-) diff --git a/doc/install/install-nodemanager.html.textile.liquid b/doc/install/install-nodemanager.html.textile.liquid index 09c6b5cb1b..6e2be8bc03 100644 --- a/doc/install/install-nodemanager.html.textile.liquid +++ b/doc/install/install-nodemanager.html.textile.liquid @@ -113,6 +113,15 @@ boot_fail_after = 1800 # an Arvados node that hasn't been updated for this long. node_stale_after = 14400 +# Number of consecutive times a node must report as "idle" before it +# will be considered eligible for shutdown. Node status is checked +# each poll period, and a node can go idle at any point during a poll +# period (meaning a node could be reported as idle that has only been +# idle for 1 second). With a 60 second poll period, three consecutive +# status updates of "idle" suggest the node has been idle at least +# 121 seconds. +consecutive_idle_count = 3 + # Scaling factor to be applied to nodes' available RAM size. Usually there's a # variable discrepancy between the advertised RAM value on cloud nodes and the # actual amount available. @@ -282,6 +291,15 @@ poll_stale_after = 600 # an Arvados node that hasn't been updated for this long. node_stale_after = 14400 +# Number of consecutive times a node must report as "idle" before it +# will be considered eligible for shutdown. 
Node status is checked +# each poll period, and a node can go idle at any point during a poll +# period (meaning a node could be reported as idle that has only been +# idle for 1 second). With a 60 second poll period, three consecutive +# status updates of "idle" suggest the node has been idle at least +# 121 seconds. +consecutive_idle_count = 3 + # Scaling factor to be applied to nodes' available RAM size. Usually there's a # variable discrepancy between the advertised RAM value on cloud nodes and the # actual amount available. @@ -470,6 +488,15 @@ boot_fail_after = 1800 # an Arvados node that hasn't been updated for this long. node_stale_after = 14400 +# Number of consecutive times a node must report as "idle" before it +# will be considered eligible for shutdown. Node status is checked +# each poll period, and a node can go idle at any point during a poll +# period (meaning a node could be reported as idle that has only been +# idle for 1 second). With a 60 second poll period, three consecutive +# status updates of "idle" suggest the node has been idle at least +# 121 seconds. +consecutive_idle_count = 3 + # Scaling factor to be applied to nodes' available RAM size. Usually there's a # variable discrepancy between the advertised RAM value on cloud nodes and the # actual amount available. 
diff --git a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py index 30ca16805d..bdc179587c 100644 --- a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py +++ b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py @@ -458,7 +458,7 @@ class ComputeNodeMonitorActor(config.actor_class): boot_grace = "boot exceeded" if crunch_worker_state == "idle": - # Must report as "idle" at least two consecutive times + # Must report as "idle" at least "consecutive_idle_count" consecutive times if self.consecutive_idle < self.consecutive_idle_count: idle_grace = 'idle wait' else: diff --git a/services/nodemanager/doc/azure.example.cfg b/services/nodemanager/doc/azure.example.cfg index efd2445175..8ba68018d5 100644 --- a/services/nodemanager/doc/azure.example.cfg +++ b/services/nodemanager/doc/azure.example.cfg @@ -65,6 +65,15 @@ boot_fail_after = 1800 # an Arvados node that hasn't been updated for this long. node_stale_after = 14400 +# Number of consecutive times a node must report as "idle" before it +# will be considered eligible for shutdown. Node status is checked +# each poll period, and a node can go idle at any point during a poll +# period (meaning a node could be reported as idle that has only been +# idle for 1 second). With a 60 second poll period, three consecutive +# status updates of "idle" suggest the node has been idle at least +# 121 seconds. +consecutive_idle_count = 3 + # Scaling factor to be applied to nodes' available RAM size. Usually there's a # variable discrepancy between the advertised RAM value on cloud nodes and the # actual amount available. 
@@ -74,6 +83,7 @@ node_mem_scaling = 0.95 # File path for Certificate Authorities certs_file = /etc/ssl/certs/ca-certificates.crt + [Logging] # Log file path file = /var/log/arvados/node-manager.log diff --git a/services/nodemanager/doc/ec2.example.cfg b/services/nodemanager/doc/ec2.example.cfg index a1fa2dc32c..2b6152a9a0 100644 --- a/services/nodemanager/doc/ec2.example.cfg +++ b/services/nodemanager/doc/ec2.example.cfg @@ -65,6 +65,15 @@ boot_fail_after = 1800 # an Arvados node that hasn't been updated for this long. node_stale_after = 14400 +# Number of consecutive times a node must report as "idle" before it +# will be considered eligible for shutdown. Node status is checked +# each poll period, and a node can go idle at any point during a poll +# period (meaning a node could be reported as idle that has only been +# idle for 1 second). With a 60 second poll period, three consecutive +# status updates of "idle" suggest the node has been idle at least +# 121 seconds. +consecutive_idle_count = 3 + # Scaling factor to be applied to nodes' available RAM size. Usually there's a # variable discrepancy between the advertised RAM value on cloud nodes and the # actual amount available. diff --git a/services/nodemanager/doc/gce.example.cfg b/services/nodemanager/doc/gce.example.cfg index 8a244a4444..acd3fd1e3e 100644 --- a/services/nodemanager/doc/gce.example.cfg +++ b/services/nodemanager/doc/gce.example.cfg @@ -54,6 +54,15 @@ poll_stale_after = 600 # an Arvados node that hasn't been updated for this long. node_stale_after = 14400 +# Number of consecutive times a node must report as "idle" before it +# will be considered eligible for shutdown. Node status is checked +# each poll period, and a node can go idle at any point during a poll +# period (meaning a node could be reported as idle that has only been +# idle for 1 second). With a 60 second poll period, three consecutive +# status updates of "idle" suggest the node has been idle at least +# 121 seconds. 
+consecutive_idle_count = 3 + # Scaling factor to be applied to nodes' available RAM size. Usually there's a # variable discrepancy between the advertised RAM value on cloud nodes and the # actual amount available. -- 2.30.2