X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/707ed2964a24ebe940ae440af22fdcb56781fcbd..5c0a4eb517a5f0b81e11df1b610fafdf3fab1dcc:/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py diff --git a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py index 597a011e72..9106ea67cc 100644 --- a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py +++ b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py @@ -20,6 +20,7 @@ from .. import \ arvados_node_missing, RetryMixin from ...clientactor import _notify_subscribers from ... import config +from ... import status from .transitions import transitions QuotaExceeded = "QuotaExceeded" @@ -122,7 +123,7 @@ class ComputeNodeSetupActor(ComputeNodeStateChangeBase): def prepare_arvados_node(self, node): self._clean_arvados_node(node, "Prepared by Node Manager") self.arvados_node = self._arvados.nodes().update( - body={}, assign_slot=True).execute() + uuid=node['uuid'], body={}, assign_slot=True).execute() self._later.create_cloud_node() @ComputeNodeStateChangeBase._finish_on_exception @@ -272,6 +273,9 @@ class ComputeNodeShutdownActor(ComputeNodeStateChangeBase): self.cancel_shutdown("No longer eligible for shut down because %s" % reason, try_resume=True) return + # If boot failed, count the event + if self._monitor.get_state().get() == 'unpaired': + status.tracker.counter_add('boot_failures') self._destroy_node() def _destroy_node(self): @@ -409,6 +413,12 @@ class ComputeNodeMonitorActor(config.actor_class): #if state == 'idle' and self.arvados_node['job_uuid']: # state = 'busy' + # Update idle node times tracker + if state == 'idle': + status.tracker.idle_in(self.arvados_node['hostname']) + else: + status.tracker.idle_out(self.arvados_node['hostname']) + return state def in_state(self, *states):