12085: Idle node times tracking, with tests.
[arvados.git] / services / nodemanager / arvnodeman / computenode / dispatch / __init__.py
index 597a011e72075975706a42a31ed03bc74ff42b36..9106ea67ccc8ffac7813d64baa5ebc537548fa21 100644 (file)
@@ -20,6 +20,7 @@ from .. import \
     arvados_node_missing, RetryMixin
 from ...clientactor import _notify_subscribers
 from ... import config
+from ... import status
 from .transitions import transitions
 
 QuotaExceeded = "QuotaExceeded"
@@ -122,7 +123,7 @@ class ComputeNodeSetupActor(ComputeNodeStateChangeBase):
     def prepare_arvados_node(self, node):
         self._clean_arvados_node(node, "Prepared by Node Manager")
         self.arvados_node = self._arvados.nodes().update(
-            body={}, assign_slot=True).execute()
+            uuid=node['uuid'], body={}, assign_slot=True).execute()
         self._later.create_cloud_node()
 
     @ComputeNodeStateChangeBase._finish_on_exception
@@ -272,6 +273,9 @@ class ComputeNodeShutdownActor(ComputeNodeStateChangeBase):
                 self.cancel_shutdown("No longer eligible for shut down because %s" % reason,
                                      try_resume=True)
                 return
+        # If boot failed, count the event
+        if self._monitor.get_state().get() == 'unpaired':
+            status.tracker.counter_add('boot_failures')
         self._destroy_node()
 
     def _destroy_node(self):
@@ -409,6 +413,12 @@ class ComputeNodeMonitorActor(config.actor_class):
         #if state == 'idle' and self.arvados_node['job_uuid']:
         #    state = 'busy'
 
+        # Update idle node times tracker
+        if state == 'idle':
+            status.tracker.idle_in(self.arvados_node['hostname'])
+        else:
+            status.tracker.idle_out(self.arvados_node['hostname'])
+
         return state
 
     def in_state(self, *states):