12085: Idle node times tracking, with tests.
author Lucas Di Pentima <ldipentima@veritasgenetics.com>
Wed, 28 Mar 2018 18:51:14 +0000 (15:51 -0300)
committer Lucas Di Pentima <ldipentima@veritasgenetics.com>
Thu, 5 Apr 2018 14:17:03 +0000 (11:17 -0300)
Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <ldipentima@veritasgenetics.com>

services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
services/nodemanager/arvnodeman/status.py
services/nodemanager/tests/test_computenode_dispatch.py
services/nodemanager/tests/test_status.py

index 340668eff5857e8a49d2f0bcbf711314e3002671..9106ea67ccc8ffac7813d64baa5ebc537548fa21 100644 (file)
@@ -413,6 +413,12 @@ class ComputeNodeMonitorActor(config.actor_class):
         #if state == 'idle' and self.arvados_node['job_uuid']:
         #    state = 'busy'
 
+        # Update idle node times tracker
+        if state == 'idle':
+            status.tracker.idle_in(self.arvados_node['hostname'])
+        else:
+            status.tracker.idle_out(self.arvados_node['hostname'])
+
         return state
 
     def in_state(self, *states):
index b2eb7c827f64d56eb978d6a4b45c5f7478c898f1..20e30e14178c331c48a10916450f28a5e0075094 100644 (file)
@@ -6,6 +6,7 @@ from __future__ import absolute_import, print_function
 from future import standard_library
 
 import http.server
+import time
 import json
 import logging
 import socketserver
@@ -82,10 +83,16 @@ class Tracker(object):
             'actor_exceptions': 0
         }
         self._version = {'Version' : __version__}
+        self._idle_nodes = {}
 
     def get_json(self):
         with self._mtx:
-            return json.dumps(dict(self._latest, **self._version))
+            times = {'idle_times' : {}}
+            now = time.time()
+            for node, ts in self._idle_nodes.items():
+                times['idle_times'][node] = int(now - ts)
+            return json.dumps(
+                dict(dict(self._latest, **self._version), **times))
 
     def keys(self):
         with self._mtx:
@@ -104,4 +111,17 @@ class Tracker(object):
             self._latest.setdefault(counter, 0)
             self._latest[counter] += value
 
+    def idle_in(self, nodename):
+        with self._mtx:
+            if self._idle_nodes.get(nodename):
+                return
+            self._idle_nodes[nodename] = time.time()
+
+    def idle_out(self, nodename):
+        with self._mtx:
+            try:
+                del self._idle_nodes[nodename]
+            except KeyError:
+                pass
+
 tracker = Tracker()
index d93c940d3f18e7d1ca2340f6cf75eca2fb2c17c2..5775aa659a31391f13a5071929d9f5562ba3969d 100644 (file)
@@ -376,16 +376,26 @@ class ComputeNodeMonitorActorTestCase(testutil.ActorTestMixin,
         self.assertTrue(self.node_state('down'))
 
     def test_in_idle_state(self):
+        idle_nodes_before = status.tracker._idle_nodes.keys()
         self.make_actor(2, arv_node=testutil.arvados_node_mock(job_uuid=None))
         self.assertTrue(self.node_state('idle'))
         self.assertFalse(self.node_state('busy'))
         self.assertTrue(self.node_state('idle', 'busy'))
+        idle_nodes_after = status.tracker._idle_nodes.keys()
+        new_idle_nodes = [n for n in idle_nodes_after if n not in idle_nodes_before]
+        # There should be 1 additional idle node
+        self.assertEqual(1, len(new_idle_nodes))
 
     def test_in_busy_state(self):
+        idle_nodes_before = status.tracker._idle_nodes.keys()
         self.make_actor(3, arv_node=testutil.arvados_node_mock(job_uuid=True))
         self.assertFalse(self.node_state('idle'))
         self.assertTrue(self.node_state('busy'))
         self.assertTrue(self.node_state('idle', 'busy'))
+        idle_nodes_after = status.tracker._idle_nodes.keys()
+        new_idle_nodes = [n for n in idle_nodes_after if n not in idle_nodes_before]
+        # There shouldn't be any additional idle node
+        self.assertEqual(0, len(new_idle_nodes))
 
     def test_init_shutdown_scheduling(self):
         self.make_actor()
index a3f0d177f7e219095a88abcc37fc9ed50fa0e9f1..2088a0d5bf2b9d43bac9145a14e5d861c1828034 100644 (file)
@@ -7,6 +7,8 @@ from __future__ import absolute_import, print_function
 from future import standard_library
 
 import json
+import mock
+import random
 import requests
 import unittest
 
@@ -62,16 +64,39 @@ class StatusServerUpdates(unittest.TestCase):
     def test_counters(self):
         with TestServer() as srv:
             resp = srv.get_status()
-            # Test initial values
+            # Test counters existence
             for counter in ['cloud_errors', 'boot_failures', 'actor_exceptions']:
                 self.assertIn(counter, resp)
-                self.assertEqual(0, resp[counter])
             # Test counter increment
             for count in range(1, 3):
                 status.tracker.counter_add('a_counter')
                 resp = srv.get_status()
                 self.assertEqual(count, resp['a_counter'])
 
+    @mock.patch('time.time')
+    def test_idle_times(self, time_mock):
+        with TestServer() as srv:
+            resp = srv.get_status()
+            node_name = 'idle_compute{}'.format(random.randint(1, 1024))
+            self.assertIn('idle_times', resp)
+            # Test add an idle node
+            time_mock.return_value = 10
+            status.tracker.idle_in(node_name)
+            time_mock.return_value += 10
+            resp = srv.get_status()
+            self.assertEqual(10, resp['idle_times'][node_name])
+            # Test adding the same idle node a 2nd time
+            time_mock.return_value += 10
+            status.tracker.idle_in(node_name)
+            time_mock.return_value += 10
+            resp = srv.get_status()
+            # Idle timestamp doesn't get reset if it already exists
+            self.assertEqual(30, resp['idle_times'][node_name])
+            # Test remove idle node
+            status.tracker.idle_out(node_name)
+            resp = srv.get_status()
+            self.assertNotIn(node_name, resp['idle_times'])
+
 
 class StatusServerDisabled(unittest.TestCase):
     def test_config_disabled(self):