From 5c0a4eb517a5f0b81e11df1b610fafdf3fab1dcc Mon Sep 17 00:00:00 2001 From: Lucas Di Pentima Date: Wed, 28 Mar 2018 15:51:14 -0300 Subject: [PATCH] 12085: Idle node times tracking, with tests. Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima --- .../computenode/dispatch/__init__.py | 6 ++++ services/nodemanager/arvnodeman/status.py | 22 +++++++++++++- .../tests/test_computenode_dispatch.py | 10 +++++++ services/nodemanager/tests/test_status.py | 29 +++++++++++++++++-- 4 files changed, 64 insertions(+), 3 deletions(-) diff --git a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py index 340668eff5..9106ea67cc 100644 --- a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py +++ b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py @@ -413,6 +413,12 @@ class ComputeNodeMonitorActor(config.actor_class): #if state == 'idle' and self.arvados_node['job_uuid']: # state = 'busy' + # Update idle node times tracker + if state == 'idle': + status.tracker.idle_in(self.arvados_node['hostname']) + else: + status.tracker.idle_out(self.arvados_node['hostname']) + return state def in_state(self, *states): diff --git a/services/nodemanager/arvnodeman/status.py b/services/nodemanager/arvnodeman/status.py index b2eb7c827f..20e30e1417 100644 --- a/services/nodemanager/arvnodeman/status.py +++ b/services/nodemanager/arvnodeman/status.py @@ -6,6 +6,7 @@ from __future__ import absolute_import, print_function from future import standard_library import http.server +import time import json import logging import socketserver @@ -82,10 +83,16 @@ class Tracker(object): 'actor_exceptions': 0 } self._version = {'Version' : __version__} + self._idle_nodes = {} def get_json(self): with self._mtx: - return json.dumps(dict(self._latest, **self._version)) + times = {'idle_times' : {}} + now = time.time() + for node, ts in self._idle_nodes.items(): + times['idle_times'][node] = int(now - ts) + 
return json.dumps( + dict(dict(self._latest, **self._version), **times)) def keys(self): with self._mtx: @@ -104,4 +111,17 @@ class Tracker(object): self._latest.setdefault(counter, 0) self._latest[counter] += value + def idle_in(self, nodename): + with self._mtx: + if self._idle_nodes.get(nodename): + return + self._idle_nodes[nodename] = time.time() + + def idle_out(self, nodename): + with self._mtx: + try: + del self._idle_nodes[nodename] + except KeyError: + pass + tracker = Tracker() diff --git a/services/nodemanager/tests/test_computenode_dispatch.py b/services/nodemanager/tests/test_computenode_dispatch.py index d93c940d3f..5775aa659a 100644 --- a/services/nodemanager/tests/test_computenode_dispatch.py +++ b/services/nodemanager/tests/test_computenode_dispatch.py @@ -376,16 +376,26 @@ class ComputeNodeMonitorActorTestCase(testutil.ActorTestMixin, self.assertTrue(self.node_state('down')) def test_in_idle_state(self): + idle_nodes_before = status.tracker._idle_nodes.keys() self.make_actor(2, arv_node=testutil.arvados_node_mock(job_uuid=None)) self.assertTrue(self.node_state('idle')) self.assertFalse(self.node_state('busy')) self.assertTrue(self.node_state('idle', 'busy')) + idle_nodes_after = status.tracker._idle_nodes.keys() + new_idle_nodes = [n for n in idle_nodes_after if n not in idle_nodes_before] + # There should be 1 additional idle node + self.assertEqual(1, len(new_idle_nodes)) def test_in_busy_state(self): + idle_nodes_before = status.tracker._idle_nodes.keys() self.make_actor(3, arv_node=testutil.arvados_node_mock(job_uuid=True)) self.assertFalse(self.node_state('idle')) self.assertTrue(self.node_state('busy')) self.assertTrue(self.node_state('idle', 'busy')) + idle_nodes_after = status.tracker._idle_nodes.keys() + new_idle_nodes = [n for n in idle_nodes_after if n not in idle_nodes_before] + # There shouldn't be any additional idle node + self.assertEqual(0, len(new_idle_nodes)) def test_init_shutdown_scheduling(self): self.make_actor() diff --git 
a/services/nodemanager/tests/test_status.py b/services/nodemanager/tests/test_status.py index a3f0d177f7..2088a0d5bf 100644 --- a/services/nodemanager/tests/test_status.py +++ b/services/nodemanager/tests/test_status.py @@ -7,6 +7,8 @@ from __future__ import absolute_import, print_function from future import standard_library import json +import mock +import random import requests import unittest @@ -62,16 +64,39 @@ class StatusServerUpdates(unittest.TestCase): def test_counters(self): with TestServer() as srv: resp = srv.get_status() - # Test initial values + # Test counters existence for counter in ['cloud_errors', 'boot_failures', 'actor_exceptions']: self.assertIn(counter, resp) - self.assertEqual(0, resp[counter]) # Test counter increment for count in range(1, 3): status.tracker.counter_add('a_counter') resp = srv.get_status() self.assertEqual(count, resp['a_counter']) + @mock.patch('time.time') + def test_idle_times(self, time_mock): + with TestServer() as srv: + resp = srv.get_status() + node_name = 'idle_compute{}'.format(random.randint(1, 1024)) + self.assertIn('idle_times', resp) + # Test add an idle node + time_mock.return_value = 10 + status.tracker.idle_in(node_name) + time_mock.return_value += 10 + resp = srv.get_status() + self.assertEqual(10, resp['idle_times'][node_name]) + # Test adding the same idle node a 2nd time + time_mock.return_value += 10 + status.tracker.idle_in(node_name) + time_mock.return_value += 10 + resp = srv.get_status() + # Idle timestamp doesn't get reset if already exists + self.assertEqual(30, resp['idle_times'][node_name]) + # Test remove idle node + status.tracker.idle_out(node_name) + resp = srv.get_status() + self.assertNotIn(node_name, resp['idle_times']) + class StatusServerDisabled(unittest.TestCase): def test_config_disabled(self): -- 2.30.2