6520: Expand recognized 'busy' states.
[arvados.git] / services / nodemanager / arvnodeman / nodelist.py
1 #!/usr/bin/env python
2
3 from __future__ import absolute_import, print_function
4
5 import subprocess
6
7 from . import clientactor
8 from . import config
9
10 import arvados.util
11
class ArvadosNodeListMonitorActor(clientactor.RemotePollLoopActor):
    """Actor to poll the Arvados node list.

    This actor regularly polls the list of Arvados node records, tags
    each record with a 'crunch_worker_state' derived from the output of
    SLURM's ``sinfo`` command, and sends the list to subscribers.
    """

    # Compact sinfo state codes that are reported as 'busy'.  The variants
    # with a trailing '*' are listed explicitly; per the sinfo man page
    # that suffix marks a node that is not responding.
    _BUSY_STATES = frozenset([
        'alloc', 'alloc*',
        'comp', 'comp*',
        'mix', 'mix*',
        'drng', 'drng*',
    ])

    def is_common_error(self, exception):
        """Return True if exception is an instance of config.ARVADOS_ERRORS."""
        return isinstance(exception, config.ARVADOS_ERRORS)

    def _item_key(self, node):
        """Return the 'uuid' field of a node record."""
        return node['uuid']

    @classmethod
    def _parse_sinfo(cls, sinfo_out):
        """Map hostnames to 'busy'/'idle'/'down' from ``sinfo`` output.

        sinfo_out is the text produced by ``sinfo --noheader
        --format="%n %t"``: one "<hostname> <state>" pair per line.
        Lines that do not contain exactly two whitespace-separated
        fields (blank lines, stray padding, unexpected extra columns)
        are skipped instead of raising ValueError, so one odd line
        cannot abort the whole poll.
        """
        if not isinstance(sinfo_out, str):
            # check_output() returns bytes on Python 3; normalize to text
            # so the comparisons against str state names below work.
            sinfo_out = sinfo_out.decode()

        nodestates = {}
        for line in sinfo_out.splitlines():
            fields = line.split()
            if len(fields) != 2:
                continue
            nodename, state = fields
            if state in cls._BUSY_STATES:
                nodestates[nodename] = 'busy'
            elif state == 'idle':
                nodestates[nodename] = 'idle'
            else:
                # Every other state code is reported as 'down'.
                nodestates[nodename] = 'down'
        return nodestates

    def _send_request(self):
        """Return all Arvados node records, annotated with worker state.

        Every record gets a 'crunch_worker_state' key set to 'busy',
        'idle' or 'down'.  A record is 'down' when it has no slot
        number, or when its hostname does not appear in the sinfo
        output.

        Exceptions from the Arvados API call or from running ``sinfo``
        (e.g. subprocess.CalledProcessError on a non-zero exit) are not
        caught here.
        """
        nodelist = arvados.util.list_all(self._client.nodes().list)

        # %n is the node hostname, %t the node state in compact form.
        sinfo_out = subprocess.check_output(
            ["sinfo", "--noheader", "--format=%n %t"])
        nodestates = self._parse_sinfo(sinfo_out)

        for n in nodelist:
            if n["slot_number"]:
                # An empty or unknown hostname falls back to 'down'.
                n["crunch_worker_state"] = nodestates.get(n["hostname"], 'down')
            else:
                n["crunch_worker_state"] = 'down'

        return nodelist
50
class CloudNodeListMonitorActor(clientactor.RemotePollLoopActor):
    """Actor to poll the cloud node list.

    This actor regularly asks the cloud client for the list of compute
    nodes and sends that list to subscribers.
    """

    def __init__(self, client, timer_actor, server_calc, *args, **kwargs):
        """Set up the poll loop and remember the size calculator.

        client and timer_actor, plus any extra positional and keyword
        arguments, are passed through to the parent class; server_calc
        is kept for looking up node sizes in _send_request().
        """
        super(CloudNodeListMonitorActor, self).__init__(
            client, timer_actor, *args, **kwargs)
        self._calculator = server_calc

    def is_common_error(self, exception):
        """Return the client's verdict on whether this is a cloud exception."""
        return self._client.is_cloud_exception(exception)

    def _item_key(self, node):
        """Return the 'id' attribute of a cloud node."""
        return node.id

    def _send_request(self):
        """Return the cloud's node list with each node's size replaced.

        Each node's ``size`` attribute is swapped for whatever the size
        calculator's find_size() returns for that size's id.
        """
        cloud_nodes = self._client.list_nodes()
        for cloud_node in cloud_nodes:
            # Replace the libcloud NodeSize object with a compatible
            # CloudSizeWrapper object, which merges the size info reported
            # by the cloud with size information from the configuration
            # file.
            size_id = cloud_node.size.id
            cloud_node.size = self._calculator.find_size(size_id)
        return cloud_nodes