Merge branch '12446-dispatcher-query' closes #12446
[arvados.git] / services / nodemanager / arvnodeman / nodelist.py
1 #!/usr/bin/env python
2 # Copyright (C) The Arvados Authors. All rights reserved.
3 #
4 # SPDX-License-Identifier: AGPL-3.0
5
6 from __future__ import absolute_import, print_function
7
8 import subprocess
9
10 from . import clientactor
11 from . import config
12
13 import arvados.util
14
class ArvadosNodeListMonitorActor(clientactor.RemotePollLoopActor):
    """Poll loop actor that fetches the Arvados node records.

    Each poll retrieves the complete list of Arvados node records and
    annotates every record with a "crunch_worker_state" derived from the
    output of the `sinfo` command before handing the list to subscribers.
    """

    def is_common_error(self, exception):
        """Return True when `exception` is one of config.ARVADOS_ERRORS."""
        arvados_errors = config.ARVADOS_ERRORS
        return isinstance(exception, arvados_errors)

    def _item_key(self, node):
        """Return the 'uuid' entry of an Arvados node record."""
        return node['uuid']

    def _send_request(self):
        """Fetch all Arvados node records and tag each with a worker state.

        Every returned record gets a "crunch_worker_state" entry set to
        'busy', 'idle' or 'down'.  Records with a false slot_number or
        hostname, or whose hostname was not reported by `sinfo`, are
        marked 'down'.

        Raises whatever the Arvados listing call or
        subprocess.check_output raises (for example CalledProcessError
        when `sinfo` exits non-zero).
        """
        arvados_nodes = arvados.util.list_all(self._client.nodes().list)

        # Translation table from the state column printed by sinfo to the
        # crunch worker state.  Anything not listed here counts as 'down'.
        worker_state_for = dict.fromkeys(
            ('alloc', 'alloc*',
             'comp', 'comp*',
             'mix', 'mix*',
             'drng', 'drng*'),
            'busy')
        worker_state_for['idle'] = 'idle'

        # Each sinfo output line is expected to be "<hostname> <state>".
        sinfo_cmd = ["sinfo", "--noheader", "--format=%n %t"]
        state_by_host = {}
        for line in subprocess.check_output(sinfo_cmd).splitlines():
            fields = line.split(" ", 2)
            if len(fields) != 2:
                # Not exactly two columns: ignore this line.
                continue
            hostname, sinfo_state = fields
            state_by_host[hostname] = worker_state_for.get(sinfo_state,
                                                           'down')

        for record in arvados_nodes:
            worker_state = 'down'
            if record["slot_number"] and record["hostname"]:
                worker_state = state_by_host.get(record["hostname"], 'down')
            record["crunch_worker_state"] = worker_state

        return arvados_nodes
56
class CloudNodeListMonitorActor(clientactor.RemotePollLoopActor):
    """Poll loop actor that fetches the cloud node list.

    Each poll asks the cloud client for its list of compute nodes and
    passes that list on to subscribers.
    """

    def __init__(self, client, timer_actor, server_calc, *args, **kwargs):
        """Set up the actor and remember the server size calculator.

        `client` and `timer_actor`, plus any extra positional and keyword
        arguments, are forwarded to the parent constructor.  `server_calc`
        is kept and later used to look up node sizes by id.
        """
        parent = super(CloudNodeListMonitorActor, self)
        parent.__init__(client, timer_actor, *args, **kwargs)
        self._calculator = server_calc

    def is_common_error(self, exception):
        """Return True when `exception` is one of config.CLOUD_ERRORS."""
        cloud_errors = config.CLOUD_ERRORS
        return isinstance(exception, cloud_errors)

    def _item_key(self, node):
        """Return the `id` attribute of a cloud node."""
        return node.id

    def _send_request(self):
        """List the cloud nodes, swapping in the calculator's size objects.

        Returns the list obtained from the client's list_nodes() call,
        with every node's `size` attribute replaced in place.
        """
        cloud_nodes = self._client.list_nodes()
        for cloud_node in cloud_nodes:
            # Substitute the size object reported by the cloud with the
            # one the calculator returns for the same size id, so that
            # size details from the configuration file are merged in.
            size_id = cloud_node.size.id
            cloud_node.size = self._calculator.find_size(size_id)
        return cloud_nodes