#!/usr/bin/env python
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
from __future__ import absolute_import, print_function
-import subprocess
+import subprocess32 as subprocess
from . import clientactor
from . import config
class ArvadosNodeListMonitorActor(clientactor.RemotePollLoopActor):
"""Actor to poll the Arvados node list.
- This actor regularly polls the list of Arvados node records, and
- sends it to subscribers.
+ This actor regularly polls the list of Arvados node records,
+ augments it with the latest SLURM node info (`sinfo`), and sends
+ it to subscribers.
"""
def is_common_error(self, exception):
nodelist = arvados.util.list_all(self._client.nodes().list)
- # node hostname, state
+ # node hostname, state, features
- sinfo_out = subprocess.check_output(["sinfo", "--noheader", "--format=%n %t"])
+ sinfo_out = subprocess.check_output(["sinfo", "--noheader", "--format=%n|%t|%f"])
nodestates = {}
+ nodefeatures = {}
for out in sinfo_out.splitlines():
try:
- nodename, state = out.split(" ", 2)
- if state in ('alloc', 'alloc*',
- 'comp', 'comp*',
- 'mix', 'mix*',
- 'drng', 'drng*'):
- nodestates[nodename] = 'busy'
- elif state == 'idle':
- nodestates[nodename] = 'idle'
- else:
- nodestates[nodename] = 'down'
+ nodename, state, features = out.split("|", 3)
except ValueError:
- pass
+ continue
+ if state in ('alloc', 'alloc*',
+ 'comp', 'comp*',
+ 'mix', 'mix*',
+ 'drng', 'drng*'):
+ nodestates[nodename] = 'busy'
+ elif state in ('idle', 'fail'):
+ nodestates[nodename] = state
+ else:
+ nodestates[nodename] = 'down'
+ if features != "(null)":
+ nodefeatures[nodename] = features
for n in nodelist:
if n["slot_number"] and n["hostname"] and n["hostname"] in nodestates:
n["crunch_worker_state"] = nodestates[n["hostname"]]
else:
n["crunch_worker_state"] = 'down'
+ n["slurm_node_features"] = nodefeatures.get(n["hostname"], "")
return nodelist
def _send_request(self):
nodes = self._client.list_nodes()
for n in nodes:
- # Replace with libcloud NodeSize object with compatible
+ # Replace the libcloud NodeSize object with a compatible
# CloudSizeWrapper object which merges the size info reported from
# the cloud with size information from the configuration file.
- n.size = self._calculator.find_size(n.size.id)
+ n.size = self._calculator.find_size(n.extra['arvados_node_size'])
return nodes