projects
/
arvados.git
/ blobdiff
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
12431: Use libcloud fork 2.2.2.dev1
[arvados.git]
/
services
/
nodemanager
/
arvnodeman
/
daemon.py
diff --git a/services/nodemanager/arvnodeman/daemon.py b/services/nodemanager/arvnodeman/daemon.py
index 7e63c782ede1fecee931d088505aed549a21c9df..ca3029d9e1bc3c376b119cca367b3767f3a8bb45 100644
(file)
--- a/services/nodemanager/arvnodeman/daemon.py
+++ b/services/nodemanager/arvnodeman/daemon.py
@@ -1,4 +1,7 @@
#!/usr/bin/env python
#!/usr/bin/env python
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
from __future__ import absolute_import, print_function
from __future__ import absolute_import, print_function
@@ -75,7 +78,10 @@ class _ArvadosNodeTracker(_BaseNodeTracker):
class _ArvadosNodeTracker(_BaseNodeTracker):
item_key = staticmethod(lambda arvados_node: arvados_node['uuid'])
def find_stale_node(self, stale_time):
item_key = staticmethod(lambda arvados_node: arvados_node['uuid'])
def find_stale_node(self, stale_time):
- for record in self.nodes.itervalues():
+ # Try to select a stale node record that have an assigned slot first
+ for record in sorted(self.nodes.itervalues(),
+ key=lambda r: r.arvados_node['slot_number'],
+ reverse=True):
node = record.arvados_node
if (not cnode.timestamp_fresh(cnode.arvados_node_mtime(node),
stale_time) and
node = record.arvados_node
if (not cnode.timestamp_fresh(cnode.arvados_node_mtime(node),
stale_time) and
@@ -387,7 +393,7 @@ class NodeManagerDaemonActor(actor_class):
class NodeManagerDaemonActor(actor_class):
arvados_client=self._new_arvados(),
arvados_node=arvados_node,
cloud_client=self._new_cloud(),
arvados_client=self._new_arvados(),
arvados_node=arvados_node,
cloud_client=self._new_cloud(),
-            cloud_size=cloud_size).proxy()
+            cloud_size=self.server_calculator.find_size(cloud_size.id)).proxy()
self.booting[new_setup.actor_ref.actor_urn] = new_setup
self.sizes_booting[new_setup.actor_ref.actor_urn] = cloud_size
self.booting[new_setup.actor_ref.actor_urn] = new_setup
self.sizes_booting[new_setup.actor_ref.actor_urn] = cloud_size
@@ -495,8 +501,19 @@ class NodeManagerDaemonActor(actor_class):
class NodeManagerDaemonActor(actor_class):
except pykka.ActorDeadError:
return
cloud_node_id = cloud_node.id
except pykka.ActorDeadError:
return
cloud_node_id = cloud_node.id
- record = self.cloud_nodes[cloud_node_id]
- shutdown_actor.stop()
+
+ try:
+ shutdown_actor.stop()
+ except pykka.ActorDeadError:
+ pass
+
+ try:
+ record = self.cloud_nodes[cloud_node_id]
+ except KeyError:
+ # Cloud node was already removed from the cloud node list
+ # supposedly while the destroy_node call was finishing its
+ # job.
+ return
record.shutdown_actor = None
if not success:
record.shutdown_actor = None
if not success: