From 5c549965a11b6a2ce789c1e0db9e418f695aed84 Mon Sep 17 00:00:00 2001
From: Peter Amstutz
Date: Mon, 16 May 2016 14:30:06 -0400
Subject: [PATCH] 9161: Don't automatically consider nodes with job_uuid set
 to be 'busy'.

---
 .../arvnodeman/computenode/dispatch/__init__.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
index f9dbd20e6b..96b2353ce0 100644
--- a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
+++ b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
@@ -371,8 +371,13 @@ class ComputeNodeMonitorActor(config.actor_class):
         if arvados_node_missing(self.arvados_node, self.node_stale_after):
             state = 'down'
-        if state == 'idle' and self.arvados_node['job_uuid']:
-            state = 'busy'
+        # Turns out using 'job_uuid' this way is a bad idea.  The node record
+        # is assigned the job_uuid before the job is locked (which is what
+        # removes it from the queue), so the job is double-counted: it is
+        # still in the wishlist while also keeping a node busy.  The end
+        # result is excess nodes being booted.
+        #if state == 'idle' and self.arvados_node['job_uuid']:
+        #    state = 'busy'
         return state
-- 
2.30.2
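
The double-counting described in the new comment can be illustrated with a minimal, hypothetical sketch. The function name, data shapes, and sizing logic below are simplified stand-ins, not the actual node manager code: a job whose job_uuid has been copied onto a node record but which is not yet locked still sits in the queue, so it is counted once in the wishlist and once as keeping a node busy.

    def nodes_wanted(queued_jobs, cloud_nodes):
        """Hypothetical estimate of how many extra nodes to boot.

        queued_jobs -- jobs still in the queue (the "wishlist")
        cloud_nodes -- dicts carrying the 'job_uuid' field from the
                       Arvados node record
        """
        # Treating any node whose record carries a job_uuid as busy
        # mirrors the check removed by this patch.
        busy = sum(1 for node in cloud_nodes if node.get('job_uuid'))
        idle = len(cloud_nodes) - busy
        return max(len(queued_jobs) - idle, 0)

    # A job assigned to a node but not yet locked is still queued, so it
    # appears in the wishlist *and* marks its node busy ('job-1' is a
    # made-up identifier for illustration):
    queued_jobs = [{'uuid': 'job-1'}]        # assigned but not locked yet
    cloud_nodes = [{'job_uuid': 'job-1'}]    # same job already on a node
    print(nodes_wanted(queued_jobs, cloud_nodes))  # prints 1: an excess node gets booted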