9161: Don't automatically consider nodes with job_uuid set to be 'busy'.
author: Peter Amstutz <peter.amstutz@curoverse.com>
Mon, 16 May 2016 18:30:06 +0000 (14:30 -0400)
committer: Peter Amstutz <peter.amstutz@curoverse.com>
Mon, 16 May 2016 18:30:06 +0000 (14:30 -0400)
services/nodemanager/arvnodeman/computenode/dispatch/__init__.py

index f9dbd20e6b71c1c5b8746885628942ea207f9f4a..96b2353ce034ab0caa739408ec0e7d36d2f138fb 100644 (file)
@@ -371,8 +371,13 @@ class ComputeNodeMonitorActor(config.actor_class):
         if arvados_node_missing(self.arvados_node, self.node_stale_after):
             state = 'down'
 
-        if state == 'idle' and self.arvados_node['job_uuid']:
-            state = 'busy'
+        # Turns out using 'job_uuid' this way is a bad idea.  The node record
+        # is assigned the job_uuid before the job is locked (which removes it
+        # from the queue), which means the job will be double-counted: it is
+        # both in the wishlist and keeping a node busy.  The end result is
+        # excess nodes being booted.
+        #if state == 'idle' and self.arvados_node['job_uuid']:
+        #    state = 'busy'
 
         return state