Merge branch '2278-crunch-dispatcher-monitor-processes'
author Peter Amstutz <peter.amstutz@curoverse.com>
Thu, 20 Mar 2014 19:38:24 +0000 (15:38 -0400)
committer Peter Amstutz <peter.amstutz@curoverse.com>
Thu, 20 Mar 2014 19:38:24 +0000 (15:38 -0400)
sdk/cli/bin/arv-run-pipeline-instance
services/api/app/controllers/arvados/v1/jobs_controller.rb
services/api/app/models/job.rb
services/api/script/crunch-dispatch.rb

index 91d7192c076ba76c6ec1017c9012b8d1e6739bc5..1e62a52c04dcf9edbd9106a42d9c2124d07967e3 100755 (executable)
@@ -558,12 +558,14 @@ class WhRunPipelineInstance
           ended += 1
           if c[:job][:success] == true
             succeeded += 1
+          elsif c[:job][:success] == false
+            failed += 1
           end
         end
       end
     end
     
-    if ended == @components.length
+    if ended == @components.length or failed > 0
       @instance[:active] = false
       @instance[:success] = (succeeded == @components.length)
     end
index a715d0ef29d8117dea8de020af6084234af2e52d..b7b0e677d63d5b5d2e5389a27a42ab1f709f34ad 100644 (file)
@@ -145,7 +145,8 @@ class Arvados::V1::JobsController < ApplicationController
     @where.merge!({
                     started_at: nil,
                     is_locked_by_uuid: nil,
-                    cancelled_at: nil
+                    cancelled_at: nil,
+                    success: nil
                   })
     params[:order] ||= 'priority desc, created_at'
     find_objects_for_index
index 9c8f724120ba8049b4a498141e669c5708ca0317..1f0ef758d97a893664e9e82fb9131b90de688995 100644 (file)
@@ -51,8 +51,8 @@ class Job < ArvadosModel
   end
 
   def self.queue
-    self.where('started_at is ? and is_locked_by_uuid is ? and cancelled_at is ?',
-               nil, nil, nil).
+    self.where('started_at is ? and is_locked_by_uuid is ? and cancelled_at is ? and success is ?',
+               nil, nil, nil, nil).
       order('priority desc, created_at')
   end
 
@@ -108,7 +108,8 @@ class Job < ArvadosModel
 
   def permission_to_update
     if is_locked_by_uuid_was and !(current_user and
-                                   current_user.uuid == is_locked_by_uuid_was)
+                                   (current_user.uuid == is_locked_by_uuid_was or
+                                    current_user.uuid == system_user.uuid))
       if script_changed? or
           script_parameters_changed? or
           script_version_changed? or
index 693dfbdb7fc049289067d94e866b48049bc8c2cb..9eb1c28cb2e3fa567a735f7b9556c57ba32e8f1f 100755 (executable)
@@ -298,7 +298,6 @@ class Dispatcher
     job_done = j_done[:job]
     $stderr.puts "dispatch: child #{pid_done} exit"
     $stderr.puts "dispatch: job #{job_done.uuid} end"
-    $redis.publish job_done.uuid, "end"
 
     # Ensure every last drop of stdout and stderr is consumed
     read_pipes
@@ -309,9 +308,18 @@ class Dispatcher
     # Wait the thread
     j_done[:wait_thr].value
 
+    jobrecord = Job.find_by_uuid(job_done.uuid)
+    jobrecord.running = false
+    jobrecord.finished_at ||= Time.now,
+    # Don't set 'jobrecord.success = false' because if the job failed to run due to an
+    # issue with crunch-job or slurm, we want the job to stay in the queue.
+    jobrecord.save!
+
     # Invalidate the per-job auth token
     j_done[:job_auth].update_attributes expires_at: Time.now
 
+    $redis.publish job_done.uuid, "end"
+
     @running.delete job_done.uuid
   end
 
@@ -360,8 +368,6 @@ class Dispatcher
     end
   end
 
-
-
   protected
 
   def did_recently(thing, min_interval)