Merge branch '20614-no-retry-cancel' refs #20614
[arvados.git] / services / api / app / models / container.rb
index 3c6c969bd28901c8151a34bc719f115e8d8fb61b..8a51d749f7137ec5ace615c365a234c23505180d 100644 (file)
@@ -5,6 +5,7 @@
 require 'log_reuse_info'
 require 'whitelist_update'
 require 'safe_json'
+require 'update_priorities'
 
 class Container < ArvadosModel
   include ArvadosModelUpdates
@@ -49,7 +50,6 @@ class Container < ArvadosModel
   before_save :clear_runtime_status_when_queued
   after_save :update_cr_logs
   after_save :handle_completed
-  after_save :propagate_priority
 
   has_many :container_requests, :foreign_key => :container_uuid, :class_name => 'ContainerRequest', :primary_key => :uuid
   belongs_to :auth, :class_name => 'ApiClientAuthorization', :foreign_key => :auth_uuid, :primary_key => :uuid
@@ -129,34 +129,8 @@ class Container < ArvadosModel
   # priority of a user-submitted request is a function of
   # user-assigned priority and request creation time.
   def update_priority!
-    return if ![Queued, Locked, Running].include?(state)
-    p = ContainerRequest.
-          where('container_uuid=? and priority>0 and state=?', uuid, ContainerRequest::Committed).
-          select("priority, requesting_container_uuid, created_at").
-          lock(true).
-          map do |cr|
-      if cr.requesting_container_uuid
-        Container.where(uuid: cr.requesting_container_uuid).pluck(:priority).first
-      else
-        (cr.priority << 50) - (cr.created_at.to_time.to_f * 1000).to_i
-      end
-    end.max || 0
-    update_attributes!(priority: p)
-  end
-
-  def propagate_priority
-    return true unless saved_change_to_priority?
-    act_as_system_user do
-      # Update the priority of child container requests to match new
-      # priority of the parent container (ignoring requests with no
-      # container assigned, because their priority doesn't matter).
-      ContainerRequest.
-        where('requesting_container_uuid = ? and state = ? and container_uuid is not null',
-              self.uuid, ContainerRequest::Committed).
-        pluck(:container_uuid).each do |container_uuid|
-        Container.find_by_uuid(container_uuid).update_priority!
-      end
-    end
+    update_priorities uuid
+    reload
   end
 
   # Create a new container (or find an existing one) to satisfy the
@@ -645,7 +619,7 @@ class Container < ArvadosModel
     # each requesting CR.
     return if self.final? || !saved_change_to_log?
     leave_modified_by_user_alone do
-      ContainerRequest.where(container_uuid: self.uuid).each do |cr|
+      ContainerRequest.where(container_uuid: self.uuid, state: ContainerRequest::Committed).each do |cr|
         cr.update_collections(container: self, collections: ['log'])
         cr.save!
       end
@@ -756,7 +730,21 @@ class Container < ArvadosModel
       self.with_lock do
         act_as_system_user do
           if self.state == Cancelled
-            retryable_requests = ContainerRequest.where("container_uuid = ? and priority > 0 and state = 'Committed' and container_count < container_count_max", uuid)
+            # Cancelled means the container didn't run to completion.
+            # This happens either because it was cancelled by the user
+            # or because there was an infrastructure failure.  We want
+            # to retry infrastructure failures automatically.
+            #
+            # Search for live container requests to determine if we
+            # should retry the container.
+            retryable_requests = ContainerRequest.
+                                   joins('left outer join containers as requesting_container on container_requests.requesting_container_uuid = requesting_container.uuid').
+                                   where("container_requests.container_uuid = ? and "+
+                                         "container_requests.priority > 0 and "+
+                                         "(requesting_container.priority is null or (requesting_container.state = 'Running' and requesting_container.priority > 0)) and "+
+                                         "container_requests.state = 'Committed' and "+
+                                         "container_requests.container_count < container_requests.container_count_max", uuid).
+                                   order('container_requests.uuid asc')
           else
             retryable_requests = []
           end