15164: Add locking between container completion and reuse
[arvados.git] / services / api / app / models / container_request.rb
index 24882860ebb3e61bd434630441de7c60f827622f..45db4ee916f202352b658a695e0bae2184693e46 100644 (file)
@@ -119,13 +119,34 @@ class ContainerRequest < ArvadosModel
   end
 
   def finalize_if_needed
-    if state == Committed && Container.find_by_uuid(container_uuid).final?
-      reload
-      act_as_system_user do
-        leave_modified_by_user_alone do
-          finalize!
+    return if state != Committed
+    while true
+      # get container lock first, then lock current container request
+      # (same order as Container#handle_completed). Locking always
+      # reloads the Container and ContainerRequest records.
+      c = Container.find_by_uuid(container_uuid)
+      c.lock!
+      self.lock!
+
+      if container_uuid != c.uuid
+        # After locking, we've noticed a race, the container_uuid is
+        # different than the container record we just loaded.  This
+        # can happen if Container#handle_completed scheduled a new
+        # container for retry and set container_uuid while we were
+        # waiting on the container lock.  Restart the loop and get the
+        # new container.
+        redo
+      end
+
+      if state == Committed && c.final?
+        # The current container is
+        act_as_system_user do
+          leave_modified_by_user_alone do
+            finalize!
+          end
         end
       end
+      return true
     end
   end
 
@@ -210,7 +231,18 @@ class ContainerRequest < ArvadosModel
       return false
     end
     if state_changed? and state == Committed and container_uuid.nil?
-      self.container_uuid = Container.resolve(self).uuid
+      while true
+        c = Container.resolve(self)
+        c.lock!
+        if c.state == Container::Cancelled
+          # Lost a race, we have a lock on the container but the
+          # container was cancelled in a different request, restart
+          # the loop and resolve request to a new container.
+          redo
+        end
+        self.container_uuid = c.uuid
+        break
+      end
     end
     if self.container_uuid != self.container_uuid_was
       if self.container_count_changed?