Merge branch '13164-container-deadlock'
authorTom Clegg <tclegg@veritasgenetics.com>
Fri, 18 May 2018 19:10:53 +0000 (15:10 -0400)
committerTom Clegg <tclegg@veritasgenetics.com>
Fri, 18 May 2018 19:10:53 +0000 (15:10 -0400)
refs #13164

Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg@veritasgenetics.com>

services/api/app/controllers/arvados/v1/containers_controller.rb
services/api/app/models/container.rb

index 6ec92b0ba66f9a59bc978844563a68d84d20f417..fa29dbd8135453587cee7a7fcfeb220f864d0755 100644 (file)
@@ -20,10 +20,13 @@ class Arvados::V1::ContainersController < ApplicationController
     show
   end
 
-  # Updates use row locking to resolve races between multiple
-  # dispatchers trying to lock the same container.
   def update
-    @object.with_lock do
+    # container updates can trigger container request lookups, which
+    # can deadlock if we don't lock the container_requests table
+    # first.
+    @object.transaction do
+      ActiveRecord::Base.connection.execute('LOCK container_requests, containers IN EXCLUSIVE MODE')
+      @object.reload
       super
     end
   end
index e9d4f836589cb5933307b869cc6bff8230c894d0..1dbdb571050a70ec3a684f18f335269ac35fd6f8 100644 (file)
@@ -316,11 +316,11 @@ class Container < ArvadosModel
     # (because state might have changed while acquiring the lock).
     check_lock_fail
     transaction do
-      begin
-        reload(lock: 'FOR UPDATE NOWAIT')
-      rescue
-        raise LockFailedError.new("cannot lock: other transaction in progress")
-      end
+      # Locking involves assigning auth_uuid, which involves looking
+      # up container requests, so we must lock both tables in the
+      # proper order to avoid deadlock.
+      ActiveRecord::Base.connection.execute('LOCK container_requests, containers IN EXCLUSIVE MODE')
+      reload
       check_lock_fail
       update_attributes!(state: Locked)
     end