11561: Add lock_count to containers, cancel container on unlock exceeded
authorPeter Amstutz <pamstutz@veritasgenetics.com>
Thu, 14 Feb 2019 23:28:00 +0000 (18:28 -0500)
committerPeter Amstutz <pamstutz@veritasgenetics.com>
Thu, 14 Feb 2019 23:28:00 +0000 (18:28 -0500)
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz@veritasgenetics.com>

services/api/app/models/container.rb
services/api/config/application.default.yml
services/api/db/migrate/20190214214814_add_container_lock_count.rb [new file with mode: 0644]
services/api/db/structure.sql
services/api/test/unit/container_test.rb

index bd586907ee2eaf205616251be126bc7cf9c94b09..3d1e6491150624bcbcbad9aa95a5e4dc202dd390 100644 (file)
@@ -346,7 +346,7 @@ class Container < ArvadosModel
     transaction do
       reload
       check_lock_fail
-      update_attributes!(state: Locked)
+      update_attributes!(state: Locked, lock_count: self.lock_count+1)
     end
   end
 
@@ -364,7 +364,14 @@ class Container < ArvadosModel
     transaction do
       reload(lock: 'FOR UPDATE')
       check_unlock_fail
-      update_attributes!(state: Queued)
+      if self.lock_count < Rails.configuration.max_container_dispatch_attempts
+        update_attributes!(state: Queued)
+      else
+        update_attributes!(state: Cancelled,
+                           runtime_status: {
+                             error: "Container exceeded 'max_container_dispatch_attempts' (lock_count=#{self.lock_count}."
+                           })
+      end
     end
   end
 
@@ -454,7 +461,7 @@ class Container < ArvadosModel
 
     case self.state
     when Locked
-      permitted.push :priority, :runtime_status, :log
+      permitted.push :priority, :runtime_status, :log, :lock_count
 
     when Queued
       permitted.push :priority
@@ -475,7 +482,7 @@ class Container < ArvadosModel
       when Running
         permitted.push :finished_at, *progress_attrs
       when Queued, Locked
-        permitted.push :finished_at, :log
+        permitted.push :finished_at, :log, :runtime_status
       end
 
     else
index dcf270e3fb5d1a59a25e9858fc65e2eb2b901c42..d0f3a4caeb11d9f931772a8b1036fb257c2632fe 100644 (file)
@@ -529,6 +529,10 @@ common:
   # > 0 = auto-create a new version when older than the specified number of seconds.
   preserve_version_if_idle: -1
 
+  # Number of times a container can be unlocked before being
+  # automatically cancelled.
+  max_container_dispatch_attempts: 5
+
 development:
   force_ssl: false
   cache_classes: false
diff --git a/services/api/db/migrate/20190214214814_add_container_lock_count.rb b/services/api/db/migrate/20190214214814_add_container_lock_count.rb
new file mode 100644 (file)
index 0000000..a496eb0
--- /dev/null
@@ -0,0 +1,5 @@
+class AddContainerLockCount < ActiveRecord::Migration
+  def change
+    add_column :containers, :lock_count, :int, :null => false, :default => 0
+  end
+end
index 211fa5043fda2aedc33646f8c98dff863bec8d7a..f766f33e1b35e1f85a64aa2c5d87bf85e2bb6d0f 100644 (file)
@@ -362,7 +362,8 @@ CREATE TABLE public.containers (
     runtime_status jsonb DEFAULT '{}'::jsonb,
     runtime_user_uuid text,
     runtime_auth_scopes jsonb,
-    runtime_token text
+    runtime_token text,
+    lock_count integer DEFAULT 0 NOT NULL
 );
 
 
@@ -3217,3 +3218,5 @@ INSERT INTO schema_migrations (version) VALUES ('20181011184200');
 
 INSERT INTO schema_migrations (version) VALUES ('20181213183234');
 
+INSERT INTO schema_migrations (version) VALUES ('20190214214814');
+
index 2a9ff5bf4cc6985a413f62a03d7b9555e9c0f938..5750d5ebbd0394d99bdce8ae2a038bb1d799cd77 100644 (file)
@@ -663,6 +663,52 @@ class ContainerTest < ActiveSupport::TestCase
     assert_operator auth_exp, :<, db_current_time
   end
 
+  test "Exceed maximum lock-unlock cycles" do
+    Rails.configuration.max_container_dispatch_attempts = 3
+
+    set_user_from_auth :active
+    c, cr = minimal_new
+
+    set_user_from_auth :dispatch1
+    assert_equal Container::Queued, c.state
+    assert_equal 0, c.lock_count
+
+    c.lock
+    c.reload
+    assert_equal 1, c.lock_count
+    assert_equal Container::Locked, c.state
+
+    c.unlock
+    c.reload
+    assert_equal 1, c.lock_count
+    assert_equal Container::Queued, c.state
+
+    c.lock
+    c.reload
+    assert_equal 2, c.lock_count
+    assert_equal Container::Locked, c.state
+
+    c.unlock
+    c.reload
+    assert_equal 2, c.lock_count
+    assert_equal Container::Queued, c.state
+
+    c.lock
+    c.reload
+    assert_equal 3, c.lock_count
+    assert_equal Container::Locked, c.state
+
+    c.unlock
+    c.reload
+    assert_equal 3, c.lock_count
+    assert_equal Container::Cancelled, c.state
+
+    assert_raise(ArvadosModel::LockFailedError) do
+      # Cancelled to Locked is not allowed
+      c.lock
+    end
+  end
+
   test "Container queued cancel" do
     set_user_from_auth :active
     c, cr = minimal_new({container_count_max: 1})