20862: Merge branch 'main' into 20862-google-api-client
[arvados.git] / services / api / test / unit / container_request_test.rb
index fedfe1521765b1f8dce87b062c5492b756166a57..a64adba6ff19c3619f1f56c503551161b6735aa9 100644 (file)
@@ -950,6 +950,174 @@ class ContainerRequestTest < ActiveSupport::TestCase
 
   end
 
+  test "Retry sub-request on error" do
+    set_user_from_auth :active
+    cr1 = create_minimal_req!(priority: 1, state: "Committed", container_count_max: 2, command: ["echo", "foo1"])
+    c1 = Container.find_by_uuid(cr1.container_uuid)
+    act_as_system_user do
+      c1.update_attributes!(state: Container::Locked)
+      c1.update_attributes!(state: Container::Running)
+    end
+
+    cr2 = with_container_auth(c1) do
+      create_minimal_req!(priority: 10, state: "Committed", container_count_max: 2, command: ["echo", "foo2"])
+    end
+    c2 = Container.find_by_uuid(cr2.container_uuid)
+    act_as_system_user do
+      c2.update_attributes!(state: Container::Locked)
+      c2.update_attributes!(state: Container::Running)
+    end
+
+    cr3 = with_container_auth(c2) do
+      create_minimal_req!(priority: 10, state: "Committed", container_count_max: 2, command: ["echo", "foo3"])
+    end
+    c3 = Container.find_by_uuid(cr3.container_uuid)
+
+    act_as_system_user do
+      c3.update_attributes!(state: Container::Locked)
+      c3.update_attributes!(state: Container::Running)
+    end
+
+    # All the containers are in running state
+
+    c3.reload
+    cr3.reload
+
+    # c3 still running
+    assert_equal 'Running', c3.state
+    assert_equal 1, cr3.container_count
+    assert_equal 'Committed', cr3.state
+
+    # c3 goes to cancelled state
+    act_as_system_user do
+      c3.state = "Cancelled"
+      c3.save!
+    end
+
+    cr3.reload
+
+    # Because the parent request is still live, it should
+    # be retried.
+    assert_equal 2, cr3.container_count
+    assert_equal 'Committed', cr3.state
+  end
+
+  test "Do not retry sub-request when process tree is cancelled" do
+    set_user_from_auth :active
+    cr1 = create_minimal_req!(priority: 1, state: "Committed", container_count_max: 2, command: ["echo", "foo1"])
+    c1 = Container.find_by_uuid(cr1.container_uuid)
+    act_as_system_user do
+      c1.update_attributes!(state: Container::Locked)
+      c1.update_attributes!(state: Container::Running)
+    end
+
+    cr2 = with_container_auth(c1) do
+      create_minimal_req!(priority: 10, state: "Committed", container_count_max: 2, command: ["echo", "foo2"])
+    end
+    c2 = Container.find_by_uuid(cr2.container_uuid)
+    act_as_system_user do
+      c2.update_attributes!(state: Container::Locked)
+      c2.update_attributes!(state: Container::Running)
+    end
+
+    cr3 = with_container_auth(c2) do
+      create_minimal_req!(priority: 10, state: "Committed", container_count_max: 2, command: ["echo", "foo3"])
+    end
+    c3 = Container.find_by_uuid(cr3.container_uuid)
+
+    act_as_system_user do
+      c3.update_attributes!(state: Container::Locked)
+      c3.update_attributes!(state: Container::Running)
+    end
+
+    # All the containers are in running state
+
+    # Now cancel the toplevel container request
+    act_as_system_user do
+      cr1.priority = 0
+      cr1.save!
+    end
+
+    c3.reload
+    cr3.reload
+
+    # c3 still running
+    assert_equal 'Running', c3.state
+    assert_equal 1, cr3.container_count
+    assert_equal 'Committed', cr3.state
+
+    # c3 goes to cancelled state
+    act_as_system_user do
+      assert_equal 0, c3.priority
+      c3.state = "Cancelled"
+      c3.save!
+    end
+
+    cr3.reload
+
+    # Because the parent process was cancelled, it _should not_ be
+    # retried.
+    assert_equal 1, cr3.container_count
+    assert_equal 'Final', cr3.state
+  end
+
+  test "Retry process tree on error" do
+    set_user_from_auth :active
+    cr1 = create_minimal_req!(priority: 1, state: "Committed", container_count_max: 2, command: ["echo", "foo1"])
+    c1 = Container.find_by_uuid(cr1.container_uuid)
+    act_as_system_user do
+      c1.update_attributes!(state: Container::Locked)
+      c1.update_attributes!(state: Container::Running)
+    end
+
+    cr2 = with_container_auth(c1) do
+      create_minimal_req!(priority: 10, state: "Committed", container_count_max: 2, command: ["echo", "foo2"])
+    end
+    c2 = Container.find_by_uuid(cr2.container_uuid)
+    act_as_system_user do
+      c2.update_attributes!(state: Container::Locked)
+      c2.update_attributes!(state: Container::Running)
+    end
+
+    cr3 = with_container_auth(c2) do
+      create_minimal_req!(priority: 10, state: "Committed", container_count_max: 2, command: ["echo", "foo3"])
+    end
+    c3 = Container.find_by_uuid(cr3.container_uuid)
+
+    act_as_system_user do
+      c3.update_attributes!(state: Container::Locked)
+      c3.update_attributes!(state: Container::Running)
+    end
+
+    # All the containers are in running state
+
+    c1.reload
+
+    # c1 goes to cancelled state
+    act_as_system_user do
+      c1.state = "Cancelled"
+      c1.save!
+    end
+
+    cr1.reload
+    cr2.reload
+    cr3.reload
+
+    # Because the root request is still live, it should be retried.
+    # Assumes the root is something like arvados-cwl-runner where
+    # container reuse enables it to more or less pick up where it left
+    # off.
+    assert_equal 2, cr1.container_count
+    assert_equal 'Committed', cr1.state
+
+    # These keep running.
+    assert_equal 1, cr2.container_count
+    assert_equal 'Committed', cr2.state
+
+    assert_equal 1, cr3.container_count
+    assert_equal 'Committed', cr3.state
+  end
+
   test "Output collection name setting using output_name with name collision resolution" do
     set_user_from_auth :active
     output_name = 'unimaginative name'
@@ -1311,8 +1479,41 @@ class ContainerRequestTest < ActiveSupport::TestCase
       cr.destroy
 
       # the cr's container now has priority of 0
+      c.reload
+      assert_equal 0, c.priority
+    end
+  end
+
+  test "trash the project containing a container_request and check its container's priority" do
+    act_as_user users(:active) do
+      cr = ContainerRequest.find_by_uuid container_requests(:running_to_be_deleted).uuid
+
+      # initially the cr's container has priority > 0
       c = Container.find_by_uuid(cr.container_uuid)
+      assert_equal 1, c.priority
+
+      prj = Group.find_by_uuid cr.owner_uuid
+      prj.update_attributes!(trash_at: db_current_time)
+
+      # the cr's container now has priority of 0
+      c.reload
       assert_equal 0, c.priority
+
+      assert_equal c.state, 'Running'
+      assert_equal cr.state, 'Committed'
+
+      # mark the container as cancelled, this should cause the
+      # container request to go to final state and run the finalize
+      # function
+      act_as_system_user do
+        c.update_attributes!(state: 'Cancelled', log: 'fa7aeb5140e2848d39b416daeef4ffc5+45')
+      end
+      c.reload
+      cr.reload
+
+      assert_equal c.state, 'Cancelled'
+      assert_equal cr.state, 'Final'
+      assert_equal nil, cr.log_uuid
     end
   end