20877: Do not create log or output collections if parent is frozen or trashed
[arvados.git] / services / api / test / unit / container_request_test.rb
index efc61eee8c80fcfa509478ebc0ec51e947e3cccb..a64adba6ff19c3619f1f56c503551161b6735aa9 100644 (file)
@@ -107,7 +107,7 @@ class ContainerRequestTest < ActiveSupport::TestCase
     {"runtime_constraints" => {"vcpus" => 1}},
     {"runtime_constraints" => {"vcpus" => 1, "ram" => nil}},
     {"runtime_constraints" => {"vcpus" => 0, "ram" => 123}},
-    {"runtime_constraints" => {"vcpus" => "1", "ram" => "123"}},
+    {"runtime_constraints" => {"vcpus" => "1", "ram" => -1}},
     {"mounts" => {"FOO" => "BAR"}},
     {"mounts" => {"FOO" => {}}},
     {"mounts" => {"FOO" => {"kind" => "tmp", "capacity" => 42.222}}},
@@ -164,7 +164,7 @@ class ContainerRequestTest < ActiveSupport::TestCase
 
   test "Container request commit" do
     set_user_from_auth :active
-    cr = create_minimal_req!(runtime_constraints: {"vcpus" => 2, "ram" => 30})
+    cr = create_minimal_req!(runtime_constraints: {"vcpus" => 2, "ram" => 300000000})
 
     assert_nil cr.container_uuid
 
@@ -175,10 +175,9 @@ class ContainerRequestTest < ActiveSupport::TestCase
 
     cr.reload
 
-    assert ({"vcpus" => 2, "ram" => 30}.to_a - cr.runtime_constraints.to_a).empty?
+    assert_empty({"vcpus" => 2, "ram" => 300000000}.to_a - cr.runtime_constraints.to_a)
 
     assert_equal 0, Rails.configuration.Containers.DefaultKeepCacheRAM
-    assert_equal 8589934592, Rails.configuration.Containers.DefaultKeepCacheDisk
 
     assert_not_nil cr.container_uuid
     c = Container.find_by_uuid cr.container_uuid
@@ -189,7 +188,7 @@ class ContainerRequestTest < ActiveSupport::TestCase
     assert_equal({}, c.environment)
     assert_equal({"/out" => {"kind"=>"tmp", "capacity"=>1000000}}, c.mounts)
     assert_equal "/out", c.output_path
-    assert ({"keep_cache_disk"=>8589934592, "keep_cache_ram"=>0, "vcpus" => 2, "ram" => 30}.to_a - c.runtime_constraints.to_a).empty?
+    assert ({"keep_cache_disk" => 2<<30, "keep_cache_ram" => 0, "vcpus" => 2, "ram" => 300000000}.to_a - c.runtime_constraints.to_a).empty?
     assert_operator 0, :<, c.priority
 
     assert_raises(ActiveRecord::RecordInvalid) do
@@ -395,14 +394,15 @@ class ContainerRequestTest < ActiveSupport::TestCase
     ]
     parents = toplevel_crs.map(&findctr)
 
-    children = parents.map do |parent|
+    children_crs = parents.map do |parent|
       lock_and_run(parent)
       with_container_auth(parent) do
         create_minimal_req!(state: "Committed",
                             priority: 1,
                             environment: {"child" => parent.environment["workflow"]})
       end
-    end.map(&findctr)
+    end
+    children = children_crs.map(&findctr)
 
     grandchildren = children.reverse.map do |child|
       lock_and_run(child)
@@ -467,6 +467,36 @@ class ContainerRequestTest < ActiveSupport::TestCase
     assert_operator shared_grandchild.priority, :<=, grandchildren[2].priority
     assert_operator shared_grandchild.priority, :<=, children[2].priority
     assert_operator shared_grandchild.priority, :<=, parents[2].priority
+
+    # cancelling the most recent toplevel container should
+    # reprioritize all of its descendants (except the shared
+    # grandchild) to zero
+    toplevel_crs[2].update_attributes!(priority: 0)
+    (parents + children + grandchildren + [shared_grandchild]).map(&:reload)
+    assert_operator 0, :==, parents[2].priority
+    assert_operator 0, :==, children[2].priority
+    assert_operator 0, :==, grandchildren[2].priority
+    assert_operator shared_grandchild.priority, :==, grandchildren[0].priority
+
+    # cancel a child request, the parent should be > 0 but
+    # the child and grandchild go to 0.
+    children_crs[1].update_attributes!(priority: 0)
+    (parents + children + grandchildren + [shared_grandchild]).map(&:reload)
+    assert_operator 0, :<, parents[1].priority
+    assert_operator parents[0].priority, :>, parents[1].priority
+    assert_operator 0, :==, children[1].priority
+    assert_operator 0, :==, grandchildren[1].priority
+    assert_operator shared_grandchild.priority, :==, grandchildren[0].priority
+
+    # update the parent, it should get a higher priority but the children and
+    # grandchildren should remain at 0
+    toplevel_crs[1].update_attributes!(priority: 6)
+    (parents + children + grandchildren + [shared_grandchild]).map(&:reload)
+    assert_operator 0, :<, parents[1].priority
+    assert_operator parents[0].priority, :<, parents[1].priority
+    assert_operator 0, :==, children[1].priority
+    assert_operator 0, :==, grandchildren[1].priority
+    assert_operator shared_grandchild.priority, :==, grandchildren[0].priority
   end
 
   [
@@ -483,23 +513,38 @@ class ContainerRequestTest < ActiveSupport::TestCase
   end
 
   [
-    ['running_container_auth', 'zzzzz-dz642-runningcontainr', 501],
-  ].each do |token, expected, expected_priority|
-    test "create as #{token} with requesting_container_uuid set and expect output to be intermediate" do
+    [:admin, 0, "output"],
+    [:admin, 19, "output"],
+    [:admin, nil, "output"],
+    [:running_container_auth, 0, "intermediate"],
+    [:running_container_auth, 29, "intermediate"],
+    [:running_container_auth, nil, "intermediate"],
+  ].each do |token, exit_code, expect_output_type|
+    test "container with exit_code #{exit_code} has collection types set with output type #{expect_output_type}" do
+      final_state = if exit_code.nil?
+                      Container::Cancelled
+                    else
+                      Container::Complete
+                    end
       set_user_from_auth token
-      cr = create_minimal_req!
-      assert_not_nil cr.uuid, 'uuid should be set for newly created container_request'
-      assert_equal expected, cr.requesting_container_uuid
-      assert_equal expected_priority, cr.priority
-
-      cr.state = ContainerRequest::Committed
-      cr.save!
-
-      run_container(cr)
-      cr.reload
-      output = Collection.find_by_uuid(cr.output_uuid)
-      props = {"type": "intermediate", "container_request": cr.uuid}
-      assert_equal props.symbolize_keys, output.properties.symbolize_keys
+      request = create_minimal_req!(
+        container_count_max: 1,
+        priority: 500,
+        state: ContainerRequest::Committed,
+      )
+      run_container(request, final_state: final_state, exit_code: exit_code)
+      request.reload
+      assert_equal(ContainerRequest::Final, request.state)
+
+      output = Collection.find_by_uuid(request.output_uuid)
+      assert_not_nil(output)
+      assert_equal(request.uuid, output.properties["container_request"])
+      assert_equal(expect_output_type, output.properties["type"])
+
+      log = Collection.find_by_uuid(request.log_uuid)
+      assert_not_nil(log)
+      assert_equal(request.uuid, log.properties["container_request"])
+      assert_equal("log", log.properties["type"])
     end
   end
 
@@ -905,6 +950,174 @@ class ContainerRequestTest < ActiveSupport::TestCase
 
   end
 
+  test "Retry sub-request on error" do
+    set_user_from_auth :active
+    cr1 = create_minimal_req!(priority: 1, state: "Committed", container_count_max: 2, command: ["echo", "foo1"])
+    c1 = Container.find_by_uuid(cr1.container_uuid)
+    act_as_system_user do
+      c1.update_attributes!(state: Container::Locked)
+      c1.update_attributes!(state: Container::Running)
+    end
+
+    cr2 = with_container_auth(c1) do
+      create_minimal_req!(priority: 10, state: "Committed", container_count_max: 2, command: ["echo", "foo2"])
+    end
+    c2 = Container.find_by_uuid(cr2.container_uuid)
+    act_as_system_user do
+      c2.update_attributes!(state: Container::Locked)
+      c2.update_attributes!(state: Container::Running)
+    end
+
+    cr3 = with_container_auth(c2) do
+      create_minimal_req!(priority: 10, state: "Committed", container_count_max: 2, command: ["echo", "foo3"])
+    end
+    c3 = Container.find_by_uuid(cr3.container_uuid)
+
+    act_as_system_user do
+      c3.update_attributes!(state: Container::Locked)
+      c3.update_attributes!(state: Container::Running)
+    end
+
+    # All the containers are in running state
+
+    c3.reload
+    cr3.reload
+
+    # c3 still running
+    assert_equal 'Running', c3.state
+    assert_equal 1, cr3.container_count
+    assert_equal 'Committed', cr3.state
+
+    # c3 goes to cancelled state
+    act_as_system_user do
+      c3.state = "Cancelled"
+      c3.save!
+    end
+
+    cr3.reload
+
+    # Because the parent request is still live, it should
+    # be retried.
+    assert_equal 2, cr3.container_count
+    assert_equal 'Committed', cr3.state
+  end
+
+  test "Do not retry sub-request when process tree is cancelled" do
+    set_user_from_auth :active
+    cr1 = create_minimal_req!(priority: 1, state: "Committed", container_count_max: 2, command: ["echo", "foo1"])
+    c1 = Container.find_by_uuid(cr1.container_uuid)
+    act_as_system_user do
+      c1.update_attributes!(state: Container::Locked)
+      c1.update_attributes!(state: Container::Running)
+    end
+
+    cr2 = with_container_auth(c1) do
+      create_minimal_req!(priority: 10, state: "Committed", container_count_max: 2, command: ["echo", "foo2"])
+    end
+    c2 = Container.find_by_uuid(cr2.container_uuid)
+    act_as_system_user do
+      c2.update_attributes!(state: Container::Locked)
+      c2.update_attributes!(state: Container::Running)
+    end
+
+    cr3 = with_container_auth(c2) do
+      create_minimal_req!(priority: 10, state: "Committed", container_count_max: 2, command: ["echo", "foo3"])
+    end
+    c3 = Container.find_by_uuid(cr3.container_uuid)
+
+    act_as_system_user do
+      c3.update_attributes!(state: Container::Locked)
+      c3.update_attributes!(state: Container::Running)
+    end
+
+    # All the containers are in running state
+
+    # Now cancel the toplevel container request
+    act_as_system_user do
+      cr1.priority = 0
+      cr1.save!
+    end
+
+    c3.reload
+    cr3.reload
+
+    # c3 still running
+    assert_equal 'Running', c3.state
+    assert_equal 1, cr3.container_count
+    assert_equal 'Committed', cr3.state
+
+    # c3 goes to cancelled state
+    act_as_system_user do
+      assert_equal 0, c3.priority
+      c3.state = "Cancelled"
+      c3.save!
+    end
+
+    cr3.reload
+
+    # Because the parent process was cancelled, it _should not_ be
+    # retried.
+    assert_equal 1, cr3.container_count
+    assert_equal 'Final', cr3.state
+  end
+
+  test "Retry process tree on error" do
+    set_user_from_auth :active
+    cr1 = create_minimal_req!(priority: 1, state: "Committed", container_count_max: 2, command: ["echo", "foo1"])
+    c1 = Container.find_by_uuid(cr1.container_uuid)
+    act_as_system_user do
+      c1.update_attributes!(state: Container::Locked)
+      c1.update_attributes!(state: Container::Running)
+    end
+
+    cr2 = with_container_auth(c1) do
+      create_minimal_req!(priority: 10, state: "Committed", container_count_max: 2, command: ["echo", "foo2"])
+    end
+    c2 = Container.find_by_uuid(cr2.container_uuid)
+    act_as_system_user do
+      c2.update_attributes!(state: Container::Locked)
+      c2.update_attributes!(state: Container::Running)
+    end
+
+    cr3 = with_container_auth(c2) do
+      create_minimal_req!(priority: 10, state: "Committed", container_count_max: 2, command: ["echo", "foo3"])
+    end
+    c3 = Container.find_by_uuid(cr3.container_uuid)
+
+    act_as_system_user do
+      c3.update_attributes!(state: Container::Locked)
+      c3.update_attributes!(state: Container::Running)
+    end
+
+    # All the containers are in running state
+
+    c1.reload
+
+    # c1 goes to cancelled state
+    act_as_system_user do
+      c1.state = "Cancelled"
+      c1.save!
+    end
+
+    cr1.reload
+    cr2.reload
+    cr3.reload
+
+    # Because the root request is still live, it should be retried.
+    # Assumes the root is something like arvados-cwl-runner where
+    # container reuse enables it to more or less pick up where it left
+    # off.
+    assert_equal 2, cr1.container_count
+    assert_equal 'Committed', cr1.state
+
+    # These keep running.
+    assert_equal 1, cr2.container_count
+    assert_equal 'Committed', cr2.state
+
+    assert_equal 1, cr3.container_count
+    assert_equal 'Committed', cr3.state
+  end
+
   test "Output collection name setting using output_name with name collision resolution" do
     set_user_from_auth :active
     output_name = 'unimaginative name'
@@ -965,7 +1178,7 @@ class ContainerRequestTest < ActiveSupport::TestCase
     assert_in_delta(delete, now + year, 10)
   end
 
-  def run_container(cr)
+  def run_container(cr, final_state: Container::Complete, exit_code: 0)
     act_as_system_user do
       logc = Collection.new(owner_uuid: system_user_uuid,
                             manifest_text: ". ef772b2f28e2c8ca84de45466ed19ee9+7815 0:0:arv-mount.txt\n")
@@ -974,8 +1187,8 @@ class ContainerRequestTest < ActiveSupport::TestCase
       c = Container.find_by_uuid(cr.container_uuid)
       c.update_attributes!(state: Container::Locked)
       c.update_attributes!(state: Container::Running)
-      c.update_attributes!(state: Container::Complete,
-                           exit_code: 0,
+      c.update_attributes!(state: final_state,
+                           exit_code: exit_code,
                            output: '1f4b0bc7583c2a7f9102c395f4ffc5e3+45',
                            log: logc.portable_data_hash)
       logc.destroy
@@ -1266,8 +1479,41 @@ class ContainerRequestTest < ActiveSupport::TestCase
       cr.destroy
 
       # the cr's container now has priority of 0
+      c.reload
+      assert_equal 0, c.priority
+    end
+  end
+
+  test "trash the project containing a container_request and check its container's priority" do
+    act_as_user users(:active) do
+      cr = ContainerRequest.find_by_uuid container_requests(:running_to_be_deleted).uuid
+
+      # initially the cr's container has priority > 0
       c = Container.find_by_uuid(cr.container_uuid)
+      assert_equal 1, c.priority
+
+      prj = Group.find_by_uuid cr.owner_uuid
+      prj.update_attributes!(trash_at: db_current_time)
+
+      # the cr's container now has priority of 0
+      c.reload
       assert_equal 0, c.priority
+
+      assert_equal c.state, 'Running'
+      assert_equal cr.state, 'Committed'
+
+      # mark the container as cancelled, this should cause the
+      # container request to go to final state and run the finalize
+      # function
+      act_as_system_user do
+        c.update_attributes!(state: 'Cancelled', log: 'fa7aeb5140e2848d39b416daeef4ffc5+45')
+      end
+      c.reload
+      cr.reload
+
+      assert_equal c.state, 'Cancelled'
+      assert_equal cr.state, 'Final'
+      assert_equal nil, cr.log_uuid
     end
   end