X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/3fa6aa4043286ad61e5f29c136d3cc2942e8750d..ad040e37430803ffa7db4f1856a4e362ad2cebfe:/services/api/test/unit/container_request_test.rb diff --git a/services/api/test/unit/container_request_test.rb b/services/api/test/unit/container_request_test.rb index e5c0085184..a64adba6ff 100644 --- a/services/api/test/unit/container_request_test.rb +++ b/services/api/test/unit/container_request_test.rb @@ -107,7 +107,7 @@ class ContainerRequestTest < ActiveSupport::TestCase {"runtime_constraints" => {"vcpus" => 1}}, {"runtime_constraints" => {"vcpus" => 1, "ram" => nil}}, {"runtime_constraints" => {"vcpus" => 0, "ram" => 123}}, - {"runtime_constraints" => {"vcpus" => "1", "ram" => "123"}}, + {"runtime_constraints" => {"vcpus" => "1", "ram" => -1}}, {"mounts" => {"FOO" => "BAR"}}, {"mounts" => {"FOO" => {}}}, {"mounts" => {"FOO" => {"kind" => "tmp", "capacity" => 42.222}}}, @@ -164,7 +164,7 @@ class ContainerRequestTest < ActiveSupport::TestCase test "Container request commit" do set_user_from_auth :active - cr = create_minimal_req!(runtime_constraints: {"vcpus" => 2, "ram" => 30}) + cr = create_minimal_req!(runtime_constraints: {"vcpus" => 2, "ram" => 300000000}) assert_nil cr.container_uuid @@ -175,7 +175,9 @@ class ContainerRequestTest < ActiveSupport::TestCase cr.reload - assert ({"vcpus" => 2, "ram" => 30}.to_a - cr.runtime_constraints.to_a).empty? + assert_empty({"vcpus" => 2, "ram" => 300000000}.to_a - cr.runtime_constraints.to_a) + + assert_equal 0, Rails.configuration.Containers.DefaultKeepCacheRAM assert_not_nil cr.container_uuid c = Container.find_by_uuid cr.container_uuid @@ -186,7 +188,7 @@ class ContainerRequestTest < ActiveSupport::TestCase assert_equal({}, c.environment) assert_equal({"/out" => {"kind"=>"tmp", "capacity"=>1000000}}, c.mounts) assert_equal "/out", c.output_path - assert ({"keep_cache_ram"=>268435456, "vcpus" => 2, "ram" => 30}.to_a - c.runtime_constraints.to_a).empty? + assert ({"keep_cache_disk" => 2<<30, "keep_cache_ram" => 0, "vcpus" => 2, "ram" => 300000000}.to_a - c.runtime_constraints.to_a).empty? assert_operator 0, :<, c.priority assert_raises(ActiveRecord::RecordInvalid) do @@ -231,11 +233,12 @@ class ContainerRequestTest < ActiveSupport::TestCase act_as_system_user do Container.find_by_uuid(cr.container_uuid). - update_attributes!(state: Container::Cancelled) + update_attributes!(state: Container::Cancelled, cost: 1.25) end cr.reload assert_equal "Final", cr.state + assert_equal 1.25, cr.cumulative_cost assert_equal users(:active).uuid, cr.modified_by_user_uuid end @@ -261,12 +264,14 @@ class ContainerRequestTest < ActiveSupport::TestCase log_pdh = 'fa7aeb5140e2848d39b416daeef4ffc5+45' act_as_system_user do c.update_attributes!(state: Container::Complete, + cost: 1.25, output: output_pdh, log: log_pdh) end cr.reload assert_equal "Final", cr.state + assert_equal 1.25, cr.cumulative_cost assert_equal users(:active).uuid, cr.modified_by_user_uuid assert_not_nil cr.output_uuid @@ -389,14 +394,15 @@ class ContainerRequestTest < ActiveSupport::TestCase ] parents = toplevel_crs.map(&findctr) - children = parents.map do |parent| + children_crs = parents.map do |parent| lock_and_run(parent) with_container_auth(parent) do create_minimal_req!(state: "Committed", priority: 1, environment: {"child" => parent.environment["workflow"]}) end - end.map(&findctr) + end + children = children_crs.map(&findctr) grandchildren = children.reverse.map do |child| lock_and_run(child) @@ -461,6 +467,36 @@ class ContainerRequestTest < ActiveSupport::TestCase assert_operator shared_grandchild.priority, :<=, grandchildren[2].priority assert_operator shared_grandchild.priority, :<=, children[2].priority assert_operator shared_grandchild.priority, :<=, parents[2].priority + + # cancelling the most recent toplevel container should + # reprioritize all of its descendants (except the shared + # grandchild) to zero + toplevel_crs[2].update_attributes!(priority: 0) + (parents + children + grandchildren + [shared_grandchild]).map(&:reload) + assert_operator 0, :==, parents[2].priority + assert_operator 0, :==, children[2].priority + assert_operator 0, :==, grandchildren[2].priority + assert_operator shared_grandchild.priority, :==, grandchildren[0].priority + + # cancel a child request, the parent should be > 0 but + # the child and grandchild go to 0. + children_crs[1].update_attributes!(priority: 0) + (parents + children + grandchildren + [shared_grandchild]).map(&:reload) + assert_operator 0, :<, parents[1].priority + assert_operator parents[0].priority, :>, parents[1].priority + assert_operator 0, :==, children[1].priority + assert_operator 0, :==, grandchildren[1].priority + assert_operator shared_grandchild.priority, :==, grandchildren[0].priority + + # update the parent, it should get a higher priority but the children and + # grandchildren should remain at 0 + toplevel_crs[1].update_attributes!(priority: 6) + (parents + children + grandchildren + [shared_grandchild]).map(&:reload) + assert_operator 0, :<, parents[1].priority + assert_operator parents[0].priority, :<, parents[1].priority + assert_operator 0, :==, children[1].priority + assert_operator 0, :==, grandchildren[1].priority + assert_operator shared_grandchild.priority, :==, grandchildren[0].priority end [ @@ -477,23 +513,38 @@ class ContainerRequestTest < ActiveSupport::TestCase end [ - ['running_container_auth', 'zzzzz-dz642-runningcontainr', 501], - ].each do |token, expected, expected_priority| - test "create as #{token} with requesting_container_uuid set and expect output to be intermediate" do + [:admin, 0, "output"], + [:admin, 19, "output"], + [:admin, nil, "output"], + [:running_container_auth, 0, "intermediate"], + [:running_container_auth, 29, "intermediate"], + [:running_container_auth, nil, "intermediate"], + ].each do |token, exit_code, expect_output_type| + test "container with exit_code #{exit_code} has collection types set with output type #{expect_output_type}" do + final_state = if exit_code.nil? + Container::Cancelled + else + Container::Complete + end set_user_from_auth token - cr = create_minimal_req! - assert_not_nil cr.uuid, 'uuid should be set for newly created container_request' - assert_equal expected, cr.requesting_container_uuid - assert_equal expected_priority, cr.priority - - cr.state = ContainerRequest::Committed - cr.save! - - run_container(cr) - cr.reload - output = Collection.find_by_uuid(cr.output_uuid) - props = {"type": "intermediate", "container_request": cr.uuid} - assert_equal props.symbolize_keys, output.properties.symbolize_keys + request = create_minimal_req!( + container_count_max: 1, + priority: 500, + state: ContainerRequest::Committed, + ) + run_container(request, final_state: final_state, exit_code: exit_code) + request.reload + assert_equal(ContainerRequest::Final, request.state) + + output = Collection.find_by_uuid(request.output_uuid) + assert_not_nil(output) + assert_equal(request.uuid, output.properties["container_request"]) + assert_equal(expect_output_type, output.properties["type"]) + + log = Collection.find_by_uuid(request.log_uuid) + assert_not_nil(log) + assert_equal(request.uuid, log.properties["container_request"]) + assert_equal("log", log.properties["type"]) end end @@ -788,6 +839,7 @@ class ContainerRequestTest < ActiveSupport::TestCase prev_container_uuid = cr.container_uuid act_as_system_user do + c.update_attributes!(cost: 0.5, subrequests_cost: 1.25) c.update_attributes!(state: Container::Cancelled) end @@ -800,6 +852,9 @@ class ContainerRequestTest < ActiveSupport::TestCase c = act_as_system_user do c = Container.find_by_uuid(cr.container_uuid) + c.update_attributes!(state: Container::Locked) + c.update_attributes!(state: Container::Running) + c.update_attributes!(cost: 0.125) c.update_attributes!(state: Container::Cancelled) c end @@ -809,6 +864,7 @@ class ContainerRequestTest < ActiveSupport::TestCase assert_equal "Final", cr.state assert_equal prev_container_uuid, cr.container_uuid assert_not_equal cr2.container_uuid, cr.container_uuid + assert_equal 1.875, cr.cumulative_cost end test "Retry on container cancelled with runtime_token" do @@ -894,6 +950,174 @@ class ContainerRequestTest < ActiveSupport::TestCase end + test "Retry sub-request on error" do + set_user_from_auth :active + cr1 = create_minimal_req!(priority: 1, state: "Committed", container_count_max: 2, command: ["echo", "foo1"]) + c1 = Container.find_by_uuid(cr1.container_uuid) + act_as_system_user do + c1.update_attributes!(state: Container::Locked) + c1.update_attributes!(state: Container::Running) + end + + cr2 = with_container_auth(c1) do + create_minimal_req!(priority: 10, state: "Committed", container_count_max: 2, command: ["echo", "foo2"]) + end + c2 = Container.find_by_uuid(cr2.container_uuid) + act_as_system_user do + c2.update_attributes!(state: Container::Locked) + c2.update_attributes!(state: Container::Running) + end + + cr3 = with_container_auth(c2) do + create_minimal_req!(priority: 10, state: "Committed", container_count_max: 2, command: ["echo", "foo3"]) + end + c3 = Container.find_by_uuid(cr3.container_uuid) + + act_as_system_user do + c3.update_attributes!(state: Container::Locked) + c3.update_attributes!(state: Container::Running) + end + + # All the containers are in running state + + c3.reload + cr3.reload + + # c3 still running + assert_equal 'Running', c3.state + assert_equal 1, cr3.container_count + assert_equal 'Committed', cr3.state + + # c3 goes to cancelled state + act_as_system_user do + c3.state = "Cancelled" + c3.save! + end + + cr3.reload + + # Because the parent request is still live, it should + # be retried. + assert_equal 2, cr3.container_count + assert_equal 'Committed', cr3.state + end + + test "Do not retry sub-request when process tree is cancelled" do + set_user_from_auth :active + cr1 = create_minimal_req!(priority: 1, state: "Committed", container_count_max: 2, command: ["echo", "foo1"]) + c1 = Container.find_by_uuid(cr1.container_uuid) + act_as_system_user do + c1.update_attributes!(state: Container::Locked) + c1.update_attributes!(state: Container::Running) + end + + cr2 = with_container_auth(c1) do + create_minimal_req!(priority: 10, state: "Committed", container_count_max: 2, command: ["echo", "foo2"]) + end + c2 = Container.find_by_uuid(cr2.container_uuid) + act_as_system_user do + c2.update_attributes!(state: Container::Locked) + c2.update_attributes!(state: Container::Running) + end + + cr3 = with_container_auth(c2) do + create_minimal_req!(priority: 10, state: "Committed", container_count_max: 2, command: ["echo", "foo3"]) + end + c3 = Container.find_by_uuid(cr3.container_uuid) + + act_as_system_user do + c3.update_attributes!(state: Container::Locked) + c3.update_attributes!(state: Container::Running) + end + + # All the containers are in running state + + # Now cancel the toplevel container request + act_as_system_user do + cr1.priority = 0 + cr1.save! + end + + c3.reload + cr3.reload + + # c3 still running + assert_equal 'Running', c3.state + assert_equal 1, cr3.container_count + assert_equal 'Committed', cr3.state + + # c3 goes to cancelled state + act_as_system_user do + assert_equal 0, c3.priority + c3.state = "Cancelled" + c3.save! + end + + cr3.reload + + # Because the parent process was cancelled, it _should not_ be + # retried. + assert_equal 1, cr3.container_count + assert_equal 'Final', cr3.state + end + + test "Retry process tree on error" do + set_user_from_auth :active + cr1 = create_minimal_req!(priority: 1, state: "Committed", container_count_max: 2, command: ["echo", "foo1"]) + c1 = Container.find_by_uuid(cr1.container_uuid) + act_as_system_user do + c1.update_attributes!(state: Container::Locked) + c1.update_attributes!(state: Container::Running) + end + + cr2 = with_container_auth(c1) do + create_minimal_req!(priority: 10, state: "Committed", container_count_max: 2, command: ["echo", "foo2"]) + end + c2 = Container.find_by_uuid(cr2.container_uuid) + act_as_system_user do + c2.update_attributes!(state: Container::Locked) + c2.update_attributes!(state: Container::Running) + end + + cr3 = with_container_auth(c2) do + create_minimal_req!(priority: 10, state: "Committed", container_count_max: 2, command: ["echo", "foo3"]) + end + c3 = Container.find_by_uuid(cr3.container_uuid) + + act_as_system_user do + c3.update_attributes!(state: Container::Locked) + c3.update_attributes!(state: Container::Running) + end + + # All the containers are in running state + + c1.reload + + # c1 goes to cancelled state + act_as_system_user do + c1.state = "Cancelled" + c1.save! + end + + cr1.reload + cr2.reload + cr3.reload + + # Because the root request is still live, it should be retried. + # Assumes the root is something like arvados-cwl-runner where + # container reuse enables it to more or less pick up where it left + # off. + assert_equal 2, cr1.container_count + assert_equal 'Committed', cr1.state + + # These keep running. + assert_equal 1, cr2.container_count + assert_equal 'Committed', cr2.state + + assert_equal 1, cr3.container_count + assert_equal 'Committed', cr3.state + end + test "Output collection name setting using output_name with name collision resolution" do set_user_from_auth :active output_name = 'unimaginative name' @@ -954,7 +1178,7 @@ class ContainerRequestTest < ActiveSupport::TestCase assert_in_delta(delete, now + year, 10) end - def run_container(cr) + def run_container(cr, final_state: Container::Complete, exit_code: 0) act_as_system_user do logc = Collection.new(owner_uuid: system_user_uuid, manifest_text: ". ef772b2f28e2c8ca84de45466ed19ee9+7815 0:0:arv-mount.txt\n") @@ -963,8 +1187,8 @@ class ContainerRequestTest < ActiveSupport::TestCase c = Container.find_by_uuid(cr.container_uuid) c.update_attributes!(state: Container::Locked) c.update_attributes!(state: Container::Running) - c.update_attributes!(state: Container::Complete, - exit_code: 0, + c.update_attributes!(state: final_state, + exit_code: exit_code, output: '1f4b0bc7583c2a7f9102c395f4ffc5e3+45', log: logc.portable_data_hash) logc.destroy @@ -1255,8 +1479,41 @@ class ContainerRequestTest < ActiveSupport::TestCase cr.destroy # the cr's container now has priority of 0 + c.reload + assert_equal 0, c.priority + end + end + + test "trash the project containing a container_request and check its container's priority" do + act_as_user users(:active) do + cr = ContainerRequest.find_by_uuid container_requests(:running_to_be_deleted).uuid + + # initially the cr's container has priority > 0 c = Container.find_by_uuid(cr.container_uuid) + assert_equal 1, c.priority + + prj = Group.find_by_uuid cr.owner_uuid + prj.update_attributes!(trash_at: db_current_time) + + # the cr's container now has priority of 0 + c.reload assert_equal 0, c.priority + + assert_equal c.state, 'Running' + assert_equal cr.state, 'Committed' + + # mark the container as cancelled, this should cause the + # container request to go to final state and run the finalize + # function + act_as_system_user do + c.update_attributes!(state: 'Cancelled', log: 'fa7aeb5140e2848d39b416daeef4ffc5+45') + end + c.reload + cr.reload + + assert_equal c.state, 'Cancelled' + assert_equal cr.state, 'Final' + assert_equal nil, cr.log_uuid end end @@ -1511,4 +1768,63 @@ class ContainerRequestTest < ActiveSupport::TestCase end end + test "Cumulative cost includes retried attempts but not reused containers" do + set_user_from_auth :active + cr = create_minimal_req!(priority: 5, state: "Committed", container_count_max: 3) + c = Container.find_by_uuid cr.container_uuid + act_as_system_user do + c.update_attributes!(state: Container::Locked) + c.update_attributes!(state: Container::Running) + c.update_attributes!(state: Container::Cancelled, cost: 3) + end + cr.reload + assert_equal 3, cr.cumulative_cost + + c = Container.find_by_uuid cr.container_uuid + lock_and_run c + c.reload + assert_equal 0, c.subrequests_cost + + # cr2 is a child/subrequest + cr2 = with_container_auth(c) do + create_minimal_req!(priority: 10, state: "Committed", container_count_max: 1, command: ["echo", "foo2"]) + end + assert_equal c.uuid, cr2.requesting_container_uuid + c2 = Container.find_by_uuid cr2.container_uuid + act_as_system_user do + c2.update_attributes!(state: Container::Locked) + c2.update_attributes!(state: Container::Running) + logc = Collection.new(owner_uuid: system_user_uuid, + manifest_text: ". ef772b2f28e2c8ca84de45466ed19ee9+7815 0:0:arv-mount.txt\n") + logc.save! + c2.update_attributes!(state: Container::Complete, + exit_code: 0, + output: '1f4b0bc7583c2a7f9102c395f4ffc5e3+45', + log: logc.portable_data_hash, + cost: 7) + end + c.reload + assert_equal 7, c.subrequests_cost + + # cr3 is an identical child/subrequest, will reuse c2 + cr3 = with_container_auth(c) do + create_minimal_req!(priority: 10, state: "Committed", container_count_max: 1, command: ["echo", "foo2"]) + end + assert_equal c.uuid, cr3.requesting_container_uuid + c3 = Container.find_by_uuid cr3.container_uuid + assert_equal c2.uuid, c3.uuid + assert_equal Container::Complete, c3.state + c.reload + assert_equal 7, c.subrequests_cost + + act_as_system_user do + c.update_attributes!(state: Container::Complete, exit_code: 0, cost: 9) + end + + c.reload + assert_equal 7, c.subrequests_cost + cr.reload + assert_equal 3+7+9, cr.cumulative_cost + end + end