X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/fb9f29b33a380616211a56973045dac1254977ee..e1c7395cf6e649876132030c2011434581d3a66a:/services/api/test/unit/container_test.rb diff --git a/services/api/test/unit/container_test.rb b/services/api/test/unit/container_test.rb index 1a53df7dab..286aa32ae2 100644 --- a/services/api/test/unit/container_test.rb +++ b/services/api/test/unit/container_test.rb @@ -14,7 +14,7 @@ class ContainerTest < ActiveSupport::TestCase container_image: 'fa3c1a9cb6783f85f2ecda037e07b8c3+167', output_path: '/tmp', priority: 1, - runtime_constraints: {"vcpus" => 1, "ram" => 1}, + runtime_constraints: {"vcpus" => 1, "ram" => 1, "cuda" => {"device_count":0, "driver_version": "", "hardware_capability": ""}}, } REUSABLE_COMMON_ATTRS = { @@ -23,8 +23,11 @@ class ContainerTest < ActiveSupport::TestCase command: ["echo", "hello"], output_path: "test", runtime_constraints: { + "API" => false, + "keep_cache_disk" => 0, + "keep_cache_ram" => 0, "ram" => 12000000000, - "vcpus" => 4, + "vcpus" => 4 }, mounts: { "test" => {"kind" => "json"}, @@ -37,6 +40,25 @@ class ContainerTest < ActiveSupport::TestCase runtime_auth_scopes: ["all"] } + REUSABLE_ATTRS_SLIM = { + command: ["echo", "slim"], + container_image: "9ae44d5792468c58bcf85ce7353c7027+124", + cwd: "test", + environment: {}, + mounts: {}, + output_path: "test", + runtime_auth_scopes: ["all"], + runtime_constraints: { + "API" => false, + "keep_cache_disk" => 0, + "keep_cache_ram" => 0, + "ram" => 8 << 30, + "vcpus" => 4 + }, + runtime_user_uuid: "zzzzz-tpzed-xurymjxw79nv3jz", + secret_mounts: {}, + } + def request_only attrs attrs.reject {|k| [:runtime_user_uuid, :runtime_auth_scopes].include? k} end @@ -184,7 +206,7 @@ class ContainerTest < ActiveSupport::TestCase assert_equal c1.runtime_status, {} assert_equal Container::Queued, c1.state - assert_raises ActiveRecord::RecordInvalid do + assert_raises ArvadosModel::PermissionDeniedError do c1.update_attributes! runtime_status: {'error' => 'Oops!'} end @@ -227,11 +249,12 @@ class ContainerTest < ActiveSupport::TestCase set_user_from_auth :active env = {"C" => "3", "B" => "2", "A" => "1"} m = {"F" => {"kind" => "3"}, "E" => {"kind" => "2"}, "D" => {"kind" => "1"}} - rc = {"vcpus" => 1, "ram" => 1, "keep_cache_ram" => 1} + rc = {"vcpus" => 1, "ram" => 1, "keep_cache_ram" => 1, "keep_cache_disk" => 0, "API" => true, "cuda" => {"device_count":0, "driver_version": "", "hardware_capability": ""}} c, _ = minimal_new(environment: env, mounts: m, runtime_constraints: rc) - assert_equal c.environment.to_json, Container.deep_sort_hash(env).to_json - assert_equal c.mounts.to_json, Container.deep_sort_hash(m).to_json - assert_equal c.runtime_constraints.to_json, Container.deep_sort_hash(rc).to_json + c.reload + assert_equal Container.deep_sort_hash(env).to_json, c.environment.to_json + assert_equal Container.deep_sort_hash(m).to_json, c.mounts.to_json + assert_equal Container.deep_sort_hash(rc).to_json, c.runtime_constraints.to_json end test 'deep_sort_hash on array of hashes' do @@ -241,7 +264,7 @@ class ContainerTest < ActiveSupport::TestCase end test "find_reusable method should select higher priority queued container" do - Rails.configuration.log_reuse_decisions = true + Rails.configuration.Containers.LogReuseDecisions = true set_user_from_auth :active common_attrs = REUSABLE_COMMON_ATTRS.merge({environment:{"var" => "queued"}}) c_low_priority, _ = minimal_new(common_attrs.merge({use_existing:false, priority:1})) @@ -388,9 +411,11 @@ class ContainerTest < ActiveSupport::TestCase runtime_status: {'warning' => 'This is not an error'}, progress: 0.15}) c_faster_started_second.update_attributes!({state: Container::Locked}) + assert_equal 0, Container.where("runtime_status->'error' is not null").count c_faster_started_second.update_attributes!({state: Container::Running, runtime_status: {'error' => 'Something bad happened'}, progress: 0.2}) + assert_equal 1, Container.where("runtime_status->'error' is not null").count reused = Container.find_reusable(common_attrs) assert_not_nil reused # Selected the non-failing container even if it's the one with less progress done @@ -509,7 +534,7 @@ class ContainerTest < ActiveSupport::TestCase test "find_reusable with logging enabled" do set_user_from_auth :active - Rails.configuration.log_reuse_decisions = true + Rails.configuration.Containers.LogReuseDecisions = true Rails.logger.expects(:info).at_least(3) Container.find_reusable(REUSABLE_COMMON_ATTRS) end @@ -559,6 +584,7 @@ class ContainerTest < ActiveSupport::TestCase assert_equal Container::Queued, c1.state reused = Container.find_reusable(common_attrs.merge(runtime_token_attr(:container_runtime_token))) # See #14584 + assert_not_nil reused assert_equal c1.uuid, reused.uuid end @@ -569,6 +595,7 @@ class ContainerTest < ActiveSupport::TestCase assert_equal Container::Queued, c1.state reused = Container.find_reusable(common_attrs.merge(runtime_token_attr(:container_runtime_token))) # See #14584 + assert_not_nil reused assert_equal c1.uuid, reused.uuid end @@ -579,9 +606,37 @@ class ContainerTest < ActiveSupport::TestCase assert_equal Container::Queued, c1.state reused = Container.find_reusable(common_attrs.merge(runtime_token_attr(:container_runtime_token))) # See #14584 + assert_not_nil reused assert_equal c1.uuid, reused.uuid end + test "find_reusable method with cuda" do + set_user_from_auth :active + # No cuda + no_cuda_attrs = REUSABLE_COMMON_ATTRS.merge({use_existing:false, priority:1, environment:{"var" => "queued"}, + runtime_constraints: {"vcpus" => 1, "ram" => 1, "keep_cache_disk"=>0, "keep_cache_ram"=>268435456, "API" => false, + "cuda" => {"device_count":0, "driver_version": "", "hardware_capability": ""}},}) + c1, _ = minimal_new(no_cuda_attrs) + assert_equal Container::Queued, c1.state + + # has cuda + cuda_attrs = REUSABLE_COMMON_ATTRS.merge({use_existing:false, priority:1, environment:{"var" => "queued"}, + runtime_constraints: {"vcpus" => 1, "ram" => 1, "keep_cache_disk"=>0, "keep_cache_ram"=>268435456, "API" => false, + "cuda" => {"device_count":1, "driver_version": "11.0", "hardware_capability": "9.0"}},}) + c2, _ = minimal_new(cuda_attrs) + assert_equal Container::Queued, c2.state + + # should find the no cuda one + reused = Container.find_reusable(no_cuda_attrs) + assert_not_nil reused + assert_equal reused.uuid, c1.uuid + + # should find the cuda one + reused = Container.find_reusable(cuda_attrs) + assert_not_nil reused + assert_equal reused.uuid, c2.uuid + end + test "Container running" do set_user_from_auth :active c, _ = minimal_new priority: 1 @@ -661,10 +716,12 @@ class ContainerTest < ActiveSupport::TestCase auth_exp = ApiClientAuthorization.find_by_uuid(auth_uuid_was).expires_at assert_operator auth_exp, :<, db_current_time + + assert_nil ApiClientAuthorization.validate(token: ApiClientAuthorization.find_by_uuid(auth_uuid_was).token) end test "Exceed maximum lock-unlock cycles" do - Rails.configuration.max_container_dispatch_attempts = 3 + Rails.configuration.Containers.MaxDispatchAttempts = 3 set_user_from_auth :active c, cr = minimal_new @@ -740,6 +797,17 @@ class ContainerTest < ActiveSupport::TestCase check_no_change_from_cancelled c end + test "Container locked with non-expiring token" do + Rails.configuration.API.TokenMaxLifetime = 1.hour + set_user_from_auth :active + c, _ = minimal_new + set_user_from_auth :dispatch1 + assert c.lock, show_errors(c) + refute c.auth.nil? + assert c.auth.expires_at.nil? + assert c.auth.user_id == User.find_by_uuid(users(:active).uuid).id + end + test "Container locked cancel with log" do set_user_from_auth :active c, _ = minimal_new @@ -775,16 +843,61 @@ class ContainerTest < ActiveSupport::TestCase end end - test "Container only set exit code on complete" do + [ + [Container::Queued, {state: Container::Locked}], + [Container::Queued, {state: Container::Running}], + [Container::Queued, {state: Container::Complete}], + [Container::Queued, {state: Container::Cancelled}], + [Container::Queued, {priority: 123456789}], + [Container::Queued, {runtime_status: {'error' => 'oops'}}], + [Container::Queued, {cwd: '/'}], + [Container::Locked, {state: Container::Running}], + [Container::Locked, {state: Container::Queued}], + [Container::Locked, {priority: 123456789}], + [Container::Locked, {runtime_status: {'error' => 'oops'}}], + [Container::Locked, {cwd: '/'}], + [Container::Running, {state: Container::Complete}], + [Container::Running, {state: Container::Cancelled}], + [Container::Running, {priority: 123456789}], + [Container::Running, {runtime_status: {'error' => 'oops'}}], + [Container::Running, {cwd: '/'}], + [Container::Running, {gateway_address: "172.16.0.1:12345"}], + [Container::Running, {interactive_session_started: true}], + [Container::Complete, {state: Container::Cancelled}], + [Container::Complete, {priority: 123456789}], + [Container::Complete, {runtime_status: {'error' => 'oops'}}], + [Container::Complete, {cwd: '/'}], + [Container::Cancelled, {cwd: '/'}], + ].each do |start_state, updates| + test "Container update #{updates.inspect} when #{start_state} forbidden for non-admin" do + set_user_from_auth :active + c, _ = minimal_new + if start_state != Container::Queued + set_user_from_auth :dispatch1 + c.lock + if start_state != Container::Locked + c.update_attributes! state: Container::Running + if start_state != Container::Running + c.update_attributes! state: start_state + end + end + end + assert_equal c.state, start_state + set_user_from_auth :active + assert_raises(ArvadosModel::PermissionDeniedError) do + c.update_attributes! updates + end + end + end + + test "can only change exit code while running and at completion" do set_user_from_auth :active c, _ = minimal_new set_user_from_auth :dispatch1 c.lock + check_illegal_updates c, [{exit_code: 1}] c.update_attributes! state: Container::Running - - check_illegal_updates c, [{exit_code: 1}, - {exit_code: 1, state: Container::Cancelled}] - + assert c.update_attributes(exit_code: 1) assert c.update_attributes(exit_code: 1, state: Container::Complete) end @@ -838,7 +951,7 @@ class ContainerTest < ActiveSupport::TestCase end ["auth_uuid", "runtime_token"].each do |tok| - test "#{tok} can set output, progress, runtime_status, state on running container -- but not log" do + test "#{tok} can set output, progress, runtime_status, state, exit_code on running container -- but not log" do if tok == "runtime_token" set_user_from_auth :spectator c, _ = minimal_new(container_image: "9ae44d5792468c58bcf85ce7353c7027+124", @@ -865,9 +978,11 @@ class ContainerTest < ActiveSupport::TestCase Thread.current[:user] = auth.user end + assert c.update_attributes(gateway_address: "127.0.0.1:9") assert c.update_attributes(output: collections(:collection_owned_by_active).portable_data_hash) assert c.update_attributes(runtime_status: {'warning' => 'something happened'}) assert c.update_attributes(progress: 0.5) + assert c.update_attributes(exit_code: 0) refute c.update_attributes(log: collections(:real_log_collection).portable_data_hash) c.reload assert c.update_attributes(state: Container::Complete, exit_code: 0) @@ -897,7 +1012,9 @@ class ContainerTest < ActiveSupport::TestCase c.update_attributes! state: Container::Running set_user_from_auth :running_to_be_deleted_container_auth - refute c.update_attributes(output: collections(:foo_file).portable_data_hash) + assert_raises(ArvadosModel::PermissionDeniedError) do + c.update_attributes(output: collections(:foo_file).portable_data_hash) + end end test "can set trashed output on running container" do @@ -931,6 +1048,15 @@ class ContainerTest < ActiveSupport::TestCase end end + test "user cannot delete" do + set_user_from_auth :active + c, _ = minimal_new + assert_raises ArvadosModel::PermissionDeniedError do + c.destroy + end + assert Container.find_by_uuid(c.uuid) + end + [ {state: Container::Complete, exit_code: 0, output: '1f4b0bc7583c2a7f9102c395f4ffc5e3+45'}, {state: Container::Cancelled}, @@ -956,4 +1082,260 @@ class ContainerTest < ActiveSupport::TestCase assert_no_secrets_logged end end + + def configure_preemptible_instance_type + Rails.configuration.InstanceTypes = ConfigLoader.to_OrderedOptions({ + "a1.small.pre" => { + "Preemptible" => true, + "Price" => 0.1, + "ProviderType" => "a1.small", + "VCPUs" => 1, + "RAM" => 1000000000, + }, + }) + end + + def vary_parameters(**kwargs) + # kwargs is a hash that maps parameters to an array of values. + # This function enumerates every possible hash where each key has one of + # the values from its array. + # The output keys are strings since that's what container hash attributes + # want. + # A nil value yields a hash without that key. + [[:_, nil]].product( + *kwargs.map { |(key, values)| [key.to_s].product(values) }, + ).map { |param_pairs| Hash[param_pairs].compact } + end + + def retry_with_scheduling_parameters(param_hashes) + set_user_from_auth :admin + containers = {} + requests = [] + param_hashes.each do |scheduling_parameters| + container, request = minimal_new(scheduling_parameters: scheduling_parameters) + containers[container.uuid] = container + requests << request + end + refute(containers.empty?, "buggy test: no scheduling parameters enumerated") + assert_equal(1, containers.length) + _, container1 = containers.shift + container1.lock + container1.update_attributes!(state: Container::Cancelled) + container1.reload + request1 = requests.shift + request1.reload + assert_not_equal(container1.uuid, request1.container_uuid) + requests.each do |request| + request.reload + assert_equal(request1.container_uuid, request.container_uuid) + end + container2 = Container.find_by_uuid(request1.container_uuid) + assert_not_nil(container2) + return container2 + end + + preemptible_values = [true, false, nil] + preemptible_values.permutation(1).chain( + preemptible_values.product(preemptible_values), + preemptible_values.product(preemptible_values, preemptible_values), + ).each do |preemptible_a| + test "retry requests scheduled with preemptible=#{preemptible_a}" do + configure_preemptible_instance_type + param_hashes = vary_parameters(preemptible: preemptible_a) + container = retry_with_scheduling_parameters(param_hashes) + assert_equal(preemptible_a.all?, + container.scheduling_parameters["preemptible"] || false) + end + end + + partition_values = [nil, [], ["alpha"], ["alpha", "bravo"], ["bravo", "charlie"]] + partition_values.permutation(1).chain( + partition_values.permutation(2), + ).each do |partitions_a| + test "retry requests scheduled with partitions=#{partitions_a}" do + param_hashes = vary_parameters(partitions: partitions_a) + container = retry_with_scheduling_parameters(param_hashes) + expected = if partitions_a.any? { |value| value.nil? or value.empty? } + [] + else + partitions_a.flatten.uniq + end + actual = container.scheduling_parameters["partitions"] || [] + assert_equal(expected.sort, actual.sort) + end + end + + runtime_values = [nil, 0, 1, 2, 3] + runtime_values.permutation(1).chain( + runtime_values.permutation(2), + runtime_values.permutation(3), + ).each do |max_run_time_a| + test "retry requests scheduled with max_run_time=#{max_run_time_a}" do + param_hashes = vary_parameters(max_run_time: max_run_time_a) + container = retry_with_scheduling_parameters(param_hashes) + expected = if max_run_time_a.any? { |value| value.nil? or value == 0 } + 0 + else + max_run_time_a.max + end + actual = container.scheduling_parameters["max_run_time"] || 0 + assert_equal(expected, actual) + end + end + + test "retry requests with multi-varied scheduling parameters" do + configure_preemptible_instance_type + param_hashes = [{ + "partitions": ["alpha", "bravo"], + "preemptible": true, + "max_run_time": 10, + }, { + "partitions": ["alpha", "charlie"], + "max_run_time": 20, + }, { + "partitions": ["bravo", "charlie"], + "preemptible": false, + "max_run_time": 30, + }] + container = retry_with_scheduling_parameters(param_hashes) + actual = container.scheduling_parameters + assert_equal(["alpha", "bravo", "charlie"], actual["partitions"]&.sort) + assert_equal(false, actual["preemptible"] || false) + assert_equal(30, actual["max_run_time"]) + end + + test "retry requests with unset scheduling parameters" do + configure_preemptible_instance_type + param_hashes = vary_parameters( + preemptible: [nil, true], + partitions: [nil, ["alpha"]], + max_run_time: [nil, 5], + ) + container = retry_with_scheduling_parameters(param_hashes) + actual = container.scheduling_parameters + assert_equal([], actual["partitions"] || []) + assert_equal(false, actual["preemptible"] || false) + assert_equal(0, actual["max_run_time"] || 0) + end + + test "retry requests with default scheduling parameters" do + configure_preemptible_instance_type + param_hashes = vary_parameters( + preemptible: [false, true], + partitions: [[], ["bravo"]], + max_run_time: [0, 1], + ) + container = retry_with_scheduling_parameters(param_hashes) + actual = container.scheduling_parameters + assert_equal([], actual["partitions"] || []) + assert_equal(false, actual["preemptible"] || false) + assert_equal(0, actual["max_run_time"] || 0) + end + + def run_container(request_params, final_attrs) + final_attrs[:state] ||= Container::Complete + if final_attrs[:state] == Container::Complete + final_attrs[:exit_code] ||= 0 + final_attrs[:log] ||= collections(:log_collection).portable_data_hash + final_attrs[:output] ||= collections(:multilevel_collection_1).portable_data_hash + end + container, request = minimal_new(request_params) + container.lock + container.update_attributes!(state: Container::Running) + container.update_attributes!(final_attrs) + return container, request + end + + def check_reuse_with_variations(default_keep_cache_ram, vary_attr, start_value, variations) + container_params = REUSABLE_ATTRS_SLIM.merge(vary_attr => start_value) + orig_default = Rails.configuration.Containers.DefaultKeepCacheRAM + begin + Rails.configuration.Containers.DefaultKeepCacheRAM = default_keep_cache_ram + set_user_from_auth :admin + expected, _ = run_container(container_params, {}) + variations.each do |variation| + full_variation = REUSABLE_ATTRS_SLIM[vary_attr].merge(variation) + parameters = REUSABLE_ATTRS_SLIM.merge(vary_attr => full_variation) + actual = Container.find_reusable(parameters) + assert_equal(expected.uuid, actual&.uuid, + "request with #{vary_attr}=#{variation} did not reuse container") + end + ensure + Rails.configuration.Containers.DefaultKeepCacheRAM = orig_default + end + end + + # Test that we can reuse a container with a known keep_cache_ram constraint, + # no matter what keep_cache_* constraints the new request uses. + [0, 2 << 30, 4 << 30].product( + [0, 1], + [true, false], + ).each do |(default_keep_cache_ram, multiplier, keep_disk_constraint)| + test "reuse request with DefaultKeepCacheRAM=#{default_keep_cache_ram}, keep_cache_ram*=#{multiplier}, keep_cache_disk=#{keep_disk_constraint}" do + runtime_constraints = REUSABLE_ATTRS_SLIM[:runtime_constraints].merge( + "keep_cache_ram" => default_keep_cache_ram * multiplier, + ) + if not keep_disk_constraint + # Simulate a container that predates keep_cache_disk by deleting + # the constraint entirely. + runtime_constraints.delete("keep_cache_disk") + end + # Important values are: + # * 0 + # * 2GiB, the minimum default keep_cache_disk + # * 8GiB, the default keep_cache_disk based on container ram + # * 32GiB, the maximum default keep_cache_disk + # Check these values and values in between. + vary_values = [0, 1, 2, 6, 8, 10, 32, 33].map { |v| v << 30 }.to_a + variations = vary_parameters(keep_cache_ram: vary_values) + .chain(vary_parameters(keep_cache_disk: vary_values)) + check_reuse_with_variations( + default_keep_cache_ram, + :runtime_constraints, + runtime_constraints, + variations, + ) + end + end + + # Test that we can reuse a container with a known keep_cache_disk constraint, + # no matter what keep_cache_* constraints the new request uses. + # keep_cache_disk values are the important values discussed in the test above. + [0, 2 << 30, 4 << 30] + .product([0, 2 << 30, 8 << 30, 32 << 30]) + .each do |(default_keep_cache_ram, keep_cache_disk)| + test "reuse request with DefaultKeepCacheRAM=#{default_keep_cache_ram} and keep_cache_disk=#{keep_cache_disk}" do + runtime_constraints = REUSABLE_ATTRS_SLIM[:runtime_constraints].merge( + "keep_cache_disk" => keep_cache_disk, + ) + vary_values = [0, 1, 2, 6, 8, 10, 32, 33].map { |v| v << 30 }.to_a + variations = vary_parameters(keep_cache_ram: vary_values) + .chain(vary_parameters(keep_cache_disk: vary_values)) + check_reuse_with_variations( + default_keep_cache_ram, + :runtime_constraints, + runtime_constraints, + variations, + ) + end + end + + # Test that a container request can reuse a container with an exactly + # matching keep_cache_* constraint, no matter what the defaults. + [0, 2 << 30, 4 << 30].product( + ["keep_cache_disk", "keep_cache_ram"], + [135790, 13 << 30, 135 << 30], + ).each do |(default_keep_cache_ram, constraint_key, constraint_value)| + test "reuse request with #{constraint_key}=#{constraint_value} and DefaultKeepCacheRAM=#{default_keep_cache_ram}" do + runtime_constraints = REUSABLE_ATTRS_SLIM[:runtime_constraints].merge( + constraint_key => constraint_value, + ) + check_reuse_with_variations( + default_keep_cache_ram, + :runtime_constraints, + runtime_constraints, + [runtime_constraints], + ) + end + end end