From: Brett Smith Date: Sun, 5 Mar 2023 02:31:06 +0000 (-0500) Subject: 19981: Reuse containers with various keep_cache constraints X-Git-Tag: 2.6.0~26^2~4 X-Git-Url: https://git.arvados.org/arvados.git/commitdiff_plain/e1c7395cf6e649876132030c2011434581d3a66a?hp=901f7a4f3723b982fd45562f74fb92c7900e0d47 19981: Reuse containers with various keep_cache constraints See the comments for rationale and discussion. Arvados-DCO-1.1-Signed-off-by: Brett Smith --- diff --git a/services/api/app/models/container.rb b/services/api/app/models/container.rb index fa6b6f296c..e6643d4c70 100644 --- a/services/api/app/models/container.rb +++ b/services/api/app/models/container.rb @@ -228,10 +228,7 @@ class Container < ArvadosModel rc['keep_cache_ram'] = Rails.configuration.Containers.DefaultKeepCacheRAM end if rc['keep_cache_disk'] == 0 and rc['keep_cache_ram'] == 0 - # If neither ram nor disk cache was specified and - # DefaultKeepCacheRAM==0, default to disk cache with size equal - # to RAM constraint (but at least 2 GiB and at most 32 GiB). - rc['keep_cache_disk'] = [[rc['ram'] || 0, 2 << 30].max, 32 << 30].min + rc['keep_cache_disk'] = bound_keep_cache_disk(rc['ram']) end rc end @@ -299,30 +296,55 @@ class Container < ArvadosModel candidates = candidates.where('secret_mounts_md5 = ?', secret_mounts_md5) log_reuse_info(candidates) { "after filtering on secret_mounts_md5 #{secret_mounts_md5.inspect}" } - if attrs[:runtime_constraints]['cuda'].nil? - attrs[:runtime_constraints]['cuda'] = { - 'device_count' => 0, - 'driver_version' => '', - 'hardware_capability' => '', - } - end - resolved_runtime_constraints = [resolve_runtime_constraints(attrs[:runtime_constraints])] - if resolved_runtime_constraints[0]['cuda']['device_count'] == 0 - # If no CUDA requested, extend search to include older container - # records that don't have a 'cuda' section in runtime_constraints - resolved_runtime_constraints << resolved_runtime_constraints[0].except('cuda') - end - if resolved_runtime_constraints[0]['keep_cache_disk'] == 0 - # If no disk cache requested, extend search to include older container - # records that don't have a 'keep_cache_disk' field in runtime_constraints - if resolved_runtime_constraints.length == 2 - # exclude the one that also excludes CUDA - resolved_runtime_constraints << resolved_runtime_constraints[1].except('keep_cache_disk') - end - resolved_runtime_constraints << resolved_runtime_constraints[0].except('keep_cache_disk') - end - - candidates = candidates.where_serialized(:runtime_constraints, resolved_runtime_constraints, md5: true, multivalue: true) + resolved_runtime_constraints = resolve_runtime_constraints(attrs[:runtime_constraints]) + # Ideally we would completely ignore Keep cache constraints when making + # reuse considerations, but our database structure makes that impractical. + # The best we can do is generate a search that matches on all likely values. + runtime_constraint_variations = { + keep_cache_disk: [ + # Check for constraints without keep_cache_disk + # (containers that predate the constraint) + nil, + # Containers that use keep_cache_ram instead + 0, + # The default value + bound_keep_cache_disk(resolved_runtime_constraints['ram']), + # The minimum default bound + bound_keep_cache_disk(0), + # The maximum default bound (presumably) + bound_keep_cache_disk(1 << 60), + # The requested value + resolved_runtime_constraints.delete('keep_cache_disk'), + ].uniq, + keep_cache_ram: [ + # Containers that use keep_cache_disk instead + 0, + # The default value + Rails.configuration.Containers.DefaultKeepCacheRAM, + # The requested value + resolved_runtime_constraints.delete('keep_cache_ram'), + ].uniq, + } + resolved_cuda = resolved_runtime_constraints['cuda'] + if resolved_cuda.nil? or resolved_cuda['device_count'] == 0 + runtime_constraint_variations[:cuda] = [ + # Check for constraints without cuda + # (containers that predate the constraint) + nil, + # The default "don't need CUDA" value + { + 'device_count' => 0, + 'driver_version' => '', + 'hardware_capability' => '', + }, + # The requested value + resolved_runtime_constraints.delete('cuda') + ].uniq + end + reusable_runtime_constraints = hash_product(runtime_constraint_variations) + .map { |v| resolved_runtime_constraints.merge(v) } + + candidates = candidates.where_serialized(:runtime_constraints, reusable_runtime_constraints, md5: true, multivalue: true) log_reuse_info(candidates) { "after filtering on runtime_constraints #{attrs[:runtime_constraints].inspect}" } log_reuse_info { "checking for state=Complete with readable output and log..." } @@ -447,6 +469,31 @@ class Container < ArvadosModel protected + def self.bound_keep_cache_disk(value) + value ||= 0 + min_value = 2 << 30 + max_value = 32 << 30 + if value < min_value + min_value + elsif value > max_value + max_value + else + value + end + end + + def self.hash_product(**kwargs) + # kwargs is a hash that maps parameters to an array of values. + # This function enumerates every possible hash where each key has one of + # the values from its array. + # The output keys are strings since that's what container hash attributes + # want. + # A nil value yields a hash without that key. + [[:_, nil]].product( + *kwargs.map { |(key, values)| [key.to_s].product(values) }, + ).map { |param_pairs| Hash[param_pairs].compact } + end + def fill_field_defaults self.state ||= Queued self.environment ||= {} diff --git a/services/api/test/unit/container_test.rb b/services/api/test/unit/container_test.rb index 5da89c7ea3..286aa32ae2 100644 --- a/services/api/test/unit/container_test.rb +++ b/services/api/test/unit/container_test.rb @@ -40,6 +40,25 @@ class ContainerTest < ActiveSupport::TestCase runtime_auth_scopes: ["all"] } + REUSABLE_ATTRS_SLIM = { + command: ["echo", "slim"], + container_image: "9ae44d5792468c58bcf85ce7353c7027+124", + cwd: "test", + environment: {}, + mounts: {}, + output_path: "test", + runtime_auth_scopes: ["all"], + runtime_constraints: { + "API" => false, + "keep_cache_disk" => 0, + "keep_cache_ram" => 0, + "ram" => 8 << 30, + "vcpus" => 4 + }, + runtime_user_uuid: "zzzzz-tpzed-xurymjxw79nv3jz", + secret_mounts: {}, + } + def request_only attrs attrs.reject {|k| [:runtime_user_uuid, :runtime_auth_scopes].include? k} end @@ -1212,4 +1231,111 @@ class ContainerTest < ActiveSupport::TestCase assert_equal(false, actual["preemptible"] || false) assert_equal(0, actual["max_run_time"] || 0) end + + def run_container(request_params, final_attrs) + final_attrs[:state] ||= Container::Complete + if final_attrs[:state] == Container::Complete + final_attrs[:exit_code] ||= 0 + final_attrs[:log] ||= collections(:log_collection).portable_data_hash + final_attrs[:output] ||= collections(:multilevel_collection_1).portable_data_hash + end + container, request = minimal_new(request_params) + container.lock + container.update_attributes!(state: Container::Running) + container.update_attributes!(final_attrs) + return container, request + end + + def check_reuse_with_variations(default_keep_cache_ram, vary_attr, start_value, variations) + container_params = REUSABLE_ATTRS_SLIM.merge(vary_attr => start_value) + orig_default = Rails.configuration.Containers.DefaultKeepCacheRAM + begin + Rails.configuration.Containers.DefaultKeepCacheRAM = default_keep_cache_ram + set_user_from_auth :admin + expected, _ = run_container(container_params, {}) + variations.each do |variation| + full_variation = REUSABLE_ATTRS_SLIM[vary_attr].merge(variation) + parameters = REUSABLE_ATTRS_SLIM.merge(vary_attr => full_variation) + actual = Container.find_reusable(parameters) + assert_equal(expected.uuid, actual&.uuid, + "request with #{vary_attr}=#{variation} did not reuse container") + end + ensure + Rails.configuration.Containers.DefaultKeepCacheRAM = orig_default + end + end + + # Test that we can reuse a container with a known keep_cache_ram constraint, + # no matter what keep_cache_* constraints the new request uses. + [0, 2 << 30, 4 << 30].product( + [0, 1], + [true, false], + ).each do |(default_keep_cache_ram, multiplier, keep_disk_constraint)| + test "reuse request with DefaultKeepCacheRAM=#{default_keep_cache_ram}, keep_cache_ram*=#{multiplier}, keep_cache_disk=#{keep_disk_constraint}" do + runtime_constraints = REUSABLE_ATTRS_SLIM[:runtime_constraints].merge( + "keep_cache_ram" => default_keep_cache_ram * multiplier, + ) + if not keep_disk_constraint + # Simulate a container that predates keep_cache_disk by deleting + # the constraint entirely. + runtime_constraints.delete("keep_cache_disk") + end + # Important values are: + # * 0 + # * 2GiB, the minimum default keep_cache_disk + # * 8GiB, the default keep_cache_disk based on container ram + # * 32GiB, the maximum default keep_cache_disk + # Check these values and values in between. + vary_values = [0, 1, 2, 6, 8, 10, 32, 33].map { |v| v << 30 }.to_a + variations = vary_parameters(keep_cache_ram: vary_values) + .chain(vary_parameters(keep_cache_disk: vary_values)) + check_reuse_with_variations( + default_keep_cache_ram, + :runtime_constraints, + runtime_constraints, + variations, + ) + end + end + + # Test that we can reuse a container with a known keep_cache_disk constraint, + # no matter what keep_cache_* constraints the new request uses. + # keep_cache_disk values are the important values discussed in the test above. + [0, 2 << 30, 4 << 30] + .product([0, 2 << 30, 8 << 30, 32 << 30]) + .each do |(default_keep_cache_ram, keep_cache_disk)| + test "reuse request with DefaultKeepCacheRAM=#{default_keep_cache_ram} and keep_cache_disk=#{keep_cache_disk}" do + runtime_constraints = REUSABLE_ATTRS_SLIM[:runtime_constraints].merge( + "keep_cache_disk" => keep_cache_disk, + ) + vary_values = [0, 1, 2, 6, 8, 10, 32, 33].map { |v| v << 30 }.to_a + variations = vary_parameters(keep_cache_ram: vary_values) + .chain(vary_parameters(keep_cache_disk: vary_values)) + check_reuse_with_variations( + default_keep_cache_ram, + :runtime_constraints, + runtime_constraints, + variations, + ) + end + end + + # Test that a container request can reuse a container with an exactly + # matching keep_cache_* constraint, no matter what the defaults. + [0, 2 << 30, 4 << 30].product( + ["keep_cache_disk", "keep_cache_ram"], + [135790, 13 << 30, 135 << 30], + ).each do |(default_keep_cache_ram, constraint_key, constraint_value)| + test "reuse request with #{constraint_key}=#{constraint_value} and DefaultKeepCacheRAM=#{default_keep_cache_ram}" do + runtime_constraints = REUSABLE_ATTRS_SLIM[:runtime_constraints].merge( + constraint_key => constraint_value, + ) + check_reuse_with_variations( + default_keep_cache_ram, + :runtime_constraints, + runtime_constraints, + [runtime_constraints], + ) + end + end end