From 63b40a5af92aef28d8416c945ffc7c9805ae8d7d Mon Sep 17 00:00:00 2001 From: Tom Clegg Date: Wed, 22 Dec 2021 17:49:23 -0500 Subject: [PATCH] 18562: Fix UsePreemptibleInstances behavior. * Do not automatically set preemptible=true if there are no preemptible instance types available. * Do not automatically set preemptible=true on a container request that has already been committed with preemptible=false. * Do not reject updates to existing container requests with preemptible=true just because config has since changed and no longer enables it automatically. Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- lib/config/config.default.yml | 10 +- lib/config/generated_config.go | 10 +- services/api/app/models/container_request.rb | 55 +++--- .../fixtures/api_client_authorizations.yml | 7 + services/api/test/fixtures/containers.yml | 1 + .../api/test/unit/container_request_test.rb | 162 +++++++++++++----- 6 files changed, 164 insertions(+), 81 deletions(-) diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml index 7f85982253..98437e7758 100644 --- a/lib/config/config.default.yml +++ b/lib/config/config.default.yml @@ -899,10 +899,12 @@ Clusters: # go down. MaxComputeVMs: 64 - # Preemptible instance support (e.g. AWS Spot Instances) - # When true, child containers will get created with the preemptible - # scheduling parameter parameter set. - UsePreemptibleInstances: false + # Schedule all child containers on preemptible instances (e.g. AWS + # Spot Instances) even if not requested by the submitter. + # + # This flag is ignored if no preemptible instance types are + # configured. + UsePreemptibleInstances: true # PEM encoded SSH key (RSA, DSA, or ECDSA) used by the # cloud dispatcher for executing containers on worker VMs. diff --git a/lib/config/generated_config.go b/lib/config/generated_config.go index 485273e1b0..751df894e8 100644 --- a/lib/config/generated_config.go +++ b/lib/config/generated_config.go @@ -905,10 +905,12 @@ Clusters: # go down. MaxComputeVMs: 64 - # Preemptible instance support (e.g. AWS Spot Instances) - # When true, child containers will get created with the preemptible - # scheduling parameter parameter set. - UsePreemptibleInstances: false + # Schedule all child containers on preemptible instances (e.g. AWS + # Spot Instances) even if not requested by the submitter. + # + # This flag is ignored if no preemptible instance types are + # configured. + UsePreemptibleInstances: true # PEM encoded SSH key (RSA, DSA, or ECDSA) used by the # cloud dispatcher for executing containers on worker VMs. diff --git a/services/api/app/models/container_request.rb b/services/api/app/models/container_request.rb index badb40ba5f..273664c5bc 100644 --- a/services/api/app/models/container_request.rb +++ b/services/api/app/models/container_request.rb @@ -34,8 +34,6 @@ class ContainerRequest < ArvadosModel after_find :fill_container_defaults_after_find before_validation :fill_field_defaults, :if => :new_record? before_validation :fill_container_defaults - before_validation :set_default_preemptible_scheduling_parameter - before_validation :set_container validates :command, :container_image, :output_path, :cwd, :presence => true validates :output_ttl, numericality: { only_integer: true, greater_than_or_equal_to: 0 } validates :priority, numericality: { only_integer: true, greater_than_or_equal_to: 0, less_than_or_equal_to: 1000 } @@ -46,7 +44,9 @@ class ContainerRequest < ArvadosModel validate :check_update_whitelist validate :secret_mounts_key_conflict validate :validate_runtime_token - before_save :scrub_secrets + after_validation :scrub_secrets + after_validation :set_preemptible + after_validation :set_container before_create :set_requesting_container_uuid before_destroy :set_priority_zero after_save :update_priority @@ -102,6 +102,12 @@ class ContainerRequest < ArvadosModel :scheduling_parameters, :secret_mounts, :output_name, :output_ttl, :output_storage_classes] + def self.any_preemptible_instances? + Rails.configuration.InstanceTypes.any? do |k, v| + v["Preemptible"] + end + end + def self.limit_index_columns_read ["mounts"] end @@ -268,6 +274,10 @@ class ContainerRequest < ArvadosModel errors.add :container_uuid, "can only be updated to nil." return false end + if self.container_count_changed? + errors.add :container_count, "cannot be updated directly." + return false + end if state_changed? and state == Committed and container_uuid.nil? while true c = Container.resolve(self) @@ -283,11 +293,6 @@ class ContainerRequest < ArvadosModel end end if self.container_uuid != self.container_uuid_was - if self.container_count_changed? - errors.add :container_count, "cannot be updated directly." - return false - end - self.container_count += 1 return if self.container_uuid_was.nil? @@ -320,8 +325,10 @@ class ContainerRequest < ArvadosModel end end - def set_default_preemptible_scheduling_parameter - if Rails.configuration.Containers.UsePreemptibleInstances && state == Committed && get_requesting_container() + def set_preemptible + if Rails.configuration.Containers.UsePreemptibleInstances && + get_requesting_container_uuid() && + self.class.any_preemptible_instances? self.scheduling_parameters['preemptible'] = true end end @@ -384,8 +391,10 @@ class ContainerRequest < ArvadosModel scheduling_parameters['partitions'].size) errors.add :scheduling_parameters, "partitions must be an array of strings" end - if !Rails.configuration.Containers.UsePreemptibleInstances and scheduling_parameters['preemptible'] - errors.add :scheduling_parameters, "preemptible instances are not allowed" + if scheduling_parameters['preemptible'] && + (new_record? || state_changed?) && + !self.class.any_preemptible_instances? + errors.add :scheduling_parameters, "preemptible instances are not configured in InstanceTypes" end if scheduling_parameters.include? 'max_run_time' and (!scheduling_parameters['max_run_time'].is_a?(Integer) || @@ -421,20 +430,13 @@ class ContainerRequest < ArvadosModel when Committed permitted.push :priority, :container_count_max, :container_uuid - if self.container_uuid.nil? - self.errors.add :container_uuid, "has not been resolved to a container." - end - if self.priority.nil? self.errors.add :priority, "cannot be nil" end - # Allow container count to increment by 1 - if (self.container_uuid && - self.container_uuid != self.container_uuid_was && - self.container_count == 1 + (self.container_count_was || 0)) - permitted.push :container_count - end + # Allow container count to increment (not by client, only by us + # -- see set_container) + permitted.push :container_count if current_user.andand.is_admin permitted.push :log_uuid @@ -497,17 +499,14 @@ class ContainerRequest < ArvadosModel end def set_requesting_container_uuid - c = get_requesting_container() - if !c.nil? - self.requesting_container_uuid = c.uuid + if (self.requesting_container_uuid = get_requesting_container_uuid()) # Determine the priority of container request for the requesting # container. self.priority = ContainerRequest.where(container_uuid: self.requesting_container_uuid).maximum("priority") || 0 end end - def get_requesting_container - return self.requesting_container_uuid if !self.requesting_container_uuid.nil? - Container.for_current_token + def get_requesting_container_uuid + return self.requesting_container_uuid || Container.for_current_token.andand.uuid end end diff --git a/services/api/test/fixtures/api_client_authorizations.yml b/services/api/test/fixtures/api_client_authorizations.yml index d8ef63120b..c6ade21f8b 100644 --- a/services/api/test/fixtures/api_client_authorizations.yml +++ b/services/api/test/fixtures/api_client_authorizations.yml @@ -317,6 +317,13 @@ running_container_auth: api_token: it2gl94mgu3rbn5s2d06vzh73ns1y6cthct0tvg82qdlsxvbwk expires_at: 2038-01-01 00:00:00 +running_container_with_logs_auth: + uuid: zzzzz-gj3su-n4xycwjpvvi776n + api_client: untrusted + user: active + api_token: mkpdp5jbytt471lw9so1by2t5ylciojdur845rfn4dtm0etl33 + expires_at: 2038-01-01 00:00:00 + running_to_be_deleted_container_auth: uuid: zzzzz-gj3su-ty6lvu9d7u7c2sq api_client: untrusted diff --git a/services/api/test/fixtures/containers.yml b/services/api/test/fixtures/containers.yml index e7cd0abd1f..4c536338b4 100644 --- a/services/api/test/fixtures/containers.yml +++ b/services/api/test/fixtures/containers.yml @@ -400,6 +400,7 @@ running_container_with_logs: vcpus: 4 secret_mounts: {} secret_mounts_md5: 99914b932bd37a50b983c5e7c90ae93b + auth_uuid: zzzzz-gj3su-n4xycwjpvvi776n running_to_be_deleted: uuid: zzzzz-dz642-runnincntrtodel diff --git a/services/api/test/unit/container_request_test.rb b/services/api/test/unit/container_request_test.rb index 43d062af87..bf12a3960e 100644 --- a/services/api/test/unit/container_request_test.rb +++ b/services/api/test/unit/container_request_test.rb @@ -14,11 +14,21 @@ class ContainerRequestTest < ActiveSupport::TestCase def with_container_auth(ctr) auth_was = Thread.current[:api_client_authorization] - Thread.current[:api_client_authorization] = ApiClientAuthorization.find_by_uuid(ctr.auth_uuid) + client_was = Thread.current[:api_client] + token_was = Thread.current[:token] + user_was = Thread.current[:user] + auth = ApiClientAuthorization.find_by_uuid(ctr.auth_uuid) + Thread.current[:api_client_authorization] = auth + Thread.current[:api_client] = auth.api_client + Thread.current[:token] = auth.token + Thread.current[:user] = auth.user begin yield ensure Thread.current[:api_client_authorization] = auth_was + Thread.current[:api_client] = client_was + Thread.current[:token] = token_was + Thread.current[:user] = user_was end end @@ -56,6 +66,18 @@ class ContainerRequestTest < ActiveSupport::TestCase end end + def configure_preemptible_instance_type + Rails.configuration.InstanceTypes = ConfigLoader.to_OrderedOptions({ + "a1.small.pre" => { + "Preemptible" => true, + "Price" => 0.1, + "ProviderType" => "a1.small", + "VCPUs" => 1, + "RAM" => 1000000000, + }, + }) + end + test "Container request create" do set_user_from_auth :active cr = create_minimal_req! @@ -333,7 +355,7 @@ class ContainerRequestTest < ActiveSupport::TestCase cr2 = with_container_auth(c) do create_minimal_req!(priority: 10, state: "Committed", container_count_max: 1, command: ["echo", "foo2"]) end - assert_not_nil cr2.requesting_container_uuid + assert_equal c.uuid, cr2.requesting_container_uuid assert_equal users(:active).uuid, cr2.modified_by_user_uuid c2 = Container.find_by_uuid cr2.container_uuid @@ -950,63 +972,113 @@ class ContainerRequestTest < ActiveSupport::TestCase end [ - [false, ActiveRecord::RecordInvalid], - [true, nil], - ].each do |preemptible_conf, expected| - test "having Rails.configuration.Containers.UsePreemptibleInstances=#{preemptible_conf}, create preemptible container request and verify #{expected}" do - sp = {"preemptible" => true} - common_attrs = {cwd: "test", - priority: 1, - command: ["echo", "hello"], - output_path: "test", - scheduling_parameters: sp, - mounts: {"test" => {"kind" => "json"}}} - Rails.configuration.Containers.UsePreemptibleInstances = preemptible_conf + # client requests preemptible, but types are not configured + [false, false, false, true, ActiveRecord::RecordInvalid], + [true, false, false, true, ActiveRecord::RecordInvalid], + # client requests preemptible, types are configured + [false, true, false, true, true], + [true, true, false, true, true], + # client requests non-preemptible for top-level container + [false, false, false, false, false], + [true, false, false, false, false], + [false, true, false, false, false], + [true, true, false, false, false], + # client requests non-preemptible for child container, preemptible + # is enabled anyway if UsePreemptibleInstances and instance types + # are configured. + [false, false, true, false, false], + [true, false, true, false, false], + [false, true, true, false, false], + [true, true, true, false, true], + ].each do |use_preemptible, have_preemptible, is_child, ask, expect| + test "with UsePreemptibleInstances=#{use_preemptible} and preemptible types #{have_preemptible ? '' : 'not '}configured, create #{is_child ? 'child' : 'top-level'} container request with preemptible=#{ask} and expect #{expect}" do + Rails.configuration.Containers.UsePreemptibleInstances = use_preemptible + if have_preemptible + configure_preemptible_instance_type + end + common_attrs = { + cwd: "test", + priority: 1, + command: ["echo", "hello"], + output_path: "test", + scheduling_parameters: {"preemptible" => ask}, + mounts: {"test" => {"kind" => "json"}}, + } set_user_from_auth :active - cr = create_minimal_req!(common_attrs) + if is_child + cr = with_container_auth(containers(:running)) do + create_minimal_req!(common_attrs) + end + else + cr = create_minimal_req!(common_attrs) + end + + cr.reload cr.state = ContainerRequest::Committed - if !expected.nil? - assert_raises(expected) do + if expect == true || expect == false + cr.save! + assert_equal expect, cr.scheduling_parameters["preemptible"] + else + assert_raises(expect) do cr.save! end - else - cr.save! - assert (sp.to_a - cr.scheduling_parameters.to_a).empty? end end end - [ - [true, 'zzzzz-dz642-runningcontainr', true], - [true, nil, false], - [false, 'zzzzz-dz642-runningcontainr', false], - [false, nil, false], - ].each do |preemptible_conf, requesting_c, schedule_preemptible| - test "having Rails.configuration.Containers.UsePreemptibleInstances=#{preemptible_conf}, #{requesting_c.nil? ? 'non-':''}child CR should #{schedule_preemptible ? '':'not'} ask for preemptible instance by default" do - common_attrs = {cwd: "test", - priority: 1, - command: ["echo", "hello"], - output_path: "test", - mounts: {"test" => {"kind" => "json"}}} + test "config update does not flip preemptible flag on already-committed container requests" do + parent = containers(:running_container_with_logs) + attrs_p = { + scheduling_parameters: {"preemptible" => true}, + "state" => "Committed", + "priority" => 1, + } + attrs_nonp = { + scheduling_parameters: {"preemptible" => false}, + "state" => "Committed", + "priority" => 1, + } + expect = {false => [], true => []} - Rails.configuration.Containers.UsePreemptibleInstances = preemptible_conf - set_user_from_auth :active + with_container_auth(parent) do + configure_preemptible_instance_type + Rails.configuration.Containers.UsePreemptibleInstances = false - if requesting_c - cr = with_container_auth(Container.find_by_uuid requesting_c) do - create_minimal_req!(common_attrs) - end - assert_not_nil cr.requesting_container_uuid - else - cr = create_minimal_req!(common_attrs) + expect[true].push create_minimal_req!(attrs_p) + expect[false].push create_minimal_req!(attrs_nonp) + + Rails.configuration.Containers.UsePreemptibleInstances = true + + expect[true].push create_minimal_req!(attrs_p) + expect[true].push create_minimal_req!(attrs_nonp) + + Rails.configuration.InstanceTypes = ConfigLoader.to_OrderedOptions({}) + + expect[false].push create_minimal_req!(attrs_nonp) + end + + set_user_from_auth :active + [false, true].each do |pflag| + expect[pflag].each do |cr| + cr.reload + assert_equal pflag, cr.scheduling_parameters['preemptible'] end + end - cr.state = ContainerRequest::Committed - cr.save! + act_as_system_user do + # Cancelling the parent used to fail while updating the child + # containers' priority, because the child containers' unchanged + # preemptible fields caused validation to fail. + parent.update_attributes!(state: 'Cancelled') - assert_equal schedule_preemptible, cr.scheduling_parameters['preemptible'] + [false, true].each do |pflag| + expect[pflag].each do |cr| + cr.reload + assert_equal 0, cr.priority, "unexpected non-zero priority #{cr.priority} for #{cr.uuid}" + end + end end end @@ -1055,7 +1127,7 @@ class ContainerRequestTest < ActiveSupport::TestCase state: ContainerRequest::Committed, mounts: {"test" => {"kind" => "json"}}} set_user_from_auth :active - Rails.configuration.Containers.UsePreemptibleInstances = true + configure_preemptible_instance_type cr = with_container_auth(Container.find_by_uuid 'zzzzz-dz642-runningcontainr') do create_minimal_req!(common_attrs) -- 2.30.2