container_image: 'fa3c1a9cb6783f85f2ecda037e07b8c3+167',
output_path: '/tmp',
priority: 1,
- runtime_constraints: {"vcpus" => 1, "ram" => 1},
+ runtime_constraints: {"vcpus" => 1, "ram" => 1, "cuda" => {"device_count":0, "driver_version": "", "hardware_capability": ""}},
}
REUSABLE_COMMON_ATTRS = {
command: ["echo", "hello"],
output_path: "test",
runtime_constraints: {
+ "API" => false,
+ "keep_cache_disk" => 0,
+ "keep_cache_ram" => 0,
"ram" => 12000000000,
- "vcpus" => 4,
+ "vcpus" => 4
},
mounts: {
"test" => {"kind" => "json"},
runtime_auth_scopes: ["all"]
}
+ # Baseline container attributes for the keep_cache_* reuse tests.
+ # check_reuse_with_variations merges per-test overrides into a copy of this
+ # hash (and into its :runtime_constraints entry) before creating a container
+ # or calling Container.find_reusable.
+ REUSABLE_ATTRS_SLIM = {
+ command: ["echo", "slim"],
+ container_image: "9ae44d5792468c58bcf85ce7353c7027+124",
+ cwd: "test",
+ environment: {},
+ mounts: {},
+ output_path: "test",
+ runtime_auth_scopes: ["all"],
+ runtime_constraints: {
+ "API" => false,
+ "keep_cache_disk" => 0,
+ "keep_cache_ram" => 0,
+ # 8 << 30 bytes == 8 GiB
+ "ram" => 8 << 30,
+ "vcpus" => 4
+ },
+ runtime_user_uuid: "zzzzz-tpzed-xurymjxw79nv3jz",
+ secret_mounts: {},
+ }
+
def request_only attrs
attrs.reject {|k| [:runtime_user_uuid, :runtime_auth_scopes].include? k}
end
assert_equal c1.runtime_status, {}
assert_equal Container::Queued, c1.state
- assert_raises ActiveRecord::RecordInvalid do
+ assert_raises ArvadosModel::PermissionDeniedError do
c1.update_attributes! runtime_status: {'error' => 'Oops!'}
end
set_user_from_auth :active
env = {"C" => "3", "B" => "2", "A" => "1"}
m = {"F" => {"kind" => "3"}, "E" => {"kind" => "2"}, "D" => {"kind" => "1"}}
- rc = {"vcpus" => 1, "ram" => 1, "keep_cache_ram" => 1}
+ rc = {"vcpus" => 1, "ram" => 1, "keep_cache_ram" => 1, "keep_cache_disk" => 0, "API" => true, "cuda" => {"device_count":0, "driver_version": "", "hardware_capability": ""}}
c, _ = minimal_new(environment: env, mounts: m, runtime_constraints: rc)
- assert_equal c.environment.to_json, Container.deep_sort_hash(env).to_json
- assert_equal c.mounts.to_json, Container.deep_sort_hash(m).to_json
- assert_equal c.runtime_constraints.to_json, Container.deep_sort_hash(rc).to_json
+ c.reload
+ assert_equal Container.deep_sort_hash(env).to_json, c.environment.to_json
+ assert_equal Container.deep_sort_hash(m).to_json, c.mounts.to_json
+ assert_equal Container.deep_sort_hash(rc).to_json, c.runtime_constraints.to_json
end
test 'deep_sort_hash on array of hashes' do
end
test "find_reusable method should select higher priority queued container" do
- Rails.configuration.log_reuse_decisions = true
+ Rails.configuration.Containers.LogReuseDecisions = true
set_user_from_auth :active
common_attrs = REUSABLE_COMMON_ATTRS.merge({environment:{"var" => "queued"}})
c_low_priority, _ = minimal_new(common_attrs.merge({use_existing:false, priority:1}))
runtime_status: {'warning' => 'This is not an error'},
progress: 0.15})
c_faster_started_second.update_attributes!({state: Container::Locked})
+ assert_equal 0, Container.where("runtime_status->'error' is not null").count
c_faster_started_second.update_attributes!({state: Container::Running,
runtime_status: {'error' => 'Something bad happened'},
progress: 0.2})
+ assert_equal 1, Container.where("runtime_status->'error' is not null").count
reused = Container.find_reusable(common_attrs)
assert_not_nil reused
# Selected the non-failing container even if it's the one with less progress done
test "find_reusable with logging enabled" do
set_user_from_auth :active
- Rails.configuration.log_reuse_decisions = true
+ Rails.configuration.Containers.LogReuseDecisions = true
Rails.logger.expects(:info).at_least(3)
Container.find_reusable(REUSABLE_COMMON_ATTRS)
end
assert_equal Container::Queued, c1.state
reused = Container.find_reusable(common_attrs.merge(runtime_token_attr(:container_runtime_token)))
# See #14584
+ assert_not_nil reused
assert_equal c1.uuid, reused.uuid
end
assert_equal Container::Queued, c1.state
reused = Container.find_reusable(common_attrs.merge(runtime_token_attr(:container_runtime_token)))
# See #14584
+ assert_not_nil reused
assert_equal c1.uuid, reused.uuid
end
assert_equal Container::Queued, c1.state
reused = Container.find_reusable(common_attrs.merge(runtime_token_attr(:container_runtime_token)))
# See #14584
+ assert_not_nil reused
assert_equal c1.uuid, reused.uuid
end
+ test "find_reusable method with cuda" do
+   # Two queued containers that differ only in their "cuda" runtime
+   # constraint must each be found only by the matching request.
+   set_user_from_auth :active
+
+   # Builds request attributes that differ only in the "cuda" constraint.
+   attrs_with_cuda = lambda do |cuda|
+     REUSABLE_COMMON_ATTRS.merge({use_existing:false, priority:1, environment:{"var" => "queued"},
+       runtime_constraints: {"vcpus" => 1, "ram" => 1, "keep_cache_disk"=>0, "keep_cache_ram"=>268435456, "API" => false,
+         "cuda" => cuda}})
+   end
+
+   # NOTE(review): the `"name": value` literal form yields symbol keys inside
+   # these cuda hashes, unlike the string keys of the surrounding
+   # constraints -- confirm the model normalizes key types.
+
+   # No cuda
+   no_cuda_attrs = attrs_with_cuda.call({"device_count":0, "driver_version": "", "hardware_capability": ""})
+   c1, _ = minimal_new(no_cuda_attrs)
+   assert_equal Container::Queued, c1.state
+
+   # has cuda
+   cuda_attrs = attrs_with_cuda.call({"device_count":1, "driver_version": "11.0", "hardware_capability": "9.0"})
+   c2, _ = minimal_new(cuda_attrs)
+   assert_equal Container::Queued, c2.state
+
+   # should find the no cuda one
+   reused = Container.find_reusable(no_cuda_attrs)
+   assert_not_nil reused
+   # expected value first, so a failure message labels the uuids correctly
+   assert_equal c1.uuid, reused.uuid
+
+   # should find the cuda one
+   reused = Container.find_reusable(cuda_attrs)
+   assert_not_nil reused
+   assert_equal c2.uuid, reused.uuid
+ end
+
test "Container running" do
set_user_from_auth :active
c, _ = minimal_new priority: 1
auth_exp = ApiClientAuthorization.find_by_uuid(auth_uuid_was).expires_at
assert_operator auth_exp, :<, db_current_time
+
+ assert_nil ApiClientAuthorization.validate(token: ApiClientAuthorization.find_by_uuid(auth_uuid_was).token)
end
test "Exceed maximum lock-unlock cycles" do
- Rails.configuration.max_container_dispatch_attempts = 3
+ Rails.configuration.Containers.MaxDispatchAttempts = 3
set_user_from_auth :active
c, cr = minimal_new
check_no_change_from_cancelled c
end
+ test "Container locked with non-expiring token" do
+   # Even with API.TokenMaxLifetime configured, locking a container must
+   # produce an auth token that has no expiry and belongs to the :active user.
+   Rails.configuration.API.TokenMaxLifetime = 1.hour
+   set_user_from_auth :active
+   c, _ = minimal_new
+   set_user_from_auth :dispatch1
+   assert c.lock, show_errors(c)
+   # Specific assertions so a failure reports the offending value.
+   assert_not_nil c.auth
+   assert_nil c.auth.expires_at
+   assert_equal User.find_by_uuid(users(:active).uuid).id, c.auth.user_id
+ end
+
test "Container locked cancel with log" do
set_user_from_auth :active
c, _ = minimal_new
end
end
- test "Container only set exit code on complete" do
+ # Each row is [state the container is in, attributes a non-admin user then
+ # tries to update]; every such update must raise PermissionDeniedError.
+ [
+   [Container::Queued, {state: Container::Locked}],
+   [Container::Queued, {state: Container::Running}],
+   [Container::Queued, {state: Container::Complete}],
+   [Container::Queued, {state: Container::Cancelled}],
+   [Container::Queued, {priority: 123456789}],
+   [Container::Queued, {runtime_status: {'error' => 'oops'}}],
+   [Container::Queued, {cwd: '/'}],
+   [Container::Locked, {state: Container::Running}],
+   [Container::Locked, {state: Container::Queued}],
+   [Container::Locked, {priority: 123456789}],
+   [Container::Locked, {runtime_status: {'error' => 'oops'}}],
+   [Container::Locked, {cwd: '/'}],
+   [Container::Running, {state: Container::Complete}],
+   [Container::Running, {state: Container::Cancelled}],
+   [Container::Running, {priority: 123456789}],
+   [Container::Running, {runtime_status: {'error' => 'oops'}}],
+   [Container::Running, {cwd: '/'}],
+   [Container::Running, {gateway_address: "172.16.0.1:12345"}],
+   [Container::Running, {interactive_session_started: true}],
+   [Container::Complete, {state: Container::Cancelled}],
+   [Container::Complete, {priority: 123456789}],
+   [Container::Complete, {runtime_status: {'error' => 'oops'}}],
+   [Container::Complete, {cwd: '/'}],
+   [Container::Cancelled, {cwd: '/'}],
+ ].each do |start_state, updates|
+   test "Container update #{updates.inspect} when #{start_state} forbidden for non-admin" do
+     set_user_from_auth :active
+     c, _ = minimal_new
+     # Walk the container forward (as :dispatch1) until it reaches start_state:
+     # Queued -> Locked -> Running -> Complete/Cancelled.
+     unless start_state == Container::Queued
+       set_user_from_auth :dispatch1
+       c.lock
+       unless start_state == Container::Locked
+         c.update_attributes! state: Container::Running
+         c.update_attributes! state: start_state unless start_state == Container::Running
+       end
+     end
+     # expected value first, so a failure message labels the states correctly
+     assert_equal start_state, c.state
+     set_user_from_auth :active
+     assert_raises(ArvadosModel::PermissionDeniedError) do
+       c.update_attributes! updates
+     end
+   end
+ end
+
+ test "can only change exit code while running and at completion" do
set_user_from_auth :active
c, _ = minimal_new
set_user_from_auth :dispatch1
c.lock
+ check_illegal_updates c, [{exit_code: 1}]
c.update_attributes! state: Container::Running
-
- check_illegal_updates c, [{exit_code: 1},
- {exit_code: 1, state: Container::Cancelled}]
-
+ assert c.update_attributes(exit_code: 1)
assert c.update_attributes(exit_code: 1, state: Container::Complete)
end
end
["auth_uuid", "runtime_token"].each do |tok|
- test "#{tok} can set output, progress, runtime_status, state on running container -- but not log" do
+ test "#{tok} can set output, progress, runtime_status, state, exit_code on running container -- but not log" do
if tok == "runtime_token"
set_user_from_auth :spectator
c, _ = minimal_new(container_image: "9ae44d5792468c58bcf85ce7353c7027+124",
Thread.current[:user] = auth.user
end
+ assert c.update_attributes(gateway_address: "127.0.0.1:9")
assert c.update_attributes(output: collections(:collection_owned_by_active).portable_data_hash)
assert c.update_attributes(runtime_status: {'warning' => 'something happened'})
assert c.update_attributes(progress: 0.5)
+ assert c.update_attributes(exit_code: 0)
refute c.update_attributes(log: collections(:real_log_collection).portable_data_hash)
c.reload
assert c.update_attributes(state: Container::Complete, exit_code: 0)
c.update_attributes! state: Container::Running
set_user_from_auth :running_to_be_deleted_container_auth
- refute c.update_attributes(output: collections(:foo_file).portable_data_hash)
+ assert_raises(ArvadosModel::PermissionDeniedError) do
+ c.update_attributes(output: collections(:foo_file).portable_data_hash)
+ end
end
test "can set trashed output on running container" do
end
end
+ test "user cannot delete" do
+   # Destroying a container as the :active user must be refused and must
+   # leave the record findable by uuid.
+   set_user_from_auth :active
+   container, _ = minimal_new
+   assert_raises(ArvadosModel::PermissionDeniedError) { container.destroy }
+   still_there = Container.find_by_uuid(container.uuid)
+   assert still_there
+ end
+
[
{state: Container::Complete, exit_code: 0, output: '1f4b0bc7583c2a7f9102c395f4ffc5e3+45'},
{state: Container::Cancelled},
assert_no_secrets_logged
end
end
+
+ # Replaces Rails.configuration.InstanceTypes with a single instance type,
+ # "a1.small.pre", whose "Preemptible" flag is true.
+ def configure_preemptible_instance_type
+   preemptible_type = {
+     "Preemptible" => true,
+     "Price" => 0.1,
+     "ProviderType" => "a1.small",
+     "VCPUs" => 1,
+     "RAM" => 1000000000,
+   }
+   Rails.configuration.InstanceTypes =
+     ConfigLoader.to_OrderedOptions({"a1.small.pre" => preemptible_type})
+ end
+
+ # Enumerates every combination of the given parameter values.
+ #
+ # kwargs maps each parameter name to an array of candidate values. The
+ # result is an array of hashes, one per combination, in which every
+ # parameter takes one value from its array. Keys are strings, since that is
+ # what container hash attributes want. A nil value produces a hash without
+ # that key.
+ def vary_parameters(**kwargs)
+   pairs_per_key = kwargs.map do |key, values|
+     [key.to_s].product(values)
+   end
+   # The placeholder pair gives `product` a receiver even when kwargs is
+   # empty; its nil value is dropped again by `compact`.
+   placeholder = [[:_, nil]]
+   placeholder.product(*pairs_per_key).map do |param_pairs|
+     param_pairs.to_h.compact
+   end
+ end
+
+ # Creates one request per entry of param_hashes (as :admin), asserts that
+ # they all share a single container, cancels that container, and asserts
+ # that every request was then moved to one common, different container.
+ # Returns that second container.
+ def retry_with_scheduling_parameters(param_hashes)
+   set_user_from_auth :admin
+   containers_by_uuid = {}
+   requests = param_hashes.map do |scheduling_parameters|
+     container, request = minimal_new(scheduling_parameters: scheduling_parameters)
+     containers_by_uuid[container.uuid] = container
+     request
+   end
+   refute(containers_by_uuid.empty?, "buggy test: no scheduling parameters enumerated")
+   assert_equal(1, containers_by_uuid.length)
+
+   cancelled = containers_by_uuid.values.first
+   cancelled.lock
+   cancelled.update_attributes!(state: Container::Cancelled)
+   cancelled.reload
+
+   first_request, *other_requests = requests
+   first_request.reload
+   assert_not_equal(cancelled.uuid, first_request.container_uuid)
+   other_requests.each do |request|
+     request.reload
+     assert_equal(first_request.container_uuid, request.container_uuid)
+   end
+
+   replacement = Container.find_by_uuid(first_request.container_uuid)
+   assert_not_nil(replacement)
+   replacement
+ end
+
+ # One test per combination of one, two or three preemptible values: the
+ # retried container is expected to be preemptible only if every request was.
+ preemptible_values = [true, false, nil]
+ preemptible_cases =
+   preemptible_values.permutation(1).to_a +
+   preemptible_values.product(preemptible_values) +
+   preemptible_values.product(preemptible_values, preemptible_values)
+ preemptible_cases.each do |flags|
+   test "retry requests scheduled with preemptible=#{flags}" do
+     configure_preemptible_instance_type
+     retried = retry_with_scheduling_parameters(vary_parameters(preemptible: flags))
+     assert_equal(flags.all?,
+                  retried.scheduling_parameters["preemptible"] || false)
+   end
+ end
+
+ # One test per ordered selection of one or two partition lists: a nil or
+ # empty list on any request is expected to yield no partitions, otherwise
+ # the union of all listed partitions.
+ partition_values = [nil, [], ["alpha"], ["alpha", "bravo"], ["bravo", "charlie"]]
+ partition_cases = partition_values.permutation(1).to_a + partition_values.permutation(2).to_a
+ partition_cases.each do |partition_lists|
+   test "retry requests scheduled with partitions=#{partition_lists}" do
+     retried = retry_with_scheduling_parameters(vary_parameters(partitions: partition_lists))
+     unrestricted = partition_lists.any? { |list| list.nil? || list.empty? }
+     expected = unrestricted ? [] : partition_lists.flatten.uniq
+     actual = retried.scheduling_parameters["partitions"] || []
+     assert_equal(expected.sort, actual.sort)
+   end
+ end
+
+ # One test per ordered selection of one, two or three max_run_time values:
+ # a nil or zero value on any request is expected to yield 0, otherwise the
+ # largest requested value.
+ runtime_values = [nil, 0, 1, 2, 3]
+ runtime_cases =
+   runtime_values.permutation(1).to_a +
+   runtime_values.permutation(2).to_a +
+   runtime_values.permutation(3).to_a
+ runtime_cases.each do |run_times|
+   test "retry requests scheduled with max_run_time=#{run_times}" do
+     retried = retry_with_scheduling_parameters(vary_parameters(max_run_time: run_times))
+     unlimited = run_times.any? { |limit| limit.nil? || limit == 0 }
+     expected = unlimited ? 0 : run_times.max
+     actual = retried.scheduling_parameters["max_run_time"] || 0
+     assert_equal(expected, actual)
+   end
+ end
+
+ test "retry requests with multi-varied scheduling parameters" do
+   # Three requests that differ in every scheduling parameter at once. The
+   # retried container is expected to carry the union of partitions, be
+   # non-preemptible, and use the largest max_run_time.
+   configure_preemptible_instance_type
+   # String keys (`"k" => v`), matching what vary_parameters produces and what
+   # the assertions below read; the `"k": v` form would create symbol keys.
+   param_hashes = [{
+     "partitions" => ["alpha", "bravo"],
+     "preemptible" => true,
+     "max_run_time" => 10,
+   }, {
+     "partitions" => ["alpha", "charlie"],
+     "max_run_time" => 20,
+   }, {
+     "partitions" => ["bravo", "charlie"],
+     "preemptible" => false,
+     "max_run_time" => 30,
+   }]
+   container = retry_with_scheduling_parameters(param_hashes)
+   actual = container.scheduling_parameters
+   assert_equal(["alpha", "bravo", "charlie"], actual["partitions"]&.sort)
+   assert_equal(false, actual["preemptible"] || false)
+   assert_equal(30, actual["max_run_time"])
+ end
+
+ test "retry requests with unset scheduling parameters" do
+   # Every combination of "parameter left out" and "parameter set"; the
+   # retried container is expected to end up with no restrictions.
+   configure_preemptible_instance_type
+   combinations = vary_parameters(
+     preemptible: [nil, true],
+     partitions: [nil, ["alpha"]],
+     max_run_time: [nil, 5],
+   )
+   merged = retry_with_scheduling_parameters(combinations).scheduling_parameters
+   assert_equal([], merged["partitions"] || [])
+   assert_equal(false, merged["preemptible"] || false)
+   assert_equal(0, merged["max_run_time"] || 0)
+ end
+
+ test "retry requests with default scheduling parameters" do
+   # Every combination of false/[]/0 and a concrete value per parameter; the
+   # retried container is expected to end up with no restrictions.
+   configure_preemptible_instance_type
+   combinations = vary_parameters(
+     preemptible: [false, true],
+     partitions: [[], ["bravo"]],
+     max_run_time: [0, 1],
+   )
+   merged = retry_with_scheduling_parameters(combinations).scheduling_parameters
+   assert_equal([], merged["partitions"] || [])
+   assert_equal(false, merged["preemptible"] || false)
+   assert_equal(0, merged["max_run_time"] || 0)
+ end
+
+ # Creates a container from request_params via minimal_new, locks it, moves
+ # it to Running, and then applies final_attrs.
+ #
+ # final_attrs[:state] defaults to Container::Complete. When the final state
+ # is Complete, missing :exit_code, :log and :output are filled in (0 and the
+ # portable data hashes of two fixture collections).
+ #
+ # Returns [container, request].
+ def run_container(request_params, final_attrs)
+   # Fill defaults into a copy so the caller's hash is left untouched.
+   final_attrs = final_attrs.dup
+   final_attrs[:state] ||= Container::Complete
+   if final_attrs[:state] == Container::Complete
+     final_attrs[:exit_code] ||= 0
+     final_attrs[:log] ||= collections(:log_collection).portable_data_hash
+     final_attrs[:output] ||= collections(:multilevel_collection_1).portable_data_hash
+   end
+   container, request = minimal_new(request_params)
+   container.lock
+   container.update_attributes!(state: Container::Running)
+   container.update_attributes!(final_attrs)
+   [container, request]
+ end
+
+ # Runs one container with REUSABLE_ATTRS_SLIM where vary_attr is replaced by
+ # start_value, then asserts that Container.find_reusable returns that same
+ # container for every entry of variations. Each variation is merged onto
+ # REUSABLE_ATTRS_SLIM[vary_attr] (not onto start_value).
+ #
+ # Containers.DefaultKeepCacheRAM is set to default_keep_cache_ram for the
+ # duration of the check and restored afterwards.
+ def check_reuse_with_variations(default_keep_cache_ram, vary_attr, start_value, variations)
+   baseline_params = REUSABLE_ATTRS_SLIM.merge(vary_attr => start_value)
+   saved_default = Rails.configuration.Containers.DefaultKeepCacheRAM
+   begin
+     Rails.configuration.Containers.DefaultKeepCacheRAM = default_keep_cache_ram
+     set_user_from_auth :admin
+     expected, _ = run_container(baseline_params, {})
+     variations.each do |variation|
+       candidate_value = REUSABLE_ATTRS_SLIM[vary_attr].merge(variation)
+       reused = Container.find_reusable(REUSABLE_ATTRS_SLIM.merge(vary_attr => candidate_value))
+       assert_equal(expected.uuid, reused&.uuid,
+                    "request with #{vary_attr}=#{variation} did not reuse container")
+     end
+   ensure
+     Rails.configuration.Containers.DefaultKeepCacheRAM = saved_default
+   end
+ end
+
+ # A container created with a known keep_cache_ram constraint must be
+ # reusable no matter which keep_cache_* constraints the new request uses.
+ [0, 2 << 30, 4 << 30].product([0, 1], [true, false]).each do |default_keep_cache_ram, multiplier, keep_disk_constraint|
+   test "reuse request with DefaultKeepCacheRAM=#{default_keep_cache_ram}, keep_cache_ram*=#{multiplier}, keep_cache_disk=#{keep_disk_constraint}" do
+     runtime_constraints = REUSABLE_ATTRS_SLIM[:runtime_constraints].merge(
+       "keep_cache_ram" => default_keep_cache_ram * multiplier,
+     )
+     # Dropping the constraint entirely simulates a container that predates
+     # keep_cache_disk.
+     runtime_constraints.delete("keep_cache_disk") unless keep_disk_constraint
+     # Important values are:
+     # * 0
+     # * 2GiB, the minimum default keep_cache_disk
+     # * 8GiB, the default keep_cache_disk based on container ram
+     # * 32GiB, the maximum default keep_cache_disk
+     # Check these values and values in between.
+     sizes = [0, 1, 2, 6, 8, 10, 32, 33].map { |gib| gib << 30 }
+     variations = vary_parameters(keep_cache_ram: sizes) + vary_parameters(keep_cache_disk: sizes)
+     check_reuse_with_variations(default_keep_cache_ram, :runtime_constraints, runtime_constraints, variations)
+   end
+ end
+
+ # A container created with a known keep_cache_disk constraint must be
+ # reusable no matter which keep_cache_* constraints the new request uses.
+ # The keep_cache_disk values are 0 plus the 2GiB, 8GiB and 32GiB sizes.
+ disk_reuse_cases = [0, 2 << 30, 4 << 30].product([0, 2 << 30, 8 << 30, 32 << 30])
+ disk_reuse_cases.each do |default_keep_cache_ram, keep_cache_disk|
+   test "reuse request with DefaultKeepCacheRAM=#{default_keep_cache_ram} and keep_cache_disk=#{keep_cache_disk}" do
+     runtime_constraints = REUSABLE_ATTRS_SLIM[:runtime_constraints].merge(
+       "keep_cache_disk" => keep_cache_disk,
+     )
+     sizes = [0, 1, 2, 6, 8, 10, 32, 33].map { |gib| gib << 30 }
+     variations = vary_parameters(keep_cache_ram: sizes) + vary_parameters(keep_cache_disk: sizes)
+     check_reuse_with_variations(default_keep_cache_ram, :runtime_constraints, runtime_constraints, variations)
+   end
+ end
+
+ # A request whose keep_cache_* constraint exactly matches an existing
+ # container must reuse it, whatever DefaultKeepCacheRAM is set to.
+ exact_match_cases = [0, 2 << 30, 4 << 30].product(
+   ["keep_cache_disk", "keep_cache_ram"],
+   [135790, 13 << 30, 135 << 30],
+ )
+ exact_match_cases.each do |default_keep_cache_ram, constraint_key, constraint_value|
+   test "reuse request with #{constraint_key}=#{constraint_value} and DefaultKeepCacheRAM=#{default_keep_cache_ram}" do
+     runtime_constraints = REUSABLE_ATTRS_SLIM[:runtime_constraints].merge(
+       constraint_key => constraint_value,
+     )
+     # The only variation is the container's own constraints: an exact match.
+     exact_variations = [runtime_constraints]
+     check_reuse_with_variations(default_keep_cache_ram, :runtime_constraints, runtime_constraints, exact_variations)
+   end
+ end
end