19981: Reuse containers with various keep_cache constraints
[arvados.git] / services / api / test / unit / container_test.rb
index ac3e6bea4280ae660042cfb4745c592dbe3c684e..286aa32ae209a98f3930076cb76d5b9ec0988700 100644 (file)
@@ -24,6 +24,7 @@ class ContainerTest < ActiveSupport::TestCase
     output_path: "test",
     runtime_constraints: {
       "API" => false,
+      "keep_cache_disk" => 0,
       "keep_cache_ram" => 0,
       "ram" => 12000000000,
       "vcpus" => 4
@@ -39,6 +40,25 @@ class ContainerTest < ActiveSupport::TestCase
     runtime_auth_scopes: ["all"]
   }
 
+  REUSABLE_ATTRS_SLIM = {
+    command: ["echo", "slim"],
+    container_image: "9ae44d5792468c58bcf85ce7353c7027+124",
+    cwd: "test",
+    environment: {},
+    mounts: {},
+    output_path: "test",
+    runtime_auth_scopes: ["all"],
+    runtime_constraints: {
+      "API" => false,
+      "keep_cache_disk" => 0,
+      "keep_cache_ram" => 0,
+      "ram" => 8 << 30,
+      "vcpus" => 4
+    },
+    runtime_user_uuid: "zzzzz-tpzed-xurymjxw79nv3jz",
+    secret_mounts: {},
+  }
+
   def request_only attrs
     attrs.reject {|k| [:runtime_user_uuid, :runtime_auth_scopes].include? k}
   end
@@ -229,7 +249,7 @@ class ContainerTest < ActiveSupport::TestCase
     set_user_from_auth :active
     env = {"C" => "3", "B" => "2", "A" => "1"}
     m = {"F" => {"kind" => "3"}, "E" => {"kind" => "2"}, "D" => {"kind" => "1"}}
-    rc = {"vcpus" => 1, "ram" => 1, "keep_cache_ram" => 1, "API" => true, "cuda" => {"device_count":0, "driver_version": "", "hardware_capability": ""}}
+    rc = {"vcpus" => 1, "ram" => 1, "keep_cache_ram" => 1, "keep_cache_disk" => 0, "API" => true, "cuda" => {"device_count":0, "driver_version": "", "hardware_capability": ""}}
     c, _ = minimal_new(environment: env, mounts: m, runtime_constraints: rc)
     c.reload
     assert_equal Container.deep_sort_hash(env).to_json, c.environment.to_json
@@ -594,14 +614,14 @@ class ContainerTest < ActiveSupport::TestCase
     set_user_from_auth :active
     # No cuda
     no_cuda_attrs = REUSABLE_COMMON_ATTRS.merge({use_existing:false, priority:1, environment:{"var" => "queued"},
-                                                runtime_constraints: {"vcpus" => 1, "ram" => 1, "keep_cache_ram"=>268435456, "API" => false,
+                                                runtime_constraints: {"vcpus" => 1, "ram" => 1, "keep_cache_disk"=>0, "keep_cache_ram"=>268435456, "API" => false,
                                                                       "cuda" => {"device_count":0, "driver_version": "", "hardware_capability": ""}},})
     c1, _ = minimal_new(no_cuda_attrs)
     assert_equal Container::Queued, c1.state
 
     # has cuda
     cuda_attrs = REUSABLE_COMMON_ATTRS.merge({use_existing:false, priority:1, environment:{"var" => "queued"},
-                                                runtime_constraints: {"vcpus" => 1, "ram" => 1, "keep_cache_ram"=>268435456, "API" => false,
+                                                runtime_constraints: {"vcpus" => 1, "ram" => 1, "keep_cache_disk"=>0, "keep_cache_ram"=>268435456, "API" => false,
                                                                       "cuda" => {"device_count":1, "driver_version": "11.0", "hardware_capability": "9.0"}},})
     c2, _ = minimal_new(cuda_attrs)
     assert_equal Container::Queued, c2.state
@@ -870,16 +890,14 @@ class ContainerTest < ActiveSupport::TestCase
     end
   end
 
-  test "Container only set exit code on complete" do
+  test "can only change exit code while running and at completion" do
     set_user_from_auth :active
     c, _ = minimal_new
     set_user_from_auth :dispatch1
     c.lock
+    check_illegal_updates c, [{exit_code: 1}]
     c.update_attributes! state: Container::Running
-
-    check_illegal_updates c, [{exit_code: 1},
-                              {exit_code: 1, state: Container::Cancelled}]
-
+    assert c.update_attributes(exit_code: 1)
     assert c.update_attributes(exit_code: 1, state: Container::Complete)
   end
 
@@ -933,7 +951,7 @@ class ContainerTest < ActiveSupport::TestCase
   end
 
   ["auth_uuid", "runtime_token"].each do |tok|
-    test "#{tok} can set output, progress, runtime_status, state on running container -- but not log" do
+    test "#{tok} can set output, progress, runtime_status, state, exit_code on running container -- but not log" do
       if tok == "runtime_token"
         set_user_from_auth :spectator
         c, _ = minimal_new(container_image: "9ae44d5792468c58bcf85ce7353c7027+124",
@@ -960,9 +978,11 @@ class ContainerTest < ActiveSupport::TestCase
         Thread.current[:user] = auth.user
       end
 
+      assert c.update_attributes(gateway_address: "127.0.0.1:9")
       assert c.update_attributes(output: collections(:collection_owned_by_active).portable_data_hash)
       assert c.update_attributes(runtime_status: {'warning' => 'something happened'})
       assert c.update_attributes(progress: 0.5)
+      assert c.update_attributes(exit_code: 0)
       refute c.update_attributes(log: collections(:real_log_collection).portable_data_hash)
       c.reload
       assert c.update_attributes(state: Container::Complete, exit_code: 0)
@@ -1062,4 +1082,260 @@ class ContainerTest < ActiveSupport::TestCase
       assert_no_secrets_logged
     end
   end
+
+  def configure_preemptible_instance_type
+    Rails.configuration.InstanceTypes = ConfigLoader.to_OrderedOptions({
+      "a1.small.pre" => {
+        "Preemptible" => true,
+        "Price" => 0.1,
+        "ProviderType" => "a1.small",
+        "VCPUs" => 1,
+        "RAM" => 1000000000,
+      },
+    })
+  end
+
+  def vary_parameters(**kwargs)
+    # kwargs is a hash that maps parameters to an array of values.
+    # This function enumerates every possible hash where each key has one of
+    # the values from its array.
+    # The output keys are strings since that's what container hash attributes
+    # want.
+    # A nil value yields a hash without that key.
+    [[:_, nil]].product(
+      *kwargs.map { |(key, values)| [key.to_s].product(values) },
+    ).map { |param_pairs| Hash[param_pairs].compact }
+  end
+
+  def retry_with_scheduling_parameters(param_hashes)
+    set_user_from_auth :admin
+    containers = {}
+    requests = []
+    param_hashes.each do |scheduling_parameters|
+      container, request = minimal_new(scheduling_parameters: scheduling_parameters)
+      containers[container.uuid] = container
+      requests << request
+    end
+    refute(containers.empty?, "buggy test: no scheduling parameters enumerated")
+    assert_equal(1, containers.length)
+    _, container1 = containers.shift
+    container1.lock
+    container1.update_attributes!(state: Container::Cancelled)
+    container1.reload
+    request1 = requests.shift
+    request1.reload
+    assert_not_equal(container1.uuid, request1.container_uuid)
+    requests.each do |request|
+      request.reload
+      assert_equal(request1.container_uuid, request.container_uuid)
+    end
+    container2 = Container.find_by_uuid(request1.container_uuid)
+    assert_not_nil(container2)
+    return container2
+  end
+
+  preemptible_values = [true, false, nil]
+  preemptible_values.permutation(1).chain(
+    preemptible_values.product(preemptible_values),
+    preemptible_values.product(preemptible_values, preemptible_values),
+  ).each do |preemptible_a|
+    test "retry requests scheduled with preemptible=#{preemptible_a}" do
+      configure_preemptible_instance_type
+      param_hashes = vary_parameters(preemptible: preemptible_a)
+      container = retry_with_scheduling_parameters(param_hashes)
+      assert_equal(preemptible_a.all?,
+                   container.scheduling_parameters["preemptible"] || false)
+    end
+  end
+
+  partition_values = [nil, [], ["alpha"], ["alpha", "bravo"], ["bravo", "charlie"]]
+  partition_values.permutation(1).chain(
+    partition_values.permutation(2),
+  ).each do |partitions_a|
+    test "retry requests scheduled with partitions=#{partitions_a}" do
+      param_hashes = vary_parameters(partitions: partitions_a)
+      container = retry_with_scheduling_parameters(param_hashes)
+      expected = if partitions_a.any? { |value| value.nil? or value.empty? }
+                   []
+                 else
+                   partitions_a.flatten.uniq
+                 end
+      actual = container.scheduling_parameters["partitions"] || []
+      assert_equal(expected.sort, actual.sort)
+    end
+  end
+
+  runtime_values = [nil, 0, 1, 2, 3]
+  runtime_values.permutation(1).chain(
+    runtime_values.permutation(2),
+    runtime_values.permutation(3),
+  ).each do |max_run_time_a|
+    test "retry requests scheduled with max_run_time=#{max_run_time_a}" do
+      param_hashes = vary_parameters(max_run_time: max_run_time_a)
+      container = retry_with_scheduling_parameters(param_hashes)
+      expected = if max_run_time_a.any? { |value| value.nil? or value == 0 }
+                   0
+                 else
+                   max_run_time_a.max
+                 end
+      actual = container.scheduling_parameters["max_run_time"] || 0
+      assert_equal(expected, actual)
+    end
+  end
+
+  test "retry requests with multi-varied scheduling parameters" do
+    configure_preemptible_instance_type
+    param_hashes = [{
+                     "partitions": ["alpha", "bravo"],
+                     "preemptible": true,
+                     "max_run_time": 10,
+                    }, {
+                     "partitions": ["alpha", "charlie"],
+                     "max_run_time": 20,
+                    }, {
+                     "partitions": ["bravo", "charlie"],
+                     "preemptible": false,
+                     "max_run_time": 30,
+                    }]
+    container = retry_with_scheduling_parameters(param_hashes)
+    actual = container.scheduling_parameters
+    assert_equal(["alpha", "bravo", "charlie"], actual["partitions"]&.sort)
+    assert_equal(false, actual["preemptible"] || false)
+    assert_equal(30, actual["max_run_time"])
+  end
+
+  test "retry requests with unset scheduling parameters" do
+    configure_preemptible_instance_type
+    param_hashes = vary_parameters(
+      preemptible: [nil, true],
+      partitions: [nil, ["alpha"]],
+      max_run_time: [nil, 5],
+    )
+    container = retry_with_scheduling_parameters(param_hashes)
+    actual = container.scheduling_parameters
+    assert_equal([], actual["partitions"] || [])
+    assert_equal(false, actual["preemptible"] || false)
+    assert_equal(0, actual["max_run_time"] || 0)
+  end
+
+  test "retry requests with default scheduling parameters" do
+    configure_preemptible_instance_type
+    param_hashes = vary_parameters(
+      preemptible: [false, true],
+      partitions: [[], ["bravo"]],
+      max_run_time: [0, 1],
+    )
+    container = retry_with_scheduling_parameters(param_hashes)
+    actual = container.scheduling_parameters
+    assert_equal([], actual["partitions"] || [])
+    assert_equal(false, actual["preemptible"] || false)
+    assert_equal(0, actual["max_run_time"] || 0)
+  end
+
+  def run_container(request_params, final_attrs)
+    final_attrs[:state] ||= Container::Complete
+    if final_attrs[:state] == Container::Complete
+      final_attrs[:exit_code] ||= 0
+      final_attrs[:log] ||= collections(:log_collection).portable_data_hash
+      final_attrs[:output] ||= collections(:multilevel_collection_1).portable_data_hash
+    end
+    container, request = minimal_new(request_params)
+    container.lock
+    container.update_attributes!(state: Container::Running)
+    container.update_attributes!(final_attrs)
+    return container, request
+  end
+
+  def check_reuse_with_variations(default_keep_cache_ram, vary_attr, start_value, variations)
+    container_params = REUSABLE_ATTRS_SLIM.merge(vary_attr => start_value)
+    orig_default = Rails.configuration.Containers.DefaultKeepCacheRAM
+    begin
+      Rails.configuration.Containers.DefaultKeepCacheRAM = default_keep_cache_ram
+      set_user_from_auth :admin
+      expected, _ = run_container(container_params, {})
+      variations.each do |variation|
+        full_variation = REUSABLE_ATTRS_SLIM[vary_attr].merge(variation)
+        parameters = REUSABLE_ATTRS_SLIM.merge(vary_attr => full_variation)
+        actual = Container.find_reusable(parameters)
+        assert_equal(expected.uuid, actual&.uuid,
+                     "request with #{vary_attr}=#{variation} did not reuse container")
+      end
+    ensure
+      Rails.configuration.Containers.DefaultKeepCacheRAM = orig_default
+    end
+  end
+
+  # Test that we can reuse a container with a known keep_cache_ram constraint,
+  # no matter what keep_cache_* constraints the new request uses.
+  [0, 2 << 30, 4 << 30].product(
+    [0, 1],
+    [true, false],
+  ).each do |(default_keep_cache_ram, multiplier, keep_disk_constraint)|
+    test "reuse request with DefaultKeepCacheRAM=#{default_keep_cache_ram}, keep_cache_ram*=#{multiplier}, keep_cache_disk=#{keep_disk_constraint}" do
+      runtime_constraints = REUSABLE_ATTRS_SLIM[:runtime_constraints].merge(
+        "keep_cache_ram" => default_keep_cache_ram * multiplier,
+      )
+      if not keep_disk_constraint
+        # Simulate a container that predates keep_cache_disk by deleting
+        # the constraint entirely.
+        runtime_constraints.delete("keep_cache_disk")
+      end
+      # Important values are:
+      # * 0
+      # * 2GiB, the minimum default keep_cache_disk
+      # * 8GiB, the default keep_cache_disk based on container ram
+      # * 32GiB, the maximum default keep_cache_disk
+      # Check these values and values in between.
+      vary_values = [0, 1, 2, 6, 8, 10, 32, 33].map { |v| v << 30 }.to_a
+      variations = vary_parameters(keep_cache_ram: vary_values)
+                     .chain(vary_parameters(keep_cache_disk: vary_values))
+      check_reuse_with_variations(
+        default_keep_cache_ram,
+        :runtime_constraints,
+        runtime_constraints,
+        variations,
+      )
+    end
+  end
+
+  # Test that we can reuse a container with a known keep_cache_disk constraint,
+  # no matter what keep_cache_* constraints the new request uses.
+  # keep_cache_disk values are the important values discussed in the test above.
+  [0, 2 << 30, 4 << 30]
+    .product([0, 2 << 30, 8 << 30, 32 << 30])
+    .each do |(default_keep_cache_ram, keep_cache_disk)|
+    test "reuse request with DefaultKeepCacheRAM=#{default_keep_cache_ram} and keep_cache_disk=#{keep_cache_disk}" do
+      runtime_constraints = REUSABLE_ATTRS_SLIM[:runtime_constraints].merge(
+        "keep_cache_disk" => keep_cache_disk,
+      )
+      vary_values = [0, 1, 2, 6, 8, 10, 32, 33].map { |v| v << 30 }.to_a
+      variations = vary_parameters(keep_cache_ram: vary_values)
+                     .chain(vary_parameters(keep_cache_disk: vary_values))
+      check_reuse_with_variations(
+        default_keep_cache_ram,
+        :runtime_constraints,
+        runtime_constraints,
+        variations,
+      )
+    end
+  end
+
+  # Test that a container request can reuse a container with an exactly
+  # matching keep_cache_* constraint, no matter what the defaults.
+  [0, 2 << 30, 4 << 30].product(
+    ["keep_cache_disk", "keep_cache_ram"],
+    [135790, 13 << 30, 135 << 30],
+  ).each do |(default_keep_cache_ram, constraint_key, constraint_value)|
+    test "reuse request with #{constraint_key}=#{constraint_value} and DefaultKeepCacheRAM=#{default_keep_cache_ram}" do
+      runtime_constraints = REUSABLE_ATTRS_SLIM[:runtime_constraints].merge(
+        constraint_key => constraint_value,
+      )
+      check_reuse_with_variations(
+        default_keep_cache_ram,
+        :runtime_constraints,
+        runtime_constraints,
+        [runtime_constraints],
+      )
+    end
+  end
 end