Merge branch '21644-flaky-test'
[arvados.git] / services / api / test / unit / container_test.rb
1 # Copyright (C) The Arvados Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: AGPL-3.0
4
5 require 'test_helper'
6 require 'helpers/container_test_helper'
7
8 class ContainerTest < ActiveSupport::TestCase
9   include DbCurrentTime
10   include ContainerTestHelper
11
12   DEFAULT_ATTRS = {
13     command: ['echo', 'foo'],
14     container_image: 'fa3c1a9cb6783f85f2ecda037e07b8c3+167',
15     output_path: '/tmp',
16     priority: 1,
17     runtime_constraints: {"vcpus" => 1, "ram" => 1, "cuda" => {"device_count":0, "driver_version": "", "hardware_capability": ""}},
18   }
19
20   REUSABLE_COMMON_ATTRS = {
21     container_image: "9ae44d5792468c58bcf85ce7353c7027+124",
22     cwd: "test",
23     command: ["echo", "hello"],
24     output_path: "test",
25     runtime_constraints: {
26       "API" => false,
27       "keep_cache_disk" => 0,
28       "keep_cache_ram" => 0,
29       "ram" => 12000000000,
30       "vcpus" => 4
31     },
32     mounts: {
33       "test" => {"kind" => "json"},
34     },
35     environment: {
36       "var" => "val",
37     },
38     secret_mounts: {},
39     runtime_user_uuid: "zzzzz-tpzed-xurymjxw79nv3jz",
40     runtime_auth_scopes: ["all"],
41     scheduling_parameters: {},
42   }
43
44   REUSABLE_ATTRS_SLIM = {
45     command: ["echo", "slim"],
46     container_image: "9ae44d5792468c58bcf85ce7353c7027+124",
47     cwd: "test",
48     environment: {},
49     mounts: {},
50     output_path: "test",
51     runtime_auth_scopes: ["all"],
52     runtime_constraints: {
53       "API" => false,
54       "keep_cache_disk" => 0,
55       "keep_cache_ram" => 0,
56       "ram" => 8 << 30,
57       "vcpus" => 4
58     },
59     runtime_user_uuid: "zzzzz-tpzed-xurymjxw79nv3jz",
60     secret_mounts: {},
61     scheduling_parameters: {},
62   }
63
64   def request_only attrs
65     attrs.reject {|k| [:runtime_user_uuid, :runtime_auth_scopes].include? k}
66   end
67
68   def minimal_new attrs={}
69     cr = ContainerRequest.new request_only(DEFAULT_ATTRS.merge(attrs))
70     cr.state = ContainerRequest::Committed
71     cr.save!
72     c = Container.find_by_uuid cr.container_uuid
73     assert_not_nil c
74     return c, cr
75   end
76
77   def check_illegal_updates c, bad_updates
78     bad_updates.each do |u|
79       refute c.update(u), u.inspect
80       refute c.valid?, u.inspect
81       c.reload
82     end
83   end
84
85   def check_illegal_modify c
86     check_illegal_updates c, [{command: ["echo", "bar"]},
87                               {container_image: "arvados/apitestfixture:june10"},
88                               {cwd: "/tmp2"},
89                               {environment: {"FOO" => "BAR"}},
90                               {mounts: {"FOO" => "BAR"}},
91                               {output_path: "/tmp3"},
92                               {locked_by_uuid: "zzzzz-gj3su-027z32aux8dg2s1"},
93                               {auth_uuid: "zzzzz-gj3su-017z32aux8dg2s1"},
94                               {runtime_constraints: {"FOO" => "BAR"}}]
95   end
96
97   def check_bogus_states c
98     check_illegal_updates c, [{state: nil},
99                               {state: "Flubber"}]
100   end
101
102   def check_no_change_from_cancelled c
103     check_illegal_modify c
104     check_bogus_states c
105     check_illegal_updates c, [{ priority: 3 },
106                               { state: Container::Queued },
107                               { state: Container::Locked },
108                               { state: Container::Running },
109                               { state: Container::Complete }]
110   end
111
112   test "Container create" do
113     act_as_system_user do
114       c, _ = minimal_new(environment: {},
115                       mounts: {"BAR" => {"kind" => "FOO"}},
116                       output_path: "/tmp",
117                       priority: 1,
118                       runtime_constraints: {"vcpus" => 1, "ram" => 1})
119
120       check_illegal_modify c
121       check_bogus_states c
122
123       c.reload
124       c.priority = 2
125       c.save!
126     end
127   end
128
129   test "Container valid priority" do
130     act_as_system_user do
131       c, _ = minimal_new(environment: {},
132                       mounts: {"BAR" => {"kind" => "FOO"}},
133                       output_path: "/tmp",
134                       priority: 1,
135                       runtime_constraints: {"vcpus" => 1, "ram" => 1})
136
137       assert_raises(ActiveRecord::RecordInvalid) do
138         c.priority = -1
139         c.save!
140       end
141
142       c.priority = 0
143       c.save!
144
145       c.priority = 1
146       c.save!
147
148       c.priority = 500
149       c.save!
150
151       c.priority = 999
152       c.save!
153
154       c.priority = 1000
155       c.save!
156
157       c.priority = 1000 << 50
158       c.save!
159     end
160   end
161
162   test "Container runtime_status data types" do
163     set_user_from_auth :active
164     attrs = {
165       environment: {},
166       mounts: {"BAR" => {"kind" => "FOO"}},
167       output_path: "/tmp",
168       priority: 1,
169       runtime_constraints: {"vcpus" => 1, "ram" => 1}
170     }
171     c, _ = minimal_new(attrs)
172     assert_equal c.runtime_status, {}
173     assert_equal Container::Queued, c.state
174
175     set_user_from_auth :dispatch1
176     c.update! state: Container::Locked
177     c.update! state: Container::Running
178
179     [
180       'error', 'errorDetail', 'warning', 'warningDetail', 'activity'
181     ].each do |k|
182       # String type is allowed
183       string_val = 'A string is accepted'
184       c.update! runtime_status: {k => string_val}
185       assert_equal string_val, c.runtime_status[k]
186
187       # Other types aren't allowed
188       [
189         42, false, [], {}, nil
190       ].each do |unallowed_val|
191         assert_raises ActiveRecord::RecordInvalid do
192           c.update! runtime_status: {k => unallowed_val}
193         end
194       end
195     end
196   end
197
198   test "Container runtime_status updates" do
199     set_user_from_auth :active
200     attrs = {
201       environment: {},
202       mounts: {"BAR" => {"kind" => "FOO"}},
203       output_path: "/tmp",
204       priority: 1,
205       runtime_constraints: {"vcpus" => 1, "ram" => 1}
206     }
207     c1, _ = minimal_new(attrs)
208     assert_equal c1.runtime_status, {}
209
210     assert_equal Container::Queued, c1.state
211     assert_raises ArvadosModel::PermissionDeniedError do
212       c1.update! runtime_status: {'error' => 'Oops!'}
213     end
214
215     set_user_from_auth :dispatch1
216
217     # Allow updates when state = Locked
218     c1.update! state: Container::Locked
219     c1.update! runtime_status: {'error' => 'Oops!'}
220     assert c1.runtime_status.key? 'error'
221
222     # Reset when transitioning from Locked to Queued
223     c1.update! state: Container::Queued
224     assert_equal c1.runtime_status, {}
225
226     # Allow updates when state = Running
227     c1.update! state: Container::Locked
228     c1.update! state: Container::Running
229     c1.update! runtime_status: {'error' => 'Oops!'}
230     assert c1.runtime_status.key? 'error'
231
232     # Don't allow updates on other states
233     c1.update! state: Container::Complete
234     assert_raises ActiveRecord::RecordInvalid do
235       c1.update! runtime_status: {'error' => 'Some other error'}
236     end
237
238     set_user_from_auth :active
239     c2, _ = minimal_new(attrs)
240     assert_equal c2.runtime_status, {}
241     set_user_from_auth :dispatch1
242     c2.update! state: Container::Locked
243     c2.update! state: Container::Running
244     c2.update! state: Container::Cancelled
245     assert_raises ActiveRecord::RecordInvalid do
246       c2.update! runtime_status: {'error' => 'Oops!'}
247     end
248   end
249
250   test "Container serialized hash attributes sorted before save" do
251     set_user_from_auth :active
252     env = {"C" => "3", "B" => "2", "A" => "1"}
253     m = {"F" => {"kind" => "3"}, "E" => {"kind" => "2"}, "D" => {"kind" => "1"}}
254     rc = {"vcpus" => 1, "ram" => 1, "keep_cache_ram" => 1, "keep_cache_disk" => 0, "API" => true, "cuda" => {"device_count":0, "driver_version": "", "hardware_capability": ""}}
255     c, _ = minimal_new(environment: env, mounts: m, runtime_constraints: rc)
256     c.reload
257     assert_equal Container.deep_sort_hash(env).to_json, c.environment.to_json
258     assert_equal Container.deep_sort_hash(m).to_json, c.mounts.to_json
259     assert_equal Container.deep_sort_hash(rc).to_json, c.runtime_constraints.to_json
260   end
261
262   test 'deep_sort_hash on array of hashes' do
263     a = {'z' => [[{'a' => 'a', 'b' => 'b'}]]}
264     b = {'z' => [[{'b' => 'b', 'a' => 'a'}]]}
265     assert_equal Container.deep_sort_hash(a).to_json, Container.deep_sort_hash(b).to_json
266   end
267
268   test "find_reusable method should select higher priority queued container" do
269         Rails.configuration.Containers.LogReuseDecisions = true
270     set_user_from_auth :active
271     common_attrs = REUSABLE_COMMON_ATTRS.merge({environment:{"var" => "queued"}})
272     c_low_priority, _ = minimal_new(common_attrs.merge({use_existing:false, priority:1}))
273     c_high_priority, _ = minimal_new(common_attrs.merge({use_existing:false, priority:2}))
274     assert_not_equal c_low_priority.uuid, c_high_priority.uuid
275     assert_equal Container::Queued, c_low_priority.state
276     assert_equal Container::Queued, c_high_priority.state
277     reused = Container.find_reusable(common_attrs)
278     assert_not_nil reused
279     assert_equal reused.uuid, c_high_priority.uuid
280   end
281
282   test "find_reusable method should select latest completed container" do
283     set_user_from_auth :active
284     common_attrs = REUSABLE_COMMON_ATTRS.merge({environment: {"var" => "complete"}})
285     completed_attrs = {
286       state: Container::Complete,
287       exit_code: 0,
288       log: 'ea10d51bcf88862dbcc36eb292017dfd+45',
289       output: '1f4b0bc7583c2a7f9102c395f4ffc5e3+45'
290     }
291
292     c_older, _ = minimal_new(common_attrs.merge({use_existing: false}))
293     c_recent, _ = minimal_new(common_attrs.merge({use_existing: false}))
294     assert_not_equal c_older.uuid, c_recent.uuid
295
296     set_user_from_auth :dispatch1
297     c_older.update!({state: Container::Locked})
298     c_older.update!({state: Container::Running})
299     c_older.update!(completed_attrs)
300
301     c_recent.update!({state: Container::Locked})
302     c_recent.update!({state: Container::Running})
303     c_recent.update!(completed_attrs)
304
305     reused = Container.find_reusable(common_attrs)
306     assert_not_nil reused
307     assert_equal reused.uuid, c_older.uuid
308   end
309
310   test "find_reusable method should select oldest completed container when inconsistent outputs exist" do
311     set_user_from_auth :active
312     common_attrs = REUSABLE_COMMON_ATTRS.merge({environment: {"var" => "complete"}, priority: 1})
313     completed_attrs = {
314       state: Container::Complete,
315       exit_code: 0,
316       log: 'ea10d51bcf88862dbcc36eb292017dfd+45',
317     }
318
319     cr = ContainerRequest.new request_only(common_attrs)
320     cr.use_existing = false
321     cr.state = ContainerRequest::Committed
322     cr.save!
323     c_output1 = Container.where(uuid: cr.container_uuid).first
324
325     cr = ContainerRequest.new request_only(common_attrs)
326     cr.use_existing = false
327     cr.state = ContainerRequest::Committed
328     cr.save!
329     c_output2 = Container.where(uuid: cr.container_uuid).first
330
331     assert_not_equal c_output1.uuid, c_output2.uuid
332
333     set_user_from_auth :dispatch1
334
335     out1 = '1f4b0bc7583c2a7f9102c395f4ffc5e3+45'
336     log1 = collections(:real_log_collection).portable_data_hash
337     c_output1.update!({state: Container::Locked})
338     c_output1.update!({state: Container::Running})
339     c_output1.update!(completed_attrs.merge({log: log1, output: out1}))
340
341     out2 = 'fa7aeb5140e2848d39b416daeef4ffc5+45'
342     c_output2.update!({state: Container::Locked})
343     c_output2.update!({state: Container::Running})
344     c_output2.update!(completed_attrs.merge({log: log1, output: out2}))
345
346     set_user_from_auth :active
347     reused = Container.resolve(ContainerRequest.new(request_only(common_attrs)))
348     assert_equal c_output1.uuid, reused.uuid
349   end
350
351   test "find_reusable method should select running container by start date" do
352     set_user_from_auth :active
353     common_attrs = REUSABLE_COMMON_ATTRS.merge({environment: {"var" => "running"}})
354     c_slower, _ = minimal_new(common_attrs.merge({use_existing: false}))
355     c_faster_started_first, _ = minimal_new(common_attrs.merge({use_existing: false}))
356     c_faster_started_second, _ = minimal_new(common_attrs.merge({use_existing: false}))
357     # Confirm the 3 container UUIDs are different.
358     assert_equal 3, [c_slower.uuid, c_faster_started_first.uuid, c_faster_started_second.uuid].uniq.length
359     set_user_from_auth :dispatch1
360     c_slower.update!({state: Container::Locked})
361     c_slower.update!({state: Container::Running,
362                                  progress: 0.1})
363     c_faster_started_first.update!({state: Container::Locked})
364     c_faster_started_first.update!({state: Container::Running,
365                                                progress: 0.15})
366     c_faster_started_second.update!({state: Container::Locked})
367     c_faster_started_second.update!({state: Container::Running,
368                                                 progress: 0.15})
369     reused = Container.find_reusable(common_attrs)
370     assert_not_nil reused
371     # Selected container is the one that started first
372     assert_equal reused.uuid, c_faster_started_first.uuid
373   end
374
375   test "find_reusable method should select running container by progress" do
376     set_user_from_auth :active
377     common_attrs = REUSABLE_COMMON_ATTRS.merge({environment: {"var" => "running2"}})
378     c_slower, _ = minimal_new(common_attrs.merge({use_existing: false}))
379     c_faster_started_first, _ = minimal_new(common_attrs.merge({use_existing: false}))
380     c_faster_started_second, _ = minimal_new(common_attrs.merge({use_existing: false}))
381     # Confirm the 3 container UUIDs are different.
382     assert_equal 3, [c_slower.uuid, c_faster_started_first.uuid, c_faster_started_second.uuid].uniq.length
383     set_user_from_auth :dispatch1
384     c_slower.update!({state: Container::Locked})
385     c_slower.update!({state: Container::Running,
386                                  progress: 0.1})
387     c_faster_started_first.update!({state: Container::Locked})
388     c_faster_started_first.update!({state: Container::Running,
389                                                progress: 0.15})
390     c_faster_started_second.update!({state: Container::Locked})
391     c_faster_started_second.update!({state: Container::Running,
392                                                 progress: 0.2})
393     reused = Container.find_reusable(common_attrs)
394     assert_not_nil reused
395     # Selected container is the one with most progress done
396     assert_equal reused.uuid, c_faster_started_second.uuid
397   end
398
399   test "find_reusable method should select non-failing running container" do
400     set_user_from_auth :active
401     common_attrs = REUSABLE_COMMON_ATTRS.merge({environment: {"var" => "running2"}})
402     c_slower, _ = minimal_new(common_attrs.merge({use_existing: false}))
403     c_faster_started_first, _ = minimal_new(common_attrs.merge({use_existing: false}))
404     c_faster_started_second, _ = minimal_new(common_attrs.merge({use_existing: false}))
405     # Confirm the 3 container UUIDs are different.
406     assert_equal 3, [c_slower.uuid, c_faster_started_first.uuid, c_faster_started_second.uuid].uniq.length
407     set_user_from_auth :dispatch1
408     c_slower.update!({state: Container::Locked})
409     c_slower.update!({state: Container::Running,
410                                  progress: 0.1})
411     c_faster_started_first.update!({state: Container::Locked})
412     c_faster_started_first.update!({state: Container::Running,
413                                                runtime_status: {'warning' => 'This is not an error'},
414                                                progress: 0.15})
415     c_faster_started_second.update!({state: Container::Locked})
416     assert_equal 0, Container.where("runtime_status->'error' is not null").count
417     c_faster_started_second.update!({state: Container::Running,
418                                                 runtime_status: {'error' => 'Something bad happened'},
419                                                 progress: 0.2})
420     assert_equal 1, Container.where("runtime_status->'error' is not null").count
421     reused = Container.find_reusable(common_attrs)
422     assert_not_nil reused
423     # Selected the non-failing container even if it's the one with less progress done
424     assert_equal reused.uuid, c_faster_started_first.uuid
425   end
426
427   test "find_reusable method should select locked container most likely to start sooner" do
428     set_user_from_auth :active
429     common_attrs = REUSABLE_COMMON_ATTRS.merge({environment: {"var" => "locked"}})
430     c_low_priority, _ = minimal_new(common_attrs.merge({use_existing: false}))
431     c_high_priority_older, _ = minimal_new(common_attrs.merge({use_existing: false}))
432     c_high_priority_newer, _ = minimal_new(common_attrs.merge({use_existing: false}))
433     # Confirm the 3 container UUIDs are different.
434     assert_equal 3, [c_low_priority.uuid, c_high_priority_older.uuid, c_high_priority_newer.uuid].uniq.length
435     set_user_from_auth :dispatch1
436     c_low_priority.update!({state: Container::Locked,
437                                        priority: 1})
438     c_high_priority_older.update!({state: Container::Locked,
439                                               priority: 2})
440     c_high_priority_newer.update!({state: Container::Locked,
441                                               priority: 2})
442     reused = Container.find_reusable(common_attrs)
443     assert_not_nil reused
444     assert_equal reused.uuid, c_high_priority_older.uuid
445   end
446
447   test "find_reusable method should select running over failed container" do
448     set_user_from_auth :active
449     common_attrs = REUSABLE_COMMON_ATTRS.merge({environment: {"var" => "failed_vs_running"}})
450     c_failed, _ = minimal_new(common_attrs.merge({use_existing: false}))
451     c_running, _ = minimal_new(common_attrs.merge({use_existing: false}))
452     assert_not_equal c_failed.uuid, c_running.uuid
453     set_user_from_auth :dispatch1
454     c_failed.update!({state: Container::Locked})
455     c_failed.update!({state: Container::Running})
456     c_failed.update!({state: Container::Complete,
457                                  exit_code: 42,
458                                  log: 'ea10d51bcf88862dbcc36eb292017dfd+45',
459                                  output: 'ea10d51bcf88862dbcc36eb292017dfd+45'})
460     c_running.update!({state: Container::Locked})
461     c_running.update!({state: Container::Running,
462                                   progress: 0.15})
463     reused = Container.find_reusable(common_attrs)
464     assert_not_nil reused
465     assert_equal reused.uuid, c_running.uuid
466   end
467
468   test "find_reusable method should select complete over running container" do
469     set_user_from_auth :active
470     common_attrs = REUSABLE_COMMON_ATTRS.merge({environment: {"var" => "completed_vs_running"}})
471     c_completed, _ = minimal_new(common_attrs.merge({use_existing: false}))
472     c_running, _ = minimal_new(common_attrs.merge({use_existing: false}))
473     assert_not_equal c_completed.uuid, c_running.uuid
474     set_user_from_auth :dispatch1
475     c_completed.update!({state: Container::Locked})
476     c_completed.update!({state: Container::Running})
477     c_completed.update!({state: Container::Complete,
478                                     exit_code: 0,
479                                     log: 'ea10d51bcf88862dbcc36eb292017dfd+45',
480                                     output: '1f4b0bc7583c2a7f9102c395f4ffc5e3+45'})
481     c_running.update!({state: Container::Locked})
482     c_running.update!({state: Container::Running,
483                                   progress: 0.15})
484     reused = Container.find_reusable(common_attrs)
485     assert_not_nil reused
486     assert_equal c_completed.uuid, reused.uuid
487   end
488
489   test "find_reusable method should select running over locked container" do
490     set_user_from_auth :active
491     common_attrs = REUSABLE_COMMON_ATTRS.merge({environment: {"var" => "running_vs_locked"}})
492     c_locked, _ = minimal_new(common_attrs.merge({use_existing: false}))
493     c_running, _ = minimal_new(common_attrs.merge({use_existing: false}))
494     assert_not_equal c_running.uuid, c_locked.uuid
495     set_user_from_auth :dispatch1
496     c_locked.update!({state: Container::Locked})
497     c_running.update!({state: Container::Locked})
498     c_running.update!({state: Container::Running,
499                                   progress: 0.15})
500     reused = Container.find_reusable(common_attrs)
501     assert_not_nil reused
502     assert_equal reused.uuid, c_running.uuid
503   end
504
505   test "find_reusable method should select locked over queued container" do
506     set_user_from_auth :active
507     common_attrs = REUSABLE_COMMON_ATTRS.merge({environment: {"var" => "running_vs_locked"}})
508     c_locked, _ = minimal_new(common_attrs.merge({use_existing: false}))
509     c_queued, _ = minimal_new(common_attrs.merge({use_existing: false}))
510     assert_not_equal c_queued.uuid, c_locked.uuid
511     set_user_from_auth :dispatch1
512     c_locked.update!({state: Container::Locked})
513     reused = Container.find_reusable(common_attrs)
514     assert_not_nil reused
515     assert_equal reused.uuid, c_locked.uuid
516   end
517
518   test "find_reusable method should not select failed container" do
519     set_user_from_auth :active
520     attrs = REUSABLE_COMMON_ATTRS.merge({environment: {"var" => "failed"}})
521     c, _ = minimal_new(attrs)
522     set_user_from_auth :dispatch1
523     c.update!({state: Container::Locked})
524     c.update!({state: Container::Running})
525     c.update!({state: Container::Complete,
526                           exit_code: 33})
527     reused = Container.find_reusable(attrs)
528     assert_nil reused
529   end
530
531   [[false, false, true],
532    [false, true, true],
533    [true, false, false],
534    [true, true, true]
535   ].each do |c1_preemptible, c2_preemptible, should_reuse|
536     [[Container::Queued, 1],
537      [Container::Locked, 1],
538      [Container::Running, 0],   # not cancelled yet, but obviously will be soon
539     ].each do |c1_state, c1_priority|
540       test "find_reusable for #{c2_preemptible ? '' : 'non-'}preemptible req should #{should_reuse ? '' : 'not'} reuse a #{c1_state} #{c1_preemptible ? '' : 'non-'}preemptible container with priority #{c1_priority}" do
541         configure_preemptible_instance_type
542         set_user_from_auth :active
543         c1_attrs = REUSABLE_COMMON_ATTRS.merge({environment: {"test" => name, "state" => c1_state}, scheduling_parameters: {"preemptible" => c1_preemptible}})
544         c1, _ = minimal_new(c1_attrs)
545         set_user_from_auth :dispatch1
546         c1.update!({state: Container::Locked}) if c1_state != Container::Queued
547         c1.update!({state: Container::Running, priority: c1_priority}) if c1_state == Container::Running
548         c2_attrs = c1_attrs.merge({scheduling_parameters: {"preemptible" => c2_preemptible}})
549         reused = Container.find_reusable(c2_attrs)
550         if should_reuse && c1_priority > 0
551           assert_not_nil reused
552         else
553           assert_nil reused
554         end
555       end
556     end
557   end
558
559   test "find_reusable with logging disabled" do
560     set_user_from_auth :active
561     Rails.logger.expects(:info).never
562     Container.find_reusable(REUSABLE_COMMON_ATTRS)
563   end
564
565   test "find_reusable with logging enabled" do
566     set_user_from_auth :active
567     Rails.configuration.Containers.LogReuseDecisions = true
568     Rails.logger.expects(:info).at_least(3)
569     Container.find_reusable(REUSABLE_COMMON_ATTRS)
570   end
571
572   def runtime_token_attr tok
573     auth = api_client_authorizations(tok)
574     {runtime_user_uuid: User.find_by_id(auth.user_id).uuid,
575      runtime_auth_scopes: auth.scopes,
576      runtime_token: auth.token}
577   end
578
579   test "find_reusable method with same runtime_token" do
580     set_user_from_auth :active
581     common_attrs = REUSABLE_COMMON_ATTRS.merge({use_existing:false, priority:1, environment:{"var" => "queued"}})
582     c1, _ = minimal_new(common_attrs.merge({runtime_token: api_client_authorizations(:container_runtime_token).token}))
583     assert_equal Container::Queued, c1.state
584     reused = Container.find_reusable(common_attrs.merge(runtime_token_attr(:container_runtime_token)))
585     assert_not_nil reused
586     assert_equal reused.uuid, c1.uuid
587   end
588
589   test "find_reusable method with different runtime_token, same user" do
590     set_user_from_auth :active
591     common_attrs = REUSABLE_COMMON_ATTRS.merge({use_existing:false, priority:1, environment:{"var" => "queued"}})
592     c1, _ = minimal_new(common_attrs.merge({runtime_token: api_client_authorizations(:crt_user).token}))
593     assert_equal Container::Queued, c1.state
594     reused = Container.find_reusable(common_attrs.merge(runtime_token_attr(:container_runtime_token)))
595     assert_not_nil reused
596     assert_equal reused.uuid, c1.uuid
597   end
598
599   test "find_reusable method with nil runtime_token, then runtime_token with same user" do
600     set_user_from_auth :crt_user
601     common_attrs = REUSABLE_COMMON_ATTRS.merge({use_existing:false, priority:1, environment:{"var" => "queued"}})
602     c1, _ = minimal_new(common_attrs)
603     assert_equal Container::Queued, c1.state
604     assert_equal users(:container_runtime_token_user).uuid, c1.runtime_user_uuid
605     reused = Container.find_reusable(common_attrs.merge(runtime_token_attr(:container_runtime_token)))
606     assert_not_nil reused
607     assert_equal reused.uuid, c1.uuid
608   end
609
610   test "find_reusable method with different runtime_token, different user" do
611     set_user_from_auth :crt_user
612     common_attrs = REUSABLE_COMMON_ATTRS.merge({use_existing:false, priority:1, environment:{"var" => "queued"}})
613     c1, _ = minimal_new(common_attrs.merge({runtime_token: api_client_authorizations(:active).token}))
614     assert_equal Container::Queued, c1.state
615     reused = Container.find_reusable(common_attrs.merge(runtime_token_attr(:container_runtime_token)))
616     # See #14584
617     assert_not_nil reused
618     assert_equal c1.uuid, reused.uuid
619   end
620
621   test "find_reusable method with nil runtime_token, then runtime_token with different user" do
622     set_user_from_auth :active
623     common_attrs = REUSABLE_COMMON_ATTRS.merge({use_existing:false, priority:1, environment:{"var" => "queued"}})
624     c1, _ = minimal_new(common_attrs.merge({runtime_token: nil}))
625     assert_equal Container::Queued, c1.state
626     reused = Container.find_reusable(common_attrs.merge(runtime_token_attr(:container_runtime_token)))
627     # See #14584
628     assert_not_nil reused
629     assert_equal c1.uuid, reused.uuid
630   end
631
632   test "find_reusable method with different runtime_token, different scope, same user" do
633     set_user_from_auth :active
634     common_attrs = REUSABLE_COMMON_ATTRS.merge({use_existing:false, priority:1, environment:{"var" => "queued"}})
635     c1, _ = minimal_new(common_attrs.merge({runtime_token: api_client_authorizations(:runtime_token_limited_scope).token}))
636     assert_equal Container::Queued, c1.state
637     reused = Container.find_reusable(common_attrs.merge(runtime_token_attr(:container_runtime_token)))
638     # See #14584
639     assert_not_nil reused
640     assert_equal c1.uuid, reused.uuid
641   end
642
643   test "find_reusable method with cuda" do
644     set_user_from_auth :active
645     # No cuda
646     no_cuda_attrs = REUSABLE_COMMON_ATTRS.merge({use_existing:false, priority:1, environment:{"var" => "queued"},
647                                                 runtime_constraints: {"vcpus" => 1, "ram" => 1, "keep_cache_disk"=>0, "keep_cache_ram"=>268435456, "API" => false,
648                                                                       "cuda" => {"device_count":0, "driver_version": "", "hardware_capability": ""}},})
649     c1, _ = minimal_new(no_cuda_attrs)
650     assert_equal Container::Queued, c1.state
651
652     # has cuda
653     cuda_attrs = REUSABLE_COMMON_ATTRS.merge({use_existing:false, priority:1, environment:{"var" => "queued"},
654                                                 runtime_constraints: {"vcpus" => 1, "ram" => 1, "keep_cache_disk"=>0, "keep_cache_ram"=>268435456, "API" => false,
655                                                                       "cuda" => {"device_count":1, "driver_version": "11.0", "hardware_capability": "9.0"}},})
656     c2, _ = minimal_new(cuda_attrs)
657     assert_equal Container::Queued, c2.state
658
659     # should find the no cuda one
660     reused = Container.find_reusable(no_cuda_attrs)
661     assert_not_nil reused
662     assert_equal reused.uuid, c1.uuid
663
664     # should find the cuda one
665     reused = Container.find_reusable(cuda_attrs)
666     assert_not_nil reused
667     assert_equal reused.uuid, c2.uuid
668   end
669
670   test "Container running" do
671     set_user_from_auth :active
672     c, _ = minimal_new priority: 1
673
674     set_user_from_auth :dispatch1
675     check_illegal_updates c, [{state: Container::Running},
676                               {state: Container::Complete}]
677
678     c.lock
679     c.update! state: Container::Running
680
681     check_illegal_modify c
682     check_bogus_states c
683
684     check_illegal_updates c, [{state: Container::Queued}]
685     c.reload
686
687     c.update! priority: 3
688   end
689
690   test "Lock and unlock" do
691     set_user_from_auth :active
692     c, cr = minimal_new priority: 0
693
694     set_user_from_auth :dispatch1
695     assert_equal Container::Queued, c.state
696
697     assert_raise(ArvadosModel::LockFailedError) do
698       # "no priority"
699       c.lock
700     end
701     c.reload
702     assert cr.update priority: 1
703
704     refute c.update(state: Container::Running), "not locked"
705     c.reload
706     refute c.update(state: Container::Complete), "not locked"
707     c.reload
708
709     assert c.lock, show_errors(c)
710     assert c.locked_by_uuid
711     assert c.auth_uuid
712
713     assert_raise(ArvadosModel::LockFailedError) {c.lock}
714     c.reload
715
716     assert c.unlock, show_errors(c)
717     refute c.locked_by_uuid
718     refute c.auth_uuid
719
720     refute c.update(state: Container::Running), "not locked"
721     c.reload
722     refute c.locked_by_uuid
723     refute c.auth_uuid
724
725     assert c.lock, show_errors(c)
726     assert c.update(state: Container::Running), show_errors(c)
727     assert c.locked_by_uuid
728     assert c.auth_uuid
729
730     auth_uuid_was = c.auth_uuid
731
732     assert_raise(ArvadosModel::LockFailedError) do
733       # Running to Locked is not allowed
734       c.lock
735     end
736     c.reload
737     assert_raise(ArvadosModel::InvalidStateTransitionError) do
738       # Running to Queued is not allowed
739       c.unlock
740     end
741     c.reload
742
743     assert c.update(state: Container::Complete), show_errors(c)
744     refute c.locked_by_uuid
745     refute c.auth_uuid
746
747     auth_exp = ApiClientAuthorization.find_by_uuid(auth_uuid_was).expires_at
748     assert_operator auth_exp, :<, db_current_time
749
750     assert_nil ApiClientAuthorization.validate(token: ApiClientAuthorization.find_by_uuid(auth_uuid_was).token)
751   end
752
753   test "Exceed maximum lock-unlock cycles" do
754     Rails.configuration.Containers.MaxDispatchAttempts = 3
755
756     set_user_from_auth :active
757     c, cr = minimal_new
758
759     set_user_from_auth :dispatch1
760     assert_equal Container::Queued, c.state
761     assert_equal 0, c.lock_count
762
763     c.lock
764     c.reload
765     assert_equal 1, c.lock_count
766     assert_equal Container::Locked, c.state
767
768     c.unlock
769     c.reload
770     assert_equal 1, c.lock_count
771     assert_equal Container::Queued, c.state
772
773     c.lock
774     c.reload
775     assert_equal 2, c.lock_count
776     assert_equal Container::Locked, c.state
777
778     c.unlock
779     c.reload
780     assert_equal 2, c.lock_count
781     assert_equal Container::Queued, c.state
782
783     c.lock
784     c.reload
785     assert_equal 3, c.lock_count
786     assert_equal Container::Locked, c.state
787
788     c.unlock
789     c.reload
790     assert_equal 3, c.lock_count
791     assert_equal Container::Cancelled, c.state
792
793     assert_raise(ArvadosModel::LockFailedError) do
794       # Cancelled to Locked is not allowed
795       c.lock
796     end
797   end
798
799   test "Container queued cancel" do
800     set_user_from_auth :active
801     c, cr = minimal_new({container_count_max: 1})
802     set_user_from_auth :dispatch1
803     assert c.update(state: Container::Cancelled), show_errors(c)
804     check_no_change_from_cancelled c
805     cr.reload
806     assert_equal ContainerRequest::Final, cr.state
807   end
808
809   test "Container queued count" do
810     assert_equal 1, Container.readable_by(users(:active)).where(state: "Queued").count
811   end
812
813   test "Containers with no matching request are readable by admin" do
814     uuids = Container.includes('container_requests').where(container_requests: {uuid: nil}).collect(&:uuid)
815     assert_not_empty uuids
816     assert_empty Container.readable_by(users(:active)).where(uuid: uuids)
817     assert_not_empty Container.readable_by(users(:admin)).where(uuid: uuids)
818     assert_equal uuids.count, Container.readable_by(users(:admin)).where(uuid: uuids).count
819   end
820
821   test "Container locked cancel" do
822     set_user_from_auth :active
823     c, _ = minimal_new
824     set_user_from_auth :dispatch1
825     assert c.lock, show_errors(c)
826     assert c.update(state: Container::Cancelled), show_errors(c)
827     check_no_change_from_cancelled c
828   end
829
830   test "Container locked with non-expiring token" do
831     Rails.configuration.API.TokenMaxLifetime = 1.hour
832     set_user_from_auth :active
833     c, _ = minimal_new
834     set_user_from_auth :dispatch1
835     assert c.lock, show_errors(c)
836     refute c.auth.nil?
837     assert c.auth.expires_at.nil?
838     assert c.auth.user_id == User.find_by_uuid(users(:active).uuid).id
839   end
840
841   test "Container locked cancel with log" do
842     set_user_from_auth :active
843     c, _ = minimal_new
844     set_user_from_auth :dispatch1
845     assert c.lock, show_errors(c)
846     assert c.update(
847              state: Container::Cancelled,
848              log: collections(:real_log_collection).portable_data_hash,
849            ), show_errors(c)
850     check_no_change_from_cancelled c
851   end
852
853   test "Container running cancel" do
854     set_user_from_auth :active
855     c, _ = minimal_new
856     set_user_from_auth :dispatch1
857     c.lock
858     c.update! state: Container::Running
859     c.update! state: Container::Cancelled
860     check_no_change_from_cancelled c
861   end
862
863   test "Container create forbidden for non-admin" do
864     set_user_from_auth :active_trustedclient
865     c = Container.new DEFAULT_ATTRS
866     c.environment = {}
867     c.mounts = {"BAR" => "FOO"}
868     c.output_path = "/tmp"
869     c.priority = 1
870     c.runtime_constraints = {}
871     assert_raises(ArvadosModel::PermissionDeniedError) do
872       c.save!
873     end
874   end
875
876   [
877     [Container::Queued, {state: Container::Locked}],
878     [Container::Queued, {state: Container::Running}],
879     [Container::Queued, {state: Container::Complete}],
880     [Container::Queued, {state: Container::Cancelled}],
881     [Container::Queued, {priority: 123456789}],
882     [Container::Queued, {runtime_status: {'error' => 'oops'}}],
883     [Container::Queued, {cwd: '/'}],
884     [Container::Locked, {state: Container::Running}],
885     [Container::Locked, {state: Container::Queued}],
886     [Container::Locked, {priority: 123456789}],
887     [Container::Locked, {runtime_status: {'error' => 'oops'}}],
888     [Container::Locked, {cwd: '/'}],
889     [Container::Running, {state: Container::Complete}],
890     [Container::Running, {state: Container::Cancelled}],
891     [Container::Running, {priority: 123456789}],
892     [Container::Running, {runtime_status: {'error' => 'oops'}}],
893     [Container::Running, {cwd: '/'}],
894     [Container::Running, {gateway_address: "172.16.0.1:12345"}],
895     [Container::Running, {interactive_session_started: true}],
896     [Container::Complete, {state: Container::Cancelled}],
897     [Container::Complete, {priority: 123456789}],
898     [Container::Complete, {runtime_status: {'error' => 'oops'}}],
899     [Container::Complete, {cwd: '/'}],
900     [Container::Cancelled, {cwd: '/'}],
901   ].each do |start_state, updates|
902     test "Container update #{updates.inspect} when #{start_state} forbidden for non-admin" do
903       set_user_from_auth :active
904       c, _ = minimal_new
905       if start_state != Container::Queued
906         set_user_from_auth :dispatch1
907         c.lock
908         if start_state != Container::Locked
909           c.update! state: Container::Running
910           if start_state != Container::Running
911             c.update! state: start_state
912           end
913         end
914       end
915       assert_equal c.state, start_state
916       set_user_from_auth :active
917       assert_raises(ArvadosModel::PermissionDeniedError) do
918         c.update! updates
919       end
920     end
921   end
922
923   test "can only change exit code while running and at completion" do
924     set_user_from_auth :active
925     c, _ = minimal_new
926     set_user_from_auth :dispatch1
927     c.lock
928     check_illegal_updates c, [{exit_code: 1}]
929     c.update! state: Container::Running
930     assert c.update(exit_code: 1)
931     assert c.update(exit_code: 1, state: Container::Complete)
932   end
933
934   test "locked_by_uuid can update log when locked/running, and output when running" do
935     set_user_from_auth :active
936     logcoll = collections(:real_log_collection)
937     c, cr1 = minimal_new
938     cr2 = ContainerRequest.new(DEFAULT_ATTRS)
939     cr2.state = ContainerRequest::Committed
940     act_as_user users(:active) do
941       cr2.save!
942     end
943     assert_equal cr1.container_uuid, cr2.container_uuid
944
945     logpdh_time1 = logcoll.portable_data_hash
946
947     set_user_from_auth :dispatch1
948     c.lock
949     assert_equal c.locked_by_uuid, Thread.current[:api_client_authorization].uuid
950     c.update!(log: logpdh_time1)
951     c.update!(state: Container::Running)
952     cr1.reload
953     cr2.reload
954     cr1log_uuid = cr1.log_uuid
955     cr2log_uuid = cr2.log_uuid
956     assert_not_nil cr1log_uuid
957     assert_not_nil cr2log_uuid
958     assert_not_equal logcoll.uuid, cr1log_uuid
959     assert_not_equal logcoll.uuid, cr2log_uuid
960     assert_not_equal cr1log_uuid, cr2log_uuid
961
962     logcoll.update!(manifest_text: logcoll.manifest_text + ". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo.txt\n")
963     logpdh_time2 = logcoll.portable_data_hash
964
965     assert c.update(output: collections(:collection_owned_by_active).portable_data_hash)
966     assert c.update(log: logpdh_time2)
967     assert c.update(state: Container::Complete, log: logcoll.portable_data_hash)
968     c.reload
969     assert_equal collections(:collection_owned_by_active).portable_data_hash, c.output
970     assert_equal logpdh_time2, c.log
971     refute c.update(output: nil)
972     refute c.update(log: nil)
973     cr1.reload
974     cr2.reload
975     assert_equal cr1log_uuid, cr1.log_uuid
976     assert_equal cr2log_uuid, cr2.log_uuid
977     assert_equal 1, Collection.where(uuid: [cr1log_uuid, cr2log_uuid]).to_a.collect(&:portable_data_hash).uniq.length
978     assert_equal ". acbd18db4cc2f85cedef654fccc4a4d8+3 cdd549ae79fe6640fa3d5c6261d8303c+195 0:3:foo.txt 3:195:zzzzz-8i9sb-0vsrcqi7whchuil.log.txt
979 ./log\\040for\\040container\\040#{cr1.container_uuid} acbd18db4cc2f85cedef654fccc4a4d8+3 cdd549ae79fe6640fa3d5c6261d8303c+195 0:3:foo.txt 3:195:zzzzz-8i9sb-0vsrcqi7whchuil.log.txt
980 ", Collection.find_by_uuid(cr1log_uuid).manifest_text
981   end
982
983   ["auth_uuid", "runtime_token"].each do |tok|
984     test "#{tok} can set output, progress, runtime_status, state, exit_code on running container -- but not log" do
985       if tok == "runtime_token"
986         set_user_from_auth :spectator
987         c, _ = minimal_new(container_image: "9ae44d5792468c58bcf85ce7353c7027+124",
988                            runtime_token: api_client_authorizations(:active).token)
989       else
990         set_user_from_auth :active
991         c, _ = minimal_new
992       end
993       set_user_from_auth :dispatch1
994       c.lock
995       c.update! state: Container::Running
996
997       if tok == "runtime_token"
998         auth = ApiClientAuthorization.validate(token: c.runtime_token)
999         Thread.current[:api_client_authorization] = auth
1000         Thread.current[:api_client] = auth.api_client
1001         Thread.current[:token] = auth.token
1002         Thread.current[:user] = auth.user
1003       else
1004         auth = ApiClientAuthorization.find_by_uuid(c.auth_uuid)
1005         Thread.current[:api_client_authorization] = auth
1006         Thread.current[:api_client] = auth.api_client
1007         Thread.current[:token] = auth.token
1008         Thread.current[:user] = auth.user
1009       end
1010
1011       assert c.update(gateway_address: "127.0.0.1:9")
1012       assert c.update(output: collections(:collection_owned_by_active).portable_data_hash)
1013       assert c.update(runtime_status: {'warning' => 'something happened'})
1014       assert c.update(progress: 0.5)
1015       assert c.update(exit_code: 0)
1016       refute c.update(log: collections(:real_log_collection).portable_data_hash)
1017       c.reload
1018       assert c.update(state: Container::Complete, exit_code: 0)
1019     end
1020   end
1021
1022   test "not allowed to set output that is not readable by current user" do
1023     set_user_from_auth :active
1024     c, _ = minimal_new
1025     set_user_from_auth :dispatch1
1026     c.lock
1027     c.update! state: Container::Running
1028
1029     Thread.current[:api_client_authorization] = ApiClientAuthorization.find_by_uuid(c.auth_uuid)
1030     Thread.current[:user] = User.find_by_id(Thread.current[:api_client_authorization].user_id)
1031
1032     assert_raises ActiveRecord::RecordInvalid do
1033       c.update! output: collections(:collection_not_readable_by_active).portable_data_hash
1034     end
1035   end
1036
1037   test "other token cannot set output on running container" do
1038     set_user_from_auth :active
1039     c, _ = minimal_new
1040     set_user_from_auth :dispatch1
1041     c.lock
1042     c.update! state: Container::Running
1043
1044     set_user_from_auth :running_to_be_deleted_container_auth
1045     assert_raises(ArvadosModel::PermissionDeniedError) do
1046       c.update(output: collections(:foo_file).portable_data_hash)
1047     end
1048   end
1049
1050   test "can set trashed output on running container" do
1051     set_user_from_auth :active
1052     c, _ = minimal_new
1053     set_user_from_auth :dispatch1
1054     c.lock
1055     c.update! state: Container::Running
1056
1057     output = Collection.find_by_uuid('zzzzz-4zz18-mto52zx1s7sn3jk')
1058
1059     assert output.is_trashed
1060     assert c.update output: output.portable_data_hash
1061     assert c.update! state: Container::Complete
1062   end
1063
1064   test "not allowed to set trashed output that is not readable by current user" do
1065     set_user_from_auth :active
1066     c, _ = minimal_new
1067     set_user_from_auth :dispatch1
1068     c.lock
1069     c.update! state: Container::Running
1070
1071     output = Collection.find_by_uuid('zzzzz-4zz18-mto52zx1s7sn3jr')
1072
1073     Thread.current[:api_client_authorization] = ApiClientAuthorization.find_by_uuid(c.auth_uuid)
1074     Thread.current[:user] = User.find_by_id(Thread.current[:api_client_authorization].user_id)
1075
1076     assert_raises ActiveRecord::RecordInvalid do
1077       c.update! output: output.portable_data_hash
1078     end
1079   end
1080
1081   test "user cannot delete" do
1082     set_user_from_auth :active
1083     c, _ = minimal_new
1084     assert_raises ArvadosModel::PermissionDeniedError do
1085       c.destroy
1086     end
1087     assert Container.find_by_uuid(c.uuid)
1088   end
1089
1090   [
1091     {state: Container::Complete, exit_code: 0, output: '1f4b0bc7583c2a7f9102c395f4ffc5e3+45'},
1092     {state: Container::Cancelled},
1093   ].each do |final_attrs|
1094     test "secret_mounts and runtime_token are null after container is #{final_attrs[:state]}" do
1095       set_user_from_auth :active
1096       c, cr = minimal_new(secret_mounts: {'/secret' => {'kind' => 'text', 'content' => 'foo'}},
1097                           container_count_max: 1, runtime_token: api_client_authorizations(:active).token)
1098       set_user_from_auth :dispatch1
1099       c.lock
1100       c.update!(state: Container::Running)
1101       c.reload
1102       assert c.secret_mounts.has_key?('/secret')
1103       assert_equal api_client_authorizations(:active).token, c.runtime_token
1104
1105       c.update!(final_attrs)
1106       c.reload
1107       assert_equal({}, c.secret_mounts)
1108       assert_nil c.runtime_token
1109       cr.reload
1110       assert_equal({}, cr.secret_mounts)
1111       assert_nil cr.runtime_token
1112       assert_no_secrets_logged
1113     end
1114   end
1115
1116   def configure_preemptible_instance_type
1117     Rails.configuration.InstanceTypes = ConfigLoader.to_OrderedOptions({
1118       "a1.small.pre" => {
1119         "Preemptible" => true,
1120         "Price" => 0.1,
1121         "ProviderType" => "a1.small",
1122         "VCPUs" => 1,
1123         "RAM" => 1000000000,
1124       },
1125     })
1126   end
1127
1128   def vary_parameters(**kwargs)
1129     # kwargs is a hash that maps parameters to an array of values.
1130     # This function enumerates every possible hash where each key has one of
1131     # the values from its array.
1132     # The output keys are strings since that's what container hash attributes
1133     # want.
1134     # A nil value yields a hash without that key.
1135     [[:_, nil]].product(
1136       *kwargs.map { |(key, values)| [key.to_s].product(values) },
1137     ).map { |param_pairs| Hash[param_pairs].compact }
1138   end
1139
1140   def retry_with_scheduling_parameters(param_hashes)
1141     set_user_from_auth :admin
1142     containers = {}
1143     requests = []
1144     param_hashes.each do |scheduling_parameters|
1145       container, request = minimal_new(scheduling_parameters: scheduling_parameters)
1146       containers[container.uuid] = container
1147       requests << request
1148     end
1149     refute(containers.empty?, "buggy test: no scheduling parameters enumerated")
1150     assert_equal(1, containers.length)
1151     _, container1 = containers.shift
1152     container1.lock
1153     container1.update!(state: Container::Cancelled)
1154     container1.reload
1155     request1 = requests.shift
1156     request1.reload
1157     assert_not_equal(container1.uuid, request1.container_uuid)
1158     requests.each do |request|
1159       request.reload
1160       assert_equal(request1.container_uuid, request.container_uuid)
1161     end
1162     container2 = Container.find_by_uuid(request1.container_uuid)
1163     assert_not_nil(container2)
1164     return container2
1165   end
1166
1167   preemptible_values = [true, false, nil]
1168   preemptible_values.permutation(1).chain(
1169     preemptible_values.product(preemptible_values),
1170     preemptible_values.product(preemptible_values, preemptible_values),
1171   ).each do |preemptible_a|
1172     # If the first req has preemptible=true but a subsequent req
1173     # doesn't, we want to avoid reusing the first container, so this
1174     # test isn't appropriate.
1175     next if preemptible_a[0] &&
1176             ((preemptible_a.length > 1 && !preemptible_a[1]) ||
1177              (preemptible_a.length > 2 && !preemptible_a[2]))
1178     test "retry requests scheduled with preemptible=#{preemptible_a}" do
1179       configure_preemptible_instance_type
1180       param_hashes = vary_parameters(preemptible: preemptible_a)
1181       container = retry_with_scheduling_parameters(param_hashes)
1182       assert_equal(preemptible_a.all?,
1183                    container.scheduling_parameters["preemptible"] || false)
1184     end
1185   end
1186
1187   partition_values = [nil, [], ["alpha"], ["alpha", "bravo"], ["bravo", "charlie"]]
1188   partition_values.permutation(1).chain(
1189     partition_values.permutation(2),
1190   ).each do |partitions_a|
1191     test "retry requests scheduled with partitions=#{partitions_a}" do
1192       param_hashes = vary_parameters(partitions: partitions_a)
1193       container = retry_with_scheduling_parameters(param_hashes)
1194       expected = if partitions_a.any? { |value| value.nil? or value.empty? }
1195                    []
1196                  else
1197                    partitions_a.flatten.uniq
1198                  end
1199       actual = container.scheduling_parameters["partitions"] || []
1200       assert_equal(expected.sort, actual.sort)
1201     end
1202   end
1203
1204   runtime_values = [nil, 0, 1, 2, 3]
1205   runtime_values.permutation(1).chain(
1206     runtime_values.permutation(2),
1207     runtime_values.permutation(3),
1208   ).each do |max_run_time_a|
1209     test "retry requests scheduled with max_run_time=#{max_run_time_a}" do
1210       param_hashes = vary_parameters(max_run_time: max_run_time_a)
1211       container = retry_with_scheduling_parameters(param_hashes)
1212       expected = if max_run_time_a.any? { |value| value.nil? or value == 0 }
1213                    0
1214                  else
1215                    max_run_time_a.max
1216                  end
1217       actual = container.scheduling_parameters["max_run_time"] || 0
1218       assert_equal(expected, actual)
1219     end
1220   end
1221
1222   test "retry requests with multi-varied scheduling parameters" do
1223     configure_preemptible_instance_type
1224     param_hashes = [{
1225                      "partitions": ["alpha", "bravo"],
1226                      "preemptible": false,
1227                      "max_run_time": 10,
1228                     }, {
1229                      "partitions": ["alpha", "charlie"],
1230                      "max_run_time": 20,
1231                     }, {
1232                      "partitions": ["bravo", "charlie"],
1233                      "preemptible": true,
1234                      "max_run_time": 30,
1235                     }]
1236     container = retry_with_scheduling_parameters(param_hashes)
1237     actual = container.scheduling_parameters
1238     assert_equal(["alpha", "bravo", "charlie"], actual["partitions"]&.sort)
1239     assert_equal(false, actual["preemptible"] || false)
1240     assert_equal(30, actual["max_run_time"])
1241   end
1242
1243   test "retry requests with unset scheduling parameters" do
1244     configure_preemptible_instance_type
1245     param_hashes = vary_parameters(
1246       preemptible: [nil, true],
1247       partitions: [nil, ["alpha"]],
1248       max_run_time: [nil, 5],
1249     )
1250     container = retry_with_scheduling_parameters(param_hashes)
1251     actual = container.scheduling_parameters
1252     assert_equal([], actual["partitions"] || [])
1253     assert_equal(false, actual["preemptible"] || false)
1254     assert_equal(0, actual["max_run_time"] || 0)
1255   end
1256
1257   test "retry requests with default scheduling parameters" do
1258     configure_preemptible_instance_type
1259     param_hashes = vary_parameters(
1260       preemptible: [false, true],
1261       partitions: [[], ["bravo"]],
1262       max_run_time: [0, 1],
1263     )
1264     container = retry_with_scheduling_parameters(param_hashes)
1265     actual = container.scheduling_parameters
1266     assert_equal([], actual["partitions"] || [])
1267     assert_equal(false, actual["preemptible"] || false)
1268     assert_equal(0, actual["max_run_time"] || 0)
1269   end
1270
1271   def run_container(request_params, final_attrs)
1272     final_attrs[:state] ||= Container::Complete
1273     if final_attrs[:state] == Container::Complete
1274       final_attrs[:exit_code] ||= 0
1275       final_attrs[:log] ||= collections(:log_collection).portable_data_hash
1276       final_attrs[:output] ||= collections(:multilevel_collection_1).portable_data_hash
1277     end
1278     container, request = minimal_new(request_params)
1279     container.lock
1280     container.update!(state: Container::Running)
1281     container.update!(final_attrs)
1282     return container, request
1283   end
1284
1285   def check_reuse_with_variations(default_keep_cache_ram, vary_attr, start_value, variations)
1286     container_params = REUSABLE_ATTRS_SLIM.merge(vary_attr => start_value)
1287     orig_default = Rails.configuration.Containers.DefaultKeepCacheRAM
1288     begin
1289       Rails.configuration.Containers.DefaultKeepCacheRAM = default_keep_cache_ram
1290       set_user_from_auth :admin
1291       expected, _ = run_container(container_params, {})
1292       variations.each do |variation|
1293         full_variation = REUSABLE_ATTRS_SLIM[vary_attr].merge(variation)
1294         parameters = REUSABLE_ATTRS_SLIM.merge(vary_attr => full_variation)
1295         actual = Container.find_reusable(parameters)
1296         assert_equal(expected.uuid, actual&.uuid,
1297                      "request with #{vary_attr}=#{variation} did not reuse container")
1298       end
1299     ensure
1300       Rails.configuration.Containers.DefaultKeepCacheRAM = orig_default
1301     end
1302   end
1303
1304   # Test that we can reuse a container with a known keep_cache_ram constraint,
1305   # no matter what keep_cache_* constraints the new request uses.
1306   [0, 2 << 30, 4 << 30].product(
1307     [0, 1],
1308     [true, false],
1309   ).each do |(default_keep_cache_ram, multiplier, keep_disk_constraint)|
1310     test "reuse request with DefaultKeepCacheRAM=#{default_keep_cache_ram}, keep_cache_ram*=#{multiplier}, keep_cache_disk=#{keep_disk_constraint}" do
1311       runtime_constraints = REUSABLE_ATTRS_SLIM[:runtime_constraints].merge(
1312         "keep_cache_ram" => default_keep_cache_ram * multiplier,
1313       )
1314       if not keep_disk_constraint
1315         # Simulate a container that predates keep_cache_disk by deleting
1316         # the constraint entirely.
1317         runtime_constraints.delete("keep_cache_disk")
1318       end
1319       # Important values are:
1320       # * 0
1321       # * 2GiB, the minimum default keep_cache_disk
1322       # * 8GiB, the default keep_cache_disk based on container ram
1323       # * 32GiB, the maximum default keep_cache_disk
1324       # Check these values and values in between.
1325       vary_values = [0, 1, 2, 6, 8, 10, 32, 33].map { |v| v << 30 }.to_a
1326       variations = vary_parameters(keep_cache_ram: vary_values)
1327                      .chain(vary_parameters(keep_cache_disk: vary_values))
1328       check_reuse_with_variations(
1329         default_keep_cache_ram,
1330         :runtime_constraints,
1331         runtime_constraints,
1332         variations,
1333       )
1334     end
1335   end
1336
1337   # Test that we can reuse a container with a known keep_cache_disk constraint,
1338   # no matter what keep_cache_* constraints the new request uses.
1339   # keep_cache_disk values are the important values discussed in the test above.
1340   [0, 2 << 30, 4 << 30]
1341     .product([0, 2 << 30, 8 << 30, 32 << 30])
1342     .each do |(default_keep_cache_ram, keep_cache_disk)|
1343     test "reuse request with DefaultKeepCacheRAM=#{default_keep_cache_ram} and keep_cache_disk=#{keep_cache_disk}" do
1344       runtime_constraints = REUSABLE_ATTRS_SLIM[:runtime_constraints].merge(
1345         "keep_cache_disk" => keep_cache_disk,
1346       )
1347       vary_values = [0, 1, 2, 6, 8, 10, 32, 33].map { |v| v << 30 }.to_a
1348       variations = vary_parameters(keep_cache_ram: vary_values)
1349                      .chain(vary_parameters(keep_cache_disk: vary_values))
1350       check_reuse_with_variations(
1351         default_keep_cache_ram,
1352         :runtime_constraints,
1353         runtime_constraints,
1354         variations,
1355       )
1356     end
1357   end
1358
1359   # Test that a container request can reuse a container with an exactly
1360   # matching keep_cache_* constraint, no matter what the defaults.
1361   [0, 2 << 30, 4 << 30].product(
1362     ["keep_cache_disk", "keep_cache_ram"],
1363     [135790, 13 << 30, 135 << 30],
1364   ).each do |(default_keep_cache_ram, constraint_key, constraint_value)|
1365     test "reuse request with #{constraint_key}=#{constraint_value} and DefaultKeepCacheRAM=#{default_keep_cache_ram}" do
1366       runtime_constraints = REUSABLE_ATTRS_SLIM[:runtime_constraints].merge(
1367         constraint_key => constraint_value,
1368       )
1369       check_reuse_with_variations(
1370         default_keep_cache_ram,
1371         :runtime_constraints,
1372         runtime_constraints,
1373         [runtime_constraints],
1374       )
1375     end
1376   end
1377 end