1 # Copyright (C) The Arvados Authors. All rights reserved.
3 # SPDX-License-Identifier: AGPL-3.0
6 require 'crunch_dispatch'
7 require 'helpers/git_test_helper'
9 class CrunchDispatchTest < ActiveSupport::TestCase
# Verifies the node list returned by CrunchDispatch#nodes_available_for_job_now
# for jobs with different min_nodes / min_ram_mb_per_node constraints, against
# a hand-built set of nodes with different prices and sizes.
# NOTE(review): this view omits several lines of the test (closing `end`s and
# parts of the Node.create!/Job.new argument lists); comments below describe
# only what is visible.
12 test 'choose cheaper nodes first' do
14 # Replace test fixtures with a set suitable for testing dispatch
17 # Idle nodes with different prices
# Each row is [hostname, price, cores].
18 [['compute1', 3.20, 32],
19 ['compute2', 1.60, 16],
20 ['compute3', 0.80, 8]].each do |hostname, price, cores|
21 Node.create!(hostname: hostname,
23 'slurm_state' => 'idle',
# RAM and scratch sizes are derived from the core count, so compute1 gets
# 32768 MB RAM, compute2 gets 16384 MB and compute3 gets 8192 MB.
# NOTE(review): `price` is not used on any visible line -- presumably it is
# stored in a node property on an omitted line; confirm.
29 'total_cpu_cores' => cores,
30 'total_ram_mb' => cores*1024,
31 'total_scratch_mb' => cores*10000,
35 # Node with no price information
36 Node.create!(hostname: 'compute4',
38 'slurm_state' => 'idle',
41 'total_cpu_cores' => 8,
42 'total_ram_mb' => 8192,
43 'total_scratch_mb' => 80000,
# compute5 is the only node created with slurm_state 'alloc' rather than
# 'idle'; it never appears in any expected result below.
47 Node.create!(hostname: 'compute5',
49 'slurm_state' => 'alloc',
55 'total_cpu_cores' => 32,
56 'total_ram_mb' => 32768,
57 'total_scratch_mb' => 320000,
61 dispatch = CrunchDispatch.new
# Each row is [min_nodes, min_ram_mb_per_node, expected hostnames in order].
# With a 16384 MB minimum only compute1 and compute2 (and the allocated
# compute5) are large enough, and compute2 (the cheaper one) is expected first.
# With an 8000 MB minimum the unpriced compute4 is expected ahead of compute3.
62 [[1, 16384, ['compute2']],
63 [2, 16384, ['compute2', 'compute1']],
64 [2, 8000, ['compute4', 'compute3']],
65 ].each do |min_nodes, min_ram, expect_nodes|
66 job = Job.new(uuid: 'zzzzz-8i9sb-382lhiizavzhqlp',
67 runtime_constraints: {
68 'min_nodes' => min_nodes,
69 'min_ram_mb_per_node' => min_ram,
# The expectation is order-sensitive: assert_equal compares the arrays as-is.
71 nodes = dispatch.nodes_available_for_job_now job
72 assert_equal expect_nodes, nodes
# Verifies that a running dispatcher takes the lock file and releases it
# after being sent SIGTERM.
# NOTE(review): the lines that start the dispatcher and define `pid`, and the
# bodies of the assert_with_timeout blocks, are not visible in this view.
76 test 'respond to TERM' do
77 lockfile = Rails.root.join 'tmp', 'dispatch.lock'
# NOTE(review): this environment variable is set for the whole process and no
# restore is visible here -- confirm it cannot leak into other tests.
78 ENV['CRUNCH_DISPATCH_LOCKFILE'] = lockfile.to_s
82 # Abandon database connections inherited from parent
84 # https://github.com/kstephens/rails_is_forked
85 ActiveRecord::Base.connection_handler.connection_pools.each_value do |pool|
# NOTE(review): the receiver/context in which this instance variable is
# assigned is on an omitted line (presumably an instance_eval on `pool`).
87 @reserved_connections = {}
91 ActiveRecord::Base.establish_connection
93 dispatch = CrunchDispatch.new
# Stub did_recently so it always returns true for this dispatcher instance.
94 dispatch.stubs(:did_recently).returns true
# Wait up to 5 seconds for the lock to be taken, send TERM, then wait up to
# 20 seconds for the lock to be released.
100 assert_with_timeout 5, "Dispatch did not lock #{lockfile}" do
104 Process.kill("TERM", pid)
106 assert_with_timeout 20, "Dispatch did not unlock #{lockfile}" do
# Verifies that, when CRUNCH_CGROUP_ROOT is set, the command passed to
# Open3.popen3 for the job contains '--cgroup-root' immediately followed by
# that path.
# NOTE(review): parts of the matcher block and the error handling around
# refresh_todo are not visible in this view.
111 test 'override --cgroup-root with CRUNCH_CGROUP_ROOT' do
# NOTE(review): neither the environment variable nor the configuration change
# below is visibly restored in this block -- confirm they cannot affect
# other tests.
112 ENV['CRUNCH_CGROUP_ROOT'] = '/path/to/cgroup'
113 Rails.configuration.crunch_job_wrapper = :none
114 act_as_system_user do
115 j = Job.create(repository: 'active/foo',
117 script_version: '4fe459abe02d9b365932b8f5dc419439ab4e2577',
118 script_parameters: {})
# Only popen3 invocations whose argument list contains this job's UUID are
# inspected for the --cgroup-root flag.
120 Open3.expects(:popen3).at_least_once.with do |*args|
121 if args.index(j.uuid)
122 ok = ((i = args.index '--cgroup-root') and
123 (args[i+1] == '/path/to/cgroup'))
# A matching popen3 call raises instead of spawning anything; the message
# 'all is well' marks this as the expected outcome.
126 end.raises(StandardError.new('all is well'))
127 dispatch = CrunchDispatch.new
128 dispatch.parse_argv ['--jobs']
129 dispatch.refresh_todo
# Test helper: loops in 0.1 increments of a counter `t` until it reaches
# `timeout`, then fails the test with `message` plus the number of seconds
# waited.
# NOTE(review): the initialisation of `t` and the loop body are not visible
# here; the callers pass a block, so presumably the loop returns early once
# the block yields a truthy value -- confirm.
135 def assert_with_timeout timeout, message
137 while (t += 0.1) < timeout
# Reached only if the loop ran to completion without returning.
143 assert false, message + " (waited #{timeout} seconds)"
# Test helper: opens `lockfile` read/write (creating it with mode 0644 if it
# does not exist) and returns the result of a non-blocking exclusive flock
# on it.
# `lockfile` must respond to #open with File-style flags (the caller visible
# in this file passes the result of Rails.root.join).
146 def can_lock lockfile
147 lockfile.open(File::RDWR|File::CREAT, 0644) do |f|
# LOCK_NB makes flock return immediately instead of waiting for the lock.
148 return f.flock(File::LOCK_EX|File::LOCK_NB)
# Exercises CrunchDispatch#rate_limit with a complete log line, a sequence of
# partial-line segments (text wrapped in "[...]" markers), and two lines that
# look similar but are not valid segments, checking the return value and the
# job's :log_throttle_lines_so_far counter after each call.
# NOTE(review): the line that creates `job` and some lines between steps are
# not visible in this view.
152 test 'rate limit of partial line segments' do
153 act_as_system_user do
154 Rails.configuration.crunch_log_partial_line_throttle_period = 1
# Reset the throttle bookkeeping on `job` before the first call.
157 job[:bytes_logged] = 0
158 job[:log_throttle_bytes_so_far] = 0
159 job[:log_throttle_lines_so_far] = 0
160 job[:log_throttle_bytes_skipped] = 0
161 job[:log_throttle_is_open] = true
162 job[:log_throttle_partial_line_last_at] = Time.new(0)
163 job[:log_throttle_first_partial_line] = true
165 dispatch = CrunchDispatch.new
# An ordinary line: rate_limit returns true, the text is unchanged and the
# line counter becomes 1.
167 line = "first log line"
168 limit = dispatch.rate_limit(job, line)
169 assert_equal true, limit
170 assert_equal "first log line", line
171 assert_equal 1, job[:log_throttle_lines_so_far]
173 # first partial line segment is skipped and counted towards skipped lines
174 now = Time.now.strftime('%Y-%m-%d-%H:%M:%S')
175 line = "#{now} localhost 100 0 stderr [...] this is first partial line segment [...]"
176 limit = dispatch.rate_limit(job, line)
177 assert_equal true, limit
# rate_limit is expected to have modified `line` in place so that it contains
# the rate-limiting notice; the third argument is the failure message.
178 assert_includes line, "Rate-limiting partial segments of long lines", line
179 assert_equal 2, job[:log_throttle_lines_so_far]
181 # next partial line segment within throttle interval is skipped but not counted towards skipped lines
182 line = "#{now} localhost 100 0 stderr [...] second partial line segment within the interval [...]"
183 limit = dispatch.rate_limit(job, line)
184 assert_equal false, limit
185 assert_equal 2, job[:log_throttle_lines_so_far]
187 # next partial line after interval is counted towards skipped lines
# NOTE(review): whatever lets the throttle period elapse before this call is
# on a line not visible here (presumably a sleep) -- confirm.
189 line = "#{now} localhost 100 0 stderr [...] third partial line segment after the interval [...]"
190 limit = dispatch.rate_limit(job, line)
191 assert_equal false, limit
192 assert_equal 3, job[:log_throttle_lines_so_far]
194 # this is not a valid line segment
195 line = "#{now} localhost 100 0 stderr [...] does not end with [...] and is not a partial segment"
196 limit = dispatch.rate_limit(job, line)
197 assert_equal true, limit
198 assert_equal "#{now} localhost 100 0 stderr [...] does not end with [...] and is not a partial segment", line
199 assert_equal 4, job[:log_throttle_lines_so_far]
201 # this also is not a valid line segment
202 line = "#{now} localhost 100 0 stderr does not start correctly but ends with [...]"
203 limit = dispatch.rate_limit(job, line)
204 assert_equal true, limit
205 assert_equal "#{now} localhost 100 0 stderr does not start correctly but ends with [...]", line
206 assert_equal 5, job[:log_throttle_lines_so_far]
# Runs CrunchDispatch#check_orphaned_slurm_jobs against a canned squeue
# listing and expects an scancel command for the job name
# 'zzzzz-8i9sb-4cf0abc123e809j'.
# NOTE(review): the receivers of the `with(...)` expectations below are on
# lines not visible in this view.
210 test 'scancel orphaned job nodes' do
211 Rails.configuration.crunch_job_wrapper = :slurm_immediate
212 act_as_system_user do
213 dispatch = CrunchDispatch.new
# Canned command output: a shell pipeline echoing four job names, one per
# line (one of them is deliberately not a valid job UUID), and a `true`
# command standing in for scancel's output.
215 squeue_resp = IO.popen("echo zzzzz-8i9sb-pshmckwoma9plh7\necho thisisnotvalidjobuuid\necho zzzzz-8i9sb-4cf0abc123e809j\necho zzzzz-dz642-o04e3r651turtdr\n")
216 scancel_resp = IO.popen("true")
# Expected argument list for the squeue query.
219 with(['squeue', '-a', '-h', '-o', '%j']).
# Expected argument list for the cancellation, prefixed with whatever
# dispatch.sudo_preface returns.
223 with(dispatch.sudo_preface + ['scancel', '-n', 'zzzzz-8i9sb-4cf0abc123e809j']).
224 returns(scancel_resp)
226 dispatch.check_orphaned_slurm_jobs