Merge branch '9674-cwl-initialworkdir' closes #9674
[arvados.git] / services / api / test / unit / crunch_dispatch_test.rb
1 require 'test_helper'
2 require 'crunch_dispatch'
3 require 'helpers/git_test_helper'
4
5 class CrunchDispatchTest < ActiveSupport::TestCase
6   include GitTestHelper
7
8   test 'choose cheaper nodes first' do
9     act_as_system_user do
10       # Replace test fixtures with a set suitable for testing dispatch
11       Node.destroy_all
12
13       # Idle nodes with different prices
14       [['compute1', 3.20, 32],
15        ['compute2', 1.60, 16],
16        ['compute3', 0.80, 8]].each do |hostname, price, cores|
17         Node.create!(hostname: hostname,
18                      info: {
19                        'slurm_state' => 'idle',
20                      },
21                      properties: {
22                        'cloud_node' => {
23                          'price' => price,
24                        },
25                        'total_cpu_cores' => cores,
26                        'total_ram_mb' => cores*1024,
27                        'total_scratch_mb' => cores*10000,
28                      })
29       end
30
31       # Node with no price information
32       Node.create!(hostname: 'compute4',
33                    info: {
34                      'slurm_state' => 'idle',
35                    },
36                    properties: {
37                      'total_cpu_cores' => 8,
38                      'total_ram_mb' => 8192,
39                      'total_scratch_mb' => 80000,
40                    })
41
42       # Cheap but busy node
43       Node.create!(hostname: 'compute5',
44                    info: {
45                      'slurm_state' => 'alloc',
46                    },
47                    properties: {
48                      'cloud_node' => {
49                        'price' => 0.10,
50                      },
51                      'total_cpu_cores' => 32,
52                      'total_ram_mb' => 32768,
53                      'total_scratch_mb' => 320000,
54                    })
55     end
56
57     dispatch = CrunchDispatch.new
58     [[1, 16384, ['compute2']],
59      [2, 16384, ['compute2', 'compute1']],
60      [2, 8000, ['compute4', 'compute3']],
61     ].each do |min_nodes, min_ram, expect_nodes|
62       job = Job.new(runtime_constraints: {
63                       'min_nodes' => min_nodes,
64                       'min_ram_mb_per_node' => min_ram,
65                     })
66       nodes = dispatch.nodes_available_for_job_now job
67       assert_equal expect_nodes, nodes
68     end
69   end
70
71   test 'respond to TERM' do
72     lockfile = Rails.root.join 'tmp', 'dispatch.lock'
73     ENV['CRUNCH_DISPATCH_LOCKFILE'] = lockfile.to_s
74     begin
75       pid = Process.fork do
76         begin
77           # Abandon database connections inherited from parent
78           # process.  Credit to
79           # https://github.com/kstephens/rails_is_forked
80           ActiveRecord::Base.connection_handler.connection_pools.each_value do |pool|
81             pool.instance_eval do
82               @reserved_connections = {}
83               @connections = []
84             end
85           end
86           ActiveRecord::Base.establish_connection
87
88           dispatch = CrunchDispatch.new
89           dispatch.stubs(:did_recently).returns true
90           dispatch.run []
91         ensure
92           Process.exit!
93         end
94       end
95       assert_with_timeout 5, "Dispatch did not lock #{lockfile}" do
96         !can_lock(lockfile)
97       end
98     ensure
99       Process.kill("TERM", pid)
100     end
101     assert_with_timeout 20, "Dispatch did not unlock #{lockfile}" do
102       can_lock(lockfile)
103     end
104   end
105
106   test 'override --cgroup-root with CRUNCH_CGROUP_ROOT' do
107     ENV['CRUNCH_CGROUP_ROOT'] = '/path/to/cgroup'
108     Rails.configuration.crunch_job_wrapper = :none
109     act_as_system_user do
110       j = Job.create(repository: 'active/foo',
111                      script: 'hash',
112                      script_version: '4fe459abe02d9b365932b8f5dc419439ab4e2577',
113                      script_parameters: {})
114       ok = false
115       Open3.expects(:popen3).at_least_once.with do |*args|
116         if args.index(j.uuid)
117           ok = ((i = args.index '--cgroup-root') and
118                 (args[i+1] == '/path/to/cgroup'))
119         end
120         true
121       end.raises(StandardError.new('all is well'))
122       dispatch = CrunchDispatch.new
123       dispatch.parse_argv ['--jobs']
124       dispatch.refresh_todo
125       dispatch.start_jobs
126       assert ok
127     end
128   end
129
130   def assert_with_timeout timeout, message
131     t = 0
132     while (t += 0.1) < timeout
133       if yield
134         return
135       end
136       sleep 0.1
137     end
138     assert false, message + " (waited #{timeout} seconds)"
139   end
140
141   def can_lock lockfile
142     lockfile.open(File::RDWR|File::CREAT, 0644) do |f|
143       return f.flock(File::LOCK_EX|File::LOCK_NB)
144     end
145   end
146
147   test 'rate limit of partial line segments' do
148     act_as_system_user do
149       Rails.configuration.crunch_log_partial_line_throttle_period = 1
150
151       job = {}
152       job[:bytes_logged] = 0
153       job[:log_throttle_bytes_so_far] = 0
154       job[:log_throttle_lines_so_far] = 0
155       job[:log_throttle_bytes_skipped] = 0
156       job[:log_throttle_is_open] = true
157       job[:log_throttle_partial_line_last_at] = Time.new(0)
158       job[:log_throttle_first_partial_line] = true
159
160       dispatch = CrunchDispatch.new
161
162       line = "first log line"
163       limit = dispatch.rate_limit(job, line)
164       assert_equal true, limit
165       assert_equal "first log line", line
166       assert_equal 1, job[:log_throttle_lines_so_far]
167
168       # first partial line segment is skipped and counted towards skipped lines
169       now = Time.now.strftime('%Y-%m-%d-%H:%M:%S')
170       line = "#{now} localhost 100 0 stderr [...] this is first partial line segment [...]"
171       limit = dispatch.rate_limit(job, line)
172       assert_equal true, limit
173       assert_includes line, "Rate-limiting partial segments of long lines", line
174       assert_equal 2, job[:log_throttle_lines_so_far]
175
176       # next partial line segment within throttle interval is skipped but not counted towards skipped lines
177       line = "#{now} localhost 100 0 stderr [...] second partial line segment within the interval [...]"
178       limit = dispatch.rate_limit(job, line)
179       assert_equal false, limit
180       assert_equal 2, job[:log_throttle_lines_so_far]
181
182       # next partial line after interval is counted towards skipped lines
183       sleep(1)
184       line = "#{now} localhost 100 0 stderr [...] third partial line segment after the interval [...]"
185       limit = dispatch.rate_limit(job, line)
186       assert_equal false, limit
187       assert_equal 3, job[:log_throttle_lines_so_far]
188
189       # this is not a valid line segment
190       line = "#{now} localhost 100 0 stderr [...] does not end with [...] and is not a partial segment"
191       limit = dispatch.rate_limit(job, line)
192       assert_equal true, limit
193       assert_equal "#{now} localhost 100 0 stderr [...] does not end with [...] and is not a partial segment", line
194       assert_equal 4, job[:log_throttle_lines_so_far]
195
196       # this also is not a valid line segment
197       line = "#{now} localhost 100 0 stderr does not start correctly but ends with [...]"
198       limit = dispatch.rate_limit(job, line)
199       assert_equal true, limit
200       assert_equal "#{now} localhost 100 0 stderr does not start correctly but ends with [...]", line
201       assert_equal 5, job[:log_throttle_lines_so_far]
202     end
203   end
204 end