7475: Cancel jobs that cannot be satisfied instead of endlessly retrying to run them.
[arvados.git] / services / nodemanager / tests / test_jobqueue.py
1 #!/usr/bin/env python
2 # Copyright (C) The Arvados Authors. All rights reserved.
3 #
4 # SPDX-License-Identifier: AGPL-3.0
5
6 from __future__ import absolute_import, print_function
7
8 import unittest
9 import mock
10
11 import arvnodeman.jobqueue as jobqueue
12 from . import testutil
13
class ServerCalculatorTestCase(unittest.TestCase):
    """Exercise jobqueue.ServerCalculator.servers_for_queue and cheapest_size.

    Each test builds a calculator from a list of MockSize factors, feeds it
    a queue of jobs described only by their runtime constraints, and checks
    the list of sizes that comes back.
    """

    def make_calculator(self, factors, **kwargs):
        # One (MockSize, overrides) pair per factor; the override dict pins
        # the 'cores' value to the factor itself.  Extra keyword arguments
        # are forwarded to ServerCalculator untouched.
        size_specs = []
        for factor in factors:
            size_specs.append((testutil.MockSize(factor), {'cores': factor}))
        return jobqueue.ServerCalculator(size_specs, **kwargs)

    def calculate(self, servcalc, *constraints):
        # Wrap each constraints dict in a minimal job record with a
        # synthetic uuid, then return only the first element of the
        # (server list, second value) pair from servers_for_queue.
        queue = []
        for index, cdict in enumerate(constraints):
            queue.append({
                'uuid': 'zzzzz-jjjjj-{:015x}'.format(index),
                'runtime_constraints': cdict,
            })
        wanted, _ignored = servcalc.servers_for_queue(queue)
        return wanted

    def _assert_server_count(self, expected, servcalc, *constraints):
        # Assert that the given jobs produce exactly `expected` servers.
        wanted = self.calculate(servcalc, *constraints)
        self.assertEqual(expected, len(wanted))

    def _assert_size_ids(self, expected_indexes, servcalc, *constraints):
        # Assert that the returned servers, in order, carry the ids of
        # servcalc.cloud_sizes at the given indexes.
        actual_ids = [size.id for size in self.calculate(servcalc,
                                                         *constraints)]
        expected_ids = [servcalc.cloud_sizes[index].id
                        for index in expected_indexes]
        self.assertEqual(expected_ids, actual_ids)

    def test_empty_queue_needs_no_servers(self):
        calc = self.make_calculator([1])
        result = calc.servers_for_queue([])
        self.assertEqual(([], {}), result)

    def test_easy_server_count(self):
        calc = self.make_calculator([1])
        self._assert_server_count(3, calc, {'min_nodes': 3})

    def test_default_5pct_ram_value_decrease(self):
        # With default settings a 128 MB request yields no server while a
        # 121 MB request yields one.
        calc = self.make_calculator([1])
        self._assert_server_count(0, calc, {'min_ram_mb_per_node': 128})
        self._assert_server_count(1, calc, {'min_ram_mb_per_node': 121})

    def test_custom_node_mem_scaling_factor(self):
        # Simulate a custom 'node_mem_scaling' config parameter by handing
        # the value straight to ServerCalculator.
        calc = self.make_calculator([1], node_mem_scaling=0.5)
        self._assert_server_count(0, calc, {'min_ram_mb_per_node': 128})
        self._assert_server_count(1, calc, {'min_ram_mb_per_node': 64})

    def test_implicit_server_count(self):
        # A job with no constraints plus a three-node job: four servers.
        calc = self.make_calculator([1])
        self._assert_server_count(4, calc, {}, {'min_nodes': 3})

    def test_bad_min_nodes_override(self):
        # Negative and non-numeric min_nodes values: two jobs still yield
        # two servers in total.
        calc = self.make_calculator([1])
        self._assert_server_count(2, calc,
                                  {'min_nodes': -2},
                                  {'min_nodes': 'foo'})

    def test_ignore_unsatisfiable_jobs(self):
        # Five jobs are queued but only six servers are expected back
        # (presumably all from the min_nodes=6 job -- confirm against
        # MockSize's attributes).
        calc = self.make_calculator([1], max_nodes=9)
        self._assert_server_count(6, calc,
                                  {'min_cores_per_node': 2},
                                  {'min_ram_mb_per_node': 256},
                                  {'min_nodes': 6},
                                  {'min_nodes': 12},
                                  {'min_scratch_mb_per_node': 300000})

    def test_ignore_too_expensive_jobs(self):
        calc = self.make_calculator([1, 2], max_nodes=12, max_price=6)
        # Six single-core nodes are accepted under max_price=6 ...
        self._assert_server_count(6, calc,
                                  {'min_cores_per_node': 1, 'min_nodes': 6})
        # ... but six two-core nodes produce no servers at all.
        self._assert_server_count(0, calc,
                                  {'min_cores_per_node': 2, 'min_nodes': 6})

    def test_job_requesting_max_nodes_accepted(self):
        # A request for exactly max_nodes nodes is still served.
        calc = self.make_calculator([1], max_nodes=4)
        self._assert_server_count(4, calc, {'min_nodes': 4})

    def test_cheapest_size(self):
        calc = self.make_calculator([2, 4, 1, 3])
        cheapest = calc.cheapest_size()
        self.assertEqual(testutil.MockSize(1), cheapest)

    def test_next_biggest(self):
        # 3 and 6 cores match no size exactly; the sizes at indexes 2 and 3
        # (built from factors 4 and 8) are expected.
        calc = self.make_calculator([1, 2, 4, 8])
        self._assert_size_ids([2, 3], calc,
                              {'min_cores_per_node': 3},
                              {'min_cores_per_node': 6})

    def test_multiple_sizes(self):
        # The returned sizes follow the order of the queued jobs, wherever
        # the two-core job sits in the queue.
        calc = self.make_calculator([1, 2])
        one_core = {'min_cores_per_node': 1}

        self._assert_size_ids([1, 0, 0], calc,
                              {'min_cores_per_node': 2},
                              dict(one_core),
                              dict(one_core))

        self._assert_size_ids([0, 1, 0], calc,
                              dict(one_core),
                              {'min_cores_per_node': 2},
                              dict(one_core))

        self._assert_size_ids([0, 0, 1], calc,
                              dict(one_core),
                              dict(one_core),
                              {'min_cores_per_node': 2})
128
129
130
class JobQueueMonitorActorTestCase(testutil.RemotePollLoopActorTestMixin,
                                   unittest.TestCase):
    """Tests for jobqueue.JobQueueMonitorActor.

    Covers the server lists delivered to subscribers (with
    subprocess.check_output patched out) and the coerce_to_mb conversion.
    """
    TEST_CLASS = jobqueue.JobQueueMonitorActor

    class MockCalculator(object):
        # Stand-in calculator: wraps every queue item in a MockSize and
        # returns an empty dict as the second element of the pair.
        @staticmethod
        def servers_for_queue(queue):
            sizes = []
            for item in queue:
                sizes.append(testutil.MockSize(item))
            return (sizes, {})

    def build_monitor(self, side_effect, *args, **kwargs):
        # Build the monitor through the mixin, then install `side_effect`
        # on the client's jobs().queue().execute call.
        parent = super(JobQueueMonitorActorTestCase, self)
        parent.build_monitor(*args, **kwargs)
        execute = self.client.jobs().queue().execute
        execute.side_effect = side_effect

    def _build_monitor_with_server_calculator(self):
        # Shared setup for the squeue tests: a ServerCalculator with two
        # sizes (n = 1, 2), built via the mixin's build_monitor directly,
        # so no side_effect is installed on the client.
        size_specs = [
            (testutil.MockSize(n), {'cores': n, 'ram': n*1024, 'scratch': n})
            for n in range(1, 3)
        ]
        calculator = jobqueue.ServerCalculator(size_specs)
        super(JobQueueMonitorActorTestCase, self).build_monitor(
            calculator, True, True)

    def _subscribe_and_expect(self, expected_sizes):
        # Subscribe, stop the monitor, then check the subscriber's most
        # recent call carried exactly `expected_sizes`.
        self.monitor.subscribe(self.subscriber).get(self.TIMEOUT)
        self.stop_proxy(self.monitor)
        self.subscriber.assert_called_with(expected_sizes)

    @mock.patch("subprocess.check_output")
    def test_subscribers_get_server_lists(self, mock_squeue):
        # The patched check_output returns an empty string; the API queue
        # supplies items 1 and 2.
        mock_squeue.return_value = ""

        self.build_monitor([{'items': [1, 2]}],
                           self.MockCalculator(), True, True)
        self._subscribe_and_expect([testutil.MockSize(n) for n in (1, 2)])

    @mock.patch("subprocess.check_output")
    def test_squeue_server_list(self, mock_squeue):
        # Two pipe-delimited output lines with plain numeric second fields.
        mock_squeue.return_value = (
            "1|1024|0|Resources|zzzzz-zzzzz-zzzzzzzzzzzzzzy\n"
            "2|1024|0|Resources|zzzzz-zzzzz-zzzzzzzzzzzzzzz\n"
        )

        self._build_monitor_with_server_calculator()
        self._subscribe_and_expect([testutil.MockSize(n) for n in (1, 2)])

    @mock.patch("subprocess.check_output")
    def test_squeue_server_list_suffix(self, mock_squeue):
        # Same shape as above, but the second field carries M/G suffixes
        # and the fourth field contains a comma and a space.
        mock_squeue.return_value = (
            "1|1024M|0|ReqNodeNotAvail, UnavailableNod|zzzzz-zzzzz-zzzzzzzzzzzzzzy\n"
            "1|2G|0|ReqNodeNotAvail, UnavailableNod|zzzzz-zzzzz-zzzzzzzzzzzzzzz\n"
        )

        self._build_monitor_with_server_calculator()
        self._subscribe_and_expect([testutil.MockSize(n) for n in (1, 2)])

    def test_coerce_to_mb(self):
        # (expected result, input string) pairs, checked in order.
        cases = (
            (1, "1"),
            (512, "512"),
            (512, "512M"),
            (1024, "1024M"),
            (1024, "1G"),
            (1536, "1.5G"),
            (2048, "2G"),
            (1025, "1025M"),
            (1048576, "1T"),
            (1572864, "1.5T"),
            (1073741824, "1P"),
            (1610612736, "1.5P"),
            (0, "0"),
            (0, "0M"),
            (0, "0G"),
        )
        coerce = jobqueue.JobQueueMonitorActor.coerce_to_mb
        for expected, text in cases:
            self.assertEqual(expected, coerce(text))
199
200
# Run the test cases in this module when it is executed as a script.
if __name__ == "__main__":
    unittest.main()