12073: Prioritize stale node records that have a slot_number when
[arvados.git] / services / nodemanager / tests / test_computenode_driver_gce.py
1 #!/usr/bin/env python
2 # Copyright (C) The Arvados Authors. All rights reserved.
3 #
4 # SPDX-License-Identifier: AGPL-3.0
5
6 from __future__ import absolute_import, print_function
7
8 import json
9 import time
10 import unittest
11
12 import mock
13
14 import arvnodeman.computenode.driver.gce as gce
15 from . import testutil
16
class GCEComputeNodeDriverTestCase(testutil.DriverTestMixin, unittest.TestCase):
    """Tests for gce.ComputeNodeDriver run against a mocked cloud driver.

    The mocked driver is reached through ``self.driver_mock``, which is not
    defined in this class (presumably provided by testutil.DriverTestMixin).
    """
    TEST_CLASS = gce.ComputeNodeDriver

    def setUp(self):
        # Register one image and three disk types on the mocked driver, then
        # reset the mock so each test starts without recorded calls.
        super(GCEComputeNodeDriverTestCase, self).setUp()
        self.driver_mock().list_images.return_value = [
            testutil.cloud_object_mock('testimage', selfLink='image-link')]
        self.driver_mock().ex_list_disktypes.return_value = [
            testutil.cloud_object_mock(name, selfLink=name + '-link')
            for name in ['pd-standard', 'pd-ssd', 'local-ssd']]
        self.driver_mock.reset_mock()

    def new_driver(self, auth_kwargs=None, list_kwargs=None, create_kwargs=None):
        # Build a driver via the parent helper, defaulting the 'image' create
        # argument to the 'testimage' registered in setUp.
        # The defaults are None rather than {} and create_kwargs is copied,
        # so setdefault() below never leaks state into a shared default
        # dictionary or into the dictionary a caller passed in.
        auth_kwargs = {} if auth_kwargs is None else auth_kwargs
        list_kwargs = {} if list_kwargs is None else list_kwargs
        create_kwargs = dict(create_kwargs or {})
        create_kwargs.setdefault('image', 'testimage')
        return super(GCEComputeNodeDriverTestCase, self).new_driver(
            auth_kwargs, list_kwargs, create_kwargs)

    def test_driver_instantiation(self):
        # The auth kwargs must be handed to the cloud driver class as
        # keyword arguments.
        kwargs = {'user_id': 'foo'}
        self.new_driver(auth_kwargs=kwargs)
        self.assertTrue(self.driver_mock.called)
        self.assertEqual(kwargs, self.driver_mock.call_args[1])

    def test_create_image_loaded_at_initialization_by_name(self):
        # Constructing the driver with an image name should list the
        # available images exactly once.
        image_mocks = [testutil.cloud_object_mock(c) for c in 'abc']
        list_method = self.driver_mock().list_images
        list_method.return_value = image_mocks
        self.new_driver(create_kwargs={'image': 'b'})
        self.assertEqual(1, list_method.call_count)

    def test_create_includes_ping_secret(self):
        # The Arvados node's ping secret must appear in the 'arv-ping-url'
        # metadata passed to the cloud driver's create_node.
        arv_node = testutil.arvados_node_mock(info={'ping_secret': 'ssshh'})
        driver = self.new_driver()
        driver.create_node(testutil.MockSize(1), arv_node)
        metadata = self.driver_mock().create_node.call_args[1]['ex_metadata']
        self.assertIn('ping_secret=ssshh', metadata.get('arv-ping-url'))

    def test_create_raises_but_actually_succeeded(self):
        # The cloud driver's create_node raises IOError, but a node with the
        # expected name is present in list_nodes; the driver should return
        # that node instead of failing.
        arv_node = testutil.arvados_node_mock(1, hostname=None)
        driver = self.new_driver()
        nodelist = [testutil.cloud_node_mock(1)]
        nodelist[0].name = 'compute-000000000000001-zzzzz'
        self.driver_mock().list_nodes.return_value = nodelist
        self.driver_mock().create_node.side_effect = IOError
        n = driver.create_node(testutil.MockSize(1), arv_node)
        self.assertEqual('compute-000000000000001-zzzzz', n.name)

    def test_create_sets_default_hostname(self):
        # With no hostname on the Arvados node, the driver must supply both
        # a node name and a 'hostname' metadata entry.
        driver = self.new_driver()
        driver.create_node(testutil.MockSize(1),
                           testutil.arvados_node_mock(254, hostname=None))
        create_kwargs = self.driver_mock().create_node.call_args[1]
        self.assertEqual('compute-0000000000000fe-zzzzz',
                         create_kwargs.get('name'))
        self.assertEqual('dynamic.compute.zzzzz.arvadosapi.com',
                         create_kwargs.get('ex_metadata', {}).get('hostname'))

    def test_create_tags_from_list_tags(self):
        # The comma-separated 'tags' list kwarg should be split, stripped
        # and passed to create_node as 'ex_tags'.
        driver = self.new_driver(list_kwargs={'tags': 'testA, testB'})
        driver.create_node(testutil.MockSize(1), testutil.arvados_node_mock())
        self.assertEqual(['testA', 'testB'],
                         self.driver_mock().create_node.call_args[1]['ex_tags'])

    def test_create_with_two_disks_attached(self):
        # create_node must request two disks: a persistent boot disk built
        # from the configured image, and a scratch local SSD.
        driver = self.new_driver(create_kwargs={'image': 'testimage'})
        driver.create_node(testutil.MockSize(1), testutil.arvados_node_mock())
        create_disks = self.driver_mock().create_node.call_args[1].get(
            'ex_disks_gce_struct', [])
        self.assertEqual(2, len(create_disks))
        self.assertTrue(create_disks[0].get('autoDelete'))
        self.assertTrue(create_disks[0].get('boot'))
        self.assertEqual('PERSISTENT', create_disks[0].get('type'))
        init_params = create_disks[0].get('initializeParams', {})
        self.assertEqual('pd-standard-link', init_params.get('diskType'))
        self.assertEqual('image-link', init_params.get('sourceImage'))
        # Our node images expect the SSD to be named `tmp` to find and mount it.
        self.assertEqual('tmp', create_disks[1].get('deviceName'))
        self.assertTrue(create_disks[1].get('autoDelete'))
        # The 'unset' default makes a missing 'boot' key fail the assertion:
        # the key has to be present and falsy.
        self.assertFalse(create_disks[1].get('boot', 'unset'))
        self.assertEqual('SCRATCH', create_disks[1].get('type'))
        init_params = create_disks[1].get('initializeParams', {})
        self.assertEqual('local-ssd-link', init_params.get('diskType'))

    def test_list_nodes_requires_tags_match(self):
        # A node matches if our list tags are a subset of the node's tags.
        # Test behavior with no tags, no match, partial matches, different
        # order, and strict supersets.
        cloud_mocks = [
            testutil.cloud_node_mock(node_num, tags=tag_set)
            for node_num, tag_set in enumerate(
                [[], ['bad'], ['good'], ['great'], ['great', 'ok'],
                 ['great', 'good'], ['good', 'fantastic', 'great']])]
        cloud_mocks.append(testutil.cloud_node_mock())
        self.driver_mock().list_nodes.return_value = cloud_mocks
        driver = self.new_driver(list_kwargs={'tags': 'good, great'})
        self.assertItemsEqual(['5', '6'], [n.id for n in driver.list_nodes()])

    def build_gce_metadata(self, metadata_dict):
        # Convert a plain metadata dictionary to the GCE data structure.
        return {
            'kind': 'compute#metadata',
            'fingerprint': 'testprint',
            'items': [{'key': key, 'value': metadata_dict[key]}
                      for key in metadata_dict],
            }

    def check_sync_node_updates_hostname_tag(self, plain_metadata):
        # Shared body for the sync_node hostname tests: start a cloud node
        # with plain_metadata, sync it against Arvados node 1, and check the
        # metadata written back is plain_metadata with 'hostname' set to that
        # node's FQDN.
        start_metadata = self.build_gce_metadata(plain_metadata)
        arv_node = testutil.arvados_node_mock(1)
        cloud_node = testutil.cloud_node_mock(
            2, metadata=start_metadata.copy(),
            zone=testutil.cloud_object_mock('testzone'))
        self.driver_mock().ex_get_node.return_value = cloud_node
        driver = self.new_driver()
        driver.sync_node(cloud_node, arv_node)
        args = self.driver_mock().ex_set_node_metadata.call_args[0]
        self.assertEqual(cloud_node, args[0])
        # Build the expectation in a new dictionary so the caller's
        # plain_metadata argument is left untouched.
        expected_metadata = dict(
            plain_metadata, hostname='compute1.zzzzz.arvadosapi.com')
        self.assertEqual(
            expected_metadata,
            {item['key']: item['value'] for item in args[1]})

    def test_sync_node_updates_hostname_tag(self):
        # An existing 'hostname' metadata entry gets overwritten.
        self.check_sync_node_updates_hostname_tag(
            {'testkey': 'testvalue', 'hostname': 'startvalue'})

    def test_sync_node_adds_hostname_tag(self):
        # A missing 'hostname' metadata entry gets added.
        self.check_sync_node_updates_hostname_tag({'testkey': 'testval'})

    def test_sync_node_raises_exception_on_failure(self):
        # An exception raised by ex_set_node_metadata must propagate out of
        # sync_node unchanged (same class, same message).
        arv_node = testutil.arvados_node_mock(8)
        cloud_node = testutil.cloud_node_mock(
            9, metadata={}, zone=testutil.cloud_object_mock('failzone'))
        self.driver_mock().ex_set_node_metadata.side_effect = (
            Exception('sync error test'),)
        driver = self.new_driver()
        with self.assertRaises(Exception) as err_check:
            driver.sync_node(cloud_node, arv_node)
        self.assertIs(err_check.exception.__class__, Exception)
        self.assertIn('sync error test', str(err_check.exception))

    def test_node_create_time_zero_for_unknown_nodes(self):
        # A node created without explicit metadata reports a start time of 0.
        node = testutil.cloud_node_mock()
        self.assertEqual(0, gce.ComputeNodeDriver.node_start_time(node))

    def test_node_create_time_for_known_node(self):
        # 'booted_at' is parsed as a UTC timestamp: 65 seconds past the epoch.
        node = testutil.cloud_node_mock(metadata=self.build_gce_metadata(
                {'booted_at': '1970-01-01T00:01:05Z'}))
        self.assertEqual(65, gce.ComputeNodeDriver.node_start_time(node))

    def test_node_create_time_recorded_when_node_boots(self):
        # create_node must stamp 'booted_at' with a time no earlier than the
        # moment just before the call.  The timestamps share one fixed-width
        # format, so they are compared as strings.
        start_time = time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())
        arv_node = testutil.arvados_node_mock()
        driver = self.new_driver()
        driver.create_node(testutil.MockSize(1), arv_node)
        metadata = self.driver_mock().create_node.call_args[1]['ex_metadata']
        self.assertLessEqual(start_time, metadata.get('booted_at'))

    def test_known_node_fqdn(self):
        # The FQDN is read from the node's 'hostname' metadata entry.
        name = 'fqdntest.zzzzz.arvadosapi.com'
        node = testutil.cloud_node_mock(metadata=self.build_gce_metadata(
                {'hostname': name}))
        self.assertEqual(name, gce.ComputeNodeDriver.node_fqdn(node))

    def test_unknown_node_fqdn(self):
        # Return an empty string.  This lets fqdn be safely compared
        # against an expected value, and ComputeNodeMonitorActor
        # should try to update it.
        node = testutil.cloud_node_mock(metadata=self.build_gce_metadata({}))
        self.assertEqual('', gce.ComputeNodeDriver.node_fqdn(node))

    def test_deliver_ssh_key_in_metadata(self):
        # The key file named by 'ssh_key' is read while the driver is being
        # constructed, and its contents are delivered as 'sshKeys' metadata
        # for the root user.
        test_ssh_key = 'ssh-rsa-foo'
        arv_node = testutil.arvados_node_mock(1)
        with mock.patch('__builtin__.open',
                        mock.mock_open(read_data=test_ssh_key)) as mock_file:
            driver = self.new_driver(create_kwargs={'ssh_key': 'ssh-key-file'})
        mock_file.assert_called_once_with('ssh-key-file')
        driver.create_node(testutil.MockSize(1), arv_node)
        metadata = self.driver_mock().create_node.call_args[1]['ex_metadata']
        self.assertEqual('root:ssh-rsa-foo', metadata.get('sshKeys'))

    def test_create_driver_with_service_accounts(self):
        # 'service_accounts' is configured as a JSON string and must reach
        # create_node decoded, as 'ex_service_accounts'.
        service_accounts = {'email': 'foo@bar', 'scopes': ['storage-full']}
        srv_acct_config = {'service_accounts': json.dumps(service_accounts)}
        arv_node = testutil.arvados_node_mock(1)
        driver = self.new_driver(create_kwargs=srv_acct_config)
        driver.create_node(testutil.MockSize(1), arv_node)
        self.assertEqual(
            service_accounts,
            self.driver_mock().create_node.call_args[1]['ex_service_accounts'])

    def test_fix_string_size(self):
        # As of 0.18, the libcloud GCE driver sets node.size to the size's name.
        # It's supposed to be the actual size object.  Make sure our driver
        # patches that up in listings.
        size = testutil.MockSize(2)
        node = testutil.cloud_node_mock(size=size)
        node.size = size.name
        self.driver_mock().list_sizes.return_value = [size]
        self.driver_mock().list_nodes.return_value = [node]
        driver = self.new_driver()
        nodelist = driver.list_nodes()
        self.assertEqual(1, len(nodelist))
        self.assertIs(node, nodelist[0])
        self.assertIs(size, nodelist[0].size)

    def test_skip_fix_when_size_not_string(self):
        # Ensure we don't monkeypatch node sizes unless we need to.
        size = testutil.MockSize(3)
        node = testutil.cloud_node_mock(size=size)
        self.driver_mock().list_nodes.return_value = [node]
        driver = self.new_driver()
        nodelist = driver.list_nodes()
        self.assertEqual(1, len(nodelist))
        self.assertIs(node, nodelist[0])
        self.assertIs(size, nodelist[0].size)

    def test_node_found_after_timeout_has_fixed_size(self):
        # Delegates to check_node_found_after_timeout_has_fixed_size, which
        # is not defined in this class (presumably inherited from the test
        # mixin), using a cloud node whose size is given as the size's id.
        size = testutil.MockSize(4)
        cloud_node = testutil.cloud_node_mock(size=size.id)
        self.check_node_found_after_timeout_has_fixed_size(size, cloud_node)

    def test_list_empty_nodes(self):
        # An empty cloud listing yields an empty list from the driver.
        self.driver_mock().list_nodes.return_value = []
        self.assertEqual([], self.new_driver().list_nodes())