3 from __future__ import absolute_import, print_function
9 import libcloud.compute.providers as cloud_provider
10 import libcloud.compute.types as cloud_types
12 from . import BaseComputeNodeDriver
13 from .. import arvados_node_fqdn, arvados_timestamp, ARVADOS_TIMEFMT
class ComputeNodeDriver(BaseComputeNodeDriver):
    """Compute node driver wrapper for GCE

    This translates cloud driver requests to GCE's specific parameters.
    """
    DEFAULT_DRIVER = cloud_provider.get_driver(cloud_types.Provider.GCE)
    # Per-driver cache used by BaseComputeNodeDriver.search_for so GCE
    # lookups don't share cache entries with other cloud drivers.
    SEARCH_CACHE = {}

    def __init__(self, auth_kwargs, list_kwargs, create_kwargs,
                 driver_class=DEFAULT_DRIVER):
        """Set up the GCE driver.

        Arguments:
        * auth_kwargs: credentials passed through to the libcloud driver.
        * list_kwargs: node-listing arguments.  A comma-separated 'tags'
          entry is popped here and kept as a frozenset for our own
          tag-based filtering in list_nodes (GCE can't filter by tag).
        * create_kwargs: node-creation arguments; 'external_ip' and
          'ex_metadata' get defaults so later code can rely on them.
        * driver_class: libcloud driver class, overridable for testing.
        """
        list_kwargs = list_kwargs.copy()
        tags_str = list_kwargs.pop('tags', '')
        if not tags_str.strip():
            self.node_tags = frozenset()
        else:
            self.node_tags = frozenset(t.strip() for t in tags_str.split(','))
        create_kwargs = create_kwargs.copy()
        create_kwargs.setdefault('external_ip', None)
        create_kwargs.setdefault('ex_metadata', {})
        super(ComputeNodeDriver, self).__init__(
            auth_kwargs, list_kwargs, create_kwargs,
            driver_class)
        # .values() instead of py2-only .itervalues(): works on both
        # Python 2 and 3, and this dict is small.
        self._sizes_by_name = {sz.name: sz for sz in self.sizes.values()}
        self._disktype_links = {dt.name: self._object_link(dt)
                                for dt in self.real.ex_list_disktypes()}
42 def _name_key(cloud_object):
43 return cloud_object.name
46 def _object_link(cloud_object):
47 return cloud_object.extra.get('selfLink')
49 def _init_image(self, image_name):
50 return 'image', self.search_for(
51 image_name, 'list_images', self._name_key)
53 def _init_network(self, network_name):
54 return 'ex_network', self.search_for(
55 network_name, 'ex_list_networks', self._name_key)
57 def _init_service_accounts(self, service_accounts_str):
58 return 'ex_service_accounts', json.loads(service_accounts_str)
60 def _init_ssh_key(self, filename):
61 # SSH keys are delivered to GCE nodes via ex_metadata: see
62 # http://stackoverflow.com/questions/26752617/creating-sshkeys-for-gce-instance-using-libcloud
63 with open(filename) as ssh_file:
64 self.create_kwargs['ex_metadata']['sshKeys'] = (
65 'root:' + ssh_file.read().strip())
67 def arvados_create_kwargs(self, size, arvados_node):
68 cluster_id, _, node_id = arvados_node['uuid'].split('-')
69 name = 'compute-{}-{}'.format(node_id, cluster_id)
76 'diskType': self._disktype_links['pd-standard'],
77 'sourceImage': self._object_link(self.create_kwargs['image']),
83 # Boot images rely on this device name to find the SSD.
84 # Any change must be coordinated in the image.
87 {'diskType': self._disktype_links['local-ssd'],
92 result = {'name': name,
93 'ex_metadata': self.create_kwargs['ex_metadata'].copy(),
94 'ex_tags': list(self.node_tags),
95 'ex_disks_gce_struct': disks,
97 result['ex_metadata'].update({
98 'arv-ping-url': self._make_ping_url(arvados_node),
99 'booted_at': time.strftime(ARVADOS_TIMEFMT, time.gmtime()),
100 'hostname': arvados_node_fqdn(arvados_node),
104 def create_node(self, size, arvados_node):
106 kwargs = self.create_kwargs.copy()
107 kwargs.update(self.arvados_create_kwargs(size, arvados_node))
108 kwargs['size'] = size
109 return self.real.create_node(**kwargs)
110 except ComputeNodeDriver.CLOUD_ERRORS:
111 # Workaround for bug #6702: sometimes the create node request
112 # succeeds but times out and raises an exception instead of
113 # returning a result. If this happens, we get stuck in a retry
114 # loop forever because subsequent create_node attempts will fail
115 # due to node name collision. So check if the node we intended to
116 # create shows up in the cloud node list and return it if found.
118 node = [n for n in self.list_nodes() if n.name == kwargs['name']]
122 # Ignore possible exception from list_nodes in favor of
123 # re-raising the original create_node exception.
127 def list_nodes(self):
128 # The GCE libcloud driver only supports filtering node lists by zone.
129 # Do our own filtering based on tag list.
130 nodelist = [node for node in
131 super(ComputeNodeDriver, self).list_nodes()
132 if self.node_tags.issubset(node.extra.get('tags', []))]
133 # As of 0.18, the libcloud GCE driver sets node.size to the size's name.
134 # It's supposed to be the actual size object. Check that it's not,
135 # and monkeypatch the results when that's the case.
136 if nodelist and not hasattr(nodelist[0].size, 'id'):
137 for node in nodelist:
138 node.size = self._sizes_by_name[node.size]
142 def _find_metadata(cls, metadata_items, key):
143 # Given a list of two-item metadata dictonaries, return the one with
144 # the named key. Raise KeyError if not found.
146 return next(data_dict for data_dict in metadata_items
147 if data_dict.get('key') == key)
148 except StopIteration:
152 def _get_metadata(cls, metadata_items, key, *default):
154 return cls._find_metadata(metadata_items, key)['value']
160 def sync_node(self, cloud_node, arvados_node):
161 # We can't store the FQDN on the name attribute or anything like it,
162 # because (a) names are static throughout the node's life (so FQDN
163 # isn't available because we don't know it at node creation time) and
164 # (b) it can't contain dots. Instead stash it in metadata.
165 hostname = arvados_node_fqdn(arvados_node)
166 metadata_req = cloud_node.extra['metadata'].copy()
167 metadata_items = metadata_req.setdefault('items', [])
169 self._find_metadata(metadata_items, 'hostname')['value'] = hostname
171 metadata_items.append({'key': 'hostname', 'value': hostname})
172 response = self.real.connection.async_request(
173 '/zones/{}/instances/{}/setMetadata'.format(
174 cloud_node.extra['zone'].name, cloud_node.name),
175 method='POST', data=metadata_req)
176 if not response.success():
177 raise Exception("setMetadata error: {}".format(response.error))
180 def node_fqdn(cls, node):
181 # See sync_node comment.
182 return cls._get_metadata(node.extra['metadata'].get('items', []),
186 def node_start_time(cls, node):
188 return arvados_timestamp(cls._get_metadata(
189 node.extra['metadata']['items'], 'booted_at'))