X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/e2ab0a122f3b721a7663b61769f15713dbcf1a5e..0eb72b526bf8bbb011551ecf019f604e17a534f1:/services/nodemanager/arvnodeman/computenode/driver/gce.py

diff --git a/services/nodemanager/arvnodeman/computenode/driver/gce.py b/services/nodemanager/arvnodeman/computenode/driver/gce.py
index ccd193729b..419557fe28 100644
--- a/services/nodemanager/arvnodeman/computenode/driver/gce.py
+++ b/services/nodemanager/arvnodeman/computenode/driver/gce.py
@@ -1,4 +1,7 @@
 #!/usr/bin/env python
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
 
 from __future__ import absolute_import, print_function
 
@@ -31,17 +34,21 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
         create_kwargs = create_kwargs.copy()
         create_kwargs.setdefault('external_ip', None)
         create_kwargs.setdefault('ex_metadata', {})
+        self._project = auth_kwargs.get("project")
         super(ComputeNodeDriver, self).__init__(
             auth_kwargs, list_kwargs, create_kwargs,
             driver_class)
+        self._sizes_by_name = {sz.name: sz for sz in self.sizes.itervalues()}
+        self._disktype_links = {dt.name: self._object_link(dt)
+                                for dt in self.real.ex_list_disktypes()}
 
     @staticmethod
-    def _name_key(cloud_object):
-        return cloud_object.name
+    def _object_link(cloud_object):
+        return cloud_object.extra.get('selfLink')
 
     def _init_image(self, image_name):
         return 'image', self.search_for(
-            image_name, 'list_images', self._name_key)
+            image_name, 'list_images', self._name_key, ex_project=self._project)
 
     def _init_network(self, network_name):
         return 'ex_network', self.search_for(
@@ -57,24 +64,64 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
             self.create_kwargs['ex_metadata']['sshKeys'] = (
                 'root:' + ssh_file.read().strip())
 
-    def arvados_create_kwargs(self, arvados_node):
-        cluster_id, _, node_id = arvados_node['uuid'].split('-')
-        result = {'name': 'compute-{}-{}'.format(node_id, cluster_id),
+    def create_cloud_name(self, arvados_node):
+        uuid_parts = arvados_node['uuid'].split('-', 2)
+        return 'compute-{parts[2]}-{parts[0]}'.format(parts=uuid_parts)
+
+    def arvados_create_kwargs(self, size, arvados_node):
+        name = self.create_cloud_name(arvados_node)
+
+        if size.scratch > 375000:
+            self._logger.warning("Requested %d MB scratch space, but GCE driver currently only supports attaching a single 375 GB disk.", size.scratch)
+
+        disks = [
+            {'autoDelete': True,
+             'boot': True,
+             'deviceName': name,
+             'initializeParams':
+             {'diskName': name,
+              'diskType': self._disktype_links['pd-standard'],
+              'sourceImage': self._object_link(self.create_kwargs['image']),
+              },
+             'type': 'PERSISTENT',
+             },
+            {'autoDelete': True,
+             'boot': False,
+             # Boot images rely on this device name to find the SSD.
+             # Any change must be coordinated in the image.
+             'deviceName': 'tmp',
+             'initializeParams':
+             {'diskType': self._disktype_links['local-ssd'],
+              },
+             'type': 'SCRATCH',
+             },
+            ]
+        result = {'name': name,
                   'ex_metadata': self.create_kwargs['ex_metadata'].copy(),
-                  'ex_tags': list(self.node_tags)}
-        result['ex_metadata']['arv-ping-url'] = self._make_ping_url(
-            arvados_node)
-        result['ex_metadata']['booted_at'] = time.strftime(ARVADOS_TIMEFMT,
-                                                           time.gmtime())
-        result['ex_metadata']['hostname'] = arvados_node_fqdn(arvados_node)
+                  'ex_tags': list(self.node_tags),
+                  'ex_disks_gce_struct': disks,
+                  }
+        result['ex_metadata'].update({
+                'arv-ping-url': self._make_ping_url(arvados_node),
+                'booted_at': time.strftime(ARVADOS_TIMEFMT, time.gmtime()),
+                'hostname': arvados_node_fqdn(arvados_node),
+                })
         return result
 
+
     def list_nodes(self):
         # The GCE libcloud driver only supports filtering node lists by zone.
         # Do our own filtering based on tag list.
-        return [node for node in
-                super(ComputeNodeDriver, self).list_nodes()
-                if self.node_tags.issubset(node.extra.get('tags', []))]
+        nodelist = [node for node in
+                    super(ComputeNodeDriver, self).list_nodes()
+                    if self.node_tags.issubset(node.extra.get('tags', []))]
+        # As of 0.18, the libcloud GCE driver sets node.size to the size's name.
+        # It's supposed to be the actual size object.  Check that it's not,
+        # and monkeypatch the results when that's the case.
+        if nodelist and not hasattr(nodelist[0].size, 'id'):
+            for node in nodelist:
+                node.size = self._sizes_by_name[node.size]
+        return nodelist
 
     @classmethod
     def _find_metadata(cls, metadata_items, key):
@@ -96,6 +143,14 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
             raise
 
     def sync_node(self, cloud_node, arvados_node):
+        # Update the cloud node record to ensure we have the correct metadata
+        # fingerprint.
+        cloud_node = self.real.ex_get_node(cloud_node.name, cloud_node.extra['zone'])
+
+        # We can't store the FQDN on the name attribute or anything like it,
+        # because (a) names are static throughout the node's life (so FQDN
+        # isn't available because we don't know it at node creation time) and
+        # (b) it can't contain dots.  Instead stash it in metadata.
         hostname = arvados_node_fqdn(arvados_node)
         metadata_req = cloud_node.extra['metadata'].copy()
         metadata_items = metadata_req.setdefault('items', [])
@@ -103,12 +158,14 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
             self._find_metadata(metadata_items, 'hostname')['value'] = hostname
         except KeyError:
             metadata_items.append({'key': 'hostname', 'value': hostname})
-        response = self.real.connection.async_request(
-            '/zones/{}/instances/{}/setMetadata'.format(
-                cloud_node.extra['zone'].name, cloud_node.name),
-            method='POST', data=metadata_req)
-        if not response.success():
-            raise Exception("setMetadata error: {}".format(response.error))
+
+        self.real.ex_set_node_metadata(cloud_node, metadata_items)
+
+    @classmethod
+    def node_fqdn(cls, node):
+        # See sync_node comment.
+        return cls._get_metadata(node.extra['metadata'].get('items', []),
+                                 'hostname', '')
 
     @classmethod
     def node_start_time(cls, node):
@@ -117,3 +174,7 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
                 node.extra['metadata']['items'], 'booted_at'))
         except KeyError:
             return 0
+
+    @classmethod
+    def node_id(cls, node):
+        return node.id
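
A quick illustration of the new create_cloud_name() logic above, using a made-up node UUID in the usual Arvados <cluster>-<type>-<id> form (the UUID below is hypothetical, not a real record):

    uuid = 'zzzzz-7ekkf-0123456789abcde'   # hypothetical Arvados node UUID
    parts = uuid.split('-', 2)             # ['zzzzz', '7ekkf', '0123456789abcde']
    print('compute-{parts[2]}-{parts[0]}'.format(parts=parts))
    # -> compute-0123456789abcde-zzzzz

Splitting with maxsplit=2 guarantees exactly three parts, so the node ID and cluster ID land in fixed positions regardless of the UUID's middle segment.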
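To see what the hasattr() guard in the new list_nodes() is compensating for, here is a minimal self-contained sketch; FakeSize and FakeNode are invented stand-ins for libcloud's NodeSize and Node, not real libcloud classes:

    class FakeSize(object):                 # stand-in for libcloud NodeSize
        def __init__(self, id, name):
            self.id, self.name = id, name

    class FakeNode(object):                 # stand-in for libcloud Node
        def __init__(self, size):
            self.size = size

    sizes_by_name = {'n1-standard-1': FakeSize('3', 'n1-standard-1')}
    node = FakeNode('n1-standard-1')        # buggy driver: size is a bare name
    if not hasattr(node.size, 'id'):        # a plain string has no .id
        node.size = sizes_by_name[node.size]
    assert node.size.id == '3'

Checking only the first element of the list is enough because the driver bug affects all nodes in a listing uniformly.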
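For reference, GCE transmits instance metadata as a list of {'key': ..., 'value': ...} dicts rather than a flat mapping, which is why sync_node() does an upsert through _find_metadata() instead of a plain dict assignment. A self-contained sketch of that pattern (the hostname and timestamp values are invented):

    metadata_items = [{'key': 'booted_at', 'value': '2017-05-26T21:32:48Z'}]

    def find_metadata(items, key):
        # Return the item dict whose 'key' matches, or raise KeyError.
        try:
            return next(d for d in items if d.get('key') == key)
        except StopIteration:
            raise KeyError(key)

    hostname = 'compute0-zzzzz.example.org'  # invented FQDN
    try:
        find_metadata(metadata_items, 'hostname')['value'] = hostname
    except KeyError:
        metadata_items.append({'key': 'hostname', 'value': hostname})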