2 # Copyright (C) The Arvados Authors. All rights reserved.
4 # SPDX-License-Identifier: AGPL-3.0
6 from __future__ import absolute_import, print_function
16 from apiclient import errors as apierror
18 from .baseactor import BaseNodeManagerActor
20 from functools import partial
21 from libcloud.common.types import LibcloudError
22 from libcloud.common.exceptions import BaseHTTPError
# socket.error, ssl.SSLError and related transport failures all derive from
# IOError.  That makes it a good filter for operations we want to retry:
# low-level enough to cover network trouble, yet unlikely to hide code bugs.
NETWORK_ERRORS = (IOError,)

# Network failures plus any error reported by the Arvados API client.
ARVADOS_ERRORS = NETWORK_ERRORS + (apierror.Error,)

# Network failures plus libcloud's generic and HTTP-level errors.
CLOUD_ERRORS = NETWORK_ERRORS + (LibcloudError, BaseHTTPError)

# Module-level alias for the base actor class imported above.
actor_class = BaseNodeManagerActor
33 class NodeManagerConfig(ConfigParser.SafeConfigParser):
34 """Node Manager Configuration class.
36 This is a standard Python ConfigParser, with additional helper methods to
37 create objects instantiated with configuration information.
40 LOGGING_NONLEVELS = frozenset(['file'])
42 def __init__(self, *args, **kwargs):
43 # Can't use super() because SafeConfigParser is an old-style class.
44 ConfigParser.SafeConfigParser.__init__(self, *args, **kwargs)
45 for sec_name, settings in {
46 'Arvados': {'insecure': 'no',
51 'Daemon': {'min_nodes': '0',
54 'cloudlist_poll_time': '0',
55 'nodelist_poll_time': '0',
56 'wishlist_poll_time': '0',
57 'max_poll_time': '300',
58 'poll_stale_after': '600',
59 'max_total_price': '0',
60 'boot_fail_after': str(sys.maxint),
61 'node_stale_after': str(60 * 60 * 2),
63 'node_mem_scaling': '0.95',
64 'consecutive_idle_count': '2'},
65 'Manage': {'address': '127.0.0.1',
67 'ManagementToken': ''},
68 'Logging': {'file': '/dev/stderr',
71 if not self.has_section(sec_name):
72 self.add_section(sec_name)
73 for opt_name, value in settings.iteritems():
74 if not self.has_option(sec_name, opt_name):
75 self.set(sec_name, opt_name, value)
def get_section(self, section, transformers=None, default_transformer=None):
    """Return the options of ``section`` as a dict, converting values by type.

    Arguments:

    * section: name of the configuration section to read.
    * transformers: optional mapping of option name to ``str``, ``int``,
      ``bool`` or ``float``, selecting the typed getter used for that
      option.  Defaults to no per-option conversions.
    * default_transformer: optional type applied to every option that has
      no usable entry in ``transformers``.

    Options for which neither argument names a supported type are returned
    exactly as ``self.items(section)`` reports them.
    """
    # A ``{}`` default would be one dict shared by every call; use None.
    if transformers is None:
        transformers = {}
    # NOTE(review): only the ``bool`` entry is visible in the reviewed
    # source.  The str/int/float getters are assumed from how callers use
    # this method (``str`` and ``int`` are passed in) - confirm.
    getters = {
        str: self.get,
        int: self.getint,
        bool: self.getboolean,
        float: self.getfloat,
    }
    result = {}
    for key, value in self.items(section):
        # A per-option type wins; otherwise fall back to the default type.
        wanted = transformers.get(key)
        if wanted not in getters:
            wanted = default_transformer
        if wanted in getters:
            try:
                value = getters[wanted](section, key)
            except (TypeError, ValueError):
                # NOTE(review): the handler body is not visible in the
                # reviewed source; assumed to keep the unconverted value.
                pass
        result[key] = value
    return result
100 return {key: getattr(logging, self.get('Logging', key).upper())
101 for key in self.options('Logging')
102 if key not in self.LOGGING_NONLEVELS}
def dispatch_classes(self):
    """Return the compute node actor classes of the configured dispatcher.

    Imports ``arvnodeman.computenode.dispatch`` or, when the ``Daemon``
    section has a ``dispatcher`` option, the submodule of it with that
    name.  Returns the module's setup, shutdown, update and monitor actor
    classes, in that order, as a tuple.
    """
    base_module = 'arvnodeman.computenode.dispatch'
    if self.has_option('Daemon', 'dispatcher'):
        target = '{}.{}'.format(base_module, self.get('Daemon', 'dispatcher'))
    else:
        target = base_module
    dispatch = importlib.import_module(target)
    actor_names = (
        'ComputeNodeSetupActor',
        'ComputeNodeShutdownActor',
        'ComputeNodeUpdateActor',
        'ComputeNodeMonitorActor',
    )
    return tuple(getattr(dispatch, name) for name in actor_names)
def new_arvados_client(self):
    """Build an Arvados API client from the ``Arvados``/``Daemon`` settings.

    Reads ``insecure``, ``timeout``, ``host`` and ``token`` from the
    ``Arvados`` section and, when present, ``certs_file`` from the
    ``Daemon`` section.  Returns whatever ``arvados.api`` returns.
    """
    # Bind certs_file on every path so it is defined even when the option
    # is absent.
    certs_file = None
    if self.has_option('Daemon', 'certs_file'):
        certs_file = self.get('Daemon', 'certs_file')
    insecure = self.getboolean('Arvados', 'insecure')
    # NOTE(review): the reviewed source elides the lines that consume
    # ``certs_file`` and ``http``.  The ``ca_certs``, ``insecure`` and
    # ``http`` arguments below are reconstructed from the locals this method
    # computes - confirm against the original file before merging.
    http = httplib2.Http(timeout=self.getint('Arvados', 'timeout'),
                         ca_certs=certs_file,
                         disable_ssl_certificate_validation=insecure)
    return arvados.api(version='v1',
                       host=self.get('Arvados', 'host'),
                       token=self.get('Arvados', 'token'),
                       insecure=insecure,
                       http=http)
def new_cloud_client(self):
    """Instantiate the compute node driver for the configured cloud provider.

    Imports ``arvnodeman.computenode.driver.<provider>``, where the provider
    name comes from the ``provider`` option of the ``Cloud`` section, and
    returns an instance of that module's ``ComputeNodeDriver``.  It is built
    from the ``Cloud Credentials``, ``Cloud List`` and ``Cloud Create``
    sections.

    The driver class passed along is the module's
    ``ComputeNodeDriver.DEFAULT_DRIVER`` unless the ``Cloud`` section has a
    ``driver_class`` option.  That option is a dotted path whose last
    component is looked up in the module named by the rest.
    """
    module = importlib.import_module(
        'arvnodeman.computenode.driver.' + self.get('Cloud', 'provider'))
    driver_class = module.ComputeNodeDriver.DEFAULT_DRIVER
    if self.has_option('Cloud', 'driver_class'):
        # Everything before the last dot names the module, the final
        # component names the class.  Binding ``cls`` here fixes the
        # otherwise-unbound name used in the lookup.
        mod, _, cls = self.get('Cloud', 'driver_class').rpartition('.')
        driver_class = importlib.import_module(mod).__dict__[cls]
    auth_kwargs = self.get_section('Cloud Credentials')
    if 'timeout' in auth_kwargs:
        # get_section is called without type hints here, so convert the
        # timeout explicitly.
        auth_kwargs['timeout'] = int(auth_kwargs['timeout'])
    return module.ComputeNodeDriver(auth_kwargs,
                                    self.get_section('Cloud List'),
                                    self.get_section('Cloud Create'),
                                    driver_class=driver_class)
147 def node_sizes(self):
148 """Finds all acceptable NodeSizes for our installation.
150 Returns a list of (NodeSize, kwargs) pairs for each NodeSize object
151 returned by libcloud that matches a size listed in our config file.
153 all_sizes = self.new_cloud_client().list_sizes()
156 'instance_type': str,
160 for sec_name in self.sections():
161 sec_words = sec_name.split(None, 2)
162 if sec_words[0] != 'Size':
164 size_spec = self.get_section(sec_name, section_types, int)
165 if 'preemptible' not in size_spec:
166 size_spec['preemptible'] = False
167 if 'instance_type' not in size_spec:
168 # Assume instance type is Size name if missing
169 size_spec['instance_type'] = sec_words[1]
170 size_spec['id'] = sec_words[1]
171 size_kwargs[sec_words[1]] = size_spec
172 # EC2 node sizes are identified by id. GCE sizes are identified by name.
174 for size in all_sizes:
176 (size, size_kwargs[s]) for s in size_kwargs
177 if size_kwargs[s]['instance_type'] == size.id
178 or size_kwargs[s]['instance_type'] == size.name
180 return matching_sizes
182 def shutdown_windows(self):
184 for n in self.get('Cloud', 'shutdown_windows').split(',')]