f884295e37c7556976ce35ba186954936cc22ed4
[arvados.git] / services / nodemanager / arvnodeman / config.py
1 #!/usr/bin/env python
2
3 from __future__ import absolute_import, print_function
4
5 import ConfigParser
6 import importlib
7 import logging
8 import sys
9
10 import arvados
11 import httplib2
12 import pykka
13 from apiclient import errors as apierror
14
15 from .baseactor import BaseNodeManagerActor
16
# Exception classes that mark an operation as worth retrying.
# IOError is the base class for socket.error, ssl.SSLError, and friends, so
# it is low-level enough to cover network trouble yet unlikely to catch
# bugs in our own code.
NETWORK_ERRORS = (IOError,)
# The network errors above, plus the apiclient library's Error class.
ARVADOS_ERRORS = NETWORK_ERRORS + (apierror.Error,)

# Module-level alias for BaseNodeManagerActor.
actor_class = BaseNodeManagerActor
24
class NodeManagerConfig(ConfigParser.SafeConfigParser):
    """Node Manager Configuration class.

    This is a standard Python ConfigParser, with additional helper methods to
    create objects instantiated with configuration information.
    """

    # Options in the [Logging] section that are not level settings;
    # log_levels() leaves them out of its result.
    LOGGING_NONLEVELS = frozenset(['file'])

    def __init__(self, *args, **kwargs):
        """Create the parser and install the built-in default settings.

        All arguments are passed through to SafeConfigParser.  Afterwards,
        each default below is set unless that option already exists; missing
        sections are created on the way.
        """
        # Can't use super() because SafeConfigParser is an old-style class.
        ConfigParser.SafeConfigParser.__init__(self, *args, **kwargs)
        # sys.maxint only exists on Python 2, where this evaluates exactly
        # as before; sys.maxsize is the fallback on interpreters without it.
        max_int = getattr(sys, 'maxint', sys.maxsize)
        defaults = {
            'Arvados': {'insecure': 'no',
                        'timeout': '15',
                        'jobs_queue': 'yes',
                        'slurm_queue': 'yes'},
            'Daemon': {'min_nodes': '0',
                       'max_nodes': '1',
                       'poll_time': '60',
                       'max_poll_time': '300',
                       'poll_stale_after': '600',
                       'max_total_price': '0',
                       'boot_fail_after': str(max_int),
                       'node_stale_after': str(60 * 60 * 2),
                       'watchdog': '600',
                       'node_mem_scaling': '0.95'},
            'Manage': {'address': '127.0.0.1',
                       'port': '-1'},
            'Logging': {'file': '/dev/stderr',
                        'level': 'WARNING'},
        }
        # .items() behaves the same as .iteritems() for these small literal
        # dicts and does not tie the loop to Python 2.
        for sec_name, settings in defaults.items():
            if not self.has_section(sec_name):
                self.add_section(sec_name)
            for opt_name, value in settings.items():
                if not self.has_option(sec_name, opt_name):
                    self.set(sec_name, opt_name, value)

    def get_section(self, section, transformer=None):
        """Return the options of one section as a dictionary.

        Arguments:
        * section: name of the section to read.
        * transformer: optional callable applied to every value.  When it
          raises TypeError or ValueError for a value, that value is kept
          unchanged (as returned by self.items()).

        The result is an instance of the parser's own dictionary type.
        """
        result = self._dict()
        for key, value in self.items(section):
            if transformer is not None:
                try:
                    value = transformer(value)
                except (TypeError, ValueError):
                    # Best effort: leave values the transformer can't
                    # handle in their original form.
                    pass
            result[key] = value
        return result

    def log_levels(self):
        """Return a dict mapping [Logging] options to logging levels.

        Every option except those in LOGGING_NONLEVELS is included.  Its
        value is upper-cased and looked up as an attribute of the logging
        module (e.g. 'warning' gives logging.WARNING); a name the module
        does not define raises AttributeError.
        """
        return {key: getattr(logging, self.get('Logging', key).upper())
                for key in self.options('Logging')
                if key not in self.LOGGING_NONLEVELS}

    def dispatch_classes(self):
        """Import the dispatch module and return its four actor classes.

        The module is arvnodeman.computenode.dispatch, or the submodule of
        it named by the [Daemon] 'dispatcher' option when that is set.
        Returns a (setup, shutdown, update, monitor) tuple of classes.
        """
        mod_name = 'arvnodeman.computenode.dispatch'
        if self.has_option('Daemon', 'dispatcher'):
            mod_name = '{}.{}'.format(mod_name,
                                      self.get('Daemon', 'dispatcher'))
        module = importlib.import_module(mod_name)
        return (module.ComputeNodeSetupActor,
                module.ComputeNodeShutdownActor,
                module.ComputeNodeUpdateActor,
                module.ComputeNodeMonitorActor)

    def new_arvados_client(self):
        """Build and return an Arvados API client from the configuration.

        Host, token, timeout and the 'insecure' flag come from [Arvados].
        The optional CA certificate file is read from the 'certs_file'
        option of the [Daemon] section.
        """
        if self.has_option('Daemon', 'certs_file'):
            certs_file = self.get('Daemon', 'certs_file')
        else:
            certs_file = None
        insecure = self.getboolean('Arvados', 'insecure')
        http = httplib2.Http(timeout=self.getint('Arvados', 'timeout'),
                             ca_certs=certs_file,
                             disable_ssl_certificate_validation=insecure)
        return arvados.api(version='v1',
                           host=self.get('Arvados', 'host'),
                           token=self.get('Arvados', 'token'),
                           insecure=insecure,
                           http=http)

    def new_cloud_client(self):
        """Build and return the ComputeNodeDriver for the configured cloud.

        The driver module is arvnodeman.computenode.driver.<provider>, with
        <provider> taken from [Cloud].  Its DEFAULT_DRIVER is used unless
        [Cloud] 'driver_class' gives a dotted path ('package.module.Class')
        to another class.  The [Cloud Credentials], [Cloud List] and
        [Cloud Create] sections are passed to the driver as dictionaries.
        """
        module = importlib.import_module('arvnodeman.computenode.driver.' +
                                         self.get('Cloud', 'provider'))
        driver_class = module.ComputeNodeDriver.DEFAULT_DRIVER
        if self.has_option('Cloud', 'driver_class'):
            # Everything before the last dot is the module to import; the
            # last component is the class name inside it.
            d = self.get('Cloud', 'driver_class').split('.')
            mod = '.'.join(d[:-1])
            cls = d[-1]
            driver_class = importlib.import_module(mod).__dict__[cls]
        auth_kwargs = self.get_section('Cloud Credentials')
        if 'timeout' in auth_kwargs:
            # Config values are strings; hand the driver a number.
            auth_kwargs['timeout'] = int(auth_kwargs['timeout'])
        return module.ComputeNodeDriver(auth_kwargs,
                                        self.get_section('Cloud List'),
                                        self.get_section('Cloud Create'),
                                        driver_class=driver_class)

    def node_sizes(self, all_sizes):
        """Finds all acceptable NodeSizes for our installation.

        Returns a list of (NodeSize, kwargs) pairs for each NodeSize object
        returned by libcloud that matches a size listed in our config file.

        A size is listed by a section named 'Size <identifier>'; words after
        the identifier are ignored.  Option values that parse as integers
        are converted to int, and 'price' is converted to float.  A section
        named just 'Size', with no identifier, is skipped with a warning.
        """

        size_kwargs = {}
        for sec_name in self.sections():
            sec_words = sec_name.split(None, 2)
            # A whitespace-only section name splits into no words at all.
            if not sec_words or sec_words[0] != 'Size':
                continue
            if len(sec_words) < 2:
                # Without an identifier there is nothing to match against
                # the cloud's sizes; skip it instead of raising IndexError.
                logging.getLogger(__name__).warning(
                    "ignoring config section [%s]: no size identifier",
                    sec_name)
                continue
            size_spec = self.get_section(sec_name, int)
            if 'price' in size_spec:
                size_spec['price'] = float(size_spec['price'])
            size_kwargs[sec_words[1]] = size_spec
        # EC2 node sizes are identified by id. GCE sizes are identified by name.
        matching_sizes = []
        for size in all_sizes:
            if size.id in size_kwargs:
                matching_sizes.append((size, size_kwargs[size.id]))
            elif size.name in size_kwargs:
                matching_sizes.append((size, size_kwargs[size.name]))
        return matching_sizes

    def shutdown_windows(self):
        """Return [Cloud] 'shutdown_windows' as a list of integers.

        The option is a comma-separated list; each item is converted with
        int(), so an item that is not an integer raises ValueError.
        """
        return [int(n)
                for n in self.get('Cloud', 'shutdown_windows').split(',')]