X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/5bcba288077488791daa43a15d5fd5fb0c6e653c..409d130ddcc4b76def5fa8d61d2584725c61152a:/services/nodemanager/arvnodeman/config.py diff --git a/services/nodemanager/arvnodeman/config.py b/services/nodemanager/arvnodeman/config.py index b7ec1fc80d..b54461c47d 100644 --- a/services/nodemanager/arvnodeman/config.py +++ b/services/nodemanager/arvnodeman/config.py @@ -5,7 +5,6 @@ from __future__ import absolute_import, print_function import ConfigParser import importlib import logging -import ssl import sys import arvados @@ -13,13 +12,15 @@ import httplib2 import pykka from apiclient import errors as apierror -# IOError is the base class for socket.error and friends. +from .baseactor import BaseNodeManagerActor + +# IOError is the base class for socket.error, ssl.SSLError, and friends. # It seems like it hits the sweet spot for operations we want to retry: # it's low-level, but unlikely to catch code bugs. -NETWORK_ERRORS = (IOError, ssl.SSLError) +NETWORK_ERRORS = (IOError,) ARVADOS_ERRORS = NETWORK_ERRORS + (apierror.Error,) -actor_class = pykka.ThreadingActor +actor_class = BaseNodeManagerActor class NodeManagerConfig(ConfigParser.SafeConfigParser): """Node Manager Configuration class. @@ -41,8 +42,10 @@ class NodeManagerConfig(ConfigParser.SafeConfigParser): 'poll_time': '60', 'max_poll_time': '300', 'poll_stale_after': '600', + 'max_total_price': '0', 'boot_fail_after': str(sys.maxint), - 'node_stale_after': str(60 * 60 * 2)}, + 'node_stale_after': str(60 * 60 * 2), + 'watchdog': '600'}, 'Logging': {'file': '/dev/stderr', 'level': 'WARNING'}, }.iteritems(): @@ -88,8 +91,7 @@ class NodeManagerConfig(ConfigParser.SafeConfigParser): http = httplib2.Http(timeout=self.getint('Arvados', 'timeout'), ca_certs=certs_file, disable_ssl_certificate_validation=insecure) - return arvados.api('v1', - cache=False, # Don't reuse an existing client. + return arvados.api(version='v1', host=self.get('Arvados', 'host'), token=self.get('Arvados', 'token'), insecure=insecure, @@ -106,14 +108,29 @@ class NodeManagerConfig(ConfigParser.SafeConfigParser): self.get_section('Cloud Create')) def node_sizes(self, all_sizes): + """Finds all acceptable NodeSizes for our installation. + + Returns a list of (NodeSize, kwargs) pairs for each NodeSize object + returned by libcloud that matches a size listed in our config file. + """ + size_kwargs = {} for sec_name in self.sections(): sec_words = sec_name.split(None, 2) if sec_words[0] != 'Size': continue - size_kwargs[sec_words[1]] = self.get_section(sec_name, int) - return [(size, size_kwargs[size.id]) for size in all_sizes - if size.id in size_kwargs] + size_spec = self.get_section(sec_name, int) + if 'price' in size_spec: + size_spec['price'] = float(size_spec['price']) + size_kwargs[sec_words[1]] = size_spec + # EC2 node sizes are identified by id. GCE sizes are identified by name. + matching_sizes = [] + for size in all_sizes: + if size.id in size_kwargs: + matching_sizes.append((size, size_kwargs[size.id])) + elif size.name in size_kwargs: + matching_sizes.append((size, size_kwargs[size.name])) + return matching_sizes def shutdown_windows(self): return [int(n)