Merge branch '8206-gce-retry-init' closes #8206
[arvados.git] / services / nodemanager / arvnodeman / launcher.py
index d2f4afee061e26fd19085575f7b5bef33c977efd..1ee792858f251b219ce070166c4d68fb2f101d38 100644 (file)
@@ -12,7 +12,6 @@ import daemon
 import pykka
 
 from . import config as nmconfig
-from .computenode.dispatch import ComputeNodeUpdateActor
 from .daemon import NodeManagerDaemonActor
 from .jobqueue import JobQueueMonitorActor, ServerCalculator
 from .nodelist import ArvadosNodeListMonitorActor, CloudNodeListMonitorActor
@@ -58,25 +57,23 @@ def setup_logging(path, level, **sublevels):
         sublogger = logging.getLogger(logger_name)
         sublogger.setLevel(sublevel)
 
-def launch_pollers(config):
-    cloud_client = config.new_cloud_client()
-    arvados_client = config.new_arvados_client()
-    cloud_size_list = config.node_sizes(cloud_client.list_sizes())
+def build_server_calculator(config):
+    cloud_size_list = config.node_sizes(config.new_cloud_client().list_sizes())
     if not cloud_size_list:
         abort("No valid node sizes configured")
+    return ServerCalculator(cloud_size_list,
+                            config.getint('Daemon', 'max_nodes'),
+                            config.getfloat('Daemon', 'max_total_price'))
 
-    server_calculator = ServerCalculator(
-        cloud_size_list,
-        config.getint('Daemon', 'min_nodes'),
-        config.getint('Daemon', 'max_nodes'))
+def launch_pollers(config, server_calculator):
     poll_time = config.getint('Daemon', 'poll_time')
     max_poll_time = config.getint('Daemon', 'max_poll_time')
 
     timer = TimedCallBackActor.start(poll_time / 10.0).proxy()
     cloud_node_poller = CloudNodeListMonitorActor.start(
-        cloud_client, timer, poll_time, max_poll_time).proxy()
+        config.new_cloud_client(timer), timer, poll_time, max_poll_time).proxy()
     arvados_node_poller = ArvadosNodeListMonitorActor.start(
-        arvados_client, timer, poll_time, max_poll_time).proxy()
+        config.new_arvados_client(), timer, poll_time, max_poll_time).proxy()
     job_queue_poller = JobQueueMonitorActor.start(
         config.new_arvados_client(), timer, server_calculator,
         poll_time, max_poll_time).proxy()
@@ -107,19 +104,25 @@ def main(args=None):
         signal.signal(sigcode, shutdown_signal)
 
     setup_logging(config.get('Logging', 'file'), **config.log_levels())
+    node_setup, node_shutdown, node_update, node_monitor = \
+        config.dispatch_classes()
+    server_calculator = build_server_calculator(config)
     timer, cloud_node_poller, arvados_node_poller, job_queue_poller = \
-        launch_pollers(config)
-    cloud_node_updater = ComputeNodeUpdateActor.start(
-        config.new_cloud_client).proxy()
+        launch_pollers(config, server_calculator)
+    cloud_node_updater = node_update.start(config.new_cloud_client).proxy()
     node_daemon = NodeManagerDaemonActor.start(
         job_queue_poller, arvados_node_poller, cloud_node_poller,
         cloud_node_updater, timer,
         config.new_arvados_client, config.new_cloud_client,
         config.shutdown_windows(),
+        server_calculator,
         config.getint('Daemon', 'min_nodes'),
         config.getint('Daemon', 'max_nodes'),
         config.getint('Daemon', 'poll_stale_after'),
-        config.getint('Daemon', 'node_stale_after')).proxy()
+        config.getint('Daemon', 'boot_fail_after'),
+        config.getint('Daemon', 'node_stale_after'),
+        node_setup, node_shutdown, node_monitor,
+        max_total_price=config.getfloat('Daemon', 'max_total_price')).proxy()
 
     signal.pause()
     daemon_stopped = node_daemon.actor_ref.actor_stopped.is_set