projects
/
arvados.git
/ blobdiff
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
11652: Merge branch 'master' into 11652-recursive-contents
[arvados.git]
/
services
/
nodemanager
/
arvnodeman
/
launcher.py
diff --git a/services/nodemanager/arvnodeman/launcher.py b/services/nodemanager/arvnodeman/launcher.py
index 78bd2db5cc05fe9516c10e718506ef11734055db..11d38ecb76d22105b289d17cec5081aaa5bf3952 100644
(file)
--- a/services/nodemanager/arvnodeman/launcher.py
+++ b/services/nodemanager/arvnodeman/launcher.py
@@ -10,12 +10,16 @@ import time
import daemon
import pykka
import daemon
import pykka
+import libcloud
from . import config as nmconfig
from . import config as nmconfig
+from . import status
+from .baseactor import WatchdogActor
from .daemon import NodeManagerDaemonActor
from .jobqueue import JobQueueMonitorActor, ServerCalculator
from .nodelist import ArvadosNodeListMonitorActor, CloudNodeListMonitorActor
from .timedcallback import TimedCallBackActor
from .daemon import NodeManagerDaemonActor
from .jobqueue import JobQueueMonitorActor, ServerCalculator
from .nodelist import ArvadosNodeListMonitorActor, CloudNodeListMonitorActor
from .timedcallback import TimedCallBackActor
+from ._version import __version__
node_daemon = None
node_daemon = None
@@ -27,6 +31,10 @@ def parse_cli(args):
parser = argparse.ArgumentParser(
prog='arvados-node-manager',
description="Dynamically allocate Arvados cloud compute nodes")
parser = argparse.ArgumentParser(
prog='arvados-node-manager',
description="Dynamically allocate Arvados cloud compute nodes")
+ parser.add_argument(
+ '--version', action='version',
+ version="%s %s" % (sys.argv[0], __version__),
+ help='Print version and exit.')
parser.add_argument(
'--foreground', action='store_true', default=False,
help="Run in the foreground. Don't daemonize.")
parser.add_argument(
'--foreground', action='store_true', default=False,
help="Run in the foreground. Don't daemonize.")
@@ -56,6 +64,7 @@ def setup_logging(path, level, **sublevels):
for logger_name, sublevel in sublevels.iteritems():
sublogger = logging.getLogger(logger_name)
sublogger.setLevel(sublevel)
for logger_name, sublevel in sublevels.iteritems():
sublogger = logging.getLogger(logger_name)
sublogger.setLevel(sublevel)
+ return root_logger
def build_server_calculator(config):
cloud_size_list = config.node_sizes(config.new_cloud_client().list_sizes())
def build_server_calculator(config):
cloud_size_list = config.node_sizes(config.new_cloud_client().list_sizes())
@@ -63,7 +72,8 @@ def build_server_calculator(config):
abort("No valid node sizes configured")
return ServerCalculator(cloud_size_list,
config.getint('Daemon', 'max_nodes'),
abort("No valid node sizes configured")
return ServerCalculator(cloud_size_list,
config.getint('Daemon', 'max_nodes'),
- config.getfloat('Daemon', 'max_total_price'))
+ config.getfloat('Daemon', 'max_total_price'),
+ config.getfloat('Daemon', 'node_mem_scaling'))
def launch_pollers(config, server_calculator):
poll_time = config.getint('Daemon', 'poll_time')
def launch_pollers(config, server_calculator):
poll_time = config.getint('Daemon', 'poll_time')
@@ -71,7 +81,7 @@ def launch_pollers(config, server_calculator):
timer = TimedCallBackActor.start(poll_time / 10.0).tell_proxy()
cloud_node_poller = CloudNodeListMonitorActor.start(
timer = TimedCallBackActor.start(poll_time / 10.0).tell_proxy()
cloud_node_poller = CloudNodeListMonitorActor.start(
- config.new_cloud_client(), timer, poll_time, max_poll_time).tell_proxy()
+ config.new_cloud_client(), timer, server_calculator, poll_time, max_poll_time).tell_proxy()
arvados_node_poller = ArvadosNodeListMonitorActor.start(
config.new_arvados_client(), timer, poll_time, max_poll_time).tell_proxy()
job_queue_poller = JobQueueMonitorActor.start(
arvados_node_poller = ArvadosNodeListMonitorActor.start(
config.new_arvados_client(), timer, poll_time, max_poll_time).tell_proxy()
job_queue_poller = JobQueueMonitorActor.start(
@@ -103,8 +113,11 @@ def main(args=None):
for sigcode in [signal.SIGINT, signal.SIGQUIT, signal.SIGTERM]:
signal.signal(sigcode, shutdown_signal)
for sigcode in [signal.SIGINT, signal.SIGQUIT, signal.SIGTERM]:
signal.signal(sigcode, shutdown_signal)
+ status.Server(config).start()
+
try:
try:
- setup_logging(config.get('Logging', 'file'), **config.log_levels())
+ root_logger = setup_logging(config.get('Logging', 'file'), **config.log_levels())
+ root_logger.info("%s %s, libcloud %s", sys.argv[0], __version__, libcloud.__version__)
node_setup, node_shutdown, node_update, node_monitor = \
config.dispatch_classes()
server_calculator = build_server_calculator(config)
node_setup, node_shutdown, node_update, node_monitor = \
config.dispatch_classes()
server_calculator = build_server_calculator(config)
@@ -125,6 +138,12 @@ def main(args=None):
node_setup, node_shutdown, node_monitor,
max_total_price=config.getfloat('Daemon', 'max_total_price')).tell_proxy()
node_setup, node_shutdown, node_monitor,
max_total_price=config.getfloat('Daemon', 'max_total_price')).tell_proxy()
+ WatchdogActor.start(config.getint('Daemon', 'watchdog'),
+ cloud_node_poller.actor_ref,
+ arvados_node_poller.actor_ref,
+ job_queue_poller.actor_ref,
+ node_daemon.actor_ref)
+
signal.pause()
daemon_stopped = node_daemon.actor_ref.actor_stopped.is_set
while not daemon_stopped():
signal.pause()
daemon_stopped = node_daemon.actor_ref.actor_stopped.is_set
while not daemon_stopped():