13497: Rename SystemNodes to NodeProfiles in config.
[arvados.git] / sdk / python / tests / run_test_server.py
index a83566aa669fde27b6a2718aa3baaca8fd360fdb..f7ca6daf6f65c190ccd22a1a04ad3cb1996f3e75 100644 (file)
@@ -1,9 +1,15 @@
-#!/usr/bin/env python
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
 
 from __future__ import print_function
+from __future__ import division
+from builtins import str
+from builtins import range
 import argparse
 import atexit
 import errno
+import glob
 import httplib2
 import os
 import pipes
@@ -12,8 +18,8 @@ import re
 import shutil
 import signal
 import socket
-import subprocess
 import string
+import subprocess
 import sys
 import tempfile
 import time
@@ -33,9 +39,19 @@ import arvados.config
 ARVADOS_DIR = os.path.realpath(os.path.join(MY_DIRNAME, '../../..'))
 SERVICES_SRC_DIR = os.path.join(ARVADOS_DIR, 'services')
 if 'GOPATH' in os.environ:
+    # Add all GOPATH bin dirs to PATH -- but insert them after the
+    # ruby gems bin dir, to ensure "bundle" runs the Ruby bundler
+    # command, not the golang.org/x/tools/cmd/bundle command.
     gopaths = os.environ['GOPATH'].split(':')
-    gobins = [os.path.join(path, 'bin') for path in gopaths]
-    os.environ['PATH'] = ':'.join(gobins) + ':' + os.environ['PATH']
+    addbins = [os.path.join(path, 'bin') for path in gopaths]
+    newbins = []
+    for path in os.environ['PATH'].split(':'):
+        newbins.append(path)
+        if os.path.exists(os.path.join(path, 'bundle')):
+            # Insert the Go bin dirs right after the first PATH entry
+            # that contains "bundle", then empty the list so they are
+            # not inserted a second time.
+            newbins += addbins
+            addbins = []
+    # If no PATH entry contained "bundle", addbins is still intact
+    # here and the Go bin dirs end up at the very end of PATH.
+    newbins += addbins
+    os.environ['PATH'] = ':'.join(newbins)
 
 TEST_TMPDIR = os.path.join(ARVADOS_DIR, 'tmp')
 if not os.path.exists(TEST_TMPDIR):
@@ -43,6 +59,7 @@ if not os.path.exists(TEST_TMPDIR):
 
 my_api_host = None
 _cached_config = {}
+_cached_db_config = {}
 
 def find_server_pid(PID_PATH, wait=10):
     now = time.time()
@@ -84,7 +101,7 @@ def kill_server_pid(pidfile, wait=10, passenger_root=False):
         # Use up to half of the +wait+ period waiting for "passenger
         # stop" to work. If the process hasn't exited by then, start
         # sending TERM signals.
-        startTERM += wait/2
+        startTERM += wait//2
 
     server_pid = None
     while now <= deadline and server_pid is None:
@@ -98,7 +115,8 @@ def kill_server_pid(pidfile, wait=10, passenger_root=False):
             # Pidfile exists, but we can't parse it. Perhaps the
             # server has created the file but hasn't written its PID
             # yet?
-            print("Parse error reading pidfile {}: {}".format(pidfile, error))
+            print("Parse error reading pidfile {}: {}".format(pidfile, error),
+                  file=sys.stderr)
             time.sleep(0.1)
             now = time.time()
 
@@ -106,6 +124,7 @@ def kill_server_pid(pidfile, wait=10, passenger_root=False):
         try:
             exited, _ = os.waitpid(server_pid, os.WNOHANG)
             if exited > 0:
+                _remove_pidfile(pidfile)
                 return
         except OSError:
             # already exited, or isn't our child process
@@ -113,18 +132,28 @@ def kill_server_pid(pidfile, wait=10, passenger_root=False):
         try:
             if now >= startTERM:
                 os.kill(server_pid, signal.SIGTERM)
-                print("Sent SIGTERM to {} ({})".format(server_pid, pidfile))
+                print("Sent SIGTERM to {} ({})".format(server_pid, pidfile),
+                      file=sys.stderr)
         except OSError as error:
             if error.errno == errno.ESRCH:
                 # Thrown by os.getpgid() or os.kill() if the process
                 # does not exist, i.e., our work here is done.
+                _remove_pidfile(pidfile)
                 return
             raise
         time.sleep(0.1)
         now = time.time()
 
     print("Server PID {} ({}) did not exit, giving up after {}s".
-          format(server_pid, pidfile, wait))
+          format(server_pid, pidfile, wait),
+          file=sys.stderr)
+
+def _remove_pidfile(pidfile):
+    """Delete pidfile, tolerating the case where it is already gone.
+
+    If unlinking fails and the path still exists afterwards, the
+    original OSError is re-raised to the caller.
+    """
+    try:
+        os.unlink(pidfile)
+    except OSError:
+        # Catch OSError only: a bare "except:" would also trap
+        # KeyboardInterrupt and SystemExit. The error is ignored only
+        # when the file is really absent now.
+        if os.path.lexists(pidfile):
+            raise
 
 def find_available_port():
     """Return an IPv4 port number that is not in use right now.
@@ -155,7 +184,8 @@ def _wait_until_port_listens(port, timeout=10):
         subprocess.check_output(['which', 'lsof'])
     except subprocess.CalledProcessError:
         print("WARNING: No `lsof` -- cannot wait for port to listen. "+
-              "Sleeping 0.5 and hoping for the best.")
+              "Sleeping 0.5 and hoping for the best.",
+              file=sys.stderr)
         time.sleep(0.5)
         return
     deadline = time.time() + timeout
@@ -172,23 +202,43 @@ def _wait_until_port_listens(port, timeout=10):
         format(port, timeout),
         file=sys.stderr)
 
-def _fifo2stderr(label):
-    """Create a fifo, and copy it to stderr, prepending label to each line.
+def _logfilename(label):
+    """Set up a labelled log file, and return a path to write logs to.
+
+    Normally, the returned path is {tmpdir}/{label}.log.
 
-    Return value is the path to the new FIFO.
+    In debug mode, logs are also written to stderr, with [label]
+    prepended to each line. The returned path is a FIFO.
 
     +label+ should contain only alphanumerics: it is also used as part
     of the FIFO filename.
+
     """
+    logfilename = os.path.join(TEST_TMPDIR, label+'.log')
+    # "Debug mode" here means ARVADOS_DEBUG is set to any non-empty
+    # string in the environment.
+    if not os.environ.get('ARVADOS_DEBUG', ''):
+        return logfilename
     fifo = os.path.join(TEST_TMPDIR, label+'.fifo')
     try:
         os.remove(fifo)
     except OSError as error:
         if error.errno != errno.ENOENT:
             raise
-    os.mkfifo(fifo, 0700)
+    os.mkfifo(fifo, 0o700)
+    # Every stage of the pipeline below runs under stdbuf with
+    # unbuffered stdin and line-buffered stdout/stderr.
+    stdbuf = ['stdbuf', '-i0', '-oL', '-eL']
+    # open(fifo, 'r') would block waiting for someone to open the fifo
+    # for writing, so we need a separate cat process to open it for
+    # us.
+    cat = subprocess.Popen(
+        stdbuf+['cat', fifo],
+        stdin=open('/dev/null'),
+        stdout=subprocess.PIPE)
+    # tee appends a copy of everything read from the FIFO to the
+    # regular log file and passes it on to the next stage.
+    tee = subprocess.Popen(
+        stdbuf+['tee', '-a', logfilename],
+        stdin=cat.stdout,
+        stdout=subprocess.PIPE)
+    # sed prefixes each line with [label] and writes it to our stderr.
     subprocess.Popen(
-        ['sed', '-e', 's/^/['+label+'] /', fifo],
+        stdbuf+['sed', '-e', 's/^/['+label+'] /'],
+        stdin=tee.stdout,
         stdout=sys.stderr)
     return fifo
 
@@ -209,8 +259,15 @@ def run(leave_running_atexit=False):
     """
     global my_api_host
 
-    # Delete cached discovery document.
-    shutil.rmtree(arvados.http_cache('discovery'))
+    # Delete cached discovery documents.
+    #
+    # This will clear cached docs that belong to other processes (like
+    # concurrent test suites) even if they're still running. They should
+    # be able to tolerate that.
+    for fn in glob.glob(os.path.join(
+            str(arvados.http_cache('discovery')),
+            '*,arvados,v1,rest,*')):
+        os.unlink(fn)
 
     pid_file = _pidfile('api')
     pid_file_ok = find_server_pid(pid_file, 0)
@@ -251,21 +308,6 @@ def run(leave_running_atexit=False):
     if not os.path.exists('tmp/logs'):
         os.makedirs('tmp/logs')
 
-    if not os.path.exists('tmp/self-signed.pem'):
-        # We assume here that either passenger reports its listening
-        # address as https:/0.0.0.0:port/. If it reports "127.0.0.1"
-        # then the certificate won't match the host and reset() will
-        # fail certificate verification. If it reports "localhost",
-        # clients (notably Python SDK's websocket client) might
-        # resolve localhost as ::1 and then fail to connect.
-        subprocess.check_call([
-            'openssl', 'req', '-new', '-x509', '-nodes',
-            '-out', 'tmp/self-signed.pem',
-            '-keyout', 'tmp/self-signed.key',
-            '-days', '3650',
-            '-subj', '/CN=0.0.0.0'],
-        stdout=sys.stderr)
-
     # Install the git repository fixtures.
     gitdir = os.path.join(SERVICES_SRC_DIR, 'api', 'tmp', 'git')
     gittarball = os.path.join(SERVICES_SRC_DIR, 'api', 'test', 'test.git.tar')
@@ -273,10 +315,16 @@ def run(leave_running_atexit=False):
         os.makedirs(gitdir)
     subprocess.check_output(['tar', '-xC', gitdir, '-f', gittarball])
 
+    # The nginx proxy isn't listening here yet, but we need to choose
+    # the wss:// port now so we can write the API server config file.
+    wss_port = find_available_port()
+    _setport('wss', wss_port)
+
     port = find_available_port()
     env = os.environ.copy()
     env['RAILS_ENV'] = 'test'
-    env['ARVADOS_WEBSOCKETS'] = 'yes'
+    env['ARVADOS_TEST_WSS_PORT'] = str(wss_port)
+    env.pop('ARVADOS_WEBSOCKETS', None)
     env.pop('ARVADOS_TEST_API_HOST', None)
     env.pop('ARVADOS_API_HOST', None)
     env.pop('ARVADOS_API_HOST_INSECURE', None)
@@ -349,6 +397,81 @@ def stop(force=False):
         kill_server_pid(_pidfile('api'))
         my_api_host = None
 
+def run_controller():
+    """Start arvados-server controller on a free port; return the port.
+
+    Does nothing (and returns None) if ARVADOS_TEST_PROXY_SERVICES is
+    set in the environment. stop_controller() is called first, then a
+    cluster config is written to {tmpdir}/arvados.yml pointing the
+    controller at the Rails API port taken from ARVADOS_TEST_API_HOST
+    (or, failing that, my_api_host).
+    """
+    if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
+        return
+    stop_controller()
+    # Use the str method rather than string.split(): the latter does
+    # not exist on Python 3.
+    api_host = os.environ.get('ARVADOS_TEST_API_HOST', my_api_host)
+    rails_api_port = int(api_host.split(':')[-1])
+    port = find_available_port()
+    conf = os.path.join(TEST_TMPDIR, 'arvados.yml')
+    with open(conf, 'w') as f:
+        f.write("""
+Clusters:
+  zzzzz:
+    NodeProfiles:
+      "*":
+        "arvados-controller":
+          Listen: ":{}"
+        "arvados-api-server":
+          Listen: ":{}"
+          TLS: true
+        """.format(port, rails_api_port))
+    # The child process gets its own copies of these descriptors, so
+    # the parent's handles are closed as soon as Popen() returns
+    # instead of being leaked.
+    with open(_logfilename('controller'), 'a') as logf, \
+         open('/dev/null') as devnull:
+        controller = subprocess.Popen(
+            ["arvados-server", "controller", "-config", conf],
+            stdin=devnull, stdout=logf, stderr=logf, close_fds=True)
+    with open(_pidfile('controller'), 'w') as f:
+        f.write(str(controller.pid))
+    _wait_until_port_listens(port)
+    _setport('controller', port)
+    return port
+
+def stop_controller():
+    """Kill the process recorded in the controller pidfile.
+
+    Skipped entirely when ARVADOS_TEST_PROXY_SERVICES is set in the
+    environment.
+    """
+    if 'ARVADOS_TEST_PROXY_SERVICES' not in os.environ:
+        kill_server_pid(_pidfile('controller'))
+
+def run_ws():
+    """Start the ws (websocket) server on a free port; return the port.
+
+    Does nothing (and returns None) if ARVADOS_TEST_PROXY_SERVICES is
+    set in the environment. stop_ws() is called first, then a config
+    file is written to {tmpdir}/ws.yml using ARVADOS_API_HOST (which
+    must be set: KeyError otherwise) and the "test" section of the
+    API server's database.yml.
+    """
+    if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
+        return
+    stop_ws()
+    port = find_available_port()
+    conf = os.path.join(TEST_TMPDIR, 'ws.yml')
+    with open(conf, 'w') as f:
+        f.write("""
+Client:
+  APIHost: {}
+  Insecure: true
+Listen: :{}
+LogLevel: {}
+Postgres:
+  host: {}
+  dbname: {}
+  user: {}
+  password: {}
+  sslmode: require
+        """.format(os.environ['ARVADOS_API_HOST'],
+                   port,
+                   ('info' if os.environ.get('ARVADOS_DEBUG', '') in ['','0'] else 'debug'),
+                   _dbconfig('host'),
+                   _dbconfig('database'),
+                   _dbconfig('username'),
+                   _dbconfig('password')))
+    # The child process gets its own copies of these descriptors, so
+    # the parent's handles are closed as soon as Popen() returns
+    # instead of being leaked.
+    with open(_logfilename('ws'), 'a') as logf, \
+         open('/dev/null') as devnull:
+        ws = subprocess.Popen(
+            ["ws", "-config", conf],
+            stdin=devnull, stdout=logf, stderr=logf, close_fds=True)
+    with open(_pidfile('ws'), 'w') as f:
+        f.write(str(ws.pid))
+    _wait_until_port_listens(port)
+    _setport('ws', port)
+    return port
+
+def stop_ws():
+    """Kill the process recorded in the ws pidfile.
+
+    Skipped entirely when ARVADOS_TEST_PROXY_SERVICES is set in the
+    environment.
+    """
+    if 'ARVADOS_TEST_PROXY_SERVICES' not in os.environ:
+        kill_server_pid(_pidfile('ws'))
+
 def _start_keep(n, keep_args):
     keep0 = tempfile.mkdtemp()
     port = find_available_port()
@@ -357,10 +480,10 @@ def _start_keep(n, keep_args):
                 "-listen=:{}".format(port),
                 "-pid="+_pidfile('keep{}'.format(n))]
 
-    for arg, val in keep_args.iteritems():
+    for arg, val in keep_args.items():
         keep_cmd.append("{}={}".format(arg, val))
 
-    logf = open(_fifo2stderr('keep{}'.format(n)), 'w')
+    logf = open(_logfilename('keep{}'.format(n)), 'a')
     kp0 = subprocess.Popen(
         keep_cmd, stdin=open('/dev/null'), stdout=logf, stderr=logf, close_fds=True)
 
@@ -383,8 +506,7 @@ def run_keep(blob_signing_key=None, enforce_permissions=False, num_servers=2):
     with open(os.path.join(TEST_TMPDIR, "keep.blob_signing_key"), "w") as f:
         keep_args['-blob-signing-key-file'] = f.name
         f.write(blob_signing_key)
-    if enforce_permissions:
-        keep_args['-enforce-permissions'] = 'true'
+    keep_args['-enforce-permissions'] = str(enforce_permissions).lower()
     with open(os.path.join(TEST_TMPDIR, "keep.data-manager-token-file"), "w") as f:
         keep_args['-data-manager-token-file'] = f.name
         f.write(auth_token('data_manager'))
@@ -414,11 +536,15 @@ def run_keep(blob_signing_key=None, enforce_permissions=False, num_servers=2):
             'keep_disk': {'keep_service_uuid': svc['uuid'] }
         }).execute()
 
-    # If keepproxy is running, send SIGHUP to make it discover the new
-    # keepstore services.
-    proxypidfile = _pidfile('keepproxy')
-    if os.path.exists(proxypidfile):
-        os.kill(int(open(proxypidfile).read()), signal.SIGHUP)
+    # If keepproxy and/or keep-web is running, send SIGHUP to make
+    # them discover the new keepstore services.
+    for svc in ('keepproxy', 'keep-web'):
+        pidfile = _pidfile('keepproxy')
+        if os.path.exists(pidfile):
+            try:
+                os.kill(int(open(pidfile).read()), signal.SIGHUP)
+            except OSError:
+                os.remove(pidfile)
 
 def _stop_keep(n):
     kill_server_pid(_pidfile('keep{}'.format(n)))
@@ -441,7 +567,7 @@ def run_keep_proxy():
     port = find_available_port()
     env = os.environ.copy()
     env['ARVADOS_API_TOKEN'] = auth_token('anonymous')
-    logf = open(_fifo2stderr('keepproxy'), 'w')
+    logf = open(_logfilename('keepproxy'), 'a')
     kp = subprocess.Popen(
         ['keepproxy',
          '-pid='+_pidfile('keepproxy'),
@@ -462,7 +588,7 @@ def run_keep_proxy():
         'service_type': 'proxy',
         'service_ssl_flag': False,
     }}).execute()
-    os.environ["ARVADOS_KEEP_PROXY"] = "http://localhost:{}".format(port)
+    os.environ["ARVADOS_KEEP_SERVICES"] = "http://localhost:{}".format(port)
     _setport('keepproxy', port)
     _wait_until_port_listens(port)
 
@@ -480,7 +606,7 @@ def run_arv_git_httpd():
     gitport = find_available_port()
     env = os.environ.copy()
     env.pop('ARVADOS_API_TOKEN', None)
-    logf = open(_fifo2stderr('arv-git-httpd'), 'w')
+    logf = open(_logfilename('arv-git-httpd'), 'a')
     agh = subprocess.Popen(
         ['arv-git-httpd',
          '-repo-root='+gitdir+'/test',
@@ -504,11 +630,11 @@ def run_keep_web():
     keepwebport = find_available_port()
     env = os.environ.copy()
     env['ARVADOS_API_TOKEN'] = auth_token('anonymous')
-    logf = open(_fifo2stderr('keep-web'), 'w')
+    logf = open(_logfilename('keep-web'), 'a')
     keepweb = subprocess.Popen(
         ['keep-web',
          '-allow-anonymous',
-         '-attachment-only-host=localhost:'+str(keepwebport),
+         '-attachment-only-host=download:'+str(keepwebport),
          '-listen=:'+str(keepwebport)],
         env=env, stdin=open('/dev/null'), stdout=logf, stderr=logf)
     with open(_pidfile('keep-web'), 'w') as f:
@@ -524,16 +650,24 @@ def stop_keep_web():
 def run_nginx():
     if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
         return
+    stop_nginx()
     nginxconf = {}
+    nginxconf['CONTROLLERPORT'] = _getport('controller')
+    nginxconf['CONTROLLERSSLPORT'] = find_available_port()
     nginxconf['KEEPWEBPORT'] = _getport('keep-web')
+    nginxconf['KEEPWEBDLSSLPORT'] = find_available_port()
     nginxconf['KEEPWEBSSLPORT'] = find_available_port()
     nginxconf['KEEPPROXYPORT'] = _getport('keepproxy')
     nginxconf['KEEPPROXYSSLPORT'] = find_available_port()
     nginxconf['GITPORT'] = _getport('arv-git-httpd')
     nginxconf['GITSSLPORT'] = find_available_port()
+    nginxconf['WSPORT'] = _getport('ws')
+    nginxconf['WSSPORT'] = _getport('wss')
     nginxconf['SSLCERT'] = os.path.join(SERVICES_SRC_DIR, 'api', 'tmp', 'self-signed.pem')
     nginxconf['SSLKEY'] = os.path.join(SERVICES_SRC_DIR, 'api', 'tmp', 'self-signed.key')
-    nginxconf['ACCESSLOG'] = _fifo2stderr('nginx_access_log')
+    nginxconf['ACCESSLOG'] = _logfilename('nginx_access')
+    nginxconf['ERRORLOG'] = _logfilename('nginx_error')
+    nginxconf['TMPDIR'] = TEST_TMPDIR
 
     conftemplatefile = os.path.join(MY_DIRNAME, 'nginx.conf')
     conffile = os.path.join(TEST_TMPDIR, 'nginx.conf')
@@ -552,6 +686,8 @@ def run_nginx():
          '-g', 'pid '+_pidfile('nginx')+';',
          '-c', conffile],
         env=env, stdin=open('/dev/null'), stdout=sys.stderr)
+    _setport('controller-ssl', nginxconf['CONTROLLERSSLPORT'])
+    _setport('keep-web-dl-ssl', nginxconf['KEEPWEBDLSSLPORT'])
     _setport('keep-web-ssl', nginxconf['KEEPWEBSSLPORT'])
     _setport('keepproxy-ssl', nginxconf['KEEPPROXYSSLPORT'])
     _setport('arv-git-httpd-ssl', nginxconf['GITSSLPORT'])
@@ -578,7 +714,15 @@ def _getport(program):
     except IOError:
         return 9
 
+def _dbconfig(key):
+    """Return one setting from the "test" section of database.yml.
+
+    The API server's config/database.yml is parsed on first use and
+    cached in _cached_db_config for later calls. Raises KeyError if
+    the "test" section or the requested key is missing.
+    """
+    global _cached_db_config
+    if not _cached_db_config:
+        dbconf_path = os.path.join(
+            SERVICES_SRC_DIR, 'api', 'config', 'database.yml')
+        # safe_load() only builds plain Python data, and (unlike
+        # yaml.load() without a Loader) works on current PyYAML. The
+        # with-block closes the file instead of leaking the handle.
+        with open(dbconf_path) as f:
+            _cached_db_config = yaml.safe_load(f)
+    return _cached_db_config['test'][key]
+
 def _apiconfig(key):
+    global _cached_config
     if _cached_config:
         return _cached_config[key]
     def _load(f, required=True):
@@ -632,13 +776,14 @@ class TestCaseWithServers(unittest.TestCase):
     original environment.
     """
     MAIN_SERVER = None
+    WS_SERVER = None
     KEEP_SERVER = None
     KEEP_PROXY_SERVER = None
     KEEP_WEB_SERVER = None
 
     @staticmethod
     def _restore_dict(src, dest):
-        for key in dest.keys():
+        for key in list(dest.keys()):
             if key not in src:
                 del dest[key]
         dest.update(src)
@@ -648,10 +793,11 @@ class TestCaseWithServers(unittest.TestCase):
         cls._orig_environ = os.environ.copy()
         cls._orig_config = arvados.config.settings().copy()
         cls._cleanup_funcs = []
-        os.environ.pop('ARVADOS_KEEP_PROXY', None)
+        os.environ.pop('ARVADOS_KEEP_SERVICES', None)
         os.environ.pop('ARVADOS_EXTERNAL_CLIENT', None)
         for server_kwargs, start_func, stop_func in (
                 (cls.MAIN_SERVER, run, reset),
+                (cls.WS_SERVER, run_ws, stop_ws),
                 (cls.KEEP_SERVER, run_keep, stop_keep),
                 (cls.KEEP_PROXY_SERVER, run_keep_proxy, stop_keep_proxy),
                 (cls.KEEP_WEB_SERVER, run_keep_web, stop_keep_web)):
@@ -678,6 +824,8 @@ class TestCaseWithServers(unittest.TestCase):
 if __name__ == "__main__":
     actions = [
         'start', 'stop',
+        'start_ws', 'stop_ws',
+        'start_controller', 'stop_controller',
         'start_keep', 'stop_keep',
         'start_keep_proxy', 'stop_keep_proxy',
         'start_keep-web', 'stop_keep-web',
@@ -693,7 +841,9 @@ if __name__ == "__main__":
     args = parser.parse_args()
 
     if args.action not in actions:
-        print("Unrecognized action '{}'. Actions are: {}.".format(args.action, actions), file=sys.stderr)
+        print("Unrecognized action '{}'. Actions are: {}.".
+              format(args.action, actions),
+              file=sys.stderr)
         sys.exit(1)
     if args.action == 'start':
         stop(force=('ARVADOS_TEST_API_HOST' not in os.environ))
@@ -708,6 +858,14 @@ if __name__ == "__main__":
             print(host)
     elif args.action == 'stop':
         stop(force=('ARVADOS_TEST_API_HOST' not in os.environ))
+    elif args.action == 'start_ws':
+        run_ws()
+    elif args.action == 'stop_ws':
+        stop_ws()
+    elif args.action == 'start_controller':
+        run_controller()
+    elif args.action == 'stop_controller':
+        stop_controller()
     elif args.action == 'start_keep':
         run_keep(enforce_permissions=args.keep_enforce_permissions, num_servers=args.num_keep_servers)
     elif args.action == 'stop_keep':
@@ -726,6 +884,7 @@ if __name__ == "__main__":
         stop_keep_web()
     elif args.action == 'start_nginx':
         run_nginx()
+        print("export ARVADOS_API_HOST=0.0.0.0:{}".format(_getport('controller-ssl')))
     elif args.action == 'stop_nginx':
         stop_nginx()
     else: