Merge branch '9551-python-keep-services-env'
[arvados.git] / sdk / python / tests / run_test_server.py
index cdfd93a0a5cce4bb933facf49eec3e915e7035e8..e72f67dce49049f37c9b2e68794eb62cc780297c 100644 (file)
@@ -4,6 +4,7 @@ from __future__ import print_function
 import argparse
 import atexit
 import errno
+import glob
 import httplib2
 import os
 import pipes
@@ -12,8 +13,8 @@ import re
 import shutil
 import signal
 import socket
-import subprocess
 import string
+import subprocess
 import sys
 import tempfile
 import time
@@ -32,7 +33,6 @@ import arvados.config
 
 ARVADOS_DIR = os.path.realpath(os.path.join(MY_DIRNAME, '../../..'))
 SERVICES_SRC_DIR = os.path.join(ARVADOS_DIR, 'services')
-SERVER_PID_PATH = 'tmp/pids/test-server.pid'
 if 'GOPATH' in os.environ:
     gopaths = os.environ['GOPATH'].split(':')
     gobins = [os.path.join(path, 'bin') for path in gopaths]
@@ -70,28 +70,65 @@ def kill_server_pid(pidfile, wait=10, passenger_root=False):
     import signal
     import subprocess
     import time
-    try:
-        if passenger_root:
-            # First try to shut down nicely
-            restore_cwd = os.getcwd()
-            os.chdir(passenger_root)
-            subprocess.call([
-                'bundle', 'exec', 'passenger', 'stop', '--pid-file', pidfile])
-            os.chdir(restore_cwd)
-        now = time.time()
-        timeout = now + wait
-        with open(pidfile, 'r') as f:
-            server_pid = int(f.read())
-        while now <= timeout:
-            if not passenger_root or timeout - now < wait / 2:
-                # Half timeout has elapsed. Start sending SIGTERM
-                os.kill(server_pid, signal.SIGTERM)
-            # Raise OSError if process has disappeared
-            os.getpgid(server_pid)
+
+    now = time.time()
+    startTERM = now
+    deadline = now + wait
+
+    if passenger_root:
+        # First try to shut down nicely
+        restore_cwd = os.getcwd()
+        os.chdir(passenger_root)
+        subprocess.call([
+            'bundle', 'exec', 'passenger', 'stop', '--pid-file', pidfile])
+        os.chdir(restore_cwd)
+        # Use up to half of the +wait+ period waiting for "passenger
+        # stop" to work. If the process hasn't exited by then, start
+        # sending TERM signals.
+        startTERM += wait/2
+
+    server_pid = None
+    while now <= deadline and server_pid is None:
+        try:
+            with open(pidfile, 'r') as f:
+                server_pid = int(f.read())
+        except IOError:
+            # No pidfile = nothing to kill.
+            return
+        except ValueError as error:
+            # Pidfile exists, but we can't parse it. Perhaps the
+            # server has created the file but hasn't written its PID
+            # yet?
+            print("Parse error reading pidfile {}: {}".format(pidfile, error),
+                  file=sys.stderr)
             time.sleep(0.1)
             now = time.time()
-    except EnvironmentError:
-        pass
+
+    while now <= deadline:
+        try:
+            exited, _ = os.waitpid(server_pid, os.WNOHANG)
+            if exited > 0:
+                return
+        except OSError:
+            # already exited, or isn't our child process
+            pass
+        try:
+            if now >= startTERM:
+                os.kill(server_pid, signal.SIGTERM)
+                print("Sent SIGTERM to {} ({})".format(server_pid, pidfile),
+                      file=sys.stderr)
+        except OSError as error:
+            if error.errno == errno.ESRCH:
+                # Raised by os.kill() when the process no longer
+                # exists, i.e., our work here is done.
+                return
+            raise
+        time.sleep(0.1)
+        now = time.time()
+
+    print("Server PID {} ({}) did not exit, giving up after {}s".
+          format(server_pid, pidfile, wait),
+          file=sys.stderr)
 
 def find_available_port():
     """Return an IPv4 port number that is not in use right now.
@@ -122,7 +159,8 @@ def _wait_until_port_listens(port, timeout=10):
         subprocess.check_output(['which', 'lsof'])
     except subprocess.CalledProcessError:
         print("WARNING: No `lsof` -- cannot wait for port to listen. "+
-              "Sleeping 0.5 and hoping for the best.")
+              "Sleeping 0.5 and hoping for the best.",
+              file=sys.stderr)
         time.sleep(0.5)
         return
     deadline = time.time() + timeout
@@ -155,7 +193,7 @@ def _fifo2stderr(label):
             raise
     os.mkfifo(fifo, 0700)
     subprocess.Popen(
-        ['sed', '-e', 's/^/['+label+'] /', fifo],
+        ['stdbuf', '-i0', '-oL', '-eL', 'sed', '-e', 's/^/['+label+'] /', fifo],
         stdout=sys.stderr)
     return fifo
 
@@ -176,10 +214,16 @@ def run(leave_running_atexit=False):
     """
     global my_api_host
 
-    # Delete cached discovery document.
-    shutil.rmtree(arvados.http_cache('discovery'))
+    # Delete cached discovery documents.
+    #
+    # This will clear cached docs that belong to other processes (like
+    # concurrent test suites) even if they're still running. They should
+    # be able to tolerate that.
+    for fn in glob.glob(os.path.join(arvados.http_cache('discovery'),
+                                     '*,arvados,v1,rest,*')):
+        os.unlink(fn)
 
-    pid_file = os.path.join(SERVICES_SRC_DIR, 'api', SERVER_PID_PATH)
+    pid_file = _pidfile('api')
     pid_file_ok = find_server_pid(pid_file, 0)
 
     existing_api_host = os.environ.get('ARVADOS_TEST_API_HOST', my_api_host)
@@ -251,7 +295,7 @@ def run(leave_running_atexit=False):
     start_msg = subprocess.check_output(
         ['bundle', 'exec',
          'passenger', 'start', '-d', '-p{}'.format(port),
-         '--pid-file', os.path.join(os.getcwd(), pid_file),
+         '--pid-file', pid_file,
          '--log-file', os.path.join(os.getcwd(), 'log/test.log'),
          '--ssl',
          '--ssl-certificate', 'tmp/self-signed.pem',
@@ -313,7 +357,7 @@ def stop(force=False):
     """
     global my_api_host
     if force or my_api_host is not None:
-        kill_server_pid(os.path.join(SERVICES_SRC_DIR, 'api', SERVER_PID_PATH))
+        kill_server_pid(_pidfile('api'))
         my_api_host = None
 
 def _start_keep(n, keep_args):
@@ -354,7 +398,7 @@ def run_keep(blob_signing_key=None, enforce_permissions=False, num_servers=2):
         keep_args['-enforce-permissions'] = 'true'
     with open(os.path.join(TEST_TMPDIR, "keep.data-manager-token-file"), "w") as f:
         keep_args['-data-manager-token-file'] = f.name
-        f.write(os.environ['ARVADOS_API_TOKEN'])
+        f.write(auth_token('data_manager'))
     keep_args['-never-delete'] = 'false'
 
     api = arvados.api(
@@ -385,10 +429,13 @@ def run_keep(blob_signing_key=None, enforce_permissions=False, num_servers=2):
     # keepstore services.
     proxypidfile = _pidfile('keepproxy')
     if os.path.exists(proxypidfile):
-        os.kill(int(open(proxypidfile).read()), signal.SIGHUP)
+        try:
+            os.kill(int(open(proxypidfile).read()), signal.SIGHUP)
+        except OSError:
+            os.remove(proxypidfile)
 
 def _stop_keep(n):
-    kill_server_pid(_pidfile('keep{}'.format(n)), 0)
+    kill_server_pid(_pidfile('keep{}'.format(n)))
     if os.path.exists("{}/keep{}.volume".format(TEST_TMPDIR, n)):
         with open("{}/keep{}.volume".format(TEST_TMPDIR, n), 'r') as r:
             shutil.rmtree(r.read(), True)
@@ -429,14 +476,14 @@ def run_keep_proxy():
         'service_type': 'proxy',
         'service_ssl_flag': False,
     }}).execute()
-    os.environ["ARVADOS_KEEP_PROXY"] = "http://localhost:{}".format(port)
+    os.environ["ARVADOS_KEEP_SERVICES"] = "http://localhost:{}".format(port)
     _setport('keepproxy', port)
     _wait_until_port_listens(port)
 
 def stop_keep_proxy():
     if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
         return
-    kill_server_pid(_pidfile('keepproxy'), wait=0)
+    kill_server_pid(_pidfile('keepproxy'))
 
 def run_arv_git_httpd():
     if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
@@ -447,11 +494,12 @@ def run_arv_git_httpd():
     gitport = find_available_port()
     env = os.environ.copy()
     env.pop('ARVADOS_API_TOKEN', None)
+    logf = open(_fifo2stderr('arv-git-httpd'), 'w')
     agh = subprocess.Popen(
         ['arv-git-httpd',
          '-repo-root='+gitdir+'/test',
          '-address=:'+str(gitport)],
-        env=env, stdin=open('/dev/null'), stdout=sys.stderr)
+        env=env, stdin=open('/dev/null'), stdout=logf, stderr=logf)
     with open(_pidfile('arv-git-httpd'), 'w') as f:
         f.write(str(agh.pid))
     _setport('arv-git-httpd', gitport)
@@ -460,7 +508,7 @@ def run_arv_git_httpd():
 def stop_arv_git_httpd():
     if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
         return
-    kill_server_pid(_pidfile('arv-git-httpd'), wait=0)
+    kill_server_pid(_pidfile('arv-git-httpd'))
 
 def run_keep_web():
     if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
@@ -469,13 +517,14 @@ def run_keep_web():
 
     keepwebport = find_available_port()
     env = os.environ.copy()
-    env.pop('ARVADOS_API_TOKEN', None)
+    env['ARVADOS_API_TOKEN'] = auth_token('anonymous')
+    logf = open(_fifo2stderr('keep-web'), 'w')
     keepweb = subprocess.Popen(
         ['keep-web',
-         '-anonymous-token='+fixture('api_client_authorizations')['anonymous']['api_token'],
-         '-attachment-only-host=localhost:'+str(keepwebport),
-         '-address=:'+str(keepwebport)],
-        env=env, stdin=open('/dev/null'), stdout=sys.stderr)
+         '-allow-anonymous',
+         '-attachment-only-host=download:'+str(keepwebport),
+         '-listen=:'+str(keepwebport)],
+        env=env, stdin=open('/dev/null'), stdout=logf, stderr=logf)
     with open(_pidfile('keep-web'), 'w') as f:
         f.write(str(keepweb.pid))
     _setport('keep-web', keepwebport)
@@ -484,13 +533,14 @@ def run_keep_web():
 def stop_keep_web():
     if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
         return
-    kill_server_pid(_pidfile('keep-web'), wait=0)
+    kill_server_pid(_pidfile('keep-web'))
 
 def run_nginx():
     if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
         return
     nginxconf = {}
     nginxconf['KEEPWEBPORT'] = _getport('keep-web')
+    nginxconf['KEEPWEBDLSSLPORT'] = find_available_port()
     nginxconf['KEEPWEBSSLPORT'] = find_available_port()
     nginxconf['KEEPPROXYPORT'] = _getport('keepproxy')
     nginxconf['KEEPPROXYSSLPORT'] = find_available_port()
@@ -517,6 +567,7 @@ def run_nginx():
          '-g', 'pid '+_pidfile('nginx')+';',
          '-c', conffile],
         env=env, stdin=open('/dev/null'), stdout=sys.stderr)
+    _setport('keep-web-dl-ssl', nginxconf['KEEPWEBDLSSLPORT'])
     _setport('keep-web-ssl', nginxconf['KEEPWEBSSLPORT'])
     _setport('keepproxy-ssl', nginxconf['KEEPPROXYSSLPORT'])
     _setport('arv-git-httpd-ssl', nginxconf['GITSSLPORT'])
@@ -524,7 +575,7 @@ def run_nginx():
 def stop_nginx():
     if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
         return
-    kill_server_pid(_pidfile('nginx'), wait=0)
+    kill_server_pid(_pidfile('nginx'))
 
 def _pidfile(program):
     return os.path.join(TEST_TMPDIR, program + '.pid')
@@ -613,7 +664,7 @@ class TestCaseWithServers(unittest.TestCase):
         cls._orig_environ = os.environ.copy()
         cls._orig_config = arvados.config.settings().copy()
         cls._cleanup_funcs = []
-        os.environ.pop('ARVADOS_KEEP_PROXY', None)
+        os.environ.pop('ARVADOS_KEEP_SERVICES', None)
         os.environ.pop('ARVADOS_EXTERNAL_CLIENT', None)
         for server_kwargs, start_func, stop_func in (
                 (cls.MAIN_SERVER, run, reset),
@@ -658,7 +709,9 @@ if __name__ == "__main__":
     args = parser.parse_args()
 
     if args.action not in actions:
-        print("Unrecognized action '{}'. Actions are: {}.".format(args.action, actions), file=sys.stderr)
+        print("Unrecognized action '{}'. Actions are: {}.".
+              format(args.action, actions),
+              file=sys.stderr)
         sys.exit(1)
     if args.action == 'start':
         stop(force=('ARVADOS_TEST_API_HOST' not in os.environ))
@@ -676,7 +729,7 @@ if __name__ == "__main__":
     elif args.action == 'start_keep':
         run_keep(enforce_permissions=args.keep_enforce_permissions, num_servers=args.num_keep_servers)
     elif args.action == 'stop_keep':
-        stop_keep()
+        stop_keep(num_servers=args.num_keep_servers)
     elif args.action == 'start_keep_proxy':
         run_keep_proxy()
     elif args.action == 'stop_keep_proxy':