Merge branch '10805-avoid-llfuse-deadlock'
[arvados.git] / services / fuse / tests / mount_test_base.py
index 12395d7f951422c90a76bc1e474172524c598356..96ff889fcfbdb0de8e2d07ed0ea6a15ebd6f0bfe 100644 (file)
@@ -1,18 +1,23 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
 import arvados
-import arvados.safeapi
 import arvados_fuse as fuse
+import arvados.safeapi
 import llfuse
+import logging
+import multiprocessing
 import os
+import run_test_server
 import shutil
+import signal
 import subprocess
 import sys
 import tempfile
 import threading
 import time
 import unittest
-import logging
-import multiprocessing
-import run_test_server
 
 logger = logging.getLogger('arvados.arv-mount')
 
@@ -38,6 +43,10 @@ class MountTestBase(unittest.TestCase):
         self.api = api if api else arvados.safeapi.ThreadSafeApiCache(arvados.config.settings())
         self.llfuse_thread = None
 
+        # Workaround for llfuse deadlock bug. See #10805, #8345,
+        # https://bitbucket.org/nikratio/python-llfuse/issues/108
+        llfuse.close = lambda *args: None
+
     # This is a copy of Mount's method.  TODO: Refactor MountTestBase
     # to use a Mount instead of copying its code.
     def _llfuse_main(self):
@@ -64,23 +73,28 @@ class MountTestBase(unittest.TestCase):
         return self.operations.inodes[llfuse.ROOT_INODE]
 
     def tearDown(self):
-        self.pool.terminate()
-        self.pool.join()
-        del self.pool
-
         if self.llfuse_thread:
+            if self.operations.events:
+                self.operations.events.close(timeout=10)
             subprocess.call(["fusermount", "-u", "-z", self.mounttmp])
-            self.llfuse_thread.join(timeout=1)
+            t0 = time.time()
+            self.llfuse_thread.join(timeout=10)
             if self.llfuse_thread.is_alive():
                 logger.warning("MountTestBase.tearDown():"
-                               " llfuse thread still alive 1s after umount"
-                               " -- abandoning and exiting anyway")
+                               " llfuse thread still alive 10s after umount"
+                               " -- exiting with SIGKILL")
+                os.kill(os.getpid(), signal.SIGKILL)
+            waited = time.time() - t0
+            if waited > 0.1:
+                logger.warning("MountTestBase.tearDown(): waited %f s for llfuse thread to end", waited)
 
         os.rmdir(self.mounttmp)
         if self.keeptmp:
             shutil.rmtree(self.keeptmp)
             os.environ.pop('KEEP_LOCAL_STORE')
         run_test_server.reset()
+        self.pool.close()
+        self.pool.join()
 
     def assertDirContents(self, subdir, expect_content):
         path = self.mounttmp