10669: Robust cache for httplib2
author Tom Clegg <tom@curoverse.com>
Wed, 22 Mar 2017 19:16:37 +0000 (15:16 -0400)
committer Tom Clegg <tom@curoverse.com>
Wed, 22 Mar 2017 21:49:59 +0000 (17:49 -0400)
sdk/python/arvados/api.py
sdk/python/arvados/cache.py [new file with mode: 0644]
sdk/python/tests/run_test_server.py
sdk/python/tests/test_cache.py [new file with mode: 0644]

diff --git a/sdk/python/arvados/api.py b/sdk/python/arvados/api.py
index ccf16a5fcef3be02b6450bd4b527e6ff682c88d1..b324722a3b6ee111697116b6c66b229b2516f00d 100644 (file)
--- a/sdk/python/arvados/api.py
+++ b/sdk/python/arvados/api.py
@@ -15,6 +15,7 @@ from apiclient import errors as apiclient_errors
 import config
 import errors
 import util
+import cache
 
 _logger = logging.getLogger('arvados.api')
 
@@ -136,7 +137,7 @@ def http_cache(data_type):
         util.mkdir_dash_p(path)
     except OSError:
-        path = None
-    return path
+        return None
+    return cache.SafeHTTPCache(path)
 
 def api(version=None, cache=True, host=None, token=None, insecure=False, **kwargs):
     """Return an apiclient Resources object for an Arvados instance.
diff --git a/sdk/python/arvados/cache.py b/sdk/python/arvados/cache.py
new file mode 100644 (file)
index 0000000..c4e4c09
--- /dev/null
+++ b/sdk/python/arvados/cache.py
@@ -0,0 +1,50 @@
+import errno
+import md5
+import os
+import tempfile
+
+class SafeHTTPCache(object):
+    def __init__(self, path=None):
+        self._dir = path
+
+    def __str__(self):
+        return self._dir
+
+    def _filename(self, url):
+        return os.path.join(self._dir, md5.new(url).hexdigest())
+
+    def get(self, url):
+        filename = self._filename(url)
+        try:
+            with open(filename, 'rb') as f:
+                return f.read()
+        except (IOError, OSError):
+            return None
+
+    def set(self, url, content):
+        try:
+            fd, tempname = tempfile.mkstemp(dir=self._dir)
+        except (IOError, OSError):
+            return None
+        try:
+            try:
+                f = os.fdopen(fd, 'wb')
+            except:
+                os.close(fd)
+                raise
+            try:
+                f.write(content)
+            finally:
+                f.close()
+            os.rename(tempname, self._filename(url))
+            tempname = None
+        finally:
+            if tempname:
+                os.unlink(tempname)
+
+    def delete(self, url):
+        try:
+            os.unlink(self._filename(url))
+        except OSError as err:
+            if err.errno != errno.ENOENT:
+                raise
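
The "safe" part of SafeHTTPCache is in set(): the new content is written to a
tempfile.mkstemp() file inside the cache directory and then os.rename()d over
the final name. On POSIX the rename is atomic within a filesystem, so a
concurrent get() sees either the complete old entry or the complete new one,
never a half-written file, and a failed write is cleaned up in the finally
block. A short usage sketch (the mkdtemp() directory is only there to keep the
example self-contained):

    import tempfile
    import arvados.cache

    c = arvados.cache.SafeHTTPCache(tempfile.mkdtemp())
    url = 'https://example.com/discovery/v1/apis/arvados/v1/rest'
    c.set(url, 'cached response body')
    assert c.get(url) == 'cached response body'
    c.delete(url)   # deleting an entry that is already gone is also a no-op
    assert c.get(url) is None
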
diff --git a/sdk/python/tests/run_test_server.py b/sdk/python/tests/run_test_server.py
index da35f4fc9b347e201ad304cdb0913b8d40b6bacf..d10e60c22fef1009179c90da126d098a2fdc9c56 100644 (file)
--- a/sdk/python/tests/run_test_server.py
+++ b/sdk/python/tests/run_test_server.py
@@ -239,8 +239,9 @@ def run(leave_running_atexit=False):
     # This will clear cached docs that belong to other processes (like
     # concurrent test suites) even if they're still running. They should
     # be able to tolerate that.
-    for fn in glob.glob(os.path.join(arvados.http_cache('discovery'),
-                                     '*,arvados,v1,rest,*')):
+    for fn in glob.glob(os.path.join(
+            str(arvados.http_cache('discovery')),
+            '*,arvados,v1,rest,*')):
         os.unlink(fn)
 
     pid_file = _pidfile('api')
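
The str() wrapper is what keeps this glob working: arvados.http_cache('discovery')
now returns a SafeHTTPCache object instead of a path string, and __str__() exposes
the backing directory. Roughly (the resulting path depends on the local cache
location and is only illustrative here):

    import os
    import arvados

    hc = arvados.http_cache('discovery')   # SafeHTTPCache, not a str
    pattern = os.path.join(str(hc), '*,arvados,v1,rest,*')
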
diff --git a/sdk/python/tests/test_cache.py b/sdk/python/tests/test_cache.py
new file mode 100644 (file)
index 0000000..ce97a17
--- /dev/null
+++ b/sdk/python/tests/test_cache.py
@@ -0,0 +1,62 @@
+import md5
+import shutil
+import random
+import tempfile
+import threading
+import unittest
+
+import arvados.cache
+
+
+def _random(n):
+    return bytearray(random.getrandbits(8) for _ in xrange(n))
+
+
+class CacheTestThread(threading.Thread):
+    def __init__(self, dir):
+        super(CacheTestThread, self).__init__()
+        self._dir = dir
+
+    def run(self):
+        self.ok = True
+        c = arvados.cache.SafeHTTPCache(self._dir)
+        url = 'http://example.com/foo'
+        for x in range(16):
+            data_in = _random(128)
+            data_in = md5.new(data_in).hexdigest() + "\n" + str(data_in)
+            c.set(url, data_in)
+            digest, content = c.get(url).split("\n", 1)
+            self.ok = self.ok and (digest == md5.new(content).hexdigest())
+
+
+class CacheTest(unittest.TestCase):
+    def setUp(self):
+        self._dir = tempfile.mkdtemp()
+
+    def tearDown(self):
+        shutil.rmtree(self._dir)
+
+    def test_cache_crud(self):
+        c = arvados.cache.SafeHTTPCache(self._dir)
+        url = 'https://example.com/foo?bar=baz'
+        data1 = _random(256)
+        data2 = _random(128)
+        self.assertEqual(None, c.get(url))
+        c.delete(url)
+        c.set(url, data1)
+        self.assertEqual(data1, c.get(url))
+        c.delete(url)
+        self.assertEqual(None, c.get(url))
+        c.set(url, data1)
+        c.set(url, data2)
+        self.assertEqual(data2, c.get(url))
+
+    def test_cache_threads(self):
+        threads = []
+        for _ in range(64):
+            t = CacheTestThread(dir=self._dir)
+            t.start()
+            threads.append(t)
+        for t in threads:
+            t.join()
+            self.assertTrue(t.ok)
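
The threaded test is the regression test for the race this commit addresses:
64 threads repeatedly write and re-read the same cache key, and every read must
come back as one writer's complete payload. The framing that makes this checkable
is the "<digest>\n<content>" scheme used in run(); in isolation it looks like
this (using the same Python 2 md5 module as the test):

    import md5

    content = 'some cached bytes'
    payload = md5.new(content).hexdigest() + '\n' + content
    # A reader can verify it did not get a torn or interleaved write:
    digest, body = payload.split('\n', 1)
    assert digest == md5.new(body).hexdigest()
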