From ecca76ea83aca34d19e7964d148c8653fd5c948f Mon Sep 17 00:00:00 2001 From: Brett Smith Date: Fri, 12 Jul 2024 10:32:19 -0400 Subject: [PATCH] 21935: Move arvados.cache.SafeHTTPCache to arvados.api This removes a module from the reference documentation while maintaining backwards compatibility. Arvados-DCO-1.1-Signed-off-by: Brett Smith --- sdk/python/arvados/__init__.py | 5 ++ sdk/python/arvados/api.py | 79 ++++++++++++++++++- sdk/python/arvados/cache.py | 76 ++---------------- .../{test_cache.py => test_http_cache.py} | 0 4 files changed, 88 insertions(+), 72 deletions(-) rename sdk/python/tests/{test_cache.py => test_http_cache.py} (100%) diff --git a/sdk/python/arvados/__init__.py b/sdk/python/arvados/__init__.py index 8d9a5690b4..0aea3da640 100644 --- a/sdk/python/arvados/__init__.py +++ b/sdk/python/arvados/__init__.py @@ -34,6 +34,11 @@ from .arvfile import StreamFileReader from .logging import log_format, log_date_format, log_handler from .retry import RetryLoop +# Backwards compatibility shims: these modules used to get pulled in after +# `import arvados` with previous versions of the SDK. We must keep the names +# accessible even though there's no longer any functional need for them. +from . import cache + # Previous versions of the PySDK used to say `from .api import api`. This # made it convenient to call the API client constructor, but difficult to # access the rest of the `arvados.api` module. The magic below fixes that diff --git a/sdk/python/arvados/api.py b/sdk/python/arvados/api.py index e2f47a4dff..4756f76c28 100644 --- a/sdk/python/arvados/api.py +++ b/sdk/python/arvados/api.py @@ -10,6 +10,8 @@ client constructors are `api` and `api_from_config`. """ import collections +import errno +import hashlib import httplib2 import json import logging @@ -19,6 +21,7 @@ import re import socket import ssl import sys +import tempfile import threading import time import types @@ -39,7 +42,6 @@ from . import config from . import errors from . import retry from . import util -from . import cache from .logging import GoogleHTTPClientFilter, log_handler _logger = logging.getLogger('arvados.api') @@ -155,10 +157,81 @@ def _new_http_error(cls, *args, **kwargs): errors.ApiError, *args, **kwargs) apiclient_errors.HttpError.__new__ = staticmethod(_new_http_error) +class SafeHTTPCache(object): + """Thread-safe replacement for `httplib2.FileCache` + + `arvados.api.http_cache` is the preferred way to construct this object. + Refer to that function's docstring for details. + """ + + def __init__(self, path=None, max_age=None): + self._dir = path + if max_age is not None: + try: + self._clean(threshold=time.time() - max_age) + except: + pass + + def _clean(self, threshold=0): + for ent in os.listdir(self._dir): + fnm = os.path.join(self._dir, ent) + if os.path.isdir(fnm) or not fnm.endswith('.tmp'): + continue + stat = os.lstat(fnm) + if stat.st_mtime < threshold: + try: + os.unlink(fnm) + except OSError as err: + if err.errno != errno.ENOENT: + raise + + def __str__(self): + return self._dir + + def _filename(self, url): + return os.path.join(self._dir, hashlib.md5(url.encode('utf-8')).hexdigest()+'.tmp') + + def get(self, url): + filename = self._filename(url) + try: + with open(filename, 'rb') as f: + return f.read() + except (IOError, OSError): + return None + + def set(self, url, content): + try: + fd, tempname = tempfile.mkstemp(dir=self._dir) + except: + return None + try: + try: + f = os.fdopen(fd, 'wb') + except: + os.close(fd) + raise + try: + f.write(content) + finally: + f.close() + os.rename(tempname, self._filename(url)) + tempname = None + finally: + if tempname: + os.unlink(tempname) + + def delete(self, url): + try: + os.unlink(self._filename(url)) + except OSError as err: + if err.errno != errno.ENOENT: + raise + + def http_cache(data_type: str) -> Optional[SafeHTTPCache]: """Set up an HTTP file cache - This function constructs and returns an `arvados.cache.SafeHTTPCache` + This function constructs and returns an `arvados.api.SafeHTTPCache` backed by the filesystem under a cache directory from the environment, or `None` if the directory cannot be set up. The return value can be passed to `httplib2.Http` as the `cache` argument. @@ -173,7 +246,7 @@ def http_cache(data_type: str) -> Optional[SafeHTTPCache]: except (OSError, RuntimeError): return None else: - return cache.SafeHTTPCache(str(path), max_age=60*60*24*2) + return SafeHTTPCache(str(path), max_age=60*60*24*2) def api_client( version: str, diff --git a/sdk/python/arvados/cache.py b/sdk/python/arvados/cache.py index 115547cf93..3fcc769f93 100644 --- a/sdk/python/arvados/cache.py +++ b/sdk/python/arvados/cache.py @@ -1,75 +1,13 @@ # Copyright (C) The Arvados Authors. All rights reserved. # # SPDX-License-Identifier: Apache-2.0 +"""arvados.cache - Shim compatibility module -import errno -import hashlib -import os -import tempfile -import time +This module used to define `arvados.api.SafeHTTPCache`. Now it only exists +to provide backwards compatible imports. New code should prefer to import +`arvados.api`. -class SafeHTTPCache(object): - """Thread-safe replacement for httplib2.FileCache""" +@private +""" - def __init__(self, path=None, max_age=None): - self._dir = path - if max_age is not None: - try: - self._clean(threshold=time.time() - max_age) - except: - pass - - def _clean(self, threshold=0): - for ent in os.listdir(self._dir): - fnm = os.path.join(self._dir, ent) - if os.path.isdir(fnm) or not fnm.endswith('.tmp'): - continue - stat = os.lstat(fnm) - if stat.st_mtime < threshold: - try: - os.unlink(fnm) - except OSError as err: - if err.errno != errno.ENOENT: - raise - - def __str__(self): - return self._dir - - def _filename(self, url): - return os.path.join(self._dir, hashlib.md5(url.encode('utf-8')).hexdigest()+'.tmp') - - def get(self, url): - filename = self._filename(url) - try: - with open(filename, 'rb') as f: - return f.read() - except (IOError, OSError): - return None - - def set(self, url, content): - try: - fd, tempname = tempfile.mkstemp(dir=self._dir) - except: - return None - try: - try: - f = os.fdopen(fd, 'wb') - except: - os.close(fd) - raise - try: - f.write(content) - finally: - f.close() - os.rename(tempname, self._filename(url)) - tempname = None - finally: - if tempname: - os.unlink(tempname) - - def delete(self, url): - try: - os.unlink(self._filename(url)) - except OSError as err: - if err.errno != errno.ENOENT: - raise +from .api import SafeHTTPCache diff --git a/sdk/python/tests/test_cache.py b/sdk/python/tests/test_http_cache.py similarity index 100% rename from sdk/python/tests/test_cache.py rename to sdk/python/tests/test_http_cache.py -- 2.30.2