import ssl
import sys
import threading
+import resource
from . import timer
import urllib.parse
import arvados.errors
import arvados.retry as retry
import arvados.util
+import arvados.diskcache
_logger = logging.getLogger('arvados.keep')
global_client_object = None
config.get('ARVADOS_API_TOKEN'),
config.flag_is_true('ARVADOS_API_HOST_INSECURE'),
config.get('ARVADOS_KEEP_PROXY'),
- config.get('ARVADOS_EXTERNAL_CLIENT') == 'true',
os.environ.get('KEEP_LOCAL_STORE'))
if (global_client_object is None) or (cls._last_key != key):
global_client_object = KeepClient()
return Keep.global_client_object().put(data, **kwargs)
class KeepBlockCache(object):
    def __init__(self, cache_max=0, max_slots=0, disk_cache=False, disk_cache_dir=None):
        """Initialize the Keep block cache.

        :cache_max: maximum cache size in bytes.  0 means choose a default
          based on the backing store (disk or RAM).

        :max_slots: maximum number of cached blocks.  0 means choose a
          default (half the NOFILE limit for a disk cache, 512 for RAM).

        :disk_cache: if True, cache blocks in files on disk instead of RAM.

        :disk_cache_dir: directory for the disk cache; defaults to
          ~/.cache/arvados/keep when disk_cache is enabled.
        """
        self.cache_max = cache_max
        self._cache = []
        self._cache_lock = threading.Lock()
        self._max_slots = max_slots
        self._disk_cache = disk_cache
        self._disk_cache_dir = disk_cache_dir

        if self._disk_cache and self._disk_cache_dir is None:
            self._disk_cache_dir = os.path.join(os.path.expanduser("~"), ".cache", "arvados", "keep")
            os.makedirs(self._disk_cache_dir, mode=0o700, exist_ok=True)

        if self._max_slots == 0:
            if self._disk_cache:
                # Each cached block holds an open file handle, so default
                # max slots to half of the maximum file handles.  NOFILE
                # typically defaults to 1024 on Linux so this will be 512
                # slots.  Use floor division: a slot count must be an int.
                self._max_slots = resource.getrlimit(resource.RLIMIT_NOFILE)[0] // 2
            else:
                # RAM cache slots
                self._max_slots = 512

        if self.cache_max == 0:
            if self._disk_cache:
                fs = os.statvfs(self._disk_cache_dir)
                # pick smallest of:
                #   25% of available space
                #   10% of total disk size
                #   max_slots * 64 MiB
                avail = (fs.f_bavail * fs.f_bsize) // 4
                maxdisk = int((fs.f_blocks * fs.f_bsize) * 0.10)
                self.cache_max = min(maxdisk, avail, self._max_slots * 64 * 1024 * 1024)
            else:
                # Default RAM cache is 256 MiB
                self.cache_max = (256 * 1024 * 1024)

        # Never allow the cache to be smaller than 64 MiB (the maximum
        # size of a single Keep block).
        self.cache_max = max(self.cache_max, 64 * 1024 * 1024)

        if self._disk_cache:
            # Populate the cache from blocks already on disk, then trim
            # it to the configured limits.
            self._cache = arvados.diskcache.DiskCacheSlot.init_cache(self._disk_cache_dir, self._max_slots)
            self.cap_cache()
class CacheSlot(object):
__slots__ = ("locator", "ready", "content")
else:
return len(self.content)
def evict(self):
    """Evict this slot; RAM slots need no cleanup, so always succeed."""
    return True
def cap_cache(self):
    """Cap the cache size to self.cache_max and self._max_slots."""
    with self._cache_lock:
        # Discard slots whose read finished with an error (ready is
        # set but there is no content to serve).
        self._cache = [c for c in self._cache
                       if not (c.ready.is_set() and c.content is None)]
        total = sum(slot.size() for slot in self._cache)
        while self._cache and (total > self.cache_max or len(self._cache) > self._max_slots):
            # Starting from the back, find a slot that is a candidate
            # to evict.
            for idx in range(len(self._cache)-1, -1, -1):
                if self._cache[idx].ready.is_set():
                    slot_size = self._cache[idx].size()

                    # If evict returns false it means the underlying
                    # disk cache couldn't lock the file for deletion
                    # because another process was using it.  Don't
                    # count it as reducing the amount of data in the
                    # cache, find something else to throw out.
                    if self._cache[idx].evict():
                        total -= slot_size

                    # Either way we forget about it: either the other
                    # process will delete it, or if we need it again
                    # and it is still there, we'll find it on disk.
                    del self._cache[idx]
                    break
def _get(self, locator):
# Test if the locator is already in the cache
del self._cache[i]
self._cache.insert(0, n)
return n
+ if self._disk_cache:
+ # see if it exists on disk
+ n = arvados.diskcache.DiskCacheSlot.get_from_disk(locator, self._disk_cache_dir)
+ if n is not None:
+ self._cache.insert(0, n)
+ return n
return None
def get(self, locator):
return n, False
else:
# Add a new cache slot for the locator
- n = KeepBlockCache.CacheSlot(locator)
+ if self._disk_cache:
+ n = arvados.diskcache.DiskCacheSlot(locator, self._disk_cache_dir)
+ else:
+ n = KeepBlockCache.CacheSlot(locator)
self._cache.insert(0, n)
return n, True
curl.setopt(pycurl.HEADERFUNCTION, self._headerfunction)
if self.insecure:
curl.setopt(pycurl.SSL_VERIFYPEER, 0)
+ curl.setopt(pycurl.SSL_VERIFYHOST, 0)
else:
curl.setopt(pycurl.CAINFO, arvados.util.ca_certs_path())
if method == "HEAD":
curl.setopt(pycurl.HEADERFUNCTION, self._headerfunction)
if self.insecure:
curl.setopt(pycurl.SSL_VERIFYPEER, 0)
+ curl.setopt(pycurl.SSL_VERIFYHOST, 0)
else:
curl.setopt(pycurl.CAINFO, arvados.util.ca_certs_path())
self._setcurltimeouts(curl, timeout)
else:
return None
def get_from_cache(self, loc_s):
    """Fetch a block only if it is already in the cache; otherwise return None."""
    # Cache slots are keyed by the bare md5 hash, so parse the full
    # locator string first.
    locator = KeepLocator(loc_s)
    slot = self.block_cache.get(locator.md5sum)
    if slot is None or not slot.ready.is_set():
        return None
    return slot.get()
def get(self, loc_s, **kwargs):
return self._get_or_head(loc_s, method="GET", **kwargs)
- def _get_or_head(self, loc_s, method="GET", num_retries=None, request_id=None, headers=None):
+ def _get_or_head(self, loc_s, method="GET", num_retries=None, request_id=None, headers=None, prefetch=False):
"""Get data from Keep.
This method fetches one or more blocks of data from Keep. It
self.get_counter.add(1)
+ request_id = (request_id or
+ (hasattr(self, 'api_client') and self.api_client.request_id) or
+ arvados.util.new_request_id())
+ if headers is None:
+ headers = {}
+ headers['X-Request-Id'] = request_id
+
slot = None
blob = None
try:
if method == "GET":
slot, first = self.block_cache.reserve_cache(locator.md5sum)
if not first:
+ if prefetch:
+ # this is request for a prefetch, if it is
+ # already in flight, return immediately.
+ # clear 'slot' to prevent finally block from
+ # calling slot.set()
+ slot = None
+ return None
self.hits_counter.add(1)
blob = slot.get()
if blob is None:
self.misses_counter.add(1)
- if headers is None:
- headers = {}
- headers['X-Request-Id'] = (request_id or
- (hasattr(self, 'api_client') and self.api_client.request_id) or
- arvados.util.new_request_id())
-
# If the locator has hints specifying a prefix (indicating a
# remote keepproxy) or the UUID of a local gateway service,
# read data from the indicated service(s) instead of the usual
for key in sorted_roots)
if not roots_map:
raise arvados.errors.KeepReadError(
- "failed to read {}: no Keep services available ({})".format(
- loc_s, loop.last_result()))
+ "[{}] failed to read {}: no Keep services available ({})".format(
+ request_id, loc_s, loop.last_result()))
elif not_founds == len(sorted_roots):
raise arvados.errors.NotFoundError(
- "{} not found".format(loc_s), service_errors)
+ "[{}] {} not found".format(request_id, loc_s), service_errors)
else:
raise arvados.errors.KeepReadError(
- "failed to read {} after {}".format(loc_s, loop.attempts_str()), service_errors, label="service")
+ "[{}] failed to read {} after {}".format(request_id, loc_s, loop.attempts_str()), service_errors, label="service")
@retry.retry_method
def put(self, data, copies=2, num_retries=None, request_id=None, classes=None):
return loc_s
locator = KeepLocator(loc_s)
+ request_id = (request_id or
+ (hasattr(self, 'api_client') and self.api_client.request_id) or
+ arvados.util.new_request_id())
headers = {
- 'X-Request-Id': (request_id or
- (hasattr(self, 'api_client') and self.api_client.request_id) or
- arvados.util.new_request_id()),
+ 'X-Request-Id': request_id,
'X-Keep-Desired-Replicas': str(copies),
}
roots_map = {}
return writer_pool.response()
if not roots_map:
raise arvados.errors.KeepWriteError(
- "failed to write {}: no Keep services available ({})".format(
- data_hash, loop.last_result()))
+ "[{}] failed to write {}: no Keep services available ({})".format(
+ request_id, data_hash, loop.last_result()))
else:
service_errors = ((key, roots_map[key].last_result()['error'])
for key in sorted_roots
if roots_map[key].last_result()['error'])
raise arvados.errors.KeepWriteError(
- "failed to write {} after {} (wanted {} copies but wrote {})".format(
- data_hash, loop.attempts_str(), (copies, classes), writer_pool.done()), service_errors, label="service")
+ "[{}] failed to write {} after {} (wanted {} copies but wrote {})".format(
+ request_id, data_hash, loop.attempts_str(), (copies, classes), writer_pool.done()), service_errors, label="service")
def local_store_put(self, data, copies=1, num_retries=None, classes=[]):
"""A stub for put().
return True
if os.path.exists(os.path.join(self.local_store, locator.md5sum)):
return True
-
- def is_cached(self, locator):
- return self.block_cache.reserve_cache(expect_hash)