import time
import threading
import timer
+import datetime
global_client_object = None
import config
import arvados.errors
+class KeepLocator(object):
+ EPOCH_DATETIME = datetime.datetime.utcfromtimestamp(0)
+ HEX_RE = re.compile(r'^[0-9a-fA-F]+$')
+
+ def __init__(self, locator_str):
+ self.size = None
+ self.loc_hint = None
+ self._perm_sig = None
+ self._perm_expiry = None
+ pieces = iter(locator_str.split('+'))
+ self.md5sum = next(pieces)
+ for hint in pieces:
+ if hint.startswith('A'):
+ self.parse_permission_hint(hint)
+ elif hint.startswith('K'):
+ self.loc_hint = hint # FIXME
+ elif hint.isdigit():
+ self.size = int(hint)
+ else:
+ raise ValueError("unrecognized hint data {}".format(hint))
+
+ def __str__(self):
+ return '+'.join(
+ str(s) for s in [self.md5sum, self.size, self.loc_hint,
+ self.permission_hint()]
+ if s is not None)
+
+ def _is_hex_length(self, s, *size_spec):
+ if len(size_spec) == 1:
+ good_len = (len(s) == size_spec[0])
+ else:
+ good_len = (size_spec[0] <= len(s) <= size_spec[1])
+ return good_len and self.HEX_RE.match(s)
+
+ def _make_hex_prop(name, length):
+ # Build and return a new property with the given name that
+ # must be a hex string of the given length.
+ data_name = '_{}'.format(name)
+ def getter(self):
+ return getattr(self, data_name)
+ def setter(self, hex_str):
+ if not self._is_hex_length(hex_str, length):
+ raise ValueError("{} must be a {}-digit hex string: {}".
+ format(name, length, hex_str))
+ setattr(self, data_name, hex_str)
+ return property(getter, setter)
+
+ md5sum = _make_hex_prop('md5sum', 32)
+ perm_sig = _make_hex_prop('perm_sig', 40)
+
+ @property
+ def perm_expiry(self):
+ return self._perm_expiry
+
+ @perm_expiry.setter
+ def perm_expiry(self, value):
+ if not self._is_hex_length(value, 1, 8):
+ raise ValueError(
+ "permission timestamp must be a hex Unix timestamp: {}".
+ format(value))
+ self._perm_expiry = datetime.datetime.utcfromtimestamp(int(value, 16))
+
+ def permission_hint(self):
+ data = [self.perm_sig, self.perm_expiry]
+ if None in data:
+ return None
+ data[1] = int((data[1] - self.EPOCH_DATETIME).total_seconds())
+ return "A{}@{:08x}".format(*data)
+
+ def parse_permission_hint(self, s):
+ try:
+ self.perm_sig, self.perm_expiry = s[1:].split('@', 1)
+ except IndexError:
+ raise ValueError("bad permission hint {}".format(s))
+
+ def permission_expired(self, as_of_dt=None):
+ if self.perm_expiry is None:
+ return False
+ elif as_of_dt is None:
+ as_of_dt = datetime.datetime.now()
+ return self.perm_expiry <= as_of_dt
+
+
class Keep:
@staticmethod
def global_client_object():
with self._done_lock:
return (self._done < self._todo)
- def save_response(self, response_body):
+ def save_response(self, response_body, replicas_stored):
"""
Records a response body (a locator, possibly signed) returned by
the Keep server. It is not necessary to save more than
in response to a successful request is valid.
"""
with self._done_lock:
- self._done += 1
+ self._done += replicas_stored
self._response = response_body
def response(self):
url = self.args['service_root'] + self.args['data_hash']
api_token = config.get('ARVADOS_API_TOKEN')
headers = {'Authorization': "OAuth2 %s" % api_token}
+
+ if self.args['using_proxy']:
+ # We're using a proxy, so tell the proxy how many copies we
+ # want it to store
+ headers['X-Keep-Desired-Replication'] = str(self.args['want_copies'])
+
try:
+ logging.debug("Uploading to {}".format(url))
resp, content = h.request(url.encode('utf-8'), 'PUT',
headers=headers,
body=self.args['data'])
(str(threading.current_thread()),
self.args['data_hash'],
self.args['service_root']))
- return limiter.save_response(content.strip())
+ replicas_stored = 1
+ if 'x-keep-replicas-stored' in resp:
+ # Tick the 'done' counter for the number of replica
+ # reported stored by the server, for the case that
+ # we're talking to a proxy or other backend that
+ # stores to multiple copies for us.
+ try:
+ replicas_stored = int(resp['x-keep-replicas-stored'])
+ except ValueError:
+ pass
+ return limiter.save_response(content.strip(), replicas_stored)
+
logging.warning("Request fail: PUT %s => %s %s" %
(url, resp['status'], content))
except (httplib2.HttpLib2Error, httplib.HTTPException) as e:
self._cache = []
# default 256 megabyte cache
self.cache_max = 256 * 1024 * 1024
+ self.using_proxy = False
def shuffled_service_roots(self, hash):
if self.service_roots == None:
self.lock.acquire()
- try:
- keep_disks = arvados.api().keep_disks().list().execute()['items']
- roots = (("http%s://%s:%d/" %
- ('s' if f['service_ssl_flag'] else '',
- f['service_host'],
- f['service_port']))
- for f in keep_disks)
- self.service_roots = sorted(set(roots))
- logging.debug(str(self.service_roots))
- finally:
- self.lock.release()
+
+ # Override normal keep disk lookup with an explict proxy
+ # configuration.
+ keep_proxy_env = config.get("ARVADOS_KEEP_PROXY")
+ if keep_proxy_env != None and len(keep_proxy_env) > 0:
+
+ if keep_proxy_env[-1:] != '/':
+ keep_proxy_env += "/"
+ self.service_roots = [keep_proxy_env]
+ self.using_proxy = True
+ else:
+ try:
+ try:
+ keep_services = arvados.api().keep_services().accessible().execute()['items']
+ except Exception:
+ keep_services = arvados.api().keep_disks().list().execute()['items']
+
+ if len(keep_services) == 0:
+ raise arvados.errors.NoKeepServersError()
+
+ if 'service_type' in keep_services[0] and keep_services[0]['service_type'] == 'proxy':
+ self.using_proxy = True
+
+ roots = (("http%s://%s:%d/" %
+ ('s' if f['service_ssl_flag'] else '',
+ f['service_host'],
+ f['service_port']))
+ for f in keep_services)
+ self.service_roots = sorted(set(roots))
+ logging.debug(str(self.service_roots))
+ finally:
+ self.lock.release()
# Build an ordering with which to query the Keep servers based on the
# contents of the hash.
t = KeepClient.KeepWriterThread(data=data,
data_hash=data_hash,
service_root=service_root,
- thread_limiter=thread_limiter)
+ thread_limiter=thread_limiter,
+ using_proxy=self.using_proxy,
+ want_copies=(want_copies if self.using_proxy else 1))
t.start()
threads += [t]
for t in threads:
t.join()
have_copies = thread_limiter.done()
# If we're done, return the response from Keep
- if have_copies == want_copies:
+ if have_copies >= want_copies:
return thread_limiter.response()
raise arvados.errors.KeepWriteError(
"Write fail for %s: wanted %d but wrote %d" %