import logging
import time
import calendar
+import threading
_logger = logging.getLogger('arvados.arvados_fuse')
+portable_data_hash_pattern = re.compile(r'[0-9a-f]{32}\+\d+')
+uuid_pattern = re.compile(r'[a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15}')
+collection_uuid_pattern = re.compile(r'[a-z0-9]{5}-4zz18-[a-z0-9]{15}')
+group_uuid_pattern = re.compile(r'[a-z0-9]{5}-j7d0g-[a-z0-9]{15}')
+user_uuid_pattern = re.compile(r'[a-z0-9]{5}-tpzed-[a-z0-9]{15}')
+link_uuid_pattern = re.compile(r'[a-z0-9]{5}-o0j2j-[a-z0-9]{15}')
+
class SafeApi(object):
    '''Hand out one Arvados API client per thread.

    httplib2, which underlies apiclient, is not threadsafe, so a client
    object must never be shared between threads.  Each thread's client is
    created on first use and kept in thread-local storage.
    '''

    def __init__(self, config):
        '''Remember the connection settings read from config.

        config must provide get(name) and flag_is_true(name).
        '''
        self.local = threading.local()
        self.host = config.get('ARVADOS_API_HOST')
        self.token = config.get('ARVADOS_API_TOKEN')
        self.insecure = config.flag_is_true('ARVADOS_API_HOST_INSECURE')

    def localapi(self):
        '''Return the calling thread's API client, creating it if needed.'''
        if not hasattr(self.local, 'api'):
            self.local.api = arvados.api('v1', False, self.host, self.token, self.insecure)
        return self.local.api

    # Thin pass-throughs to the resource collections of the per-thread client.

    def collections(self):
        client = self.localapi()
        return client.collections()

    def links(self):
        client = self.localapi()
        return client.links()

    def groups(self):
        client = self.localapi()
        return client.groups()

    def users(self):
        client = self.localapi()
        return client.users()
+
def convertTime(t):
    '''Parse an Arvados timestamp into unix time.

    t is a UTC timestamp string of the form "YYYY-MM-DDTHH:MM:SSZ".  A
    fractional-seconds suffix such as ".123456789" before the "Z" is
    accepted and discarded.

    Returns the number of whole seconds since the epoch, or 0 when t is
    None or empty (the same "unknown time" value callers use when the
    field is absent).

    Raises ValueError if t is non-empty but not in the expected format.
    '''
    if not t:
        return 0
    # strptime cannot parse sub-second precision of arbitrary length, so
    # drop a purely numeric fraction and keep the trailing "Z".
    whole, dot, fraction = t.partition('.')
    if dot and fraction.endswith('Z') and fraction[:-1].isdigit():
        t = whole + 'Z'
    return calendar.timegm(time.strptime(t, "%Y-%m-%dT%H:%M:%SZ"))
def sanitize_filename(dirty):
+ '''Remove troublesome characters from filenames.'''
# http://www.dwheeler.com/essays/fixing-unix-linux-filenames.html
if dirty is None:
return None
self._stale = True
self._poll = False
self._last_update = time.time()
+ self._atime = time.time()
self._poll_time = 60
# Mark the value as stale
def invalidate(self):
self._stale = True
- # Test if the entries dict is stale
+ # Test if the entries dict is stale. Also updates atime.
def stale(self):
+ self._atime = time.time()
if self._stale:
return True
if self._poll:
- return (self._last_update + self._poll_time) < time.time()
+ return (self._last_update + self._poll_time) < self._atime
return False
def fresh(self):
self._stale = False
self._last_update = time.time()
- def ctime(self):
- return 0
-
- def mtime(self):
- return 0
-
+ def atime(self):
+ return self._atime
class File(FreshBase):
'''Base for file objects.'''
- def __init__(self, parent_inode, _ctime=0, _mtime=0):
+ def __init__(self, parent_inode, _mtime=0):
super(File, self).__init__()
self.inode = None
self.parent_inode = parent_inode
- self._ctime = _ctime
self._mtime = _mtime
def size(self):
def readfrom(self, off, size):
return ''
- def ctime(self):
- return self._ctime
-
def mtime(self):
return self._mtime
class StreamReaderFile(File):
'''Wraps a StreamFileReader as a file.'''
- def __init__(self, parent_inode, reader, _ctime, _mtime):
- super(StreamReaderFile, self).__init__(parent_inode, _ctime, _mtime)
+ def __init__(self, parent_inode, reader, _mtime):
+ super(StreamReaderFile, self).__init__(parent_inode, _mtime)
self.reader = reader
def size(self):
class StringFile(File):
'''Wrap a simple string as a file'''
- def __init__(self, parent_inode, contents, _ctime, _mtime):
- super(StringFile, self).__init__(parent_inode, _ctime, _mtime)
+ def __init__(self, parent_inode, contents, _mtime):
+ super(StringFile, self).__init__(parent_inode, _mtime)
self.contents = contents
def size(self):
def readfrom(self, off, size):
return self.contents[off:(off+size)]
+
class ObjectFile(StringFile):
    '''Expose an API record (a dict) as a read-only file of pretty-printed JSON.'''

    def __init__(self, parent_inode, obj):
        # Start with empty contents and a zero mtime; update() fills in both.
        super(ObjectFile, self).__init__(parent_inode, "", 0)
        self.uuid = obj['uuid']
        self.update(obj)

    def update(self, obj):
        '''Refresh the modification time and file contents from obj.'''
        if 'modified_at' in obj:
            self._mtime = convertTime(obj['modified_at'])
        else:
            self._mtime = 0
        serialized = json.dumps(obj, indent=4, sort_keys=True)
        self.contents = serialized + "\n"
class Directory(FreshBase):
raise Exception("parent_inode should be an int")
self.parent_inode = parent_inode
self._entries = {}
+ self._mtime = time.time()
# Overriden by subclasses to implement logic to update the entries dict
# when the directory is stale
oldentries = self._entries
self._entries = {}
+ changed = False
for i in items:
name = sanitize_filename(fn(i))
if name:
ent = new_entry(i)
if ent is not None:
self._entries[name] = self.inodes.add_entry(ent)
+ changed = True
# delete any other directory entries that were not in found in 'items'
for i in oldentries:
llfuse.invalidate_entry(self.inode, str(i))
self.inodes.del_entry(oldentries[i])
+ changed = True
+
+ if changed:
+ self._mtime = time.time()
+
self.fresh()
def clear(self):
self.inodes.del_entry(oldentries[n])
self.invalidate()
+ def mtime(self):
+ return self._mtime
+
class CollectionDirectory(Directory):
    '''Represents the root of a directory tree holding a collection.'''

    def __init__(self, parent_inode, inodes, api, collection):
        '''Create an empty directory for a collection that is not loaded yet.

        collection is either a locator string (uuid or portable data hash)
        or a collection record dict, of which only 'uuid' is used here.
        '''
        super(CollectionDirectory, self).__init__(parent_inode)
        self.inodes = inodes
        self.api = api
        # Created lazily the first time '.arvados#collection' is looked up.
        self.collection_object_file = None
        # Stays None until update() has fetched the record successfully.
        self.collection_object = None
        if isinstance(collection, dict):
            self.collection_locator = collection['uuid']
        else:
            self.collection_locator = collection

    def same(self, i):
        '''Tell whether record i refers to this directory's collection.'''
        return i['uuid'] == self.collection_locator or i['portable_data_hash'] == self.collection_locator

    def new_collection(self, new_collection_object):
        '''Replace the directory contents with those of a new collection record.

        Stores the record, refreshes the '.arvados#collection' file if it
        has already been created, then rebuilds the directory tree from the
        record's manifest text.
        '''
        self.collection_object = new_collection_object

        if self.collection_object_file is not None:
            self.collection_object_file.update(self.collection_object)

        self.clear()
        collection = arvados.CollectionReader(self.collection_object["manifest_text"], self.api)
        for s in collection.all_streams():
            # Walk (and create as needed) one subdirectory per component of
            # the stream name, starting from this directory.
            cwd = self
            for part in s.name().split('/'):
                if part != '' and part != '.':
                    partname = sanitize_filename(part)
                    if partname not in cwd._entries:
                        cwd._entries[partname] = self.inodes.add_entry(Directory(cwd.inode))
                    cwd = cwd._entries[partname]
            for k, v in s.files().items():
                cwd._entries[sanitize_filename(k)] = self.inodes.add_entry(StreamReaderFile(cwd.inode, v, self.mtime()))

    def update(self):
        '''Fetch the collection record and rebuild the tree if it changed.

        Returns True when the directory is up to date, False when the
        record could not be fetched or processed (the error is logged).
        '''
        try:
            # Content addressed by portable data hash cannot change, so once
            # it has been loaded there is nothing to refresh.
            if self.collection_object is not None and portable_data_hash_pattern.match(self.collection_locator):
                return True

            # Release the llfuse lock for the duration of the API request.
            with llfuse.lock_released:
                new_collection_object = self.api.collections().get(uuid=self.collection_locator).execute()
                if "portable_data_hash" not in new_collection_object:
                    new_collection_object["portable_data_hash"] = new_collection_object["uuid"]
            # end with llfuse.lock_released, re-acquire lock

            if self.collection_object is None or self.collection_object["portable_data_hash"] != new_collection_object["portable_data_hash"]:
                self.new_collection(new_collection_object)

            self.fresh()
            return True
        except apiclient.errors.HttpError as e:
            if e.resp.status == 404:
                _logger.warning("arv-mount %s: not found", self.collection_locator)
            else:
                _logger.error("arv-mount %s: error", self.collection_locator)
                # Log the exception bound in this handler; 'detail' only
                # exists in the generic handler below.
                _logger.exception(e)
        except Exception as detail:
            _logger.error("arv-mount %s: error", self.collection_locator)
            # collection_object is still None if the very first fetch failed.
            if self.collection_object is not None and "manifest_text" in self.collection_object:
                _logger.error("arv-mount manifest_text is: %s", self.collection_object["manifest_text"])
            _logger.exception(detail)
        return False

    def __getitem__(self, item):
        '''Look up a directory entry by name.

        The name '.arvados#collection' yields a file holding the collection
        record as JSON; the file is created on first access.
        '''
        self.checkupdate()
        if item == '.arvados#collection':
            if self.collection_object_file is None:
                self.collection_object_file = ObjectFile(self.inode, self.collection_object)
                self.inodes.add_entry(self.collection_object_file)
            return self.collection_object_file
        else:
            return super(CollectionDirectory, self).__getitem__(item)

    def __contains__(self, k):
        '''Report whether name k exists; '.arvados#collection' always does.'''
        if k == '.arvados#collection':
            return True
        else:
            return super(CollectionDirectory, self).__contains__(k)

    def mtime(self):
        '''Return the collection's 'modified_at' as unix time, or 0 if unknown.'''
        self.checkupdate()
        if self.collection_object is not None and 'modified_at' in self.collection_object:
            return convertTime(self.collection_object["modified_at"])
        return 0
+
class MagicDirectory(Directory):
'''A special directory that logically contains the set of all extant keep
super(MagicDirectory, self).__init__(parent_inode)
self.inodes = inodes
self.api = api
+ self.readme_file = None
def __contains__(self, k):
+ if self.readme_file is None:
+ text = '''This directory provides access to Arvados collections as subdirectories listed
+by uuid (in the form 'zzzzz-4zz18-1234567890abcde') or portable data hash (in
+the form '1234567890abcdefghijklmnopqrstuv+123').
+
+Note that this directory will appear empty until you attempt to access a
+specific collection subdirectory (such as trying to 'cd' into it), at which
+point the collection will actually be looked up on the server and the directory
+will appear if it exists.
+'''
+ self.readme_file = self.inodes.add_entry(StringFile(self.inode, text, time.time()))
+ self._entries["README"] = self.readme_file
+
if k in self._entries:
return True
+
+ if not portable_data_hash_pattern.match(k) and not uuid_pattern.match(k):
+ return False
+
try:
e = self.inodes.add_entry(CollectionDirectory(self.inode, self.inodes, self.api, k))
if e.update():
else:
raise KeyError("No collection with id " + item)
+
class RecursiveInvalidateDirectory(Directory):
def invalidate(self):
if self.inode == llfuse.ROOT_INODE:
if self.inode == llfuse.ROOT_INODE:
llfuse.lock.release()
+
class TagsDirectory(RecursiveInvalidateDirectory):
'''A special directory that contains as subdirectories all tags visible to the user.'''
super(TagsDirectory, self).__init__(parent_inode)
self.inodes = inodes
self.api = api
- #try:
- # arvados.events.subscribe(self.api, [['object_uuid', 'is_a', 'arvados#link']], lambda ev: self.invalidate())
- #except:
self._poll = True
self._poll_time = poll_time
def update(self):
- tags = self.api.links().list(filters=[['link_class', '=', 'tag']], select=['name'], distinct = True).execute()
+ with llfuse.lock_released:
+ tags = self.api.links().list(filters=[['link_class', '=', 'tag']], select=['name'], distinct = True).execute()
if "items" in tags:
self.merge(tags['items'],
lambda i: i['name'] if 'name' in i else i['uuid'],
lambda a, i: a.tag == i,
lambda i: TagDirectory(self.inode, self.inodes, self.api, i['name'], poll=self._poll, poll_time=self._poll_time))
+
class TagDirectory(Directory):
'''A special directory that contains as subdirectories all collections visible
to the user that are tagged with a particular tag.
self._poll_time = poll_time
def update(self):
- taggedcollections = self.api.links().list(filters=[['link_class', '=', 'tag'],
- ['name', '=', self.tag],
- ['head_uuid', 'is_a', 'arvados#collection']],
- select=['head_uuid']).execute()
+ with llfuse.lock_released:
+ taggedcollections = self.api.links().list(filters=[['link_class', '=', 'tag'],
+ ['name', '=', self.tag],
+ ['head_uuid', 'is_a', 'arvados#collection']],
+ select=['head_uuid']).execute()
self.merge(taggedcollections['items'],
lambda i: i['head_uuid'],
lambda a, i: a.collection_locator == i['head_uuid'],
self.inodes = inodes
self.api = api
self.project_object = project_object
+ self.project_object_file = ObjectFile(self.inode, self.project_object)
+ self.inodes.add_entry(self.project_object_file)
self.uuid = project_object['uuid']
def createDirectory(self, i):
- if re.match(r'[a-z0-9]{5}-4zz18-[a-z0-9]{15}', i['uuid']):
- return CollectionDirectory(self.inode, self.inodes, self.api, i['uuid'])
- elif re.match(r'[a-z0-9]{5}-j7d0g-[a-z0-9]{15}', i['uuid']):
+ if collection_uuid_pattern.match(i['uuid']):
+ return CollectionDirectory(self.inode, self.inodes, self.api, i)
+ elif group_uuid_pattern.match(i['uuid']):
return ProjectDirectory(self.inode, self.inodes, self.api, i, self._poll, self._poll_time)
- elif re.match(r'[a-z0-9]{5}-o0j2j-[a-z0-9]{15}', i['uuid']) and i['head_kind'] == 'arvados#collection':
- return CollectionDirectory(self.inode, self.inodes, self.api, i['head_uuid'])
- #elif re.match(r'[a-z0-9]{5}-8i9sb-[a-z0-9]{15}', i['uuid']):
- # return None
- elif re.match(r'[a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15}', i['uuid']):
+ elif link_uuid_pattern.match(i['uuid']):
+ if i['head_kind'] == 'arvados#collection' or portable_data_hash_pattern.match(i['head_uuid']):
+ return CollectionDirectory(self.inode, self.inodes, self.api, i['head_uuid'])
+ else:
+ return None
+ elif uuid_pattern.match(i['uuid']):
return ObjectFile(self.parent_inode, i)
else:
return None
def update(self):
def namefn(i):
if 'name' in i:
- if i['name'] is None:
+ if i['name'] is None or len(i['name']) == 0:
return None
- elif re.match(r'[a-z0-9]{5}-(4zz18|j7d0g)-[a-z0-9]{15}', i['uuid']):
+ elif collection_uuid_pattern.match(i['uuid']) or group_uuid_pattern.match(i['uuid']):
# collection or subproject
return i['name']
- elif re.match(r'[a-z0-9]{5}-o0j2j-[a-z0-9]{15}', i['uuid']) and i['head_kind'] == 'arvados#collection':
+ elif link_uuid_pattern.match(i['uuid']) and i['head_kind'] == 'arvados#collection':
# name link
return i['name']
elif 'kind' in i and i['kind'].startswith('arvados#'):
return a.uuid == i['uuid'] and not a.stale()
return False
- #with llfuse.lock_released:
- if re.match(r'[a-z0-9]{5}-j7d0g-[a-z0-9]{15}', self.uuid):
- self.project_object = self.api.groups().get(uuid=self.uuid).execute()
- elif re.match(r'[a-z0-9]{5}-tpzed-[a-z0-9]{15}', self.uuid):
- self.project_object = self.api.users().get(uuid=self.uuid).execute()
-
- contents = arvados.util.list_all(self.api.groups().contents, uuid=self.uuid)
- # Name links will be obsolete soon, take this out when there are no more pre-#3036 in use.
- contents += arvados.util.list_all(self.api.links().list, filters=[['tail_uuid', '=', self.uuid], ['link_class', '=', 'name']])
+ with llfuse.lock_released:
+ if group_uuid_pattern.match(self.uuid):
+ self.project_object = self.api.groups().get(uuid=self.uuid).execute()
+ elif user_uuid_pattern.match(self.uuid):
+ self.project_object = self.api.users().get(uuid=self.uuid).execute()
- #print contents
+ contents = arvados.util.list_all(self.api.groups().contents, uuid=self.uuid)
+ # Name links will be obsolete soon, take this out when there are no more pre-#3036 in use.
+ contents += arvados.util.list_all(self.api.links().list, filters=[['tail_uuid', '=', self.uuid], ['link_class', '=', 'name']])
+
+ # end with llfuse.lock_released, re-acquire lock
self.merge(contents,
namefn,
samefn,
self.createDirectory)
- def ctime(self):
- return convertTime(self.project_object["created_at"]) if "created_at" in self.project_object else 0
-
- def mtime(self):
- return convertTime(self.project_object["modified_at"]) if "modified_at" in self.project_object else 0
+ def __getitem__(self, item):
+ self.checkupdate()
+ if item == '.arvados#project':
+ return self.project_object_file
+ else:
+ return super(ProjectDirectory, self).__getitem__(item)
+ def __contains__(self, k):
+ if k == '.arvados#project':
+ return True
+ else:
+ return super(ProjectDirectory, self).__contains__(k)
class SharedDirectory(RecursiveInvalidateDirectory):
self.current_user = api.users().current().execute()
self.inodes = inodes
self.api = api
-
- # try:
- # arvados.events.subscribe(self.api, [], lambda ev: self.invalidate())
- # except:
self._poll = True
self._poll_time = poll_time
def update(self):
- #with llfuse.lock_released:
- all_projects = arvados.util.list_all(self.api.groups().list, filters=[['group_class','=','project']])
- objects = {}
- for ob in all_projects:
- objects[ob['uuid']] = ob
-
- roots = []
- root_owners = {}
- for ob in all_projects:
- if ob['owner_uuid'] != self.current_user['uuid'] and ob['owner_uuid'] not in objects:
- roots.append(ob)
- root_owners[ob['owner_uuid']] = True
-
- #with llfuse.lock_released:
- lusers = arvados.util.list_all(self.api.users().list, filters=[['uuid','in', list(root_owners)]])
- lgroups = arvados.util.list_all(self.api.groups().list, filters=[['uuid','in', list(root_owners)]])
-
- users = {}
- groups = {}
-
- for l in lusers:
- objects[l["uuid"]] = l
- for l in lgroups:
- objects[l["uuid"]] = l
-
- contents = {}
- for r in root_owners:
- if r in objects:
- obr = objects[r]
- if "name" in obr:
- contents[obr["name"]] = obr
- if "first_name" in obr:
- contents[u"{} {}".format(obr["first_name"], obr["last_name"])] = obr
-
- for r in roots:
- if r['owner_uuid'] not in objects:
- contents[r['name']] = r
-
+ with llfuse.lock_released:
+ all_projects = arvados.util.list_all(self.api.groups().list, filters=[['group_class','=','project']])
+ objects = {}
+ for ob in all_projects:
+ objects[ob['uuid']] = ob
+
+ roots = []
+ root_owners = {}
+ for ob in all_projects:
+ if ob['owner_uuid'] != self.current_user['uuid'] and ob['owner_uuid'] not in objects:
+ roots.append(ob)
+ root_owners[ob['owner_uuid']] = True
+
+ lusers = arvados.util.list_all(self.api.users().list, filters=[['uuid','in', list(root_owners)]])
+ lgroups = arvados.util.list_all(self.api.groups().list, filters=[['uuid','in', list(root_owners)]])
+
+ users = {}
+ groups = {}
+
+ for l in lusers:
+ objects[l["uuid"]] = l
+ for l in lgroups:
+ objects[l["uuid"]] = l
+
+ contents = {}
+ for r in root_owners:
+ if r in objects:
+ obr = objects[r]
+ if "name" in obr:
+ contents[obr["name"]] = obr
+ if "first_name" in obr:
+ contents[u"{} {}".format(obr["first_name"], obr["last_name"])] = obr
+
+ for r in roots:
+ if r['owner_uuid'] not in objects:
+ contents[r['name']] = r
+
+ # end with llfuse.lock_released, re-acquire lock
+
try:
self.merge(contents.items(),
lambda i: i[0],
llfuse has its own global lock which is acquired before calling a request handler,
so request handlers do not run concurrently unless the lock is explicitly released
- with llfuse.lock_released.'''
+ using "with llfuse.lock_released:"'''
def __init__(self, uid, gid):
super(Operations, self).__init__()
entry.st_size = e.size()
entry.st_blksize = 512
- entry.st_blocks = (e.size()/512)
- if e.size()/512 != 0:
- entry.st_blocks += 1
- entry.st_atime = 0
- entry.st_mtime = e.mtime()
- entry.st_ctime = e.ctime()
+ entry.st_blocks = (e.size()/512)+1
+ entry.st_atime = int(e.atime())
+ entry.st_mtime = int(e.mtime())
+ entry.st_ctime = int(e.mtime())
return entry