X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/60c604d8e89501af180487ae7b39c1e25a66c8f3..b366f855557333cd99fce42ab56af1c66388b18e:/services/fuse/arvados_fuse/__init__.py

diff --git a/services/fuse/arvados_fuse/__init__.py b/services/fuse/arvados_fuse/__init__.py
index 8b734f2433..6d55b3481e 100644
--- a/services/fuse/arvados_fuse/__init__.py
+++ b/services/fuse/arvados_fuse/__init__.py
@@ -4,8 +4,8 @@
 import os
 import sys
-
 import llfuse
+from llfuse import FUSEError
 import errno
 import stat
 import threading
@@ -15,16 +15,41 @@ import arvados.events
 import re
 import apiclient
 import json
+import logging
+import time
+import calendar
+
+_logger = logging.getLogger('arvados.arvados_fuse')
+
+def convertTime(t):
+    return calendar.timegm(time.strptime(t, "%Y-%m-%dT%H:%M:%SZ"))
+
+def sanitize_filename(dirty):
+    # http://www.dwheeler.com/essays/fixing-unix-linux-filenames.html
+    if dirty is None:
+        return None
+
+    fn = ""
+    for c in dirty:
+        if (c >= '\x00' and c <= '\x1f') or c == '\x7f' or c == '/':
+            # skip control characters and /
+            continue
+        fn += c
+
+    # strip leading - or ~ and leading/trailing whitespace
+    stripped = fn.lstrip("-~ ").rstrip()
+    if len(stripped) > 0:
+        return stripped
+    else:
+        return None
 
-from time import time
-from llfuse import FUSEError
 
 class FreshBase(object):
     '''Base class for maintaining fresh/stale state to determine when to update.'''
     def __init__(self):
         self._stale = True
         self._poll = False
-        self._last_update = time()
+        self._last_update = time.time()
         self._poll_time = 60
 
     # Mark the value as stale
@@ -36,21 +61,29 @@ class FreshBase(object):
         if self._stale:
             return True
         if self._poll:
-            return (self._last_update + self._poll_time) < time()
+            return (self._last_update + self._poll_time) < time.time()
         return False
 
     def fresh(self):
         self._stale = False
-        self._last_update = time()
+        self._last_update = time.time()
+
+    def ctime(self):
+        return 0
+
+    def mtime(self):
+        return 0
 
 
 class File(FreshBase):
     '''Base for file objects.'''
 
-    def __init__(self, parent_inode):
+    def __init__(self, parent_inode, _ctime=0, _mtime=0):
         super(File, self).__init__()
         self.inode = None
         self.parent_inode = parent_inode
+        self._ctime = _ctime
+        self._mtime = _mtime
 
     def size(self):
         return 0
@@ -58,12 +91,18 @@ class File(FreshBase):
     def readfrom(self, off, size):
         return ''
 
+    def ctime(self):
+        return self._ctime
+
+    def mtime(self):
+        return self._mtime
+
 
 class StreamReaderFile(File):
     '''Wraps a StreamFileReader as a file.'''
 
-    def __init__(self, parent_inode, reader):
-        super(StreamReaderFile, self).__init__(parent_inode)
+    def __init__(self, parent_inode, reader, _ctime, _mtime):
+        super(StreamReaderFile, self).__init__(parent_inode, _ctime, _mtime)
         self.reader = reader
 
     def size(self):
@@ -76,20 +115,27 @@ class StreamReaderFile(File):
         return False
 
 
-class ObjectFile(File):
-    '''Wraps a dict as a serialized json object.'''
-
-    def __init__(self, parent_inode, contents):
-        super(ObjectFile, self).__init__(parent_inode)
-        self.contentsdict = contents
-        self.uuid = self.contentsdict['uuid']
-        self.contents = json.dumps(self.contentsdict, indent=4, sort_keys=True)
+class StringFile(File):
+    '''Wrap a simple string as a file'''
+    def __init__(self, parent_inode, contents, _ctime, _mtime):
+        super(StringFile, self).__init__(parent_inode, _ctime, _mtime)
+        self.contents = contents
 
     def size(self):
         return len(self.contents)
 
     def readfrom(self, off, size):
-        return self.contents[off:(off+size)]
+        return self.contents[off:(off+size)]
+
+class ObjectFile(StringFile):
+    '''Wrap a dict as a serialized json object.'''
+
+    def __init__(self, parent_inode, contents):
+        _ctime = convertTime(contents['created_at']) if 'created_at' in contents else 0
+        _mtime = convertTime(contents['modified_at']) if 'modified_at' in contents else 0
+        super(ObjectFile, self).__init__(parent_inode, json.dumps(contents, indent=4, sort_keys=True)+"\n", _ctime, _mtime)
+        self.contentsdict = contents
+        self.uuid = self.contentsdict['uuid']
 
 
 class Directory(FreshBase):
@@ -123,7 +169,7 @@ class Directory(FreshBase):
         try:
             self.update()
         except apiclient.errors.HttpError as e:
-            print e
+            _logger.debug(e)
 
     def __getitem__(self, item):
         self.checkupdate()
@@ -142,58 +188,141 @@ class Directory(FreshBase):
         return k in self._entries
 
     def merge(self, items, fn, same, new_entry):
-        '''Helper method for updating the contents of the directory.
+        '''Helper method for updating the contents of the directory.  Takes a list
+        describing the new contents of the directory, reuse entries that are
+        the same in both the old and new lists, create new entries, and delete
+        old entries missing from the new list.
 
-        items: array with new directory contents
+        items: iterable with new directory contents
 
        fn: function to take an entry in 'items' and return the desired file or
-        directory name
+        directory name, or None if this entry should be skipped
+
+        same: function to compare an existing entry (a File or Directory
+        object) with an entry in the items list to determine whether to keep
+        the existing entry.
 
-        same: function to compare an existing entry with an entry in the items
-        list to determine whether to keep the existing entry.
+        new_entry: function to create a new directory entry (File or Directory
+        object) from an entry in the items list.
 
-        new_entry: function to create a new directory entry from array entry.
         '''
 
         oldentries = self._entries
         self._entries = {}
         for i in items:
-            n = fn(i)
-            if n in oldentries and same(oldentries[n], i):
-                self._entries[n] = oldentries[n]
-                del oldentries[n]
-            else:
-                self._entries[n] = self.inodes.add_entry(new_entry(i))
+            name = sanitize_filename(fn(i))
+            if name:
+                if name in oldentries and same(oldentries[name], i):
+                    # move existing directory entry over
+                    self._entries[name] = oldentries[name]
+                    del oldentries[name]
+                else:
+                    # create new directory entry
+                    ent = new_entry(i)
+                    if ent is not None:
+                        self._entries[name] = self.inodes.add_entry(ent)
+
+        # delete any other directory entries that were not in found in 'items'
+        for i in oldentries:
+            llfuse.invalidate_entry(self.inode, str(i))
+            self.inodes.del_entry(oldentries[i])
+        self.fresh()
+
+    def clear(self):
+        '''Delete all entries'''
+        oldentries = self._entries
+        self._entries = {}
         for n in oldentries:
+            if isinstance(n, Directory):
+                n.clear()
             llfuse.invalidate_entry(self.inode, str(n))
             self.inodes.del_entry(oldentries[n])
-        self.fresh()
+        self.invalidate()
 
 
 class CollectionDirectory(Directory):
     '''Represents the root of a directory tree holding a collection.'''
 
-    def __init__(self, parent_inode, inodes, collection_locator):
+    def __init__(self, parent_inode, inodes, api, collection_locator):
         super(CollectionDirectory, self).__init__(parent_inode)
         self.inodes = inodes
+        self.api = api
         self.collection_locator = collection_locator
+        self.manifest_text_file = None
+        self.pdh_file = None
+        self.collection_object = None
 
     def same(self, i):
-        return i['uuid'] == self.collection_locator
+        return i['uuid'] == self.collection_locator or i['portable_data_hash'] == self.collection_locator
 
     def update(self):
-        collection = arvados.CollectionReader(arvados.Keep.get(self.collection_locator))
-        for s in collection.all_streams():
-            cwd = self
-            for part in s.name().split('/'):
-                if part != '' and part != '.':
-                    if part not in cwd._entries:
-                        cwd._entries[part] = self.inodes.add_entry(Directory(cwd.inode))
-                    cwd = cwd._entries[part]
-            for k, v in s.files().items():
-                cwd._entries[k] = self.inodes.add_entry(StreamReaderFile(cwd.inode, v))
-        self.fresh()
+        try:
+            if self.collection_object is not None and re.match(r'^[a-f0-9]{32}', self.collection_locator):
+                return True
+            #with llfuse.lock_released:
+            new_collection_object = self.api.collections().get(uuid=self.collection_locator).execute()
+            if "portable_data_hash" not in new_collection_object:
+                new_collection_object["portable_data_hash"] = new_collection_object["uuid"]
+
+            if self.collection_object is None or self.collection_object["portable_data_hash"] != new_collection_object["portable_data_hash"]:
+                self.collection_object = new_collection_object
+
+                if self.manifest_text_file is not None:
+                    self.manifest_text_file.contents = self.collection_object["manifest_text"]
+                    self.manifest_text_file._ctime = self.ctime()
+                    self.manifest_text_file._mtime = self.mtime()
+                if self.pdh_file is not None:
+                    self.pdh_file.contents = self.collection_object["portable_data_hash"]
+                    self.pdh_file._ctime = self.ctime()
+                    self.pdh_file._mtime = self.mtime()
+
+                self.clear()
+                collection = arvados.CollectionReader(self.collection_object["manifest_text"], self.api)
+                for s in collection.all_streams():
+                    cwd = self
+                    for part in s.name().split('/'):
+                        if part != '' and part != '.':
+                            partname = sanitize_filename(part)
+                            if partname not in cwd._entries:
+                                cwd._entries[partname] = self.inodes.add_entry(Directory(cwd.inode))
+                            cwd = cwd._entries[partname]
+                    for k, v in s.files().items():
+                        cwd._entries[sanitize_filename(k)] = self.inodes.add_entry(StreamReaderFile(cwd.inode, v, self.ctime(), self.mtime()))
+            self.fresh()
+            return True
+        except Exception as detail:
+            _logger.error("arv-mount %s: error", self.collection_locator)
+            _logger.exception(detail)
+        return False
+
+    def __getitem__(self, item):
+        self.checkupdate()
+        if item == '.manifest_text':
+            if self.manifest_text_file is None:
+                self.manifest_text_file = StringFile(self.inode, self.collection_object["manifest_text"], self.ctime(), self.mtime())
+                self.inodes.add_entry(self.manifest_text_file)
+            return self.manifest_text_file
+        elif item == '.portable_data_hash':
+            if self.pdh_file is None:
+                self.pdh_file = StringFile(self.inode, self.collection_object["portable_data_hash"], self.ctime(), self.mtime())
+                self.inodes.add_entry(self.pdh_file)
+            return self.pdh_file
+        else:
+            return super(CollectionDirectory, self).__getitem__(item)
+
+    def __contains__(self, k):
+        if k in ('.manifest_text', '.portable_data_hash'):
+            return True
+        else:
+            return super(CollectionDirectory, self).__contains__(k)
+
+    def ctime(self):
+        self.checkupdate()
+        return convertTime(self.collection_object["created_at"])
+
+    def mtime(self):
+        self.checkupdate()
+        return convertTime(self.collection_object["modified_at"])
 
 
 class MagicDirectory(Directory):
     '''A special directory that logically contains the set of all extant keep
@@ -205,53 +334,65 @@ class MagicDirectory(Directory):
     to readdir().
     '''
 
-    def __init__(self, parent_inode, inodes):
+    def __init__(self, parent_inode, inodes, api):
         super(MagicDirectory, self).__init__(parent_inode)
         self.inodes = inodes
+        self.api = api
 
     def __contains__(self, k):
         if k in self._entries:
             return True
         try:
-            if arvados.Keep.get(k):
+            e = self.inodes.add_entry(CollectionDirectory(self.inode, self.inodes, self.api, k))
+            if e.update():
+                self._entries[k] = e
                 return True
             else:
                 return False
         except Exception as e:
-            #print 'exception keep', e
+            _logger.debug('arv-mount exception keep %s', e)
            return False
 
     def __getitem__(self, item):
-        if item not in self._entries:
-            self._entries[item] = self.inodes.add_entry(CollectionDirectory(self.inode, self.inodes, item))
-        return self._entries[item]
+        if item in self:
+            return self._entries[item]
+        else:
+            raise KeyError("No collection with id " + item)
+
+
+class RecursiveInvalidateDirectory(Directory):
+    def invalidate(self):
+        if self.inode == llfuse.ROOT_INODE:
+            llfuse.lock.acquire()
+        try:
+            super(RecursiveInvalidateDirectory, self).invalidate()
+            for a in self._entries:
+                self._entries[a].invalidate()
+        except Exception as e:
+            _logger.exception(e)
+        finally:
+            if self.inode == llfuse.ROOT_INODE:
+                llfuse.lock.release()
+
 
-class TagsDirectory(Directory):
+class TagsDirectory(RecursiveInvalidateDirectory):
     '''A special directory that contains as subdirectories all tags visible to the user.'''
 
     def __init__(self, parent_inode, inodes, api, poll_time=60):
         super(TagsDirectory, self).__init__(parent_inode)
         self.inodes = inodes
         self.api = api
-        try:
-            arvados.events.subscribe(self.api, [['object_uuid', 'is_a', 'arvados#link']], lambda ev: self.invalidate())
-        except:
-            self._poll = True
-            self._poll_time = poll_time
-
-    def invalidate(self):
-        with llfuse.lock:
-            super(TagsDirectory, self).invalidate()
-            for a in self._entries:
-                self._entries[a].invalidate()
+        #try:
+        #    arvados.events.subscribe(self.api, [['object_uuid', 'is_a', 'arvados#link']], lambda ev: self.invalidate())
+        #except:
+        self._poll = True
+        self._poll_time = poll_time
 
     def update(self):
         tags = self.api.links().list(filters=[['link_class', '=', 'tag']], select=['name'], distinct = True).execute()
-        self.merge(tags['items'],
-                   lambda i: i['name'],
-                   lambda a, i: a.tag == i,
-                   lambda i: TagDirectory(self.inode, self.inodes, self.api, i['name'], poll=self._poll, poll_time=self._poll_time))
+        if "items" in tags:
+            self.merge(tags['items'],
+                       lambda i: i['name'] if 'name' in i else i['uuid'],
+                       lambda a, i: a.tag == i,
+                       lambda i: TagDirectory(self.inode, self.inodes, self.api, i['name'], poll=self._poll, poll_time=self._poll_time))
 
 
 class TagDirectory(Directory):
     '''A special directory that contains as subdirectories all collections visible
@@ -274,88 +415,146 @@ class TagDirectory(Directory):
         self.merge(taggedcollections['items'],
                    lambda i: i['head_uuid'],
                    lambda a, i: a.collection_locator == i['head_uuid'],
-                   lambda i: CollectionDirectory(self.inode, self.inodes, i['head_uuid']))
+                   lambda i: CollectionDirectory(self.inode, self.inodes, self.api, i['head_uuid']))
 
 
-class GroupsDirectory(Directory):
-    '''A special directory that contains as subdirectories all groups visible to the user.'''
+class ProjectDirectory(RecursiveInvalidateDirectory):
+    '''A special directory that contains the contents of a project.'''
 
-    def __init__(self, parent_inode, inodes, api, poll_time=60):
-        super(GroupsDirectory, self).__init__(parent_inode)
+    def __init__(self, parent_inode, inodes, api, project_object, poll=False, poll_time=60):
+        super(ProjectDirectory, self).__init__(parent_inode)
         self.inodes = inodes
         self.api = api
-        try:
-            arvados.events.subscribe(self.api, [], lambda ev: self.invalidate())
-        except:
-            self._poll = True
-            self._poll_time = poll_time
-
-    def invalidate(self):
-        with llfuse.lock:
-            super(GroupsDirectory, self).invalidate()
-            for a in self._entries:
-                self._entries[a].invalidate()
-
-    def update(self):
-        groups = self.api.groups().list().execute()
-        self.merge(groups['items'],
-                   lambda i: i['uuid'],
-                   lambda a, i: a.uuid == i['uuid'],
-                   lambda i: GroupDirectory(self.inode, self.inodes, self.api, i, poll=self._poll, poll_time=self._poll_time))
-
-
-class GroupDirectory(Directory):
-    '''A special directory that contains the contents of a group.'''
-
-    def __init__(self, parent_inode, inodes, api, uuid, poll=False, poll_time=60):
-        super(GroupDirectory, self).__init__(parent_inode)
-        self.inodes = inodes
-        self.api = api
-        self.uuid = uuid['uuid']
-        self._poll = poll
-        self._poll_time = poll_time
-
-    def invalidate(self):
-        with llfuse.lock:
-            super(GroupDirectory, self).invalidate()
-            for a in self._entries:
-                self._entries[a].invalidate()
+        self.project_object = project_object
+        self.uuid = project_object['uuid']
 
     def createDirectory(self, i):
-        if re.match(r'[0-9a-f]{32}\+\d+', i['uuid']):
-            return CollectionDirectory(self.inode, self.inodes, i['uuid'])
+        if re.match(r'[a-z0-9]{5}-4zz18-[a-z0-9]{15}', i['uuid']):
+            return CollectionDirectory(self.inode, self.inodes, self.api, i['uuid'])
         elif re.match(r'[a-z0-9]{5}-j7d0g-[a-z0-9]{15}', i['uuid']):
-            return GroupDirectory(self.parent_inode, self.inodes, self.api, i, self._poll, self._poll_time)
+            return ProjectDirectory(self.inode, self.inodes, self.api, i, self._poll, self._poll_time)
+        elif re.match(r'[a-z0-9]{5}-o0j2j-[a-z0-9]{15}', i['uuid']) and i['head_kind'] == 'arvados#collection':
+            return CollectionDirectory(self.inode, self.inodes, self.api, i['head_uuid'])
+        #elif re.match(r'[a-z0-9]{5}-8i9sb-[a-z0-9]{15}', i['uuid']):
+        #    return None
         elif re.match(r'[a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15}', i['uuid']):
             return ObjectFile(self.parent_inode, i)
-        return None
+        else:
+            return None
 
     def update(self):
-        contents = self.api.groups().contents(uuid=self.uuid, include_linked=True).execute()
-        links = {}
-        for a in contents['links']:
-            links[a['head_uuid']] = a['name']
-
-        def choose_name(i):
-            if i['uuid'] in links:
-                return links[i['uuid']]
+        def namefn(i):
+            if 'name' in i:
+                if i['name'] is None:
+                    return None
+                elif re.match(r'[a-z0-9]{5}-(4zz18|j7d0g)-[a-z0-9]{15}', i['uuid']):
+                    # collection or subproject
+                    return i['name']
+                elif re.match(r'[a-z0-9]{5}-o0j2j-[a-z0-9]{15}', i['uuid']) and i['head_kind'] == 'arvados#collection':
+                    # name link
+                    return i['name']
+                elif 'kind' in i and i['kind'].startswith('arvados#'):
+                    # something else
+                    return "{}.{}".format(i['name'], i['kind'][8:])
             else:
-                return i['uuid']
+                return None
 
-        def same(a, i):
+        def samefn(a, i):
             if isinstance(a, CollectionDirectory):
                 return a.collection_locator == i['uuid']
-            elif isinstance(a, GroupDirectory):
+            elif isinstance(a, ProjectDirectory):
                 return a.uuid == i['uuid']
             elif isinstance(a, ObjectFile):
                 return a.uuid == i['uuid'] and not a.stale()
             return False
 
-        self.merge(contents['items'],
-                   choose_name,
-                   same,
+        #with llfuse.lock_released:
+        if re.match(r'[a-z0-9]{5}-j7d0g-[a-z0-9]{15}', self.uuid):
+            self.project_object = self.api.groups().get(uuid=self.uuid).execute()
+        elif re.match(r'[a-z0-9]{5}-tpzed-[a-z0-9]{15}', self.uuid):
+            self.project_object = self.api.users().get(uuid=self.uuid).execute()
+
+        contents = arvados.util.list_all(self.api.groups().contents, uuid=self.uuid)
+        # Name links will be obsolete soon, take this out when there are no more pre-#3036 in use.
+        contents += arvados.util.list_all(self.api.links().list, filters=[['tail_uuid', '=', self.uuid], ['link_class', '=', 'name']])
+
+        #print contents
+
+        self.merge(contents,
+                   namefn,
+                   samefn,
                    self.createDirectory)
 
+    def ctime(self):
+        return convertTime(self.project_object["created_at"]) if "created_at" in self.project_object else 0
+
+    def mtime(self):
+        return convertTime(self.project_object["modified_at"]) if "modified_at" in self.project_object else 0
+
+
+
+class SharedDirectory(RecursiveInvalidateDirectory):
+    '''A special directory that represents users or groups who have shared projects with me.'''
+
+    def __init__(self, parent_inode, inodes, api, exclude, poll=False, poll_time=60):
+        super(SharedDirectory, self).__init__(parent_inode)
+        self.current_user = api.users().current().execute()
+        self.inodes = inodes
+        self.api = api
+
+        # try:
+        #     arvados.events.subscribe(self.api, [], lambda ev: self.invalidate())
+        # except:
+        self._poll = True
+        self._poll_time = poll_time
+
+    def update(self):
+        #with llfuse.lock_released:
+        all_projects = arvados.util.list_all(self.api.groups().list, filters=[['group_class','=','project']])
+        objects = {}
+        for ob in all_projects:
+            objects[ob['uuid']] = ob
+
+        roots = []
+        root_owners = {}
+        for ob in all_projects:
+            if ob['owner_uuid'] != self.current_user['uuid'] and ob['owner_uuid'] not in objects:
+                roots.append(ob)
+                root_owners[ob['owner_uuid']] = True
+
+        #with llfuse.lock_released:
+        lusers = arvados.util.list_all(self.api.users().list, filters=[['uuid','in', list(root_owners)]])
+        lgroups = arvados.util.list_all(self.api.groups().list, filters=[['uuid','in', list(root_owners)]])
+
+        users = {}
+        groups = {}
+
+        for l in lusers:
+            objects[l["uuid"]] = l
+        for l in lgroups:
+            objects[l["uuid"]] = l
+
+        contents = {}
+        for r in root_owners:
+            if r in objects:
+                obr = objects[r]
+                if "name" in obr:
+                    contents[obr["name"]] = obr
+                if "first_name" in obr:
+                    contents[u"{} {}".format(obr["first_name"], obr["last_name"])] = obr
+
+        for r in roots:
+            if r['owner_uuid'] not in objects:
+                contents[r['name']] = r
+
+        try:
+            self.merge(contents.items(),
+                       lambda i: i[0],
+                       lambda a, i: a.uuid == i[1]['uuid'],
+                       lambda i: ProjectDirectory(self.inode, self.inodes, self.api, i[1], poll=self._poll, poll_time=self._poll_time))
+        except Exception as e:
+            _logger.exception(e)
+
 
 class FileHandle(object):
     '''Connects a numeric file handle to a File or Directory object that has
@@ -456,18 +655,19 @@ class Operations(llfuse.Operations):
 
         entry.st_size = e.size()
 
-        entry.st_blksize = 1024
-        entry.st_blocks = e.size()/1024
-        if e.size()/1024 != 0:
+        entry.st_blksize = 512
+        entry.st_blocks = (e.size()/512)
+        if e.size()/512 != 0:
             entry.st_blocks += 1
         entry.st_atime = 0
-        entry.st_mtime = 0
-        entry.st_ctime = 0
+        entry.st_mtime = e.mtime()
+        entry.st_ctime = e.ctime()
 
         return entry
 
     def lookup(self, parent_inode, name):
-        #print "lookup: parent_inode", parent_inode, "name", name
+        _logger.debug("arv-mount lookup: parent_inode %i name %s",
+                      parent_inode, name)
 
         inode = None
 
         if name == '.':
@@ -503,7 +703,7 @@ class Operations(llfuse.Operations):
         return fh
 
     def read(self, fh, off, size):
-        #print "read", fh, off, size
+        _logger.debug("arv-mount read %i %i %i", fh, off, size)
         if fh in self._filehandles:
             handle = self._filehandles[fh]
         else:
@@ -520,7 +720,7 @@ class Operations(llfuse.Operations):
             del self._filehandles[fh]
 
     def opendir(self, inode):
-        #print "opendir: inode", inode
+        _logger.debug("arv-mount opendir: inode %i", inode)
 
         if inode in self.inodes:
             p = self.inodes[inode]
@@ -541,14 +741,14 @@ class Operations(llfuse.Operations):
         return fh
 
     def readdir(self, fh, off):
-        #print "readdir: fh", fh, "off", off
+        _logger.debug("arv-mount readdir: fh %i off %i", fh, off)
 
         if fh in self._filehandles:
             handle = self._filehandles[fh]
         else:
             raise llfuse.FUSEError(errno.EBADF)
 
-        #print "handle.entry", handle.entry
+        _logger.debug("arv-mount handle.entry %s", handle.entry)
 
         e = off
         while e < len(handle.entry):
@@ -561,7 +761,7 @@ class Operations(llfuse.Operations):
 
     def statfs(self):
         st = llfuse.StatvfsData()
-        st.f_bsize = 1024 * 1024
+        st.f_bsize = 64 * 1024
         st.f_blocks = 0
         st.f_files = 0