X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/f159fab8f9d6bc4254192ce43432defd5bd400aa..3088521012dd7f5b88725240cebf163d7c8da31c:/services/fuse/arvados_fuse/fusedir.py diff --git a/services/fuse/arvados_fuse/fusedir.py b/services/fuse/arvados_fuse/fusedir.py index 2d58012fa8..e8da789fa5 100644 --- a/services/fuse/arvados_fuse/fusedir.py +++ b/services/fuse/arvados_fuse/fusedir.py @@ -2,20 +2,25 @@ # # SPDX-License-Identifier: AGPL-3.0 -import logging -import re -import time -import llfuse -import arvados +from __future__ import absolute_import +from __future__ import division +from future.utils import viewitems +from future.utils import itervalues +from builtins import dict import apiclient +import arvados +import errno import functools +import llfuse +import logging +import re +import sys import threading -from apiclient import errors as apiclient_errors -import errno import time +from apiclient import errors as apiclient_errors -from fusefile import StringFile, ObjectFile, FuncToJSONFile, FuseArvadosFile -from fresh import FreshBase, convertTime, use_counter, check_update +from .fusefile import StringFile, ObjectFile, FuncToJSONFile, FuseArvadosFile +from .fresh import FreshBase, convertTime, use_counter, check_update import arvados.collection from arvados.util import portable_data_hash_pattern, uuid_pattern, collection_uuid_pattern, group_uuid_pattern, user_uuid_pattern, link_uuid_pattern @@ -28,20 +33,6 @@ _logger = logging.getLogger('arvados.arvados_fuse') # appear as underscores in the fuse mount.) _disallowed_filename_characters = re.compile('[\x00/]') -# '.' and '..' are not reachable if API server is newer than #6277 -def sanitize_filename(dirty): - """Replace disallowed filename characters with harmless "_".""" - if dirty is None: - return None - elif dirty == '': - return '_' - elif dirty == '.': - return '_' - elif dirty == '..': - return '__' - else: - return _disallowed_filename_characters.sub('_', dirty) - class Directory(FreshBase): """Generic directory object, backed by a dict. @@ -50,7 +41,7 @@ class Directory(FreshBase): and the value referencing a File or Directory object. """ - def __init__(self, parent_inode, inodes): + def __init__(self, parent_inode, inodes, apiconfig): """parent_inode is the integer inode number""" super(Directory, self).__init__() @@ -60,11 +51,53 @@ class Directory(FreshBase): raise Exception("parent_inode should be an int") self.parent_inode = parent_inode self.inodes = inodes + self.apiconfig = apiconfig self._entries = {} self._mtime = time.time() - # Overriden by subclasses to implement logic to update the entries dict - # when the directory is stale + def forward_slash_subst(self): + if not hasattr(self, '_fsns'): + self._fsns = None + config = self.apiconfig() + try: + self._fsns = config["Collections"]["ForwardSlashNameSubstitution"] + except KeyError: + # old API server with no FSNS config + self._fsns = '_' + else: + if self._fsns == '' or self._fsns == '/': + self._fsns = None + return self._fsns + + def unsanitize_filename(self, incoming): + """Replace ForwardSlashNameSubstitution value with /""" + fsns = self.forward_slash_subst() + if isinstance(fsns, str): + return incoming.replace(fsns, '/') + else: + return incoming + + def sanitize_filename(self, dirty): + """Replace disallowed filename characters according to + ForwardSlashNameSubstitution in self.api_config.""" + # '.' and '..' are not reachable if API server is newer than #6277 + if dirty is None: + return None + elif dirty == '': + return '_' + elif dirty == '.': + return '_' + elif dirty == '..': + return '__' + else: + fsns = self.forward_slash_subst() + if isinstance(fsns, str): + dirty = dirty.replace('/', fsns) + return _disallowed_filename_characters.sub('_', dirty) + + + # Overridden by subclasses to implement logic to update the + # entries dict when the directory is stale @use_counter def update(self): pass @@ -133,7 +166,7 @@ class Directory(FreshBase): self._entries = {} changed = False for i in items: - name = sanitize_filename(fn(i)) + name = self.sanitize_filename(fn(i)) if name: if name in oldentries and same(oldentries[name], i): # move existing directory entry over @@ -163,7 +196,7 @@ class Directory(FreshBase): def in_use(self): if super(Directory, self).in_use(): return True - for v in self._entries.itervalues(): + for v in itervalues(self._entries): if v.in_use(): return True return False @@ -171,7 +204,7 @@ class Directory(FreshBase): def has_ref(self, only_children): if super(Directory, self).has_ref(only_children): return True - for v in self._entries.itervalues(): + for v in itervalues(self._entries): if v.has_ref(False): return True return False @@ -193,7 +226,7 @@ class Directory(FreshBase): # Find self on the parent in order to invalidate this path. # Calling the public items() method might trigger a refresh, # which we definitely don't want, so read the internal dict directly. - for k,v in parent._entries.items(): + for k,v in viewitems(parent._entries): if v is self: self.inodes.invalidate_entry(parent, k) break @@ -241,12 +274,13 @@ class CollectionDirectoryBase(Directory): """ - def __init__(self, parent_inode, inodes, collection): - super(CollectionDirectoryBase, self).__init__(parent_inode, inodes) + def __init__(self, parent_inode, inodes, apiconfig, collection): + super(CollectionDirectoryBase, self).__init__(parent_inode, inodes, apiconfig) + self.apiconfig = apiconfig self.collection = collection def new_entry(self, name, item, mtime): - name = sanitize_filename(name) + name = self.sanitize_filename(name) if hasattr(item, "fuse_entry") and item.fuse_entry is not None: if item.fuse_entry.dead is not True: raise Exception("Can only reparent dead inode entry") @@ -255,7 +289,7 @@ class CollectionDirectoryBase(Directory): item.fuse_entry.dead = False self._entries[name] = item.fuse_entry elif isinstance(item, arvados.collection.RichCollectionBase): - self._entries[name] = self.inodes.add_entry(CollectionDirectoryBase(self.inode, self.inodes, item)) + self._entries[name] = self.inodes.add_entry(CollectionDirectoryBase(self.inode, self.inodes, self.apiconfig, item)) self._entries[name].populate(mtime) else: self._entries[name] = self.inodes.add_entry(FuseArvadosFile(self.inode, item, mtime)) @@ -263,7 +297,7 @@ class CollectionDirectoryBase(Directory): def on_event(self, event, collection, name, item): if collection == self.collection: - name = sanitize_filename(name) + name = self.sanitize_filename(name) _logger.debug("collection notify %s %s %s %s", event, collection, name, item) with llfuse.lock: if event == arvados.collection.ADD: @@ -282,7 +316,7 @@ class CollectionDirectoryBase(Directory): def populate(self, mtime): self._mtime = mtime self.collection.subscribe(self.on_event) - for entry, item in self.collection.items(): + for entry, item in viewitems(self.collection): self.new_entry(entry, item, self.mtime()) def writable(self): @@ -352,14 +386,14 @@ class CollectionDirectory(CollectionDirectoryBase): """Represents the root of a directory tree representing a collection.""" def __init__(self, parent_inode, inodes, api, num_retries, collection_record=None, explicit_collection=None): - super(CollectionDirectory, self).__init__(parent_inode, inodes, None) + super(CollectionDirectory, self).__init__(parent_inode, inodes, api.config, None) self.api = api self.num_retries = num_retries self.collection_record_file = None self.collection_record = None self._poll = True try: - self._poll_time = (api._rootDesc.get('blobSignatureTtl', 60*60*2)/2) + self._poll_time = (api._rootDesc.get('blobSignatureTtl', 60*60*2) // 2) except: _logger.debug("Error getting blobSignatureTtl from discovery document: %s", sys.exc_info()[0]) self._poll_time = 60*60 @@ -543,7 +577,7 @@ class TmpCollectionDirectory(CollectionDirectoryBase): keep_client=api_client.keep, num_retries=num_retries) super(TmpCollectionDirectory, self).__init__( - parent_inode, inodes, collection) + parent_inode, inodes, api_client.config, collection) self.collection_record_file = None self.populate(self.mtime()) @@ -620,7 +654,7 @@ and the directory will appear if it exists. """.lstrip() def __init__(self, parent_inode, inodes, api, num_retries, pdh_only=False): - super(MagicDirectory, self).__init__(parent_inode, inodes) + super(MagicDirectory, self).__init__(parent_inode, inodes, api.config) self.api = api self.num_retries = num_retries self.pdh_only = pdh_only @@ -649,7 +683,7 @@ and the directory will appear if it exists. if group_uuid_pattern.match(k): project = self.api.groups().list( - filters=[['group_class', '=', 'project'], ["uuid", "=", k]]).execute(num_retries=self.num_retries) + filters=[['group_class', 'in', ['project','filter']], ["uuid", "=", k]]).execute(num_retries=self.num_retries) if project[u'items_available'] == 0: return False e = self.inodes.add_entry(ProjectDirectory( @@ -691,7 +725,7 @@ class TagsDirectory(Directory): """A special directory that contains as subdirectories all tags visible to the user.""" def __init__(self, parent_inode, inodes, api, num_retries, poll_time=60): - super(TagsDirectory, self).__init__(parent_inode, inodes) + super(TagsDirectory, self).__init__(parent_inode, inodes, api.config) self.api = api self.num_retries = num_retries self._poll = True @@ -748,7 +782,7 @@ class TagDirectory(Directory): def __init__(self, parent_inode, inodes, api, num_retries, tag, poll=False, poll_time=60): - super(TagDirectory, self).__init__(parent_inode, inodes) + super(TagDirectory, self).__init__(parent_inode, inodes, api.config) self.api = api self.num_retries = num_retries self.tag = tag @@ -777,8 +811,8 @@ class ProjectDirectory(Directory): """A special directory that contains the contents of a project.""" def __init__(self, parent_inode, inodes, api, num_retries, project_object, - poll=False, poll_time=60): - super(ProjectDirectory, self).__init__(parent_inode, inodes) + poll=True, poll_time=3): + super(ProjectDirectory, self).__init__(parent_inode, inodes, api.config) self.api = api self.num_retries = num_retries self.project_object = project_object @@ -860,14 +894,16 @@ class ProjectDirectory(Directory): elif user_uuid_pattern.match(self.project_uuid): self.project_object = self.api.users().get( uuid=self.project_uuid).execute(num_retries=self.num_retries) - - contents = arvados.util.list_all(self.api.groups().list, + # do this in 2 steps until #17424 is fixed + contents = arvados.util.list_all(self.api.groups().contents, self.num_retries, - filters=[["owner_uuid", "=", self.project_uuid], - ["group_class", "=", "project"]]) - contents.extend(arvados.util.list_all(self.api.collections().list, + uuid=self.project_uuid, + filters=[["uuid", "is_a", "arvados#group"], + ["groups.group_class", "in", ["project","filter"]]]) + contents.extend(arvados.util.list_all(self.api.groups().contents, self.num_retries, - filters=[["owner_uuid", "=", self.project_uuid]])) + uuid=self.project_uuid, + filters=[["uuid", "is_a", "arvados#collection"]])) # end with llfuse.lock_released, re-acquire lock @@ -892,16 +928,25 @@ class ProjectDirectory(Directory): elif self._full_listing or super(ProjectDirectory, self).__contains__(k): return super(ProjectDirectory, self).__getitem__(k) with llfuse.lock_released: + k2 = self.unsanitize_filename(k) + if k2 == k: + namefilter = ["name", "=", k] + else: + namefilter = ["name", "in", [k, k2]] contents = self.api.groups().list(filters=[["owner_uuid", "=", self.project_uuid], - ["group_class", "=", "project"], - ["name", "=", k]], - limit=1).execute(num_retries=self.num_retries)["items"] + ["group_class", "in", ["project","filter"]], + namefilter], + limit=2).execute(num_retries=self.num_retries)["items"] if not contents: contents = self.api.collections().list(filters=[["owner_uuid", "=", self.project_uuid], - ["name", "=", k]], - limit=1).execute(num_retries=self.num_retries)["items"] + namefilter], + limit=2).execute(num_retries=self.num_retries)["items"] if contents: - name = sanitize_filename(self.namefn(contents[0])) + if len(contents) > 1 and contents[1]['name'] == k: + # If "foo/bar" and "foo[SUBST]bar" both exist, use + # "foo[SUBST]bar". + contents = [contents[1]] + name = self.sanitize_filename(self.namefn(contents[0])) if name != k: raise KeyError(k) return self._add_entry(contents[0], name) @@ -990,8 +1035,8 @@ class ProjectDirectory(Directory): new_attrs = properties.get("new_attributes") or {} old_attrs["uuid"] = ev["object_uuid"] new_attrs["uuid"] = ev["object_uuid"] - old_name = sanitize_filename(self.namefn(old_attrs)) - new_name = sanitize_filename(self.namefn(new_attrs)) + old_name = self.sanitize_filename(self.namefn(old_attrs)) + new_name = self.sanitize_filename(self.namefn(new_attrs)) # create events will have a new name, but not an old name # delete events will have an old name, but not a new name @@ -1033,7 +1078,7 @@ class SharedDirectory(Directory): def __init__(self, parent_inode, inodes, api, num_retries, exclude, poll=False, poll_time=60): - super(SharedDirectory, self).__init__(parent_inode, inodes) + super(SharedDirectory, self).__init__(parent_inode, inodes, api.config) self.api = api self.num_retries = num_retries self.current_user = api.users().current().execute(num_retries=num_retries) @@ -1058,7 +1103,7 @@ class SharedDirectory(Directory): if 'httpMethod' in methods.get('shared', {}): page = [] while True: - resp = self.api.groups().shared(filters=[['group_class', '=', 'project']]+page, + resp = self.api.groups().shared(filters=[['group_class', 'in', ['project','filter']]]+page, order="uuid", limit=10000, count="none", @@ -1075,7 +1120,7 @@ class SharedDirectory(Directory): else: all_projects = arvados.util.list_all( self.api.groups().list, self.num_retries, - filters=[['group_class','=','project']], + filters=[['group_class','in',['project','filter']]], select=["uuid", "owner_uuid"]) for ob in all_projects: objects[ob['uuid']] = ob @@ -1116,7 +1161,7 @@ class SharedDirectory(Directory): # end with llfuse.lock_released, re-acquire lock - self.merge(contents.items(), + self.merge(viewitems(contents), lambda i: i[0], lambda a, i: a.uuid() == i[1]['uuid'], lambda i: ProjectDirectory(self.inode, self.inodes, self.api, self.num_retries, i[1], poll=self._poll, poll_time=self._poll_time))