# Match any character which FUSE or Linux cannot accommodate as part
# of a filename. (If present in a collection filename, they will
# appear as underscores in the fuse mount.)
-_disallowed_filename_characters = re.compile('[\x00/]')
+_disallowed_filename_characters = re.compile(r'[\x00/]')
class Directory(FreshBase):
and the value referencing a File or Directory object.
"""
- def __init__(self, parent_inode, inodes, apiconfig, enable_write):
+ def __init__(self, parent_inode, inodes, enable_write, filters):
"""parent_inode is the integer inode number"""
super(Directory, self).__init__()
raise Exception("parent_inode should be an int")
self.parent_inode = parent_inode
self.inodes = inodes
- self.apiconfig = apiconfig
self._entries = {}
self._mtime = time.time()
self._enable_write = enable_write
-
- def forward_slash_subst(self):
- if not hasattr(self, '_fsns'):
- self._fsns = None
- config = self.apiconfig()
- try:
- self._fsns = config["Collections"]["ForwardSlashNameSubstitution"]
- except KeyError:
- # old API server with no FSNS config
- self._fsns = '_'
+ self._filters = filters or []
+
+ def _filters_for(self, subtype, *, qualified):
+ for f in self._filters:
+ f_type, _, f_name = f[0].partition('.')
+ if not f_name:
+ yield f
+ elif f_type != subtype:
+ pass
+ elif qualified:
+ yield f
else:
- if self._fsns == '' or self._fsns == '/':
- self._fsns = None
- return self._fsns
+ yield [f_name, *f[1:]]
def unsanitize_filename(self, incoming):
"""Replace ForwardSlashNameSubstitution value with /"""
- fsns = self.forward_slash_subst()
+ fsns = self.inodes.forward_slash_subst()
if isinstance(fsns, str):
return incoming.replace(fsns, '/')
else:
elif dirty == '..':
return '__'
else:
- fsns = self.forward_slash_subst()
+ fsns = self.inodes.forward_slash_subst()
if isinstance(fsns, str):
dirty = dirty.replace('/', fsns)
return _disallowed_filename_characters.sub('_', dirty)
self.inodes.touch(self)
super(Directory, self).fresh()
+ def objsize(self):
+ # This is a very rough guess of the amount of overhead involved for
+ # each directory entry (128 bytes is 16 * 8-byte pointers).
+ return len(self._entries) * 128
+
def merge(self, items, fn, same, new_entry):
"""Helper method for updating the contents of the directory.
entries that are the same in both the old and new lists, create new
entries, and delete old entries missing from the new list.
- :items: iterable with new directory contents
+ Arguments:
+ * items: Iterable --- New directory contents
- :fn: function to take an entry in 'items' and return the desired file or
+ * fn: Callable --- Takes an entry in 'items' and return the desired file or
directory name, or None if this entry should be skipped
- :same: function to compare an existing entry (a File or Directory
+ * same: Callable --- Compare an existing entry (a File or Directory
object) with an entry in the items list to determine whether to keep
the existing entry.
- :new_entry: function to create a new directory entry (File or Directory
+ * new_entry: Callable --- Create a new directory entry (File or Directory
object) from an entry in the items list.
"""
changed = False
for i in items:
name = self.sanitize_filename(fn(i))
- if name:
- if name in oldentries and same(oldentries[name], i):
+ if not name:
+ continue
+ if name in oldentries:
+ ent = oldentries[name]
+ if same(ent, i):
# move existing directory entry over
- self._entries[name] = oldentries[name]
+ self._entries[name] = ent
del oldentries[name]
- else:
- _logger.debug("Adding entry '%s' to inode %i", name, self.inode)
- # create new directory entry
- ent = new_entry(i)
- if ent is not None:
- self._entries[name] = self.inodes.add_entry(ent)
- changed = True
+ self.inodes.inode_cache.touch(ent)
+
+ for i in items:
+ name = self.sanitize_filename(fn(i))
+ if not name:
+ continue
+ if name not in self._entries:
+ # create new directory entry
+ ent = new_entry(i)
+ if ent is not None:
+ self._entries[name] = self.inodes.add_entry(ent)
+ changed = True
+ _logger.debug("Added entry '%s' as inode %i to parent inode %i", name, ent.inode, self.inode)
# delete any other directory entries that were not in found in 'items'
for i in oldentries:
if changed:
self.inodes.invalidate_inode(self)
self._mtime = time.time()
+ self.inodes.inode_cache.update_cache_size(self)
self.fresh()
return True
return False
- def has_ref(self, only_children):
- if super(Directory, self).has_ref(only_children):
- return True
- for v in self._entries.values():
- if v.has_ref(False):
- return True
- return False
-
def clear(self):
"""Delete all entries"""
oldentries = self._entries
self._entries = {}
+ self.invalidate()
for n in oldentries:
- oldentries[n].clear()
self.inodes.del_entry(oldentries[n])
- self.invalidate()
+ self.inodes.inode_cache.update_cache_size(self)
def kernel_invalidate(self):
# Invalidating the dentry on the parent implies invalidating all paths
# below it as well.
- parent = self.inodes[self.parent_inode]
+ if self.parent_inode in self.inodes:
+ parent = self.inodes[self.parent_inode]
+ else:
+ # parent was removed already.
+ return
# Find self on the parent in order to invalidate this path.
# Calling the public items() method might trigger a refresh,
self.inodes.invalidate_entry(parent, k)
break
+ self.inodes.invalidate_inode(self)
+
def mtime(self):
return self._mtime
"""
- def __init__(self, parent_inode, inodes, apiconfig, enable_write, collection, collection_root):
- super(CollectionDirectoryBase, self).__init__(parent_inode, inodes, apiconfig, enable_write)
- self.apiconfig = apiconfig
+ def __init__(self, parent_inode, inodes, enable_write, filters, collection, collection_root):
+ super(CollectionDirectoryBase, self).__init__(parent_inode, inodes, enable_write, filters)
self.collection = collection
self.collection_root = collection_root
self.collection_record_file = None
item.fuse_entry.dead = False
self._entries[name] = item.fuse_entry
elif isinstance(item, arvados.collection.RichCollectionBase):
- self._entries[name] = self.inodes.add_entry(CollectionDirectoryBase(self.inode, self.inodes, self.apiconfig, self._enable_write, item, self.collection_root))
+ self._entries[name] = self.inodes.add_entry(CollectionDirectoryBase(
+ self.inode,
+ self.inodes,
+ self._enable_write,
+ self._filters,
+ item,
+ self.collection_root,
+ ))
self._entries[name].populate(mtime)
else:
self._entries[name] = self.inodes.add_entry(FuseArvadosFile(self.inode, item, mtime, self._enable_write))
super(CollectionDirectoryBase, self).clear()
self.collection = None
+ def objsize(self):
+ # objsize for the whole collection is represented at the root,
+ # don't double-count it
+ return 0
class CollectionDirectory(CollectionDirectoryBase):
"""Represents the root of a directory tree representing a collection."""
- def __init__(self, parent_inode, inodes, api, num_retries, enable_write, collection_record=None, explicit_collection=None):
- super(CollectionDirectory, self).__init__(parent_inode, inodes, api.config, enable_write, None, self)
+ def __init__(self, parent_inode, inodes, api, num_retries, enable_write, filters=None, collection_record=None, explicit_collection=None):
+ super(CollectionDirectory, self).__init__(parent_inode, inodes, enable_write, filters, None, self)
self.api = api
self.num_retries = num_retries
self._poll = True
self.collection_record_file.invalidate()
self.inodes.invalidate_inode(self.collection_record_file)
_logger.debug("%s invalidated collection record file", self)
+ self.inodes.inode_cache.update_cache_size(self)
self.fresh()
def uuid(self):
return (self.collection_locator is not None)
def objsize(self):
- # This is an empirically-derived heuristic to estimate the memory used
- # to store this collection's metadata. Calculating the memory
- # footprint directly would be more accurate, but also more complicated.
- return self._manifest_size * 128
+ # This is a very rough guess of the amount of overhead
+ # involved for a collection; you've got the manifest text
+ # itself which is not discarded by the Collection class, then
+ # the block identifiers that get copied into their own
+ # strings, then the rest of the overhead of the Python
+ # objects.
+ return self._manifest_size * 4
def finalize(self):
if self.collection is not None:
if self.writable():
- self.collection.save()
+ try:
+ self.collection.save()
+ except Exception as e:
+ _logger.exception("Failed to save collection %s", self.collection_locator)
self.collection.stop_threads()
def clear(self):
if self.collection is not None:
self.collection.stop_threads()
- super(CollectionDirectory, self).clear()
self._manifest_size = 0
+ super(CollectionDirectory, self).clear()
class TmpCollectionDirectory(CollectionDirectoryBase):
def save_new(self):
pass
- def __init__(self, parent_inode, inodes, api_client, num_retries, enable_write, storage_classes=None):
+ def __init__(self, parent_inode, inodes, api_client, num_retries, enable_write, filters=None, storage_classes=None):
collection = self.UnsaveableCollection(
api_client=api_client,
keep_client=api_client.keep,
# This is always enable_write=True because it never tries to
# save to the backend
super(TmpCollectionDirectory, self).__init__(
- parent_inode, inodes, api_client.config, True, collection, self)
+ parent_inode, inodes, True, filters, collection, self)
self.populate(self.mtime())
def on_event(self, *args, **kwargs):
""".lstrip()
- def __init__(self, parent_inode, inodes, api, num_retries, enable_write, pdh_only=False, storage_classes=None):
- super(MagicDirectory, self).__init__(parent_inode, inodes, api.config, enable_write)
+ def __init__(self, parent_inode, inodes, api, num_retries, enable_write, filters, pdh_only=False, storage_classes=None):
+ super(MagicDirectory, self).__init__(parent_inode, inodes, enable_write, filters)
self.api = api
self.num_retries = num_retries
self.pdh_only = pdh_only
# If we're the root directory, add an identical by_id subdirectory.
if self.inode == llfuse.ROOT_INODE:
self._entries['by_id'] = self.inodes.add_entry(MagicDirectory(
- self.inode, self.inodes, self.api, self.num_retries, self._enable_write,
- self.pdh_only))
+ self.inode,
+ self.inodes,
+ self.api,
+ self.num_retries,
+ self._enable_write,
+ self._filters,
+ self.pdh_only,
+ ))
def __contains__(self, k):
if k in self._entries:
if group_uuid_pattern.match(k):
project = self.api.groups().list(
- filters=[['group_class', 'in', ['project','filter']], ["uuid", "=", k]]).execute(num_retries=self.num_retries)
+ filters=[
+ ['group_class', 'in', ['project','filter']],
+ ["uuid", "=", k],
+ *self._filters_for('groups', qualified=False),
+ ],
+ ).execute(num_retries=self.num_retries)
if project[u'items_available'] == 0:
return False
e = self.inodes.add_entry(ProjectDirectory(
- self.inode, self.inodes, self.api, self.num_retries, self._enable_write,
- project[u'items'][0], storage_classes=self.storage_classes))
+ self.inode,
+ self.inodes,
+ self.api,
+ self.num_retries,
+ self._enable_write,
+ self._filters,
+ project[u'items'][0],
+ storage_classes=self.storage_classes,
+ ))
else:
e = self.inodes.add_entry(CollectionDirectory(
- self.inode, self.inodes, self.api, self.num_retries, self._enable_write, k))
+ self.inode,
+ self.inodes,
+ self.api,
+ self.num_retries,
+ self._enable_write,
+ self._filters,
+ k,
+ ))
if e.update():
if k not in self._entries:
class TagsDirectory(Directory):
"""A special directory that contains as subdirectories all tags visible to the user."""
- def __init__(self, parent_inode, inodes, api, num_retries, enable_write, poll_time=60):
- super(TagsDirectory, self).__init__(parent_inode, inodes, api.config, enable_write)
+ def __init__(self, parent_inode, inodes, api, num_retries, enable_write, filters, poll_time=60):
+ super(TagsDirectory, self).__init__(parent_inode, inodes, enable_write, filters)
self.api = api
self.num_retries = num_retries
self._poll = True
def update(self):
with llfuse.lock_released:
tags = self.api.links().list(
- filters=[['link_class', '=', 'tag'], ["name", "!=", ""]],
- select=['name'], distinct=True, limit=1000
- ).execute(num_retries=self.num_retries)
+ filters=[
+ ['link_class', '=', 'tag'],
+ ['name', '!=', ''],
+ *self._filters_for('links', qualified=False),
+ ],
+ select=['name'],
+ distinct=True,
+ limit=1000,
+ ).execute(num_retries=self.num_retries)
if "items" in tags:
- self.merge(tags['items']+[{"name": n} for n in self._extra],
- lambda i: i['name'],
- lambda a, i: a.tag == i['name'],
- lambda i: TagDirectory(self.inode, self.inodes, self.api, self.num_retries, self._enable_write,
- i['name'], poll=self._poll, poll_time=self._poll_time))
+ self.merge(
+ tags['items']+[{"name": n} for n in self._extra],
+ lambda i: i['name'],
+ lambda a, i: a.tag == i['name'],
+ lambda i: TagDirectory(
+ self.inode,
+ self.inodes,
+ self.api,
+ self.num_retries,
+ self._enable_write,
+ self._filters,
+ i['name'],
+ poll=self._poll,
+ poll_time=self._poll_time,
+ ),
+ )
@use_counter
@check_update
return super(TagsDirectory, self).__getitem__(item)
with llfuse.lock_released:
tags = self.api.links().list(
- filters=[['link_class', '=', 'tag'], ['name', '=', item]], limit=1
+ filters=[
+ ['link_class', '=', 'tag'],
+ ['name', '=', item],
+ *self._filters_for('links', qualified=False),
+ ],
+ limit=1,
).execute(num_retries=self.num_retries)
if tags["items"]:
self._extra.add(item)
to the user that are tagged with a particular tag.
"""
- def __init__(self, parent_inode, inodes, api, num_retries, enable_write, tag,
+ def __init__(self, parent_inode, inodes, api, num_retries, enable_write, filters, tag,
poll=False, poll_time=60):
- super(TagDirectory, self).__init__(parent_inode, inodes, api.config, enable_write)
+ super(TagDirectory, self).__init__(parent_inode, inodes, enable_write, filters)
self.api = api
self.num_retries = num_retries
self.tag = tag
def update(self):
with llfuse.lock_released:
taggedcollections = self.api.links().list(
- filters=[['link_class', '=', 'tag'],
- ['name', '=', self.tag],
- ['head_uuid', 'is_a', 'arvados#collection']],
- select=['head_uuid']
- ).execute(num_retries=self.num_retries)
- self.merge(taggedcollections['items'],
- lambda i: i['head_uuid'],
- lambda a, i: a.collection_locator == i['head_uuid'],
- lambda i: CollectionDirectory(self.inode, self.inodes, self.api, self.num_retries, self._enable_write, i['head_uuid']))
+ filters=[
+ ['link_class', '=', 'tag'],
+ ['name', '=', self.tag],
+ ['head_uuid', 'is_a', 'arvados#collection'],
+ *self._filters_for('links', qualified=False),
+ ],
+ select=['head_uuid'],
+ ).execute(num_retries=self.num_retries)
+ self.merge(
+ taggedcollections['items'],
+ lambda i: i['head_uuid'],
+ lambda a, i: a.collection_locator == i['head_uuid'],
+ lambda i: CollectionDirectory(
+ self.inode,
+ self.inodes,
+ self.api,
+ self.num_retries,
+ self._enable_write,
+ self._filters,
+ i['head_uuid'],
+ ),
+ )
class ProjectDirectory(Directory):
"""A special directory that contains the contents of a project."""
- def __init__(self, parent_inode, inodes, api, num_retries, enable_write, project_object,
- poll=True, poll_time=3, storage_classes=None):
- super(ProjectDirectory, self).__init__(parent_inode, inodes, api.config, enable_write)
+ def __init__(self, parent_inode, inodes, api, num_retries, enable_write, filters,
+ project_object, poll=True, poll_time=3, storage_classes=None):
+ super(ProjectDirectory, self).__init__(parent_inode, inodes, enable_write, filters)
self.api = api
self.num_retries = num_retries
self.project_object = project_object
self._current_user = None
self._full_listing = False
self.storage_classes = storage_classes
+ self.recursively_contained = False
+
+ # Filter groups can contain themselves, which causes tools
+ # that walk the filesystem to get stuck in an infinite loop,
+ # so suppress returning a listing in that case.
+ if self.project_object.get("group_class") == "filter":
+ iter_parent_inode = parent_inode
+ while iter_parent_inode != llfuse.ROOT_INODE:
+ parent_dir = self.inodes[iter_parent_inode]
+ if isinstance(parent_dir, ProjectDirectory) and parent_dir.project_uuid == self.project_uuid:
+ self.recursively_contained = True
+ break
+ iter_parent_inode = parent_dir.parent_inode
def want_event_subscribe(self):
return True
def createDirectory(self, i):
+ common_args = (self.inode, self.inodes, self.api, self.num_retries, self._enable_write, self._filters)
if collection_uuid_pattern.match(i['uuid']):
- return CollectionDirectory(self.inode, self.inodes, self.api, self.num_retries, self._enable_write, i)
+ return CollectionDirectory(*common_args, i)
elif group_uuid_pattern.match(i['uuid']):
- return ProjectDirectory(self.inode, self.inodes, self.api, self.num_retries, self._enable_write,
- i, self._poll, self._poll_time, self.storage_classes)
+ return ProjectDirectory(*common_args, i, self._poll, self._poll_time, self.storage_classes)
elif link_uuid_pattern.match(i['uuid']):
if i['head_kind'] == 'arvados#collection' or portable_data_hash_pattern.match(i['head_uuid']):
- return CollectionDirectory(self.inode, self.inodes, self.api, self.num_retries, self._enable_write, i['head_uuid'])
+ return CollectionDirectory(*common_args, i['head_uuid'])
else:
return None
elif uuid_pattern.match(i['uuid']):
self.project_object_file = ObjectFile(self.inode, self.project_object)
self.inodes.add_entry(self.project_object_file)
- if not self._full_listing:
+ if self.recursively_contained or not self._full_listing:
return True
def samefn(a, i):
self.project_object = self.api.users().get(
uuid=self.project_uuid).execute(num_retries=self.num_retries)
# do this in 2 steps until #17424 is fixed
- contents = list(arvados.util.keyset_list_all(self.api.groups().contents,
- order_key="uuid",
- num_retries=self.num_retries,
- uuid=self.project_uuid,
- filters=[["uuid", "is_a", "arvados#group"],
- ["groups.group_class", "in", ["project","filter"]]]))
- contents.extend(filter(lambda i: i["current_version_uuid"] == i["uuid"],
- arvados.util.keyset_list_all(self.api.groups().contents,
- order_key="uuid",
- num_retries=self.num_retries,
- uuid=self.project_uuid,
- filters=[["uuid", "is_a", "arvados#collection"]])))
-
-
+ contents = list(arvados.util.keyset_list_all(
+ self.api.groups().contents,
+ order_key='uuid',
+ num_retries=self.num_retries,
+ uuid=self.project_uuid,
+ filters=[
+ ['uuid', 'is_a', 'arvados#group'],
+ ['groups.group_class', 'in', ['project', 'filter']],
+ *self._filters_for('groups', qualified=True),
+ ],
+ ))
+ contents.extend(obj for obj in arvados.util.keyset_list_all(
+ self.api.groups().contents,
+ order_key='uuid',
+ num_retries=self.num_retries,
+ uuid=self.project_uuid,
+ filters=[
+ ['uuid', 'is_a', 'arvados#collection'],
+ *self._filters_for('collections', qualified=True),
+ ],
+ ) if obj['current_version_uuid'] == obj['uuid'])
# end with llfuse.lock_released, re-acquire lock
self.merge(contents,
namefilter = ["name", "=", k]
else:
namefilter = ["name", "in", [k, k2]]
- contents = self.api.groups().list(filters=[["owner_uuid", "=", self.project_uuid],
- ["group_class", "in", ["project","filter"]],
- namefilter],
- limit=2).execute(num_retries=self.num_retries)["items"]
+ contents = self.api.groups().list(
+ filters=[
+ ["owner_uuid", "=", self.project_uuid],
+ ["group_class", "in", ["project","filter"]],
+ namefilter,
+ *self._filters_for('groups', qualified=False),
+ ],
+ limit=2,
+ ).execute(num_retries=self.num_retries)["items"]
if not contents:
- contents = self.api.collections().list(filters=[["owner_uuid", "=", self.project_uuid],
- namefilter],
- limit=2).execute(num_retries=self.num_retries)["items"]
+ contents = self.api.collections().list(
+ filters=[
+ ["owner_uuid", "=", self.project_uuid],
+ namefilter,
+ *self._filters_for('collections', qualified=False),
+ ],
+ limit=2,
+ ).execute(num_retries=self.num_retries)["items"]
if contents:
if len(contents) > 1 and contents[1]['name'] == k:
# If "foo/bar" and "foo[SUBST]bar" both exist, use
class SharedDirectory(Directory):
"""A special directory that represents users or groups who have shared projects with me."""
- def __init__(self, parent_inode, inodes, api, num_retries, enable_write, exclude,
- poll=False, poll_time=60, storage_classes=None):
- super(SharedDirectory, self).__init__(parent_inode, inodes, api.config, enable_write)
+ def __init__(self, parent_inode, inodes, api, num_retries, enable_write, filters,
+ exclude, poll=False, poll_time=60, storage_classes=None):
+ super(SharedDirectory, self).__init__(parent_inode, inodes, enable_write, filters)
self.api = api
self.num_retries = num_retries
self.current_user = api.users().current().execute(num_retries=num_retries)
if 'httpMethod' in methods.get('shared', {}):
page = []
while True:
- resp = self.api.groups().shared(filters=[['group_class', 'in', ['project','filter']]]+page,
- order="uuid",
- limit=10000,
- count="none",
- include="owner_uuid").execute()
+ resp = self.api.groups().shared(
+ filters=[
+ ['group_class', 'in', ['project','filter']],
+ *page,
+ *self._filters_for('groups', qualified=False),
+ ],
+ order="uuid",
+ limit=10000,
+ count="none",
+ include="owner_uuid",
+ ).execute()
if not resp["items"]:
break
page = [["uuid", ">", resp["items"][len(resp["items"])-1]["uuid"]]]
self.api.groups().list,
order_key="uuid",
num_retries=self.num_retries,
- filters=[['group_class','in',['project','filter']]],
- select=["uuid", "owner_uuid"]))
+ filters=[
+ ['group_class', 'in', ['project','filter']],
+ *self._filters_for('groups', qualified=False),
+ ],
+ select=["uuid", "owner_uuid"],
+ ))
for ob in all_projects:
objects[ob['uuid']] = ob
self.api.users().list,
order_key="uuid",
num_retries=self.num_retries,
- filters=[['uuid','in', list(root_owners)]])
+ filters=[
+ ['uuid', 'in', list(root_owners)],
+ *self._filters_for('users', qualified=False),
+ ],
+ )
lgroups = arvados.util.keyset_list_all(
self.api.groups().list,
order_key="uuid",
num_retries=self.num_retries,
- filters=[['uuid','in', list(root_owners)+roots]])
-
+ filters=[
+ ['uuid', 'in', list(root_owners)+roots],
+ *self._filters_for('groups', qualified=False),
+ ],
+ )
for l in lusers:
objects[l["uuid"]] = l
for l in lgroups:
# end with llfuse.lock_released, re-acquire lock
- self.merge(contents.items(),
- lambda i: i[0],
- lambda a, i: a.uuid() == i[1]['uuid'],
- lambda i: ProjectDirectory(self.inode, self.inodes, self.api, self.num_retries, self._enable_write,
- i[1], poll=self._poll, poll_time=self._poll_time, storage_classes=self.storage_classes))
+ self.merge(
+ contents.items(),
+ lambda i: i[0],
+ lambda a, i: a.uuid() == i[1]['uuid'],
+ lambda i: ProjectDirectory(
+ self.inode,
+ self.inodes,
+ self.api,
+ self.num_retries,
+ self._enable_write,
+ self._filters,
+ i[1],
+ poll=self._poll,
+ poll_time=self._poll_time,
+ storage_classes=self.storage_classes,
+ ),
+ )
except Exception:
_logger.exception("arv-mount shared dir error")
finally: