+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
"""FUSE driver for Arvados Keep
Architecture:
# unlimited to avoid deadlocks, see https://arvados.org/issues/3198#note-43 for
# details.
-llfuse.capi._notify_queue = Queue.Queue()
+if hasattr(llfuse, 'capi'):
+ # llfuse < 0.42
+ llfuse.capi._notify_queue = Queue.Queue()
+else:
+ # llfuse >= 0.42
+ llfuse._notify_queue = Queue.Queue()
+
+LLFUSE_VERSION_0 = llfuse.__version__.startswith('0')
from fusedir import sanitize_filename, Directory, CollectionDirectory, TmpCollectionDirectory, MagicDirectory, TagsDirectory, ProjectDirectory, SharedDirectory, CollectionDirectoryBase
from fusefile import StringFile, FuseArvadosFile
self.obj.dec_use()
def flush(self):
- if self.obj.writable():
- return self.obj.flush()
+ pass
class FileHandle(Handle):
"""Connects a numeric file handle to a File object that has
been opened by the client."""
- pass
+
+ def flush(self):
+ if self.obj.writable():
+ return self.obj.flush()
class DirectoryHandle(Handle):
def __init__(self, cap, min_entries=4):
self._entries = collections.OrderedDict()
self._by_uuid = {}
- self._counter = itertools.count(0)
self.cap = cap
self._total = 0
self.min_entries = min_entries
return self._total
def _remove(self, obj, clear):
- if clear and not obj.clear():
- _logger.debug("InodeCache could not clear %i in_use %s", obj.inode, obj.in_use())
- return False
+ if clear:
+ # Kernel behavior seems to be that if a file is
+ # referenced, its parents remain referenced too. This
+ # means has_ref() exits early when a collection is not
+ # a candidate for eviction.
+ #
+ # By contrast, in_use() doesn't increment references on
+ # parents, so it requires a full tree walk to determine if
+ # a collection is a candidate for eviction. This takes
+ # .07s for 240000 files, which becomes a major drag when
+ # cap_cache is being called several times a second and
+ # there are multiple non-evictable collections in the
+ # cache.
+ #
+ # So it is important for performance that we do the
+ # has_ref() check first.
+
+ if obj.has_ref(True):
+ _logger.debug("InodeCache cannot clear inode %i, still referenced", obj.inode)
+ return
+
+ if obj.in_use():
+ _logger.debug("InodeCache cannot clear inode %i, in use", obj.inode)
+ return
+
+ obj.kernel_invalidate()
+ _logger.debug("InodeCache sent kernel invalidate inode %i", obj.inode)
+ obj.clear()
+
+ # The llfuse lock is released in del_entry(), which is called by
+ # Directory.clear(). While the llfuse lock is released, it can happen
+ # that a reentrant call removes this entry before this call gets to it.
+ # Ensure that the entry is still valid before trying to remove it.
+ if obj.inode not in self._entries:
+ return
+
self._total -= obj.cache_size
- del self._entries[obj.cache_priority]
+ del self._entries[obj.inode]
if obj.cache_uuid:
self._by_uuid[obj.cache_uuid].remove(obj)
if not self._by_uuid[obj.cache_uuid]:
del self._by_uuid[obj.cache_uuid]
obj.cache_uuid = None
if clear:
- _logger.debug("InodeCache cleared %i total now %i", obj.inode, self._total)
- return True
+ _logger.debug("InodeCache cleared inode %i total now %i", obj.inode, self._total)
def cap_cache(self):
if self._total > self.cap:
- for key in list(self._entries.keys()):
+ for ent in self._entries.values():
if self._total < self.cap or len(self._entries) < self.min_entries:
break
- self._remove(self._entries[key], True)
+ self._remove(ent, True)
def manage(self, obj):
if obj.persisted():
- obj.cache_priority = next(self._counter)
obj.cache_size = obj.objsize()
- self._entries[obj.cache_priority] = obj
+ self._entries[obj.inode] = obj
obj.cache_uuid = obj.uuid()
if obj.cache_uuid:
if obj.cache_uuid not in self._by_uuid:
if obj not in self._by_uuid[obj.cache_uuid]:
self._by_uuid[obj.cache_uuid].append(obj)
self._total += obj.objsize()
- _logger.debug("InodeCache touched %i (size %i) (uuid %s) total now %i", obj.inode, obj.objsize(), obj.cache_uuid, self._total)
+ _logger.debug("InodeCache touched inode %i (size %i) (uuid %s) total now %i (%i entries)",
+ obj.inode, obj.objsize(), obj.cache_uuid, self._total, len(self._entries))
self.cap_cache()
- else:
- obj.cache_priority = None
def touch(self, obj):
if obj.persisted():
- if obj.cache_priority in self._entries:
+ if obj.inode in self._entries:
self._remove(obj, False)
self.manage(obj)
def unmanage(self, obj):
- if obj.persisted() and obj.cache_priority in self._entries:
+ if obj.persisted() and obj.inode in self._entries:
self._remove(obj, True)
def find_by_uuid(self, uuid):
del self._entries[entry.inode]
with llfuse.lock_released:
entry.finalize()
- self.invalidate_inode(entry.inode)
entry.inode = None
else:
entry.dead = True
_logger.debug("del_entry on inode %i with refcount %i", entry.inode, entry.ref_count)
- def invalidate_inode(self, inode):
- llfuse.invalidate_inode(inode)
+ def invalidate_inode(self, entry):
+ if entry.has_ref(False):
+ # Only necessary if the kernel has previously done a lookup on this
+ # inode and hasn't yet forgotten about it.
+ llfuse.invalidate_inode(entry.inode)
- def invalidate_entry(self, inode, name):
- llfuse.invalidate_entry(inode, name.encode(self.encoding))
+ def invalidate_entry(self, entry, name):
+ if entry.has_ref(False):
+ # Only necessary if the kernel has previously done a lookup on this
+ # inode and hasn't yet forgotten about it.
+ llfuse.invalidate_entry(entry.inode, name.encode(self.encoding))
def clear(self):
self.inode_cache.clear()
@catch_exceptions
def destroy(self):
- with llfuse.lock:
- self._shutdown_started.set()
- if self.events:
- self.events.close()
- self.events = None
-
+ self._shutdown_started.set()
+ if self.events:
+ self.events.close()
+ self.events = None
+
+ # Different versions of llfuse require and forbid us to
+ # acquire the lock here. See #8345#note-37, #10805#note-9.
+ if LLFUSE_VERSION_0 and llfuse.lock.acquire():
+ # llfuse < 0.42
+ self.inodes.clear()
+ llfuse.lock.release()
+ else:
+ # llfuse >= 0.42
self.inodes.clear()
def access(self, inode, mode, ctx):
return True
def listen_for_events(self):
- self.events = arvados.events.subscribe(self._api_client,
- [["event_type", "in", ["create", "update", "delete"]]],
- self.on_event)
+ self.events = arvados.events.subscribe(
+ self._api_client,
+ [["event_type", "in", ["create", "update", "delete"]]],
+ self.on_event)
@catch_exceptions
def on_event(self, ev):
- if 'event_type' not in ev:
+ if 'event_type' not in ev or ev["event_type"] not in ("create", "update", "delete"):
return
with llfuse.lock:
+ properties = ev.get("properties") or {}
+ old_attrs = properties.get("old_attributes") or {}
+ new_attrs = properties.get("new_attributes") or {}
+
for item in self.inodes.inode_cache.find_by_uuid(ev["object_uuid"]):
item.invalidate()
- if ev["object_kind"] == "arvados#collection":
- new_attr = (ev.get("properties") and
- ev["properties"].get("new_attributes") and
- ev["properties"]["new_attributes"])
-
+ if ev.get("object_kind") == "arvados#collection":
+ pdh = new_attrs.get("portable_data_hash")
# new_attributes.modified_at currently lacks
# subsecond precision (see #6347) so use event_at
# which should always be the same.
- record_version = (
- (ev["event_at"], new_attr["portable_data_hash"])
- if new_attr else None)
-
- item.update(to_record_version=record_version)
- else:
- item.update()
-
- oldowner = (
- ev.get("properties") and
- ev["properties"].get("old_attributes") and
- ev["properties"]["old_attributes"].get("owner_uuid"))
- newowner = ev["object_owner_uuid"]
+ stamp = ev.get("event_at")
+ if (stamp and pdh and item.writable() and
+ item.collection is not None and
+ item.collection.modified() and
+ new_attrs.get("is_trashed") is not True):
+ item.update(to_record_version=(stamp, pdh))
+
+ oldowner = old_attrs.get("owner_uuid")
+ newowner = ev.get("object_owner_uuid")
for parent in (
self.inodes.inode_cache.find_by_uuid(oldowner) +
self.inodes.inode_cache.find_by_uuid(newowner)):
- parent.invalidate()
- parent.update()
-
+ parent.child_event(ev)
@catch_exceptions
- def getattr(self, inode):
+ def getattr(self, inode, ctx=None):
if inode not in self.inodes:
raise llfuse.FUSEError(errno.ENOENT)
entry = llfuse.EntryAttributes()
entry.st_ino = inode
entry.generation = 0
- entry.entry_timeout = 60 if e.allow_dirent_cache else 0
- entry.attr_timeout = 60 if e.allow_attr_cache else 0
+ entry.entry_timeout = 0
+ entry.attr_timeout = e.time_to_next_poll() if e.allow_attr_cache else 0
entry.st_mode = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH
if isinstance(e, Directory):
entry.st_blksize = 512
entry.st_blocks = (entry.st_size/512)+1
- entry.st_atime = int(e.atime())
- entry.st_mtime = int(e.mtime())
- entry.st_ctime = int(e.mtime())
+ if hasattr(entry, 'st_atime_ns'):
+ # llfuse >= 0.42
+ entry.st_atime_ns = int(e.atime() * 1000000000)
+ entry.st_mtime_ns = int(e.mtime() * 1000000000)
+ entry.st_ctime_ns = int(e.mtime() * 1000000000)
+ else:
+ # llfuse < 0.42
+ entry.st_atime = int(e.atime)
+ entry.st_mtime = int(e.mtime)
+ entry.st_ctime = int(e.mtime)
return entry
@catch_exceptions
- def setattr(self, inode, attr):
+ def setattr(self, inode, attr, fields=None, fh=None, ctx=None):
entry = self.getattr(inode)
- e = self.inodes[inode]
+ if fh is not None and fh in self._filehandles:
+ handle = self._filehandles[fh]
+ e = handle.obj
+ else:
+ e = self.inodes[inode]
- if attr.st_size is not None and isinstance(e, FuseArvadosFile):
+ if fields is None:
+ # llfuse < 0.42
+ update_size = attr.st_size is not None
+ else:
+ # llfuse >= 0.42
+ update_size = fields.update_size
+ if update_size and isinstance(e, FuseArvadosFile):
with llfuse.lock_released:
e.arvfile.truncate(attr.st_size)
entry.st_size = e.arvfile.size()
return entry
@catch_exceptions
- def lookup(self, parent_inode, name):
+ def lookup(self, parent_inode, name, ctx=None):
name = unicode(name, self.inodes.encoding)
inode = None
self.inodes.del_entry(ent)
@catch_exceptions
- def open(self, inode, flags):
+ def open(self, inode, flags, ctx=None):
if inode in self.inodes:
p = self.inodes[inode]
else:
fh = next(self._filehandles_counter)
self._filehandles[fh] = FileHandle(fh, p)
self.inodes.touch(p)
+
+ # Normally, we will have received an "update" event if the
+ # parent collection is stale here. However, even if the parent
+ # collection hasn't changed, the manifest might have been
+ # fetched so long ago that the signatures on the data block
+ # locators have expired. Calling checkupdate() on all
+ # ancestors ensures the signatures will be refreshed if
+ # necessary.
while p.parent_inode in self.inodes:
if p == self.inodes[p.parent_inode]:
break
@catch_exceptions
def release(self, fh):
if fh in self._filehandles:
+ _logger.debug("arv-mount release fh %i", fh)
try:
self._filehandles[fh].flush()
except Exception:
self.release(fh)
@catch_exceptions
- def opendir(self, inode):
+ def opendir(self, inode, ctx=None):
_logger.debug("arv-mount opendir: inode %i", inode)
if inode in self.inodes:
e += 1
@catch_exceptions
- def statfs(self):
+ def statfs(self, ctx=None):
st = llfuse.StatvfsData()
st.f_bsize = 128 * 1024
st.f_blocks = 0
return p
@catch_exceptions
- def create(self, inode_parent, name, mode, flags, ctx):
- _logger.debug("arv-mount create: %i '%s' %o", inode_parent, name, mode)
+ def create(self, inode_parent, name, mode, flags, ctx=None):
+ _logger.debug("arv-mount create: parent_inode %i '%s' %o", inode_parent, name, mode)
p = self._check_writable(inode_parent)
p.create(name)
return (fh, self.getattr(f.inode))
@catch_exceptions
- def mkdir(self, inode_parent, name, mode, ctx):
- _logger.debug("arv-mount mkdir: %i '%s' %o", inode_parent, name, mode)
+ def mkdir(self, inode_parent, name, mode, ctx=None):
+ _logger.debug("arv-mount mkdir: parent_inode %i '%s' %o", inode_parent, name, mode)
p = self._check_writable(inode_parent)
p.mkdir(name)
return self.getattr(d.inode)
@catch_exceptions
- def unlink(self, inode_parent, name):
- _logger.debug("arv-mount unlink: %i '%s'", inode_parent, name)
+ def unlink(self, inode_parent, name, ctx=None):
+ _logger.debug("arv-mount unlink: parent_inode %i '%s'", inode_parent, name)
p = self._check_writable(inode_parent)
p.unlink(name)
@catch_exceptions
- def rmdir(self, inode_parent, name):
- _logger.debug("arv-mount rmdir: %i '%s'", inode_parent, name)
+ def rmdir(self, inode_parent, name, ctx=None):
+ _logger.debug("arv-mount rmdir: parent_inode %i '%s'", inode_parent, name)
p = self._check_writable(inode_parent)
p.rmdir(name)
@catch_exceptions
- def rename(self, inode_parent_old, name_old, inode_parent_new, name_new):
- _logger.debug("arv-mount rename: %i '%s' %i '%s'", inode_parent_old, name_old, inode_parent_new, name_new)
+ def rename(self, inode_parent_old, name_old, inode_parent_new, name_new, ctx=None):
+ _logger.debug("arv-mount rename: old_parent_inode %i '%s' new_parent_inode %i '%s'", inode_parent_old, name_old, inode_parent_new, name_new)
src = self._check_writable(inode_parent_old)
dest = self._check_writable(inode_parent_new)
dest.rename(name_old, name_new, src)