21541: Code cleanups 21541-arv-mount-keyerror-rebase
author Peter Amstutz <peter.amstutz@curii.com>
Tue, 2 Apr 2024 14:52:09 +0000 (10:52 -0400)
committer Peter Amstutz <peter.amstutz@curii.com>
Tue, 2 Apr 2024 14:52:09 +0000 (10:52 -0400)
* Remove the gc threshold change
* Add/improve comments
* Use "truthy" tests instead of len(foo) > 0

Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz@curii.com>

services/fuse/arvados_fuse/__init__.py
services/fuse/arvados_fuse/fusedir.py

index aeb0ce4ba7426103e3746d27e05a21cc34912fe4..d827aefab70a3292780799721766c6fea002c52e 100644 (file)
@@ -77,7 +77,6 @@ from prometheus_client import Summary
 import queue
 from dataclasses import dataclass
 import typing
-import gc
 
 from .fusedir import Directory, CollectionDirectory, TmpCollectionDirectory, MagicDirectory, TagsDirectory, ProjectDirectory, SharedDirectory, CollectionDirectoryBase
 from .fusefile import File, StringFile, FuseArvadosFile
@@ -154,6 +153,9 @@ class InodeCache(object):
     """
 
     def __init__(self, cap, min_entries=4):
+        # Standard dictionaries are ordered, but OrderedDict is still better here; see
+        # https://docs.python.org/3.11/library/collections.html#ordereddict-objects
+        # Specifically, we use move_to_end(), which standard dicts don't have.
         self._cache_entries = collections.OrderedDict()
         self.cap = cap
         self._total = 0
@@ -188,7 +190,7 @@ class InodeCache(object):
         # "values"
         values = collections.deque(self._cache_entries.values())
 
-        while len(values) > 0:
+        while values:
             if self._total < self.cap or len(self._cache_entries) < self.min_entries:
                 break
             yield values.popleft()
@@ -410,7 +412,7 @@ class Inodes(object):
                 qentry = None
 
             with llfuse.lock:
-                while len(locked_ops) > 0:
+                while locked_ops:
                     if locked_ops.popleft().inode_op(self, None):
                         self._inode_remove_queue.task_done()
                 self.cap_cache_event.clear()
@@ -627,13 +629,6 @@ class Operations(llfuse.Operations):
 
         self.events = None
 
-        # We rely on the cyclic garbage collector to deallocate
-        # Collection objects from the Python SDK.  A lower GC
-        # threshold encourages Python to be more aggressive in
-        # reclaiming these and seems to slow down the growth in memory
-        # usage over time.
-        gc.set_threshold(200)
-
     def init(self):
         # Allow threads that are waiting for the driver to be finished
         # initializing to continue
index dbb7bb83f7b2e61750b243e83b4ea78d2087d412..9c78805107358dadf8b2f87221154753399b2c63 100644 (file)
@@ -218,7 +218,7 @@ class Directory(FreshBase):
 
     def clear(self):
         """Delete all entries"""
-        if len(self._entries) == 0:
+        if not self._entries:
             return
         oldentries = self._entries
         self._entries = {}
@@ -645,9 +645,10 @@ class CollectionDirectory(CollectionDirectoryBase):
 
     def objsize(self):
         # This is a rough guess of the amount of overhead involved for
-        # a collection; the calculation is each file averages 128
-        # bytes in the manifest, but consume 1024 bytes when blown up
-        # into Python data structures.
+        # a collection; the assumptions are that each file averages
+        # 128 bytes in the manifest, but consumes 1024 bytes of
+        # Python data structures, so 1024/128=8 means we estimate the
+        # RAM footprint at 8 times the size of the bare manifest text.
         return self._manifest_size * 8
 
     def finalize(self):