Merge branch '3877-log-memory-leak' closes #3877
[arvados.git] / services / fuse / arvados_fuse / __init__.py
index aecc7a9417d788e189d4d8ce3a8d068f071f83d8..f49b94777b76884ace7275a11367c83856c2b481 100644 (file)
@@ -19,16 +19,10 @@ import logging
 import time
 import calendar
 import threading
+from arvados.util import portable_data_hash_pattern, uuid_pattern, collection_uuid_pattern, group_uuid_pattern, user_uuid_pattern, link_uuid_pattern
 
 _logger = logging.getLogger('arvados.arvados_fuse')
 
-portable_data_hash_pattern = re.compile(r'[0-9a-f]{32}\+\d+')
-uuid_pattern = re.compile(r'[a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15}')
-collection_uuid_pattern = re.compile(r'[a-z0-9]{5}-4zz18-[a-z0-9]{15}')
-group_uuid_pattern = re.compile(r'[a-z0-9]{5}-j7d0g-[a-z0-9]{15}')
-user_uuid_pattern = re.compile(r'[a-z0-9]{5}-tpzed-[a-z0-9]{15}')
-link_uuid_pattern = re.compile(r'[a-z0-9]{5}-o0j2j-[a-z0-9]{15}')
-
 class SafeApi(object):
     '''Threadsafe wrapper for API object.  This stores and returns a different api
     object per thread, because httplib2 which underlies apiclient is not
@@ -40,12 +34,18 @@ class SafeApi(object):
         self.token = config.get('ARVADOS_API_TOKEN')
         self.insecure = config.flag_is_true('ARVADOS_API_HOST_INSECURE')
         self.local = threading.local()
+        self.block_cache = arvados.KeepBlockCache()
 
     def localapi(self):
         if 'api' not in self.local.__dict__:
             self.local.api = arvados.api('v1', False, self.host, self.token, self.insecure)
         return self.local.api
 
+    def localkeep(self):
+        if 'keep' not in self.local.__dict__:
+            self.local.keep = arvados.KeepClient(api_client=self.localapi(), block_cache=self.block_cache)
+        return self.local.keep
+
     def collections(self):
         return self.localapi().collections()
 
@@ -57,10 +57,13 @@ class SafeApi(object):
 
     def users(self):
         return self.localapi().users()
-        
+
 def convertTime(t):
     '''Parse Arvados timestamp to unix time.'''
-    return calendar.timegm(time.strptime(t, "%Y-%m-%dT%H:%M:%SZ"))
+    try:
+        return calendar.timegm(time.strptime(t, "%Y-%m-%dT%H:%M:%SZ"))
+    except (TypeError, ValueError):
+        return 0
 
 def sanitize_filename(dirty):
     '''Remove troublesome characters from filenames.'''
@@ -96,9 +99,8 @@ class FreshBase(object):
     def invalidate(self):
         self._stale = True
 
-    # Test if the entries dict is stale.  Also updates atime.
+    # Test if the entries dict is stale.
     def stale(self):
-        self._atime = time.time()
         if self._stale:
             return True
         if self._poll:
@@ -158,7 +160,7 @@ class StringFile(File):
         return len(self.contents)
 
     def readfrom(self, off, size):
-        return self.contents[off:(off+size)]    
+        return self.contents[off:(off+size)]
 
 
 class ObjectFile(StringFile):
@@ -259,10 +261,10 @@ class Directory(FreshBase):
                     ent = new_entry(i)
                     if ent is not None:
                         self._entries[name] = self.inodes.add_entry(ent)
-                    changed = True
+                        changed = True
 
         # delete any other directory entries that were not found in 'items'
-        for i in oldentries:            
+        for i in oldentries:
             llfuse.invalidate_entry(self.inode, str(i))
             self.inodes.del_entry(oldentries[i])
             changed = True
@@ -311,7 +313,7 @@ class CollectionDirectory(Directory):
             self.collection_object_file.update(self.collection_object)
 
         self.clear()
-        collection = arvados.CollectionReader(self.collection_object["manifest_text"], self.api)
+        collection = arvados.CollectionReader(self.collection_object["manifest_text"], self.api, self.api.localkeep())
         for s in collection.all_streams():
             cwd = self
             for part in s.name().split('/'):
@@ -321,7 +323,7 @@ class CollectionDirectory(Directory):
                         cwd._entries[partname] = self.inodes.add_entry(Directory(cwd.inode))
                     cwd = cwd._entries[partname]
             for k, v in s.files().items():
-                cwd._entries[sanitize_filename(k)] = self.inodes.add_entry(StreamReaderFile(cwd.inode, v, self.mtime()))        
+                cwd._entries[sanitize_filename(k)] = self.inodes.add_entry(StreamReaderFile(cwd.inode, v, self.mtime()))
 
     def update(self):
         try:
@@ -345,11 +347,15 @@ class CollectionDirectory(Directory):
             else:
                 _logger.error("arv-mount %s: error", self.collection_locator)
                 _logger.exception(detail)
+        except arvados.errors.ArgumentError as detail:
+            _logger.warning("arv-mount %s: error %s", self.collection_locator, detail)
+            if self.collection_object is not None and "manifest_text" in self.collection_object:
+                _logger.warning("arv-mount manifest_text is: %s", self.collection_object["manifest_text"])
         except Exception as detail:
             _logger.error("arv-mount %s: error", self.collection_locator)
-            if "manifest_text" in self.collection_object:
+            if self.collection_object is not None and "manifest_text" in self.collection_object:
                 _logger.error("arv-mount manifest_text is: %s", self.collection_object["manifest_text"])
-            _logger.exception(detail)                
+            _logger.exception(detail)
         return False
 
     def __getitem__(self, item):
@@ -387,9 +393,11 @@ class MagicDirectory(Directory):
         super(MagicDirectory, self).__init__(parent_inode)
         self.inodes = inodes
         self.api = api
+        # Have to defer creating readme_file because at this point we don't
+        # yet have an inode assigned.
         self.readme_file = None
 
-    def __contains__(self, k):
+    def create_readme(self):
         if self.readme_file is None:
             text = '''This directory provides access to Arvados collections as subdirectories listed
 by uuid (in the form 'zzzzz-4zz18-1234567890abcde') or portable data hash (in
@@ -403,6 +411,9 @@ will appear if it exists.
             self.readme_file = self.inodes.add_entry(StringFile(self.inode, text, time.time()))
             self._entries["README"] = self.readme_file
 
+    def __contains__(self, k):
+        self.create_readme()
+
         if k in self._entries:
             return True
 
@@ -420,6 +431,10 @@ will appear if it exists.
             _logger.debug('arv-mount exception keep %s', e)
             return False
 
+    def items(self):
+        self.create_readme()
+        return self._entries.items()
+
     def __getitem__(self, item):
         if item in self:
             return self._entries[item]
@@ -487,7 +502,7 @@ class TagDirectory(Directory):
                    lambda i: CollectionDirectory(self.inode, self.inodes, self.api, i['head_uuid']))
 
 
-class ProjectDirectory(RecursiveInvalidateDirectory):
+class ProjectDirectory(Directory):
     '''A special directory that contains the contents of a project.'''
 
     def __init__(self, parent_inode, inodes, api, project_object, poll=False, poll_time=60):
@@ -495,8 +510,7 @@ class ProjectDirectory(RecursiveInvalidateDirectory):
         self.inodes = inodes
         self.api = api
         self.project_object = project_object
-        self.project_object_file = ObjectFile(self.inode, self.project_object)
-        self.inodes.add_entry(self.project_object_file)
+        self.project_object_file = None
         self.uuid = project_object['uuid']
 
     def createDirectory(self, i):
@@ -515,6 +529,10 @@ class ProjectDirectory(RecursiveInvalidateDirectory):
             return None
 
     def update(self):
+        if self.project_object_file == None:
+            self.project_object_file = ObjectFile(self.inode, self.project_object)
+            self.inodes.add_entry(self.project_object_file)
+
         def namefn(i):
             if 'name' in i:
                 if i['name'] is None or len(i['name']) == 0:
@@ -527,7 +545,7 @@ class ProjectDirectory(RecursiveInvalidateDirectory):
                     return i['name']
                 elif 'kind' in i and i['kind'].startswith('arvados#'):
                     # something else
-                    return "{}.{}".format(i['name'], i['kind'][8:])                    
+                    return "{}.{}".format(i['name'], i['kind'][8:])
             else:
                 return None
 
@@ -549,7 +567,7 @@ class ProjectDirectory(RecursiveInvalidateDirectory):
             contents = arvados.util.list_all(self.api.groups().contents, uuid=self.uuid)
             # Name links will be obsolete soon, take this out when there are no more pre-#3036 in use.
             contents += arvados.util.list_all(self.api.links().list, filters=[['tail_uuid', '=', self.uuid], ['link_class', '=', 'name']])
-            
+
         # end with llfuse.lock_released, re-acquire lock
 
         self.merge(contents,
@@ -571,7 +589,7 @@ class ProjectDirectory(RecursiveInvalidateDirectory):
             return super(ProjectDirectory, self).__contains__(k)
 
 
-class SharedDirectory(RecursiveInvalidateDirectory):
+class SharedDirectory(Directory):
     '''A special directory that represents users or groups who have shared projects with me.'''
 
     def __init__(self, parent_inode, inodes, api, exclude, poll=False, poll_time=60):
@@ -750,7 +768,7 @@ class Operations(llfuse.Operations):
                 p = self.inodes[parent_inode]
                 if name == '..':
                     inode = p.parent_inode
-                elif name in p:
+                elif isinstance(p, Directory) and name in p:
                     inode = p[name].inode
 
         if inode != None:
@@ -782,10 +800,17 @@ class Operations(llfuse.Operations):
         else:
             raise llfuse.FUSEError(errno.EBADF)
 
+        # update atime
+        handle.entry._atime = time.time()
+
         try:
             with llfuse.lock_released:
                 return handle.entry.readfrom(off, size)
-        except:
+        except arvados.errors.NotFoundError as e:
+            _logger.warning("Block not found: " + str(e))
+            raise llfuse.FUSEError(errno.EIO)
+        except Exception as e:
+            _logger.exception(e)
             raise llfuse.FUSEError(errno.EIO)
 
     def release(self, fh):
@@ -810,6 +835,9 @@ class Operations(llfuse.Operations):
         else:
             raise llfuse.FUSEError(errno.EIO)
 
+        # update atime
+        p._atime = time.time()
+
         self._filehandles[fh] = FileHandle(fh, [('.', p), ('..', parent)] + list(p.items()))
         return fh