3644: Tested, fixed various mount modes.
[arvados.git] / services / fuse / arvados_fuse / __init__.py
index e7b05ef04c1b000afa7b60a59ff83d8f79e324bc..6d55b3481e14b09012b00455eca8116cbc27625e 100644 (file)
@@ -24,6 +24,26 @@ _logger = logging.getLogger('arvados.arvados_fuse')
 def convertTime(t):
     return calendar.timegm(time.strptime(t, "%Y-%m-%dT%H:%M:%SZ"))
 
+def sanitize_filename(dirty):
+    # http://www.dwheeler.com/essays/fixing-unix-linux-filenames.html
+    if dirty is None:
+        return None
+
+    fn = ""
+    for c in dirty:
+        if (c >= '\x00' and c <= '\x1f') or c == '\x7f' or c == '/':
+            # skip control characters and /
+            continue
+        fn += c
+
+    # strip leading - or ~ and leading/trailing whitespace
+    stripped = fn.lstrip("-~ ").rstrip()
+    if len(stripped) > 0:
+        return stripped
+    else:
+        return None
+
+
 class FreshBase(object):
     '''Base class for maintaining fresh/stale state to determine when to update.'''
     def __init__(self):
@@ -58,10 +78,12 @@ class FreshBase(object):
 class File(FreshBase):
     '''Base for file objects.'''
 
-    def __init__(self, parent_inode):
+    def __init__(self, parent_inode, _ctime=0, _mtime=0):
         super(File, self).__init__()
         self.inode = None
         self.parent_inode = parent_inode
+        self._ctime = _ctime
+        self._mtime = _mtime
 
     def size(self):
         return 0
@@ -69,14 +91,19 @@ class File(FreshBase):
     def readfrom(self, off, size):
         return ''
 
+    def ctime(self):
+        return self._ctime
+
+    def mtime(self):
+        return self._mtime
+
 
 class StreamReaderFile(File):
     '''Wraps a StreamFileReader as a file.'''
 
-    def __init__(self, parent_inode, reader, collection):
-        super(StreamReaderFile, self).__init__(parent_inode)
+    def __init__(self, parent_inode, reader, _ctime, _mtime):
+        super(StreamReaderFile, self).__init__(parent_inode, _ctime, _mtime)
         self.reader = reader
-        self.collection = collection
 
     def size(self):
         return self.reader.size()
@@ -87,27 +114,28 @@ class StreamReaderFile(File):
     def stale(self):
         return False
 
-    def ctime(self):
-        return convertTime(self.collection["created_at"])
 
-    def mtime(self):
-        return convertTime(self.collection["modified_at"])
+class StringFile(File):
+    '''Wrap a simple string as a file'''
+    def __init__(self, parent_inode, contents, _ctime, _mtime):
+        super(StringFile, self).__init__(parent_inode, _ctime, _mtime)
+        self.contents = contents
+
+    def size(self):
+        return len(self.contents)
 
+    def readfrom(self, off, size):
+        return self.contents[off:(off+size)]    
 
-class ObjectFile(File):
-    '''Wraps a dict as a serialized json object.'''
+class ObjectFile(StringFile):
+    '''Wrap a dict as a serialized json object.'''
 
     def __init__(self, parent_inode, contents):
-        super(ObjectFile, self).__init__(parent_inode)
+        _ctime = convertTime(contents['created_at']) if 'created_at' in contents else 0
+        _mtime = convertTime(contents['modified_at']) if 'modified_at' in contents else 0
+        super(ObjectFile, self).__init__(parent_inode, json.dumps(contents, indent=4, sort_keys=True)+"\n", _ctime, _mtime)
         self.contentsdict = contents
         self.uuid = self.contentsdict['uuid']
-        self.contents = json.dumps(self.contentsdict, indent=4, sort_keys=True)
-
-    def size(self):
-        return len(self.contents)
-
-    def readfrom(self, off, size):
-        return self.contents[off:(off+size)]
 
 
 class Directory(FreshBase):
@@ -160,33 +188,44 @@ class Directory(FreshBase):
         return k in self._entries
 
     def merge(self, items, fn, same, new_entry):
-        '''Helper method for updating the contents of the directory.
+        '''Helper method for updating the contents of the directory.  Takes a list
+        describing the new contents of the directory, reuses entries that are
+        the same in both the old and new lists, creates new entries, and deletes
+        old entries missing from the new list.
 
-        items: array with new directory contents
+        items: iterable with new directory contents
 
         fn: function to take an entry in 'items' and return the desired file or
-        directory name
+        directory name, or None if this entry should be skipped
 
-        same: function to compare an existing entry with an entry in the items
-        list to determine whether to keep the existing entry.
+        same: function to compare an existing entry (a File or Directory
+        object) with an entry in the items list to determine whether to keep
+        the existing entry.
+
+        new_entry: function to create a new directory entry (File or Directory
+        object) from an entry in the items list.
 
-        new_entry: function to create a new directory entry from array entry.
         '''
 
         oldentries = self._entries
         self._entries = {}
         for i in items:
-            n = fn(i)
-            if n in oldentries and same(oldentries[n], i):
-                self._entries[n] = oldentries[n]
-                del oldentries[n]
-            else:
-                ent = new_entry(i)
-                if ent is not None:
-                    self._entries[n] = self.inodes.add_entry(ent)
-        for n in oldentries:
-            llfuse.invalidate_entry(self.inode, str(n))
-            self.inodes.del_entry(oldentries[n])
+            name = sanitize_filename(fn(i))
+            if name:
+                if name in oldentries and same(oldentries[name], i):
+                    # move existing directory entry over
+                    self._entries[name] = oldentries[name]
+                    del oldentries[name]
+                else:
+                    # create new directory entry
+                    ent = new_entry(i)
+                    if ent is not None:
+                        self._entries[name] = self.inodes.add_entry(ent)
+
+        # delete any other directory entries that were not found in 'items'
+        for i in oldentries:            
+            llfuse.invalidate_entry(self.inode, str(i))
+            self.inodes.del_entry(oldentries[i])
         self.fresh()
 
     def clear(self):
@@ -209,39 +248,80 @@ class CollectionDirectory(Directory):
         self.inodes = inodes
         self.api = api
         self.collection_locator = collection_locator
-        self.portable_data_hash = None
-        self.collection_object = self.api.collections().get(uuid=self.collection_locator).execute()
+        self.manifest_text_file = None
+        self.pdh_file = None
+        self.collection_object = None
 
     def same(self, i):
         return i['uuid'] == self.collection_locator or i['portable_data_hash'] == self.collection_locator
 
     def update(self):
         try:
-            self.collection_object = self.api.collections().get(uuid=self.collection_locator).execute()
-            if self.portable_data_hash != self.collection_object["portable_data_hash"]:
-                self.portable_data_hash = self.collection_object["portable_data_hash"]
+            if self.collection_object is not None and re.match(r'^[a-f0-9]{32}', self.collection_locator):
+                return True
+            #with llfuse.lock_released:
+            new_collection_object = self.api.collections().get(uuid=self.collection_locator).execute()
+            if "portable_data_hash" not in new_collection_object:
+                new_collection_object["portable_data_hash"] = new_collection_object["uuid"]
+
+            if self.collection_object is None or self.collection_object["portable_data_hash"] != new_collection_object["portable_data_hash"]:
+                self.collection_object = new_collection_object
+
+                if self.manifest_text_file is not None:
+                    self.manifest_text_file.contents = self.collection_object["manifest_text"]
+                    self.manifest_text_file._ctime = self.ctime()
+                    self.manifest_text_file._mtime = self.mtime()
+                if self.pdh_file is not None:
+                    self.pdh_file.contents = self.collection_object["portable_data_hash"]
+                    self.pdh_file._ctime = self.ctime()
+                    self.pdh_file._mtime = self.mtime()
+
                 self.clear()
                 collection = arvados.CollectionReader(self.collection_object["manifest_text"], self.api)
                 for s in collection.all_streams():
                     cwd = self
                     for part in s.name().split('/'):
                         if part != '' and part != '.':
-                            if part not in cwd._entries:
-                                cwd._entries[part] = self.inodes.add_entry(Directory(cwd.inode))
-                            cwd = cwd._entries[part]
+                            partname = sanitize_filename(part)
+                            if partname not in cwd._entries:
+                                cwd._entries[partname] = self.inodes.add_entry(Directory(cwd.inode))
+                            cwd = cwd._entries[partname]
                     for k, v in s.files().items():
-                        cwd._entries[k] = self.inodes.add_entry(StreamReaderFile(cwd.inode, v, self.collection_object))
+                        cwd._entries[sanitize_filename(k)] = self.inodes.add_entry(StreamReaderFile(cwd.inode, v, self.ctime(), self.mtime()))
             self.fresh()
             return True
         except Exception as detail:
-            _logger.debug("arv-mount %s: error: %s",
-                          self.collection_locator, detail)
+            _logger.error("arv-mount %s: error", self.collection_locator)
+            _logger.exception(detail)
             return False
 
+    def __getitem__(self, item):
+        self.checkupdate()
+        if item == '.manifest_text':
+            if self.manifest_text_file is None:
+                self.manifest_text_file = StringFile(self.inode, self.collection_object["manifest_text"], self.ctime(), self.mtime())
+                self.inodes.add_entry(self.manifest_text_file)
+            return self.manifest_text_file
+        elif item == '.portable_data_hash':
+            if self.pdh_file is None:
+                self.pdh_file = StringFile(self.inode, self.collection_object["portable_data_hash"], self.ctime(), self.mtime())
+                self.inodes.add_entry(self.pdh_file)
+            return self.pdh_file
+        else:
+            return super(CollectionDirectory, self).__getitem__(item)
+
+    def __contains__(self, k):
+        if k in ('.manifest_text', '.portable_data_hash'):
+            return True
+        else:
+            return super(CollectionDirectory, self).__contains__(k)
+
     def ctime(self):
+        self.checkupdate()
         return convertTime(self.collection_object["created_at"])
 
     def mtime(self):
+        self.checkupdate()
         return convertTime(self.collection_object["modified_at"])
 
 class MagicDirectory(Directory):
@@ -300,11 +380,11 @@ class TagsDirectory(RecursiveInvalidateDirectory):
         super(TagsDirectory, self).__init__(parent_inode)
         self.inodes = inodes
         self.api = api
-        try:
-            arvados.events.subscribe(self.api, [['object_uuid', 'is_a', 'arvados#link']], lambda ev: self.invalidate())
-        except:
-            self._poll = True
-            self._poll_time = poll_time
+        #try:
+        #    arvados.events.subscribe(self.api, [['object_uuid', 'is_a', 'arvados#link']], lambda ev: self.invalidate())
+        #except:
+        self._poll = True
+        self._poll_time = poll_time
 
     def update(self):
         tags = self.api.links().list(filters=[['link_class', '=', 'tag']], select=['name'], distinct = True).execute()
@@ -341,44 +421,45 @@ class TagDirectory(Directory):
 class ProjectDirectory(RecursiveInvalidateDirectory):
     '''A special directory that contains the contents of a project.'''
 
-    def __init__(self, parent_inode, inodes, api, uuid, poll=False, poll_time=60):
+    def __init__(self, parent_inode, inodes, api, project_object, poll=False, poll_time=60):
         super(ProjectDirectory, self).__init__(parent_inode)
         self.inodes = inodes
         self.api = api
-        self.uuid = uuid['uuid']
-
-        self.project_object = None
-        if re.match(r'[a-z0-9]{5}-j7d0g-[a-z0-9]{15}', self.uuid):
-            self.project_object = self.api.groups().get(uuid=self.uuid).execute()
-
-        if parent_inode == llfuse.ROOT_INODE:
-            try:
-                arvados.events.subscribe(self.api, [], lambda ev: self.invalidate())
-            except:
-                self._poll = True
-                self._poll_time = poll_time
-        else:
-            self._poll = poll
-            self._poll_time = poll_time
-
+        self.project_object = project_object
+        self.uuid = project_object['uuid']
 
     def createDirectory(self, i):
-        if re.match(r'[a-z0-9]{5}-4zz18-[a-z0-9]{15}', i['uuid']) and i['name'] is not None:
+        if re.match(r'[a-z0-9]{5}-4zz18-[a-z0-9]{15}', i['uuid']):
             return CollectionDirectory(self.inode, self.inodes, self.api, i['uuid'])
         elif re.match(r'[a-z0-9]{5}-j7d0g-[a-z0-9]{15}', i['uuid']):
             return ProjectDirectory(self.inode, self.inodes, self.api, i, self._poll, self._poll_time)
+        elif re.match(r'[a-z0-9]{5}-o0j2j-[a-z0-9]{15}', i['uuid']) and i['head_kind'] == 'arvados#collection':
+            return CollectionDirectory(self.inode, self.inodes, self.api, i['head_uuid'])
         #elif re.match(r'[a-z0-9]{5}-8i9sb-[a-z0-9]{15}', i['uuid']):
         #    return None
-        #elif re.match(r'[a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15}', i['uuid']):
-        #    return ObjectFile(self.parent_inode, i)
+        elif re.match(r'[a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15}', i['uuid']):
+            return ObjectFile(self.parent_inode, i)
         else:
             return None
 
-    def contents(self):
-        return arvados.util.all_contents(self.api, self.uuid)
-
     def update(self):
-        def same(a, i):
+        def namefn(i):
+            if 'name' in i:
+                if i['name'] is None:
+                    return None
+                elif re.match(r'[a-z0-9]{5}-(4zz18|j7d0g)-[a-z0-9]{15}', i['uuid']):
+                    # collection or subproject
+                    return i['name']
+                elif re.match(r'[a-z0-9]{5}-o0j2j-[a-z0-9]{15}', i['uuid']) and i['head_kind'] == 'arvados#collection':
+                    # name link
+                    return i['name']
+                elif 'kind' in i and i['kind'].startswith('arvados#'):
+                    # something else
+                    return "{}.{}".format(i['name'], i['kind'][8:])                    
+            else:
+                return None
+
+        def samefn(a, i):
             if isinstance(a, CollectionDirectory):
                 return a.collection_locator == i['uuid']
             elif isinstance(a, ProjectDirectory):
@@ -387,58 +468,93 @@ class ProjectDirectory(RecursiveInvalidateDirectory):
                 return a.uuid == i['uuid'] and not a.stale()
             return False
 
+        #with llfuse.lock_released:
         if re.match(r'[a-z0-9]{5}-j7d0g-[a-z0-9]{15}', self.uuid):
             self.project_object = self.api.groups().get(uuid=self.uuid).execute()
+        elif re.match(r'[a-z0-9]{5}-tpzed-[a-z0-9]{15}', self.uuid):
+            self.project_object = self.api.users().get(uuid=self.uuid).execute()
+
+        contents = arvados.util.list_all(self.api.groups().contents, uuid=self.uuid)
+        # Name links will be obsolete soon; take this out once nothing pre-#3036 is still in use.
+        contents += arvados.util.list_all(self.api.links().list, filters=[['tail_uuid', '=', self.uuid], ['link_class', '=', 'name']])
+
+        #print contents
 
-        self.merge(self.contents(),
-                   lambda i: i['name'] if 'name' in i and i['name'] is not None and len(i['name']) > 0 else i['uuid'],
-                   same,
+        self.merge(contents,
+                   namefn,
+                   samefn,
                    self.createDirectory)
 
     def ctime(self):
-        return convertTime(self.project_object["created_at"]) if self.project_object is not None else 0
+        return convertTime(self.project_object["created_at"]) if "created_at" in self.project_object else 0
 
     def mtime(self):
-        return convertTime(self.project_object["modified_at"]) if self.project_object is not None else 0
+        return convertTime(self.project_object["modified_at"]) if "modified_at" in self.project_object  else 0
 
 
 
-class HomeDirectory(ProjectDirectory):
-    '''A special directory that represents the "home" project.'''
+class SharedDirectory(RecursiveInvalidateDirectory):
+    '''A special directory that represents users or groups who have shared projects with me.'''
 
-    def __init__(self, parent_inode, inodes, api, poll=False, poll_time=60):
+    def __init__(self, parent_inode, inodes, api, exclude, poll=False, poll_time=60):
+        super(SharedDirectory, self).__init__(parent_inode)
         self.current_user = api.users().current().execute()
-        super(HomeDirectory, self).__init__(parent_inode, inodes, api, self.current_user)
+        self.inodes = inodes
+        self.api = api
 
-    def build_project_trees():
-        all_projects = self.api.groups().list(filters=[['group_class','=','project']], order=['name']).execute()['items']
-        parent_of = {self.current_user['uuid']: 'me'}
-        for ob in all_projects:
-            parent_of[ob['uuid']] = ob['owner_uuid']
-        children_of = {False: [], 'me': [self.current_user]}
+        # try:
+        #     arvados.events.subscribe(self.api, [], lambda ev: self.invalidate())
+        # except:
+        self._poll = True
+        self._poll_time = poll_time
 
+    def update(self):
+        #with llfuse.lock_released:
+        all_projects = arvados.util.list_all(self.api.groups().list, filters=[['group_class','=','project']])
+        objects = {}
         for ob in all_projects:
-            if ob['owner_uuid'] != self.current_user['uuid'] and ob['owner_uuid'] not in parent_of:
-                parent_of[ob['uuid']] = False
-            if parent_of[ob['uuid']] not in children_of:
-                children_of[parent_of[ob['uuid']]] = []
-            children_of[parent_of[ob['uuid']]] += ob
-
-        def buildtree(children_of, root_uuid):
-            tree = {}
-            for ob in children_of[root_uuid]:
-                tree[ob] = buildtree(children_of, ob['uuid'])
-            return tree
+            objects[ob['uuid']] = ob
 
-        my_project_tree = buildtree(children_of, 'me')
-        shared_project_tree = buildtree(children_of, False)
-
-        import pprint
-        pprint.pprint(my_project_tree)
-        pprint.pprint(shared_project_tree)
+        roots = []
+        root_owners = {}
+        for ob in all_projects:
+            if ob['owner_uuid'] != self.current_user['uuid'] and ob['owner_uuid'] not in objects:
+                roots.append(ob)
+                root_owners[ob['owner_uuid']] = True
+
+        #with llfuse.lock_released:
+        lusers = arvados.util.list_all(self.api.users().list, filters=[['uuid','in', list(root_owners)]])
+        lgroups = arvados.util.list_all(self.api.groups().list, filters=[['uuid','in', list(root_owners)]])
+
+        users = {}
+        groups = {}
+
+        for l in lusers:
+            objects[l["uuid"]] = l
+        for l in lgroups:
+            objects[l["uuid"]] = l
+
+        contents = {}
+        for r in root_owners:
+            if r in objects:
+                obr = objects[r]
+                if "name" in obr:
+                    contents[obr["name"]] = obr
+                if "first_name" in obr:
+                    contents[u"{} {}".format(obr["first_name"], obr["last_name"])] = obr
+
+        for r in roots:
+            if r['owner_uuid'] not in objects:
+                contents[r['name']] = r
+        
+        try:
+            self.merge(contents.items(),
+                       lambda i: i[0],
+                       lambda a, i: a.uuid == i[1]['uuid'],
+                       lambda i: ProjectDirectory(self.inode, self.inodes, self.api, i[1], poll=self._poll, poll_time=self._poll_time))
+        except Exception as e:
+            _logger.exception(e)
 
-    #def contents(self):
-    #    return self.api.groups().contents(uuid=self.uuid).execute()['items']
 
 class FileHandle(object):
     '''Connects a numeric file handle to a File or Directory object that has
@@ -539,9 +655,9 @@ class Operations(llfuse.Operations):
 
         entry.st_size = e.size()
 
-        entry.st_blksize = 1024
-        entry.st_blocks = e.size()/1024
-        if e.size()/1024 != 0:
+        entry.st_blksize = 512
+        entry.st_blocks = (e.size()/512)
+        if e.size()/512 != 0:
             entry.st_blocks += 1
         entry.st_atime = 0
         entry.st_mtime = e.mtime()
@@ -645,7 +761,7 @@ class Operations(llfuse.Operations):
 
     def statfs(self):
         st = llfuse.StatvfsData()
-        st.f_bsize = 1024 * 1024
+        st.f_bsize = 64 * 1024
         st.f_blocks = 0
         st.f_files = 0