8 from fusefile import StringFile, StreamReaderFile, ObjectFile
9 from fresh import FreshBase, convertTime
11 from arvados.util import portable_data_hash_pattern, uuid_pattern, collection_uuid_pattern, group_uuid_pattern, user_uuid_pattern, link_uuid_pattern
# Module-level logger, obtained under the name 'arvados.arvados_fuse'.
13 _logger = logging.getLogger('arvados.arvados_fuse')
16 # Match any character which FUSE or Linux cannot accommodate as part
17 # of a filename. (If present in a collection filename, they will
18 # appear as underscores in the fuse mount.)
# The pattern is a single character class containing NUL ('\x00') and '/'.
# It is compiled once here and reused by sanitize_filename.
19 _disallowed_filename_characters = re.compile('[\x00/]')
# Map a candidate file name to one that can be shown in the mount.
#   dirty: the name to clean.
#   Visible return: `dirty` with every match of
#   _disallowed_filename_characters replaced by "_".
21 def sanitize_filename(dirty):
22 """Replace disallowed filename characters with harmless "_"."""
# NOTE(review): the listing numbers jump from 22 to 32, so statements between
# the docstring and this return are not visible here; only the substitution
# below can be confirmed from this view.
32 return _disallowed_filename_characters.sub('_', dirty)
# Base directory type of this file; derives from FreshBase. The visible methods
# read and write self._entries, a mapping from entry name to entry object.
35 class Directory(FreshBase):
36 """Generic directory object, backed by a dict.
38 Consists of a set of entries with the key representing the filename
39 and the value referencing a File or Directory object.
42 def __init__(self, parent_inode):
# Stores parent_inode after checking that it is an int (a plain Exception is
# raised otherwise) and records the current time in self._mtime.
43 super(Directory, self).__init__()
# NOTE(review): this string comes after the super() call, so it is an ordinary
# expression statement, not the method's docstring.
45 """parent_inode is the integer inode number"""
47 if not isinstance(parent_inode, int):
48 raise Exception("parent_inode should be an int")
49 self.parent_inode = parent_inode
51 self._mtime = time.time()
53 # Overriden by subclasses to implement logic to update the entries dict
54 # when the directory is stale
58 # Only used when computing the size of the disk footprint of the directory
# NOTE(review): the definitions that the comments above introduce are not
# visible in this listing (its embedded line numbers skip them).
63 def checkupdate(self):
# Only the HttpError handler header is visible; the guarded statements and the
# handler body are elided, so what is attempted and how the error is handled
# cannot be stated from here.
67 except apiclient.errors.HttpError as e:
70 def __getitem__(self, item):
# Lookup by entry name; a missing name raises whatever self._entries raises
# (KeyError for a dict).
72 return self._entries[item]
# (enclosing def not shown) Returns the (name, entry) pairs of self._entries.
76 return self._entries.items()
# (enclosing def not shown) Returns an iterator over the entry names.
# NOTE(review): dict.iterkeys() exists only in Python 2.
80 return self._entries.iterkeys()
82 def __contains__(self, k):
# True when k is an existing entry name.
84 return k in self._entries
86 def merge(self, items, fn, same, new_entry):
87 """Helper method for updating the contents of the directory.
89 Takes a list describing the new contents of the directory, reuse
90 entries that are the same in both the old and new lists, create new
91 entries, and delete old entries missing from the new list.
93 :items: iterable with new directory contents
95 :fn: function to take an entry in 'items' and return the desired file or
96 directory name, or None if this entry should be skipped
98 :same: function to compare an existing entry (a File or Directory
99 object) with an entry in the items list to determine whether to keep
102 :new_entry: function to create a new directory entry (File or Directory
103 object) from an entry in the items list.
107 oldentries = self._entries
# oldentries keeps a reference to the previous mapping so that surviving entries
# can be copied across by name below.
# NOTE(review): the loop header that binds `i`, and presumably a statement that
# rebinds self._entries to a fresh mapping, are elided from this listing.
# Every name produced by fn() goes through sanitize_filename before it is used
# as a key.
111 name = sanitize_filename(fn(i))
# Reuse the existing object only when the name is already present AND same()
# accepts the pairing of old entry and new item.
113 if name in oldentries and same(oldentries[name], i):
114 # move existing directory entry over
115 self._entries[name] = oldentries[name]
118 # create new directory entry
# `ent` is bound in an elided line (presumably the result of new_entry(i) —
# confirm). The value stored is whatever self.inodes.add_entry returns.
121 self._entries[name] = self.inodes.add_entry(ent)
124 # delete any other directory entries that were not in found in 'items'
# For each leftover name (loop header elided), llfuse.invalidate_entry is called
# for that name under this directory's inode and the old object is handed to
# self.inodes.del_entry.
126 llfuse.invalidate_entry(self.inode, str(i))
127 self.inodes.del_entry(oldentries[i])
131 self._mtime = time.time()
136 """Delete all entries"""
137 oldentries = self._entries
# NOTE(review): `n` is used below as a key of oldentries (oldentries[n],
# str(n)), so this isinstance check appears to test the entry *name* rather
# than the entry object; presumably oldentries[n] was intended — confirm
# against the full file before changing.
140 if isinstance(n, Directory):
142 llfuse.invalidate_entry(self.inode, str(n))
143 self.inodes.del_entry(oldentries[n])
# After the per-entry work, llfuse.invalidate_inode is called for this
# directory's own inode.
144 llfuse.invalidate_inode(self.inode)
# Directory subclass whose entry tree is built from the streams and files of a
# single collection, identified by self.collection_locator.
151 class CollectionDirectory(Directory):
152 """Represents the root of a directory tree holding a collection."""
154 def __init__(self, parent_inode, inodes, api, num_retries, collection):
# `collection` is accepted in two forms:
#   - a dict: its 'uuid' becomes the locator and its 'modified_at' value is
#     passed through convertTime to set the mtime;
#   - anything else: stored as the locator as-is (the branch header for this
#     case is elided from the listing).
155 super(CollectionDirectory, self).__init__(parent_inode)
158 self.num_retries = num_retries
# Both start empty. collection_object_file is created on demand in __getitem__;
# collection_object is assigned in new_collection.
159 self.collection_object_file = None
160 self.collection_object = None
161 if isinstance(collection, dict):
162 self.collection_locator = collection['uuid']
163 self._mtime = convertTime(collection.get('modified_at'))
165 self.collection_locator = collection
# (enclosing def not shown) A record `i` matches this directory when either its
# 'uuid' or its 'portable_data_hash' equals the locator.
169 return i['uuid'] == self.collection_locator or i['portable_data_hash'] == self.collection_locator
171 # Used by arv-web.py to switch the contents of the CollectionDirectory
172 def change_collection(self, new_locator):
173 """Switch the contents of the CollectionDirectory.
175 Must be called with llfuse.lock held.
178 self.collection_locator = new_locator
# Clearing collection_object makes the `is None` test in the refresh logic
# further down take the new_collection path for the new locator.
179 self.collection_object = None
182 def new_collection(self, new_collection_object, coll_reader):
# Installs a newly fetched collection record and adds entries for the streams
# and files reported by coll_reader.
183 self.collection_object = new_collection_object
185 self._mtime = convertTime(self.collection_object.get('modified_at'))
# Keep an already-created '.arvados#collection' file object in sync with the
# new record.
187 if self.collection_object_file is not None:
188 self.collection_object_file.update(self.collection_object)
# For each stream: walk the '/'-separated stream name, creating a plain
# Directory for every path component that does not exist yet, then add the
# stream's files under the last directory reached.
# NOTE(review): the initial binding of `cwd` is elided (presumably reset to
# self for each stream — confirm).
191 for s in coll_reader.all_streams():
193 for part in s.name().split('/'):
# Empty components and '.' do not create a directory level.
194 if part != '' and part != '.':
195 partname = sanitize_filename(part)
196 if partname not in cwd._entries:
197 cwd._entries[partname] = self.inodes.add_entry(Directory(cwd.inode))
198 cwd = cwd._entries[partname]
# File entries get this collection directory's mtime (self.mtime()).
199 for k, v in s.files().items():
200 cwd._entries[sanitize_filename(k)] = self.inodes.add_entry(StreamReaderFile(cwd.inode, v, self.mtime()))
# (enclosing def not shown) Refresh logic.
# First guard: a record is already loaded and the locator matches
# portable_data_hash_pattern. NOTE(review): the action taken is elided;
# presumably an early exit — confirm.
204 if self.collection_object is not None and portable_data_hash_pattern.match(self.collection_locator):
# Second guard: no locator at all (action elided).
207 if self.collection_locator is None:
# The fetch below runs with llfuse.lock released; the lock is re-acquired when
# the with-block ends (see the comment at listing line 225).
211 with llfuse.lock_released:
212 coll_reader = arvados.CollectionReader(
213 self.collection_locator, self.api, self.api.keep,
214 num_retries=self.num_retries)
# api_response() may be falsy; an empty dict is substituted so the missing
# fields can be filled in below.
215 new_collection_object = coll_reader.api_response() or {}
216 # If the Collection only exists in Keep, there will be no API
217 # response. Fill in the fields we need.
218 if 'uuid' not in new_collection_object:
219 new_collection_object['uuid'] = self.collection_locator
# Without an explicit hash, the uuid value is reused as the portable data hash.
220 if "portable_data_hash" not in new_collection_object:
221 new_collection_object["portable_data_hash"] = new_collection_object["uuid"]
222 if 'manifest_text' not in new_collection_object:
223 new_collection_object['manifest_text'] = coll_reader.manifest_text()
224 coll_reader.normalize()
225 # end with llfuse.lock_released, re-acquire lock
# Rebuild the tree only when nothing is loaded yet or the portable data hash
# differs from the loaded record's.
227 if self.collection_object is None or self.collection_object["portable_data_hash"] != new_collection_object["portable_data_hash"]:
228 self.new_collection(new_collection_object, coll_reader)
# Error handling: NotFoundError is logged with a traceback.
232 except arvados.errors.NotFoundError:
233 _logger.exception("arv-mount %s: error", self.collection_locator)
# ArgumentError is logged as a warning, followed by the loaded manifest text
# when one is available.
234 except arvados.errors.ArgumentError as detail:
235 _logger.warning("arv-mount %s: error %s", self.collection_locator, detail)
236 if self.collection_object is not None and "manifest_text" in self.collection_object:
237 _logger.warning("arv-mount manifest_text is: %s", self.collection_object["manifest_text"])
# (header of this final handler is elided) Logs with a traceback and, when
# available, the manifest text at error level.
239 _logger.exception("arv-mount %s: error", self.collection_locator)
240 if self.collection_object is not None and "manifest_text" in self.collection_object:
241 _logger.error("arv-mount manifest_text is: %s", self.collection_object["manifest_text"])
244 def __getitem__(self, item):
# '.arvados#collection' is a virtual entry: an ObjectFile wrapping
# collection_object, created on first request and registered with self.inodes.
# Every other name is delegated to Directory.__getitem__.
246 if item == '.arvados#collection':
247 if self.collection_object_file is None:
248 self.collection_object_file = ObjectFile(self.inode, self.collection_object)
249 self.inodes.add_entry(self.collection_object_file)
250 return self.collection_object_file
252 return super(CollectionDirectory, self).__getitem__(item)
254 def __contains__(self, k):
# The virtual name is special-cased (result elided from the listing); other
# names are delegated to Directory.__contains__.
255 if k == '.arvados#collection':
258 return super(CollectionDirectory, self).__contains__(k)
# Directory subclass that adds CollectionDirectory entries on demand, from
# inside __contains__, when asked about a name that looks like a collection
# identifier.
261 class MagicDirectory(Directory):
262 """A special directory that logically contains the set of all extant keep locators.
264 When a file is referenced by lookup(), it is tested to see if it is a valid
265 keep locator to a manifest, and if so, loads the manifest contents as a
266 subdirectory of this directory with the locator as the directory name.
267 Since querying a list of all extant keep locators is impractical, only
268 collections that have already been accessed are visible to readdir().
273 This directory provides access to Arvados collections as subdirectories listed
274 by uuid (in the form 'zzzzz-4zz18-1234567890abcde') or portable data hash (in
275 the form '1234567890abcdefghijklmnopqrstuv+123').
277 Note that this directory will appear empty until you attempt to access a
278 specific collection subdirectory (such as trying to 'cd' into it), at which
279 point the collection will actually be looked up on the server and the directory
280 will appear if it exists.
283 def __init__(self, parent_inode, inodes, api, num_retries):
# NOTE(review): the assignments of self.inodes and self.api, which later
# methods read, are elided from this listing.
284 super(MagicDirectory, self).__init__(parent_inode)
287 self.num_retries = num_retries
289 def __setattr__(self, name, value):
# Attribute assignment is intercepted so that setup can run once an inode is
# assigned. The assignment itself is always delegated to the base
# implementation first.
290 super(MagicDirectory, self).__setattr__(name, value)
291 # When we're assigned an inode, add a README.
# The README is added only when the attribute being set is 'inode', the
# resulting inode is not None, and no entries exist yet.
292 if ((name == 'inode') and (self.inode is not None) and
293 (not self._entries)):
294 self._entries['README'] = self.inodes.add_entry(
295 StringFile(self.inode, self.README_TEXT, time.time()))
296 # If we're the root directory, add an identical by_id subdirectory.
# The nested MagicDirectory shares this object's inodes, api and num_retries.
297 if self.inode == llfuse.ROOT_INODE:
298 self._entries['by_id'] = self.inodes.add_entry(MagicDirectory(
299 self.inode, self.inodes, self.api, self.num_retries))
301 def __contains__(self, k):
# Membership test with a side effect: for an unknown name that looks like a
# collection identifier, a CollectionDirectory is created for it.
# Already-known names are handled first (branch result elided).
302 if k in self._entries:
# Names matching neither portable_data_hash_pattern nor uuid_pattern are
# handled by this branch (body elided; presumably rejected — confirm).
305 if not portable_data_hash_pattern.match(k) and not uuid_pattern.match(k):
# Candidate directory for locator k, registered with self.inodes.
309 e = self.inodes.add_entry(CollectionDirectory(
310 self.inode, self.inodes, self.api, self.num_retries, k))
# Any exception is logged at debug level.
# NOTE(review): `as e` rebinds the name `e` assigned at listing line 309.
316 except Exception as e:
317 _logger.debug('arv-mount exception keep %s', e)
320 def __getitem__(self, item):
# Returns the stored entry, or raises KeyError naming the id. The condition
# choosing between the two lines below is elided from the listing.
322 return self._entries[item]
324 raise KeyError("No collection with id " + item)
# Directory whose invalidate() also calls invalidate() on every entry it holds.
327 class RecursiveInvalidateDirectory(Directory):
328 def invalidate(self):
# Only the directory whose inode is llfuse.ROOT_INODE acquires llfuse.lock
# here; the matching release is at the end of the method under the same test.
329 if self.inode == llfuse.ROOT_INODE:
330 llfuse.lock.acquire()
# Invalidate this directory first, then each entry.
# NOTE(review): listing lines 331 and 335-337 are elided; presumably a
# try/finally surrounds this section so the release always runs — confirm.
332 super(RecursiveInvalidateDirectory, self).invalidate()
333 for a in self._entries:
334 self._entries[a].invalidate()
338 if self.inode == llfuse.ROOT_INODE:
339 llfuse.lock.release()
# Lists tag names as subdirectories; each subdirectory is a TagDirectory.
342 class TagsDirectory(RecursiveInvalidateDirectory):
343 """A special directory that contains as subdirectories all tags visible to the user."""
345 def __init__(self, parent_inode, inodes, api, num_retries, poll_time=60):
# poll_time is stored and forwarded to each TagDirectory created below; its
# unit is not visible in this listing.
# NOTE(review): assignments of self.inodes, self.api and self._poll, all read
# further down, are elided from this listing.
346 super(TagsDirectory, self).__init__(parent_inode)
349 self.num_retries = num_retries
351 self._poll_time = poll_time
# (enclosing def not shown) Refresh logic.
# The API request runs with llfuse.lock released. It asks for the distinct
# 'name' values of all links whose link_class is 'tag'.
354 with llfuse.lock_released:
355 tags = self.api.links().list(
356 filters=[['link_class', '=', 'tag']],
357 select=['name'], distinct=True
358 ).execute(num_retries=self.num_retries)
# Merge the result into the entries: an existing entry is kept when its .tag
# equals the item's name, otherwise a new TagDirectory is built for that name.
# NOTE(review): the naming-function argument (listing line 361) is elided.
360 self.merge(tags['items'],
362 lambda a, i: a.tag == i['name'],
363 lambda i: TagDirectory(self.inode, self.inodes, self.api, self.num_retries, i['name'], poll=self._poll, poll_time=self._poll_time))
# Lists, as CollectionDirectory entries, the collections that tag links named
# self.tag point at.
366 class TagDirectory(Directory):
367 """A special directory that contains as subdirectories all collections visible
368 to the user that are tagged with a particular tag.
371 def __init__(self, parent_inode, inodes, api, num_retries, tag,
372 poll=False, poll_time=60):
# NOTE(review): assignments of self.inodes, self.api, self.tag and self._poll
# are elided from this listing; self.api and self.tag are read below.
373 super(TagDirectory, self).__init__(parent_inode)
376 self.num_retries = num_retries
379 self._poll_time = poll_time
# (enclosing def not shown) Refresh logic.
# The API request runs with llfuse.lock released. It selects links that are
# tags, carry this directory's tag name, and whose head is a collection.
382 with llfuse.lock_released:
383 taggedcollections = self.api.links().list(
384 filters=[['link_class', '=', 'tag'],
385 ['name', '=', self.tag],
386 ['head_uuid', 'is_a', 'arvados#collection']],
388 ).execute(num_retries=self.num_retries)
# Entries are named by the link's head_uuid. An existing entry is reused when
# its collection_locator equals that uuid; otherwise a new CollectionDirectory
# is created for it.
389 self.merge(taggedcollections['items'],
390 lambda i: i['head_uuid'],
391 lambda a, i: a.collection_locator == i['head_uuid'],
392 lambda i: CollectionDirectory(self.inode, self.inodes, self.api, self.num_retries, i['head_uuid']))
# Directory for one project, identified by self.uuid (taken from
# project_object['uuid']). It also exposes a virtual '.arvados#project' entry.
395 class ProjectDirectory(Directory):
396 """A special directory that contains the contents of a project."""
398 def __init__(self, parent_inode, inodes, api, num_retries, project_object,
399 poll=False, poll_time=60):
# project_object is a record that must contain 'uuid'.
# NOTE(review): assignments of self.inodes, self.api and self._poll are elided
# from this listing but are read by the methods below.
400 super(ProjectDirectory, self).__init__(parent_inode)
403 self.num_retries = num_retries
404 self.project_object = project_object
# Created later by the refresh logic; returned by __getitem__ for the virtual
# '.arvados#project' name.
405 self.project_object_file = None
406 self.uuid = project_object['uuid']
408 self._poll_time = poll_time
410 def createDirectory(self, i):
# Factory handed to merge(): chooses the entry type from the form of i['uuid'].
#   collection uuid -> CollectionDirectory built from the whole record
#   group uuid      -> nested ProjectDirectory with the same poll settings
#   link uuid       -> CollectionDirectory for the link's head, when the head
#                      is a collection or looks like a portable data hash
#   any other uuid  -> ObjectFile wrapping the record
# The fall-through branches (listing lines 418-419 and after 421) are elided.
411 if collection_uuid_pattern.match(i['uuid']):
412 return CollectionDirectory(self.inode, self.inodes, self.api, self.num_retries, i)
413 elif group_uuid_pattern.match(i['uuid']):
414 return ProjectDirectory(self.inode, self.inodes, self.api, self.num_retries, i, self._poll, self._poll_time)
415 elif link_uuid_pattern.match(i['uuid']):
416 if i['head_kind'] == 'arvados#collection' or portable_data_hash_pattern.match(i['head_uuid']):
417 return CollectionDirectory(self.inode, self.inodes, self.api, self.num_retries, i['head_uuid'])
420 elif uuid_pattern.match(i['uuid']):
# NOTE(review): this passes self.parent_inode, whereas every other branch
# passes self.inode as the parent — confirm whether that is intended.
421 return ObjectFile(self.parent_inode, i)
# (enclosing def not shown) Refresh logic: create the '.arvados#project' file
# object once and register it with self.inodes.
# NOTE(review): `== None` works here, but `is None` is the idiomatic test.
426 if self.project_object_file == None:
427 self.project_object_file = ObjectFile(self.inode, self.project_object)
428 self.inodes.add_entry(self.project_object_file)
# (enclosing def not shown) Naming function for merge(); branch results at
# listing lines 433, 436 and 438-439 are elided.
# Records with a missing or empty name are handled first.
432 if i['name'] is None or len(i['name']) == 0:
434 elif collection_uuid_pattern.match(i['uuid']) or group_uuid_pattern.match(i['uuid']):
435 # collection or subproject
437 elif link_uuid_pattern.match(i['uuid']) and i['head_kind'] == 'arvados#collection':
440 elif 'kind' in i and i['kind'].startswith('arvados#'):
# i['kind'][8:] drops the 8-character 'arvados#' prefix, giving "<name>.<kind>".
442 return "{}.{}".format(i['name'], i['kind'][8:])
# (enclosing def not shown) Identity test for merge(): compares the existing
# entry `a` with record `i` by uuid, using the attribute matching a's type.
447 if isinstance(a, CollectionDirectory):
448 return a.collection_locator == i['uuid']
449 elif isinstance(a, ProjectDirectory):
450 return a.uuid == i['uuid']
451 elif isinstance(a, ObjectFile):
# An ObjectFile additionally has to be non-stale to be kept.
452 return a.uuid == i['uuid'] and not a.stale()
# The API requests below run with llfuse.lock released.
455 with llfuse.lock_released:
# Re-fetch this directory's own record: a group record when self.uuid has the
# group form, a user record when it has the user form.
456 if group_uuid_pattern.match(self.uuid):
457 self.project_object = self.api.groups().get(
458 uuid=self.uuid).execute(num_retries=self.num_retries)
459 elif user_uuid_pattern.match(self.uuid):
460 self.project_object = self.api.users().get(
461 uuid=self.uuid).execute(num_retries=self.num_retries)
# Contents are the results of groups().contents for this uuid, plus any 'name'
# links whose tail is this uuid.
463 contents = arvados.util.list_all(self.api.groups().contents,
464 self.num_retries, uuid=self.uuid)
465 # Name links will be obsolete soon, take this out when there are no more pre-#3036 in use.
466 contents += arvados.util.list_all(
467 self.api.links().list, self.num_retries,
468 filters=[['tail_uuid', '=', self.uuid],
469 ['link_class', '=', 'name']])
471 # end with llfuse.lock_released, re-acquire lock
# (tail of an elided call; createDirectory is passed as its last argument,
# presumably as merge()'s entry factory — confirm)
476 self.createDirectory)
478 def __getitem__(self, item):
# '.arvados#project' resolves to project_object_file; every other name is
# delegated to Directory.__getitem__.
480 if item == '.arvados#project':
481 return self.project_object_file
483 return super(ProjectDirectory, self).__getitem__(item)
485 def __contains__(self, k):
# The virtual name is special-cased (result elided from the listing); other
# names are delegated to Directory.__contains__.
486 if k == '.arvados#project':
489 return super(ProjectDirectory, self).__contains__(k)
# Builds a name -> record mapping of project owners and projects, then exposes
# each as a ProjectDirectory.
492 class SharedDirectory(Directory):
493 """A special directory that represents users or groups who have shared projects with me."""
495 def __init__(self, parent_inode, inodes, api, num_retries, exclude,
496 poll=False, poll_time=60):
# NOTE(review): no use of `exclude` is visible in this listing, and the
# assignments of self.inodes, self.api and self._poll are elided.
497 super(SharedDirectory, self).__init__(parent_inode)
500 self.num_retries = num_retries
# The constructor itself performs an API request to fetch the current user's
# record; its 'uuid' is compared against project owners below.
501 self.current_user = api.users().current().execute(num_retries=num_retries)
503 self._poll_time = poll_time
# (enclosing def not shown) Refresh logic; the requests run with llfuse.lock
# released.
506 with llfuse.lock_released:
# Fetch every group whose group_class is 'project'.
507 all_projects = arvados.util.list_all(
508 self.api.groups().list, self.num_retries,
509 filters=[['group_class','=','project']])
# Index the projects by uuid (the initialisation of `objects` is elided).
511 for ob in all_projects:
512 objects[ob['uuid']] = ob
# An owner uuid is recorded as a root owner when it is neither the current user
# nor one of the fetched projects. root_owners is used as a set: keys only.
516 for ob in all_projects:
517 if ob['owner_uuid'] != self.current_user['uuid'] and ob['owner_uuid'] not in objects:
519 root_owners[ob['owner_uuid']] = True
# Resolve the root owner uuids against both the users and the groups
# endpoints.
521 lusers = arvados.util.list_all(
522 self.api.users().list, self.num_retries,
523 filters=[['uuid','in', list(root_owners)]])
524 lgroups = arvados.util.list_all(
525 self.api.groups().list, self.num_retries,
526 filters=[['uuid','in', list(root_owners)]])
# The fetched records are added to the uuid index (loop headers elided).
532 objects[l["uuid"]] = l
534 objects[l["uuid"]] = l
# Build the display-name mapping. `obr` is bound in elided lines, presumably
# to the record for root owner `r` — confirm.
537 for r in root_owners:
541 contents[obr["name"]] = obr
# Records with a 'first_name' key are also listed under "first last".
542 if "first_name" in obr:
543 contents[u"{} {}".format(obr["first_name"], obr["last_name"])] = obr
# (loop header elided; here `r` is a record, not a uuid) Records whose owner
# is not in the uuid index are listed directly under their own name.
546 if r['owner_uuid'] not in objects:
547 contents[r['name']] = r
549 # end with llfuse.lock_released, re-acquire lock
# Items passed to merge() are (name, record) pairs, hence the i[1] indexing in
# the callbacks. NOTE(review): the naming-function argument (listing line 553)
# is elided.
552 self.merge(contents.items(),
554 lambda a, i: a.uuid == i[1]['uuid'],
555 lambda i: ProjectDirectory(self.inode, self.inodes, self.api, self.num_retries, i[1], poll=self._poll, poll_time=self._poll_time))