Add arv-mount.
[arvados.git] / sdk / python / bin / arv-mount
1 #!/usr/bin/env python
2
3 import argparse
4 import hashlib
5 import os
6 import re
7 import string
8 import sys
9 import logging
10 import fuse
11 import errno
12 import stat
13 import arvados
14 import time
15
class KeepMount(fuse.LoggingMixIn, fuse.Operations):
    """Read-only Keep mount.

    Paths below the mount point have the form
    ``/<collection locator>/<path inside the collection>``.  The mount
    root itself is reported as a search-only directory (mode 0111) and
    cannot be listed; a collection is loaded lazily the first time a path
    beneath its locator is accessed.

    ``set_args()`` must be called before the filesystem is mounted: the
    path handlers read ``self.args.collection`` and ``self.args.audit``.
    Log output goes through the module-level ``logger``.
    """

    def __init__(self):
        self.arv = arvados.api('v1')
        self.reader = None
        # locator -> dict(reader=CollectionReader, files={path: entry})
        self.collections = {}
        # locators whose first read has already been reported
        self.audited = dict(read={})

    def load_collection(self, uuid):
        """Load and cache the file listing of collection ``uuid``.

        Does nothing if the collection is already cached.  Each cached
        file entry holds a ``stat`` dict and the ``arv_file`` reader
        object; the entry keyed by ``''`` is the collection's root
        directory and has only a ``stat`` dict.

        Raises:
            fuse.FuseOSError: ENOENT if the manifest cannot be enumerated.
        """
        if uuid in self.collections:
            return
        now = time.time()
        reader = arvados.CollectionReader(uuid)
        files = {
            '': dict(
                stat=dict(
                    st_mode=(stat.S_IFDIR | 0o755), st_ctime=now,
                    st_mtime=now, st_atime=now, st_nlink=2)),
        }
        try:
            for s in reader.all_streams():
                for f in s.all_files():
                    # Stream names start with "./"; store paths relative
                    # to the collection root.
                    path = re.sub(r'^\./', '',
                                  os.path.join(s.name(), f.name()))
                    files[path] = dict(
                        stat=dict(
                            st_mode=(stat.S_IFREG | 0o444),
                            st_size=f.size(), st_nlink=1,
                            st_ctime=now, st_mtime=now, st_atime=now),
                        arv_file=f)
                    logger.debug("collection.load: %s: %s", uuid, path)
        except Exception:
            # TODO: propagate real error, don't assume ENOENT
            # Keep the underlying failure visible in debug output, and
            # let KeyboardInterrupt/SystemExit pass through untouched.
            logger.debug("collection.load: %s failed", uuid, exc_info=True)
            raise fuse.FuseOSError(errno.ENOENT)
        self.collections[uuid] = dict(reader=reader, files=files)
        logger.info("collection.load %s", uuid)

    def setup_reader(self, path):
        """Log the components of ``path`` and return True."""
        logger.debug("%s", path.split('/'))
        return True

    def set_args(self, args):
        """Store the parsed command-line options used by the handlers."""
        self.args = args

    def parse_and_load(self, path):
        """Split a FUSE path and make sure its collection is loaded.

        Args:
            path: absolute path as passed in by FUSE, e.g.
                ``/<locator>/dir/file``.

        Returns:
            A three-element list ``[leading, locator, target]`` where
            ``target`` is the path inside the collection ('' for the
            collection root).

        Raises:
            fuse.FuseOSError: ENOENT if the first component does not look
                like a locator or the collection cannot be loaded; EPERM
                if --collection was given and the locator is not listed.
        """
        parts = path.split(os.path.sep, 2)
        # Pad so callers can always unpack three values.
        parts += [''] * (3 - len(parts))
        if not re.match(r'[0-9a-f]{32,}(\+\S+?)*', parts[1]):
            raise fuse.FuseOSError(errno.ENOENT)
        if self.args.collection and parts[1] not in self.args.collection:
            raise fuse.FuseOSError(errno.EPERM)
        self.load_collection(parts[1])
        return parts[0:3]

    def audit_read(self, uuid):
        """Log the first read of collection ``uuid`` when auditing is on."""
        if self.args.audit and uuid not in self.audited['read']:
            self.audited['read'][uuid] = True
            logger.info("collection.read %s", uuid)

    def read(self, path, size, offset, fh):
        """Return up to ``size`` bytes of the file at ``path`` from ``offset``.

        Raises:
            fuse.FuseOSError: ENOENT if the file is not in the collection,
                EISDIR if ``path`` names the collection root directory.
        """
        _, uuid, target = self.parse_and_load(path)
        if (uuid not in self.collections or
                target not in self.collections[uuid]['files']):
            raise fuse.FuseOSError(errno.ENOENT)
        entry = self.collections[uuid]['files'][target]
        if 'arv_file' not in entry:
            # The collection root ('') is cached without a file reader.
            raise fuse.FuseOSError(errno.EISDIR)
        self.audit_read(uuid)
        # NOTE(review): seek() followed by read() on the cached reader is
        # not atomic; confirm whether FUSE may dispatch concurrent reads
        # of the same file.
        f = entry['arv_file']
        f.seek(offset)
        return f.read(size)

    def readdir(self, path, fh):
        """List the names directly beneath directory ``path``.

        Raises:
            fuse.FuseOSError: EPERM for the mount root, which cannot be
                listed; ENOENT if the collection is unknown.
        """
        if path == '/':
            raise fuse.FuseOSError(errno.EPERM)
        _, uuid, target = self.parse_and_load(path)
        if uuid not in self.collections:
            raise fuse.FuseOSError(errno.ENOENT)
        if target != '' and target[-1] != os.path.sep:
            target += os.path.sep
        names = set()
        for filepath in self.collections[uuid]['files']:
            if filepath == '':
                continue
            logger.debug(filepath)
            if target == '' or filepath.startswith(target):
                # First component below ``target``: either a file name or
                # the name of an intermediate directory.
                names.add(filepath[len(target):].split(os.path.sep)[0])
        return ['.', '..'] + sorted(names)

    def getattr(self, path, fh=None):
        """Return the stat dict for ``path``.

        Directories inside a collection are implied by the file paths
        beneath them; they all share the stat dict of the collection
        root.

        Raises:
            fuse.FuseOSError: ENOENT if nothing exists at ``path``.
        """
        if path == '/':
            now = time.time()
            return dict(st_mode=(stat.S_IFDIR | 0o111), st_ctime=now,
                        st_mtime=now, st_atime=now, st_nlink=2)
        _, uuid, target = self.parse_and_load(path)
        if uuid not in self.collections:
            raise fuse.FuseOSError(errno.ENOENT)
        files = self.collections[uuid]['files']
        if target in files:
            return files[target]['stat']
        # ``target`` is not an exact entry (so it is not ''); it is a
        # directory if any cached file path lies beneath it.
        prefix = target + '/'
        if any(filepath.startswith(prefix) for filepath in files):
            return files['']['stat']
        raise fuse.FuseOSError(errno.ENOENT)
117
def parse_args(argv=None):
    """Parse the arv-mount command line.

    Args:
        argv: optional list of argument strings, without the program
            name.  When None (the default) argparse reads
            ``sys.argv[1:]``.

    Returns:
        An ``argparse.Namespace`` with ``mountpoint`` (str),
        ``collection`` (list of str, empty when the option is not given),
        and the booleans ``audit``, ``debug`` and ``foreground``.
    """
    parser = argparse.ArgumentParser(
        description='Mount Keep data under the local filesystem.')
    parser.add_argument('mountpoint',
                        help="""
Mount point.
""")
    parser.add_argument('--collection', action='append', default=[],
                        help="""
Collection locator. If none supplied, provide access to all readable
manifests.
""")
    parser.add_argument('--audit', action='store_true',
                        help="""
Print the collection uuid on stderr the first time a given collection
is read.
""")
    parser.add_argument('--debug', action='store_true',
                        help="""
Print debug messages.
""")
    parser.add_argument('--foreground', action='store_true',
                        help="""
Run in foreground, instead of detaching and running as a daemon.
""")
    return parser.parse_args(argv)
145
if __name__ == '__main__':
    # Script entry point: parse options, configure logging, then mount.
    args = parse_args()
    # Module-level logger used by KeepMount's methods.
    logger = logging.getLogger(os.path.basename(sys.argv[0]))
    # logging.basicConfig() does nothing once the root logger has a
    # handler, so choose the most verbose requested level and configure
    # once; otherwise --audit combined with --debug would stay at INFO.
    if args.debug:
        logging.basicConfig(level=logging.DEBUG)
    elif args.audit:
        logging.basicConfig(level=logging.INFO)
    mounter = KeepMount()
    mounter.set_args(args)
    # Deliberately not assigned to ``fuse``: KeepMount looks up
    # fuse.FuseOSError through that module-level name.
    fuse.FUSE(mounter,
              args.mountpoint,
              foreground=args.foreground,
              fsname='arv-mount')