* You can browse, open and read Keep entries as if they are regular files.
* It is easy for existing tools to access files in Keep.
-* Data is downloaded on demand. It is not necessary to download an entire file or collection to start processing.
+* Data is streamed on demand. It is not necessary to download an entire file or collection to start processing.
The default mode permits browsing any collection in Arvados as a subdirectory under the mount directory. To avoid having to fetch a potentially large list of all collections, collection directories only come into existence when explicitly accessed by their Keep locator. For instance, a collection may be found by its content hash in the @keep/by_id@ directory.
The last line unmounts Keep. Subdirectories will no longer be accessible.
Within each directory on Keep, there is a @.arvados#collection@ file that does not show up with @ls@. Its contents include, for instance, the @portable_data_hash@, which is the same as the Keep locator.
+
+h3. Modifying files and directories in Keep
+
+By default, all files in the Keep mount are read only. However, @arv-mount --enable-write@ enables you to perform the following operations using normal Unix command line tools (@touch@, @mv@, @rm@, @mkdir@, @rmdir@) and your own programs using standard POSIX file system APIs:
+
+* Create, update, rename and delete individual files within collections
+* Create and delete subdirectories inside collections
+* Move files and directories within and between collections
+* Create and delete collections within a project (using @mkdir@ and @rmdir@ in a project directory)
+
+Not supported:
+
+* Symlinks, hard links
+* Changing permissions
+* Extended attributes
+
+If multiple clients try to modify the same file in the same collection, this results in a conflict. In this case, the most recent file wins, and the "loser" will be renamed to a conflict file in the form @name~YYYYMMDD-HHMMSS~conflict~@.
+
+Please note this feature is in beta testing. In particular, the conflict mechanism is itself currently subject to race conditions with potential for data loss when a collection is being modified simultaneously by multiple clients. This issue will be resolved in future development.
# We should always start at the first segment due to the binary
# search.
- while i < len(data_locators):
- if limit and len(resp) > limit:
- break
+ while i < len(data_locators) and len(resp) != limit:
dl = data_locators[i]
block_start = dl.range_start
block_size = dl.range_size
else:
raise AssertionError("Buffer block is not writable")
+ STATE_TRANSITIONS = frozenset([
+ (WRITABLE, PENDING),
+ (PENDING, COMMITTED),
+ (PENDING, ERROR),
+ (ERROR, PENDING)])
+
@synchronized
def set_state(self, nextstate, val=None):
- if ((self._state == _BufferBlock.WRITABLE and nextstate == _BufferBlock.PENDING) or
- (self._state == _BufferBlock.PENDING and nextstate == _BufferBlock.COMMITTED) or
- (self._state == _BufferBlock.PENDING and nextstate == _BufferBlock.ERROR) or
- (self._state == _BufferBlock.ERROR and nextstate == _BufferBlock.PENDING)):
- self._state = nextstate
-
- if self._state == _BufferBlock.PENDING:
- self.wait_for_commit.clear()
-
- if self._state == _BufferBlock.COMMITTED:
- self._locator = val
- self.buffer_view = None
- self.buffer_block = None
- self.wait_for_commit.set()
-
- if self._state == _BufferBlock.ERROR:
- self.error = val
- self.wait_for_commit.set()
- else:
+ if (self._state, nextstate) not in self.STATE_TRANSITIONS:
raise StateChangeError("Invalid state change from %s to %s" % (self.state, nextstate), self.state, nextstate)
+ self._state = nextstate
+
+ if self._state == _BufferBlock.PENDING:
+ self.wait_for_commit.clear()
+
+ if self._state == _BufferBlock.COMMITTED:
+ self._locator = val
+ self.buffer_view = None
+ self.buffer_block = None
+ self.wait_for_commit.set()
+
+ if self._state == _BufferBlock.ERROR:
+ self.error = val
+ self.wait_for_commit.set()
@synchronized
def state(self):
"""
- def __init__(self, uid, gid, encoding="utf-8", inode_cache=None, num_retries=4):
+ def __init__(self, uid, gid, encoding="utf-8", inode_cache=None, num_retries=4, enable_write=False):
super(Operations, self).__init__()
if not inode_cache:
self.inodes = Inodes(inode_cache, encoding=encoding)
self.uid = uid
self.gid = gid
+ self.enable_write = enable_write
# dict of inode to filehandle
self._filehandles = {}
if isinstance(e, FuseArvadosFile):
entry.st_mode |= stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
- if e.writable():
+ if self.enable_write and e.writable():
entry.st_mode |= stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH
entry.st_nlink = 1
return st
def _check_writable(self, inode_parent):
+ if not self.enable_write:
+ raise llfuse.FUSEError(errno.EROFS)
+
if inode_parent in self.inodes:
p = self.inodes[inode_parent]
else:
parser.add_argument('--file-cache', type=int, help="File data cache size, in bytes (default 256MiB)", default=256*1024*1024)
parser.add_argument('--directory-cache', type=int, help="Directory data cache size, in bytes (default 128MiB)", default=128*1024*1024)
+ parser.add_argument('--read-only', action='store_false', help="Mount will be read only (default)", dest="enable_write", default=False)
+ parser.add_argument('--enable-write', action='store_true', help="Mount will be read-write", dest="enable_write", default=False)
+
parser.add_argument('--exec', type=str, nargs=argparse.REMAINDER,
dest="exec_args", metavar=('command', 'args', '...', '--'),
help="""Mount, run a command, then unmount and exit""")
arvados.logger.setLevel(logging.DEBUG)
logger.debug("arv-mount debugging enabled")
+ logger.warn("enable write is %s", args.enable_write)
+
try:
# Create the request handler
operations = Operations(os.getuid(),
os.getgid(),
encoding=args.encoding,
- inode_cache=InodeCache(cap=args.directory_cache))
+ inode_cache=InodeCache(cap=args.directory_cache),
+ enable_write=args.enable_write)
api = ThreadSafeApiCache(apiconfig=arvados.config.settings(),
- keep_params={"block_cache": arvados.keep.KeepBlockCache(args.file_cache)})
+ keep_params={"block_cache": arvados.keep.KeepBlockCache(args.file_cache)})
usr = api.users().current().execute(num_retries=args.retries)
now = time.time()
self.api = arvados.safeapi.ThreadSafeApiCache(arvados.config.settings())
def make_mount(self, root_class, **root_kwargs):
- self.operations = fuse.Operations(os.getuid(), os.getgid())
+ self.operations = fuse.Operations(os.getuid(), os.getgid(), enable_write=True)
self.operations.inodes.add_entry(root_class(
llfuse.ROOT_INODE, self.operations.inodes, self.api, 0, **root_kwargs))
llfuse.init(self.operations, self.mounttmp, [])
return self.operations.inodes[llfuse.ROOT_INODE]
def tearDown(self):
- self.pool.close()
+ self.pool.terminate()
+ self.pool.join()
del self.pool
# llfuse.close is buggy, so use fusermount instead.
m.new_collection(collection.api_response(), collection)
self.assertTrue(m.writable())
- # See note in FuseWriteFileTest
+ # See note in MountTestBase.setUp
self.pool.apply(fuseUpdateFileTestHelper, (self.mounttmp,))
collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()
with collection2.open("file1.txt", "w") as f:
f.write("foo")
- # See comment in FuseWriteFileTest
+ # See note in MountTestBase.setUp
self.pool.apply(fuseFileConflictTestHelper, (self.mounttmp,))
with llfuse.lock:
m.new_collection(collection.api_response(), collection)
- # See comment in FuseWriteFileTest
+ # See note in MountTestBase.setUp
self.pool.apply(fuseUnlinkOpenFileTest, (self.mounttmp,))
self.assertEqual(collection.manifest_text(), "")
m = self.make_mount(fuse.MagicDirectory)
- # See comment in FuseWriteFileTest
+ # See note in MountTestBase.setUp
self.pool.apply(fuseMvFileBetweenCollectionsTest1, (self.mounttmp,
collection1.manifest_locator(),
collection2.manifest_locator()))
m = self.make_mount(fuse.MagicDirectory)
- # See comment in FuseWriteFileTest
+ # See note in MountTestBase.setUp
self.pool.apply(fuseMvDirBetweenCollectionsTest1, (self.mounttmp,
collection1.manifest_locator(),
collection2.manifest_locator()))