Merge branch 'master' into 6518-crunch2-dispatch-slurm
[arvados.git] / sdk / python / arvados / collection.py
index 82708cc02ebd5b99cd96b3a3095c934e4d95b2ea..8450bd1ca086687928c43e09b2d497ae598c5128 100644 (file)
@@ -3,16 +3,19 @@ import logging
 import os
 import re
 import errno
 import os
 import re
 import errno
+import hashlib
 import time
 import time
+import threading
 
 from collections import deque
 from stat import *
 
 
 from collections import deque
 from stat import *
 
-from .arvfile import ArvadosFileBase, split, ArvadosFile, ArvadosFileWriter, ArvadosFileReader, BlockManager, synchronized, must_be_writable, SYNC_READONLY, SYNC_EXPLICIT, SYNC_LIVE, NoopLock
-from keep import *
-from .stream import StreamReader, normalize_stream, locator_block_size
-from .ranges import Range, LocatorAndRange
-from .safeapi import SafeApi
+from .arvfile import split, _FileLikeObjectBase, ArvadosFile, ArvadosFileWriter, ArvadosFileReader, _BlockManager, synchronized, must_be_writable, NoopLock
+from keep import KeepLocator, KeepClient
+from .stream import StreamReader
+from ._normalize_stream import normalize_stream
+from ._ranges import Range, LocatorAndRange
+from .safeapi import ThreadSafeApiCache
 import config
 import errors
 import util
 import config
 import errors
 import util
@@ -35,7 +38,8 @@ class CollectionBase(object):
         return self._keep_client
 
     def stripped_manifest(self):
         return self._keep_client
 
     def stripped_manifest(self):
-        """
+        """Get the manifest with locator hints stripped.
+
         Return the manifest for the current collection with all
         non-portable hints (i.e., permission signatures and other
         hints other than size hints) removed from the locators.
         Return the manifest for the current collection with all
         non-portable hints (i.e., permission signatures and other
         hints other than size hints) removed from the locators.
@@ -54,197 +58,7 @@ class CollectionBase(object):
         return ''.join(clean)
 
 
         return ''.join(clean)
 
 
-class CollectionReader(CollectionBase):
-    def __init__(self, manifest_locator_or_text, api_client=None,
-                 keep_client=None, num_retries=0):
-        """Instantiate a CollectionReader.
-
-        This class parses Collection manifests to provide a simple interface
-        to read its underlying files.
-
-        Arguments:
-        * manifest_locator_or_text: One of a Collection UUID, portable data
-          hash, or full manifest text.
-        * api_client: The API client to use to look up Collections.  If not
-          provided, CollectionReader will build one from available Arvados
-          configuration.
-        * keep_client: The KeepClient to use to download Collection data.
-          If not provided, CollectionReader will build one from available
-          Arvados configuration.
-        * num_retries: The default number of times to retry failed
-          service requests.  Default 0.  You may change this value
-          after instantiation, but note those changes may not
-          propagate to related objects like the Keep client.
-        """
-        self._api_client = api_client
-        self._keep_client = keep_client
-        self.num_retries = num_retries
-        if re.match(util.keep_locator_pattern, manifest_locator_or_text):
-            self._manifest_locator = manifest_locator_or_text
-            self._manifest_text = None
-        elif re.match(util.collection_uuid_pattern, manifest_locator_or_text):
-            self._manifest_locator = manifest_locator_or_text
-            self._manifest_text = None
-        elif re.match(util.manifest_pattern, manifest_locator_or_text):
-            self._manifest_text = manifest_locator_or_text
-            self._manifest_locator = None
-        else:
-            raise errors.ArgumentError(
-                "Argument to CollectionReader must be a manifest or a collection UUID")
-        self._api_response = None
-        self._streams = None
-
-    def _populate_from_api_server(self):
-        # As in KeepClient itself, we must wait until the last
-        # possible moment to instantiate an API client, in order to
-        # avoid tripping up clients that don't have access to an API
-        # server.  If we do build one, make sure our Keep client uses
-        # it.  If instantiation fails, we'll fall back to the except
-        # clause, just like any other Collection lookup
-        # failure. Return an exception, or None if successful.
-        try:
-            if self._api_client is None:
-                self._api_client = arvados.api('v1')
-                self._keep_client = None  # Make a new one with the new api.
-            self._api_response = self._api_client.collections().get(
-                uuid=self._manifest_locator).execute(
-                num_retries=self.num_retries)
-            self._manifest_text = self._api_response['manifest_text']
-            return None
-        except Exception as e:
-            return e
-
-    def _populate_from_keep(self):
-        # Retrieve a manifest directly from Keep. This has a chance of
-        # working if [a] the locator includes a permission signature
-        # or [b] the Keep services are operating in world-readable
-        # mode. Return an exception, or None if successful.
-        try:
-            self._manifest_text = self._my_keep().get(
-                self._manifest_locator, num_retries=self.num_retries)
-        except Exception as e:
-            return e
-
-    def _populate(self):
-        error_via_api = None
-        error_via_keep = None
-        should_try_keep = ((self._manifest_text is None) and
-                           util.keep_locator_pattern.match(
-                self._manifest_locator))
-        if ((self._manifest_text is None) and
-            util.signed_locator_pattern.match(self._manifest_locator)):
-            error_via_keep = self._populate_from_keep()
-        if self._manifest_text is None:
-            error_via_api = self._populate_from_api_server()
-            if error_via_api is not None and not should_try_keep:
-                raise error_via_api
-        if ((self._manifest_text is None) and
-            not error_via_keep and
-            should_try_keep):
-            # Looks like a keep locator, and we didn't already try keep above
-            error_via_keep = self._populate_from_keep()
-        if self._manifest_text is None:
-            # Nothing worked!
-            raise arvados.errors.NotFoundError(
-                ("Failed to retrieve collection '{}' " +
-                 "from either API server ({}) or Keep ({})."
-                 ).format(
-                    self._manifest_locator,
-                    error_via_api,
-                    error_via_keep))
-        self._streams = [sline.split()
-                         for sline in self._manifest_text.split("\n")
-                         if sline]
-
-    def _populate_first(orig_func):
-        # Decorator for methods that read actual Collection data.
-        @functools.wraps(orig_func)
-        def wrapper(self, *args, **kwargs):
-            if self._streams is None:
-                self._populate()
-            return orig_func(self, *args, **kwargs)
-        return wrapper
-
-    @_populate_first
-    def api_response(self):
-        """api_response() -> dict or None
-
-        Returns information about this Collection fetched from the API server.
-        If the Collection exists in Keep but not the API server, currently
-        returns None.  Future versions may provide a synthetic response.
-        """
-        return self._api_response
-
-    @_populate_first
-    def normalize(self):
-        # Rearrange streams
-        streams = {}
-        for s in self.all_streams():
-            for f in s.all_files():
-                streamname, filename = split(s.name() + "/" + f.name())
-                if streamname not in streams:
-                    streams[streamname] = {}
-                if filename not in streams[streamname]:
-                    streams[streamname][filename] = []
-                for r in f.segments:
-                    streams[streamname][filename].extend(s.locators_and_ranges(r.locator, r.range_size))
-
-        self._streams = [normalize_stream(s, streams[s])
-                         for s in sorted(streams)]
-
-        # Regenerate the manifest text based on the normalized streams
-        self._manifest_text = ''.join(
-            [StreamReader(stream, keep=self._my_keep()).manifest_text()
-             for stream in self._streams])
-
-    @_populate_first
-    def open(self, streampath, filename=None):
-        """open(streampath[, filename]) -> file-like object
-
-        Pass in the path of a file to read from the Collection, either as a
-        single string or as two separate stream name and file name arguments.
-        This method returns a file-like object to read that file.
-        """
-        if filename is None:
-            streampath, filename = split(streampath)
-        keep_client = self._my_keep()
-        for stream_s in self._streams:
-            stream = StreamReader(stream_s, keep_client,
-                                  num_retries=self.num_retries)
-            if stream.name() == streampath:
-                break
-        else:
-            raise ValueError("stream '{}' not found in Collection".
-                             format(streampath))
-        try:
-            return stream.files()[filename]
-        except KeyError:
-            raise ValueError("file '{}' not found in Collection stream '{}'".
-                             format(filename, streampath))
-
-    @_populate_first
-    def all_streams(self):
-        return [StreamReader(s, self._my_keep(), num_retries=self.num_retries)
-                for s in self._streams]
-
-    def all_files(self):
-        for s in self.all_streams():
-            for f in s.all_files():
-                yield f
-
-    @_populate_first
-    def manifest_text(self, strip=False, normalize=False):
-        if normalize:
-            cr = CollectionReader(self.manifest_text())
-            cr.normalize()
-            return cr.manifest_text(strip=strip, normalize=False)
-        elif strip:
-            return self.stripped_manifest()
-        else:
-            return self._manifest_text
-
-
-class _WriterFile(ArvadosFileBase):
+class _WriterFile(_FileLikeObjectBase):
     def __init__(self, coll_writer, name):
         super(_WriterFile, self).__init__(name, 'wb')
         self.dest = coll_writer
     def __init__(self, coll_writer, name):
         super(_WriterFile, self).__init__(name, 'wb')
         self.dest = coll_writer
@@ -253,16 +67,16 @@ class _WriterFile(ArvadosFileBase):
         super(_WriterFile, self).close()
         self.dest.finish_current_file()
 
         super(_WriterFile, self).close()
         self.dest.finish_current_file()
 
-    @ArvadosFileBase._before_close
+    @_FileLikeObjectBase._before_close
     def write(self, data):
         self.dest.write(data)
 
     def write(self, data):
         self.dest.write(data)
 
-    @ArvadosFileBase._before_close
+    @_FileLikeObjectBase._before_close
     def writelines(self, seq):
         for data in seq:
             self.write(data)
 
     def writelines(self, seq):
         for data in seq:
             self.write(data)
 
-    @ArvadosFileBase._before_close
+    @_FileLikeObjectBase._before_close
     def flush(self):
         self.dest.flush_data()
 
     def flush(self):
         self.dest.flush_data()
 
@@ -656,17 +470,26 @@ class ResumableCollectionWriter(CollectionWriter):
                 "resumable writer can't accept unsourced data")
         return super(ResumableCollectionWriter, self).write(data)
 
                 "resumable writer can't accept unsourced data")
         return super(ResumableCollectionWriter, self).write(data)
 
+
 ADD = "add"
 DEL = "del"
 MOD = "mod"
 ADD = "add"
 DEL = "del"
 MOD = "mod"
+TOK = "tok"
+FILE = "file"
+COLLECTION = "collection"
+
+class RichCollectionBase(CollectionBase):
+    """Base class for Collections and Subcollections.
 
 
-class SynchronizedCollectionBase(CollectionBase):
-    """Base class for Collections and Subcollections.  Implements the majority of
-    functionality relating to accessing items in the Collection."""
+    Implements the majority of functionality relating to accessing items in the
+    Collection.
+
+    """
 
     def __init__(self, parent=None):
         self.parent = parent
 
     def __init__(self, parent=None):
         self.parent = parent
-        self._modified = True
+        self._committed = False
+        self._callback = None
         self._items = {}
 
     def _my_api(self):
         self._items = {}
 
     def _my_api(self):
@@ -678,10 +501,7 @@ class SynchronizedCollectionBase(CollectionBase):
     def _my_block_manager(self):
         raise NotImplementedError()
 
     def _my_block_manager(self):
         raise NotImplementedError()
 
-    def _populate(self):
-        raise NotImplementedError()
-
-    def sync_mode(self):
+    def writable(self):
         raise NotImplementedError()
 
     def root_collection(self):
         raise NotImplementedError()
 
     def root_collection(self):
@@ -690,61 +510,95 @@ class SynchronizedCollectionBase(CollectionBase):
     def notify(self, event, collection, name, item):
         raise NotImplementedError()
 
     def notify(self, event, collection, name, item):
         raise NotImplementedError()
 
+    def stream_name(self):
+        raise NotImplementedError()
+
+    @must_be_writable
     @synchronized
     @synchronized
-    def find(self, path, create=False, create_collection=False):
-        """Recursively search the specified file path.  May return either a Collection
-        or ArvadosFile.
-
-        :create:
-          If true, create path components (i.e. Collections) that are
-          missing.  If "create" is False, return None if a path component is
-          not found.
-
-        :create_collection:
-          If the path is not found, "create" is True, and
-          "create_collection" is False, then create and return a new
-          ArvadosFile for the last path component.  If "create_collection" is
-          True, then create and return a new Collection for the last path
-          component.
+    def find_or_create(self, path, create_type):
+        """Recursively search the specified file path.
+
+        May return either a `Collection` or `ArvadosFile`.  If not found, will
+        create a new item at the specified path based on `create_type`.  Will
+        create intermediate subcollections needed to contain the final item in
+        the path.
+
+        :create_type:
+          One of `arvados.collection.FILE` or
+          `arvados.collection.COLLECTION`.  If the path is not found, and value
+          of create_type is FILE then create and return a new ArvadosFile for
+          the last path component.  If COLLECTION, then create and return a new
+          Collection for the last path component.
 
         """
 
         """
-        if create and self.sync_mode() == SYNC_READONLY:
-            raise IOError((errno.EROFS, "Collection is read only"))
-
-        p = path.split("/")
-        if p[0] == '.':
-            del p[0]
-
-        if p and p[0]:
-            item = self._items.get(p[0])
-            if len(p) == 1:
-                # item must be a file
-                if item is None and create:
+
+        pathcomponents = path.split("/", 1)
+        if pathcomponents[0]:
+            item = self._items.get(pathcomponents[0])
+            if len(pathcomponents) == 1:
+                if item is None:
                     # create new file
                     # create new file
-                    if create_collection:
-                        item = Subcollection(self)
+                    if create_type == COLLECTION:
+                        item = Subcollection(self, pathcomponents[0])
                     else:
                     else:
-                        item = ArvadosFile(self)
-                    self._items[p[0]] = item
-                    self._modified = True
-                    self.notify(ADD, self, p[0], item)
+                        item = ArvadosFile(self, pathcomponents[0])
+                    self._items[pathcomponents[0]] = item
+                    self._committed = False
+                    self.notify(ADD, self, pathcomponents[0], item)
                 return item
             else:
                 return item
             else:
-                if item is None and create:
+                if item is None:
                     # create new collection
                     # create new collection
-                    item = Subcollection(self)
-                    self._items[p[0]] = item
-                    self._modified = True
-                    self.notify(ADD, self, p[0], item)
-                del p[0]
-                if isinstance(item, SynchronizedCollectionBase):
-                    return item.find("/".join(p), create=create)
+                    item = Subcollection(self, pathcomponents[0])
+                    self._items[pathcomponents[0]] = item
+                    self._committed = False
+                    self.notify(ADD, self, pathcomponents[0], item)
+                if isinstance(item, RichCollectionBase):
+                    return item.find_or_create(pathcomponents[1], create_type)
                 else:
                 else:
-                    raise errors.ArgumentError("Interior path components must be subcollection")
+                    raise IOError(errno.ENOTDIR, "Not a directory: '%s'" % pathcomponents[0])
         else:
             return self
 
         else:
             return self
 
-    def open(self, path, mode):
+    @synchronized
+    def find(self, path):
+        """Recursively search the specified file path.
+
+        May return either a Collection or ArvadosFile.  Return None if not
+        found.
+
+        """
+        if not path:
+            raise errors.ArgumentError("Parameter 'path' is empty.")
+
+        pathcomponents = path.split("/", 1)
+        item = self._items.get(pathcomponents[0])
+        if len(pathcomponents) == 1:
+            return item
+        else:
+            if isinstance(item, RichCollectionBase):
+                if pathcomponents[1]:
+                    return item.find(pathcomponents[1])
+                else:
+                    return item
+            else:
+                raise IOError(errno.ENOTDIR, "Is not a directory: %s" % pathcomponents[0])
+
+    @synchronized
+    def mkdirs(self, path):
+        """Recursive subcollection create.
+
+        Like `os.makedirs()`.  Will create intermediate subcollections needed
+        to contain the leaf subcollection path.
+
+        """
+
+        if self.find(path) != None:
+            raise IOError(errno.EEXIST, "Directory or file exists: '%s'" % path)
+
+        return self.find_or_create(path, COLLECTION)
+
+    def open(self, path, mode="r"):
         """Open a file-like object for access.
 
         :path:
         """Open a file-like object for access.
 
         :path:
@@ -764,71 +618,76 @@ class SynchronizedCollectionBase(CollectionBase):
         """
         mode = mode.replace("b", "")
         if len(mode) == 0 or mode[0] not in ("r", "w", "a"):
         """
         mode = mode.replace("b", "")
         if len(mode) == 0 or mode[0] not in ("r", "w", "a"):
-            raise ArgumentError("Bad mode '%s'" % mode)
+            raise errors.ArgumentError("Bad mode '%s'" % mode)
         create = (mode != "r")
 
         create = (mode != "r")
 
-        if create and self.sync_mode() == SYNC_READONLY:
-            raise IOError((errno.EROFS, "Collection is read only"))
+        if create and not self.writable():
+            raise IOError(errno.EROFS, "Collection is read only")
 
 
-        f = self.find(path, create=create)
+        if create:
+            arvfile = self.find_or_create(path, FILE)
+        else:
+            arvfile = self.find(path)
 
 
-        if f is None:
-            raise IOError((errno.ENOENT, "File not found"))
-        if not isinstance(f, ArvadosFile):
-            raise IOError((errno.EISDIR, "Path must refer to a file."))
+        if arvfile is None:
+            raise IOError(errno.ENOENT, "File not found")
+        if not isinstance(arvfile, ArvadosFile):
+            raise IOError(errno.EISDIR, "Is a directory: %s" % path)
 
         if mode[0] == "w":
 
         if mode[0] == "w":
-            f.truncate(0)
+            arvfile.truncate(0)
+
+        name = os.path.basename(path)
 
         if mode == "r":
 
         if mode == "r":
-            return ArvadosFileReader(f, path, mode, num_retries=self.num_retries)
+            return ArvadosFileReader(arvfile, num_retries=self.num_retries)
         else:
         else:
-            return ArvadosFileWriter(f, path, mode, num_retries=self.num_retries)
+            return ArvadosFileWriter(arvfile, mode, num_retries=self.num_retries)
 
 
-    @synchronized
     def modified(self):
     def modified(self):
-        """Test if the collection (or any subcollection or file) has been modified
-        since it was created."""
-        if self._modified:
-            return True
-        for k,v in self._items.items():
-            if v.modified():
-                return True
-        return False
+        """Determine if the collection has been modified since last commited."""
+        return not self.committed()
 
     @synchronized
 
     @synchronized
-    def set_unmodified(self):
-        """Recursively clear modified flag"""
-        self._modified = False
+    def committed(self):
+        """Determine if the collection has been committed to the API server."""
+
+        if self._committed is False:
+            return False
+        for v in self._items.values():
+            if v.committed() is False:
+                return False
+        return True
+
+    @synchronized
+    def set_committed(self):
+        """Recursively set committed flag to True."""
+        self._committed = True
         for k,v in self._items.items():
         for k,v in self._items.items():
-            v.set_unmodified()
+            v.set_committed()
 
     @synchronized
     def __iter__(self):
         """Iterate over names of files and collections contained in this collection."""
 
     @synchronized
     def __iter__(self):
         """Iterate over names of files and collections contained in this collection."""
-        return self._items.keys().__iter__()
-
-    @synchronized
-    def iterkeys(self):
-        """Iterate over names of files and collections directly contained in this collection."""
-        return self._items.keys()
+        return iter(self._items.keys())
 
     @synchronized
     def __getitem__(self, k):
 
     @synchronized
     def __getitem__(self, k):
-        """Get a file or collection that is directly contained by this collection.  If
-        you want to search a path, use `find()` instead.
+        """Get a file or collection that is directly contained by this collection.
+
+        If you want to search a path, use `find()` instead.
+
         """
         return self._items[k]
 
     @synchronized
     def __contains__(self, k):
         """
         return self._items[k]
 
     @synchronized
     def __contains__(self, k):
-        """If there is a file or collection a directly contained by this collection
-        with name "k"."""
+        """Test if there is a file or collection a directly contained by this collection."""
         return k in self._items
 
     @synchronized
     def __len__(self):
         return k in self._items
 
     @synchronized
     def __len__(self):
-        """Get the number of items directly contained in this collection"""
+        """Get the number of items directly contained in this collection."""
         return len(self._items)
 
     @must_be_writable
         return len(self._items)
 
     @must_be_writable
@@ -836,7 +695,7 @@ class SynchronizedCollectionBase(CollectionBase):
     def __delitem__(self, p):
         """Delete an item by name which is directly contained by this collection."""
         del self._items[p]
     def __delitem__(self, p):
         """Delete an item by name which is directly contained by this collection."""
         del self._items[p]
-        self._modified = True
+        self._committed = False
         self.notify(DEL, self, p, None)
 
     @synchronized
         self.notify(DEL, self, p, None)
 
     @synchronized
@@ -855,128 +714,254 @@ class SynchronizedCollectionBase(CollectionBase):
         return self._items.items()
 
     def exists(self, path):
         return self._items.items()
 
     def exists(self, path):
-        """Test if there is a file or collection at "path" """
-        return self.find(path) != None
+        """Test if there is a file or collection at `path`."""
+        return self.find(path) is not None
 
     @must_be_writable
     @synchronized
 
     @must_be_writable
     @synchronized
-    def remove(self, path, rm_r=False):
+    def remove(self, path, recursive=False):
         """Remove the file or subcollection (directory) at `path`.
         """Remove the file or subcollection (directory) at `path`.
-        :rm_r:
+
+        :recursive:
           Specify whether to remove non-empty subcollections (True), or raise an error (False).
         """
           Specify whether to remove non-empty subcollections (True), or raise an error (False).
         """
-        p = path.split("/")
-        if p[0] == '.':
-            # Remove '.' from the front of the path
-            del p[0]
-
-        if len(p) > 0:
-            item = self._items.get(p[0])
-            if item is None:
-                raise IOError((errno.ENOENT, "File not found"))
-            if len(p) == 1:
-                if isinstance(self._items[p[0]], SynchronizedCollectionBase) and len(self._items[p[0]]) > 0 and not rm_r:
-                    raise IOError((errno.ENOTEMPTY, "Subcollection not empty"))
-                d = self._items[p[0]]
-                del self._items[p[0]]
-                self._modified = True
-                self.notify(DEL, self, p[0], d)
-            else:
-                del p[0]
-                item.remove("/".join(p))
+
+        if not path:
+            raise errors.ArgumentError("Parameter 'path' is empty.")
+
+        pathcomponents = path.split("/", 1)
+        item = self._items.get(pathcomponents[0])
+        if item is None:
+            raise IOError(errno.ENOENT, "File not found")
+        if len(pathcomponents) == 1:
+            if isinstance(self._items[pathcomponents[0]], RichCollectionBase) and len(self._items[pathcomponents[0]]) > 0 and not recursive:
+                raise IOError(errno.ENOTEMPTY, "Subcollection not empty")
+            deleteditem = self._items[pathcomponents[0]]
+            del self._items[pathcomponents[0]]
+            self._committed = False
+            self.notify(DEL, self, pathcomponents[0], deleteditem)
         else:
         else:
-            raise IOError((errno.ENOENT, "File not found"))
+            item.remove(pathcomponents[1])
 
 
-    def _cloneinto(self, target):
-        for k,v in self._items.items():
-            target._items[k] = v.clone(target)
+    def _clonefrom(self, source):
+        for k,v in source.items():
+            self._items[k] = v.clone(self, k)
 
     def clone(self):
         raise NotImplementedError()
 
     @must_be_writable
     @synchronized
 
     def clone(self):
         raise NotImplementedError()
 
     @must_be_writable
     @synchronized
-    def copy(self, source, target_path, source_collection=None, overwrite=False):
-        """Copy a file or subcollection to a new path in this collection.
-
-        :source:
-          An ArvadosFile, Subcollection, or string with a path to source file or subcollection
+    def add(self, source_obj, target_name, overwrite=False, reparent=False):
+        """Copy or move a file or subcollection to this collection.
 
 
-        :target_path:
-          Destination file or path.  If the target path already exists and is a
-          subcollection, the item will be placed inside the subcollection.  If
-          the target path already exists and is a file, this will raise an error
-          unless you specify `overwrite=True`.
+        :source_obj:
+          An ArvadosFile, or Subcollection object
 
 
-        :source_collection:
-          Collection to copy `source_path` from (default `self`)
+        :target_name:
+          Destination item name.  If the target name already exists and is a
+          file, this will raise an error unless you specify `overwrite=True`.
 
         :overwrite:
           Whether to overwrite target file if it already exists.
 
         :overwrite:
           Whether to overwrite target file if it already exists.
+
+        :reparent:
+          If True, source_obj will be moved from its parent collection to this collection.
+          If False, source_obj will be copied and the parent collection will be
+          unmodified.
+
         """
         """
+
+        if target_name in self and not overwrite:
+            raise IOError(errno.EEXIST, "File already exists")
+
+        modified_from = None
+        if target_name in self:
+            modified_from = self[target_name]
+
+        # Actually make the move or copy.
+        if reparent:
+            source_obj._reparent(self, target_name)
+            item = source_obj
+        else:
+            item = source_obj.clone(self, target_name)
+
+        self._items[target_name] = item
+        self._committed = False
+
+        if modified_from:
+            self.notify(MOD, self, target_name, (modified_from, item))
+        else:
+            self.notify(ADD, self, target_name, item)
+
+    def _get_src_target(self, source, target_path, source_collection, create_dest):
         if source_collection is None:
             source_collection = self
 
         if source_collection is None:
             source_collection = self
 
-        # Find the object to copy
+        # Find the object
         if isinstance(source, basestring):
             source_obj = source_collection.find(source)
             if source_obj is None:
         if isinstance(source, basestring):
             source_obj = source_collection.find(source)
             if source_obj is None:
-                raise IOError((errno.ENOENT, "File not found"))
-            sp = source.split("/")
+                raise IOError(errno.ENOENT, "File not found")
+            sourcecomponents = source.split("/")
         else:
             source_obj = source
         else:
             source_obj = source
-            sp = None
+            sourcecomponents = None
 
         # Find parent collection the target path
 
         # Find parent collection the target path
-        tp = target_path.split("/")
+        targetcomponents = target_path.split("/")
 
         # Determine the name to use.
 
         # Determine the name to use.
-        target_name = tp[-1] if tp[-1] else (sp[-1] if sp else None)
+        target_name = targetcomponents[-1] if targetcomponents[-1] else sourcecomponents[-1]
 
         if not target_name:
             raise errors.ArgumentError("Target path is empty and source is an object.  Cannot determine destination filename to use.")
 
 
         if not target_name:
             raise errors.ArgumentError("Target path is empty and source is an object.  Cannot determine destination filename to use.")
 
-        target_dir = self.find("/".join(tp[0:-1]), create=True, create_collection=True)
+        if create_dest:
+            target_dir = self.find_or_create("/".join(targetcomponents[0:-1]), COLLECTION)
+        else:
+            if len(targetcomponents) > 1:
+                target_dir = self.find("/".join(targetcomponents[0:-1]))
+            else:
+                target_dir = self
 
 
-        with target_dir.lock:
-            if target_name in target_dir:
-                if isinstance(target_dir[target_name], SynchronizedCollectionBase) and sp:
-                    target_dir = target_dir[target_name]
-                    target_name = sp[-1]
-                elif not overwrite:
-                    raise IOError((errno.EEXIST, "File already exists"))
+        if target_dir is None:
+            raise IOError(errno.ENOENT, "Target directory not found.")
 
 
-            mod = None
-            if target_name in target_dir:
-                mod = target_dir[target_name]
+        if target_name in target_dir and isinstance(self[target_name], RichCollectionBase) and sourcecomponents:
+            target_dir = target_dir[target_name]
+            target_name = sourcecomponents[-1]
 
 
-            # Actually make the copy.
-            dup = source_obj.clone(target_dir)
-            target_dir._items[target_name] = dup
-            target_dir._modified = True
+        return (source_obj, target_dir, target_name)
 
 
-        if mod:
-            self.notify(MOD, target_dir, target_name, (mod, dup))
-        else:
-            self.notify(ADD, target_dir, target_name, dup)
+    @must_be_writable
+    @synchronized
+    def copy(self, source, target_path, source_collection=None, overwrite=False):
+        """Copy a file or subcollection to a new path in this collection.
+
+        :source:
+          A string with a path to source file or subcollection, or an actual ArvadosFile or Subcollection object.
+
+        :target_path:
+          Destination file or path.  If the target path already exists and is a
+          subcollection, the item will be placed inside the subcollection.  If
+          the target path already exists and is a file, this will raise an error
+          unless you specify `overwrite=True`.
+
+        :source_collection:
+          Collection to copy `source_path` from (default `self`)
+
+        :overwrite:
+          Whether to overwrite target file if it already exists.
+        """
+
+        source_obj, target_dir, target_name = self._get_src_target(source, target_path, source_collection, True)
+        target_dir.add(source_obj, target_name, overwrite, False)
+
+    @must_be_writable
+    @synchronized
+    def rename(self, source, target_path, source_collection=None, overwrite=False):
+        """Move a file or subcollection from `source_collection` to a new path in this collection.
+
+        :source:
+          A string with a path to source file or subcollection.
+
+        :target_path:
+          Destination file or path.  If the target path already exists and is a
+          subcollection, the item will be placed inside the subcollection.  If
+          the target path already exists and is a file, this will raise an error
+          unless you specify `overwrite=True`.
+
+        :source_collection:
+          Collection to copy `source_path` from (default `self`)
+
+        :overwrite:
+          Whether to overwrite target file if it already exists.
+        """
+
+        source_obj, target_dir, target_name = self._get_src_target(source, target_path, source_collection, False)
+        if not source_obj.writable():
+            raise IOError(errno.EROFS, "Source collection is read only.")
+        target_dir.add(source_obj, target_name, overwrite, True)
+
+    def portable_manifest_text(self, stream_name="."):
+        """Get the manifest text for this collection, sub collections and files.
+
+        This method does not flush outstanding blocks to Keep.  It will return
+        a normalized manifest with access tokens stripped.
+
+        :stream_name:
+          Name to use for this stream (directory)
+
+        """
+        return self._get_manifest_text(stream_name, True, True)
 
     @synchronized
 
     @synchronized
-    def manifest_text(self, strip=False, normalize=False):
+    def manifest_text(self, stream_name=".", strip=False, normalize=False):
         """Get the manifest text for this collection, sub collections and files.
 
         """Get the manifest text for this collection, sub collections and files.
 
+        This method will flush outstanding blocks to Keep.  By default, it will
+        not normalize an unmodified manifest or strip access tokens.
+
+        :stream_name:
+          Name to use for this stream (directory)
+
         :strip:
           If True, remove signing tokens from block locators if present.
         :strip:
           If True, remove signing tokens from block locators if present.
-          If False, block locators are left unchanged.
+          If False (default), block locators are left unchanged.
 
         :normalize:
           If True, always export the manifest text in normalized form
 
         :normalize:
           If True, always export the manifest text in normalized form
-          even if the Collection is not modified.  If False and the collection
+          even if the Collection is not modified.  If False (default) and the collection
           is not modified, return the original manifest text even if it is not
           in normalized form.
 
         """
           is not modified, return the original manifest text even if it is not
           in normalized form.
 
         """
-        if self.modified() or self._manifest_text is None or normalize:
-            return export_manifest(self, stream_name=".", portable_locators=strip)
+
+        self._my_block_manager().commit_all()
+        return self._get_manifest_text(stream_name, strip, normalize)
+
+    @synchronized
+    def _get_manifest_text(self, stream_name, strip, normalize):
+        """Get the manifest text for this collection, sub collections and files.
+
+        :stream_name:
+          Name to use for this stream (directory)
+
+        :strip:
+          If True, remove signing tokens from block locators if present.
+          If False (default), block locators are left unchanged.
+
+        :normalize:
+          If True, always export the manifest text in normalized form
+          even if the Collection is not modified.  If False (default) and the collection
+          is not modified, return the original manifest text even if it is not
+          in normalized form.
+
+        """
+
+        if not self.committed() or self._manifest_text is None or normalize:
+            stream = {}
+            buf = []
+            sorted_keys = sorted(self.keys())
+            for filename in [s for s in sorted_keys if isinstance(self[s], ArvadosFile)]:
+                # Create a stream per file `k`
+                arvfile = self[filename]
+                filestream = []
+                for segment in arvfile.segments():
+                    loc = segment.locator
+                    if arvfile.parent._my_block_manager().is_bufferblock(loc):
+                        loc = arvfile.parent._my_block_manager().get_bufferblock(loc).locator()
+                    if strip:
+                        loc = KeepLocator(loc).stripped()
+                    filestream.append(LocatorAndRange(loc, KeepLocator(loc).size,
+                                         segment.segment_offset, segment.range_size))
+                stream[filename] = filestream
+            if stream:
+                buf.append(" ".join(normalize_stream(stream_name, stream)) + "\n")
+            for dirname in [s for s in sorted_keys if isinstance(self[s], RichCollectionBase)]:
+                buf.append(self[dirname].manifest_text(stream_name=os.path.join(stream_name, dirname), strip=strip, normalize=True))
+            return "".join(buf)
         else:
             if strip:
                 return self.stripped_manifest()
         else:
             if strip:
                 return self.stripped_manifest()
@@ -985,40 +970,48 @@ class SynchronizedCollectionBase(CollectionBase):
 
     @synchronized
     def diff(self, end_collection, prefix=".", holding_collection=None):
 
     @synchronized
     def diff(self, end_collection, prefix=".", holding_collection=None):
-        """
-        Generate list of add/modify/delete actions which, when given to `apply`, will
-        change `self` to match `end_collection`
+        """Generate list of add/modify/delete actions.
+
+        When given to `apply`, will change `self` to match `end_collection`
+
         """
         changes = []
         if holding_collection is None:
         """
         changes = []
         if holding_collection is None:
-            holding_collection = CollectionRoot(api_client=self._my_api(), keep_client=self._my_keep(), sync=SYNC_READONLY)
+            holding_collection = Collection(api_client=self._my_api(), keep_client=self._my_keep())
         for k in self:
             if k not in end_collection:
         for k in self:
             if k not in end_collection:
-               changes.append((DEL, os.path.join(prefix, k), self[k].clone(holding_collection)))
+               changes.append((DEL, os.path.join(prefix, k), self[k].clone(holding_collection, "")))
         for k in end_collection:
             if k in self:
                 if isinstance(end_collection[k], Subcollection) and isinstance(self[k], Subcollection):
                     changes.extend(self[k].diff(end_collection[k], os.path.join(prefix, k), holding_collection))
                 elif end_collection[k] != self[k]:
         for k in end_collection:
             if k in self:
                 if isinstance(end_collection[k], Subcollection) and isinstance(self[k], Subcollection):
                     changes.extend(self[k].diff(end_collection[k], os.path.join(prefix, k), holding_collection))
                 elif end_collection[k] != self[k]:
-                    changes.append((MOD, os.path.join(prefix, k), self[k].clone(holding_collection), end_collection[k].clone(holding_collection)))
+                    changes.append((MOD, os.path.join(prefix, k), self[k].clone(holding_collection, ""), end_collection[k].clone(holding_collection, "")))
+                else:
+                    changes.append((TOK, os.path.join(prefix, k), self[k].clone(holding_collection, ""), end_collection[k].clone(holding_collection, "")))
             else:
             else:
-                changes.append((ADD, os.path.join(prefix, k), end_collection[k].clone(holding_collection)))
+                changes.append((ADD, os.path.join(prefix, k), end_collection[k].clone(holding_collection, "")))
         return changes
 
     @must_be_writable
     @synchronized
     def apply(self, changes):
         return changes
 
     @must_be_writable
     @synchronized
     def apply(self, changes):
+        """Apply changes from `diff`.
+
+        If a change conflicts with a local change, it will be saved to an
+        alternate path indicating the conflict.
+
         """
         """
-        Apply changes from `diff`.  If a change conflicts with a local change, it
-        will be saved to an alternate path indicating the conflict.
-        """
-        for c in changes:
-            path = c[1]
-            initial = c[2]
+        if changes:
+            self._committed = False
+        for change in changes:
+            event_type = change[0]
+            path = change[1]
+            initial = change[2]
             local = self.find(path)
             local = self.find(path)
-            conflictpath = "%s~conflict-%s~" % (path, time.strftime("%Y-%m-%d-%H:%M:%S",
+            conflictpath = "%s~%s~conflict~" % (path, time.strftime("%Y%m%d-%H%M%S",
                                                                     time.gmtime()))
                                                                     time.gmtime()))
-            if c[0] == ADD:
+            if event_type == ADD:
                 if local is None:
                     # No local file at path, safe to copy over new file
                     self.copy(initial, path)
                 if local is None:
                     # No local file at path, safe to copy over new file
                     self.copy(initial, path)
@@ -1026,38 +1019,57 @@ class SynchronizedCollectionBase(CollectionBase):
                     # There is already local file and it is different:
                     # save change to conflict file.
                     self.copy(initial, conflictpath)
                     # There is already local file and it is different:
                     # save change to conflict file.
                     self.copy(initial, conflictpath)
-            elif c[0] == MOD:
+            elif event_type == MOD or event_type == TOK:
+                final = change[3]
                 if local == initial:
                     # Local matches the "initial" item so it has not
                     # changed locally and is safe to update.
                 if local == initial:
                     # Local matches the "initial" item so it has not
                     # changed locally and is safe to update.
-                    if isinstance(local, ArvadosFile) and isinstance(c[3], ArvadosFile):
+                    if isinstance(local, ArvadosFile) and isinstance(final, ArvadosFile):
                         # Replace contents of local file with new contents
                         # Replace contents of local file with new contents
-                        local.replace_contents(c[3])
+                        local.replace_contents(final)
                     else:
                         # Overwrite path with new item; this can happen if
                         # path was a file and is now a collection or vice versa
                     else:
                         # Overwrite path with new item; this can happen if
                         # path was a file and is now a collection or vice versa
-                        self.copy(c[3], path, overwrite=True)
+                        self.copy(final, path, overwrite=True)
                 else:
                     # Local is missing (presumably deleted) or local doesn't
                     # match the "start" value, so save change to conflict file
                 else:
                     # Local is missing (presumably deleted) or local doesn't
                     # match the "start" value, so save change to conflict file
-                    self.copy(c[3], conflictpath)
-            elif c[0] == DEL:
+                    self.copy(final, conflictpath)
+            elif event_type == DEL:
                 if local == initial:
                     # Local item matches "initial" value, so it is safe to remove.
                 if local == initial:
                     # Local item matches "initial" value, so it is safe to remove.
-                    self.remove(path, rm_r=True)
+                    self.remove(path, recursive=True)
                 # else, the file is modified or already removed, in either
                 # case we don't want to try to remove it.
 
     def portable_data_hash(self):
         """Get the portable data hash for this collection's manifest."""
                 # else, the file is modified or already removed, in either
                 # case we don't want to try to remove it.
 
     def portable_data_hash(self):
         """Get the portable data hash for this collection's manifest."""
-        stripped = self.manifest_text(strip=True)
+        stripped = self.portable_manifest_text()
         return hashlib.md5(stripped).hexdigest() + '+' + str(len(stripped))
 
         return hashlib.md5(stripped).hexdigest() + '+' + str(len(stripped))
 
+    @synchronized
+    def subscribe(self, callback):
+        if self._callback is None:
+            self._callback = callback
+        else:
+            raise errors.ArgumentError("A callback is already set on this collection.")
+
+    @synchronized
+    def unsubscribe(self):
+        if self._callback is not None:
+            self._callback = None
+
+    @synchronized
+    def notify(self, event, collection, name, item):
+        if self._callback:
+            self._callback(event, collection, name, item)
+        self.root_collection().notify(event, collection, name, item)
+
     @synchronized
     def __eq__(self, other):
         if other is self:
             return True
     @synchronized
     def __eq__(self, other):
         if other is self:
             return True
-        if not isinstance(other, SynchronizedCollectionBase):
+        if not isinstance(other, RichCollectionBase):
             return False
         if len(self._items) != len(other):
             return False
             return False
         if len(self._items) != len(other):
             return False
@@ -1071,11 +1083,22 @@ class SynchronizedCollectionBase(CollectionBase):
     def __ne__(self, other):
         return not self.__eq__(other)
 
     def __ne__(self, other):
         return not self.__eq__(other)
 
-class CollectionRoot(SynchronizedCollectionBase):
-    """Represents the root of an Arvados Collection, which may be associated with
-    an API server Collection record.
+    @synchronized
+    def flush(self):
+        """Flush bufferblocks to Keep."""
+        for e in self.values():
+            e.flush()
+
+
+class Collection(RichCollectionBase):
+    """Represents the root of an Arvados Collection.
+
+    This class is threadsafe.  The root collection object, all subcollections
+    and files are protected by a single lock (i.e. each access locks the entire
+    collection).
 
 
-    Brief summary of useful methods:
+    Brief summary of
+    useful methods:
 
     :To read an existing file:
       `c.open("myfile", "r")`
 
     :To read an existing file:
       `c.open("myfile", "r")`
@@ -1101,21 +1124,21 @@ class CollectionRoot(SynchronizedCollectionBase):
     :To merge remote changes into this object:
       `c.update()`
 
     :To merge remote changes into this object:
       `c.update()`
 
-    This class is threadsafe.  The root collection object, all subcollections
-    and files are protected by a single lock (i.e. each access locks the entire
-    collection).
+    Must be associated with an API server Collection record (during
+    initialization, or using `save_new`) to use `save` or `update`
 
     """
 
     def __init__(self, manifest_locator_or_text=None,
 
     """
 
     def __init__(self, manifest_locator_or_text=None,
-                 parent=None,
-                 apiconfig=None,
                  api_client=None,
                  keep_client=None,
                  num_retries=None,
                  api_client=None,
                  keep_client=None,
                  num_retries=None,
-                 block_manager=None,
-                 sync=None):
-        """:manifest_locator_or_text:
+                 parent=None,
+                 apiconfig=None,
+                 block_manager=None):
+        """Collection constructor.
+
+        :manifest_locator_or_text:
           One of Arvados collection UUID, block locator of
           a manifest, raw manifest text, or None (to create an empty collection).
         :parent:
           One of Arvados collection UUID, block locator of
           a manifest, raw manifest text, or None (to create an empty collection).
         :parent:
@@ -1132,19 +1155,9 @@ class CollectionRoot(SynchronizedCollectionBase):
           the number of retries for API and Keep requests.
         :block_manager:
           the block manager to use.  If not specified, create one.
           the number of retries for API and Keep requests.
         :block_manager:
           the block manager to use.  If not specified, create one.
-        :sync:
-          Set synchronization policy with API server collection record.
-          :SYNC_READONLY:
-            Collection is read only.  No synchronization.  This mode will
-            also forego locking, which gives better performance.
-          :SYNC_EXPLICIT:
-            Collection is writable.  Synchronize on explicit request via `update()` or `save()`
-          :SYNC_LIVE:
-            Collection is writable.  Synchronize with server in response to
-            background websocket events, on block write, or on file close.
 
         """
 
         """
-        super(CollectionRoot, self).__init__(parent)
+        super(Collection, self).__init__(parent)
         self._api_client = api_client
         self._keep_client = keep_client
         self._block_manager = block_manager
         self._api_client = api_client
         self._keep_client = keep_client
         self._block_manager = block_manager
@@ -1154,17 +1167,13 @@ class CollectionRoot(SynchronizedCollectionBase):
         else:
             self._config = config.settings()
 
         else:
             self._config = config.settings()
 
-        self.num_retries = num_retries
+        self.num_retries = num_retries if num_retries is not None else 0
         self._manifest_locator = None
         self._manifest_text = None
         self._api_response = None
         self._manifest_locator = None
         self._manifest_text = None
         self._api_response = None
+        self._past_versions = set()
 
 
-        if sync is None:
-            raise errors.ArgumentError("Must specify sync mode")
-
-        self._sync = sync
         self.lock = threading.RLock()
         self.lock = threading.RLock()
-        self.callbacks = []
         self.events = None
 
         if manifest_locator_or_text:
         self.events = None
 
         if manifest_locator_or_text:
@@ -1176,52 +1185,53 @@ class CollectionRoot(SynchronizedCollectionBase):
                 self._manifest_text = manifest_locator_or_text
             else:
                 raise errors.ArgumentError(
                 self._manifest_text = manifest_locator_or_text
             else:
                 raise errors.ArgumentError(
-                    "Argument to CollectionReader must be a manifest or a collection UUID")
-
-            self._populate()
-
-            if self._sync == SYNC_LIVE:
-                if not self._has_collection_uuid():
-                    raise errors.ArgumentError("Cannot SYNC_LIVE associated with a collection uuid")
-                self.events = events.subscribe(arvados.api(apiconfig=self._config),
-                                               [["object_uuid", "=", self._manifest_locator]],
-                                               self.on_message)
+                    "Argument to CollectionReader is not a manifest or a collection UUID")
 
 
+            try:
+                self._populate()
+            except (IOError, errors.SyntaxError) as e:
+                raise errors.ArgumentError("Error processing manifest text: %s", e)
 
     def root_collection(self):
         return self
 
 
     def root_collection(self):
         return self
 
-    def sync_mode(self):
-        return self._sync
+    def stream_name(self):
+        return "."
 
 
-    def on_message(self, event):
-        if event.get("object_uuid") == self._manifest_locator:
-            self.update()
+    def writable(self):
+        return True
 
 
-    @staticmethod
-    def create(name, owner_uuid=None, sync=SYNC_EXPLICIT, apiconfig=None):
-        """Create a new empty Collection with associated collection record."""
-        c = Collection(sync=SYNC_EXPLICIT, apiconfig=apiconfig)
-        c.save_new(name, owner_uuid=owner_uuid, ensure_unique_name=True)
-        if sync == SYNC_LIVE:
-            c.events = events.subscribe(arvados.api(apiconfig=self._config), [["object_uuid", "=", c._manifest_locator]], c.on_message)
-        return c
+    @synchronized
+    def known_past_version(self, modified_at_and_portable_data_hash):
+        return modified_at_and_portable_data_hash in self._past_versions
 
     @synchronized
     @retry_method
     def update(self, other=None, num_retries=None):
 
     @synchronized
     @retry_method
     def update(self, other=None, num_retries=None):
+        """Merge the latest collection on the API server with the current collection."""
+
         if other is None:
             if self._manifest_locator is None:
                 raise errors.ArgumentError("`other` is None but collection does not have a manifest_locator uuid")
         if other is None:
             if self._manifest_locator is None:
                 raise errors.ArgumentError("`other` is None but collection does not have a manifest_locator uuid")
-            n = self._my_api().collections().get(uuid=self._manifest_locator).execute(num_retries=num_retries)
-            other = import_collection(n["manifest_text"])
-        baseline = import_collection(self._manifest_text)
-        self.apply(other.diff(baseline))
+            response = self._my_api().collections().get(uuid=self._manifest_locator).execute(num_retries=num_retries)
+            if (self.known_past_version((response.get("modified_at"), response.get("portable_data_hash"))) and
+                response.get("portable_data_hash") != self.portable_data_hash()):
+                # The record on the server is different from our current one, but we've seen it before,
+                # so ignore it because it's already been merged.
+                # However, if it's the same as our current record, proceed with the update, because we want to update
+                # our tokens.
+                return
+            else:
+                self._past_versions.add((response.get("modified_at"), response.get("portable_data_hash")))
+            other = CollectionReader(response["manifest_text"])
+        baseline = CollectionReader(self._manifest_text)
+        self.apply(baseline.diff(other))
+        self._manifest_text = self.manifest_text()
 
     @synchronized
     def _my_api(self):
         if self._api_client is None:
 
     @synchronized
     def _my_api(self):
         if self._api_client is None:
-            self._api_client = arvados.SafeApi(self._config)
+            self._api_client = ThreadSafeApiCache(self._config)
             self._keep_client = self._api_client.keep
         return self._api_client
 
             self._keep_client = self._api_client.keep
         return self._api_client
 
@@ -1231,15 +1241,19 @@ class CollectionRoot(SynchronizedCollectionBase):
             if self._api_client is None:
                 self._my_api()
             else:
             if self._api_client is None:
                 self._my_api()
             else:
-                self._keep_client = KeepClient(api=self._api_client)
+                self._keep_client = KeepClient(api_client=self._api_client)
         return self._keep_client
 
     @synchronized
     def _my_block_manager(self):
         if self._block_manager is None:
         return self._keep_client
 
     @synchronized
     def _my_block_manager(self):
         if self._block_manager is None:
-            self._block_manager = BlockManager(self._my_keep())
+            self._block_manager = _BlockManager(self._my_keep())
         return self._block_manager
 
         return self._block_manager
 
+    def _remember_api_response(self, response):
+        self._api_response = response
+        self._past_versions.add((response.get("modified_at"), response.get("portable_data_hash")))
+
     def _populate_from_api_server(self):
         # As in KeepClient itself, we must wait until the last
         # possible moment to instantiate an API client, in order to
     def _populate_from_api_server(self):
         # As in KeepClient itself, we must wait until the last
         # possible moment to instantiate an API client, in order to
@@ -1249,9 +1263,9 @@ class CollectionRoot(SynchronizedCollectionBase):
         # clause, just like any other Collection lookup
         # failure. Return an exception, or None if successful.
         try:
         # clause, just like any other Collection lookup
         # failure. Return an exception, or None if successful.
         try:
-            self._api_response = self._my_api().collections().get(
+            self._remember_api_response(self._my_api().collections().get(
                 uuid=self._manifest_locator).execute(
                 uuid=self._manifest_locator).execute(
-                    num_retries=self.num_retries)
+                    num_retries=self.num_retries))
             self._manifest_text = self._api_response['manifest_text']
             return None
         except Exception as e:
             self._manifest_text = self._api_response['manifest_text']
             return None
         except Exception as e:
@@ -1290,7 +1304,7 @@ class CollectionRoot(SynchronizedCollectionBase):
             error_via_keep = self._populate_from_keep()
         if self._manifest_text is None:
             # Nothing worked!
             error_via_keep = self._populate_from_keep()
         if self._manifest_text is None:
             # Nothing worked!
-            raise arvados.errors.NotFoundError(
+            raise errors.NotFoundError(
                 ("Failed to retrieve collection '{}' " +
                  "from either API server ({}) or Keep ({})."
                  ).format(
                 ("Failed to retrieve collection '{}' " +
                  "from either API server ({}) or Keep ({})."
                  ).format(
@@ -1299,12 +1313,8 @@ class CollectionRoot(SynchronizedCollectionBase):
                     error_via_keep))
         # populate
         self._baseline_manifest = self._manifest_text
                     error_via_keep))
         # populate
         self._baseline_manifest = self._manifest_text
-        import_manifest(self._manifest_text, self)
+        self._import_manifest(self._manifest_text)
 
 
-        if self._sync == SYNC_READONLY:
-            # Now that we're populated, knowing that this will be readonly,
-            # forego any further locking.
-            self.lock = NoopLock()
 
     def _has_collection_uuid(self):
         return self._manifest_locator is not None and re.match(util.collection_uuid_pattern, self._manifest_locator)
 
     def _has_collection_uuid(self):
         return self._manifest_locator is not None and re.match(util.collection_uuid_pattern, self._manifest_locator)
@@ -1313,80 +1323,137 @@ class CollectionRoot(SynchronizedCollectionBase):
         return self
 
     def __exit__(self, exc_type, exc_value, traceback):
         return self
 
     def __exit__(self, exc_type, exc_value, traceback):
-        """Support scoped auto-commit in a with: block"""
-        if self._sync != SYNC_READONLY and self._has_collection_uuid():
-            self.save()
+        """Support scoped auto-commit in a with: block."""
+        if exc_type is None:
+            if self.writable() and self._has_collection_uuid():
+                self.save()
+        self.stop_threads()
+
+    def stop_threads(self):
         if self._block_manager is not None:
             self._block_manager.stop_threads()
 
     @synchronized
         if self._block_manager is not None:
             self._block_manager.stop_threads()
 
     @synchronized
-    def clone(self, new_parent=None, new_sync=SYNC_READONLY, new_config=None):
+    def manifest_locator(self):
+        """Get the manifest locator, if any.
+
+        The manifest locator will be set when the collection is loaded from an
+        API server record or the portable data hash of a manifest.
+
+        The manifest locator will be None if the collection is newly created or
+        was created directly from manifest text.  The method `save_new()` will
+        assign a manifest locator.
+
+        """
+        return self._manifest_locator
+
+    @synchronized
+    def clone(self, new_parent=None, new_name=None, readonly=False, new_config=None):
         if new_config is None:
             new_config = self._config
         if new_config is None:
             new_config = self._config
-        c = CollectionRoot(parent=new_parent, apiconfig=new_config, sync=new_sync)
-        if new_sync == SYNC_READONLY:
-            c.lock = NoopLock()
-        c._items = {}
-        self._cloneinto(c)
-        return c
+        if readonly:
+            newcollection = CollectionReader(parent=new_parent, apiconfig=new_config)
+        else:
+            newcollection = Collection(parent=new_parent, apiconfig=new_config)
+
+        newcollection._clonefrom(self)
+        return newcollection
 
     @synchronized
     def api_response(self):
 
     @synchronized
     def api_response(self):
-        """
-        api_response() -> dict or None
+        """Returns information about this Collection fetched from the API server.
 
 
-        Returns information about this Collection fetched from the API server.
         If the Collection exists in Keep but not the API server, currently
         returns None.  Future versions may provide a synthetic response.
         If the Collection exists in Keep but not the API server, currently
         returns None.  Future versions may provide a synthetic response.
+
         """
         return self._api_response
 
         """
         return self._api_response
 
+    def find_or_create(self, path, create_type):
+        """See `RichCollectionBase.find_or_create`"""
+        if path == ".":
+            return self
+        else:
+            return super(Collection, self).find_or_create(path[2:] if path.startswith("./") else path, create_type)
+
+    def find(self, path):
+        """See `RichCollectionBase.find`"""
+        if path == ".":
+            return self
+        else:
+            return super(Collection, self).find(path[2:] if path.startswith("./") else path)
+
+    def remove(self, path, recursive=False):
+        """See `RichCollectionBase.remove`"""
+        if path == ".":
+            raise errors.ArgumentError("Cannot remove '.'")
+        else:
+            return super(Collection, self).remove(path[2:] if path.startswith("./") else path, recursive)
+
     @must_be_writable
     @synchronized
     @retry_method
     def save(self, merge=True, num_retries=None):
     @must_be_writable
     @synchronized
     @retry_method
     def save(self, merge=True, num_retries=None):
-        """Commit pending buffer blocks to Keep, merge with remote record (if
-        update=True), write the manifest to Keep, and update the collection
-        record.  Will raise AssertionError if not associated with a collection
-        record on the API server.  If you want to save a manifest to Keep only,
-        see `save_new()`.
+        """Save collection to an existing collection record.
+
+        Commit pending buffer blocks to Keep, merge with remote record (if
+        merge=True, the default), and update the collection record.  Returns
+        the current manifest text.
+
+        Will raise AssertionError if not associated with a collection record on
+        the API server.  If you want to save a manifest to Keep only, see
+        `save_new()`.
 
 
-        :update:
+        :merge:
           Update and merge remote changes before saving.  Otherwise, any
           remote changes will be ignored and overwritten.
 
           Update and merge remote changes before saving.  Otherwise, any
           remote changes will be ignored and overwritten.
 
+        :num_retries:
+          Retry count on API calls (if None,  use the collection default)
+
         """
         """
-        if self.modified():
+        if not self.committed():
             if not self._has_collection_uuid():
             if not self._has_collection_uuid():
-                raise AssertionError("Collection manifest_locator must be a collection uuid.  Use save_as() for new collections.")
+                raise AssertionError("Collection manifest_locator is not a collection uuid.  Use save_new() for new collections.")
+
             self._my_block_manager().commit_all()
             self._my_block_manager().commit_all()
+
             if merge:
                 self.update()
             if merge:
                 self.update()
-            self._my_keep().put(self.manifest_text(strip=True), num_retries=num_retries)
 
 
-            mt = self.manifest_text(strip=False)
-            self._api_response = self._my_api().collections().update(
+            text = self.manifest_text(strip=False)
+            self._remember_api_response(self._my_api().collections().update(
                 uuid=self._manifest_locator,
                 uuid=self._manifest_locator,
-                body={'manifest_text': mt}
+                body={'manifest_text': text}
                 ).execute(
                 ).execute(
-                    num_retries=num_retries)
-            self._manifest_text = mt
-            self.set_unmodified()
+                    num_retries=num_retries))
+            self._manifest_text = self._api_response["manifest_text"]
+            self.set_committed()
+
+        return self._manifest_text
+
 
     @must_be_writable
     @synchronized
     @retry_method
 
     @must_be_writable
     @synchronized
     @retry_method
-    def save_new(self, name=None, create_collection_record=True, owner_uuid=None, ensure_unique_name=False, num_retries=None):
-        """Commit pending buffer blocks to Keep, write the manifest to Keep, and create
-        a new collection record (if create_collection_record True).  After
-        creating a new collection record, this Collection object will be
-        associated with the new record for `save()` and SYNC_LIVE updates.
+    def save_new(self, name=None,
+                 create_collection_record=True,
+                 owner_uuid=None,
+                 ensure_unique_name=False,
+                 num_retries=None):
+        """Save collection to a new collection record.
+
+        Commit pending buffer blocks to Keep and, when create_collection_record
+        is True (default), create a new collection record.  After creating a
+        new collection record, this Collection object will be associated with
+        the new record used by `save()`.  Returns the current manifest text.
 
         :name:
           The collection name.
 
 
         :name:
           The collection name.
 
-        :keep_only:
-          Only save the manifest to keep, do not create a collection record.
+        :create_collection_record:
+           If True, create a collection record on the API server.
+           If False, only commit blocks to Keep and return the manifest text.
 
         :owner_uuid:
           the user, or project uuid that will own this collection.
 
         :owner_uuid:
           the user, or project uuid that will own this collection.
@@ -1397,72 +1464,122 @@ class CollectionRoot(SynchronizedCollectionBase):
           if it conflicts with a collection with the same name and owner.  If
           False, a name conflict will result in an error.
 
           if it conflicts with a collection with the same name and owner.  If
           False, a name conflict will result in an error.
 
+        :num_retries:
+          Retry count on API calls (if None,  use the collection default)
+
         """
         self._my_block_manager().commit_all()
         """
         self._my_block_manager().commit_all()
-        self._my_keep().put(self.manifest_text(strip=True), num_retries=num_retries)
-        mt = self.manifest_text(strip=False)
+        text = self.manifest_text(strip=False)
 
         if create_collection_record:
             if name is None:
 
         if create_collection_record:
             if name is None:
-                name = "Collection created %s" % (time.strftime("%Y-%m-%d %H:%M:%S %Z", time.localtime()))
+                name = "New collection"
+                ensure_unique_name = True
 
 
-            body = {"manifest_text": mt,
+            body = {"manifest_text": text,
                     "name": name}
             if owner_uuid:
                 body["owner_uuid"] = owner_uuid
 
                     "name": name}
             if owner_uuid:
                 body["owner_uuid"] = owner_uuid
 
-            self._api_response = self._my_api().collections().create(ensure_unique_name=ensure_unique_name, body=body).execute(num_retries=num_retries)
-
-            if self.events:
-                self.events.unsubscribe(filters=[["object_uuid", "=", self._manifest_locator]])
+            self._remember_api_response(self._my_api().collections().create(ensure_unique_name=ensure_unique_name, body=body).execute(num_retries=num_retries))
+            text = self._api_response["manifest_text"]
 
             self._manifest_locator = self._api_response["uuid"]
 
 
             self._manifest_locator = self._api_response["uuid"]
 
-            if self.events:
-                self.events.subscribe(filters=[["object_uuid", "=", self._manifest_locator]])
+            self._manifest_text = text
+            self.set_committed()
 
 
-        self._manifest_text = mt
-        self.set_unmodified()
+        return text
 
     @synchronized
 
     @synchronized
-    def subscribe(self, callback):
-        self.callbacks.append(callback)
+    def _import_manifest(self, manifest_text):
+        """Import a manifest into a `Collection`.
 
 
-    @synchronized
-    def unsubscribe(self, callback):
-        self.callbacks.remove(callback)
+        :manifest_text:
+          The manifest text to import from.
 
 
-    @synchronized
-    def notify(self, event, collection, name, item):
-        for c in self.callbacks:
-            c(event, collection, name, item)
+        """
+        if len(self) > 0:
+            raise ArgumentError("Can only import manifest into an empty collection")
 
 
-def ReadOnlyCollection(*args, **kwargs):
-    kwargs["sync"] = SYNC_READONLY
-    return CollectionRoot(*args, **kwargs)
+        STREAM_NAME = 0
+        BLOCKS = 1
+        SEGMENTS = 2
+
+        stream_name = None
+        state = STREAM_NAME
+
+        for token_and_separator in re.finditer(r'(\S+)(\s+|$)', manifest_text):
+            tok = token_and_separator.group(1)
+            sep = token_and_separator.group(2)
+
+            if state == STREAM_NAME:
+                # starting a new stream
+                stream_name = tok.replace('\\040', ' ')
+                blocks = []
+                segments = []
+                streamoffset = 0L
+                state = BLOCKS
+                self.find_or_create(stream_name, COLLECTION)
+                continue
+
+            if state == BLOCKS:
+                block_locator = re.match(r'[0-9a-f]{32}\+(\d+)(\+\S+)*', tok)
+                if block_locator:
+                    blocksize = long(block_locator.group(1))
+                    blocks.append(Range(tok, streamoffset, blocksize, 0))
+                    streamoffset += blocksize
+                else:
+                    state = SEGMENTS
+
+            if state == SEGMENTS:
+                file_segment = re.search(r'^(\d+):(\d+):(\S+)', tok)
+                if file_segment:
+                    pos = long(file_segment.group(1))
+                    size = long(file_segment.group(2))
+                    name = file_segment.group(3).replace('\\040', ' ')
+                    filepath = os.path.join(stream_name, name)
+                    afile = self.find_or_create(filepath, FILE)
+                    if isinstance(afile, ArvadosFile):
+                        afile.add_segment(blocks, pos, size)
+                    else:
+                        raise errors.SyntaxError("File %s conflicts with stream of the same name.", filepath)
+                else:
+                    # error!
+                    raise errors.SyntaxError("Invalid manifest format, expected file segment but did not match format: '%s'" % tok)
 
 
-def WritableCollection(*args, **kwargs):
-    kwargs["sync"] = SYNC_EXPLICIT
-    return CollectionRoot(*args, **kwargs)
+            if sep == "\n":
+                stream_name = None
+                state = STREAM_NAME
 
 
-def LiveCollection(*args, **kwargs):
-    kwargs["sync"] = SYNC_LIVE
-    return CollectionRoot(*args, **kwargs)
+        self.set_committed()
 
 
+    @synchronized
+    def notify(self, event, collection, name, item):
+        if self._callback:
+            self._callback(event, collection, name, item)
 
 
-class Subcollection(SynchronizedCollectionBase):
+
+class Subcollection(RichCollectionBase):
     """This is a subdirectory within a collection that doesn't have its own API
     """This is a subdirectory within a collection that doesn't have its own API
-    server record.  It falls under the umbrella of the root collection."""
+    server record.
+
+    Subcollection locking falls under the umbrella lock of its root collection.
 
 
-    def __init__(self, parent):
+    """
+
+    def __init__(self, parent, name):
         super(Subcollection, self).__init__(parent)
         self.lock = self.root_collection().lock
         super(Subcollection, self).__init__(parent)
         self.lock = self.root_collection().lock
+        self._manifest_text = None
+        self.name = name
+        self.num_retries = parent.num_retries
 
     def root_collection(self):
         return self.parent.root_collection()
 
 
     def root_collection(self):
         return self.parent.root_collection()
 
-    def sync_mode(self):
-        return self.root_collection().sync_mode()
+    def writable(self):
+        return self.root_collection().writable()
 
     def _my_api(self):
         return self.root_collection()._my_api()
 
     def _my_api(self):
         return self.root_collection()._my_api()
@@ -1473,149 +1590,93 @@ class Subcollection(SynchronizedCollectionBase):
     def _my_block_manager(self):
         return self.root_collection()._my_block_manager()
 
     def _my_block_manager(self):
         return self.root_collection()._my_block_manager()
 
-    def _populate(self):
-        self.root_collection()._populate()
-
-    def notify(self, event, collection, name, item):
-        return self.root_collection().notify(event, collection, name, item)
+    def stream_name(self):
+        return os.path.join(self.parent.stream_name(), self.name)
 
     @synchronized
 
     @synchronized
-    def clone(self, new_parent):
-        c = Subcollection(new_parent)
-        self._cloneinto(c)
+    def clone(self, new_parent, new_name):
+        c = Subcollection(new_parent, new_name)
+        c._clonefrom(self)
         return c
 
         return c
 
-def import_manifest(manifest_text,
-                    into_collection=None,
-                    api_client=None,
-                    keep=None,
-                    num_retries=None,
-                    sync=SYNC_READONLY):
-    """Import a manifest into a `Collection`.
+    @must_be_writable
+    @synchronized
+    def _reparent(self, newparent, newname):
+        self._committed = False
+        self.flush()
+        self.parent.remove(self.name, recursive=True)
+        self.parent = newparent
+        self.name = newname
+        self.lock = self.parent.root_collection().lock
 
 
-    :manifest_text:
-      The manifest text to import from.
 
 
-    :into_collection:
-      The `Collection` that will be initialized (must be empty).
-      If None, create a new `Collection` object.
+class CollectionReader(Collection):
+    """A read-only collection object.
 
 
-    :api_client:
-      The API client object that will be used when creating a new `Collection` object.
+    Initialize from an api collection record locator, a portable data hash of a
+    manifest, or raw manifest text.  See `Collection` constructor for detailed
+    options.
 
 
-    :keep:
-      The keep client object that will be used when creating a new `Collection` object.
+    """
+    def __init__(self, manifest_locator_or_text, *args, **kwargs):
+        self._in_init = True
+        super(CollectionReader, self).__init__(manifest_locator_or_text, *args, **kwargs)
+        self._in_init = False
 
 
-    :num_retries:
-      the default number of api client and keep retries on error.
+        # Forego any locking since it should never change once initialized.
+        self.lock = NoopLock()
 
 
-    :sync:
-      Collection sync mode (only if into_collection is None)
-    """
-    if into_collection is not None:
-        if len(into_collection) > 0:
-            raise ArgumentError("Can only import manifest into an empty collection")
-        c = into_collection
-    else:
-        c = CollectionRoot(api_client=api_client, keep_client=keep, num_retries=num_retries, sync=sync)
-
-    save_sync = c.sync_mode()
-    c._sync = None
-
-    STREAM_NAME = 0
-    BLOCKS = 1
-    SEGMENTS = 2
-
-    stream_name = None
-    state = STREAM_NAME
-
-    for n in re.finditer(r'(\S+)(\s+|$)', manifest_text):
-        tok = n.group(1)
-        sep = n.group(2)
-
-        if state == STREAM_NAME:
-            # starting a new stream
-            stream_name = tok.replace('\\040', ' ')
-            blocks = []
-            segments = []
-            streamoffset = 0L
-            state = BLOCKS
-            continue
-
-        if state == BLOCKS:
-            s = re.match(r'[0-9a-f]{32}\+(\d+)(\+\S+)*', tok)
-            if s:
-                blocksize = long(s.group(1))
-                blocks.append(Range(tok, streamoffset, blocksize))
-                streamoffset += blocksize
-            else:
-                state = SEGMENTS
-
-        if state == SEGMENTS:
-            s = re.search(r'^(\d+):(\d+):(\S+)', tok)
-            if s:
-                pos = long(s.group(1))
-                size = long(s.group(2))
-                name = s.group(3).replace('\\040', ' ')
-                f = c.find("%s/%s" % (stream_name, name), create=True)
-                f.add_segment(blocks, pos, size)
-            else:
-                # error!
-                raise errors.SyntaxError("Invalid manifest format")
+        # Backwards compatability with old CollectionReader
+        # all_streams() and all_files()
+        self._streams = None
 
 
-        if sep == "\n":
-            stream_name = None
-            state = STREAM_NAME
+    def writable(self):
+        return self._in_init
 
 
-    c.set_unmodified()
-    c._sync = save_sync
-    return c
+    def _populate_streams(orig_func):
+        @functools.wraps(orig_func)
+        def populate_streams_wrapper(self, *args, **kwargs):
+            # Defer populating self._streams until needed since it creates a copy of the manifest.
+            if self._streams is None:
+                if self._manifest_text:
+                    self._streams = [sline.split()
+                                     for sline in self._manifest_text.split("\n")
+                                     if sline]
+                else:
+                    self._streams = []
+            return orig_func(self, *args, **kwargs)
+        return populate_streams_wrapper
 
 
-def export_manifest(item, stream_name=".", portable_locators=False):
-    """
-    :item:
-      Create a manifest for `item` (must be a `Collection` or `ArvadosFile`).  If
-      `item` is a is a `Collection`, this will also export subcollections.
+    @_populate_streams
+    def normalize(self):
+        """Normalize the streams returned by `all_streams`.
 
 
-    :stream_name:
-      the name of the stream when exporting `item`.
+        This method is kept for backwards compatability and only affects the
+        behavior of `all_streams()` and `all_files()`
 
 
-    :portable_locators:
-      If True, strip any permission hints on block locators.
-      If False, use block locators as-is.
-    """
-    buf = ""
-    if isinstance(item, SynchronizedCollectionBase):
-        stream = {}
-        sorted_keys = sorted(item.keys())
-        for k in [s for s in sorted_keys if isinstance(item[s], ArvadosFile)]:
-            v = item[k]
-            st = []
-            for s in v.segments():
-                loc = s.locator
-                if loc.startswith("bufferblock"):
-                    loc = v.parent._my_block_manager()._bufferblocks[loc].locator()
-                if portable_locators:
-                    loc = KeepLocator(loc).stripped()
-                st.append(LocatorAndRange(loc, locator_block_size(loc),
-                                     s.segment_offset, s.range_size))
-            stream[k] = st
-        if stream:
-            buf += ' '.join(normalize_stream(stream_name, stream))
-            buf += "\n"
-        for k in [s for s in sorted_keys if isinstance(item[s], SynchronizedCollectionBase)]:
-            buf += export_manifest(item[k], stream_name=os.path.join(stream_name, k), portable_locators=portable_locators)
-    elif isinstance(item, ArvadosFile):
-        st = []
-        for s in item.segments:
-            loc = s.locator
-            if loc.startswith("bufferblock"):
-                loc = item._bufferblocks[loc].calculate_locator()
-            if portable_locators:
-                loc = KeepLocator(loc).stripped()
-            st.append(LocatorAndRange(loc, locator_block_size(loc),
-                                 s.segment_offset, s.range_size))
-        stream[stream_name] = st
-        buf += ' '.join(normalize_stream(stream_name, stream))
-        buf += "\n"
-    return buf
+        """
+
+        # Rearrange streams
+        streams = {}
+        for s in self.all_streams():
+            for f in s.all_files():
+                streamname, filename = split(s.name() + "/" + f.name())
+                if streamname not in streams:
+                    streams[streamname] = {}
+                if filename not in streams[streamname]:
+                    streams[streamname][filename] = []
+                for r in f.segments:
+                    streams[streamname][filename].extend(s.locators_and_ranges(r.locator, r.range_size))
+
+        self._streams = [normalize_stream(s, streams[s])
+                         for s in sorted(streams)]
+    @_populate_streams
+    def all_streams(self):
+        return [StreamReader(s, self._my_keep(), num_retries=self.num_retries)
+                for s in self._streams]
+
+    @_populate_streams
+    def all_files(self):
+        for s in self.all_streams():
+            for f in s.all_files():
+                yield f