5627: Python file-like objects use SEEK_SET as the default whence.

[arvados.git] / sdk / python / arvados / collection.py
diff --git a/sdk/python/arvados/collection.py b/sdk/python/arvados/collection.py

index 6d5dd4ff15c7e3b7aba62acb0ba1f4278d16f1c7..3d48652dd53afe4eecc3bc35628646e427a8ac73 100644 (file)
--- a/sdk/python/arvados/collection.py
+++ b/sdk/python/arvados/collection.py
@@ -3,14 +3,17 @@ import logging
  import os
  import re
  import errno
  import os
  import re
  import errno
+import hashlib
  import time
  import time
+import threading
  
  from collections import deque
  from stat import *
  
  
  from collections import deque
  from stat import *
  
-from .arvfile import split, _FileLikeObjectBase, ArvadosFile, ArvadosFileWriter, ArvadosFileReader, _BlockManager, synchronized, must_be_writable, SYNC_READONLY, SYNC_EXPLICIT, NoopLock
-from keep import *
-from .stream import StreamReader, normalize_stream
+from .arvfile import split, _FileLikeObjectBase, ArvadosFile, ArvadosFileWriter, ArvadosFileReader, _BlockManager, synchronized, must_be_writable, NoopLock
+from keep import KeepLocator, KeepClient
+from .stream import StreamReader
+from ._normalize_stream import normalize_stream
  from ._ranges import Range, LocatorAndRange
  from .safeapi import ThreadSafeApiCache
  import config
  from ._ranges import Range, LocatorAndRange
  from .safeapi import ThreadSafeApiCache
  import config
@@ -35,7 +38,8 @@ class CollectionBase(object):
          return self._keep_client
  
      def stripped_manifest(self):
          return self._keep_client
  
      def stripped_manifest(self):
-        """
+        """Get the manifest with locator hints stripped.
+
          Return the manifest for the current collection with all
          non-portable hints (i.e., permission signatures and other
          hints other than size hints) removed from the locators.
          Return the manifest for the current collection with all
          non-portable hints (i.e., permission signatures and other
          hints other than size hints) removed from the locators.
@@ -466,13 +470,14 @@ class ResumableCollectionWriter(CollectionWriter):
                  "resumable writer can't accept unsourced data")
          return super(ResumableCollectionWriter, self).write(data)
  
                  "resumable writer can't accept unsourced data")
          return super(ResumableCollectionWriter, self).write(data)
  
+
  ADD = "add"
  DEL = "del"
  MOD = "mod"
  FILE = "file"
  COLLECTION = "collection"
  
  ADD = "add"
  DEL = "del"
  MOD = "mod"
  FILE = "file"
  COLLECTION = "collection"
  
-class SynchronizedCollectionBase(CollectionBase):
+class RichCollectionBase(CollectionBase):
      """Base class for Collections and Subcollections.
  
      Implements the majority of functionality relating to accessing items in the
      """Base class for Collections and Subcollections.
  
      Implements the majority of functionality relating to accessing items in the
@@ -494,7 +499,7 @@ class SynchronizedCollectionBase(CollectionBase):
      def _my_block_manager(self):
          raise NotImplementedError()
  
      def _my_block_manager(self):
          raise NotImplementedError()
  
-    def sync_mode(self):
+    def writable(self):
          raise NotImplementedError()
  
      def root_collection(self):
          raise NotImplementedError()
  
      def root_collection(self):
@@ -517,20 +522,18 @@ class SynchronizedCollectionBase(CollectionBase):
          the path.
  
          :create_type:
          the path.
  
          :create_type:
-          One of `arvado.collection.FILE` or
-          `arvado.collection.COLLECTION`.  If the path is not found, and value
+          One of `arvados.collection.FILE` or
+          `arvados.collection.COLLECTION`.  If the path is not found, and value
            of create_type is FILE then create and return a new ArvadosFile for
            the last path component.  If COLLECTION, then create and return a new
            Collection for the last path component.
  
          """
  
            of create_type is FILE then create and return a new ArvadosFile for
            the last path component.  If COLLECTION, then create and return a new
            Collection for the last path component.
  
          """
  
-        pathcomponents = path.split("/")
-
-        if pathcomponents and pathcomponents[0]:
+        pathcomponents = path.split("/", 1)
+        if pathcomponents[0]:
              item = self._items.get(pathcomponents[0])
              if len(pathcomponents) == 1:
              item = self._items.get(pathcomponents[0])
              if len(pathcomponents) == 1:
-                # item must be a file
                  if item is None:
                      # create new file
                      if create_type == COLLECTION:
                  if item is None:
                      # create new file
                      if create_type == COLLECTION:
@@ -548,9 +551,8 @@ class SynchronizedCollectionBase(CollectionBase):
                      self._items[pathcomponents[0]] = item
                      self._modified = True
                      self.notify(ADD, self, pathcomponents[0], item)
                      self._items[pathcomponents[0]] = item
                      self._modified = True
                      self.notify(ADD, self, pathcomponents[0], item)
-                del pathcomponents[0]
-                if isinstance(item, SynchronizedCollectionBase):
-                    return item.find_or_create("/".join(pathcomponents), create_type)
+                if isinstance(item, RichCollectionBase):
+                    return item.find_or_create(pathcomponents[1], create_type)
                  else:
                      raise IOError((errno.ENOTDIR, "Interior path components must be subcollection"))
          else:
                  else:
                      raise IOError((errno.ENOTDIR, "Interior path components must be subcollection"))
          else:
@@ -564,21 +566,21 @@ class SynchronizedCollectionBase(CollectionBase):
          found.
  
          """
          found.
  
          """
-        pathcomponents = path.split("/")
+        if not path:
+            raise errors.ArgumentError("Parameter 'path' must not be empty.")
  
  
-        if pathcomponents and pathcomponents[0]:
-            item = self._items.get(pathcomponents[0])
-            if len(pathcomponents) == 1:
-                # item must be a file
-                return item
-            else:
-                del pathcomponents[0]
-                if isinstance(item, SynchronizedCollectionBase):
-                    return item.find("/".join(pathcomponents))
-                else:
-                    raise IOError((errno.ENOTDIR, "Interior path components must be subcollection"))
+        pathcomponents = path.split("/", 1)
+        item = self._items.get(pathcomponents[0])
+        if len(pathcomponents) == 1:
+            return item
          else:
          else:
-            return self
+            if isinstance(item, RichCollectionBase):
+                if pathcomponents[1]:
+                    return item.find(pathcomponents[1])
+                else:
+                    return item
+            else:
+                raise IOError((errno.ENOTDIR, "Interior path components must be subcollection"))
  
      def mkdirs(path):
          """Recursive subcollection create.
  
      def mkdirs(path):
          """Recursive subcollection create.
@@ -612,7 +614,7 @@ class SynchronizedCollectionBase(CollectionBase):
              raise errors.ArgumentError("Bad mode '%s'" % mode)
          create = (mode != "r")
  
              raise errors.ArgumentError("Bad mode '%s'" % mode)
          create = (mode != "r")
  
-        if create and self.sync_mode() == SYNC_READONLY:
+        if create and not self.writable():
              raise IOError((errno.EROFS, "Collection is read only"))
  
          if create:
              raise IOError((errno.EROFS, "Collection is read only"))
  
          if create:
@@ -637,8 +639,7 @@ class SynchronizedCollectionBase(CollectionBase):
  
      @synchronized
      def modified(self):
  
      @synchronized
      def modified(self):
-        """Test if the collection (or any subcollection or file) has been modified
-        since it was created."""
+        """Test if the collection (or any subcollection or file) has been modified."""
          if self._modified:
              return True
          for k,v in self._items.items():
          if self._modified:
              return True
          for k,v in self._items.items():
@@ -658,22 +659,18 @@ class SynchronizedCollectionBase(CollectionBase):
          """Iterate over names of files and collections contained in this collection."""
          return iter(self._items.keys())
  
          """Iterate over names of files and collections contained in this collection."""
          return iter(self._items.keys())
  
-    @synchronized
-    def iterkeys(self):
-        """Iterate over names of files and collections directly contained in this collection."""
-        return self._items.keys()
-
      @synchronized
      def __getitem__(self, k):
      @synchronized
      def __getitem__(self, k):
-        """Get a file or collection that is directly contained by this collection.  If
-        you want to search a path, use `find()` instead.
+        """Get a file or collection that is directly contained by this collection.
+
+        If you want to search a path, use `find()` instead.
+
          """
          return self._items[k]
  
      @synchronized
      def __contains__(self, k):
          """
          return self._items[k]
  
      @synchronized
      def __contains__(self, k):
-        """If there is a file or collection a directly contained by this collection
-        with name `k`."""
+        """Test if there is a file or collection directly contained by this collection."""
          return k in self._items
  
      @synchronized
          return k in self._items
  
      @synchronized
@@ -706,7 +703,7 @@ class SynchronizedCollectionBase(CollectionBase):
  
      def exists(self, path):
          """Test if there is a file or collection at `path`."""
  
      def exists(self, path):
          """Test if there is a file or collection at `path`."""
-        return self.find(path) != None
+        return self.find(path) is not None
  
      @must_be_writable
      @synchronized
  
      @must_be_writable
      @synchronized
@@ -716,39 +713,72 @@ class SynchronizedCollectionBase(CollectionBase):
          :recursive:
            Specify whether to remove non-empty subcollections (True), or raise an error (False).
          """
          :recursive:
            Specify whether to remove non-empty subcollections (True), or raise an error (False).
          """
-        pathcomponents = path.split("/")
  
  
-        if len(pathcomponents) > 0:
-            item = self._items.get(pathcomponents[0])
-            if item is None:
-                raise IOError((errno.ENOENT, "File not found"))
-            if len(pathcomponents) == 1:
-                if isinstance(self._items[pathcomponents[0]], SynchronizedCollectionBase) and len(self._items[pathcomponents[0]]) > 0 and not recursive:
-                    raise IOError((errno.ENOTEMPTY, "Subcollection not empty"))
-                deleteditem = self._items[pathcomponents[0]]
-                del self._items[pathcomponents[0]]
-                self._modified = True
-                self.notify(DEL, self, pathcomponents[0], deleteditem)
-            else:
-                del pathcomponents[0]
-                item.remove("/".join(pathcomponents))
-        else:
+        if not path:
+            raise errors.ArgumentError("Parameter 'path' must not be empty.")
+
+        pathcomponents = path.split("/", 1)
+        item = self._items.get(pathcomponents[0])
+        if item is None:
              raise IOError((errno.ENOENT, "File not found"))
              raise IOError((errno.ENOENT, "File not found"))
+        if len(pathcomponents) == 1:
+            if isinstance(self._items[pathcomponents[0]], RichCollectionBase) and len(self._items[pathcomponents[0]]) > 0 and not recursive:
+                raise IOError((errno.ENOTEMPTY, "Subcollection not empty"))
+            deleteditem = self._items[pathcomponents[0]]
+            del self._items[pathcomponents[0]]
+            self._modified = True
+            self.notify(DEL, self, pathcomponents[0], deleteditem)
+        else:
+            item.remove(pathcomponents[1])
  
  
-    def _cloneinto(self, target):
-        for k,v in self._items.items():
-            target._items[k] = v.clone(target)
+    def _clonefrom(self, source):
+        for k,v in source.items():
+            self._items[k] = v.clone(self)
  
      def clone(self):
          raise NotImplementedError()
  
  
      def clone(self):
          raise NotImplementedError()
  
+    @must_be_writable
+    @synchronized
+    def add(self, source_obj, target_name, overwrite=False):
+        """Copy a file or subcollection to this collection.
+
+        :source_obj:
+          An ArvadosFile or Subcollection object
+
+        :target_name:
+          Destination item name.  If the target name already exists and is a
+          file, this will raise an error unless you specify `overwrite=True`.
+
+        :overwrite:
+          Whether to overwrite target file if it already exists.
+
+        """
+
+        if target_name in self and not overwrite:
+            raise IOError((errno.EEXIST, "File already exists"))
+
+        modified_from = None
+        if target_name in self:
+            modified_from = self[target_name]
+
+        # Actually make the copy.
+        dup = source_obj.clone(self)
+        self._items[target_name] = dup
+        self._modified = True
+
+        if modified_from:
+            self.notify(MOD, self, target_name, (modified_from, dup))
+        else:
+            self.notify(ADD, self, target_name, dup)
+
      @must_be_writable
      @synchronized
      def copy(self, source, target_path, source_collection=None, overwrite=False):
          """Copy a file or subcollection to a new path in this collection.
  
          :source:
      @must_be_writable
      @synchronized
      def copy(self, source, target_path, source_collection=None, overwrite=False):
          """Copy a file or subcollection to a new path in this collection.
  
          :source:
-          An ArvadosFile, Subcollection, or string with a path to source file or subcollection
+          A string with a path to source file or subcollection, or an actual ArvadosFile or Subcollection object.
  
          :target_path:
            Destination file or path.  If the target path already exists and is a
  
          :target_path:
            Destination file or path.  If the target path already exists and is a
@@ -786,27 +816,11 @@ class SynchronizedCollectionBase(CollectionBase):
  
          target_dir = self.find_or_create("/".join(targetcomponents[0:-1]), COLLECTION)
  
  
          target_dir = self.find_or_create("/".join(targetcomponents[0:-1]), COLLECTION)
  
-        with target_dir.lock:
-            if target_name in target_dir:
-                if isinstance(target_dir[target_name], SynchronizedCollectionBase) and sourcecomponents:
-                    target_dir = target_dir[target_name]
-                    target_name = sourcecomponents[-1]
-                elif not overwrite:
-                    raise IOError((errno.EEXIST, "File already exists"))
+        if target_name in target_dir and isinstance(self[target_name], RichCollectionBase) and sourcecomponents:
+            target_dir = target_dir[target_name]
+            target_name = sourcecomponents[-1]
  
  
-            modified_from = None
-            if target_name in target_dir:
-                modified_from = target_dir[target_name]
-
-            # Actually make the copy.
-            dup = source_obj.clone(target_dir)
-            target_dir._items[target_name] = dup
-            target_dir._modified = True
-
-        if modified_from:
-            self.notify(MOD, target_dir, target_name, (modified_from, dup))
-        else:
-            self.notify(ADD, target_dir, target_name, dup)
+        target_dir.add(source_obj, target_name, overwrite)
  
      @synchronized
      def manifest_text(self, stream_name=".", strip=False, normalize=False):
  
      @synchronized
      def manifest_text(self, stream_name=".", strip=False, normalize=False):
@@ -828,13 +842,12 @@ class SynchronizedCollectionBase(CollectionBase):
          """
  
          if self.modified() or self._manifest_text is None or normalize:
          """
  
          if self.modified() or self._manifest_text is None or normalize:
-            item  = self
              stream = {}
              stream = {}
-            buf = ""
-            sorted_keys = sorted(item.keys())
-            for filename in [s for s in sorted_keys if isinstance(item[s], ArvadosFile)]:
+            buf = []
+            sorted_keys = sorted(self.keys())
+            for filename in [s for s in sorted_keys if isinstance(self[s], ArvadosFile)]:
                  # Create a stream per file `k`
                  # Create a stream per file `k`
-                arvfile = item[filename]
+                arvfile = self[filename]
                  filestream = []
                  for segment in arvfile.segments():
                      loc = segment.locator
                  filestream = []
                  for segment in arvfile.segments():
                      loc = segment.locator
@@ -846,11 +859,10 @@ class SynchronizedCollectionBase(CollectionBase):
                                           segment.segment_offset, segment.range_size))
                  stream[filename] = filestream
              if stream:
                                           segment.segment_offset, segment.range_size))
                  stream[filename] = filestream
              if stream:
-                buf += ' '.join(normalize_stream(stream_name, stream))
-                buf += "\n"
-            for dirname in [s for s in sorted_keys if isinstance(item[s], SynchronizedCollectionBase)]:
-                buf += item[dirname].manifest_text(stream_name=os.path.join(stream_name, dirname), strip=strip)
-            return buf
+                buf.append(" ".join(normalize_stream(stream_name, stream)) + "\n")
+            for dirname in [s for s in sorted_keys if isinstance(self[s], RichCollectionBase)]:
+                buf.append(self[dirname].manifest_text(stream_name=os.path.join(stream_name, dirname), strip=strip))
+            return "".join(buf)
          else:
              if strip:
                  return self.stripped_manifest()
          else:
              if strip:
                  return self.stripped_manifest()
@@ -859,9 +871,10 @@ class SynchronizedCollectionBase(CollectionBase):
  
      @synchronized
      def diff(self, end_collection, prefix=".", holding_collection=None):
  
      @synchronized
      def diff(self, end_collection, prefix=".", holding_collection=None):
-        """
-        Generate list of add/modify/delete actions which, when given to `apply`, will
-        change `self` to match `end_collection`
+        """Generate list of add/modify/delete actions.
+
+        When given to `apply`, will change `self` to match `end_collection`.
+
          """
          changes = []
          if holding_collection is None:
          """
          changes = []
          if holding_collection is None:
@@ -935,7 +948,7 @@ class SynchronizedCollectionBase(CollectionBase):
      def __eq__(self, other):
          if other is self:
              return True
      def __eq__(self, other):
          if other is self:
              return True
-        if not isinstance(other, SynchronizedCollectionBase):
+        if not isinstance(other, RichCollectionBase):
              return False
          if len(self._items) != len(other):
              return False
              return False
          if len(self._items) != len(other):
              return False
@@ -950,11 +963,15 @@ class SynchronizedCollectionBase(CollectionBase):
          return not self.__eq__(other)
  
  
          return not self.__eq__(other)
  
  
-class Collection(SynchronizedCollectionBase):
-    """Represents the root of an Arvados Collection, which may be associated with
-    an API server Collection record.
+class Collection(RichCollectionBase):
+    """Represents the root of an Arvados Collection.
  
  
-    Brief summary of useful methods:
+    This class is threadsafe.  The root collection object, all subcollections
+    and files are protected by a single lock (i.e. each access locks the entire
+    collection).
+
+    Brief summary of
+    useful methods:
  
      :To read an existing file:
        `c.open("myfile", "r")`
  
      :To read an existing file:
        `c.open("myfile", "r")`
@@ -980,9 +997,8 @@ class Collection(SynchronizedCollectionBase):
      :To merge remote changes into this object:
        `c.update()`
  
      :To merge remote changes into this object:
        `c.update()`
  
-    This class is threadsafe.  The root collection object, all subcollections
-    and files are protected by a single lock (i.e. each access locks the entire
-    collection).
+    Must be associated with an API server Collection record (during
+    initialization, or using `save_new`) to use `save` or `update`.
  
      """
  
  
      """
  
@@ -1029,7 +1045,6 @@ class Collection(SynchronizedCollectionBase):
          self._manifest_text = None
          self._api_response = None
  
          self._manifest_text = None
          self._api_response = None
  
-        self._sync = SYNC_EXPLICIT
          self.lock = threading.RLock()
          self.callbacks = []
          self.events = None
          self.lock = threading.RLock()
          self.callbacks = []
          self.events = None
@@ -1045,8 +1060,10 @@ class Collection(SynchronizedCollectionBase):
                  raise errors.ArgumentError(
                      "Argument to CollectionReader must be a manifest or a collection UUID")
  
                  raise errors.ArgumentError(
                      "Argument to CollectionReader must be a manifest or a collection UUID")
  
-            self._populate()
-
+            try:
+                self._populate()
+            except (IOError, errors.SyntaxError) as e:
+                raise errors.ArgumentError("Error processing manifest text: %s", e)
  
      def root_collection(self):
          return self
  
      def root_collection(self):
          return self
@@ -1054,16 +1071,14 @@ class Collection(SynchronizedCollectionBase):
      def stream_name(self):
          return "."
  
      def stream_name(self):
          return "."
  
-    def sync_mode(self):
-        return self._sync
+    def writable(self):
+        return True
  
      @synchronized
      @retry_method
      def update(self, other=None, num_retries=None):
  
      @synchronized
      @retry_method
      def update(self, other=None, num_retries=None):
-        """Fetch the latest collection record on the API server and merge it with the
-        current collection contents.
+        """Merge the latest collection on the API server with the current collection."""
  
  
-        """
          if other is None:
              if self._manifest_locator is None:
                  raise errors.ArgumentError("`other` is None but collection does not have a manifest_locator uuid")
          if other is None:
              if self._manifest_locator is None:
                  raise errors.ArgumentError("`other` is None but collection does not have a manifest_locator uuid")
@@ -1144,7 +1159,7 @@ class Collection(SynchronizedCollectionBase):
              error_via_keep = self._populate_from_keep()
          if self._manifest_text is None:
              # Nothing worked!
              error_via_keep = self._populate_from_keep()
          if self._manifest_text is None:
              # Nothing worked!
-            raise arvados.errors.NotFoundError(
+            raise errors.NotFoundError(
                  ("Failed to retrieve collection '{}' " +
                   "from either API server ({}) or Keep ({})."
                   ).format(
                  ("Failed to retrieve collection '{}' " +
                   "from either API server ({}) or Keep ({})."
                   ).format(
@@ -1164,11 +1179,26 @@ class Collection(SynchronizedCollectionBase):
  
      def __exit__(self, exc_type, exc_value, traceback):
          """Support scoped auto-commit in a with: block."""
  
      def __exit__(self, exc_type, exc_value, traceback):
          """Support scoped auto-commit in a with: block."""
-        if self._sync != SYNC_READONLY and self._has_collection_uuid():
-            self.save()
+        if exc_type is not None:
+            if self.writable() and self._has_collection_uuid():
+                self.save()
          if self._block_manager is not None:
              self._block_manager.stop_threads()
  
          if self._block_manager is not None:
              self._block_manager.stop_threads()
  
+    @synchronized
+    def manifest_locator(self):
+        """Get the manifest locator, if any.
+
+        The manifest locator will be set when the collection is loaded from an
+        API server record or the portable data hash of a manifest.
+
+        The manifest locator will be None if the collection is newly created or
+        was created directly from manifest text.  The method `save_new()` will
+        assign a manifest locator.
+
+        """
+        return self._manifest_locator
+
      @synchronized
      def clone(self, new_parent=None, readonly=False, new_config=None):
          if new_config is None:
      @synchronized
      def clone(self, new_parent=None, readonly=False, new_config=None):
          if new_config is None:
@@ -1178,9 +1208,7 @@ class Collection(SynchronizedCollectionBase):
          else:
              newcollection = Collection(parent=new_parent, apiconfig=new_config)
  
          else:
              newcollection = Collection(parent=new_parent, apiconfig=new_config)
  
-        newcollection._sync = None
-        self._cloneinto(newcollection)
-        newcollection._sync = SYNC_READONLY if readonly else SYNC_EXPLICIT
+        newcollection._clonefrom(self)
          return newcollection
  
      @synchronized
          return newcollection
  
      @synchronized
@@ -1194,21 +1222,21 @@ class Collection(SynchronizedCollectionBase):
          return self._api_response
  
      def find_or_create(self, path, create_type):
          return self._api_response
  
      def find_or_create(self, path, create_type):
-        """See `SynchronizedCollectionBase.find_or_create`"""
+        """See `RichCollectionBase.find_or_create`"""
          if path == ".":
              return self
          else:
              return super(Collection, self).find_or_create(path[2:] if path.startswith("./") else path, create_type)
  
      def find(self, path):
          if path == ".":
              return self
          else:
              return super(Collection, self).find_or_create(path[2:] if path.startswith("./") else path, create_type)
  
      def find(self, path):
-        """See `SynchronizedCollectionBase.find`"""
+        """See `RichCollectionBase.find`"""
          if path == ".":
              return self
          else:
              return super(Collection, self).find(path[2:] if path.startswith("./") else path)
  
      def remove(self, path, recursive=False):
          if path == ".":
              return self
          else:
              return super(Collection, self).find(path[2:] if path.startswith("./") else path)
  
      def remove(self, path, recursive=False):
-        """See `SynchronizedCollectionBase.remove`"""
+        """See `RichCollectionBase.remove`"""
          if path == ".":
              raise errors.ArgumentError("Cannot remove '.'")
          else:
          if path == ".":
              raise errors.ArgumentError("Cannot remove '.'")
          else:
@@ -1218,18 +1246,23 @@ class Collection(SynchronizedCollectionBase):
      @synchronized
      @retry_method
      def save(self, merge=True, num_retries=None):
      @synchronized
      @retry_method
      def save(self, merge=True, num_retries=None):
-        """Commit pending buffer blocks to Keep, merge with remote record (if
-        update=True), write the manifest to Keep, and update the collection
-        record.
+        """Save collection to an existing collection record.
+
+        Commit pending buffer blocks to Keep, merge with remote record (if
+        merge=True, the default), write the manifest to Keep, and update the
+        collection record.
  
          Will raise AssertionError if not associated with a collection record on
          the API server.  If you want to save a manifest to Keep only, see
          `save_new()`.
  
  
          Will raise AssertionError if not associated with a collection record on
          the API server.  If you want to save a manifest to Keep only, see
          `save_new()`.
  
-        :update:
+        :merge:
            Update and merge remote changes before saving.  Otherwise, any
            remote changes will be ignored and overwritten.
  
            Update and merge remote changes before saving.  Otherwise, any
            remote changes will be ignored and overwritten.
  
+        :num_retries:
+          Retry count on API calls (if None, use the collection default)
+
          """
          if self.modified():
              if not self._has_collection_uuid():
          """
          if self.modified():
              if not self._has_collection_uuid():
@@ -1253,17 +1286,18 @@ class Collection(SynchronizedCollectionBase):
      @synchronized
      @retry_method
      def save_new(self, name=None, create_collection_record=True, owner_uuid=None, ensure_unique_name=False, num_retries=None):
      @synchronized
      @retry_method
      def save_new(self, name=None, create_collection_record=True, owner_uuid=None, ensure_unique_name=False, num_retries=None):
-        """Commit pending buffer blocks to Keep, write the manifest to Keep, and create
-        a new collection record (if create_collection_record True).
+        """Save collection to a new collection record.
  
  
+        Commit pending buffer blocks to Keep, write the manifest to Keep, and
+        create a new collection record (if create_collection_record True).
          After creating a new collection record, this Collection object will be
          associated with the new record used by `save()`.
  
          :name:
            The collection name.
  
          After creating a new collection record, this Collection object will be
          associated with the new record used by `save()`.
  
          :name:
            The collection name.
  
-        :keep_only:
-          Only save the manifest to keep, do not create a collection record.
+        :create_collection_record:
+          If True, create a collection record.  If False, only save the manifest to keep.
  
          :owner_uuid:
            the user, or project uuid that will own this collection.
  
          :owner_uuid:
            the user, or project uuid that will own this collection.
@@ -1274,6 +1308,9 @@ class Collection(SynchronizedCollectionBase):
            if it conflicts with a collection with the same name and owner.  If
            False, a name conflict will result in an error.
  
            if it conflicts with a collection with the same name and owner.  If
            False, a name conflict will result in an error.
  
+        :num_retries:
+          Retry count on API calls (if None, use the collection default)
+
          """
          self._my_block_manager().commit_all()
          self._my_keep().put(self.manifest_text(strip=True), num_retries=num_retries)
          """
          self._my_block_manager().commit_all()
          self._my_keep().put(self.manifest_text(strip=True), num_retries=num_retries)
@@ -1320,9 +1357,6 @@ class Collection(SynchronizedCollectionBase):
          if len(self) > 0:
              raise ArgumentError("Can only import manifest into an empty collection")
  
          if len(self) > 0:
              raise ArgumentError("Can only import manifest into an empty collection")
  
-        save_sync = self.sync_mode()
-        self._sync = None
-
          STREAM_NAME = 0
          BLOCKS = 1
          SEGMENTS = 2
          STREAM_NAME = 0
          BLOCKS = 1
          SEGMENTS = 2
@@ -1330,9 +1364,9 @@ class Collection(SynchronizedCollectionBase):
          stream_name = None
          state = STREAM_NAME
  
          stream_name = None
          state = STREAM_NAME
  
-        for n in re.finditer(r'(\S+)(\s+|$)', manifest_text):
-            tok = n.group(1)
-            sep = n.group(2)
+        for token_and_separator in re.finditer(r'(\S+)(\s+|$)', manifest_text):
+            tok = token_and_separator.group(1)
+            sep = token_and_separator.group(2)
  
              if state == STREAM_NAME:
                  # starting a new stream
  
              if state == STREAM_NAME:
                  # starting a new stream
@@ -1344,24 +1378,24 @@ class Collection(SynchronizedCollectionBase):
                  continue
  
              if state == BLOCKS:
                  continue
  
              if state == BLOCKS:
-                s = re.match(r'[0-9a-f]{32}\+(\d+)(\+\S+)*', tok)
-                if s:
-                    blocksize = long(s.group(1))
+                block_locator = re.match(r'[0-9a-f]{32}\+(\d+)(\+\S+)*', tok)
+                if block_locator:
+                    blocksize = long(block_locator.group(1))
                      blocks.append(Range(tok, streamoffset, blocksize))
                      streamoffset += blocksize
                  else:
                      state = SEGMENTS
  
              if state == SEGMENTS:
                      blocks.append(Range(tok, streamoffset, blocksize))
                      streamoffset += blocksize
                  else:
                      state = SEGMENTS
  
              if state == SEGMENTS:
-                s = re.search(r'^(\d+):(\d+):(\S+)', tok)
-                if s:
-                    pos = long(s.group(1))
-                    size = long(s.group(2))
-                    name = s.group(3).replace('\\040', ' ')
+                file_segment = re.search(r'^(\d+):(\d+):(\S+)', tok)
+                if file_segment:
+                    pos = long(file_segment.group(1))
+                    size = long(file_segment.group(2))
+                    name = file_segment.group(3).replace('\\040', ' ')
                      filepath = os.path.join(stream_name, name)
                      filepath = os.path.join(stream_name, name)
-                    f = self.find_or_create(filepath, FILE)
-                    if isinstance(f, ArvadosFile):
-                        f.add_segment(blocks, pos, size)
+                    afile = self.find_or_create(filepath, FILE)
+                    if isinstance(afile, ArvadosFile):
+                        afile.add_segment(blocks, pos, size)
                      else:
                          raise errors.SyntaxError("File %s conflicts with stream of the same name.", filepath)
                  else:
                      else:
                          raise errors.SyntaxError("File %s conflicts with stream of the same name.", filepath)
                  else:
@@ -1373,10 +1407,9 @@ class Collection(SynchronizedCollectionBase):
                  state = STREAM_NAME
  
          self.set_unmodified()
                  state = STREAM_NAME
  
          self.set_unmodified()
-        self._sync = save_sync
  
  
  
  
-class Subcollection(SynchronizedCollectionBase):
+class Subcollection(RichCollectionBase):
      """This is a subdirectory within a collection that doesn't have its own API
      server record.
  
      """This is a subdirectory within a collection that doesn't have its own API
      server record.
  
@@ -1392,8 +1425,8 @@ class Subcollection(SynchronizedCollectionBase):
      def root_collection(self):
          return self.parent.root_collection()
  
      def root_collection(self):
          return self.parent.root_collection()
  
-    def sync_mode(self):
-        return self.root_collection().sync_mode()
+    def writable(self):
+        return self.root_collection().writable()
  
      def _my_api(self):
          return self.root_collection()._my_api()
  
      def _my_api(self):
          return self.root_collection()._my_api()
@@ -1416,31 +1449,33 @@ class Subcollection(SynchronizedCollectionBase):
      @synchronized
      def clone(self, new_parent):
          c = Subcollection(new_parent)
      @synchronized
      def clone(self, new_parent):
          c = Subcollection(new_parent)
-        self._cloneinto(c)
+        c._clonefrom(self)
          return c
  
  
  class CollectionReader(Collection):
          return c
  
  
  class CollectionReader(Collection):
-    """A read-only collection object from an api collection record locator,
-    a portable data hash of a manifest, or raw manifest text.
+    """A read-only collection object.
  
  
-    See `Collection` constructor for detailed options.
+    Initialize from an api collection record locator, a portable data hash of a
+    manifest, or raw manifest text.  See `Collection` constructor for detailed
+    options.
  
      """
  
      """
-    def __init__(self, *args, **kwargs):
-        if not args and not kwargs.get("manifest_locator_or_text"):
-            raise errors.ArgumentError("Must provide manifest locator or text to initialize ReadOnlyCollection")
-
-        super(CollectionReader, self).__init__(*args, **kwargs)
+    def __init__(self, manifest_locator_or_text, *args, **kwargs):
+        self._in_init = True
+        super(CollectionReader, self).__init__(manifest_locator_or_text, *args, **kwargs)
+        self._in_init = False
  
          # Forego any locking since it should never change once initialized.
          self.lock = NoopLock()
  
          # Forego any locking since it should never change once initialized.
          self.lock = NoopLock()
-        self._sync = SYNC_READONLY
  
          # Backwards compatibility with old CollectionReader
          # all_streams() and all_files()
          self._streams = None
  
  
          # Backwards compatibility with old CollectionReader
          # all_streams() and all_files()
          self._streams = None
  
+    def writable(self):
+        return self._in_init
+
      def _populate_streams(orig_func):
          @functools.wraps(orig_func)
          def populate_streams_wrapper(self, *args, **kwargs):
      def _populate_streams(orig_func):
          @functools.wraps(orig_func)
          def populate_streams_wrapper(self, *args, **kwargs):