Merge branch 'master' into 14075-uploadfiles
[arvados.git] / sdk / python / arvados / collection.py
index 098a11e1bb12141c9206edd492d1aa46aa4d0991..e38a6bd475c7b8a4aee7787a537a40656fd93b36 100644 (file)
@@ -1,9 +1,20 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+from __future__ import absolute_import
+from future.utils import listitems, listvalues, viewkeys
+from builtins import str
+from past.builtins import basestring
+from builtins import object
 import functools
 import logging
 import os
 import re
 import errno
 import hashlib
 import functools
 import logging
 import os
 import re
 import errno
 import hashlib
+import datetime
+import ciso8601
 import time
 import threading
 
 import time
 import threading
 
@@ -11,20 +22,22 @@ from collections import deque
 from stat import *
 
 from .arvfile import split, _FileLikeObjectBase, ArvadosFile, ArvadosFileWriter, ArvadosFileReader, _BlockManager, synchronized, must_be_writable, NoopLock
 from stat import *
 
 from .arvfile import split, _FileLikeObjectBase, ArvadosFile, ArvadosFileWriter, ArvadosFileReader, _BlockManager, synchronized, must_be_writable, NoopLock
-from keep import KeepLocator, KeepClient
+from .keep import KeepLocator, KeepClient
 from .stream import StreamReader
 from ._normalize_stream import normalize_stream
 from ._ranges import Range, LocatorAndRange
 from .safeapi import ThreadSafeApiCache
 from .stream import StreamReader
 from ._normalize_stream import normalize_stream
 from ._ranges import Range, LocatorAndRange
 from .safeapi import ThreadSafeApiCache
-import config
-import errors
-import util
-import events
+import arvados.config as config
+import arvados.errors as errors
+import arvados.util
+import arvados.events as events
 from arvados.retry import retry_method
 
 _logger = logging.getLogger('arvados.collection')
 
 class CollectionBase(object):
 from arvados.retry import retry_method
 
 _logger = logging.getLogger('arvados.collection')
 
 class CollectionBase(object):
+    """Abstract base class for Collection classes."""
+
     def __enter__(self):
         return self
 
     def __enter__(self):
         return self
 
@@ -51,7 +64,7 @@ class CollectionBase(object):
             if fields:
                 clean_fields = fields[:1] + [
                     (re.sub(r'\+[^\d][^\+]*', '', x)
             if fields:
                 clean_fields = fields[:1] + [
                     (re.sub(r'\+[^\d][^\+]*', '', x)
-                     if re.match(util.keep_locator_pattern, x)
+                     if re.match(arvados.util.keep_locator_pattern, x)
                      else x)
                     for x in fields[1:]]
                 clean += [' '.join(clean_fields), "\n"]
                      else x)
                     for x in fields[1:]]
                 clean += [' '.join(clean_fields), "\n"]
@@ -82,6 +95,8 @@ class _WriterFile(_FileLikeObjectBase):
 
 
 class CollectionWriter(CollectionBase):
 
 
 class CollectionWriter(CollectionBase):
+    """Deprecated, use Collection instead."""
+
     def __init__(self, api_client=None, num_retries=0, replication=None):
         """Instantiate a CollectionWriter.
 
     def __init__(self, api_client=None, num_retries=0, replication=None):
         """Instantiate a CollectionWriter.
 
@@ -180,7 +195,7 @@ class CollectionWriter(CollectionBase):
 
     def _work_trees(self):
         path, stream_name, max_manifest_depth = self._queued_trees[0]
 
     def _work_trees(self):
         path, stream_name, max_manifest_depth = self._queued_trees[0]
-        d = util.listdir_recursive(
+        d = arvados.util.listdir_recursive(
             path, max_depth = (None if max_manifest_depth == 0 else 0))
         if d:
             self._queue_dirents(stream_name, d)
             path, max_depth = (None if max_manifest_depth == 0 else 0))
         if d:
             self._queue_dirents(stream_name, d)
@@ -216,7 +231,11 @@ class CollectionWriter(CollectionBase):
         self.do_queued_work()
 
     def write(self, newdata):
         self.do_queued_work()
 
     def write(self, newdata):
-        if hasattr(newdata, '__iter__'):
+        if isinstance(newdata, bytes):
+            pass
+        elif isinstance(newdata, str):
+            newdata = newdata.encode()
+        elif hasattr(newdata, '__iter__'):
             for s in newdata:
                 self.write(s)
             return
             for s in newdata:
                 self.write(s)
             return
@@ -256,7 +275,7 @@ class CollectionWriter(CollectionBase):
         return self._last_open
 
     def flush_data(self):
         return self._last_open
 
     def flush_data(self):
-        data_buffer = ''.join(self._data_buffer)
+        data_buffer = b''.join(self._data_buffer)
         if data_buffer:
             self._current_stream_locators.append(
                 self._my_keep().put(
         if data_buffer:
             self._current_stream_locators.append(
                 self._my_keep().put(
@@ -346,11 +365,12 @@ class CollectionWriter(CollectionBase):
         sending manifest_text() to the API server's "create
         collection" endpoint.
         """
         sending manifest_text() to the API server's "create
         collection" endpoint.
         """
-        return self._my_keep().put(self.manifest_text(), copies=self.replication)
+        return self._my_keep().put(self.manifest_text().encode(),
+                                   copies=self.replication)
 
     def portable_data_hash(self):
 
     def portable_data_hash(self):
-        stripped = self.stripped_manifest()
-        return hashlib.md5(stripped).hexdigest() + '+' + str(len(stripped))
+        stripped = self.stripped_manifest().encode()
+        return '{}+{}'.format(hashlib.md5(stripped).hexdigest(), len(stripped))
 
     def manifest_text(self):
         self.finish_current_stream()
 
     def manifest_text(self):
         self.finish_current_stream()
@@ -372,8 +392,18 @@ class CollectionWriter(CollectionBase):
             ret += locators
         return ret
 
             ret += locators
         return ret
 
+    def save_new(self, name=None):
+        return self._api_client.collections().create(
+            ensure_unique_name=True,
+            body={
+                'name': name,
+                'manifest_text': self.manifest_text(),
+            }).execute(num_retries=self.num_retries)
+
 
 class ResumableCollectionWriter(CollectionWriter):
 
 class ResumableCollectionWriter(CollectionWriter):
+    """Deprecated, use Collection instead."""
+
     STATE_PROPS = ['_current_stream_files', '_current_stream_length',
                    '_current_stream_locators', '_current_stream_name',
                    '_current_file_name', '_current_file_pos', '_close_file',
     STATE_PROPS = ['_current_stream_files', '_current_stream_length',
                    '_current_stream_locators', '_current_stream_name',
                    '_current_file_name', '_current_file_pos', '_close_file',
@@ -418,7 +448,7 @@ class ResumableCollectionWriter(CollectionWriter):
         return writer
 
     def check_dependencies(self):
         return writer
 
     def check_dependencies(self):
-        for path, orig_stat in self._dependencies.items():
+        for path, orig_stat in listitems(self._dependencies):
             if not S_ISREG(orig_stat[ST_MODE]):
                 raise errors.StaleWriterStateError("{} not file".format(path))
             try:
             if not S_ISREG(orig_stat[ST_MODE]):
                 raise errors.StaleWriterStateError("{} not file".format(path))
             try:
@@ -612,7 +642,12 @@ class RichCollectionBase(CollectionBase):
         :path:
           path to a file in the collection
         :mode:
         :path:
           path to a file in the collection
         :mode:
-          one of "r", "r+", "w", "w+", "a", "a+"
+          a string consisting of "r", "w", or "a", optionally followed
+          by "b" or "t", optionally followed by "+".
+          :"b":
+            binary mode: write() accepts bytes, read() returns bytes.
+          :"t":
+            text mode (default): write() accepts strings, read() returns strings.
           :"r":
             opens for reading
           :"r+":
           :"r":
             opens for reading
           :"r+":
@@ -624,33 +659,28 @@ class RichCollectionBase(CollectionBase):
             the end of the file.  Writing does not affect the file pointer for
             reading.
         """
             the end of the file.  Writing does not affect the file pointer for
             reading.
         """
-        mode = mode.replace("b", "")
-        if len(mode) == 0 or mode[0] not in ("r", "w", "a"):
-            raise errors.ArgumentError("Bad mode '%s'" % mode)
-        create = (mode != "r")
 
 
-        if create and not self.writable():
-            raise IOError(errno.EROFS, "Collection is read only")
+        if not re.search(r'^[rwa][bt]?\+?$', mode):
+            raise errors.ArgumentError("Invalid mode {!r}".format(mode))
 
 
-        if create:
-            arvfile = self.find_or_create(path, FILE)
-        else:
+        if mode[0] == 'r' and '+' not in mode:
+            fclass = ArvadosFileReader
             arvfile = self.find(path)
             arvfile = self.find(path)
+        elif not self.writable():
+            raise IOError(errno.EROFS, "Collection is read only")
+        else:
+            fclass = ArvadosFileWriter
+            arvfile = self.find_or_create(path, FILE)
 
         if arvfile is None:
             raise IOError(errno.ENOENT, "File not found", path)
         if not isinstance(arvfile, ArvadosFile):
             raise IOError(errno.EISDIR, "Is a directory", path)
 
 
         if arvfile is None:
             raise IOError(errno.ENOENT, "File not found", path)
         if not isinstance(arvfile, ArvadosFile):
             raise IOError(errno.EISDIR, "Is a directory", path)
 
-        if mode[0] == "w":
+        if mode[0] == 'w':
             arvfile.truncate(0)
 
             arvfile.truncate(0)
 
-        name = os.path.basename(path)
-
-        if mode == "r":
-            return ArvadosFileReader(arvfile, num_retries=self.num_retries)
-        else:
-            return ArvadosFileWriter(arvfile, mode, num_retries=self.num_retries)
+        return fclass(arvfile, mode=mode, num_retries=self.num_retries)
 
     def modified(self):
         """Determine if the collection has been modified since last commited."""
 
     def modified(self):
         """Determine if the collection has been modified since last commited."""
@@ -672,7 +702,7 @@ class RichCollectionBase(CollectionBase):
         if value == self._committed:
             return
         if value:
         if value == self._committed:
             return
         if value:
-            for k,v in self._items.items():
+            for k,v in listitems(self._items):
                 v.set_committed(True)
             self._committed = True
         else:
                 v.set_committed(True)
             self._committed = True
         else:
@@ -683,7 +713,7 @@ class RichCollectionBase(CollectionBase):
     @synchronized
     def __iter__(self):
         """Iterate over names of files and collections contained in this collection."""
     @synchronized
     def __iter__(self):
         """Iterate over names of files and collections contained in this collection."""
-        return iter(self._items.keys())
+        return iter(viewkeys(self._items))
 
     @synchronized
     def __getitem__(self, k):
 
     @synchronized
     def __getitem__(self, k):
@@ -720,12 +750,12 @@ class RichCollectionBase(CollectionBase):
     @synchronized
     def values(self):
         """Get a list of files and collection objects directly contained in this collection."""
     @synchronized
     def values(self):
         """Get a list of files and collection objects directly contained in this collection."""
-        return self._items.values()
+        return listvalues(self._items)
 
     @synchronized
     def items(self):
         """Get a list of (name, object) tuples directly contained in this collection."""
 
     @synchronized
     def items(self):
         """Get a list of (name, object) tuples directly contained in this collection."""
-        return self._items.items()
+        return listitems(self._items)
 
     def exists(self, path):
         """Test if there is a file or collection at `path`."""
 
     def exists(self, path):
         """Test if there is a file or collection at `path`."""
@@ -758,7 +788,7 @@ class RichCollectionBase(CollectionBase):
             item.remove(pathcomponents[1])
 
     def _clonefrom(self, source):
             item.remove(pathcomponents[1])
 
     def _clonefrom(self, source):
-        for k,v in source.items():
+        for k,v in listitems(source):
             self._items[k] = v.clone(self, k)
 
     def clone(self):
             self._items[k] = v.clone(self, k)
 
     def clone(self):
@@ -1074,8 +1104,8 @@ class RichCollectionBase(CollectionBase):
             # then return API server's PDH response.
             return self._portable_data_hash
         else:
             # then return API server's PDH response.
             return self._portable_data_hash
         else:
-            stripped = self.portable_manifest_text()
-            return hashlib.md5(stripped).hexdigest() + '+' + str(len(stripped))
+            stripped = self.portable_manifest_text().encode()
+            return '{}+{}'.format(hashlib.md5(stripped).hexdigest(), len(stripped))
 
     @synchronized
     def subscribe(self, callback):
 
     @synchronized
     def subscribe(self, callback):
@@ -1116,7 +1146,7 @@ class RichCollectionBase(CollectionBase):
     @synchronized
     def flush(self):
         """Flush bufferblocks to Keep."""
     @synchronized
     def flush(self):
         """Flush bufferblocks to Keep."""
-        for e in self.values():
+        for e in listvalues(self):
             e.flush()
 
 
             e.flush()
 
 
@@ -1171,8 +1201,9 @@ class Collection(RichCollectionBase):
         """Collection constructor.
 
         :manifest_locator_or_text:
         """Collection constructor.
 
         :manifest_locator_or_text:
-          One of Arvados collection UUID, block locator of
-          a manifest, raw manifest text, or None (to create an empty collection).
+          An Arvados collection UUID, portable data hash, raw manifest
+          text, or (if creating an empty collection) None.
+
         :parent:
           the parent Collection, may be None.
 
         :parent:
           the parent Collection, may be None.
 
@@ -1222,11 +1253,11 @@ class Collection(RichCollectionBase):
         self.events = None
 
         if manifest_locator_or_text:
         self.events = None
 
         if manifest_locator_or_text:
-            if re.match(util.keep_locator_pattern, manifest_locator_or_text):
+            if re.match(arvados.util.keep_locator_pattern, manifest_locator_or_text):
                 self._manifest_locator = manifest_locator_or_text
                 self._manifest_locator = manifest_locator_or_text
-            elif re.match(util.collection_uuid_pattern, manifest_locator_or_text):
+            elif re.match(arvados.util.collection_uuid_pattern, manifest_locator_or_text):
                 self._manifest_locator = manifest_locator_or_text
                 self._manifest_locator = manifest_locator_or_text
-            elif re.match(util.manifest_pattern, manifest_locator_or_text):
+            elif re.match(arvados.util.manifest_pattern, manifest_locator_or_text):
                 self._manifest_text = manifest_locator_or_text
             else:
                 raise errors.ArgumentError(
                 self._manifest_text = manifest_locator_or_text
             else:
                 raise errors.ArgumentError(
@@ -1240,6 +1271,18 @@ class Collection(RichCollectionBase):
     def root_collection(self):
         return self
 
     def root_collection(self):
         return self
 
+    def get_properties(self):
+        if self._api_response and self._api_response["properties"]:
+            return self._api_response["properties"]
+        else:
+            return {}
+
+    def get_trash_at(self):
+        if self._api_response and self._api_response["trash_at"]:
+            return ciso8601.parse_datetime(self._api_response["trash_at"])
+        else:
+            return None
+
     def stream_name(self):
         return "."
 
     def stream_name(self):
         return "."
 
@@ -1311,66 +1354,27 @@ class Collection(RichCollectionBase):
         # it.  If instantiation fails, we'll fall back to the except
         # clause, just like any other Collection lookup
         # failure. Return an exception, or None if successful.
         # it.  If instantiation fails, we'll fall back to the except
         # clause, just like any other Collection lookup
         # failure. Return an exception, or None if successful.
-        try:
-            self._remember_api_response(self._my_api().collections().get(
-                uuid=self._manifest_locator).execute(
-                    num_retries=self.num_retries))
-            self._manifest_text = self._api_response['manifest_text']
-            # If not overriden via kwargs, we should try to load the
-            # replication_desired from the API server
-            if self.replication_desired is None:
-                self.replication_desired = self._api_response.get('replication_desired', None)
-            return None
-        except Exception as e:
-            return e
-
-    def _populate_from_keep(self):
-        # Retrieve a manifest directly from Keep. This has a chance of
-        # working if [a] the locator includes a permission signature
-        # or [b] the Keep services are operating in world-readable
-        # mode. Return an exception, or None if successful.
-        try:
-            self._manifest_text = self._my_keep().get(
-                self._manifest_locator, num_retries=self.num_retries)
-        except Exception as e:
-            return e
+        self._remember_api_response(self._my_api().collections().get(
+            uuid=self._manifest_locator).execute(
+                num_retries=self.num_retries))
+        self._manifest_text = self._api_response['manifest_text']
+        self._portable_data_hash = self._api_response['portable_data_hash']
+        # If not overriden via kwargs, we should try to load the
+        # replication_desired from the API server
+        if self.replication_desired is None:
+            self.replication_desired = self._api_response.get('replication_desired', None)
 
     def _populate(self):
 
     def _populate(self):
-        if self._manifest_locator is None and self._manifest_text is None:
-            return
-        error_via_api = None
-        error_via_keep = None
-        should_try_keep = ((self._manifest_text is None) and
-                           util.keep_locator_pattern.match(
-                               self._manifest_locator))
-        if ((self._manifest_text is None) and
-            util.signed_locator_pattern.match(self._manifest_locator)):
-            error_via_keep = self._populate_from_keep()
         if self._manifest_text is None:
         if self._manifest_text is None:
-            error_via_api = self._populate_from_api_server()
-            if error_via_api is not None and not should_try_keep:
-                raise error_via_api
-        if ((self._manifest_text is None) and
-            not error_via_keep and
-            should_try_keep):
-            # Looks like a keep locator, and we didn't already try keep above
-            error_via_keep = self._populate_from_keep()
-        if self._manifest_text is None:
-            # Nothing worked!
-            raise errors.NotFoundError(
-                ("Failed to retrieve collection '{}' " +
-                 "from either API server ({}) or Keep ({})."
-                 ).format(
-                    self._manifest_locator,
-                    error_via_api,
-                    error_via_keep))
-        # populate
+            if self._manifest_locator is None:
+                return
+            else:
+                self._populate_from_api_server()
         self._baseline_manifest = self._manifest_text
         self._import_manifest(self._manifest_text)
 
         self._baseline_manifest = self._manifest_text
         self._import_manifest(self._manifest_text)
 
-
     def _has_collection_uuid(self):
     def _has_collection_uuid(self):
-        return self._manifest_locator is not None and re.match(util.collection_uuid_pattern, self._manifest_locator)
+        return self._manifest_locator is not None and re.match(arvados.util.collection_uuid_pattern, self._manifest_locator)
 
     def __enter__(self):
         return self
 
     def __enter__(self):
         return self
@@ -1446,17 +1450,34 @@ class Collection(RichCollectionBase):
     @must_be_writable
     @synchronized
     @retry_method
     @must_be_writable
     @synchronized
     @retry_method
-    def save(self, merge=True, num_retries=None):
+    def save(self,
+             properties=None,
+             storage_classes=None,
+             trash_at=None,
+             merge=True,
+             num_retries=None):
         """Save collection to an existing collection record.
 
         Commit pending buffer blocks to Keep, merge with remote record (if
         """Save collection to an existing collection record.
 
         Commit pending buffer blocks to Keep, merge with remote record (if
-        merge=True, the default), and update the collection record.  Returns
+        merge=True, the default), and update the collection record. Returns
         the current manifest text.
 
         Will raise AssertionError if not associated with a collection record on
         the API server.  If you want to save a manifest to Keep only, see
         `save_new()`.
 
         the current manifest text.
 
         Will raise AssertionError if not associated with a collection record on
         the API server.  If you want to save a manifest to Keep only, see
         `save_new()`.
 
+        :properties:
+          Additional properties of collection. This value will replace any existing
+          properties of collection.
+
+        :storage_classes:
+          Specify desirable storage classes to be used when writing data to Keep.
+
+        :trash_at:
+          A collection is *expiring* when it has a *trash_at* time in the future.
+          An expiring collection can be accessed as normal,
+          but is scheduled to be trashed automatically at the *trash_at* time.
+
         :merge:
           Update and merge remote changes before saving.  Otherwise, any
           remote changes will be ignored and overwritten.
         :merge:
           Update and merge remote changes before saving.  Otherwise, any
           remote changes will be ignored and overwritten.
@@ -1465,6 +1486,24 @@ class Collection(RichCollectionBase):
           Retry count on API calls (if None,  use the collection default)
 
         """
           Retry count on API calls (if None,  use the collection default)
 
         """
+        if properties and type(properties) is not dict:
+            raise errors.ArgumentError("properties must be dictionary type.")
+
+        if storage_classes and type(storage_classes) is not list:
+            raise errors.ArgumentError("storage_classes must be list type.")
+
+        if trash_at and type(trash_at) is not datetime.datetime:
+            raise errors.ArgumentError("trash_at must be datetime type.")
+
+        body={}
+        if properties:
+            body["properties"] = properties
+        if storage_classes:
+            body["storage_classes_desired"] = storage_classes
+        if trash_at:
+            t = trash_at.strftime("%Y-%m-%dT%H:%M:%S.%fZ")
+            body["trash_at"] = t
+
         if not self.committed():
             if not self._has_collection_uuid():
                 raise AssertionError("Collection manifest_locator is not a collection uuid.  Use save_new() for new collections.")
         if not self.committed():
             if not self._has_collection_uuid():
                 raise AssertionError("Collection manifest_locator is not a collection uuid.  Use save_new() for new collections.")
@@ -1475,14 +1514,20 @@ class Collection(RichCollectionBase):
                 self.update()
 
             text = self.manifest_text(strip=False)
                 self.update()
 
             text = self.manifest_text(strip=False)
+            body['manifest_text'] = text
+
             self._remember_api_response(self._my_api().collections().update(
                 uuid=self._manifest_locator,
             self._remember_api_response(self._my_api().collections().update(
                 uuid=self._manifest_locator,
-                body={'manifest_text': text}
-                ).execute(
-                    num_retries=num_retries))
+                body=body
+                ).execute(num_retries=num_retries))
             self._manifest_text = self._api_response["manifest_text"]
             self._portable_data_hash = self._api_response["portable_data_hash"]
             self.set_committed(True)
             self._manifest_text = self._api_response["manifest_text"]
             self._portable_data_hash = self._api_response["portable_data_hash"]
             self.set_committed(True)
+        elif body:
+            self._remember_api_response(self._my_api().collections().update(
+                uuid=self._manifest_locator,
+                body=body
+                ).execute(num_retries=num_retries))
 
         return self._manifest_text
 
 
         return self._manifest_text
 
@@ -1493,6 +1538,9 @@ class Collection(RichCollectionBase):
     def save_new(self, name=None,
                  create_collection_record=True,
                  owner_uuid=None,
     def save_new(self, name=None,
                  create_collection_record=True,
                  owner_uuid=None,
+                 properties=None,
+                 storage_classes=None,
+                 trash_at=None,
                  ensure_unique_name=False,
                  num_retries=None):
         """Save collection to a new collection record.
                  ensure_unique_name=False,
                  num_retries=None):
         """Save collection to a new collection record.
@@ -1500,7 +1548,7 @@ class Collection(RichCollectionBase):
         Commit pending buffer blocks to Keep and, when create_collection_record
         is True (default), create a new collection record.  After creating a
         new collection record, this Collection object will be associated with
         Commit pending buffer blocks to Keep and, when create_collection_record
         is True (default), create a new collection record.  After creating a
         new collection record, this Collection object will be associated with
-        the new record used by `save()`.  Returns the current manifest text.
+        the new record used by `save()`. Returns the current manifest text.
 
         :name:
           The collection name.
 
         :name:
           The collection name.
@@ -1513,6 +1561,18 @@ class Collection(RichCollectionBase):
           the user, or project uuid that will own this collection.
           If None, defaults to the current user.
 
           the user, or project uuid that will own this collection.
           If None, defaults to the current user.
 
+        :properties:
+          Additional properties of collection. This value will replace any existing
+          properties of collection.
+
+        :storage_classes:
+          Specify desirable storage classes to be used when writing data to Keep.
+
+        :trash_at:
+          A collection is *expiring* when it has a *trash_at* time in the future.
+          An expiring collection can be accessed as normal,
+          but is scheduled to be trashed automatically at the *trash_at* time.
+
         :ensure_unique_name:
           If True, ask the API server to rename the collection
           if it conflicts with a collection with the same name and owner.  If
         :ensure_unique_name:
           If True, ask the API server to rename the collection
           if it conflicts with a collection with the same name and owner.  If
@@ -1522,6 +1582,15 @@ class Collection(RichCollectionBase):
           Retry count on API calls (if None,  use the collection default)
 
         """
           Retry count on API calls (if None,  use the collection default)
 
         """
+        if properties and type(properties) is not dict:
+            raise errors.ArgumentError("properties must be dictionary type.")
+
+        if storage_classes and type(storage_classes) is not list:
+            raise errors.ArgumentError("storage_classes must be list type.")
+
+        if trash_at and type(trash_at) is not datetime.datetime:
+            raise errors.ArgumentError("trash_at must be datetime type.")
+
         self._my_block_manager().commit_all()
         text = self.manifest_text(strip=False)
 
         self._my_block_manager().commit_all()
         text = self.manifest_text(strip=False)
 
@@ -1535,6 +1604,13 @@ class Collection(RichCollectionBase):
                     "replication_desired": self.replication_desired}
             if owner_uuid:
                 body["owner_uuid"] = owner_uuid
                     "replication_desired": self.replication_desired}
             if owner_uuid:
                 body["owner_uuid"] = owner_uuid
+            if properties:
+                body["properties"] = properties
+            if storage_classes:
+                body["storage_classes_desired"] = storage_classes
+            if trash_at:
+                t = trash_at.strftime("%Y-%m-%dT%H:%M:%S.%fZ")
+                body["trash_at"] = t
 
             self._remember_api_response(self._my_api().collections().create(ensure_unique_name=ensure_unique_name, body=body).execute(num_retries=num_retries))
             text = self._api_response["manifest_text"]
 
             self._remember_api_response(self._my_api().collections().create(ensure_unique_name=ensure_unique_name, body=body).execute(num_retries=num_retries))
             text = self._api_response["manifest_text"]
@@ -1547,6 +1623,10 @@ class Collection(RichCollectionBase):
 
         return text
 
 
         return text
 
+    _token_re = re.compile(r'(\S+)(\s+|$)')
+    _block_re = re.compile(r'[0-9a-f]{32}\+(\d+)(\+\S+)*')
+    _segment_re = re.compile(r'(\d+):(\d+):(\S+)')
+
     @synchronized
     def _import_manifest(self, manifest_text):
         """Import a manifest into a `Collection`.
     @synchronized
     def _import_manifest(self, manifest_text):
         """Import a manifest into a `Collection`.
@@ -1565,7 +1645,7 @@ class Collection(RichCollectionBase):
         stream_name = None
         state = STREAM_NAME
 
         stream_name = None
         state = STREAM_NAME
 
-        for token_and_separator in re.finditer(r'(\S+)(\s+|$)', manifest_text):
+        for token_and_separator in self._token_re.finditer(manifest_text):
             tok = token_and_separator.group(1)
             sep = token_and_separator.group(2)
 
             tok = token_and_separator.group(1)
             sep = token_and_separator.group(2)
 
@@ -1574,25 +1654,25 @@ class Collection(RichCollectionBase):
                 stream_name = tok.replace('\\040', ' ')
                 blocks = []
                 segments = []
                 stream_name = tok.replace('\\040', ' ')
                 blocks = []
                 segments = []
-                streamoffset = 0L
+                streamoffset = 0
                 state = BLOCKS
                 self.find_or_create(stream_name, COLLECTION)
                 continue
 
             if state == BLOCKS:
                 state = BLOCKS
                 self.find_or_create(stream_name, COLLECTION)
                 continue
 
             if state == BLOCKS:
-                block_locator = re.match(r'[0-9a-f]{32}\+(\d+)(\+\S+)*', tok)
+                block_locator = self._block_re.match(tok)
                 if block_locator:
                 if block_locator:
-                    blocksize = long(block_locator.group(1))
+                    blocksize = int(block_locator.group(1))
                     blocks.append(Range(tok, streamoffset, blocksize, 0))
                     streamoffset += blocksize
                 else:
                     state = SEGMENTS
 
             if state == SEGMENTS:
                     blocks.append(Range(tok, streamoffset, blocksize, 0))
                     streamoffset += blocksize
                 else:
                     state = SEGMENTS
 
             if state == SEGMENTS:
-                file_segment = re.search(r'^(\d+):(\d+):(\S+)', tok)
+                file_segment = self._segment_re.match(tok)
                 if file_segment:
                 if file_segment:
-                    pos = long(file_segment.group(1))
-                    size = long(file_segment.group(2))
+                    pos = int(file_segment.group(1))
+                    size = int(file_segment.group(2))
                     name = file_segment.group(3).replace('\\040', ' ')
                     filepath = os.path.join(stream_name, name)
                     afile = self.find_or_create(filepath, FILE)
                     name = file_segment.group(3).replace('\\040', ' ')
                     filepath = os.path.join(stream_name, name)
                     afile = self.find_or_create(filepath, FILE)
@@ -1669,9 +1749,8 @@ class Subcollection(RichCollectionBase):
 class CollectionReader(Collection):
     """A read-only collection object.
 
 class CollectionReader(Collection):
     """A read-only collection object.
 
-    Initialize from an api collection record locator, a portable data hash of a
-    manifest, or raw manifest text.  See `Collection` constructor for detailed
-    options.
+    Initialize from a collection UUID or portable data hash, or raw
+    manifest text.  See `Collection` constructor for detailed options.
 
     """
     def __init__(self, manifest_locator_or_text, *args, **kwargs):
 
     """
     def __init__(self, manifest_locator_or_text, *args, **kwargs):