X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/b4808aa380097befefde114a0a87f41270c862bb..c36ec856598f214e340e3335ddd347d131335bf8:/sdk/python/arvados/collection.py diff --git a/sdk/python/arvados/collection.py b/sdk/python/arvados/collection.py index 44d0325353..a44d42b6ac 100644 --- a/sdk/python/arvados/collection.py +++ b/sdk/python/arvados/collection.py @@ -1262,7 +1262,8 @@ class Collection(RichCollectionBase): block_manager=None, replication_desired=None, storage_classes_desired=None, - put_threads=None): + put_threads=None, + get_threads=None): """Collection constructor. :manifest_locator_or_text: @@ -1296,8 +1297,8 @@ class Collection(RichCollectionBase): :storage_classes_desired: A list of storage class names where to upload the data. If None, - the keepstores are expected to store the data into their default - storage class. + the keep client is expected to store the data into the cluster's + default storage class(es). """ @@ -1311,6 +1312,7 @@ class Collection(RichCollectionBase): self.replication_desired = replication_desired self._storage_classes_desired = storage_classes_desired self.put_threads = put_threads + self.get_threads = get_threads if apiconfig: self._config = apiconfig @@ -1344,8 +1346,8 @@ class Collection(RichCollectionBase): try: self._populate() - except (IOError, errors.SyntaxError) as e: - raise errors.ArgumentError("Error processing manifest text: %s", e) + except errors.SyntaxError as e: + raise errors.ArgumentError("Error processing manifest text: %s", str(e)) from None def storage_classes_desired(self): return self._storage_classes_desired or [] @@ -1395,7 +1397,7 @@ class Collection(RichCollectionBase): # our tokens. return else: - self._past_versions.add((response.get("modified_at"), response.get("portable_data_hash"))) + self._remember_api_response(response) other = CollectionReader(response["manifest_text"]) baseline = CollectionReader(self._manifest_text) self.apply(baseline.diff(other)) @@ -1424,7 +1426,12 @@ class Collection(RichCollectionBase): copies = (self.replication_desired or self._my_api()._rootDesc.get('defaultCollectionReplication', 2)) - self._block_manager = _BlockManager(self._my_keep(), copies=copies, put_threads=self.put_threads, num_retries=self.num_retries, storage_classes_func=self.storage_classes_desired) + self._block_manager = _BlockManager(self._my_keep(), + copies=copies, + put_threads=self.put_threads, + num_retries=self.num_retries, + storage_classes_func=self.storage_classes_desired, + get_threads=self.get_threads,) return self._block_manager def _remember_api_response(self, response): @@ -1546,7 +1553,8 @@ class Collection(RichCollectionBase): storage_classes=None, trash_at=None, merge=True, - num_retries=None): + num_retries=None, + preserve_version=False): """Save collection to an existing collection record. Commit pending buffer blocks to Keep, merge with remote record (if @@ -1576,6 +1584,13 @@ class Collection(RichCollectionBase): :num_retries: Retry count on API calls (if None, use the collection default) + :preserve_version: + If True, indicate that the collection content being saved right now + should be preserved in a version snapshot if the collection record is + updated in the future. Requires that the API server has + Collections.CollectionVersioning enabled, if not, setting this will + raise an exception. + """ if properties and type(properties) is not dict: raise errors.ArgumentError("properties must be dictionary type.") @@ -1588,6 +1603,9 @@ class Collection(RichCollectionBase): if trash_at and type(trash_at) is not datetime.datetime: raise errors.ArgumentError("trash_at must be datetime type.") + if preserve_version and not self._my_api().config()['Collections'].get('CollectionVersioning', False): + raise errors.ArgumentError("preserve_version is not supported when CollectionVersioning is not enabled.") + body={} if properties: body["properties"] = properties @@ -1596,6 +1614,8 @@ class Collection(RichCollectionBase): if trash_at: t = trash_at.strftime("%Y-%m-%dT%H:%M:%S.%fZ") body["trash_at"] = t + if preserve_version: + body["preserve_version"] = preserve_version if not self.committed(): if self._has_remote_blocks: @@ -1641,7 +1661,8 @@ class Collection(RichCollectionBase): storage_classes=None, trash_at=None, ensure_unique_name=False, - num_retries=None): + num_retries=None, + preserve_version=False): """Save collection to a new collection record. Commit pending buffer blocks to Keep and, when create_collection_record @@ -1680,6 +1701,13 @@ class Collection(RichCollectionBase): :num_retries: Retry count on API calls (if None, use the collection default) + :preserve_version: + If True, indicate that the collection content being saved right now + should be preserved in a version snapshot if the collection record is + updated in the future. Requires that the API server has + Collections.CollectionVersioning enabled, if not, setting this will + raise an exception. + """ if properties and type(properties) is not dict: raise errors.ArgumentError("properties must be dictionary type.") @@ -1690,6 +1718,9 @@ class Collection(RichCollectionBase): if trash_at and type(trash_at) is not datetime.datetime: raise errors.ArgumentError("trash_at must be datetime type.") + if preserve_version and not self._my_api().config()['Collections'].get('CollectionVersioning', False): + raise errors.ArgumentError("preserve_version is not supported when CollectionVersioning is not enabled.") + if self._has_remote_blocks: # Copy any remote blocks to the local cluster. self._copy_remote_blocks(remote_blocks={}) @@ -1718,6 +1749,8 @@ class Collection(RichCollectionBase): if trash_at: t = trash_at.strftime("%Y-%m-%dT%H:%M:%S.%fZ") body["trash_at"] = t + if preserve_version: + body["preserve_version"] = preserve_version self._remember_api_response(self._my_api().collections().create(ensure_unique_name=ensure_unique_name, body=body).execute(num_retries=num_retries)) text = self._api_response["manifest_text"] @@ -1790,7 +1823,13 @@ class Collection(RichCollectionBase): self.find_or_create(os.path.join(stream_name, name[:-2]), COLLECTION) else: filepath = os.path.join(stream_name, name) - afile = self.find_or_create(filepath, FILE) + try: + afile = self.find_or_create(filepath, FILE) + except IOError as e: + if e.errno == errno.ENOTDIR: + raise errors.SyntaxError("Dir part of %s conflicts with file of the same name.", filepath) from None + else: + raise e from None if isinstance(afile, ArvadosFile): afile.add_segment(blocks, pos, size) else: