X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/744bbbafa5dcbba814391eaedfa9489c3614b644..71a0b1d3e4313b4ae4daf28330a2a075d30ed636:/sdk/python/arvados/collection.py diff --git a/sdk/python/arvados/collection.py b/sdk/python/arvados/collection.py index 3910b22430..ebca15c54b 100644 --- a/sdk/python/arvados/collection.py +++ b/sdk/python/arvados/collection.py @@ -827,7 +827,7 @@ class RichCollectionBase(CollectionBase): self.set_committed(False) self.notify(DEL, self, pathcomponents[0], deleteditem) else: - item.remove(pathcomponents[1]) + item.remove(pathcomponents[1], recursive=recursive) def _clonefrom(self, source): for k,v in listitems(source): @@ -1262,7 +1262,8 @@ class Collection(RichCollectionBase): block_manager=None, replication_desired=None, storage_classes_desired=None, - put_threads=None): + put_threads=None, + get_threads=None): """Collection constructor. :manifest_locator_or_text: @@ -1296,8 +1297,8 @@ class Collection(RichCollectionBase): :storage_classes_desired: A list of storage class names where to upload the data. If None, - the keepstores are expected to store the data into their default - storage class. + the keep client is expected to store the data into the cluster's + default storage class(es). """ @@ -1307,10 +1308,16 @@ class Collection(RichCollectionBase): super(Collection, self).__init__(parent) self._api_client = api_client self._keep_client = keep_client + + # Use the keep client from ThreadSafeApiCache + if self._keep_client is None and isinstance(self._api_client, ThreadSafeApiCache): + self._keep_client = self._api_client.keep + self._block_manager = block_manager self.replication_desired = replication_desired self._storage_classes_desired = storage_classes_desired self.put_threads = put_threads + self.get_threads = get_threads if apiconfig: self._config = apiconfig @@ -1344,8 +1351,8 @@ class Collection(RichCollectionBase): try: self._populate() - except (IOError, errors.SyntaxError) as e: - raise errors.ArgumentError("Error processing manifest text: %s", e) + except errors.SyntaxError as e: + raise errors.ArgumentError("Error processing manifest text: %s", str(e)) from None def storage_classes_desired(self): return self._storage_classes_desired or [] @@ -1395,7 +1402,7 @@ class Collection(RichCollectionBase): # our tokens. return else: - self._past_versions.add((response.get("modified_at"), response.get("portable_data_hash"))) + self._remember_api_response(response) other = CollectionReader(response["manifest_text"]) baseline = CollectionReader(self._manifest_text) self.apply(baseline.diff(other)) @@ -1404,7 +1411,7 @@ class Collection(RichCollectionBase): @synchronized def _my_api(self): if self._api_client is None: - self._api_client = ThreadSafeApiCache(self._config) + self._api_client = ThreadSafeApiCache(self._config, version='v1') if self._keep_client is None: self._keep_client = self._api_client.keep return self._api_client @@ -1424,8 +1431,12 @@ class Collection(RichCollectionBase): copies = (self.replication_desired or self._my_api()._rootDesc.get('defaultCollectionReplication', 2)) - classes = self.storage_classes_desired or [] - self._block_manager = _BlockManager(self._my_keep(), copies=copies, put_threads=self.put_threads, num_retries=self.num_retries, storage_classes_func=self.storage_classes_desired) + self._block_manager = _BlockManager(self._my_keep(), + copies=copies, + put_threads=self.put_threads, + num_retries=self.num_retries, + storage_classes_func=self.storage_classes_desired, + get_threads=self.get_threads,) return self._block_manager def _remember_api_response(self, response): @@ -1547,7 +1558,8 @@ class Collection(RichCollectionBase): storage_classes=None, trash_at=None, merge=True, - num_retries=None): + num_retries=None, + preserve_version=False): """Save collection to an existing collection record. Commit pending buffer blocks to Keep, merge with remote record (if @@ -1577,25 +1589,38 @@ class Collection(RichCollectionBase): :num_retries: Retry count on API calls (if None, use the collection default) + :preserve_version: + If True, indicate that the collection content being saved right now + should be preserved in a version snapshot if the collection record is + updated in the future. Requires that the API server has + Collections.CollectionVersioning enabled, if not, setting this will + raise an exception. + """ if properties and type(properties) is not dict: raise errors.ArgumentError("properties must be dictionary type.") if storage_classes and type(storage_classes) is not list: raise errors.ArgumentError("storage_classes must be list type.") + if storage_classes: + self._storage_classes_desired = storage_classes if trash_at and type(trash_at) is not datetime.datetime: raise errors.ArgumentError("trash_at must be datetime type.") + if preserve_version and not self._my_api().config()['Collections'].get('CollectionVersioning', False): + raise errors.ArgumentError("preserve_version is not supported when CollectionVersioning is not enabled.") + body={} if properties: body["properties"] = properties - if storage_classes: - self._storage_classes_desired = storage_classes + if self.storage_classes_desired(): body["storage_classes_desired"] = self.storage_classes_desired() if trash_at: t = trash_at.strftime("%Y-%m-%dT%H:%M:%S.%fZ") body["trash_at"] = t + if preserve_version: + body["preserve_version"] = preserve_version if not self.committed(): if self._has_remote_blocks: @@ -1641,7 +1666,8 @@ class Collection(RichCollectionBase): storage_classes=None, trash_at=None, ensure_unique_name=False, - num_retries=None): + num_retries=None, + preserve_version=False): """Save collection to a new collection record. Commit pending buffer blocks to Keep and, when create_collection_record @@ -1680,6 +1706,13 @@ class Collection(RichCollectionBase): :num_retries: Retry count on API calls (if None, use the collection default) + :preserve_version: + If True, indicate that the collection content being saved right now + should be preserved in a version snapshot if the collection record is + updated in the future. Requires that the API server has + Collections.CollectionVersioning enabled, if not, setting this will + raise an exception. + """ if properties and type(properties) is not dict: raise errors.ArgumentError("properties must be dictionary type.") @@ -1690,6 +1723,9 @@ class Collection(RichCollectionBase): if trash_at and type(trash_at) is not datetime.datetime: raise errors.ArgumentError("trash_at must be datetime type.") + if preserve_version and not self._my_api().config()['Collections'].get('CollectionVersioning', False): + raise errors.ArgumentError("preserve_version is not supported when CollectionVersioning is not enabled.") + if self._has_remote_blocks: # Copy any remote blocks to the local cluster. self._copy_remote_blocks(remote_blocks={}) @@ -1718,6 +1754,8 @@ class Collection(RichCollectionBase): if trash_at: t = trash_at.strftime("%Y-%m-%dT%H:%M:%S.%fZ") body["trash_at"] = t + if preserve_version: + body["preserve_version"] = preserve_version self._remember_api_response(self._my_api().collections().create(ensure_unique_name=ensure_unique_name, body=body).execute(num_retries=num_retries)) text = self._api_response["manifest_text"] @@ -1790,7 +1828,13 @@ class Collection(RichCollectionBase): self.find_or_create(os.path.join(stream_name, name[:-2]), COLLECTION) else: filepath = os.path.join(stream_name, name) - afile = self.find_or_create(filepath, FILE) + try: + afile = self.find_or_create(filepath, FILE) + except IOError as e: + if e.errno == errno.ENOTDIR: + raise errors.SyntaxError("Dir part of %s conflicts with file of the same name.", filepath) from None + else: + raise e from None if isinstance(afile, ArvadosFile): afile.add_segment(blocks, pos, size) else: