def __init__(self, parent=None):
self.parent = parent
self._modified = True
+ self._callback = None
self._items = {}
def _my_api(self):
if item is None:
# create new file
if create_type == COLLECTION:
- item = Subcollection(self)
+ item = Subcollection(self, pathcomponents[0])
else:
- item = ArvadosFile(self)
+ item = ArvadosFile(self, pathcomponents[0])
self._items[pathcomponents[0]] = item
self._modified = True
self.notify(ADD, self, pathcomponents[0], item)
else:
if item is None:
# create new collection
- item = Subcollection(self)
+ item = Subcollection(self, pathcomponents[0])
self._items[pathcomponents[0]] = item
self._modified = True
self.notify(ADD, self, pathcomponents[0], item)
if isinstance(item, RichCollectionBase):
return item.find_or_create(pathcomponents[1], create_type)
else:
- raise IOError((errno.ENOTDIR, "Interior path components must be subcollection"))
+ raise IOError(errno.ENOTDIR, "Interior path components must be subcollection")
else:
return self
else:
return item
else:
- raise IOError((errno.ENOTDIR, "Interior path components must be subcollection"))
+ raise IOError(errno.ENOTDIR, "Interior path components must be subcollection")
- def mkdirs(path):
+ def mkdirs(self, path):
"""Recursive subcollection create.
Like `os.mkdirs()`. Will create intermediate subcollections needed to
create = (mode != "r")
if create and not self.writable():
- raise IOError((errno.EROFS, "Collection is read only"))
+ raise IOError(errno.EROFS, "Collection is read only")
if create:
arvfile = self.find_or_create(path, FILE)
arvfile = self.find(path)
if arvfile is None:
- raise IOError((errno.ENOENT, "File not found"))
+ raise IOError(errno.ENOENT, "File not found")
if not isinstance(arvfile, ArvadosFile):
- raise IOError((errno.EISDIR, "Path must refer to a file."))
+ raise IOError(errno.EISDIR, "Path must refer to a file.")
if mode[0] == "w":
arvfile.truncate(0)
name = os.path.basename(path)
if mode == "r":
- return ArvadosFileReader(arvfile, name, mode, num_retries=self.num_retries)
+ return ArvadosFileReader(arvfile, mode, num_retries=self.num_retries)
else:
- return ArvadosFileWriter(arvfile, name, mode, num_retries=self.num_retries)
+ return ArvadosFileWriter(arvfile, mode, num_retries=self.num_retries)
@synchronized
def modified(self):
pathcomponents = path.split("/", 1)
item = self._items.get(pathcomponents[0])
if item is None:
- raise IOError((errno.ENOENT, "File not found"))
+ raise IOError(errno.ENOENT, "File not found")
if len(pathcomponents) == 1:
if isinstance(self._items[pathcomponents[0]], RichCollectionBase) and len(self._items[pathcomponents[0]]) > 0 and not recursive:
- raise IOError((errno.ENOTEMPTY, "Subcollection not empty"))
+ raise IOError(errno.ENOTEMPTY, "Subcollection not empty")
deleteditem = self._items[pathcomponents[0]]
del self._items[pathcomponents[0]]
self._modified = True
def _clonefrom(self, source):
for k,v in source.items():
- self._items[k] = v.clone(self)
+ self._items[k] = v.clone(self, k)
def clone(self):
raise NotImplementedError()
@must_be_writable
@synchronized
- def add(self, source_obj, target_name, overwrite=False):
- """Copy a file or subcollection to this collection.
+ def add(self, source_obj, target_name, overwrite=False, reparent=False):
+ """Copy or move a file or subcollection to this collection.
:source_obj:
An ArvadosFile, or Subcollection object
:overwrite:
Whether to overwrite target file if it already exists.
+ :reparent:
+ If True, source_obj will be moved from its parent collection to this collection.
+ If False, source_obj will be copied and the parent collection will be
+ unmodified.
+
"""
if target_name in self and not overwrite:
- raise IOError((errno.EEXIST, "File already exists"))
+ raise IOError(errno.EEXIST, "File already exists")
modified_from = None
if target_name in self:
modified_from = self[target_name]
- # Actually make the copy.
- dup = source_obj.clone(self)
- self._items[target_name] = dup
+ # Actually make the move or copy.
+ if reparent:
+ source_obj.reparent(self, target_name)
+ item = source_obj
+ else:
+ item = source_obj.clone(self, target_name)
+
+ self._items[target_name] = item
self._modified = True
if modified_from:
- self.notify(MOD, self, target_name, (modified_from, dup))
+ self.notify(MOD, self, target_name, (modified_from, item))
+ else:
+ self.notify(ADD, self, target_name, item)
+
+ def _get_src_target(self, source, target_path, source_collection, create_dest):
+ if source_collection is None:
+ source_collection = self
+
+ # Find the object
+ if isinstance(source, basestring):
+ source_obj = source_collection.find(source)
+ if source_obj is None:
+ raise IOError(errno.ENOENT, "File not found")
+ sourcecomponents = source.split("/")
else:
- self.notify(ADD, self, target_name, dup)
+ source_obj = source
+ sourcecomponents = None
+
+ # Find parent collection the target path
+ targetcomponents = target_path.split("/")
+
+ # Determine the name to use.
+ target_name = targetcomponents[-1] if targetcomponents[-1] else sourcecomponents[-1]
+
+ if not target_name:
+ raise errors.ArgumentError("Target path is empty and source is an object. Cannot determine destination filename to use.")
+
+ if create_dest:
+ target_dir = self.find_or_create("/".join(targetcomponents[0:-1]), COLLECTION)
+ else:
+ if len(targetcomponents) > 1:
+ target_dir = self.find("/".join(targetcomponents[0:-1]))
+ else:
+ target_dir = self
+
+ if target_dir is None:
+ raise IOError(errno.ENOENT, "Target directory not found.")
+
+ if target_name in target_dir and isinstance(self[target_name], RichCollectionBase) and sourcecomponents:
+ target_dir = target_dir[target_name]
+ target_name = sourcecomponents[-1]
+
+ return (source_obj, target_dir, target_name)
@must_be_writable
@synchronized
:overwrite:
Whether to overwrite target file if it already exists.
"""
- if source_collection is None:
- source_collection = self
- # Find the object to copy
- if isinstance(source, basestring):
- source_obj = source_collection.find(source)
- if source_obj is None:
- raise IOError((errno.ENOENT, "File not found"))
- sourcecomponents = source.split("/")
- else:
- source_obj = source
- sourcecomponents = None
+ source_obj, target_dir, target_name = self._get_src_target(source, target_path, source_collection, True)
+ target_dir.add(source_obj, target_name, overwrite, False)
- # Find parent collection the target path
- targetcomponents = target_path.split("/")
+ @must_be_writable
+ @synchronized
+ def rename(self, source, target_path, source_collection=None, overwrite=False):
+ """Move a file or subcollection from `source_collection` to a new path in this collection.
- # Determine the name to use.
- target_name = targetcomponents[-1] if targetcomponents[-1] else (sourcecomponents[-1] if sourcecomponents else None)
+ :source:
+ A string with a path to source file or subcollection.
- if not target_name:
- raise errors.ArgumentError("Target path is empty and source is an object. Cannot determine destination filename to use.")
+ :target_path:
+ Destination file or path. If the target path already exists and is a
+ subcollection, the item will be placed inside the subcollection. If
+ the target path already exists and is a file, this will raise an error
+ unless you specify `overwrite=True`.
- target_dir = self.find_or_create("/".join(targetcomponents[0:-1]), COLLECTION)
+ :source_collection:
+ Collection to copy `source_path` from (default `self`)
- if target_name in target_dir and isinstance(self[target_name], RichCollectionBase) and sourcecomponents:
- target_dir = target_dir[target_name]
- target_name = sourcecomponents[-1]
+ :overwrite:
+ Whether to overwrite target file if it already exists.
+ """
- target_dir.add(source_obj, target_name, overwrite)
+ source_obj, target_dir, target_name = self._get_src_target(source, target_path, source_collection, False)
+ if not source_obj.writable():
+ raise IOError(errno.EROFS, "Source collection must be writable.")
+ target_dir.add(source_obj, target_name, overwrite, True)
def portable_manifest_text(self, stream_name="."):
"""Get the manifest text for this collection, sub collections and files.
- This method does not flush outstanding to Keep. It will return a
- normalized manifest with access tokens stripped.
+ This method does not flush outstanding blocks to Keep. It will return
+ a normalized manifest with access tokens stripped.
:stream_name:
Name to use for this stream (directory)
"""
- return self.manifest_text(stream_name, strip=True, normalize=True, flush=False)
+ return self._get_manifest_text(stream_name, True, True)
- @synchronized
- def manifest_text(self, stream_name=".", strip=False, normalize=False, flush=True):
+ def manifest_text(self, stream_name=".", strip=False, normalize=False):
"""Get the manifest text for this collection, sub collections and files.
- By default, this method will flush outstanding blocksto Keep. By
- default it will not normalize the manifest or strip access tokens.
+ This method will flush outstanding blocks to Keep. By default, it will
+ not normalize an unmodified manifest or strip access tokens.
:stream_name:
Name to use for this stream (directory)
is not modified, return the original manifest text even if it is not
in normalized form.
- :flush:
- If true (default) write any outstanding blocks.
+ """
+
+ self._my_block_manager().commit_all()
+ return self._get_manifest_text(stream_name, strip, normalize)
+
+ @synchronized
+ def _get_manifest_text(self, stream_name, strip, normalize):
+ """Get the manifest text for this collection, sub collections and files.
+
+ :stream_name:
+ Name to use for this stream (directory)
+
+ :strip:
+ If True, remove signing tokens from block locators if present.
+ If False (default), block locators are left unchanged.
+
+ :normalize:
+ If True, always export the manifest text in normalized form
+ even if the Collection is not modified. If False (default) and the collection
+ is not modified, return the original manifest text even if it is not
+ in normalized form.
"""
if self.modified() or self._manifest_text is None or normalize:
- if flush:
- self._my_block_manager().commit_all()
-
stream = {}
buf = []
sorted_keys = sorted(self.keys())
if stream:
buf.append(" ".join(normalize_stream(stream_name, stream)) + "\n")
for dirname in [s for s in sorted_keys if isinstance(self[s], RichCollectionBase)]:
- buf.append(self[dirname].manifest_text(stream_name=os.path.join(stream_name, dirname), strip=strip))
+ buf.append(self[dirname].manifest_text(stream_name=os.path.join(stream_name, dirname), strip=strip, normalize=True))
return "".join(buf)
else:
if strip:
holding_collection = Collection(api_client=self._my_api(), keep_client=self._my_keep())
for k in self:
if k not in end_collection:
- changes.append((DEL, os.path.join(prefix, k), self[k].clone(holding_collection)))
+ changes.append((DEL, os.path.join(prefix, k), self[k].clone(holding_collection, "")))
for k in end_collection:
if k in self:
if isinstance(end_collection[k], Subcollection) and isinstance(self[k], Subcollection):
changes.extend(self[k].diff(end_collection[k], os.path.join(prefix, k), holding_collection))
elif end_collection[k] != self[k]:
- changes.append((MOD, os.path.join(prefix, k), self[k].clone(holding_collection), end_collection[k].clone(holding_collection)))
+ changes.append((MOD, os.path.join(prefix, k), self[k].clone(holding_collection, ""), end_collection[k].clone(holding_collection, "")))
else:
- changes.append((ADD, os.path.join(prefix, k), end_collection[k].clone(holding_collection)))
+ changes.append((ADD, os.path.join(prefix, k), end_collection[k].clone(holding_collection, "")))
return changes
@must_be_writable
stripped = self.portable_manifest_text()
return hashlib.md5(stripped).hexdigest() + '+' + str(len(stripped))
+ @synchronized
+ def subscribe(self, callback):
+ if self._callback is None:
+ self._callback = callback
+ else:
+ raise errors.ArgumentError("A callback is already set on this collection.")
+
+ @synchronized
+ def unsubscribe(self):
+ if self._callback is not None:
+ self._callback = None
+
+ @synchronized
+ def notify(self, event, collection, name, item):
+ if self._callback:
+ self._callback(event, collection, name, item)
+ self.root_collection().notify(event, collection, name, item)
+
@synchronized
def __eq__(self, other):
if other is self:
self._api_response = None
self.lock = threading.RLock()
- self.callbacks = []
self.events = None
if manifest_locator_or_text:
def __exit__(self, exc_type, exc_value, traceback):
"""Support scoped auto-commit in a with: block."""
- if exc_type is not None:
+ if exc_type is None:
if self.writable() and self._has_collection_uuid():
self.save()
if self._block_manager is not None:
return self._manifest_locator
@synchronized
- def clone(self, new_parent=None, readonly=False, new_config=None):
+ def clone(self, new_parent=None, new_name=None, readonly=False, new_config=None):
if new_config is None:
new_config = self._config
if readonly:
if create_collection_record:
if name is None:
name = "Collection created %s" % (time.strftime("%Y-%m-%d %H:%M:%S %Z", time.localtime()))
+ ensure_unique_name = True
body = {"manifest_text": text,
"name": name}
return text
- @synchronized
- def subscribe(self, callback):
- self.callbacks.append(callback)
-
- @synchronized
- def unsubscribe(self, callback):
- self.callbacks.remove(callback)
-
- @synchronized
- def notify(self, event, collection, name, item):
- for c in self.callbacks:
- c(event, collection, name, item)
-
@synchronized
def _import_manifest(self, manifest_text):
"""Import a manifest into a `Collection`.
segments = []
streamoffset = 0L
state = BLOCKS
+ self.mkdirs(stream_name)
continue
if state == BLOCKS:
block_locator = re.match(r'[0-9a-f]{32}\+(\d+)(\+\S+)*', tok)
if block_locator:
blocksize = long(block_locator.group(1))
- blocks.append(Range(tok, streamoffset, blocksize))
+ blocks.append(Range(tok, streamoffset, blocksize, 0))
streamoffset += blocksize
else:
state = SEGMENTS
self.set_unmodified()
+ @synchronized
+ def notify(self, event, collection, name, item):
+ if self._callback:
+ self._callback(event, collection, name, item)
+
class Subcollection(RichCollectionBase):
"""This is a subdirectory within a collection that doesn't have its own API
"""
- def __init__(self, parent):
+ def __init__(self, parent, name):
super(Subcollection, self).__init__(parent)
self.lock = self.root_collection().lock
self._manifest_text = None
+ self.name = name
+ self.num_retries = parent.num_retries
def root_collection(self):
return self.parent.root_collection()
def _my_block_manager(self):
return self.root_collection()._my_block_manager()
- def notify(self, event, collection, name, item):
- return self.root_collection().notify(event, collection, name, item)
-
def stream_name(self):
- for k, v in self.parent.items():
- if v is self:
- return os.path.join(self.parent.stream_name(), k)
- return '.'
+ return os.path.join(self.parent.stream_name(), self.name)
@synchronized
- def clone(self, new_parent):
- c = Subcollection(new_parent)
+ def clone(self, new_parent, new_name):
+ c = Subcollection(new_parent, new_name)
c._clonefrom(self)
return c