10383: Remove files from local_collection which don't exist locally, so that the...
authorLucas Di Pentima <lucas@curoverse.com>
Thu, 15 Dec 2016 17:57:48 +0000 (14:57 -0300)
committerLucas Di Pentima <lucas@curoverse.com>
Thu, 15 Dec 2016 17:57:48 +0000 (14:57 -0300)
sdk/python/arvados/commands/put.py

index 479c72a5ed0699132c9c6195e802984facf605ca..133b36382788eaf9de42603d744ff04f950879f5 100644 (file)
@@ -423,6 +423,12 @@ class ArvPutUploadJob(object):
             # there aren't any file to upload.
             if self.dry_run:
                 raise ArvPutUploadNotPending()
+            # Remove local_collection's files that don't exist locally anymore, so the
+            # bytes_written count is correct.
+            for f in self.collection_file_paths(self._local_collection,
+                                                path_prefix=""):
+                if f != 'stdin' and not f in self._file_paths:
+                    self._local_collection.remove(f)
             # Update bytes_written from current local collection and
             # report initial progress.
             self._update()
@@ -659,6 +665,17 @@ class ArvPutUploadJob(object):
             # Load the previous manifest so we can check if files were modified remotely.
             self._local_collection = arvados.collection.Collection(self._state['manifest'], replication_desired=self.replication_desired)
 
+    def collection_file_paths(self, col, path_prefix='.'):
+        """Return a list of file paths by recursively go through the entire collection `col`"""
+        file_paths = []
+        for name, item in col.items():
+            if isinstance(item, arvados.arvfile.ArvadosFile):
+                file_paths.append(os.path.join(path_prefix, name))
+            elif isinstance(item, arvados.collection.Subcollection):
+                new_prefix = os.path.join(path_prefix, name)
+                file_paths += self.collection_file_paths(item, path_prefix=new_prefix)
+        return file_paths
+
     def _lock_file(self, fileobj):
         try:
             fcntl.flock(fileobj, fcntl.LOCK_EX | fcntl.LOCK_NB)