14008: Merge branch 'master' into 14008-containers-index
[arvados.git] / sdk / python / arvados / commands / run.py
index ca51de1fe33517c8001fcaef53efabcf7771aeca..96f5bdd44a12ae42c25fbe64f68b342cb0356fcf 100644 (file)
@@ -150,20 +150,21 @@ def statfile(prefix, fn, fnPattern="$(file %s/%s)", dirPattern="$(dir %s/%s/)",
 
     return prefix+fn
 
-def write_file(collection, pathprefix, fn):
+def write_file(collection, pathprefix, fn, flush=False):
     with open(os.path.join(pathprefix, fn)) as src:
         dst = collection.open(fn, "w")
         r = src.read(1024*128)
         while r:
             dst.write(r)
             r = src.read(1024*128)
-        dst.close(flush=False)
+        dst.close(flush=flush)
 
 def uploadfiles(files, api, dry_run=False, num_retries=0,
                 project=None,
                 fnPattern="$(file %s/%s)",
                 name=None,
-                collection=None):
+                collection=None,
+                packed=True):
     # Find the smallest path prefix that includes all the files that need to be uploaded.
     # This starts at the root and iteratively removes common parent directory prefixes
     # until all file paths no longer have a common parent.
@@ -213,12 +214,12 @@ def uploadfiles(files, api, dry_run=False, num_retries=0,
                 continue
             prev = localpath
             if os.path.isfile(localpath):
-                write_file(collection, pathprefix, f.fn)
+                write_file(collection, pathprefix, f.fn, not packed)
             elif os.path.isdir(localpath):
                 for root, dirs, iterfiles in os.walk(localpath):
                     root = root[len(pathprefix):]
                     for src in iterfiles:
-                        write_file(collection, pathprefix, os.path.join(root, src))
+                        write_file(collection, pathprefix, os.path.join(root, src), not packed)
 
         pdh = None
         if len(collection) > 0:
@@ -248,10 +249,19 @@ def uploadfiles(files, api, dry_run=False, num_retries=0,
                         logger.info("Uploaded to %s (%s)", pdh, collection.manifest_locator())
                     except arvados.errors.ApiError as ae:
                         tries -= 1
+            if pdh is None:
+                # Something weird going on here, probably a collection
+                # with a conflicting name but wrong PDH.  We won't
+                # be able to reuse it but we still need to save our
+                # collection, so save it with a unique name.
+                logger.info("Name conflict on '%s', existing collection has an unexpected portable data hash", name_pdh)
+                collection.save_new(name=name_pdh, owner_uuid=project, ensure_unique_name=True)
+                pdh = collection.portable_data_hash()
+                logger.info("Uploaded to %s (%s)", pdh, collection.manifest_locator())
         else:
             # empty collection
             pdh = collection.portable_data_hash()
-            assert (pdh == config.EMPTY_BLOCK_LOCATOR), "Empty collection portable_data_hash did not have expected locator"
+            assert (pdh == config.EMPTY_BLOCK_LOCATOR), "Empty collection portable_data_hash did not have expected locator, was %s" % pdh
             logger.info("Using empty collection %s", pdh)
 
     for c in files: