5787: run-command uploading uses new collection API and supports symlinks into Keep
[arvados.git] / crunch_scripts / crunchutil / vwd.py
index 3d54c9c2b32c1fc05c2ce536a5ece807df59d49b..9d5578a30ed017b5e8d49f515c9efbb9a056dd8a 100644 (file)
@@ -2,6 +2,7 @@ import arvados
 import os
 import robust_put
 import stat
+import arvados.command.run
 
 # Implements "Virtual Working Directory"
 # Provides a way of emulating a shared writable directory in Keep based
@@ -32,23 +33,55 @@ def checkout(source_collection, target_dir, keepmount=None):
         for f in files:
             os.symlink(os.path.join(root, f), os.path.join(target_dir, rel, f))
 
-# Delete all symlinks and check in any remaining normal files.
-# If merge == True, merge the manifest with source_collection and return a
-# CollectionReader for the combined collection.
-def checkin(source_collection, target_dir, merge=True):
-    # delete symlinks, commit directory, merge manifests and return combined
-    # collection.
+# Build a new collection from the contents of target_dir and return its
+# manifest text.
+#  - Regular files are written into the new collection.
+#  - Symlinks that is_in_collection() locates inside a collection are copied
+#    from that collection with Collection.copy().
+#  - Any other symlink is left out of the output.
+# Every entry is stored under its path relative to target_dir.
+def checkin(target_dir):
+    outputcollection = arvados.collection.Collection(num_retries=5)
+
+    # Normalize to a trailing slash so that root[len(target_dir):] below is a
+    # relative path with no leading '/'.
+    if target_dir[-1:] != '/':
+        target_dir += '/'
+
+    # Readers for the source collections seen so far, keyed by the identifier
+    # returned by is_in_collection(), so each one is loaded only once.
+    collections = {}
+
     for root, dirs, files in os.walk(target_dir):
+        # Directory being visited, relative to target_dir ('' at the top level).
+        reldir = root[len(target_dir):]
         for f in files:
-            s = os.lstat(os.path.join(root, f))
+            localpath = os.path.join(root, f)
+            destpath = os.path.join(reldir, f)
+            s = os.lstat(localpath)
             if stat.S_ISLNK(s.st_mode):
-                os.unlink(os.path.join(root, f))
-
-    uuid = robust_put.upload(target_dir)
-    if merge:
-        cr1 = arvados.CollectionReader(source_collection)
-        cr2 = arvados.CollectionReader(uuid)
-        combined = arvados.CollectionReader(cr1.manifest_text() + cr2.manifest_text())
-        return combined
-    else:
-        return arvados.CollectionReader(uuid)
+                # 1. check if it is a link into a collection
+                # NOTE(review): the SDK module is believed to be
+                # arvados.commands.run (plural) -- confirm against the import.
+                real = os.path.split(os.path.realpath(localpath))
+                (pdh, branch) = arvados.command.run.is_in_collection(real[0], real[1])
+                if pdh is not None:
+                    # 2. load collection
+                    if pdh not in collections:
+                        collections[pdh] = arvados.collection.CollectionReader(pdh,
+                                                                               api_client=outputcollection._my_api(),
+                                                                               keep_client=outputcollection._my_keep(),
+                                                                               num_retries=5)
+                    # 3. copy arvfile to new collection, stored at the link's own
+                    # path under target_dir rather than at its path in the source
+                    outputcollection.copy(branch, destpath, source_collection=collections[pdh])
+
+            elif stat.S_ISREG(s.st_mode):
+                with outputcollection.open(destpath, "wb") as writer:
+                    with open(localpath, "rb") as reader:
+                        dat = reader.read(64*1024)
+                        while dat:
+                            writer.write(dat)
+                            dat = reader.read(64*1024)
+
+    return outputcollection.manifest_text()