import os
import robust_put
import stat
+import arvados.command.run
# Implements "Virtual Working Directory"
# Provides a way of emulating a shared writable directory in Keep based
for f in files:
os.symlink(os.path.join(root, f), os.path.join(target_dir, rel, f))
+def is_collection(fn):
+ if os.path.exists
+
# Check in the contents of target_dir as a new collection.
# Symlinks that resolve into an existing collection are copied from that
# collection; regular files are written into the new collection.
# Returns the manifest text of the new collection.
def _upload_file(collection, source_path, dest_path):
    # Stream one local file into `collection` at `dest_path`, 64 KiB at a
    # time, so large outputs are never held in memory all at once.
    with collection.open(dest_path, "wb") as writer:
        with open(source_path, "rb") as reader:
            for chunk in iter(lambda: reader.read(64*1024), b""):
                writer.write(chunk)


def checkin(target_dir):
    """Build a new collection from the contents of `target_dir`.

    Every file found by walking `target_dir` is placed in the output
    collection at its path relative to `target_dir`:

    * A symlink whose real path is inside a collection (as reported by
      arvados.command.run.is_in_collection) is copied from that source
      collection, so the local file data is not re-read.
    * A regular file, or a symlink to a file outside any collection, is
      read from disk and written into the output collection.
    * Anything else (dangling links, devices, FIFOs, ...) is skipped.

    Returns the manifest text of the resulting collection.
    """
    outputcollection = arvados.collection.Collection(num_retries=5)

    # Normalize to a trailing slash so that slicing `root` below yields a
    # relative directory with no leading separator.
    if target_dir[-1:] != '/':
        target_dir += '/'

    # Cache of CollectionReader objects keyed by portable data hash, so each
    # source collection is loaded at most once.
    collections = {}

    for root, dirs, files in os.walk(target_dir):
        reldir = root[len(target_dir):]
        for f in files:
            path = os.path.join(root, f)
            dest = os.path.join(reldir, f)
            mode = os.lstat(path).st_mode

            if stat.S_ISLNK(mode):
                # 1. check if it is a link into a collection
                real = os.path.split(os.path.realpath(path))
                (pdh, branch) = arvados.command.run.is_in_collection(real[0], real[1])
                if pdh is not None:
                    # 2. load collection
                    if pdh not in collections:
                        collections[pdh] = arvados.collection.CollectionReader(
                            pdh,
                            api_client=outputcollection._my_api(),
                            keep_client=outputcollection._my_keep(),
                            num_retries=5)
                    # 3. copy arvfile to new collection, at the location of
                    # the symlink (which may differ from its path inside the
                    # source collection if the link was renamed or moved).
                    outputcollection.copy(branch, dest,
                                          source_collection=collections[pdh])
                    continue
                # Link points outside any collection: keep the data by
                # uploading the target, unless it is dangling or not a file.
                if not os.path.isfile(path):
                    continue
            elif not stat.S_ISREG(mode):
                continue

            _upload_file(outputcollection, path, dest)

    return outputcollection.manifest_text()