Merge branch '2873-permission-links-ownership'
[arvados.git] / crunch_scripts / collection-merge
old mode 100644 (file)
new mode 100755 (executable)
index b377d01..f16d624
@@ -1,13 +1,77 @@
 #!/usr/bin/env python
 
+# Merge the collections (or individual collection files) named in the job's
+# "input" script parameter into a single new collection, record a provenance
+# link from every source collection to it, and set it as the task output.
+
 import arvados
+import md5
+import subst
+import subprocess
+import os
+import hashlib
 
-inputs = arvados.current_job()['script_parameters']['input']
+p = arvados.current_job()['script_parameters']
+
+# Accept a single locator as well as a list of them; iterating a bare string
+# would otherwise walk it one character at a time.
+inputs = p["input"]
 if not isinstance(inputs, (list,tuple)):
     inputs = [inputs]
 
-out_manifest = ''
-for locator in inputs:
-    out_manifest += arvados.CollectionReader(locator).manifest_text()
+merged = ""   # concatenated manifest text of everything selected so far
+src = []      # source collection locator of each input, for provenance links
+for c in inputs:
+    # NOTE(review): presumably expands references to other script
+    # parameters inside the input string -- confirm against subst.
+    c = subst.do_substitution(p, c)
+    i = c.find('/')
+    if i == -1:
+        # No '/' in the input: take the whole collection.
+        src.append(c)
+        merged += arvados.CollectionReader(c).manifest_text()
+    else:
+        # "<collection>/<stream path>/<file>": take one file only.
+        src.append(c[0:i])
+        cr = arvados.CollectionReader(c[0:i])
+        j = c.rfind('/')
+        stream = c[i+1:j]
+        if stream == "":
+            # Nothing between the locator and the file name: use ".".
+            stream = "."
+        fn = c[(j+1):]
+        # A stream or file that is not found contributes nothing; no error
+        # is raised.
+        # NOTE(review): for nested paths `stream` has no "./" prefix here --
+        # confirm that s.name() reports names in the same form.
+        for s in cr.all_streams():
+            if s.name() == stream:
+                if fn in s.files():
+                    merged += s.files()[fn].as_manifest()
 
-arvados.current_task().set_output(Keep.put(out_manifest))
+crm = arvados.CollectionReader(merged)
+
+# The new collection's uuid is "<md5 hex digest>+<length>" of the manifest
+# text obtained with strip=True.
+combined = crm.manifest_text(strip=True)
+
+m = hashlib.new('md5')
+m.update(combined)
+
+uuid = "{}+{}".format(m.hexdigest(), len(combined))
+
+collection = arvados.api().collections().create(
+    body={
+        'uuid': uuid,
+        'manifest_text': crm.manifest_text(),
+    }).execute()
+
+# Link each source collection to the merged one.
+for s in src:
+    l = arvados.api().links().create(body={
+        "link": {
+            "tail_uuid": s,
+            "head_uuid": uuid,
+            "link_class": "provenance",
+            "name": "provided"
+        }}).execute()
+
+arvados.current_task().set_output(uuid)