Merge branch 'master' into 2659-anonymous-server-side
[arvados.git] / crunch_scripts / collection-merge
1 #!/usr/bin/env python
2
3 import arvados
4 import md5
5 import subst
6 import subprocess
7 import os
8 import hashlib
9
# Script parameters of the current job; p["input"] is iterated below as the
# list of things to merge.
p = arvados.current_job()['script_parameters']

# Each input entry is either a bare collection locator (no '/') or
# "<locator>/<stream>/<filename>".  `src` records the locator of every entry;
# `merged` ends up as the concatenation of the selected manifest text.
manifest_parts = []
src = []
for spec in p["input"]:
    spec = subst.do_substitution(p, spec)

    # Everything before the first '/' is the collection locator.
    locator, slash, remainder = spec.partition('/')
    src.append(locator)

    if not slash:
        # No path component: take the whole collection's manifest.
        whole = arvados.CollectionReader(locator)
        manifest_parts.append(whole.manifest_text())
        continue

    reader = arvados.CollectionReader(locator)

    # The text after the last '/' is the file name; whatever lies between the
    # first and last '/' is the stream name, defaulting to "." when empty.
    wanted_stream, _, wanted_file = remainder.rpartition('/')
    if wanted_stream == "":
        wanted_stream = "."

    # An entry whose stream or file is not present contributes nothing; no
    # error is raised in that case.
    for candidate in reader.all_streams():
        if candidate.name() == wanted_stream and wanted_file in candidate.files():
            manifest_parts.append(candidate.files()[wanted_file].as_manifest())

merged = "".join(manifest_parts)
32
# Re-read the concatenated manifest text as a single collection.
crm = arvados.CollectionReader(merged)

# The identifier is "<md5 hex digest>+<length>" of the strip=True manifest
# text.  NOTE(review): strip=True presumably drops per-locator hints so the
# hash depends only on content -- confirm against the SDK.
combined = crm.manifest_text(strip=True)
digest = hashlib.new('md5', combined).hexdigest()
uuid = "{}+{}".format(digest, len(combined))

# Create the merged collection under that identifier; note that the manifest
# sent here is the default (non-strip) manifest_text().
collections_api = arvados.api().collections()
collection = collections_api.create(
    body={
        'uuid': uuid,
        'manifest_text': crm.manifest_text(),
    }).execute()
47
# Create one "provenance" link per input entry, pointing from the source
# collection (tail) to the merged collection (head).  `src` may contain the
# same locator more than once; a link is created for each occurrence.
for source_locator in src:
    link_attrs = {
        "tail_uuid": source_locator,
        "head_uuid": uuid,
        "link_class": "provenance",
        "name": "provided"
    }
    arvados.api().links().create(body={"link": link_attrs}).execute()

# Report the merged collection's identifier as this task's output.
arvados.current_task().set_output(uuid)