#!/usr/bin/env python

# collection-merge
#
# Merge two or more collections together.  Can also be used to extract specific
# files from a collection to produce a new collection.
#
# input:
# An array of collections or collection/file paths in script_parameters["input"]
#
# output:
# A manifest with the collections merged.  Duplicate file names will
# have their contents concatenated in the order that they appear in the input
# array.

import arvados
import md5
import crunchutil.subst as subst
import subprocess
import os
import hashlib

# Parameters of the running job; p["input"] is the list of entries to merge.
p = arvados.current_job()['script_parameters']

# Manifest fragments are collected in input order and joined once at the end
# instead of growing a string with repeated +=.
manifest_parts = []
for c in p["input"]:
    # Pass the raw entry through crunchutil.subst before parsing it.
    c = subst.do_substitution(p, c)
    i = c.find('/')
    if i == -1:
        # No '/' in the entry: take the whole collection's manifest.
        manifest_parts.append(arvados.CollectionReader(c).manifest_text())
        continue

    # Entry has the form "<collection>/<stream path>/<file name>": the part
    # before the first '/' locates the collection, the part after the last
    # '/' is the file name, and whatever lies between names the stream.
    cr = arvados.CollectionReader(c[0:i])
    j = c.rfind('/')
    stream = c[i+1:j]
    if stream == "":
        # "<collection>/<file name>" refers to the top-level stream.
        stream = "."
    fn = c[(j+1):]

    # Per the Arvados manifest format, stream names are "." or begin with
    # "./".  Accept "dir/sub" as shorthand for "./dir/sub" so that files in
    # sub-directories can be selected; the literal spelling is still tried
    # first so previously matching inputs keep matching.
    stream_names = (stream,)
    if stream != "." and not stream.startswith("./"):
        stream_names = (stream, "./" + stream)

    # Every stream with a matching name is consulted.  A stream or file that
    # is not found contributes nothing to the output.
    for s in cr.all_streams():
        if s.name() in stream_names:
            files = s.files()
            if fn in files:
                manifest_parts.append(files[fn].as_manifest())

merged = "".join(manifest_parts)
arvados.current_task().set_output(merged)