#!/usr/bin/env python
"""Merge collections, or single files picked from collections, into one.

Every entry of the job's ``input`` script parameter is first passed through
``subst.do_substitution`` and is then interpreted as either

* a string without ``/``: the whole manifest of that collection is
  included, or
* ``<collection>/<stream>/<file>`` (or ``<collection>/<file>``): only the
  named file from the named stream is included.

The merged manifest is stored as a new collection, a ``provenance`` link is
created from every source collection to the new one, and the new
collection's identifier is set as the output of the current task.
"""

import arvados
import md5
import subst
import subprocess
import os
import hashlib


def _manifest_for(spec):
    """Return ``(source, manifest_text)`` for one substituted input entry.

    ``source`` is the part of ``spec`` before the first ``/`` (or all of
    ``spec`` when it contains none).  ``manifest_text`` is the text this
    entry contributes to the merged manifest; it is empty when the
    requested stream/file is not found, i.e. a missing file is skipped
    silently rather than treated as an error.
    """
    first_slash = spec.find('/')
    if first_slash == -1:
        # Bare collection reference: take everything.
        return spec, arvados.CollectionReader(spec).manifest_text()

    source = spec[:first_slash]
    last_slash = spec.rfind('/')
    # Text between the first and last slash names the stream; when there is
    # only one slash that text is empty and the top-level stream "." is used.
    # NOTE(review): the stream name is compared verbatim with s.name(); if
    # manifest stream names carry a "./" prefix, nested paths would have to
    # be given as "<collection>/./dir/file" to match -- confirm.
    stream_name = spec[first_slash + 1:last_slash] or "."
    file_name = spec[last_slash + 1:]

    fragments = []
    for stream in arvados.CollectionReader(source).all_streams():
        if stream.name() != stream_name:
            continue
        files = stream.files()
        if file_name in files:
            fragments.append(files[file_name].as_manifest())
    return source, "".join(fragments)


params = arvados.current_job()['script_parameters']

sources = []
manifest_parts = []
for entry in params["input"]:
    source, text = _manifest_for(subst.do_substitution(params, entry))
    sources.append(source)
    manifest_parts.append(text)

merged_reader = arvados.CollectionReader("".join(manifest_parts))

# The identifier is "<md5 hex digest>+<length>" of the manifest text obtained
# with strip=True, while the collection itself is created from the manifest
# text obtained without that option.
stripped = merged_reader.manifest_text(strip=True)
uuid = "{}+{}".format(hashlib.new('md5', stripped).hexdigest(), len(stripped))

api = arvados.api()

api.collections().create(
    body={
        'uuid': uuid,
        'manifest_text': merged_reader.manifest_text(),
    }).execute()

# Record which collections the merged one was built from.
for source in sources:
    api.links().create(body={
        "link": {
            "tail_uuid": source,
            "head_uuid": uuid,
            "link_class": "provenance",
            "name": "provided"
        }}).execute()

arvados.current_task().set_output(uuid)