#!/usr/bin/env python
# Copyright (C) The Arvados Authors. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0

# collection-merge
#
# Merge two or more collections together.  Can also be used to extract
# specific files from a collection to produce a new collection.
#
# input:
#   An array of collections or collection/file paths in
#   script_parameter["input"]
#
# output:
#   A manifest with the collections merged.  Duplicate file names will
#   have their contents concatenated in the order that they appear in the
#   input array.

import arvados
import md5
import crunchutil.subst as subst
import subprocess
import os
import hashlib

p = arvados.current_job()['script_parameters']

# Manifest fragments, kept in input order and joined once at the end.
manifest_parts = []

# Collection identifier of every input entry.  It is recorded here but not
# read again anywhere in this script.
src = []

for entry in p["input"]:
    entry = subst.do_substitution(p, entry)

    # Everything before the first '/' names the collection; anything after
    # it is a path to a single file inside that collection.
    locator, slash, inner_path = entry.partition('/')

    if not slash:
        # No path component: take the collection's entire manifest.
        src.append(entry)
        manifest_parts.append(arvados.CollectionReader(entry).manifest_text())
        continue

    src.append(locator)
    reader = arvados.CollectionReader(locator)

    # Split the inner path at its last '/' into a stream name and a file
    # name.  A file that sits directly under the collection has an empty
    # stream part, which is looked up under the stream name ".".
    stream_name, _, file_name = inner_path.rpartition('/')
    stream_name = stream_name or "."

    # Streams or files that do not match are skipped without any error.
    for candidate in reader.all_streams():
        if candidate.name() != stream_name:
            continue
        if file_name not in candidate.files():
            continue
        manifest_parts.append(candidate.files()[file_name].as_manifest())

merged = "".join(manifest_parts)

arvados.current_task().set_output(merged)