#!/usr/bin/env python
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+# collection-merge
+#
+# Merge two or more collections together. Can also be used to extract specific
+# files from a collection to produce a new collection.
+#
+# input:
+# An array of collections or collection/file paths in script_parameters["input"]
+#
+# output:
+# A manifest with the collections merged. Duplicate file names will
+# have their contents concatenated in the order that they appear in the input
+# array.
import arvados
+import md5
+import crunchutil.subst as subst
+import subprocess
+import os
+import hashlib
-inputs = arvados.current_job()['script_parameters']['input']
-if not isinstance(inputs, (list,tuple)):
-    inputs = [inputs]
+# Parameters of the current job; also passed to the substitution helper
+# below as the context for each input entry.
+p = arvados.current_job()['script_parameters']
+
+# Accept a single locator/path as well as an array of them. Without this
+# normalisation a bare string would be iterated one character at a time.
+inputs = p["input"]
+if not isinstance(inputs, (list, tuple)):
+    inputs = [inputs]
+
-out_manifest = ''
-for locator in inputs:
-    out_manifest += arvados.CollectionReader(locator).manifest_text()
+# Manifest text accumulated across all inputs, in input order.
+merged = ""
+# Collection part of every input entry. Only appended to in this script;
+# nothing visible here reads it back.
+src = []
+for c in inputs:
+    # Run the entry through crunchutil's substitution with the job
+    # parameters as context before interpreting it.
+    c = subst.do_substitution(p, c)
+    i = c.find('/')
+    if i == -1:
+        # No '/' at all: the entry names a whole collection, so take its
+        # complete manifest.
+        src.append(c)
+        merged += arvados.CollectionReader(c).manifest_text()
+    else:
+        # Everything before the first '/' is the collection; the remainder
+        # is a path to one file inside it.
+        src.append(c[0:i])
+        cr = arvados.CollectionReader(c[0:i])
+        # The last '/' separates the stream name from the file name.
+        j = c.rfind('/')
+        stream = c[i+1:j]
+        if stream == "":
+            # "collection/file" has no stream component; use ".".
+            stream = "."
+        fn = c[(j+1):]
+        # NOTE(review): stream names are compared verbatim with s.name();
+        # confirm whether nested streams need a leading "./" in the input.
+        # A path that matches no stream or file contributes nothing and is
+        # skipped without an error.
+        for s in cr.all_streams():
+            if s.name() == stream:
+                files = s.files()
+                if fn in files:
+                    merged += files[fn].as_manifest()
+
-arvados.current_task().set_output(Keep.put(out_manifest))
+# Record the merged manifest text as this task's output.
+arvados.current_task().set_output(merged)