6 # Decompress all compressed files in the collection using the "dtrx" tool and
7 # produce a new collection with the contents. Uncompressed files are passed through unchanged.
11 # A collection at script_parameters["input"]
14 # A manifest of the uncompressed contents of the input collection.
# Script body: take one input file from the current task, run "dtrx" on it,
# and record a task output.
#
# NOTE(review): the call below is cut off in this excerpt; its argument list
# continues on a line that is not visible here. Judging by its name it
# presumably fans the job out into one task per input file - confirm against
# the Arvados SDK.
21 arvados.job_setup.one_task_per_input_file(if_sequence=0, and_end_task=True,
# Fetch the task being executed and the 'input' parameter assigned to it.
24 task = arvados.current_task()
26 input_file = task['parameters']['input']
# Split the input path into its parts:
#   group 1: 32 lowercase hex digits followed by "+<digits>"
#   group 2: optional additional "+<hint>" suffixes
#   group 3: everything from the first "/" up to the last path component
#   group 4: the last path component, including its leading "/"
# NOTE(review): re.match returns None when input_file does not have this
# shape, and the groups are read from `result` below. Any None check is not
# visible in this excerpt - confirm one exists.
28 result = re.match(r"(^[a-f0-9]{32}\+\d+)(\+\S+)*(/.*)(/[^/]+)$", input_file)
# Extracted content is collected under "<task.tmpdir>/output".
30 outdir = os.path.join(task.tmpdir, "output")
# Open the collection identified by group 1 of the match.
35 cr = arvados.CollectionReader(result.group(1))
# Drop the leading "/" from the directory part and from the file name.
36 streamname = result.group(3)[1:]
37 filename = result.group(4)[1:]
# Create a directory named after the stream (relative to the current working
# directory), including any missing parents.
39 subprocess.call(["mkdir", "-p", streamname])
# Pick the first stream whose name() equals streamname.
# NOTE(review): subscripting the result of filter() only works on Python 2,
# where filter() returns a list; on Python 3 it returns an iterator.
41 streamreader = filter(lambda s: s.name() == streamname, cr.all_streams())[0]
# Look up the reader for the input file within that stream.
42 filereader = streamreader.files()[filename]
# Run dtrx on the path returned by get_task_param_mount('input'); rc holds its
# exit status. Per dtrx's documentation the flags are: -r recursive,
# -n non-interactive, -q quiet.
43 rc = subprocess.call(["dtrx", "-r", "-n", "-q", arvados.get_task_param_mount('input')])
# Write the directory tree at outdir into a new collection and record the
# finished collection as the task output.
45 out = arvados.CollectionWriter()
46 out.write_directory_tree(outdir, max_manifest_depth=0)
47 task.set_output(out.finish())
# Set the task output to streamname followed by the input file's own manifest
# text with its first character removed.
# NOTE(review): as shown, this runs unconditionally after the set_output call
# above. Presumably the two are alternative branches on `rc`, with the
# control-flow lines missing from this excerpt - confirm.
49 task.set_output(streamname + filereader.as_manifest()[1:])