6 # Decompress all compressed files in the collection using the "dtrx" tool and
7 # produce a new collection with the contents. Uncompressed files
11 # A collection at script_parameters["input"]
14 # A manifest of the uncompressed contents of the input collection.
21 import crunchutil.robust_put as robust_put
23 arvados.job_setup.one_task_per_input_file(if_sequence=0, and_end_task=True,
# Task record for this invocation; its parameters hold the Keep path of the
# input file this task should process.
task = arvados.current_task()
input_file = task['parameters']['input']

# Split the Keep path into: (1) 32 hex digits plus "+<size>", (2) optional
# "+hint" suffixes, (3) an optional directory part, (4) "/" plus file name.
infile_parts = re.compile(
    r"(^[a-f0-9]{32}\+\d+)(\+\S+)*(/.*)?(/[^/]+)$").match(input_file)

# Directory under the task's tmpdir that is later uploaded as the output.
outdir = os.path.join(task.tmpdir, "output")
36 if infile_parts is None:
37 print >>sys.stderr, "Failed to parse input filename '%s' as a Keep file\n" % input_file
# Open the collection named by the first regex group (hash plus size).
cr = arvados.CollectionReader(infile_parts.group(1))

# Group 3 (the directory part between the locator and the file name) is
# optional in the pattern, so it is None when the file sits at the top level
# of the collection.  Slicing None raises TypeError, so strip the leading "/"
# only when the group actually matched; otherwise keep streamname as None so
# the "streamname is not None" check that follows can tell the cases apart.
streamname = infile_parts.group(3)
if streamname is not None:
    streamname = streamname[1:]

# Group 4 always participates in a successful match: "/" plus the file name.
filename = infile_parts.group(4)[1:]
44 if streamname is not None:
45 subprocess.call(["mkdir", "-p", streamname])
# Case-insensitive test of the mounted input path's extension against the
# archive/compression suffixes listed here; m is None when none of them match.
# NOTE(review): presumably this selects between the dtrx call and the
# pass-through manifest below - the branch itself is not visible here.
m = re.compile(
    r'.*\.(gz|Z|bz2|tgz|tbz|zip|rar|7z|cab|deb|rpm|cpio|gem)$',
    re.IGNORECASE).match(arvados.get_task_param_mount('input'))
53 rc = subprocess.call(["dtrx", "-r", "-n", "-q", arvados.get_task_param_mount('input')])
55 task.set_output(robust_put.upload(outdir))
59 streamreader = filter(lambda s: s.name() == streamname, cr.all_streams())[0]
60 filereader = streamreader.files()[filename]
61 task.set_output(streamname + filereader.as_manifest()[1:])