5087: API server uses a crunch-job with bugfix.
[arvados.git] / crunch_scripts / grep
1 #!/usr/bin/env python
2
3 import arvados
4 import re
5
# Fan the job out into per-file tasks.  NOTE(review): going by the helper's
# name and arguments, the sequence-0 task queues one task per input file and
# then ends; confirm against the Arvados SDK.
arvados.job_setup.one_task_per_input_file(if_sequence=0, and_end_task=True)

# The job supplies the regular expression; the task supplies the input to scan.
job = arvados.current_job()
task = arvados.current_task()
task_input = task['parameters']['input']
line_filter = re.compile(job['script_parameters']['pattern'])

# Only the first file of the task's input collection is processed.
reader = arvados.CollectionReader(task_input)
source = list(reader.all_files())[0]

# Mirror the source's (decompressed) file name and stream name in the output.
writer = arvados.CollectionWriter()
writer.set_current_file_name(source.decompressed_name())
writer.set_current_stream_name(source.stream_name())

# Copy across every line the pattern matches anywhere in (search, not match).
matching_lines = (text for text in source.readlines() if line_filter.search(text))
for text in matching_lines:
    writer.write(text)

# Hand whatever finish() returns to the task as its output.
task.set_output(writer.finish())