X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/26174557d40f4643b8c506c0cdfa66503b18544e..1479cedd5fb5504058e9cb1f4664474a5335d64b:/crunch_scripts/grep

Summary: this patch to crunch_scripts/grep
  * adds the Arvados copyright / SPDX Apache-2.0 header after the shebang;
  * replaces "import string" with "import re";
  * compiles the job's 'pattern' script parameter once with re.compile()
    and selects lines with pattern.search(line) instead of the substring
    test string.find(line, search_string) >= 0, so the parameter is now
    interpreted as a regular expression rather than a literal string.

NOTE(review): this patch was recovered from a copy in which all line breaks
had been collapsed. Every header and every added/removed/context token below
is taken from that copy, but the positions of the blank context lines and the
indentation of the loop body are best-effort reconstructions chosen to match
the hunk header (21 old lines / 24 new lines). Confirm against the repository
before applying.

diff --git a/crunch_scripts/grep b/crunch_scripts/grep
index 908cea2048..a84c0f671c 100755
--- a/crunch_scripts/grep
+++ b/crunch_scripts/grep
@@ -1,21 +1,24 @@
 #!/usr/bin/env python
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
 
 import arvados
-import string
+import re
 
 arvados.job_setup.one_task_per_input_file(if_sequence=0, and_end_task=True)
 
 this_job = arvados.current_job()
 this_task = arvados.current_task()
 this_task_input = this_task['parameters']['input']
-search_string = this_job['script_parameters']['pattern']
+pattern = re.compile(this_job['script_parameters']['pattern'])
 
 input_file = list(arvados.CollectionReader(this_task_input).all_files())[0]
 out = arvados.CollectionWriter()
 out.set_current_file_name(input_file.decompressed_name())
 out.set_current_stream_name(input_file.stream_name())
 for line in input_file.readlines():
-    if 0 <= string.find(line, search_string):
+    if pattern.search(line):
         out.write(line)
 
 this_task.set_output(out.finish())