11843: Do not fail pipelines on transient API failures.
[arvados.git] / crunch_scripts / grep
index 908cea20482562618d8134ef6e15255769f65322..a84c0f671c86ef447b87374c9072b7da1226b506 100755 (executable)
@@ -1,21 +1,24 @@
 #!/usr/bin/env python
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
 
 import arvados
-import string
+import re
 
 arvados.job_setup.one_task_per_input_file(if_sequence=0, and_end_task=True)
 
 this_job = arvados.current_job()
 this_task = arvados.current_task()
 this_task_input = this_task['parameters']['input']
-search_string = this_job['script_parameters']['pattern']
+pattern = re.compile(this_job['script_parameters']['pattern'])
 
 input_file = list(arvados.CollectionReader(this_task_input).all_files())[0]
 out = arvados.CollectionWriter()
 out.set_current_file_name(input_file.decompressed_name())
 out.set_current_stream_name(input_file.stream_name())
 for line in input_file.readlines():
-    if 0 <= string.find(line, search_string):
+    if pattern.search(line):
         out.write(line)
 
 this_task.set_output(out.finish())