From 6b4f035a93296d8d39c674fe4fdc181dadd98e78 Mon Sep 17 00:00:00 2001
From: Tom Clegg
Date: Mon, 24 Jun 2013 23:07:11 -0400
Subject: [PATCH] hash and grep examples for new reader/writer tools

---
Review notes (ignored by git-am; not part of the commit message):
- grep: `string.find(line, search_string)` referenced the never-imported
  `string` module, and find() returns -1 (truthy) on a miss and 0 (falsy)
  for a match at column 0.  Replaced with `search_string in line`.
- hash: `os.join(...)` referenced the never-imported `os` module and is not
  a real function.  Replaced with a plain '/' join, matching the
  '/'-separated paths assumed by the regex this patch removes.
- Both fixes are one-for-one line replacements, so the hunk headers and the
  diffstat below are unchanged.  The blob ids on the "index" lines predate
  the fixes.
- Line breaks were reconstructed from a whitespace-collapsed copy.  The
  blank context lines in the second crunch_scripts/hash hunk were placed to
  satisfy the hunk header counts; verify them against the base file before
  applying.

 crunch_scripts/grep | 20 ++++++++++++++++++++
 crunch_scripts/hash | 18 ++++++++++--------
 2 files changed, 30 insertions(+), 8 deletions(-)
 create mode 100644 crunch_scripts/grep

diff --git a/crunch_scripts/grep b/crunch_scripts/grep
new file mode 100644
index 0000000000..6392e0c9e7
--- /dev/null
+++ b/crunch_scripts/grep
@@ -0,0 +1,20 @@
+#!/usr/bin/env python
+
+import arvados
+
+arvados.job_setup.one_task_per_input_file(if_sequence=0, and_end_task=True)
+
+this_job = arvados.current_job()
+this_task = arvados.current_task()
+this_task_input = this_task['parameters']['input']
+search_string = this_task['parameters']['pattern']
+
+input_file = list(arvados.CollectionReader(this_task_input).all_files())[0]
+out = arvados.CollectionWriter()
+out.set_current_file_name(input_file.name())
+out.set_current_stream_name(input_file.stream_name())
+for line in input_file.readlines():
+    if search_string in line:  # plain substring match
+        out.write(line)
+
+this_task.set_output(out.finish())
diff --git a/crunch_scripts/hash b/crunch_scripts/hash
index 6d6c88f64d..3c157163d1 100755
--- a/crunch_scripts/hash
+++ b/crunch_scripts/hash
@@ -2,7 +2,6 @@
 
 import arvados
 import hashlib
-import re
 
 arvados.job_setup.one_task_per_input_file(if_sequence=0, and_end_task=True)
 
@@ -11,22 +10,25 @@ this_task = arvados.current_task()
 this_task_input = this_task['parameters']['input']
 
 if 'algorithm' in this_job['script_parameters']:
-    alg = this_job['script_parameters']
+    alg = this_job['script_parameters']['algorithm']
 else:
     alg = 'md5'
 
 digestor = hashlib.new(alg)
 
-input_stream = arvados.DataReader(this_task_input)
+input_file = list(arvados.CollectionReader(this_task_input).all_files())[0]
 while True:
-    buf = input_stream.read(2**20)
+    buf = input_file.read(2**20)
     if len(buf) == 0:
         break
     digestor.update(buf)
 
 hexdigest = digestor.hexdigest()
 
-file_name = re.sub(r'^[^/]+/', '', this_task_input)
-input_stream.close()
-
-this_task.set_output("%s %s\n" % (hexdigest, file_name))
+file_name = input_file.name()
+if input_file.stream_name() != '.':
+    file_name = input_file.stream_name() + '/' + file_name  # '/'-separated path
+out = arvados.CollectionWriter()
+out.set_current_file_name("md5sum.txt")
+out.write("%s %s\n" % (hexdigest, file_name))
+this_task.set_output(out.finish())
-- 
2.39.5