X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/6c3e6273fa29b8004c17a7b619b3818f8e8bd84e..d25db7c02aa07e9d4812a029753c2b8606cf35b1:/crunch_scripts/hash

Summary of what this patch changes in crunch_scripts/hash:
  1. one_task_per_input_file() is now called with input_as_path=True.
  2. The digest algorithm is read from script_parameters['algorithm'];
     the old line assigned the whole script_parameters mapping to alg.
  3. The input is read with open() on the path returned by
     arvados.get_task_param_mount('input'), in chunks of 2**20, instead of
     through arvados.DataReader.
  4. The "<hexdigest> <file_name>" line is written to a file named
     md5sum.txt through arvados.CollectionWriter, and the value returned by
     out.finish() is passed to set_output(); previously the line itself was
     passed to set_output().
  5. file_name is derived by dropping the first '/'-separated component of
     the task's 'input' parameter with split/join instead of re.sub.
  6. "import re" is replaced by "import os" (the added lines shown here do
     not reference os).

NOTE(review): this patch was recovered from a copy in which all line breaks
had been collapsed. Indentation was re-derived from the Python control flow.
Blank context lines left no trace in that copy: they were re-inserted where
the script layout implies them, and one more (the last line of the hunk) was
added only to satisfy the 32/34 line counts in the hunk header; its true
position is unknown, so confirm against the repository before applying.
The removed and added shebang lines are textually identical here; the
original difference was presumably whitespace-only and is not recoverable.

diff --git a/crunch_scripts/hash b/crunch_scripts/hash
index 6d6c88f64d..cdd5ebaff2 100755
--- a/crunch_scripts/hash
+++ b/crunch_scripts/hash
@@ -1,32 +1,34 @@
-#!/usr/bin/env python
+#!/usr/bin/env python
 
 import arvados
 import hashlib
-import re
+import os
 
-arvados.job_setup.one_task_per_input_file(if_sequence=0, and_end_task=True)
+arvados.job_setup.one_task_per_input_file(if_sequence=0, and_end_task=True, input_as_path=True)
 
 this_job = arvados.current_job()
 this_task = arvados.current_task()
-this_task_input = this_task['parameters']['input']
 
 if 'algorithm' in this_job['script_parameters']:
-    alg = this_job['script_parameters']
+    alg = this_job['script_parameters']['algorithm']
 else:
     alg = 'md5'
 digestor = hashlib.new(alg)
 
-input_stream = arvados.DataReader(this_task_input)
-while True:
-    buf = input_stream.read(2**20)
-    if len(buf) == 0:
-        break
-    digestor.update(buf)
+input_file = arvados.get_task_param_mount('input')
 
-hexdigest = digestor.hexdigest()
-file_name = re.sub(r'^[^/]+/', '', this_task_input)
+with open(input_file) as f:
+    while True:
+        buf = f.read(2**20)
+        if len(buf) == 0:
+            break
+        digestor.update(buf)
 
-input_stream.close()
+hexdigest = digestor.hexdigest()
 
-this_task.set_output("%s %s\n" % (hexdigest, file_name))
+file_name = '/'.join(this_task['parameters']['input'].split('/')[1:])
+out = arvados.CollectionWriter()
+out.set_current_file_name("md5sum.txt")
+out.write("%s %s\n" % (hexdigest, file_name))
+this_task.set_output(out.finish())
 