#!/usr/bin/env python
"""Crunch script: hash one input file and record the digest as task output.

Reads the current task's ``input`` parameter, hashes the first file found in
that collection using the algorithm named by the job's ``algorithm`` script
parameter (``md5`` when the parameter is absent), and writes a single line of
the form ``<hexdigest> <file path>`` into an output collection, which is then
set as the task's output.
"""

import hashlib
import os

import arvados

# NOTE(review): presumably this queues one task per input file when running
# as the sequence-0 task and then ends that task -- confirm against the
# Arvados SDK documentation for job_setup.one_task_per_input_file.
arvados.job_setup.one_task_per_input_file(if_sequence=0, and_end_task=True)

this_job = arvados.current_job()
this_task = arvados.current_task()
this_task_input = this_task['parameters']['input']

# Fall back to md5 when the job does not name an algorithm.
script_parameters = this_job['script_parameters']
alg = script_parameters['algorithm'] if 'algorithm' in script_parameters else 'md5'
digestor = hashlib.new(alg)

# Only the first file yielded by the reader is hashed.
input_file = list(arvados.CollectionReader(this_task_input).all_files())[0]

# Feed the file to the digest in 1 MiB chunks so it is never held in memory
# all at once; an empty read marks end of file.
while True:
    buf = input_file.read(2**20)
    if not buf:
        break
    digestor.update(buf)

hexdigest = digestor.hexdigest()

# Qualify the file name with its stream name unless the stream is '.'.
# The original called the non-existent os.join (and never imported os), so
# this branch raised NameError for any file with a stream name other than '.'.
file_name = input_file.name()
if input_file.stream_name() != '.':
    file_name = os.path.join(input_file.stream_name(), file_name)

# The output file is always named md5sum.txt, whichever algorithm was used.
out = arvados.CollectionWriter()
out.set_current_file_name("md5sum.txt")
out.write("%s %s\n" % (hexdigest, file_name))
this_task.set_output(out.finish())