Reorganized Volume code for easier comprehension.
[arvados.git] / crunch_scripts / hash
1 #!/usr/bin/env python                                                                                                                                                                            
2
3 import arvados
4 import hashlib
5 import os
6
# Crunch script body: hash this task's input file and store a one-line
# "<hexdigest> <file name>" manifest entry as the task output.

# NOTE(review): judging by its name and arguments, this call queues one task
# per input file of the job -- confirm against the Arvados SDK.
arvados.job_setup.one_task_per_input_file(if_sequence=0, and_end_task=True, input_as_path=True)

this_job = arvados.current_job()
this_task = arvados.current_task()

# The hash algorithm is an optional script parameter; md5 is the default.
script_parameters = this_job['script_parameters']
alg = script_parameters['algorithm'] if 'algorithm' in script_parameters else 'md5'
digestor = hashlib.new(alg)

input_file = arvados.get_task_param_mount('input')

# Open in binary mode: a digest must be computed over the raw bytes, and
# text mode may translate newlines (or, on Python 3, yield str objects that
# hashlib refuses to hash).
with open(input_file, 'rb') as f:
    # Read in 1 MiB chunks so large inputs are never held in memory at once;
    # iteration stops at the first empty read (end of file).
    for buf in iter(lambda: f.read(2**20), b''):
        digestor.update(buf)

hexdigest = digestor.hexdigest()

# Drop everything up to and including the first '/' of the task's 'input'
# parameter (presumably the collection locator -- TODO confirm), keeping the
# rest as the file name to report.
file_name = this_task['parameters']['input'].partition('/')[2]

out = arvados.CollectionWriter()
# NOTE(review): the output file is always called "md5sum.txt", even when a
# different algorithm is selected; kept as-is so existing consumers of the
# output are not broken.
out.set_current_file_name("md5sum.txt")
out.write("%s %s\n" % (hexdigest, file_name))
this_task.set_output(out.finish())