hash and grep examples for new reader/writer tools
[arvados.git] / crunch_scripts / hash
1 #!/usr/bin/env python
2
import hashlib
import os

import arvados
5
# Fan-out step. Judging by its name and arguments, this presumably queues one
# task per input file when run as the sequence-0 task and then ends that task
# -- TODO confirm against the Arvados Python SDK.
arvados.job_setup.one_task_per_input_file(if_sequence=0, and_end_task=True)

this_job = arvados.current_job()
this_task = arvados.current_task()
this_task_input = this_task['parameters']['input']

# The digest algorithm can be chosen through the job's script parameters;
# default to md5 when none is given.
script_parameters = this_job['script_parameters']
alg = script_parameters['algorithm'] if 'algorithm' in script_parameters else 'md5'
digestor = hashlib.new(alg)

# Only the first file of the task's input collection is hashed.
input_file = list(arvados.CollectionReader(this_task_input).all_files())[0]

# Feed the file to the digest in 1 MiB reads rather than loading it whole.
while True:
    buf = input_file.read(2**20)
    if not buf:
        break
    digestor.update(buf)

hexdigest = digestor.hexdigest()

# Report the file under its stream name unless it lives in the top-level
# stream ('.').
file_name = input_file.name()
if input_file.stream_name() != '.':
    # Must be os.path.join: the os module has no join() function, so the
    # previous os.join() call failed for every file outside the '.' stream.
    file_name = os.path.join(input_file.stream_name(), file_name)

# Write a single "<hexdigest> <file name>" line as this task's output.
# NOTE: the output file is named md5sum.txt whichever algorithm was selected.
out = arvados.CollectionWriter()
out.set_current_file_name("md5sum.txt")
out.write("%s %s\n" % (hexdigest, file_name))
this_task.set_output(out.finish())