-#!/usr/bin/env python
+#!/usr/bin/env python
 import arvados
 import hashlib
-import re
+import os
-arvados.job_setup.one_task_per_input_file(if_sequence=0, and_end_task=True)
+arvados.job_setup.one_task_per_input_file(if_sequence=0, and_end_task=True, input_as_path=True)
 this_job = arvados.current_job()
 this_task = arvados.current_task()
-this_task_input = this_task['parameters']['input']
 if 'algorithm' in this_job['script_parameters']:
-    alg = this_job['script_parameters']
+    alg = this_job['script_parameters']['algorithm']
 else:
     alg = 'md5'
 digestor = hashlib.new(alg)
-input_stream = arvados.DataReader(this_task_input)
-while True:
-    buf = input_stream.read(2**20)
-    if len(buf) == 0:
-        break
-    digestor.update(buf)
+# NOTE(review): presumably resolves the task's 'input' parameter to a path
+# that open() can read -- confirm against the arvados SDK.
+input_file = arvados.get_task_param_mount('input')
-hexdigest = digestor.hexdigest()
-file_name = re.sub(r'^[^/]+/', '', this_task_input)
+# Open in binary mode so the digest covers the file's exact bytes (no newline
+# translation or text decoding); feed the hash in 2**20-byte chunks so the
+# whole file is never held in memory at once.
+with open(input_file, 'rb') as f:
+    while True:
+        buf = f.read(2**20)
+        if len(buf) == 0:
+            break
+        digestor.update(buf)
-input_stream.close()
+hexdigest = digestor.hexdigest()
-this_task.set_output("%s %s\n" % (hexdigest, file_name))
+# Drop the first '/'-separated component of the input parameter and keep the
+# remainder as the name reported next to the digest.
+file_name = '/'.join(this_task['parameters']['input'].split('/')[1:])
+out = arvados.CollectionWriter()
+out.set_current_file_name("md5sum.txt")
+out.write("%s %s\n" % (hexdigest, file_name))
+this_task.set_output(out.finish())