6218: add performance profiling and a sample test in python sdk.
[arvados.git] / crunch_scripts / pyrtg.py
1 import arvados
2 import re
3 import os
4 import sys
5
# Path of the extracted rtg installation, cached by setup(); stays None
# until setup() has completed once.
rtg_install_path = None
7
def setup():
    """Extract rtg and its license, then link the license into rtg.

    The rtg zipball and the license collection are taken from the
    current job's script_parameters ('rtg_binary_zip' and 'rtg_license').
    The resulting directory is remembered in the module-level
    rtg_install_path, so later calls skip the extraction entirely.

    Returns the path of the extracted rtg directory.
    """
    global rtg_install_path
    if not rtg_install_path:
        zip_locator = arvados.current_job()['script_parameters']['rtg_binary_zip']
        install_dir = arvados.util.zipball_extract(
            zipball=zip_locator,
            path='rtg')
        license_locator = arvados.current_job()['script_parameters']['rtg_license']
        license_dir = arvados.util.collection_extract(
            collection=license_locator,
            path='license',
            decompress=False)

        # rtg gets a symlink to rtg-license.txt inside its own directory.
        license_src = os.path.join(license_dir, 'rtg-license.txt')
        license_link = os.path.join(install_dir, 'rtg-license.txt')
        try:
            os.symlink(license_src, license_link)
        except OSError:
            # A failure is tolerated only when the link path exists
            # afterwards (presumably it was created earlier); otherwise
            # retry once and let any error propagate.
            if not os.path.exists(license_link):
                os.symlink(license_src, license_link)

        rtg_install_path = install_dir
    return rtg_install_path
30
def run_rtg(command, output_dir, command_args, **kwargs):
    """Run one rtg subcommand and relay its log files to stderr.

    The command line is the 'rtg' executable under rtg_install_path,
    followed by `command`, '-o', `output_dir` and then everything in
    `command_args`. It is run through arvados.util.run_command with the
    current task's tmpdir as working directory and with both of the
    child's output streams directed to sys.stderr.

    After the run, assert_done() checks output_dir, and every entry under
    output_dir accepted by is_log_file() is copied to stderr and removed.

    Extra keyword arguments are accepted but not used.
    """
    rtg_binary = os.path.join(rtg_install_path, 'rtg')
    execargs = [rtg_binary, command, '-o', output_dir]
    execargs.extend(command_args)
    sys.stderr.write("run_rtg: exec %s\n" % str(execargs))
    arvados.util.run_command(
        execargs,
        cwd=arvados.current_task().tmpdir,
        stderr=sys.stderr,
        stdout=sys.stderr)

    # Exit status cannot be trusted in rtg 1.1.1, so check the marker file.
    assert_done(output_dir)

    # Log files go to stderr and are then deleted so that they are not
    # stored in Keep together with the output data.
    for relpath in arvados.util.listdir_recursive(output_dir):
        if not is_log_file(relpath):
            continue
        abspath = os.path.join(output_dir, relpath)
        sys.stderr.write(' '.join(['==>', relpath, '<==\n']))
        with open(abspath, 'rb') as log:
            # Stream in 1 MiB pieces until read() returns nothing.
            for chunk in iter(lambda: log.read(2**20), b''):
                sys.stderr.write(chunk)
        sys.stderr.write('\n') # in case log does not end in newline
        os.unlink(abspath)
61
def assert_done(output_dir):
    """Verify that a file named 'done' exists in output_dir.

    Returns None when the file is present; raises Exception naming the
    missing path otherwise.
    """
    marker = os.path.join(output_dir, 'done')
    if os.path.exists(marker):
        return
    raise Exception("rtg exited 0 but %s does not exist. abort.\n" % marker)
67
def is_log_file(filename):
    """Tell whether a path names an rtg bookkeeping file.

    A path qualifies when its last component is exactly 'progress' or
    'done', or when it is a whitespace-free name ending in '.log'
    (optionally below some directory prefix).

    Returns a truthy re match object when the path qualifies, else None.
    """
    # The dot is escaped so that only a literal '.log' suffix matches; an
    # unescaped '.' would also accept names such as 'catalog' or 'dialog'.
    return re.search(r'^(.*/)?(progress|done|\S+\.log)$', filename)
70
# Run setup() at import time so that rtg_install_path is populated before
# run_rtg() is called.
setup()