13924: Add index for PDH lookups.
[arvados.git] / crunch_scripts / pyrtg.py
1 # Copyright (C) The Arvados Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: Apache-2.0
4
5 import arvados
6 import re
7 import os
8 import sys
9
# Cached rtg installation directory. It stays None until setup() has run
# successfully; setup() returns it on later calls and run_rtg() reads it to
# locate the rtg executable.
rtg_install_path = None
11
12 def setup():
13     global rtg_install_path
14     if rtg_install_path:
15         return rtg_install_path
16     rtg_path = arvados.util.zipball_extract(
17         zipball = arvados.current_job()['script_parameters']['rtg_binary_zip'],
18         path = 'rtg')
19     rtg_license_path = arvados.util.collection_extract(
20         collection = arvados.current_job()['script_parameters']['rtg_license'],
21         path = 'license',
22         decompress = False)
23
24     # symlink to rtg-license.txt
25     license_txt_path = os.path.join(rtg_license_path, 'rtg-license.txt')
26     try:
27         os.symlink(license_txt_path, os.path.join(rtg_path,'rtg-license.txt'))
28     except OSError:
29         if not os.path.exists(os.path.join(rtg_path,'rtg-license.txt')):
30             os.symlink(license_txt_path, os.path.join(rtg_path,'rtg-license.txt'))
31
32     rtg_install_path = rtg_path
33     return rtg_path
34
def run_rtg(command, output_dir, command_args, **kwargs):
    """Run an rtg subcommand, then relay its log files to stderr.

    Arguments:
    command -- rtg subcommand name, placed right after the rtg executable.
    output_dir -- directory handed to rtg with '-o'.
    command_args -- extra command-line arguments appended after '-o'.
    kwargs -- accepted for caller compatibility; not used by this function.

    The rtg executable is looked up under the module-level
    rtg_install_path. After the command returns, assert_done() verifies
    that the 'done' marker exists in output_dir and raises if it does
    not. Every file under output_dir that is_log_file() accepts is then
    copied to stderr and removed.
    """
    execargs = [os.path.join(rtg_install_path, 'rtg'),
                command,
                '-o', output_dir]
    execargs += command_args
    sys.stderr.write("run_rtg: exec %s\n" % str(execargs))
    arvados.util.run_command(
        execargs,
        cwd=arvados.current_task().tmpdir,
        stderr=sys.stderr,
        stdout=sys.stderr)

    # Exit status cannot be trusted in rtg 1.1.1.
    assert_done(output_dir)

    # Log files are read as raw bytes. A Python 3 text stream rejects
    # bytes, so write through its underlying binary buffer when there is
    # one; Python 2's sys.stderr has no 'buffer' attribute and accepts
    # byte strings directly.
    raw_stderr = getattr(sys.stderr, 'buffer', sys.stderr)

    # Copy log files to stderr and delete them to avoid storing them
    # in Keep with the output data.
    for dirent in arvados.util.listdir_recursive(output_dir):
        if not is_log_file(dirent):
            continue
        log_file = os.path.join(output_dir, dirent)
        sys.stderr.write(' '.join(['==>', dirent, '<==\n']))
        # Flush so the header is emitted before the raw log bytes.
        sys.stderr.flush()
        with open(log_file, 'rb') as f:
            for buf in iter(lambda: f.read(2**20), b''):
                raw_stderr.write(buf)
        raw_stderr.flush()
        sys.stderr.write('\n') # in case log does not end in newline
        os.unlink(log_file)
65
def assert_done(output_dir):
    """Raise an Exception unless a 'done' file exists in output_dir.

    Used as a sanity check on rtg's exit code: returns None when the
    marker file is present.
    """
    marker = os.path.join(output_dir, 'done')
    if os.path.exists(marker):
        return
    raise Exception("rtg exited 0 but %s does not exist. abort.\n" % marker)
71
def is_log_file(filename):
    """Tell whether filename looks like an rtg progress/status/log file.

    Returns a regex match object (truthy) when the final path component
    is exactly 'progress' or 'done', or when the path ends in '.log'
    with no whitespace in the part matched before that suffix.
    Returns None otherwise.

    The dot before 'log' is escaped so that only a literal '.log'
    suffix counts; names such as 'dialog' or 'catalog' are not treated
    as log files.
    """
    return re.search(r'^(.*/)?(progress|done|\S+\.log)$', filename)
74
# Run setup() as a side effect of importing this module, so that
# rtg_install_path is populated before run_rtg() is called.
setup()