run_upload_packages: Improve names around run_and_grep.
[arvados-dev.git] / jenkins / run_upload_packages.py
1 #!/usr/bin/env python3
2
3 import argparse
4 import functools
5 import glob
6 import locale
7 import logging
8 import os
9 import pipes
10 import re
11 import shutil
12 import subprocess
13 import sys
14 import time
15
def run_and_grep(cmd, read_output, *regexps,
                 encoding=locale.getpreferredencoding(), **popen_kwargs):
    """Run a subprocess and capture output lines matching regexps.

    Arguments:
    * cmd: The command to run, as a list or string, as for subprocess.Popen.
    * read_output: 'stdout' or 'stderr', the name of the output stream to read.
    Remaining arguments are regexps to match output, as strings or compiled
    regexp objects.  Output lines matching any regexp will be captured.

    Keyword arguments:
    * encoding: The encoding used to decode the subprocess output.
    Remaining keyword arguments are passed directly to subprocess.Popen.

    Returns 2-tuple (subprocess returncode, list of matched output lines).
    Raises ValueError if read_output does not name an output stream.
    """
    if read_output not in ('stdout', 'stderr'):
        raise ValueError(
            "read_output must be 'stdout' or 'stderr', not {!r}".format(
                read_output))
    regexps = [regexp if hasattr(regexp, 'search') else re.compile(regexp)
               for regexp in regexps]
    popen_kwargs[read_output] = subprocess.PIPE
    proc = subprocess.Popen(cmd, **popen_kwargs)
    pipe = getattr(proc, read_output)
    try:
        # The pipe object created by Popen owns the file descriptor.  The
        # decoding reader is opened with closefd=False so the descriptor is
        # closed exactly once, by its owner, when the `with` block exits.
        with pipe, open(pipe.fileno(), encoding=encoding,
                        closefd=False) as output:
            matched_lines = [line for line in output
                             if any(regexp.search(line) for regexp in regexps)]
    finally:
        # Always reap the child, even if reading or decoding its output failed.
        returncode = proc.wait()
    return returncode, matched_lines
40
41
class TimestampFile:
    """Track the time of the last upload through a marker file's mtime.

    The start time is recorded when the object is created, and that value
    (not the time update() is called) is what update() stores as the mtime.
    """

    def __init__(self, path):
        """Remember the marker file path and the current time."""
        self.path = path
        self.start_time = time.time()

    def last_upload(self):
        """Return the marker file's mtime, or -1 if it cannot be read."""
        try:
            return os.path.getmtime(self.path)
        except OSError:
            return -1

    def update(self):
        """Create the marker file if needed and set its mtime to start_time."""
        # Pass an explicit mode: os.open() defaults to 0o777, which would
        # create the marker file with its executable bits set.
        os.close(os.open(self.path, os.O_CREAT | os.O_APPEND, 0o666))
        os.utime(self.path, (time.time(), self.start_time))
56
57
class PackageSuite:
    """Base class for a group of package files uploaded the same way.

    Subclasses implement upload_file() and may override upload_files() and
    post_uploads().  A subclass may set LOGGER_PART to name its logger;
    otherwise the basename of glob_root is used.
    """
    NEED_SSH = False

    def __init__(self, glob_root, rel_globs):
        default_part = os.path.basename(glob_root)
        logger_name = ('arvados-dev.upload.'
                       + getattr(self, 'LOGGER_PART', default_part))
        self.logger = logging.getLogger(logger_name)
        self.globs = []
        for pattern in rel_globs:
            self.globs.append(os.path.join(glob_root, pattern))

    def files_to_upload(self, since_timestamp):
        """Yield matching paths whose mtime is at least since_timestamp."""
        for pattern in self.globs:
            yield from (candidate for candidate in glob.glob(pattern)
                        if os.path.getmtime(candidate) >= since_timestamp)

    def upload_file(self, path):
        """Upload a single file.  Must be provided by subclasses."""
        raise NotImplementedError("PackageSuite.upload_file")

    def upload_files(self, paths):
        """Log and upload each path in turn through upload_file()."""
        for one_path in paths:
            self.logger.info("Uploading %s", one_path)
            self.upload_file(one_path)

    def post_uploads(self, paths):
        """Hook called after the uploads; does nothing by default."""
        pass

    def update_packages(self, since_timestamp):
        """Upload everything changed since since_timestamp, then run the hook."""
        pending = list(self.files_to_upload(since_timestamp))
        if not pending:
            return
        self.upload_files(pending)
        self.post_uploads(pending)
89
90
class PythonPackageSuite(PackageSuite):
    """Upload Python packages by running `setup.py sdist upload` per package.

    A failed upload is tolerated when the command's stderr contains a line
    matching REUPLOAD_REGEXP.
    """
    LOGGER_PART = 'python'
    REUPLOAD_REGEXP = re.compile(
        r'^error: Upload failed \(400\): A file named "[^"]+" already exists\b')

    def __init__(self, glob_root, rel_globs):
        super().__init__(glob_root, rel_globs)
        # Source directories already processed, so each one is handled once.
        self.seen_packages = set()

    def upload_file(self, path):
        """Build and upload the package whose distribution file is at path.

        The package source directory is taken to be two levels above path.
        Raises subprocess.CalledProcessError if the command fails and no
        stderr line matches REUPLOAD_REGEXP.
        """
        src_dir = os.path.dirname(os.path.dirname(path))
        if src_dir in self.seen_packages:
            return
        self.seen_packages.add(src_dir)
        # NOTE: If we ever start uploading Python 3 packages, we'll need to
        # figure out some way to adapt cmd to match.  It might be easiest
        # to give all our setup.py files the executable bit, and run that
        # directly.
        # We also must run `sdist` before `upload`: `upload` uploads any
        # distributions previously generated in the command.  It doesn't
        # know how to upload distributions already on disk.  We write the
        # result to a dedicated directory to avoid interfering with our
        # timestamp tracking.
        cmd = ['python2.7', 'setup.py']
        if not self.logger.isEnabledFor(logging.INFO):
            cmd.append('--quiet')
        cmd.extend(['sdist', '--dist-dir', '.upload_dist', 'upload'])
        succeeded = False
        try:
            upload_returncode, repushed = run_and_grep(
                cmd, 'stderr', self.REUPLOAD_REGEXP, cwd=src_dir)
            if (upload_returncode != 0) and not repushed:
                raise subprocess.CalledProcessError(upload_returncode, cmd)
            succeeded = True
        finally:
            # Remove the scratch directory on failure too, so a failed upload
            # does not leave it behind.  Cleanup errors are only ignored on
            # the failure path, where they would mask the original exception.
            shutil.rmtree(os.path.join(src_dir, '.upload_dist'),
                          ignore_errors=not succeeded)
123
124
class GemPackageSuite(PackageSuite):
    """Upload gems with `gem push`.

    A failed push is tolerated when the command's stdout contains a line
    matching REUPLOAD_REGEXP.
    """
    LOGGER_PART = 'gems'
    REUPLOAD_REGEXP = re.compile(r'^Repushing of gem versions is not allowed\.$')

    def upload_file(self, path):
        """Push one gem; raise CalledProcessError on an untolerated failure."""
        push_cmd = ['gem', 'push', path]
        exit_status, already_pushed = run_and_grep(
            push_cmd, 'stdout', self.REUPLOAD_REGEXP)
        if exit_status == 0 or already_pushed:
            return
        raise subprocess.CalledProcessError(exit_status, push_cmd)
134
135
class DistroPackageSuite(PackageSuite):
    """Base class for suites that copy packages to a server over SSH.

    Files are copied with scp into REMOTE_DEST_DIR/<target> on ssh_host.
    Subclasses use _run_script() to process them there.
    """
    NEED_SSH = True
    REMOTE_DEST_DIR = 'tmp'

    def __init__(self, glob_root, rel_globs, target, ssh_host, ssh_opts):
        super().__init__(glob_root, rel_globs)
        self.target = target
        self.ssh_host = ssh_host
        self.ssh_opts = []
        for option in ssh_opts:
            self.ssh_opts.append('-o' + option)
        # Quiet the ssh/scp commands unless INFO logging is enabled.
        if not self.logger.isEnabledFor(logging.INFO):
            self.ssh_opts.append('-q')

    def _build_cmd(self, base_cmd, *args):
        """Return [base_cmd, <ssh options...>, <args...>] as a new list."""
        return [base_cmd] + list(self.ssh_opts) + list(args)

    def _paths_basenames(self, paths):
        """Return an iterator over the basename of each path."""
        return map(os.path.basename, paths)

    def _run_script(self, script, *args):
        """Run a bash script on the SSH host with the given arguments."""
        # SSH will use a shell to run our bash command, so we have to
        # quote our arguments.
        quoted_script = pipes.quote(script)
        quoted_args = [pipes.quote(arg) for arg in args]
        # The class name provides $0 for the script, which makes a
        # nicer message if there's an error.
        script_name = self.__class__.__name__
        ssh_cmd = self._build_cmd(
            'ssh', self.ssh_host, 'bash', '-ec', quoted_script,
            script_name, *quoted_args)
        subprocess.check_call(ssh_cmd)

    def upload_files(self, paths):
        """Create the remote destination directory and scp all paths into it."""
        remote_dir = os.path.join(self.REMOTE_DEST_DIR, self.target)
        subprocess.check_call(
            self._build_cmd('ssh', self.ssh_host, 'install', '-d', remote_dir))
        scp_cmd = self._build_cmd('scp', *paths)
        scp_cmd.append('{}:{}'.format(self.ssh_host, remote_dir))
        subprocess.check_call(scp_cmd)
173
174
class DebianPackageSuite(DistroPackageSuite):
    """Add uploaded .deb files to the remote apt repository with freight."""
    # Script arguments: upload directory, distribution name, package files.
    FREIGHT_SCRIPT = """
cd "$1"; shift
DISTNAME=$1; shift
freight add "$@" "apt/$DISTNAME"
freight cache "apt/$DISTNAME"
rm "$@"
"""
    # Maps each supported target to the distribution name passed to freight.
    TARGET_DISTNAMES = dict(
        debian7='wheezy',
        debian8='jessie',
        ubuntu1204='precise',
        ubuntu1404='trusty',
    )

    def post_uploads(self, paths):
        """Run FREIGHT_SCRIPT remotely on the files that were just uploaded."""
        remote_dir = '/'.join((self.REMOTE_DEST_DIR, self.target))
        distname = self.TARGET_DISTNAMES[self.target]
        self._run_script(self.FREIGHT_SCRIPT, remote_dir, distname,
                         *self._paths_basenames(paths))
194
195
class RedHatPackageSuite(DistroPackageSuite):
    """Sign uploaded .rpm files and move them into the remote repository."""
    # Script arguments: upload directory, repository directory, package files.
    CREATEREPO_SCRIPT = """
cd "$1"; shift
REPODIR=$1; shift
rpmsign --addsign "$@" </dev/null
mv "$@" "$REPODIR"
createrepo "$REPODIR"
"""
    REPO_ROOT = '/var/www/rpm.arvados.org/'
    # Maps each supported target to its directory under REPO_ROOT.
    TARGET_REPODIRS = dict(
        centos6='CentOS/6/os/x86_64/',
        centos7='CentOS/7/os/x86_64/',
    )

    def post_uploads(self, paths):
        """Run CREATEREPO_SCRIPT remotely on the files that were just uploaded."""
        target_subdir = self.TARGET_REPODIRS[self.target]
        repo_dir = os.path.join(self.REPO_ROOT, target_subdir)
        remote_dir = '/'.join((self.REMOTE_DEST_DIR, self.target))
        self._run_script(self.CREATEREPO_SCRIPT, remote_dir, repo_dir,
                         *self._paths_basenames(paths))
215
216
def _define_suite(suite_class, *rel_globs, **kwargs):
    """Return a partial of suite_class with rel_globs and kwargs pre-bound.

    The remaining constructor arguments are supplied when the partial is
    called.
    """
    bound_kwargs = dict(rel_globs=rel_globs, **kwargs)
    return functools.partial(suite_class, **bound_kwargs)
219
# Registry of upload targets: each name maps to a partially-applied suite
# constructor that still needs the workspace root (and SSH settings for
# distribution targets).
PACKAGE_SUITES = {}
PACKAGE_SUITES['python'] = _define_suite(
    PythonPackageSuite,
    'sdk/pam/dist/*.tar.gz',
    'sdk/python/dist/*.tar.gz',
    'sdk/cwl/dist/*.tar.gz',
    'services/nodemanager/dist/*.tar.gz',
    'services/fuse/dist/*.tar.gz',
)
PACKAGE_SUITES['gems'] = _define_suite(
    GemPackageSuite,
    'sdk/ruby/*.gem',
    'sdk/cli/*.gem',
    'services/login-sync/*.gem',
)
# Debian-style targets: one suite per target, globbing packages/<target>/*.deb.
for target in ('debian7', 'debian8', 'ubuntu1204', 'ubuntu1404'):
    PACKAGE_SUITES[target] = _define_suite(
        DebianPackageSuite,
        os.path.join('packages', target, '*.deb'),
        target=target,
    )
# Red Hat-style targets: one suite per target, globbing packages/<target>/*.rpm.
for target in ('centos6', 'centos7'):
    PACKAGE_SUITES[target] = _define_suite(
        RedHatPackageSuite,
        os.path.join('packages', target, '*.rpm'),
        target=target,
    )
242
def parse_arguments(arguments):
    """Parse and validate the command-line arguments.

    The special target 'all' expands to every key of PACKAGE_SUITES.  The
    parser reports an error if no workspace is set, if a target is unknown,
    or if a target whose suite has NEED_SSH set is requested without
    --ssh-host.  Returns the parsed argparse namespace.
    """
    target_names = ', '.join(sorted(PACKAGE_SUITES))
    parser = argparse.ArgumentParser(
        prog="run_upload_packages.py",
        description="Upload Arvados packages to various repositories")
    parser.add_argument(
        '--workspace', '-W',
        default=os.environ.get('WORKSPACE'),
        help="Arvados source directory with built packages to upload")
    parser.add_argument(
        '--ssh-host', '-H',
        help="Host specification for distribution repository server")
    parser.add_argument(
        '-o',
        action='append', default=[], dest='ssh_opts', metavar='OPTION',
        help="Pass option to `ssh -o`")
    parser.add_argument(
        '--verbose', '-v',
        action='count', default=0,
        help="Log more information and subcommand output")
    parser.add_argument(
        'targets',
        nargs='*', default=['all'], metavar='target',
        help=("Upload packages to these targets (default all)\nAvailable targets: "
              + target_names))
    args = parser.parse_args(arguments)
    if 'all' in args.targets:
        args.targets = list(PACKAGE_SUITES)

    if args.workspace is None:
        parser.error("workspace not set from command line or environment")
    for target in args.targets:
        if target not in PACKAGE_SUITES:
            parser.error("unrecognized target {!r}".format(target))
        needs_ssh = PACKAGE_SUITES[target].func.NEED_SSH
        if needs_ssh and (args.ssh_host is None):
            parser.error(
                "--ssh-host must be specified to upload distribution packages")
    return args
276
def setup_logger(stream_dest, args):
    """Attach a stream handler to the 'arvados-dev.upload' logger.

    The level starts at WARNING and drops by 10 for each count in
    args.verbose, but never goes below 1.
    """
    formatter = logging.Formatter(
        '%(asctime)s %(name)s[%(process)d] %(levelname)s: %(message)s',
        '%Y-%m-%d %H:%M:%S')
    handler = logging.StreamHandler(stream_dest)
    handler.setFormatter(formatter)

    level = logging.WARNING - (10 * args.verbose)
    if level < 1:
        level = 1

    upload_logger = logging.getLogger('arvados-dev.upload')
    upload_logger.addHandler(handler)
    upload_logger.setLevel(level)
285
def build_suite_and_upload(target, since_timestamp, args):
    """Build the suite registered for target and upload its changed packages.

    SSH settings from args are passed to the suite constructor only when the
    suite class has NEED_SSH set.
    """
    suite_factory = PACKAGE_SUITES[target]
    if suite_factory.func.NEED_SSH:
        ssh_kwargs = {'ssh_host': args.ssh_host, 'ssh_opts': args.ssh_opts}
    else:
        ssh_kwargs = {}
    suite = suite_factory(args.workspace, **ssh_kwargs)
    suite.update_packages(since_timestamp)
293
def main(arguments, stdout=sys.stdout, stderr=sys.stderr):
    """Upload packages for each requested target, then record the upload.

    Each target has its own marker file, packages/.last_upload_<target>
    under the workspace.  The marker is only updated after that target's
    upload has returned without raising.
    """
    args = parse_arguments(arguments)
    setup_logger(stderr, args)
    packages_dir = os.path.join(args.workspace, 'packages')
    for target in args.targets:
        marker_path = os.path.join(packages_dir, '.last_upload_%s' % target)
        # Create the marker object before uploading so its start time
        # predates the upload.
        marker = TimestampFile(marker_path)
        previous_upload = marker.last_upload()
        build_suite_and_upload(target, previous_upload, args)
        marker.update()


if __name__ == '__main__':
    main(sys.argv[1:])