Add regexp to recognize new error text.
[arvados-dev.git] / jenkins / run_upload_packages.py
1 #!/usr/bin/env python3
2
3 # Copyright (C) The Arvados Authors. All rights reserved.
4 #
5 # SPDX-License-Identifier: AGPL-3.0
6
7 import argparse
8 import functools
9 import glob
10 import locale
11 import logging
12 import os
13 import pipes
14 import re
15 import shutil
16 import subprocess
17 import sys
18 import time
19
def run_and_grep(cmd, read_output, *regexps,
                 encoding=locale.getpreferredencoding(), **popen_kwargs):
    """Run a subprocess and capture output lines matching regexps.

    Arguments:
    * cmd: The command to run, as a list or string, as for subprocess.Popen.
    * read_output: 'stdout' or 'stderr', the name of the output stream to read.
    Remaining arguments are regexps to match output, as strings or compiled
    regexp objects.  Output lines matching any regexp will be captured.

    Keyword arguments:
    * encoding: The encoding used to decode the subprocess output.
    Remaining keyword arguments are passed directly to subprocess.Popen.

    When read_output is 'stderr', every line read is also echoed to this
    process's stderr, so the subprocess's diagnostics remain visible.

    Returns 2-tuple (subprocess returncode, list of matched output lines).
    """
    patterns = [regexp if hasattr(regexp, 'search') else re.compile(regexp)
                for regexp in regexps]
    popen_kwargs[read_output] = subprocess.PIPE
    proc = subprocess.Popen(cmd, **popen_kwargs)
    pipe = getattr(proc, read_output)
    matched_lines = []
    try:
        # Decode straight from the pipe's descriptor, but leave ownership of
        # that descriptor with the Popen stream object (closefd=False).
        # Otherwise both objects would try to close the same fd, and the
        # second close could hit an unrelated descriptor that reused the
        # number.  `with pipe` closes it exactly once.
        with pipe, open(pipe.fileno(), encoding=encoding,
                        closefd=False) as output:
            for line in output:
                if any(pattern.search(line) for pattern in patterns):
                    matched_lines.append(line)
                if read_output == 'stderr':
                    print(line, file=sys.stderr, end='')
    finally:
        # Always reap the child, even if reading or decoding failed.
        returncode = proc.wait()
    return returncode, matched_lines
48
49
class TimestampFile:
    """Remember when uploads last happened, using a file's mtime as storage."""

    def __init__(self, path):
        # start_time is captured at construction so that update() can stamp
        # the file with the moment this run began rather than when it ended.
        self.start_time = time.time()
        self.path = path

    def last_upload(self):
        """Return the timestamp file's mtime, or -1 if it cannot be read."""
        try:
            mtime = os.path.getmtime(self.path)
        except EnvironmentError:
            mtime = -1
        return mtime

    def update(self):
        """Create the file if needed and set its mtime to start_time."""
        fd = os.open(self.path, os.O_CREAT | os.O_APPEND)
        os.close(fd)
        # utime takes (atime, mtime): access time is "now", modification
        # time is the recorded start of this run.
        os.utime(self.path, (time.time(), self.start_time))
64
65
class PackageSuite:
    """Base class for a group of package files uploaded to one destination.

    Subclasses implement upload_file() (or override upload_files()), and may
    override post_uploads() to run work after a batch has been uploaded.
    """
    # Subclasses set this to True when they need an SSH host to upload to.
    NEED_SSH = False

    def __init__(self, glob_root, rel_globs):
        # Subclasses may name their logger with LOGGER_PART; otherwise the
        # last component of glob_root is used.
        fallback_part = os.path.basename(glob_root)
        logger_name = 'arvados-dev.upload.' + getattr(
            self, 'LOGGER_PART', fallback_part)
        self.logger = logging.getLogger(logger_name)
        self.globs = [os.path.join(glob_root, pattern)
                      for pattern in rel_globs]

    def files_to_upload(self, since_timestamp):
        """Yield paths matching our globs modified at or after since_timestamp."""
        for pattern in self.globs:
            yield from (candidate for candidate in glob.glob(pattern)
                        if os.path.getmtime(candidate) >= since_timestamp)

    def upload_file(self, path):
        """Upload a single file.  Must be provided by subclasses."""
        raise NotImplementedError("PackageSuite.upload_file")

    def upload_files(self, paths):
        """Log and upload each path in turn with upload_file()."""
        for one_path in paths:
            self.logger.info("Uploading %s", one_path)
            self.upload_file(one_path)

    def post_uploads(self, paths):
        """Hook called after upload_files(); does nothing by default."""
        pass

    def update_packages(self, since_timestamp):
        """Upload everything newer than since_timestamp, then run the hook."""
        pending = list(self.files_to_upload(since_timestamp))
        if not pending:
            return
        self.upload_files(pending)
        self.post_uploads(pending)
97
98
class PythonPackageSuite(PackageSuite):
    """Upload Python source distributions by running `setup.py sdist upload`."""
    LOGGER_PART = 'python'
    # Error lines that mean this version was already uploaded.  A failed
    # upload that printed one of these is not treated as an error.
    REUPLOAD_REGEXPS = [
        re.compile(
            r'^error: Upload failed \(400\): A file named "[^"]+" already exists\b'),
        re.compile(
            r'^error: Upload failed \(400\): File already exists\b'),
    ]

    def __init__(self, glob_root, rel_globs):
        super().__init__(glob_root, rel_globs)
        # Source directories already processed, so each package is uploaded
        # at most once per run even if several of its files match.
        self.seen_packages = set()

    def upload_file(self, path):
        """Build and upload the package whose dist/ directory contains path.

        Raises subprocess.CalledProcessError if the command fails for any
        reason other than the distribution already existing on the server.
        """
        # path is <src_dir>/dist/<file>, so go up two levels.
        src_dir = os.path.dirname(os.path.dirname(path))
        if src_dir in self.seen_packages:
            return
        self.seen_packages.add(src_dir)
        # NOTE: If we ever start uploading Python 3 packages, we'll need to
        # figure out some way to adapt cmd to match.  It might be easiest
        # to give all our setup.py files the executable bit, and run that
        # directly.
        # We also must run `sdist` before `upload`: `upload` uploads any
        # distributions previously generated in the command.  It doesn't
        # know how to upload distributions already on disk.  We write the
        # result to a dedicated directory to avoid interfering with our
        # timestamp tracking.
        cmd = ['python2.7', 'setup.py']
        if not self.logger.isEnabledFor(logging.INFO):
            cmd.append('--quiet')
        cmd.extend(['sdist', '--dist-dir', '.upload_dist', 'upload'])
        try:
            upload_returncode, repushed = run_and_grep(
                cmd, 'stderr', *self.REUPLOAD_REGEXPS, cwd=src_dir)
            if (upload_returncode != 0) and not repushed:
                raise subprocess.CalledProcessError(upload_returncode, cmd)
        finally:
            # Remove the scratch directory on failure too, so a failed upload
            # does not leave it behind in the source tree.  ignore_errors
            # keeps a missing directory (sdist never ran) from masking the
            # real error.
            shutil.rmtree(os.path.join(src_dir, '.upload_dist'),
                          ignore_errors=True)
135
136
class GemPackageSuite(PackageSuite):
    """Upload Ruby gems with `gem push`."""
    LOGGER_PART = 'gems'
    # Output line that means this gem version was already pushed.
    REUPLOAD_REGEXP = re.compile(r'^Repushing of gem versions is not allowed\.$')

    def upload_file(self, path):
        """Push one gem; a rejected re-push of an existing version is not an error."""
        push_cmd = ['gem', 'push', path]
        returncode, already_pushed = run_and_grep(
            push_cmd, 'stdout', self.REUPLOAD_REGEXP)
        if returncode == 0 or already_pushed:
            return
        raise subprocess.CalledProcessError(returncode, push_cmd)
146
147
class DistroPackageSuite(PackageSuite):
    """Base class for suites that copy packages to a repository host over SSH."""
    NEED_SSH = True
    # Directory on the remote host (relative path) that receives uploads,
    # in a per-target subdirectory.
    REMOTE_DEST_DIR = 'tmp'

    def __init__(self, glob_root, rel_globs, target, ssh_host, ssh_opts):
        super().__init__(glob_root, rel_globs)
        self.target = target
        self.ssh_host = ssh_host
        # Each option is passed to ssh/scp as a single `-oOPTION` argument.
        self.ssh_opts = ['-o' + opt for opt in ssh_opts]
        if not self.logger.isEnabledFor(logging.INFO):
            self.ssh_opts.append('-q')

    def _build_cmd(self, base_cmd, *args):
        """Return [base_cmd, *ssh_opts, *args] as an argument list."""
        cmd = [base_cmd]
        cmd.extend(self.ssh_opts)
        cmd.extend(args)
        return cmd

    def _paths_basenames(self, paths):
        """Return an iterator over the basename of each path."""
        return (os.path.basename(path) for path in paths)

    def _run_script(self, script, *args):
        """Run a bash script on the SSH host with args as its positional parameters.

        Raises subprocess.CalledProcessError if ssh exits nonzero.
        """
        # shlex.quote is the documented equivalent of pipes.quote; the pipes
        # module is deprecated and was removed in Python 3.13.
        import shlex
        # SSH will use a shell to run our bash command, so we have to
        # quote our arguments.
        # self.__class__.__name__ provides $0 for the script, which makes a
        # nicer message if there's an error.
        subprocess.check_call(self._build_cmd(
                'ssh', self.ssh_host, 'bash', '-ec', shlex.quote(script),
                self.__class__.__name__, *(shlex.quote(s) for s in args)))

    def upload_files(self, paths):
        """Create the remote destination directory and scp all paths into it."""
        dest_dir = os.path.join(self.REMOTE_DEST_DIR, self.target)
        mkdir = self._build_cmd('ssh', self.ssh_host, 'install', '-d', dest_dir)
        subprocess.check_call(mkdir)
        # One scp invocation copies every file in the batch.
        cmd = self._build_cmd('scp', *paths)
        cmd.append('{}:{}'.format(self.ssh_host, dest_dir))
        subprocess.check_call(cmd)
185
186
class DebianPackageSuite(DistroPackageSuite):
    """Publish uploaded .deb files into an apt repository using freight."""
    # Remote script.  $1 is the upload directory, $2 the distribution
    # name, and the remaining arguments are the package file names.
    FREIGHT_SCRIPT = """
cd "$1"; shift
DISTNAME=$1; shift
freight add "$@" "apt/$DISTNAME"
freight cache "apt/$DISTNAME"
rm "$@"
"""
    # Maps our target names to the distribution names passed to freight.
    TARGET_DISTNAMES = {
        'debian8': 'jessie',
        'debian9': 'stretch',
        'ubuntu1204': 'precise',
        'ubuntu1404': 'trusty',
        'ubuntu1604': 'xenial',
    }

    def post_uploads(self, paths):
        """Run FREIGHT_SCRIPT on the SSH host for the files just uploaded."""
        remote_dir = self.REMOTE_DEST_DIR + '/' + self.target
        distname = self.TARGET_DISTNAMES[self.target]
        basenames = self._paths_basenames(paths)
        self._run_script(self.FREIGHT_SCRIPT, remote_dir, distname, *basenames)
207
208
class RedHatPackageSuite(DistroPackageSuite):
    """Sign uploaded .rpm files and publish them into a yum repository."""
    # Remote script.  $1 is the upload directory, $2 the repository
    # directory, and the remaining arguments are the package file names.
    CREATEREPO_SCRIPT = """
cd "$1"; shift
REPODIR=$1; shift
rpmsign --addsign "$@" </dev/null
mv "$@" "$REPODIR"
createrepo "$REPODIR"
"""
    REPO_ROOT = '/var/www/rpm.arvados.org/'
    # Maps our target names to repository directories under REPO_ROOT.
    TARGET_REPODIRS = {
        'centos7': 'CentOS/7/os/x86_64/',
    }

    def post_uploads(self, paths):
        """Run CREATEREPO_SCRIPT on the SSH host for the files just uploaded."""
        target_subdir = self.TARGET_REPODIRS[self.target]
        repo_dir = os.path.join(self.REPO_ROOT, target_subdir)
        remote_dir = self.REMOTE_DEST_DIR + '/' + self.target
        basenames = self._paths_basenames(paths)
        self._run_script(self.CREATEREPO_SCRIPT, remote_dir, repo_dir, *basenames)
227
228
def _define_suite(suite_class, *rel_globs, **kwargs):
    """Return a partial of suite_class with rel_globs and kwargs pre-bound.

    The positional rel_globs are bound as the `rel_globs` keyword argument
    (a tuple); callers finish construction by supplying the rest.
    """
    suite_factory = functools.partial(
        suite_class, rel_globs=rel_globs, **kwargs)
    return suite_factory
231
# Registry of upload targets: target name -> partially-applied suite
# constructor.  Globs are relative to the workspace root.
PACKAGE_SUITES = {
    'python': _define_suite(
        PythonPackageSuite,
        'sdk/pam/dist/*.tar.gz',
        'sdk/python/dist/*.tar.gz',
        'sdk/cwl/dist/*.tar.gz',
        'services/nodemanager/dist/*.tar.gz',
        'services/fuse/dist/*.tar.gz',
    ),
    'gems': _define_suite(
        GemPackageSuite,
        'sdk/ruby/*.gem',
        'sdk/cli/*.gem',
        'services/login-sync/*.gem',
    ),
}
# Distribution targets each get their own packages/<target>/ directory.
for target in ('debian8', 'debian9', 'ubuntu1204', 'ubuntu1404', 'ubuntu1604'):
    PACKAGE_SUITES[target] = _define_suite(
        DebianPackageSuite,
        os.path.join('packages', target, '*.deb'),
        target=target,
    )
for target in ('centos7',):
    PACKAGE_SUITES[target] = _define_suite(
        RedHatPackageSuite,
        os.path.join('packages', target, '*.rpm'),
        target=target,
    )
254
def parse_arguments(arguments):
    """Parse and validate command-line arguments.

    arguments is the list of argument strings (without the program name).
    Returns the argparse namespace, with `targets` expanded to every known
    target when 'all' was requested.  Exits through parser.error() when the
    workspace is unset, a target is unknown, or a target needs SSH and no
    --ssh-host was given.
    """
    available_targets = ', '.join(sorted(PACKAGE_SUITES.keys()))
    parser = argparse.ArgumentParser(
        prog="run_upload_packages.py",
        description="Upload Arvados packages to various repositories",
    )
    parser.add_argument(
        '--workspace', '-W',
        default=os.environ.get('WORKSPACE'),
        help="Arvados source directory with built packages to upload",
    )
    parser.add_argument(
        '--ssh-host', '-H',
        help="Host specification for distribution repository server",
    )
    parser.add_argument(
        '-o',
        action='append', default=[], dest='ssh_opts', metavar='OPTION',
        help="Pass option to `ssh -o`",
    )
    parser.add_argument(
        '--verbose', '-v',
        action='count', default=0,
        help="Log more information and subcommand output",
    )
    parser.add_argument(
        'targets',
        nargs='*', default=['all'], metavar='target',
        help=("Upload packages to these targets (default all)\n"
              "Available targets: " + available_targets),
    )
    args = parser.parse_args(arguments)
    if 'all' in args.targets:
        args.targets = list(PACKAGE_SUITES.keys())

    if args.workspace is None:
        parser.error("workspace not set from command line or environment")
    for target in args.targets:
        if target not in PACKAGE_SUITES:
            parser.error("unrecognized target {!r}".format(target))
        # Registry values are functools.partial objects; .func is the class.
        suite_class = PACKAGE_SUITES[target].func
        if suite_class.NEED_SSH and (args.ssh_host is None):
            parser.error(
                "--ssh-host must be specified to upload distribution packages")
    return args
288
def setup_logger(stream_dest, args):
    """Attach a stream handler to the 'arvados-dev.upload' logger and set its level.

    The level starts at WARNING and drops by 10 for each args.verbose
    count, never going below 1.
    """
    formatter = logging.Formatter(
        '%(asctime)s %(name)s[%(process)d] %(levelname)s: %(message)s',
        '%Y-%m-%d %H:%M:%S')
    handler = logging.StreamHandler(stream_dest)
    handler.setFormatter(formatter)
    level = max(1, logging.WARNING - (10 * args.verbose))
    upload_logger = logging.getLogger('arvados-dev.upload')
    upload_logger.addHandler(handler)
    upload_logger.setLevel(level)
297
def build_suite_and_upload(target, since_timestamp, args):
    """Instantiate the suite registered for target and upload its new packages.

    SSH settings from args are passed to the constructor only for suites
    whose class sets NEED_SSH.
    """
    suite_def = PACKAGE_SUITES[target]
    if suite_def.func.NEED_SSH:
        ssh_kwargs = dict(ssh_host=args.ssh_host, ssh_opts=args.ssh_opts)
    else:
        ssh_kwargs = {}
    suite_def(args.workspace, **ssh_kwargs).update_packages(since_timestamp)
305
def main(arguments, stdout=sys.stdout, stderr=sys.stderr):
    """Upload new packages for each requested target.

    For every target, the mtime of packages/.last_upload_<target> in the
    workspace decides which files count as new.  The timestamp file is
    refreshed only after that target's upload completes without raising.
    Logging goes to stderr.
    """
    args = parse_arguments(arguments)
    setup_logger(stderr, args)
    for target in args.targets:
        ts_path = os.path.join(
            args.workspace, 'packages', '.last_upload_%s' % target)
        ts_file = TimestampFile(ts_path)
        build_suite_and_upload(target, ts_file.last_upload(), args)
        ts_file.update()
315
# Script entry point: pass the command-line arguments (minus the program
# name) to main() when this file is run directly rather than imported.
if __name__ == '__main__':
    main(sys.argv[1:])