Make the run_upload_packages.py script more verbose.
[arvados-dev.git] / jenkins / run_upload_packages.py
1 #!/usr/bin/env python3
2
3 # Copyright (C) The Arvados Authors. All rights reserved.
4 #
5 # SPDX-License-Identifier: AGPL-3.0
6
import argparse
import functools
import glob
import locale
import logging
import os
import pipes
import re
import shlex
import shutil
import subprocess
import sys
import time
19
def run_and_grep(cmd, read_output, *regexps,
                 encoding=locale.getpreferredencoding(), **popen_kwargs):
    """Run a subprocess and capture output lines matching regexps.

    Arguments:
    * cmd: The command to run, as a list or string, as for subprocess.Popen.
    * read_output: 'stdout' or 'stderr', the name of the output stream to read.
    Remaining arguments are regexps to match output, as strings or compiled
    regexp objects.  Output lines matching any regexp will be captured.

    Keyword arguments:
    * encoding: The encoding used to decode the subprocess output.
    Remaining keyword arguments are passed directly to subprocess.Popen.

    When read_output is 'stderr', every line read is also echoed to this
    process's stderr.  Lines read from 'stdout' are not echoed.

    Returns 2-tuple (subprocess returncode, list of matched output lines).
    Raises ValueError if read_output is not 'stdout' or 'stderr'.
    """
    if read_output not in ('stdout', 'stderr'):
        raise ValueError(
            "read_output must be 'stdout' or 'stderr', not {!r}".format(
                read_output))
    patterns = [regexp if hasattr(regexp, 'search') else re.compile(regexp)
                for regexp in regexps]
    popen_kwargs[read_output] = subprocess.PIPE
    matched_lines = []
    # Using Popen as a context manager guarantees the pipe is closed and the
    # child is waited for, even if reading or decoding the output raises.
    with subprocess.Popen(cmd, **popen_kwargs) as proc:
        pipe = getattr(proc, read_output)
        # closefd=False: the descriptor belongs to the Popen object, which
        # closes it on exit.  Letting this wrapper close it too would close
        # the same descriptor number twice.
        with open(pipe.fileno(), encoding=encoding, closefd=False) as output:
            for line in output:
                if any(pattern.search(line) for pattern in patterns):
                    matched_lines.append(line)
                if read_output == 'stderr':
                    print(line, file=sys.stderr, end='')
        returncode = proc.wait()
    return returncode, matched_lines
48
49
class TimestampFile:
    """Record when an upload run started, using a marker file's mtime.

    The start time is captured when the object is constructed; update()
    later stamps that time onto the marker file.
    """

    def __init__(self, path):
        """Remember the marker file path and the current time."""
        self.path = path
        self.start_time = time.time()

    def last_upload(self):
        """Return the marker file's mtime, or -1 if it cannot be read."""
        try:
            return os.path.getmtime(self.path)
        except OSError:
            return -1

    def update(self):
        """Create the marker file if needed and set its mtime to start_time.

        The access time is set to the current time.
        """
        # Explicit 0o666 (still subject to the umask): os.open() defaults to
        # 0o777, which would create the marker file with executable bits.
        fd = os.open(self.path, os.O_CREAT | os.O_APPEND, 0o666)
        os.close(fd)
        os.utime(self.path, (time.time(), self.start_time))
64
65
class PackageSuite:
    """Base class for a group of package files uploaded the same way.

    Subclasses implement upload_file() and may override upload_files()
    and post_uploads().
    """
    NEED_SSH = False

    def __init__(self, glob_root, rel_globs):
        """Set up the logger and the absolute glob patterns.

        The logger name ends with the subclass's LOGGER_PART when it
        defines one, otherwise with the basename of glob_root.
        """
        fallback_part = os.path.basename(glob_root)
        name_suffix = getattr(self, 'LOGGER_PART', fallback_part)
        self.logger = logging.getLogger('arvados-dev.upload.' + name_suffix)
        self.globs = []
        for pattern in rel_globs:
            self.globs.append(os.path.join(glob_root, pattern))

    def files_to_upload(self, since_timestamp):
        """Yield matching paths whose mtime is at or after since_timestamp."""
        for pattern in self.globs:
            yield from (
                candidate for candidate in glob.glob(pattern)
                if os.path.getmtime(candidate) >= since_timestamp)

    def upload_file(self, path):
        """Upload a single file.  Must be provided by subclasses."""
        raise NotImplementedError("PackageSuite.upload_file")

    def upload_files(self, paths):
        """Log and upload each path in turn."""
        for package_path in paths:
            self.logger.info("Uploading %s", package_path)
            self.upload_file(package_path)

    def post_uploads(self, paths):
        """Hook run after a batch of uploads.  Does nothing by default."""
        pass

    def update_packages(self, since_timestamp):
        """Upload everything modified since since_timestamp, if anything."""
        pending = list(self.files_to_upload(since_timestamp))
        if not pending:
            return
        self.upload_files(pending)
        self.post_uploads(pending)
96             self.post_uploads(upload_paths)
97
98
class PythonPackageSuite(PackageSuite):
    """Upload Python packages by running `setup.py ... upload` in each
    package's source directory.
    """
    LOGGER_PART = 'python'
    # stderr lines meaning the distribution was already uploaded earlier.
    # An upload that fails with one of these is not treated as an error.
    REUPLOAD_REGEXPS = [
        re.compile(
            r'^error: Upload failed \(400\): A file named "[^"]+" already exists\b'),
        re.compile(
            r'^error: Upload failed \(400\): File already exists\b'),
        re.compile(
            r'^error: Upload failed \(400\): Only one sdist may be uploaded per release\b'),
    ]

    def __init__(self, glob_root, rel_globs):
        super().__init__(glob_root, rel_globs)
        # Source directories already handled by this suite instance.
        self.seen_packages = set()

    def upload_file(self, path):
        """Build and upload the package whose built file is at path.

        The source directory is taken to be two levels above path.  Each
        source directory is processed at most once per suite instance.

        Raises subprocess.CalledProcessError if the command fails and none
        of REUPLOAD_REGEXPS matched its stderr.
        """
        src_dir = os.path.dirname(os.path.dirname(path))
        if src_dir in self.seen_packages:
            return
        self.seen_packages.add(src_dir)
        # We also must run `sdist` before `upload`: `upload` uploads any
        # distributions previously generated in the command.  It doesn't
        # know how to upload distributions already on disk.  We write the
        # result to a dedicated directory to avoid interfering with our
        # timestamp tracking.
        cmd = ['python3', 'setup.py']
        if not self.logger.isEnabledFor(logging.INFO):
            cmd.append('--quiet')
        cmd.extend(['bdist_wheel', '--dist-dir', '.upload_dist'])
        cmd.extend(['sdist', '--dist-dir', '.upload_dist'])
        cmd.extend(['upload'])
        dist_dir = os.path.join(src_dir, '.upload_dist')
        try:
            upload_returncode, repushed = run_and_grep(
                cmd, 'stderr', *self.REUPLOAD_REGEXPS, cwd=src_dir)
            if (upload_returncode != 0) and not repushed:
                raise subprocess.CalledProcessError(upload_returncode, cmd)
        finally:
            # Remove the scratch directory on failure too, so a failed run
            # does not leave stale build output in the source tree.
            # ignore_errors keeps a cleanup problem from masking the
            # upload error.
            shutil.rmtree(dist_dir, ignore_errors=True)
134         shutil.rmtree(os.path.join(src_dir, '.upload_dist'))
135
136
class GemPackageSuite(PackageSuite):
    """Upload Ruby gems with `gem push`."""
    LOGGER_PART = 'gems'
    # stdout line printed when this gem version was already pushed.
    REUPLOAD_REGEXP = re.compile(r'^Repushing of gem versions is not allowed\.$')

    def upload_file(self, path):
        """Push one gem; a failure caused by a repeated push is ignored."""
        push_cmd = ['gem', 'push', path]
        status, already_pushed = run_and_grep(
            push_cmd, 'stdout', self.REUPLOAD_REGEXP)
        if status == 0 or already_pushed:
            return
        raise subprocess.CalledProcessError(status, push_cmd)
146
147
class DistroPackageSuite(PackageSuite):
    """Base class for suites that copy packages to a repository server
    over SSH and then run a script there.
    """
    NEED_SSH = True
    # Remote directory (a relative path) under which per-target upload
    # directories are created.
    REMOTE_DEST_DIR = 'tmp'

    def __init__(self, glob_root, rel_globs, target, ssh_host, ssh_opts):
        """Store the target and SSH settings.

        Each entry of ssh_opts becomes a `-o<opt>` argument.  `-q` is added
        when INFO logging is not enabled for this suite's logger.
        """
        super().__init__(glob_root, rel_globs)
        self.target = target
        self.ssh_host = ssh_host
        self.ssh_opts = ['-o' + opt for opt in ssh_opts]
        if not self.logger.isEnabledFor(logging.INFO):
            self.ssh_opts.append('-q')

    def _build_cmd(self, base_cmd, *args):
        """Return [base_cmd, *ssh options, *args] as a list."""
        cmd = [base_cmd]
        cmd.extend(self.ssh_opts)
        cmd.extend(args)
        return cmd

    def _paths_basenames(self, paths):
        """Return a generator over the basename of each path."""
        return (os.path.basename(path) for path in paths)

    def _run_script(self, script, *args):
        """Run a bash script on the SSH host with the given arguments.

        Raises subprocess.CalledProcessError if ssh exits non-zero.
        """
        # SSH will use a shell to run our bash command, so we have to
        # quote our arguments.
        # self.__class__.__name__ provides $0 for the script, which makes a
        # nicer message if there's an error.
        # shlex.quote replaces pipes.quote: the pipes module is deprecated
        # and was removed in Python 3.13.
        quoted_args = [shlex.quote(arg) for arg in args]
        subprocess.check_call(self._build_cmd(
                'ssh', self.ssh_host, 'bash', '-ec', shlex.quote(script),
                self.__class__.__name__, *quoted_args))

    def upload_files(self, paths):
        """Create the remote target directory and scp all paths into it.

        Raises subprocess.CalledProcessError if ssh or scp exits non-zero.
        """
        # Materialize once: paths is iterated for logging and again for scp.
        paths = list(paths)
        dest_dir = os.path.join(self.REMOTE_DEST_DIR, self.target)
        mkdir = self._build_cmd('ssh', self.ssh_host, 'install', '-d', dest_dir)
        subprocess.check_call(mkdir)
        # Log each file as the base class does, even though they are all
        # copied by a single scp invocation.
        for path in paths:
            self.logger.info("Uploading %s", path)
        cmd = self._build_cmd('scp', *paths)
        cmd.append('{}:{}'.format(self.ssh_host, dest_dir))
        subprocess.check_call(cmd)
185
186
class DebianPackageSuite(DistroPackageSuite):
    """Publish uploaded .deb files with aptly on the repository server."""
    # Remote script.  $1 is the upload directory, $2 the distribution name,
    # and the remaining arguments are the package file names.
    APT_SCRIPT = """
set -x
cd "$1"; shift
DISTNAME=$1; shift
set +e
aptly repo search "$DISTNAME" "${@%.deb}" >/dev/null 2>&1
RET=$?
set -e
if [[ $RET -eq 0 ]]; then
  echo "Not adding $@, it is already present in repo $DISTNAME"
  rm "$@"
else
  aptly repo add -remove-files "$DISTNAME" "$@"
  aptly publish update "$DISTNAME" filesystem:"${DISTNAME%-*}":
fi
"""

    def __init__(self, glob_root, rel_globs, target, ssh_host, ssh_opts, repo):
        """Build the target -> "<codename>-<repo>" distribution name table."""
        super().__init__(glob_root, rel_globs, target, ssh_host, ssh_opts)
        codenames = (
            ('debian8', 'jessie'),
            ('debian9', 'stretch'),
            ('debian10', 'buster'),
            ('ubuntu1404', 'trusty'),
            ('ubuntu1604', 'xenial'),
            ('ubuntu1804', 'bionic'),
            ('ubuntu2004', 'focal'),
        )
        self.TARGET_DISTNAMES = {
            distro: codename + '-' + repo for distro, codename in codenames}

    def post_uploads(self, paths):
        """Run APT_SCRIPT remotely on the uploaded files' basenames."""
        remote_dir = self.REMOTE_DEST_DIR + '/' + self.target
        distname = self.TARGET_DISTNAMES[self.target]
        package_names = self._paths_basenames(paths)
        self._run_script(self.APT_SCRIPT, remote_dir, distname, *package_names)
221
222
class RedHatPackageSuite(DistroPackageSuite):
    """Sign uploaded .rpm files and add them to a createrepo repository."""
    # Remote script.  $1 is the upload directory, $2 the repository
    # directory, and the remaining arguments are the package file names.
    CREATEREPO_SCRIPT = """
cd "$1"; shift
REPODIR=$1; shift
rpmsign --addsign "$@" </dev/null
mv "$@" "$REPODIR"
createrepo "$REPODIR"
"""
    REPO_ROOT = '/var/www/rpm.arvados.org/'

    def __init__(self, glob_root, rel_globs, target, ssh_host, ssh_opts, repo):
        """Build the target -> repository subdirectory table for repo."""
        super().__init__(glob_root, rel_globs, target, ssh_host, ssh_opts)
        centos7_dir = 'CentOS/7/%s/x86_64/' % repo
        self.TARGET_REPODIRS = {'centos7': centos7_dir}

    def post_uploads(self, paths):
        """Run CREATEREPO_SCRIPT remotely on the uploaded files' basenames."""
        target_subdir = self.TARGET_REPODIRS[self.target]
        repo_dir = os.path.join(self.REPO_ROOT, target_subdir)
        remote_dir = self.REMOTE_DEST_DIR + '/' + self.target
        package_names = self._paths_basenames(paths)
        self._run_script(
            self.CREATEREPO_SCRIPT, remote_dir, repo_dir, *package_names)
244
245
246 def _define_suite(suite_class, *rel_globs, **kwargs):
247     return functools.partial(suite_class, rel_globs=rel_globs, **kwargs)
248
# Suite factories keyed by target name.  Only the suites that need no
# command-line configuration are listed here; parse_arguments() adds the
# distribution package suites.
PACKAGE_SUITES = {
    'python': _define_suite(
        PythonPackageSuite,
        'sdk/python/dist/*.tar.gz',
        'sdk/cwl/dist/*.tar.gz',
        'services/fuse/dist/*.tar.gz',
    ),
    'gems': _define_suite(
        GemPackageSuite,
        'sdk/ruby/*.gem',
        'sdk/cli/*.gem',
        'services/login-sync/*.gem',
    ),
}
261
def parse_arguments(arguments):
    """Parse and validate the command line.

    Arguments:
    * arguments: list of command-line argument strings, without the
      program name.

    Side effect: registers the Debian/Ubuntu and CentOS suites in the
    module-level PACKAGE_SUITES, bound to the chosen --repo.

    Returns the argparse namespace.  Exits through parser.error() when the
    workspace is unset, a target is unknown, or a selected target needs
    --ssh-host or --repo and it was not given.
    """
    parser = argparse.ArgumentParser(
        description="Upload Arvados packages to various repositories")
    parser.add_argument(
        '--workspace', '-W', default=os.environ.get('WORKSPACE'),
        help="Arvados source directory with built packages to upload")
    parser.add_argument(
        '--ssh-host', '-H',
        help="Host specification for distribution repository server")
    parser.add_argument('-o', action='append', default=[], dest='ssh_opts',
                        metavar='OPTION', help="Pass option to `ssh -o`")
    parser.add_argument('--verbose', '-v', action='count', default=0,
                        help="Log more information and subcommand output")
    parser.add_argument(
        '--repo', choices=['dev', 'testing'],
        help="Whether to upload to dev (nightly) or testing (release candidate) repository")

    parser.add_argument(
        'targets', nargs='*', default=['all'], metavar='target',
        help="Upload packages to these targets (default all)\nAvailable targets: " +
        ', '.join(sorted(PACKAGE_SUITES.keys())))
    args = parser.parse_args(arguments)
    # 'all' is expanded before the distribution suites are registered
    # below, so it covers only what PACKAGE_SUITES holds at this point.
    if 'all' in args.targets:
        args.targets = list(PACKAGE_SUITES.keys())

    if args.workspace is None:
        parser.error("workspace not set from command line or environment")

    for target in ['debian8', 'debian9', 'debian10', 'ubuntu1404', 'ubuntu1604', 'ubuntu1804', 'ubuntu2004']:
        PACKAGE_SUITES[target] = _define_suite(
            DebianPackageSuite, os.path.join('packages', target, '*.deb'),
            target=target, repo=args.repo)
    for target in ['centos7']:
        PACKAGE_SUITES[target] = _define_suite(
            RedHatPackageSuite, os.path.join('packages', target, '*.rpm'),
            target=target, repo=args.repo)

    for target in args.targets:
        try:
            suite_def = PACKAGE_SUITES[target]
        except KeyError:
            parser.error("unrecognized target {!r}".format(target))
        suite_class = suite_def.func
        if suite_class.NEED_SSH and (args.ssh_host is None):
            parser.error(
                "--ssh-host must be specified to upload distribution packages")
        # Suites bound to a repo build their repository names from it.
        # Without --repo the value is None, which would fail (or produce a
        # "None" path component) only once the suite is constructed.
        if ('repo' in suite_def.keywords) and (args.repo is None):
            parser.error(
                "--repo must be specified to upload distribution packages")
    return args
308
def setup_logger(stream_dest, args):
    """Attach a stream handler to the 'arvados-dev.upload' logger.

    The level starts at WARNING and drops by 10 for each unit of
    args.verbose, never going below 1.
    """
    handler = logging.StreamHandler(stream_dest)
    formatter = logging.Formatter(
        '%(asctime)s %(name)s[%(process)d] %(levelname)s: %(message)s',
        '%Y-%m-%d %H:%M:%S')
    handler.setFormatter(formatter)
    upload_logger = logging.getLogger('arvados-dev.upload')
    upload_logger.addHandler(handler)
    level = logging.WARNING - (10 * args.verbose)
    if level < 1:
        level = 1
    upload_logger.setLevel(level)
317
def build_suite_and_upload(target, since_timestamp, args):
    """Instantiate the suite for target and upload its new packages.

    SSH settings from args are passed only to suites whose class sets
    NEED_SSH.
    """
    suite_def = PACKAGE_SUITES[target]
    if suite_def.func.NEED_SSH:
        ssh_kwargs = dict(ssh_host=args.ssh_host, ssh_opts=args.ssh_opts)
    else:
        ssh_kwargs = {}
    suite_def(args.workspace, **ssh_kwargs).update_packages(since_timestamp)
325
def main(arguments, stdout=sys.stdout, stderr=sys.stderr):
    """Upload packages for every requested target.

    Each target has its own marker file under <workspace>/packages.  Its
    previous timestamp selects what to upload, and it is refreshed once
    that target's upload has finished.
    """
    args = parse_arguments(arguments)
    setup_logger(stderr, args)

    packages_dir = os.path.join(args.workspace, 'packages')
    for target in args.targets:
        marker = TimestampFile(
            os.path.join(packages_dir, '.last_upload_%s' % target))
        previous_upload = marker.last_upload()
        build_suite_and_upload(target, previous_upload, args)
        marker.update()


if __name__ == '__main__':
    main(sys.argv[1:])