We use Python3 now, exclusively.
#!/usr/bin/env python3

# Copyright (C) The Arvados Authors. All rights reserved.
#
# SPDX-License-Identifier: AGPL-3.0

import argparse
import functools
import glob
import locale
import logging
import os
import re
import shlex
import shutil
import subprocess
import sys
import time

def run_and_grep(cmd, read_output, *regexps,
                 encoding=locale.getpreferredencoding(), **popen_kwargs):
    """Run a subprocess and capture output lines matching regexps.

    Arguments:
    * cmd: The command to run, as a list or string, as for subprocess.Popen.
    * read_output: 'stdout' or 'stderr', the name of the output stream to read.
    Remaining arguments are regexps to match output, as strings or compiled
    regexp objects.  Output lines matching any regexp will be captured.

    Keyword arguments:
    * encoding: The encoding used to decode the subprocess output.
    Remaining keyword arguments are passed directly to subprocess.Popen.

    Returns 2-tuple (subprocess returncode, list of matched output lines).
    """
    regexps = [regexp if hasattr(regexp, 'search') else re.compile(regexp)
               for regexp in regexps]
    popen_kwargs[read_output] = subprocess.PIPE
    proc = subprocess.Popen(cmd, **popen_kwargs)
    with open(getattr(proc, read_output).fileno(), encoding=encoding) as output:
        matched_lines = []
        for line in output:
            if any(regexp.search(line) for regexp in regexps):
                matched_lines.append(line)
            if read_output == 'stderr':
                print(line, file=sys.stderr, end='')
    return proc.wait(), matched_lines

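# Illustrative use of run_and_grep (a sketch with a made-up command and
# pattern, not part of the upload flow): capture "already exists" errors from
# a push command while still relaying its stderr, then decide whether a
# nonzero exit status is fatal.
#
#     returncode, dupes = run_and_grep(
#         ['some-push-tool', 'pkg.tar.gz'], 'stderr', r'already exists')
#     if returncode != 0 and not dupes:
#         raise subprocess.CalledProcessError(returncode, 'some-push-tool')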

class TimestampFile:
    def __init__(self, path):
        self.path = path
        self.start_time = time.time()

    def last_upload(self):
        try:
            return os.path.getmtime(self.path)
        except OSError:
            return -1

    def update(self):
        # Touch the file, then set its mtime to the time this run started, so
        # packages built while the upload was in progress are not skipped on
        # the next run.
        os.close(os.open(self.path, os.O_CREAT | os.O_APPEND))
        os.utime(self.path, (time.time(), self.start_time))

class PackageSuite:
    NEED_SSH = False

    def __init__(self, glob_root, rel_globs):
        logger_part = getattr(self, 'LOGGER_PART', os.path.basename(glob_root))
        self.logger = logging.getLogger('arvados-dev.upload.' + logger_part)
        self.globs = [os.path.join(glob_root, rel_glob)
                      for rel_glob in rel_globs]

    def files_to_upload(self, since_timestamp):
        for abs_glob in self.globs:
            for path in glob.glob(abs_glob):
                if os.path.getmtime(path) >= since_timestamp:
                    yield path

    def upload_file(self, path):
        raise NotImplementedError("PackageSuite.upload_file")

    def upload_files(self, paths):
        for path in paths:
            self.logger.info("Uploading %s", path)
            self.upload_file(path)

    def post_uploads(self, paths):
        pass

    def update_packages(self, since_timestamp):
        upload_paths = list(self.files_to_upload(since_timestamp))
        if upload_paths:
            self.upload_files(upload_paths)
            self.post_uploads(upload_paths)


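# The concrete suites below all follow the same contract: implement
# upload_file() for a single artifact, and optionally override post_uploads()
# for follow-up work on the whole batch.  A minimal hypothetical suite
# (illustrative only, not part of this script) would look like:
#
#     class ExamplePackageSuite(PackageSuite):
#         LOGGER_PART = 'example'
#
#         def upload_file(self, path):
#             subprocess.check_call(['example-upload-tool', path])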
class PythonPackageSuite(PackageSuite):
    LOGGER_PART = 'python'
    REUPLOAD_REGEXPS = [
        re.compile(
            r'^error: Upload failed \(400\): A file named "[^"]+" already exists\b'),
        re.compile(
            r'^error: Upload failed \(400\): File already exists\b'),
        re.compile(
            r'^error: Upload failed \(400\): Only one sdist may be uploaded per release\b'),
    ]

    def __init__(self, glob_root, rel_globs):
        super().__init__(glob_root, rel_globs)
        self.seen_packages = set()

    def upload_file(self, path):
        src_dir = os.path.dirname(os.path.dirname(path))
        if src_dir in self.seen_packages:
            return
        self.seen_packages.add(src_dir)
        # We must run `sdist` before `upload`: `upload` only uploads
        # distributions generated earlier in the same command; it doesn't
        # know how to upload distributions already on disk.  We write the
        # result to a dedicated directory to avoid interfering with our
        # timestamp tracking.
        cmd = ['python3', 'setup.py']
        if not self.logger.isEnabledFor(logging.INFO):
            cmd.append('--quiet')
        cmd.extend(['sdist', '--dist-dir', '.upload_dist', 'upload'])
        upload_returncode, repushed = run_and_grep(
            cmd, 'stderr', *self.REUPLOAD_REGEXPS, cwd=src_dir)
        if (upload_returncode != 0) and not repushed:
            raise subprocess.CalledProcessError(upload_returncode, cmd)
        shutil.rmtree(os.path.join(src_dir, '.upload_dist'))


class GemPackageSuite(PackageSuite):
    LOGGER_PART = 'gems'
    REUPLOAD_REGEXP = re.compile(r'^Repushing of gem versions is not allowed\.$')

    def upload_file(self, path):
        cmd = ['gem', 'push', path]
        push_returncode, repushed = run_and_grep(cmd, 'stdout', self.REUPLOAD_REGEXP)
        if (push_returncode != 0) and not repushed:
            raise subprocess.CalledProcessError(push_returncode, cmd)


class DistroPackageSuite(PackageSuite):
    NEED_SSH = True
    REMOTE_DEST_DIR = 'tmp'

    def __init__(self, glob_root, rel_globs, target, ssh_host, ssh_opts):
        super().__init__(glob_root, rel_globs)
        self.target = target
        self.ssh_host = ssh_host
        self.ssh_opts = ['-o' + opt for opt in ssh_opts]
        if not self.logger.isEnabledFor(logging.INFO):
            self.ssh_opts.append('-q')

    def _build_cmd(self, base_cmd, *args):
        cmd = [base_cmd]
        cmd.extend(self.ssh_opts)
        cmd.extend(args)
        return cmd

    def _paths_basenames(self, paths):
        return (os.path.basename(path) for path in paths)

    def _run_script(self, script, *args):
        # SSH will use a shell to run our bash command, so we have to
        # quote our arguments.
        # self.__class__.__name__ provides $0 for the script, which makes a
        # nicer message if there's an error.
        subprocess.check_call(self._build_cmd(
                'ssh', self.ssh_host, 'bash', '-ec', shlex.quote(script),
                self.__class__.__name__, *(shlex.quote(s) for s in args)))

    def upload_files(self, paths):
        dest_dir = os.path.join(self.REMOTE_DEST_DIR, self.target)
        mkdir = self._build_cmd('ssh', self.ssh_host, 'install', '-d', dest_dir)
        subprocess.check_call(mkdir)
        cmd = self._build_cmd('scp', *paths)
        cmd.append('{}:{}'.format(self.ssh_host, dest_dir))
        subprocess.check_call(cmd)


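# For reference, DistroPackageSuite.upload_files() above boils down to two
# commands per target (the host name and -o option here are placeholders):
#
#     ssh -oSomeOption repo.example.com install -d tmp/<target>
#     scp -oSomeOption /path/to/pkg1 /path/to/pkg2 repo.example.com:tmp/<target>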
class DebianPackageSuite(DistroPackageSuite):
    FREIGHT_SCRIPT = """
cd "$1"; shift
DISTNAME=$1; shift
freight add "$@" "apt/$DISTNAME"
freight cache "apt/$DISTNAME"
rm "$@"
"""

    def __init__(self, glob_root, rel_globs, target, ssh_host, ssh_opts, repo):
        super().__init__(glob_root, rel_globs, target, ssh_host, ssh_opts)
        self.TARGET_DISTNAMES = {
            'debian8': 'jessie-'+repo,
            'debian9': 'stretch-'+repo,
            'debian10': 'buster-'+repo,
            'ubuntu1404': 'trusty-'+repo,
            'ubuntu1604': 'xenial-'+repo,
            'ubuntu1804': 'bionic-'+repo,
            'ubuntu2004': 'focal-'+repo,
            }

    def post_uploads(self, paths):
        self._run_script(self.FREIGHT_SCRIPT, self.REMOTE_DEST_DIR + '/' + self.target,
                         self.TARGET_DISTNAMES[self.target],
                         *self._paths_basenames(paths))


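# DebianPackageSuite.post_uploads() above hands FREIGHT_SCRIPT to bash on the
# repository host.  For the debian10 target with --repo dev it runs roughly
# (host and package names are placeholders):
#
#     ssh repo.example.com bash -ec '<FREIGHT_SCRIPT>' DebianPackageSuite \
#         tmp/debian10 buster-dev pkg1.deb pkg2.deb
#
# i.e. the script cds into tmp/debian10, runs `freight add`/`freight cache`
# for the buster-dev apt distribution, then removes the uploaded .debs.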
class RedHatPackageSuite(DistroPackageSuite):
    CREATEREPO_SCRIPT = """
cd "$1"; shift
REPODIR=$1; shift
rpmsign --addsign "$@" </dev/null
mv "$@" "$REPODIR"
createrepo "$REPODIR"
"""
    REPO_ROOT = '/var/www/rpm.arvados.org/'

    def __init__(self, glob_root, rel_globs, target, ssh_host, ssh_opts, repo):
        super().__init__(glob_root, rel_globs, target, ssh_host, ssh_opts)
        self.TARGET_REPODIRS = {
            'centos7': 'CentOS/7/%s/x86_64/' % repo,
        }

    def post_uploads(self, paths):
        repo_dir = os.path.join(self.REPO_ROOT,
                                self.TARGET_REPODIRS[self.target])
        self._run_script(self.CREATEREPO_SCRIPT, self.REMOTE_DEST_DIR + '/' + self.target,
                         repo_dir, *self._paths_basenames(paths))


def _define_suite(suite_class, *rel_globs, **kwargs):
    return functools.partial(suite_class, rel_globs=rel_globs, **kwargs)

PACKAGE_SUITES = {
    'python': _define_suite(PythonPackageSuite,
                            'sdk/python/dist/*.tar.gz',
                            'sdk/cwl/dist/*.tar.gz',
                            'services/fuse/dist/*.tar.gz',
                        ),
    'gems': _define_suite(GemPackageSuite,
                          'sdk/ruby/*.gem',
                          'sdk/cli/*.gem',
                          'services/login-sync/*.gem',
                      ),
    }

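# Each entry above is a functools.partial with rel_globs (and, for the distro
# suites added in parse_arguments(), target/repo) already bound; only the
# glob root and any SSH details are supplied later.  Illustratively:
#
#     suite = PACKAGE_SUITES['gems'](args.workspace)
#     suite.update_packages(since_timestamp)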
def parse_arguments(arguments):
    parser = argparse.ArgumentParser(
        description="Upload Arvados packages to various repositories")
    parser.add_argument(
        '--workspace', '-W', default=os.environ.get('WORKSPACE'),
        help="Arvados source directory with built packages to upload")
    parser.add_argument(
        '--ssh-host', '-H',
        help="Host specification for distribution repository server")
    parser.add_argument('-o', action='append', default=[], dest='ssh_opts',
                        metavar='OPTION', help="Pass option to `ssh -o`")
    parser.add_argument('--verbose', '-v', action='count', default=0,
                        help="Log more information and subcommand output")
    parser.add_argument(
        '--repo', choices=['dev', 'testing'],
        help="Whether to upload to dev (nightly) or testing (release candidate) repository")

    parser.add_argument(
        'targets', nargs='*', default=['all'], metavar='target',
        help="Upload packages to these targets (default all)\nAvailable targets: " +
        ', '.join(sorted(PACKAGE_SUITES.keys())))
    args = parser.parse_args(arguments)
    if 'all' in args.targets:
        args.targets = list(PACKAGE_SUITES.keys())

    if args.workspace is None:
        parser.error("workspace not set from command line or environment")

    for target in ['debian8', 'debian9', 'debian10', 'ubuntu1404', 'ubuntu1604', 'ubuntu1804', 'ubuntu2004']:
        PACKAGE_SUITES[target] = _define_suite(
            DebianPackageSuite, os.path.join('packages', target, '*.deb'),
            target=target, repo=args.repo)
    for target in ['centos7']:
        PACKAGE_SUITES[target] = _define_suite(
            RedHatPackageSuite, os.path.join('packages', target, '*.rpm'),
            target=target, repo=args.repo)

    for target in args.targets:
        try:
            suite_class = PACKAGE_SUITES[target].func
        except KeyError:
            parser.error("unrecognized target {!r}".format(target))
        if suite_class.NEED_SSH and (args.ssh_host is None):
            parser.error(
                "--ssh-host must be specified to upload distribution packages")
    return args

def setup_logger(stream_dest, args):
    log_handler = logging.StreamHandler(stream_dest)
    log_handler.setFormatter(logging.Formatter(
            '%(asctime)s %(name)s[%(process)d] %(levelname)s: %(message)s',
            '%Y-%m-%d %H:%M:%S'))
    logger = logging.getLogger('arvados-dev.upload')
    logger.addHandler(log_handler)
    # Each -v lowers the threshold one level: WARNING by default, INFO with
    # -v, DEBUG with -vv.  max(1, ...) keeps the level above NOTSET.
    logger.setLevel(max(1, logging.WARNING - (10 * args.verbose)))

def build_suite_and_upload(target, since_timestamp, args):
    suite_def = PACKAGE_SUITES[target]
    kwargs = {}
    if suite_def.func.NEED_SSH:
        kwargs.update(ssh_host=args.ssh_host, ssh_opts=args.ssh_opts)
    suite = suite_def(args.workspace, **kwargs)
    suite.update_packages(since_timestamp)

def main(arguments, stdout=sys.stdout, stderr=sys.stderr):
    args = parse_arguments(arguments)
    setup_logger(stderr, args)

    for target in args.targets:
        ts_file = TimestampFile(os.path.join(args.workspace, 'packages',
                                             '.last_upload_%s' % target))
        last_upload_ts = ts_file.last_upload()
        build_suite_and_upload(target, last_upload_ts, args)
        ts_file.update()

if __name__ == '__main__':
    main(sys.argv[1:])
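# Example invocation (illustrative; the host and -o option are placeholders):
#     WORKSPACE=/path/to/arvados ./run_upload_packages.py \
#         -H repo.example.com -o StrictHostKeyChecking=no --repo dev \
#         python gems debian10 centos7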