Iterate over packages one by one in run_upload_packages.py
[arvados-dev.git] / jenkins / run_upload_packages.py
1 #!/usr/bin/env python3
2
3 # Copyright (C) The Arvados Authors. All rights reserved.
4 #
5 # SPDX-License-Identifier: AGPL-3.0
6
7 import argparse
8 import functools
9 import glob
10 import locale
11 import logging
12 import os
13 import pipes
14 import re
15 import shutil
16 import subprocess
17 import sys
18 import time
19
def run_and_grep(cmd, read_output, *regexps,
                 encoding=locale.getpreferredencoding(), **popen_kwargs):
    """Run a subprocess and capture output lines matching regexps.

    Arguments:
    * cmd: The command to run, as a list or string, as for subprocess.Popen.
    * read_output: 'stdout' or 'stderr', the name of the output stream to read.
    Remaining arguments are regexps to match output, as strings or compiled
    regexp objects.  Output lines matching any regexp will be captured.

    Keyword arguments:
    * encoding: The encoding used to decode the subprocess output.
    Remaining keyword arguments are passed directly to subprocess.Popen.

    When read_output is 'stderr', every line read is also echoed to this
    process's stderr, whether or not it matched.

    Returns 2-tuple (subprocess returncode, list of matched output lines).
    """
    regexps = [regexp if hasattr(regexp, 'search') else re.compile(regexp)
               for regexp in regexps]
    popen_kwargs[read_output] = subprocess.PIPE
    matched_lines = []
    # Using Popen as a context manager closes the pipe and waits for the
    # child on the way out, even if decoding or matching raises.
    with subprocess.Popen(cmd, **popen_kwargs) as proc:
        pipe = getattr(proc, read_output)
        # closefd=False: the Popen pipe object remains the sole owner of the
        # file descriptor.  Letting this wrapper close it as well would make
        # the pipe object close the same descriptor number a second time.
        with open(pipe.fileno(), encoding=encoding, closefd=False) as output:
            for line in output:
                if any(regexp.search(line) for regexp in regexps):
                    matched_lines.append(line)
                if read_output == 'stderr':
                    print(line, file=sys.stderr, end='')
    return proc.returncode, matched_lines
48
49
class TimestampFile:
    """Marker file whose modification time records the last upload.

    The construction time is remembered so that update() can stamp the
    marker with the moment this object was created rather than the moment
    update() is called.
    """

    def __init__(self, path):
        self.path = path
        self.start_time = time.time()

    def last_upload(self):
        """Return the marker's mtime, or -1 when it cannot be read."""
        try:
            mtime = os.path.getmtime(self.path)
        except OSError:
            # EnvironmentError is an alias of OSError on Python 3.
            mtime = -1
        return mtime

    def update(self):
        """Create the marker if necessary and set its mtime to start_time."""
        fd = os.open(self.path, os.O_APPEND | os.O_CREAT)
        os.close(fd)
        access_time = time.time()
        os.utime(self.path, (access_time, self.start_time))
64
65
class PackageSuite:
    """Base class for a group of package files uploaded the same way.

    Subclasses implement upload_file() and may override upload_files() and
    post_uploads().  A subclass may define LOGGER_PART to name its logger;
    otherwise the basename of glob_root is used.
    """
    NEED_SSH = False

    def __init__(self, glob_root, rel_globs):
        default_part = os.path.basename(glob_root)
        suffix = getattr(self, 'LOGGER_PART', default_part)
        self.logger = logging.getLogger('arvados-dev.upload.' + suffix)
        self.globs = [os.path.join(glob_root, pattern) for pattern in rel_globs]

    def files_to_upload(self, since_timestamp):
        """Yield matching paths whose mtime is at or after since_timestamp."""
        yield from (
            candidate
            for pattern in self.globs
            for candidate in glob.glob(pattern)
            if os.path.getmtime(candidate) >= since_timestamp
        )

    def upload_file(self, path):
        """Upload a single file; must be provided by subclasses."""
        raise NotImplementedError("PackageSuite.upload_file")

    def upload_files(self, paths):
        """Log and upload each path in turn."""
        for each_path in paths:
            self.logger.info("Uploading %s", each_path)
            self.upload_file(each_path)

    def post_uploads(self, paths):
        """Hook run after a non-empty batch is uploaded; no-op by default."""

    def update_packages(self, since_timestamp):
        """Upload everything changed since since_timestamp, then run the hook."""
        pending = list(self.files_to_upload(since_timestamp))
        if not pending:
            return
        self.upload_files(pending)
        self.post_uploads(pending)
97
98
class PythonPackageSuite(PackageSuite):
    """Upload Python distributions by running setup.py in each source tree.

    Output lines matching REUPLOAD_REGEXPS mean the distribution was already
    uploaded; a failed upload that produced such a line is not an error.
    """
    LOGGER_PART = 'python'
    REUPLOAD_REGEXPS = [
        re.compile(
            r'^error: Upload failed \(400\): A file named "[^"]+" already exists\b'),
        re.compile(
            r'^error: Upload failed \(400\): File already exists\b'),
        re.compile(
            r'^error: Upload failed \(400\): Only one sdist may be uploaded per release\b'),
    ]

    def __init__(self, glob_root, rel_globs):
        super().__init__(glob_root, rel_globs)
        # Source directories already handled, so each is uploaded only once.
        self.seen_packages = set()

    def upload_file(self, path):
        """Build and upload the package whose source tree contains path.

        The source directory is taken to be two levels above path.  Raises
        subprocess.CalledProcessError if the upload command fails without
        reporting that the distribution already exists.
        """
        src_dir = os.path.dirname(os.path.dirname(path))
        if src_dir in self.seen_packages:
            return
        self.seen_packages.add(src_dir)
        # We also must run `sdist` before `upload`: `upload` uploads any
        # distributions previously generated in the command.  It doesn't
        # know how to upload distributions already on disk.  We write the
        # result to a dedicated directory to avoid interfering with our
        # timestamp tracking.
        cmd = ['python3', 'setup.py']
        if not self.logger.isEnabledFor(logging.INFO):
            cmd.append('--quiet')
        cmd.extend(['bdist_wheel', '--dist-dir', '.upload_dist'])
        cmd.extend(['sdist', '--dist-dir', '.upload_dist'])
        cmd.extend(['upload'])
        upload_dist = os.path.join(src_dir, '.upload_dist')
        try:
            upload_returncode, repushed = run_and_grep(
                cmd, 'stderr', *self.REUPLOAD_REGEXPS, cwd=src_dir)
            if (upload_returncode != 0) and not repushed:
                raise subprocess.CalledProcessError(upload_returncode, cmd)
        except Exception:
            # Don't leave build output behind when the upload fails.  The
            # directory may not exist yet, and a cleanup problem must not
            # hide the original error, so removal errors are ignored here.
            shutil.rmtree(upload_dist, ignore_errors=True)
            raise
        shutil.rmtree(upload_dist)
135
136
class GemPackageSuite(PackageSuite):
    """Upload Ruby gems with `gem push`."""
    LOGGER_PART = 'gems'
    REUPLOAD_REGEXP = re.compile(r'^Repushing of gem versions is not allowed\.$')

    def upload_file(self, path):
        """Push one gem.

        A failed push is tolerated when the output contains a line matching
        REUPLOAD_REGEXP; any other failure raises
        subprocess.CalledProcessError.
        """
        push_cmd = ['gem', 'push', path]
        exit_status, already_pushed = run_and_grep(
            push_cmd, 'stdout', self.REUPLOAD_REGEXP)
        if exit_status == 0 or already_pushed:
            return
        raise subprocess.CalledProcessError(exit_status, push_cmd)
146
147
class DistroPackageSuite(PackageSuite):
    """Base class for suites that copy packages to a remote host over SSH.

    Files are copied with scp into REMOTE_DEST_DIR/<target> on ssh_host;
    subclasses then run a script there from post_uploads().
    """
    NEED_SSH = True
    REMOTE_DEST_DIR = 'tmp'

    def __init__(self, glob_root, rel_globs, target, ssh_host, ssh_opts):
        super().__init__(glob_root, rel_globs)
        self.target = target
        self.ssh_host = ssh_host
        # Each option becomes a single `-oOPTION` argument for ssh and scp.
        options = ['-o' + opt for opt in ssh_opts]
        if not self.logger.isEnabledFor(logging.INFO):
            options.append('-q')
        self.ssh_opts = options

    def _build_cmd(self, base_cmd, *args):
        """Return [base_cmd, <ssh options>, *args] as a new list."""
        return [base_cmd] + self.ssh_opts + list(args)

    def _paths_basenames(self, paths):
        """Return a lazy iterable of the basename of each path."""
        return (os.path.basename(each) for each in paths)

    def _run_script(self, script, *args):
        """Run a bash script on the remote host with the given arguments."""
        # SSH will use a shell to run our bash command, so we have to
        # quote our arguments.
        # self.__class__.__name__ provides $0 for the script, which makes a
        # nicer message if there's an error.
        # NOTE(review): pipes.quote is the same function as shlex.quote, and
        # the pipes module is gone in Python 3.13; migrate when the file's
        # imports are next touched.
        remote_args = ['bash', '-ec', pipes.quote(script),
                       self.__class__.__name__]
        remote_args.extend(pipes.quote(arg) for arg in args)
        subprocess.check_call(
            self._build_cmd('ssh', self.ssh_host, *remote_args))

    def upload_files(self, paths):
        """Create the remote destination directory and scp all paths into it."""
        dest_dir = os.path.join(self.REMOTE_DEST_DIR, self.target)
        subprocess.check_call(
            self._build_cmd('ssh', self.ssh_host, 'install', '-d', dest_dir))
        scp_cmd = self._build_cmd('scp', *paths)
        scp_cmd.append('{}:{}'.format(self.ssh_host, dest_dir))
        subprocess.check_call(scp_cmd)
185
186
class DebianPackageSuite(DistroPackageSuite):
    """Add uploaded .deb files to a remote aptly repository and publish it."""

    # Remote script.  $1 is the upload directory, $2 the aptly repository
    # name, and the rest are package file names.  Packages the repository
    # search already finds are deleted instead of added; the repository is
    # then published.
    APT_SCRIPT = """
cd "$1"; shift
DISTNAME=$1; shift
for package in "$@"; do
  set +e
  aptly repo search "$DISTNAME" "${package%.deb}" >/dev/null 2>&1
  RET=$?
  set -e
  if [[ $RET -eq 0 ]]; then
    echo "Not adding $package, it is already present in repo $DISTNAME"
    rm "$package"
  else
    aptly repo add -remove-files "$DISTNAME" "$package"
  fi
done
aptly publish update "$DISTNAME" filesystem:"${DISTNAME%-*}":
"""

    def __init__(self, glob_root, rel_globs, target, ssh_host, ssh_opts, repo):
        super().__init__(glob_root, rel_globs, target, ssh_host, ssh_opts)
        # Build target -> "<codename>-<repo>" repository names.
        codenames = (
            ('debian8', 'jessie'),
            ('debian9', 'stretch'),
            ('debian10', 'buster'),
            ('ubuntu1404', 'trusty'),
            ('ubuntu1604', 'xenial'),
            ('ubuntu1804', 'bionic'),
            ('ubuntu2004', 'focal'),
        )
        self.TARGET_DISTNAMES = {
            target_name: codename + '-' + repo
            for target_name, codename in codenames
        }

    def post_uploads(self, paths):
        """Run APT_SCRIPT remotely against the files that were just copied."""
        remote_dir = self.REMOTE_DEST_DIR + '/' + self.target
        distname = self.TARGET_DISTNAMES[self.target]
        self._run_script(self.APT_SCRIPT, remote_dir, distname,
                         *self._paths_basenames(paths))
222
223
class RedHatPackageSuite(DistroPackageSuite):
    """Sign uploaded RPMs remotely and move them into a repository directory."""

    # Remote script.  $1 is the upload directory, $2 the repository
    # directory, and the rest are package file names.
    CREATEREPO_SCRIPT = """
cd "$1"; shift
REPODIR=$1; shift
rpmsign --addsign "$@" </dev/null
mv "$@" "$REPODIR"
createrepo "$REPODIR"
"""
    REPO_ROOT = '/var/www/rpm.arvados.org/'

    def __init__(self, glob_root, rel_globs, target, ssh_host, ssh_opts, repo):
        super().__init__(glob_root, rel_globs, target, ssh_host, ssh_opts)
        # Repository directory for each target, relative to REPO_ROOT.
        centos7_dir = 'CentOS/7/%s/x86_64/' % repo
        self.TARGET_REPODIRS = {'centos7': centos7_dir}

    def post_uploads(self, paths):
        """Run CREATEREPO_SCRIPT remotely against the files just copied."""
        upload_dir = self.REMOTE_DEST_DIR + '/' + self.target
        relative_repo = self.TARGET_REPODIRS[self.target]
        repo_dir = os.path.join(self.REPO_ROOT, relative_repo)
        self._run_script(self.CREATEREPO_SCRIPT, upload_dir, repo_dir,
                         *self._paths_basenames(paths))
245
246
247 def _define_suite(suite_class, *rel_globs, **kwargs):
248     return functools.partial(suite_class, rel_globs=rel_globs, **kwargs)
249
# Suite factories keyed by target name.  Only the targets that need no
# command-line configuration are listed here; parse_arguments() adds the
# distribution targets to this dictionary.
PACKAGE_SUITES = {
    'python': _define_suite(
        PythonPackageSuite,
        'sdk/python/dist/*.tar.gz',
        'sdk/cwl/dist/*.tar.gz',
        'services/fuse/dist/*.tar.gz',
    ),
    'gems': _define_suite(
        GemPackageSuite,
        'sdk/ruby/*.gem',
        'sdk/cli/*.gem',
        'services/login-sync/*.gem',
    ),
}
262
def parse_arguments(arguments):
    """Parse the command line and register the distribution suites.

    Arguments:
    * arguments: The list of command-line arguments, without the program name.

    As a side effect, adds the Debian/Ubuntu and CentOS targets to the
    module-level PACKAGE_SUITES, bound to the requested --repo.

    Exits through parser.error() when the workspace is unset, a target is
    unknown, or a selected distribution target lacks --ssh-host or --repo.

    Returns the parsed argparse namespace; `targets` is always an explicit
    list of target names.
    """
    debian_targets = ['debian8', 'debian9', 'debian10', 'ubuntu1404',
                      'ubuntu1604', 'ubuntu1804', 'ubuntu2004']
    redhat_targets = ['centos7']
    # The distribution targets are only registered after parsing, so name
    # them explicitly here to keep the help text complete.
    available_targets = sorted(
        set(PACKAGE_SUITES).union(debian_targets, redhat_targets))

    parser = argparse.ArgumentParser(
        description="Upload Arvados packages to various repositories")
    parser.add_argument(
        '--workspace', '-W', default=os.environ.get('WORKSPACE'),
        help="Arvados source directory with built packages to upload")
    parser.add_argument(
        '--ssh-host', '-H',
        help="Host specification for distribution repository server")
    parser.add_argument('-o', action='append', default=[], dest='ssh_opts',
                        metavar='OPTION', help="Pass option to `ssh -o`")
    parser.add_argument('--verbose', '-v', action='count', default=0,
                        help="Log more information and subcommand output")
    parser.add_argument(
        '--repo', choices=['dev', 'testing'],
        help="Whether to upload to dev (nightly) or testing (release candidate) repository")

    parser.add_argument(
        'targets', nargs='*', default=['all'], metavar='target',
        help="Upload packages to these targets (default all)\nAvailable targets: " +
        ', '.join(available_targets))
    args = parser.parse_args(arguments)
    # NOTE: 'all' is expanded before the distribution suites are registered
    # below, so on a first call it covers only the suites already present in
    # PACKAGE_SUITES.
    if 'all' in args.targets:
        args.targets = list(PACKAGE_SUITES.keys())

    if args.workspace is None:
        parser.error("workspace not set from command line or environment")

    for target in debian_targets:
        PACKAGE_SUITES[target] = _define_suite(
            DebianPackageSuite, os.path.join('packages', target, '*.deb'),
            target=target, repo=args.repo)
    for target in redhat_targets:
        PACKAGE_SUITES[target] = _define_suite(
            RedHatPackageSuite, os.path.join('packages', target, '*.rpm'),
            target=target, repo=args.repo)

    for target in args.targets:
        try:
            suite_def = PACKAGE_SUITES[target]
        except KeyError:
            parser.error("unrecognized target {!r}".format(target))
        if suite_def.func.NEED_SSH and (args.ssh_host is None):
            parser.error(
                "--ssh-host must be specified to upload distribution packages")
        # Without --repo the suite would be built with repo=None, which
        # either crashes while composing the repository name or targets a
        # directory literally named "None".  Reject it up front instead.
        if ('repo' in suite_def.keywords) and (args.repo is None):
            parser.error(
                "--repo must be specified to upload distribution packages")
    return args
309
def setup_logger(stream_dest, args):
    """Attach a stream handler to the 'arvados-dev.upload' logger.

    The level starts at WARNING and drops by 10 for every -v given in
    args.verbose, never going below 1.
    """
    formatter = logging.Formatter(
        '%(asctime)s %(name)s[%(process)d] %(levelname)s: %(message)s',
        '%Y-%m-%d %H:%M:%S')
    handler = logging.StreamHandler(stream_dest)
    handler.setFormatter(formatter)
    level = max(1, logging.WARNING - (10 * args.verbose))
    upload_logger = logging.getLogger('arvados-dev.upload')
    upload_logger.addHandler(handler)
    upload_logger.setLevel(level)
318
def build_suite_and_upload(target, since_timestamp, args):
    """Instantiate the suite registered for target and upload its packages.

    Suites whose class sets NEED_SSH also receive the SSH host and options
    from args.  Only files modified at or after since_timestamp are
    uploaded.
    """
    suite_def = PACKAGE_SUITES[target]
    if suite_def.func.NEED_SSH:
        ssh_kwargs = {'ssh_host': args.ssh_host, 'ssh_opts': args.ssh_opts}
    else:
        ssh_kwargs = {}
    suite_def(args.workspace, **ssh_kwargs).update_packages(since_timestamp)
326
def main(arguments, stdout=sys.stdout, stderr=sys.stderr):
    """Upload packages for every requested target, one target at a time.

    Each target has its own `.last_upload_<target>` marker under
    <workspace>/packages.  The marker is only updated after that target's
    upload finishes without raising.
    """
    args = parse_arguments(arguments)
    setup_logger(stderr, args)

    packages_dir = os.path.join(args.workspace, 'packages')
    for target in args.targets:
        # Create the marker object before uploading so that its recorded
        # start time precedes the upload.
        marker = TimestampFile(
            os.path.join(packages_dir, '.last_upload_%s' % target))
        build_suite_and_upload(target, marker.last_upload(), args)
        marker.update()


if __name__ == '__main__':
    main(sys.argv[1:])