Do not try to add packages to the repository that are already in it.
[arvados-dev.git] / jenkins / run_upload_packages.py
1 #!/usr/bin/env python3
2
3 # Copyright (C) The Arvados Authors. All rights reserved.
4 #
5 # SPDX-License-Identifier: AGPL-3.0
6
7 import argparse
8 import functools
9 import glob
10 import locale
11 import logging
12 import os
13 import pipes
14 import re
15 import shutil
16 import subprocess
17 import sys
18 import time
19
def run_and_grep(cmd, read_output, *regexps,
                 encoding=locale.getpreferredencoding(), **popen_kwargs):
    """Run a subprocess and capture output lines matching regexps.

    Arguments:
    * cmd: The command to run, as a list or string, as for subprocess.Popen.
    * read_output: 'stdout' or 'stderr', the name of the output stream to read.
    Remaining arguments are regexps to match output, as strings or compiled
    regexp objects.  Output lines matching any regexp will be captured.

    Keyword arguments:
    * encoding: The encoding used to decode the subprocess output.
    Remaining keyword arguments are passed directly to subprocess.Popen.

    When read_output is 'stderr', every line read is also echoed to this
    process's stderr, whether or not it matched.

    Returns 2-tuple (subprocess returncode, list of matched output lines).
    """
    patterns = [regexp if hasattr(regexp, 'search') else re.compile(regexp)
                for regexp in regexps]
    popen_kwargs[read_output] = subprocess.PIPE
    echo_to_stderr = (read_output == 'stderr')
    matched_lines = []
    proc = subprocess.Popen(cmd, **popen_kwargs)
    pipe = getattr(proc, read_output)
    try:
        # closefd=False: the descriptor is owned by the Popen pipe object.
        # Letting this second file object close it too would make the pipe
        # object close the same descriptor number again later, which could
        # by then refer to an unrelated file.
        with open(pipe.fileno(), encoding=encoding, closefd=False) as output:
            for line in output:
                if any(pattern.search(line) for pattern in patterns):
                    matched_lines.append(line)
                if echo_to_stderr:
                    print(line, file=sys.stderr, end='')
    finally:
        # Close the pipe through its real owner and always reap the child,
        # even if reading or decoding the output raised.
        pipe.close()
        returncode = proc.wait()
    return returncode, matched_lines
48
49
class TimestampFile:
    """Track the time of the last upload using a marker file's mtime."""

    def __init__(self, path):
        """Remember the marker path and the time this object was created."""
        self.path = path
        self.start_time = time.time()

    def last_upload(self):
        """Return the marker file's mtime, or -1 if it cannot be read."""
        try:
            mtime = os.path.getmtime(self.path)
        except EnvironmentError:
            mtime = -1
        return mtime

    def update(self):
        """Create the marker file if needed and stamp it.

        The access time is set to now; the modification time is set to
        the time this object was created rather than to the current time.
        """
        marker_fd = os.open(self.path, os.O_CREAT | os.O_APPEND)
        os.close(marker_fd)
        access_time = time.time()
        os.utime(self.path, (access_time, self.start_time))
64
65
class PackageSuite:
    """Base class for a group of package files uploaded to one destination.

    Subclasses implement upload_file() (or override upload_files()) and may
    override post_uploads() to run a step after all files are uploaded.
    """
    NEED_SSH = False

    def __init__(self, glob_root, rel_globs):
        """Set up the logger and absolute glob patterns for this suite.

        The logger name suffix is the class's LOGGER_PART attribute when it
        defines one, otherwise the basename of glob_root.
        """
        suffix = getattr(self, 'LOGGER_PART', os.path.basename(glob_root))
        self.logger = logging.getLogger('arvados-dev.upload.' + suffix)
        self.globs = [os.path.join(glob_root, pattern)
                      for pattern in rel_globs]

    def files_to_upload(self, since_timestamp):
        """Yield matching paths whose mtime is at or after since_timestamp."""
        for pattern in self.globs:
            yield from (candidate for candidate in glob.glob(pattern)
                        if os.path.getmtime(candidate) >= since_timestamp)

    def upload_file(self, path):
        """Upload a single file.  Subclasses must override this."""
        raise NotImplementedError("PackageSuite.upload_file")

    def upload_files(self, paths):
        """Log and upload each path in turn through upload_file()."""
        for pending in paths:
            self.logger.info("Uploading %s", pending)
            self.upload_file(pending)

    def post_uploads(self, paths):
        """Hook run after a batch has been uploaded.  Does nothing here."""
        pass

    def update_packages(self, since_timestamp):
        """Upload everything newer than since_timestamp, then run the hook.

        Nothing is called at all when no file qualifies.
        """
        pending_paths = list(self.files_to_upload(since_timestamp))
        if not pending_paths:
            return
        self.upload_files(pending_paths)
        self.post_uploads(pending_paths)
97
98
class PythonPackageSuite(PackageSuite):
    """Upload Python distributions by running `setup.py ... upload`."""
    LOGGER_PART = 'python'
    # stderr lines that mean the upload was refused only because this
    # version is already published; these are not treated as failures.
    REUPLOAD_REGEXPS = [
        re.compile(pattern) for pattern in (
            r'^error: Upload failed \(400\): A file named "[^"]+" already exists\b',
            r'^error: Upload failed \(400\): File already exists\b',
            r'^error: Upload failed \(400\): Only one sdist may be uploaded per release\b',
        )
    ]

    def __init__(self, glob_root, rel_globs):
        """Initialize the suite with an empty set of handled source dirs."""
        super().__init__(glob_root, rel_globs)
        self.seen_packages = set()

    def upload_file(self, path):
        """Build and upload the package whose source tree contains path.

        The source directory is two levels above path.  Each source
        directory is handled at most once per suite instance.  Raises
        subprocess.CalledProcessError when the command fails for a reason
        other than a recognized "already uploaded" message.
        """
        package_dir = os.path.dirname(os.path.dirname(path))
        if package_dir in self.seen_packages:
            return
        self.seen_packages.add(package_dir)
        # `upload` only uploads distributions generated earlier in the same
        # setup.py invocation; it cannot pick up files already on disk.  So
        # the distributions are rebuilt here, into a dedicated directory so
        # the rebuild does not interfere with our timestamp tracking.
        quiet_flag = [] if self.logger.isEnabledFor(logging.INFO) else ['--quiet']
        cmd = [
            'python3', 'setup.py', *quiet_flag,
            'bdist_wheel', '--dist-dir', '.upload_dist',
            'sdist', '--dist-dir', '.upload_dist',
            'upload',
        ]
        returncode, already_uploaded = run_and_grep(
            cmd, 'stderr', *self.REUPLOAD_REGEXPS, cwd=package_dir)
        if returncode != 0 and not already_uploaded:
            raise subprocess.CalledProcessError(returncode, cmd)
        shutil.rmtree(os.path.join(package_dir, '.upload_dist'))
135
136
class GemPackageSuite(PackageSuite):
    """Upload Ruby gems with `gem push`."""
    LOGGER_PART = 'gems'
    # stdout line printed when this gem version has already been pushed.
    REUPLOAD_REGEXP = re.compile(r'^Repushing of gem versions is not allowed\.$')

    def upload_file(self, path):
        """Push one gem file.

        A non-zero exit status is tolerated when the output contains the
        "repushing" message; otherwise subprocess.CalledProcessError is
        raised.
        """
        push_cmd = ['gem', 'push', path]
        returncode, repush_lines = run_and_grep(
            push_cmd, 'stdout', self.REUPLOAD_REGEXP)
        if returncode == 0 or repush_lines:
            return
        raise subprocess.CalledProcessError(returncode, push_cmd)
146
147
class DistroPackageSuite(PackageSuite):
    """Base class for suites that copy packages to a repository host via SSH."""
    NEED_SSH = True
    REMOTE_DEST_DIR = 'tmp'

    def __init__(self, glob_root, rel_globs, target, ssh_host, ssh_opts):
        """Record the target and SSH settings.

        Each entry of ssh_opts is turned into a `-oOPTION` argument, and
        `-q` is added when INFO logging is not enabled for this suite.
        """
        super().__init__(glob_root, rel_globs)
        self.target = target
        self.ssh_host = ssh_host
        option_flags = ['-o' + opt for opt in ssh_opts]
        quiet_flag = [] if self.logger.isEnabledFor(logging.INFO) else ['-q']
        self.ssh_opts = option_flags + quiet_flag

    def _build_cmd(self, base_cmd, *args):
        """Return [base_cmd, <ssh options>..., <args>...] as a list."""
        return [base_cmd, *self.ssh_opts, *args]

    def _paths_basenames(self, paths):
        """Return an iterator over the basename of every path."""
        return map(os.path.basename, paths)

    def _run_script(self, script, *args):
        """Run a bash script with arguments on the SSH host.

        Raises subprocess.CalledProcessError if ssh exits non-zero.
        """
        # SSH hands the command to a remote shell, so the script and each
        # of its arguments have to be quoted.
        # NOTE(review): the `pipes` module is deprecated in recent Python
        # releases; shlex.quote looks like the documented equivalent -
        # confirm before switching.
        quoted_args = [pipes.quote(arg) for arg in args]
        # The class name is passed as $0 of the script, which makes a
        # nicer message if there's an error.
        remote_cmd = self._build_cmd(
            'ssh', self.ssh_host, 'bash', '-ec', pipes.quote(script),
            type(self).__name__, *quoted_args)
        subprocess.check_call(remote_cmd)

    def upload_files(self, paths):
        """Create the remote staging directory and scp all paths into it."""
        remote_dir = os.path.join(self.REMOTE_DEST_DIR, self.target)
        subprocess.check_call(
            self._build_cmd('ssh', self.ssh_host, 'install', '-d', remote_dir))
        scp_cmd = self._build_cmd('scp', *paths)
        scp_cmd.append('{}:{}'.format(self.ssh_host, remote_dir))
        subprocess.check_call(scp_cmd)
185
186
class DebianPackageSuite(DistroPackageSuite):
    """Add uploaded .deb files to an aptly repository on the SSH host."""
    # Remote script.  $1 is the staging directory, $2 the aptly repo name,
    # and the remaining arguments are the uploaded file names.  If the
    # `aptly repo search` succeeds the files are deleted without being
    # added; otherwise they are added and the publication is updated.
    FREIGHT_SCRIPT = """
cd "$1"; shift
DISTNAME=$1; shift
set +e
aptly repo search "$DISTNAME" "${@%.deb}" >/dev/null 2>&1
RET=$?
set -e
if [[ $RET -eq 0 ]]; then
  echo "Not adding $@, it is already present in repo $DISTNAME"
  rm "$@"
else
  aptly repo add -remove-files "$DISTNAME" "$@"
  aptly publish update "$DISTNAME" filesystem:"${DISTNAME%-*}":
fi
"""

    def __init__(self, glob_root, rel_globs, target, ssh_host, ssh_opts, repo):
        """Set up the suite and map each target to '<codename>-<repo>'."""
        super().__init__(glob_root, rel_globs, target, ssh_host, ssh_opts)
        codenames = (
            ('debian8', 'jessie'),
            ('debian9', 'stretch'),
            ('debian10', 'buster'),
            ('ubuntu1404', 'trusty'),
            ('ubuntu1604', 'xenial'),
            ('ubuntu1804', 'bionic'),
            ('ubuntu2004', 'focal'),
        )
        self.TARGET_DISTNAMES = {
            target_name: codename + '-' + repo
            for target_name, codename in codenames
        }

    def post_uploads(self, paths):
        """Run FREIGHT_SCRIPT remotely on the basenames of the uploaded paths."""
        staging_dir = self.REMOTE_DEST_DIR + '/' + self.target
        distname = self.TARGET_DISTNAMES[self.target]
        self._run_script(self.FREIGHT_SCRIPT, staging_dir, distname,
                         *self._paths_basenames(paths))
220
221
class RedHatPackageSuite(DistroPackageSuite):
    """Sign uploaded .rpm files and move them into a yum repository."""
    # Remote script.  $1 is the staging directory, $2 the repository
    # directory, and the remaining arguments are the uploaded file names.
    CREATEREPO_SCRIPT = """
cd "$1"; shift
REPODIR=$1; shift
rpmsign --addsign "$@" </dev/null
mv "$@" "$REPODIR"
createrepo "$REPODIR"
"""
    REPO_ROOT = '/var/www/rpm.arvados.org/'

    def __init__(self, glob_root, rel_globs, target, ssh_host, ssh_opts, repo):
        """Set up the suite and the per-target repository subdirectories."""
        super().__init__(glob_root, rel_globs, target, ssh_host, ssh_opts)
        centos7_dir = 'CentOS/7/%s/x86_64/' % repo
        self.TARGET_REPODIRS = {'centos7': centos7_dir}

    def post_uploads(self, paths):
        """Run CREATEREPO_SCRIPT remotely on the basenames of the uploaded paths."""
        staging_dir = self.REMOTE_DEST_DIR + '/' + self.target
        target_subdir = self.TARGET_REPODIRS[self.target]
        repo_dir = os.path.join(self.REPO_ROOT, target_subdir)
        self._run_script(self.CREATEREPO_SCRIPT, staging_dir, repo_dir,
                         *self._paths_basenames(paths))
243
244
245 def _define_suite(suite_class, *rel_globs, **kwargs):
246     return functools.partial(suite_class, rel_globs=rel_globs, **kwargs)
247
# Suite factories keyed by target name.  Only the language package suites
# are listed here; parse_arguments() adds further entries at run time.
PACKAGE_SUITES = {
    'python': _define_suite(
        PythonPackageSuite,
        'sdk/python/dist/*.tar.gz',
        'sdk/cwl/dist/*.tar.gz',
        'services/fuse/dist/*.tar.gz',
    ),
    'gems': _define_suite(
        GemPackageSuite,
        'sdk/ruby/*.gem',
        'sdk/cli/*.gem',
        'services/login-sync/*.gem',
    ),
}
260
def parse_arguments(arguments):
    """Parse and validate the command line.

    Arguments:
    * arguments: list of command-line argument strings (without argv[0]).

    As a side effect, registers the Debian/Ubuntu and CentOS suites in the
    module-level PACKAGE_SUITES, bound to the parsed --repo value.

    Exits through parser.error() when the workspace is not set, a target
    is not recognized, or a distribution target is requested without
    --ssh-host or --repo.

    Returns the argparse namespace, with 'all' in targets expanded.
    """
    parser = argparse.ArgumentParser(
        description="Upload Arvados packages to various repositories")
    parser.add_argument(
        '--workspace', '-W', default=os.environ.get('WORKSPACE'),
        help="Arvados source directory with built packages to upload")
    parser.add_argument(
        '--ssh-host', '-H',
        help="Host specification for distribution repository server")
    parser.add_argument('-o', action='append', default=[], dest='ssh_opts',
                         metavar='OPTION', help="Pass option to `ssh -o`")
    parser.add_argument('--verbose', '-v', action='count', default=0,
                        help="Log more information and subcommand output")
    parser.add_argument(
        '--repo', choices=['dev', 'testing'],
        help="Whether to upload to dev (nightly) or testing (release candidate) repository")

    # NOTE(review): this help text is built before the distribution suites
    # are registered below, so on a first call it lists only the targets
    # defined at module level.
    parser.add_argument(
        'targets', nargs='*', default=['all'], metavar='target',
        help="Upload packages to these targets (default all)\nAvailable targets: " +
        ', '.join(sorted(PACKAGE_SUITES.keys())))
    args = parser.parse_args(arguments)
    # NOTE(review): 'all' is expanded before the distribution suites are
    # registered, so on a first call it covers only the module-level
    # suites - confirm that this is intended.
    if 'all' in args.targets:
        args.targets = list(PACKAGE_SUITES.keys())

    if args.workspace is None:
        parser.error("workspace not set from command line or environment")

    # The distribution suites need the parsed --repo value, so they can
    # only be defined once the arguments are known.
    for target in ['debian8', 'debian9', 'debian10', 'ubuntu1404', 'ubuntu1604', 'ubuntu1804', 'ubuntu2004']:
        PACKAGE_SUITES[target] = _define_suite(
            DebianPackageSuite, os.path.join('packages', target, '*.deb'),
            target=target, repo=args.repo)
    for target in ['centos7']:
        PACKAGE_SUITES[target] = _define_suite(
            RedHatPackageSuite, os.path.join('packages', target, '*.rpm'),
            target=target, repo=args.repo)

    for target in args.targets:
        try:
            suite_def = PACKAGE_SUITES[target]
        except KeyError:
            parser.error("unrecognized target {!r}".format(target))
        if suite_def.func.NEED_SSH and (args.ssh_host is None):
            parser.error(
                "--ssh-host must be specified to upload distribution packages")
        # Suites bound to a `repo` keyword build repository names and paths
        # from it.  Reject a missing --repo here rather than failing later
        # with a TypeError or a path containing "None".
        if ('repo' in suite_def.keywords) and (args.repo is None):
            parser.error(
                "--repo must be specified to upload distribution packages")
    return args
307
def setup_logger(stream_dest, args):
    """Attach a stream handler to the 'arvados-dev.upload' logger.

    Arguments:
    * stream_dest: the stream that log records are written to.
    * args: parsed arguments; each count of args.verbose lowers the
      threshold by 10 from WARNING, never going below 1.
    """
    handler = logging.StreamHandler(stream_dest)
    formatter = logging.Formatter(
        '%(asctime)s %(name)s[%(process)d] %(levelname)s: %(message)s',
        '%Y-%m-%d %H:%M:%S')
    handler.setFormatter(formatter)
    upload_logger = logging.getLogger('arvados-dev.upload')
    upload_logger.addHandler(handler)
    level = logging.WARNING - (10 * args.verbose)
    if level < 1:
        level = 1
    upload_logger.setLevel(level)
316
def build_suite_and_upload(target, since_timestamp, args):
    """Instantiate the suite registered for target and upload its packages.

    Suites whose class sets NEED_SSH also receive the SSH host and options
    from args.  Only files at least as new as since_timestamp are uploaded.
    """
    suite_def = PACKAGE_SUITES[target]
    if suite_def.func.NEED_SSH:
        ssh_kwargs = {'ssh_host': args.ssh_host, 'ssh_opts': args.ssh_opts}
    else:
        ssh_kwargs = {}
    suite = suite_def(args.workspace, **ssh_kwargs)
    suite.update_packages(since_timestamp)
322     suite = suite_def(args.workspace, **kwargs)
323     suite.update_packages(since_timestamp)
324
def main(arguments, stdout=sys.stdout, stderr=sys.stderr):
    """Upload new packages for every requested target.

    For each target, files at least as new as the target's
    `.last_upload_<target>` marker under <workspace>/packages are uploaded,
    and then the marker is updated.  Logging goes to stderr.
    """
    args = parse_arguments(arguments)
    setup_logger(stderr, args)

    packages_dir = os.path.join(args.workspace, 'packages')
    for target in args.targets:
        marker_path = os.path.join(packages_dir, '.last_upload_%s' % target)
        ts_file = TimestampFile(marker_path)
        build_suite_and_upload(target, ts_file.last_upload(), args)
        ts_file.update()
333         build_suite_and_upload(target, last_upload_ts, args)
334         ts_file.update()
335
# Script entry point: pass everything after the program name to main().
if __name__ == '__main__':
    cli_arguments = sys.argv[1:]
    main(cli_arguments)