run_upload_packages.py: give createrepo a cache directory for its
[arvados-dev.git] / jenkins / run_upload_packages.py
1 #!/usr/bin/env python3
2
3 # Copyright (C) The Arvados Authors. All rights reserved.
4 #
5 # SPDX-License-Identifier: AGPL-3.0
6
7 import argparse
8 import errno
9 import functools
10 import glob
11 import locale
12 import logging
13 import os
14 import pipes
15 import re
16 import shutil
17 import subprocess
18 import sys
19 import time
20
def run_and_grep(cmd, read_output, *regexps,
                 encoding=locale.getpreferredencoding(), **popen_kwargs):
    """Run a subprocess and capture output lines matching regexps.

    Arguments:
    * cmd: The command to run, as a list or string, as for subprocess.Popen.
    * read_output: 'stdout' or 'stderr', the name of the output stream to read.
    Remaining arguments are regexps to match output, as strings or compiled
    regexp objects.  Output lines matching any regexp will be captured.

    Keyword arguments:
    * encoding: The encoding used to decode the subprocess output.  The
      default is the locale's preferred encoding as of module import time.
    Remaining keyword arguments are passed directly to subprocess.Popen.

    When read_output is 'stderr', every line read is also echoed to this
    process's stderr so the subprocess diagnostics stay visible.

    Returns 2-tuple (subprocess returncode, list of matched output lines).
    """
    regexps = [regexp if hasattr(regexp, 'search') else re.compile(regexp)
               for regexp in regexps]
    popen_kwargs[read_output] = subprocess.PIPE
    echo_to_stderr = (read_output == 'stderr')
    matched_lines = []
    # The Popen context manager closes the pipe and waits for the child on
    # exit, including when reading or decoding raises.
    with subprocess.Popen(cmd, **popen_kwargs) as proc:
        pipe = getattr(proc, read_output)
        # closefd=False: the descriptor stays owned by the Popen pipe object.
        # Letting this wrapper close it too would close the same descriptor
        # number twice, and the second close could hit an unrelated file.
        with open(pipe.fileno(), encoding=encoding, closefd=False) as output:
            for line in output:
                if any(regexp.search(line) for regexp in regexps):
                    matched_lines.append(line)
                if echo_to_stderr:
                    print(line, file=sys.stderr, end='')
    return proc.returncode, matched_lines
49
50
class TimestampFile:
    """Record when packages were last uploaded, using a file's mtime.

    The constructor notes the current time as `start_time`.  `update()`
    later stamps the file with that time rather than the time the upload
    finished.
    """

    def __init__(self, path):
        """Remember `path` and make sure its parent directory exists.

        Raises OSError if the parent directory cannot be created, including
        when a non-directory already exists at that location.
        """
        self.path = path
        # Make sure the dirname for `path` exists.  A bare filename has an
        # empty dirname, which os.makedirs would reject, so skip it.
        parent = os.path.dirname(path)
        if parent:
            os.makedirs(parent, exist_ok=True)
        self.start_time = time.time()

    def last_upload(self):
        """Return the timestamp file's mtime, or -1 if it cannot be read."""
        try:
            return os.path.getmtime(self.path)
        except OSError:
            return -1

    def update(self):
        """Create the file if needed and set its mtime to `start_time`.

        The access time is set to the current time.
        """
        os.close(os.open(self.path, os.O_CREAT | os.O_APPEND))
        os.utime(self.path, (time.time(), self.start_time))
74
75
class PackageSuite:
    """Base class for a group of package files uploaded the same way.

    Subclasses implement `upload_file` and may override `upload_files`
    and `post_uploads`.  A subclass may set LOGGER_PART to choose the last
    component of its logger name; otherwise the basename of the glob root
    is used.
    """

    # Whether the suite needs SSH connection details to do its work.
    NEED_SSH = False

    def __init__(self, glob_root, rel_globs):
        """Set up the logger and anchor each relative glob at `glob_root`."""
        fallback_part = os.path.basename(glob_root)
        name_suffix = getattr(self, 'LOGGER_PART', fallback_part)
        self.logger = logging.getLogger('arvados-dev.upload.' + name_suffix)
        self.globs = []
        for pattern in rel_globs:
            self.globs.append(os.path.join(glob_root, pattern))

    def files_to_upload(self, since_timestamp):
        """Yield matching paths whose mtime is at or after `since_timestamp`."""
        for pattern in self.globs:
            yield from (candidate for candidate in glob.glob(pattern)
                        if os.path.getmtime(candidate) >= since_timestamp)

    def upload_file(self, path):
        """Upload one file.  Subclasses must override this."""
        raise NotImplementedError("PackageSuite.upload_file")

    def upload_files(self, paths):
        """Log and upload each path in turn."""
        for each_path in paths:
            self.logger.info("Uploading %s", each_path)
            self.upload_file(each_path)

    def post_uploads(self, paths):
        """Hook run after a batch of uploads.  Does nothing by default."""
        pass

    def update_packages(self, since_timestamp):
        """Upload everything changed since `since_timestamp`, then run the hook.

        Nothing happens when no file qualifies.
        """
        pending = list(self.files_to_upload(since_timestamp))
        if not pending:
            return
        self.upload_files(pending)
        self.post_uploads(pending)
107
108
class PythonPackageSuite(PackageSuite):
    """Upload Python packages by running `setup.py ... upload` in each source tree.

    Each source directory is processed at most once per suite instance.
    Upload errors matching REUPLOAD_REGEXPS are not treated as failures.
    """

    LOGGER_PART = 'python'
    # Error lines from `upload` that are tolerated rather than raised.
    REUPLOAD_REGEXPS = [re.compile(pattern) for pattern in (
        r'^error: Upload failed \(400\): A file named "[^"]+" already exists\b',
        r'^error: Upload failed \(400\): File already exists\b',
        r'^error: Upload failed \(400\): Only one sdist may be uploaded per release\b',
    )]

    def __init__(self, glob_root, rel_globs):
        super().__init__(glob_root, rel_globs)
        # Source directories already handled by upload_file.
        self.seen_packages = set()

    def upload_file(self, path):
        """Build and upload distributions for the source tree owning `path`.

        The source tree is taken to be the grandparent directory of `path`.
        Raises subprocess.CalledProcessError when the command fails and none
        of its stderr lines match REUPLOAD_REGEXPS.
        """
        dist_dir = os.path.dirname(path)
        src_dir = os.path.dirname(dist_dir)
        if src_dir in self.seen_packages:
            return
        self.seen_packages.add(src_dir)
        # We also must run `sdist` before `upload`: `upload` uploads any
        # distributions previously generated in the command.  It doesn't
        # know how to upload distributions already on disk.  We write the
        # result to a dedicated directory to avoid interfering with our
        # timestamp tracking.
        quiet_flag = [] if self.logger.isEnabledFor(logging.INFO) else ['--quiet']
        cmd = (['python3', 'setup.py'] + quiet_flag
               + ['bdist_wheel', '--dist-dir', '.upload_dist']
               + ['sdist', '--dist-dir', '.upload_dist']
               + ['upload'])
        status, already_uploaded = run_and_grep(
            cmd, 'stderr', *self.REUPLOAD_REGEXPS, cwd=src_dir)
        failed = (status != 0)
        if failed and not already_uploaded:
            raise subprocess.CalledProcessError(status, cmd)
        shutil.rmtree(os.path.join(src_dir, '.upload_dist'))
145
146
class GemPackageSuite(PackageSuite):
    """Upload Ruby gems with `gem push`."""

    LOGGER_PART = 'gems'
    # Output line from `gem push` that is tolerated rather than raised.
    REUPLOAD_REGEXP = re.compile(r'^Repushing of gem versions is not allowed\.$')

    def upload_file(self, path):
        """Push one gem.

        Raises subprocess.CalledProcessError when the push fails and no
        stdout line matches REUPLOAD_REGEXP.
        """
        push_cmd = ['gem', 'push', path]
        status, already_pushed = run_and_grep(
            push_cmd, 'stdout', self.REUPLOAD_REGEXP)
        if status == 0 or already_pushed:
            return
        raise subprocess.CalledProcessError(status, push_cmd)
156
157
class DistroPackageSuite(PackageSuite):
    """Base class for suites that copy package files to a server over SSH.

    Files are copied with scp into REMOTE_DEST_DIR/<target> on `ssh_host`.
    Subclasses finish the job in `post_uploads`, typically via `_run_script`.
    """

    NEED_SSH = True
    # Remote directory that receives the uploaded files.
    REMOTE_DEST_DIR = 'tmp'

    def __init__(self, glob_root, rel_globs, target, ssh_host, ssh_opts):
        """Store connection details and turn each SSH option into `-oOPTION`.

        `-q` is added to the SSH options unless INFO logging is enabled.
        """
        super().__init__(glob_root, rel_globs)
        self.target = target
        self.ssh_host = ssh_host
        self.ssh_opts = ['-o' + opt for opt in ssh_opts]
        if not self.logger.isEnabledFor(logging.INFO):
            self.ssh_opts.append('-q')

    def _build_cmd(self, base_cmd, *args):
        """Return [base_cmd, <ssh options>..., <args>...] as a list."""
        cmd = [base_cmd]
        cmd.extend(self.ssh_opts)
        cmd.extend(args)
        return cmd

    def _paths_basenames(self, paths):
        """Return an iterator over the basename of each path."""
        return (os.path.basename(path) for path in paths)

    def _run_script(self, script, *args):
        """Run a bash script on the SSH host with `args` as its parameters.

        Raises subprocess.CalledProcessError if ssh exits non-zero.
        """
        # shlex.quote is the documented quoting function.  pipes.quote is an
        # undocumented alias for it, and the pipes module itself is
        # deprecated and removed in Python 3.13.
        import shlex
        # SSH will use a shell to run our bash command, so we have to
        # quote our arguments.
        # self.__class__.__name__ provides $0 for the script, which makes a
        # nicer message if there's an error.
        subprocess.check_call(self._build_cmd(
                'ssh', self.ssh_host, 'bash', '-ec', shlex.quote(script),
                self.__class__.__name__, *(shlex.quote(s) for s in args)))

    def upload_files(self, paths):
        """Create the remote destination directory and scp all paths into it.

        Raises subprocess.CalledProcessError if either command fails.
        """
        dest_dir = os.path.join(self.REMOTE_DEST_DIR, self.target)
        mkdir = self._build_cmd('ssh', self.ssh_host, 'install', '-d', dest_dir)
        subprocess.check_call(mkdir)
        cmd = self._build_cmd('scp', *paths)
        cmd.append('{}:{}'.format(self.ssh_host, dest_dir))
        subprocess.check_call(cmd)
195
196
class DebianPackageSuite(DistroPackageSuite):
    """Add uploaded .deb files to an aptly repository and republish it."""

    # Remote script.  $1 is the upload directory, $2 the aptly repo name,
    # and the rest are package file names.  A package that `aptly repo
    # search` already finds is deleted instead of added.  The publication
    # is updated once at the end.
    APT_SCRIPT = """
cd "$1"; shift
DISTNAME=$1; shift
for package in "$@"; do
  set +e
  aptly repo search "$DISTNAME" "${package%.deb}" >/dev/null 2>&1
  RET=$?
  set -e
  if [[ $RET -eq 0 ]]; then
    echo "Not adding $package, it is already present in repo $DISTNAME"
    rm "$package"
  else
    aptly repo add -remove-files "$DISTNAME" "$package"
  fi
done
aptly publish update "$DISTNAME" filesystem:"${DISTNAME%-*}":
"""

    def __init__(self, glob_root, rel_globs, target, ssh_host, ssh_opts, repo):
        """Build the target -> "<codename>-<repo>" map used as aptly repo names."""
        super().__init__(glob_root, rel_globs, target, ssh_host, ssh_opts)
        codenames = (
            ('debian8', 'jessie'),
            ('debian9', 'stretch'),
            ('debian10', 'buster'),
            ('ubuntu1404', 'trusty'),
            ('ubuntu1604', 'xenial'),
            ('ubuntu1804', 'bionic'),
            ('ubuntu2004', 'focal'),
        )
        self.TARGET_DISTNAMES = {}
        for distro_target, codename in codenames:
            self.TARGET_DISTNAMES[distro_target] = codename + '-' + repo

    def post_uploads(self, paths):
        """Run APT_SCRIPT remotely for the basenames of the uploaded files."""
        remote_dir = self.REMOTE_DEST_DIR + '/' + self.target
        distname = self.TARGET_DISTNAMES[self.target]
        self._run_script(self.APT_SCRIPT, remote_dir, distname,
                         *self._paths_basenames(paths))
232
233
class RedHatPackageSuite(DistroPackageSuite):
    """Sign uploaded RPMs, move them into the repository, and refresh its metadata."""

    # Remote script.  $1 is the upload directory, $2 the repository
    # directory, and the rest are package file names.
    CREATEREPO_SCRIPT = """
cd "$1"; shift
REPODIR=$1; shift
rpmsign --addsign "$@" </dev/null
mv "$@" "$REPODIR"
createrepo -c ~/.createrepo-cache --update "$REPODIR"
"""
    # Remote directory that the per-target repository paths are joined to.
    REPO_ROOT = '/var/www/rpm.arvados.org/'

    def __init__(self, glob_root, rel_globs, target, ssh_host, ssh_opts, repo):
        """Build the target -> repository subdirectory map for `repo`."""
        super().__init__(glob_root, rel_globs, target, ssh_host, ssh_opts)
        self.TARGET_REPODIRS = dict(centos7='CentOS/7/%s/x86_64/' % repo)

    def post_uploads(self, paths):
        """Run CREATEREPO_SCRIPT remotely for the basenames of the uploaded files."""
        repo_subdir = self.TARGET_REPODIRS[self.target]
        repo_dir = os.path.join(self.REPO_ROOT, repo_subdir)
        upload_dir = self.REMOTE_DEST_DIR + '/' + self.target
        self._run_script(self.CREATEREPO_SCRIPT, upload_dir, repo_dir,
                         *self._paths_basenames(paths))
255
256
257 def _define_suite(suite_class, *rel_globs, **kwargs):
258     return functools.partial(suite_class, rel_globs=rel_globs, **kwargs)
259
# Suite factories keyed by target name.  parse_arguments() adds the
# distribution targets to this mapping after the command line is parsed.
PACKAGE_SUITES = {
    'python': _define_suite(
        PythonPackageSuite,
        'sdk/python/dist/*.tar.gz',
        'sdk/cwl/dist/*.tar.gz',
        'services/fuse/dist/*.tar.gz',
    ),
    'gems': _define_suite(
        GemPackageSuite,
        'sdk/ruby/*.gem',
        'sdk/cli/*.gem',
        'services/login-sync/*.gem',
    ),
}
272
def parse_arguments(arguments):
    """Parse the command line and register the distribution targets.

    Side effect: adds the Debian/Ubuntu and CentOS suite factories to the
    module-level PACKAGE_SUITES, bound to the chosen `--repo`.

    The special target 'all' expands to the targets registered before that
    happens, so it does not include the distribution targets.

    Exits through parser.error() when the workspace is unset, when a target
    is unknown, or when a distribution target is requested without
    `--ssh-host` or `--repo`.

    Returns the parsed argparse namespace.
    """
    parser = argparse.ArgumentParser(
        description="Upload Arvados packages to various repositories")
    parser.add_argument(
        '--workspace', '-W', default=os.environ.get('WORKSPACE'),
        help="Arvados source directory with built packages to upload")
    parser.add_argument(
        '--ssh-host', '-H',
        help="Host specification for distribution repository server")
    parser.add_argument('-o', action='append', default=[], dest='ssh_opts',
                        metavar='OPTION', help="Pass option to `ssh -o`")
    parser.add_argument('--verbose', '-v', action='count', default=0,
                        help="Log more information and subcommand output")
    parser.add_argument(
        '--repo', choices=['dev', 'testing'],
        help="Whether to upload to dev (nightly) or testing (release candidate) repository")

    parser.add_argument(
        'targets', nargs='*', default=['all'], metavar='target',
        help="Upload packages to these targets (default all)\nAvailable targets: " +
        ', '.join(sorted(PACKAGE_SUITES.keys())))
    args = parser.parse_args(arguments)
    if 'all' in args.targets:
        args.targets = list(PACKAGE_SUITES.keys())

    if args.workspace is None:
        parser.error("workspace not set from command line or environment")

    for target in ['debian8', 'debian9', 'debian10', 'ubuntu1404', 'ubuntu1604', 'ubuntu1804', 'ubuntu2004']:
        PACKAGE_SUITES[target] = _define_suite(
            DebianPackageSuite, os.path.join('packages', target, '*.deb'),
            target=target, repo=args.repo)
    for target in ['centos7']:
        PACKAGE_SUITES[target] = _define_suite(
            RedHatPackageSuite, os.path.join('packages', target, '*.rpm'),
            target=target, repo=args.repo)

    for target in args.targets:
        try:
            suite_def = PACKAGE_SUITES[target]
        except KeyError:
            parser.error("unrecognized target {!r}".format(target))
        suite_class = suite_def.func
        if suite_class.NEED_SSH and (args.ssh_host is None):
            parser.error(
                "--ssh-host must be specified to upload distribution packages")
        # Suites defined with a `repo` keyword build repository names from
        # it.  Without --repo that value is None, which would either crash
        # in the suite constructor or produce a path containing "None".
        if ('repo' in suite_def.keywords) and (args.repo is None):
            parser.error(
                "--repo must be specified to upload distribution packages")
    return args
319
def setup_logger(stream_dest, args):
    """Attach a stream handler to the 'arvados-dev.upload' logger.

    The level starts at WARNING and drops by 10 for each `args.verbose`
    step, never going below 1.
    """
    formatter = logging.Formatter(
        '%(asctime)s %(name)s[%(process)d] %(levelname)s: %(message)s',
        '%Y-%m-%d %H:%M:%S')
    handler = logging.StreamHandler(stream_dest)
    handler.setFormatter(formatter)

    level = logging.WARNING - (10 * args.verbose)
    if level < 1:
        level = 1

    upload_logger = logging.getLogger('arvados-dev.upload')
    upload_logger.addHandler(handler)
    upload_logger.setLevel(level)
328
def build_suite_and_upload(target, since_timestamp, args):
    """Instantiate the suite for `target` and upload what changed.

    Suites whose class sets NEED_SSH also receive the SSH host and options
    from `args`.
    """
    make_suite = PACKAGE_SUITES[target]
    if make_suite.func.NEED_SSH:
        suite = make_suite(args.workspace,
                           ssh_host=args.ssh_host, ssh_opts=args.ssh_opts)
    else:
        suite = make_suite(args.workspace)
    suite.update_packages(since_timestamp)
336
def main(arguments, stdout=sys.stdout, stderr=sys.stderr):
    """Upload packages for every requested target.

    Each target has its own timestamp file under <workspace>/packages.
    The file is refreshed only after that target's upload returns without
    raising.  `stdout` is accepted but not used here.
    """
    args = parse_arguments(arguments)
    setup_logger(stderr, args)

    for target in args.targets:
        stamp_name = '.last_upload_%s' % target
        stamp_path = os.path.join(args.workspace, 'packages', stamp_name)
        ts_file = TimestampFile(stamp_path)
        build_suite_and_upload(target, ts_file.last_upload(), args)
        ts_file.update()
347
# When run as a script, pass the command-line arguments (minus the
# program name) to main().
if __name__ == '__main__':
    main(sys.argv[1:])