Merge branch '20797-rh8-packages'
[arvados-dev.git] / jenkins / run_upload_packages.py
1 #!/usr/bin/env python3
2
3 # Copyright (C) The Arvados Authors. All rights reserved.
4 #
5 # SPDX-License-Identifier: AGPL-3.0
6
import argparse
import errno
import functools
import glob
import locale
import logging
import os
import pipes
import re
import shlex
import shutil
import subprocess
import sys
import time
20
def run_and_grep(cmd, read_output, *regexps,
                 encoding=locale.getpreferredencoding(), **popen_kwargs):
    """Run a subprocess and capture output lines matching regexps.

    Arguments:
    * cmd: The command to run, as a list or string, as for subprocess.Popen.
    * read_output: 'stdout' or 'stderr', the name of the output stream to read.
    Remaining arguments are regexps to match output, as strings or compiled
    regexp objects.  Output lines matching any regexp will be captured.

    Keyword arguments:
    * encoding: The encoding used to decode the subprocess output.
    Remaining keyword arguments are passed directly to subprocess.Popen.

    When read_output is 'stderr', every line read is also echoed to this
    process's stderr, whether or not it matched.

    Returns 2-tuple (subprocess returncode, list of matched output lines).
    """
    patterns = [regexp if hasattr(regexp, 'search') else re.compile(regexp)
                for regexp in regexps]
    popen_kwargs[read_output] = subprocess.PIPE
    matched_lines = []
    # Using Popen as a context manager closes the pipe and waits for the
    # child on the way out, even if reading or decoding raises.
    with subprocess.Popen(cmd, **popen_kwargs) as proc:
        pipe = getattr(proc, read_output)
        # closefd=False: the Popen stream object owns the descriptor.  Letting
        # this text wrapper close it as well would close the same fd twice.
        with open(pipe.fileno(), encoding=encoding, closefd=False) as output:
            for line in output:
                if any(pattern.search(line) for pattern in patterns):
                    matched_lines.append(line)
                if read_output == 'stderr':
                    print(line, file=sys.stderr, end='')
    return proc.returncode, matched_lines
49
50
class TimestampFile:
    """Record when packages were last uploaded, using a file's mtime.

    The constructor makes sure the parent directory of `path` exists and
    remembers the construction time in `start_time`.  `update()` later
    stamps the file with that remembered time rather than the time of the
    call (presumably so packages built while an upload was running are
    picked up by the next run -- confirm with the build scripts).
    """

    def __init__(self, path):
        """Remember `path` and create its parent directory if needed.

        Raises OSError if the parent directory cannot be created, including
        the case where something that is not a directory already occupies
        that name.
        """
        self.path = path
        parent = os.path.dirname(path)
        # A bare filename has an empty dirname; there is nothing to create
        # and os.makedirs('') would raise.
        if parent:
            os.makedirs(parent, exist_ok=True)
        self.start_time = time.time()

    def last_upload(self):
        """Return the timestamp file's mtime, or -1 if it cannot be read."""
        try:
            return os.path.getmtime(self.path)
        except OSError:
            return -1

    def update(self):
        """Create the timestamp file if missing and set its mtime to start_time.

        Failures to touch the file are reported as a warning and otherwise
        ignored.
        """
        try:
            os.close(os.open(self.path, os.O_CREAT | os.O_APPEND))
            os.utime(self.path, (time.time(), self.start_time))
        except OSError:
            # when the packages directory is created/populated by a build in a
            # docker container, as root, the script that runs the upload
            # doesn't always have permission to touch a timestamp file there.
            # In production, we build/upload from ephemeral machines, which
            # means that the timestamp mechanism is not used. We print a
            # warning and move on without erroring out.
            # Only OSError is tolerated, so Ctrl-C and programming errors
            # still propagate.
            print("Warning: unable to update timestamp file",self.path,"permission problem?")
84
class PackageSuite:
    """Base class for a group of package files uploaded the same way.

    Subclasses implement `upload_file` and may override `upload_files` and
    `post_uploads`.  A subclass may set LOGGER_PART to name its logger;
    otherwise the basename of `glob_root` is used.
    """
    NEED_SSH = False

    def __init__(self, glob_root, rel_globs):
        """Resolve each pattern in `rel_globs` against `glob_root`."""
        name_suffix = getattr(self, 'LOGGER_PART', os.path.basename(glob_root))
        self.logger = logging.getLogger('arvados-dev.upload.' + name_suffix)
        self.globs = [os.path.join(glob_root, pattern) for pattern in rel_globs]

    def files_to_upload(self, since_timestamp):
        """Yield matching paths whose mtime is at or after `since_timestamp`."""
        for pattern in self.globs:
            yield from (
                candidate for candidate in glob.glob(pattern)
                if os.path.getmtime(candidate) >= since_timestamp)

    def upload_file(self, path):
        """Upload one file.  Must be provided by subclasses."""
        raise NotImplementedError("PackageSuite.upload_file")

    def upload_files(self, paths):
        """Log and upload each path in turn via `upload_file`."""
        for package_path in paths:
            self.logger.info("Uploading %s", package_path)
            self.upload_file(package_path)

    def post_uploads(self, paths):
        """Hook run after a batch of uploads; does nothing by default."""

    def update_packages(self, since_timestamp):
        """Upload everything newer than `since_timestamp`, then run the hook.

        Nothing is called when no file qualifies.
        """
        pending = list(self.files_to_upload(since_timestamp))
        if not pending:
            return
        self.upload_files(pending)
        self.post_uploads(pending)
116
117
class PythonPackageSuite(PackageSuite):
    """Upload Python packages by running `setup.py ... upload` in each source tree."""
    LOGGER_PART = 'python'
    # stderr lines that mean the distribution was already uploaded; a
    # non-zero exit status accompanied by one of these is not an error.
    REUPLOAD_REGEXPS = [
        re.compile(pattern) for pattern in (
            r'^error: Upload failed \(400\): A file named "[^"]+" already exists\b',
            r'^error: Upload failed \(400\): File already exists\b',
            r'^error: Upload failed \(400\): Only one sdist may be uploaded per release\b',
        )
    ]

    def __init__(self, glob_root, rel_globs):
        super().__init__(glob_root, rel_globs)
        # Source directories already processed by this suite instance.
        self.seen_packages = set()

    def upload_file(self, path):
        """Build and upload the package whose dist/ directory contains `path`.

        Each source directory is handled at most once per suite instance.
        Raises subprocess.CalledProcessError when the command fails and no
        "already uploaded" message was seen on its stderr.
        """
        package_dir = os.path.dirname(os.path.dirname(path))
        if package_dir in self.seen_packages:
            return
        self.seen_packages.add(package_dir)
        # `upload` only uploads distributions generated earlier in the same
        # setup.py invocation; it cannot upload files already on disk.  So
        # the distributions are rebuilt here, into a dedicated directory
        # that does not interfere with the timestamp tracking.
        quiet_flag = [] if self.logger.isEnabledFor(logging.INFO) else ['--quiet']
        cmd = ['python3', 'setup.py'] + quiet_flag + [
            'bdist_wheel', '--dist-dir', '.upload_dist',
            'sdist', '--dist-dir', '.upload_dist',
            'upload',
        ]
        returncode, already_uploaded = run_and_grep(
            cmd, 'stderr', *self.REUPLOAD_REGEXPS, cwd=package_dir)
        failed = returncode != 0
        if failed and not already_uploaded:
            raise subprocess.CalledProcessError(returncode, cmd)
        shutil.rmtree(os.path.join(package_dir, '.upload_dist'))
154
155
class GemPackageSuite(PackageSuite):
    """Upload Ruby gems with `gem push`."""
    LOGGER_PART = 'gems'
    # stdout line that marks a rejected re-push of an existing gem version.
    REUPLOAD_REGEXP = re.compile(r'^Repushing of gem versions is not allowed\.$')

    def upload_file(self, path):
        """Push one gem file.

        A non-zero exit status is tolerated when the output contains the
        repush message; otherwise subprocess.CalledProcessError is raised.
        """
        push_cmd = ['gem', 'push', path]
        status, repush_lines = run_and_grep(
            push_cmd, 'stdout', self.REUPLOAD_REGEXP)
        if status == 0 or repush_lines:
            return
        raise subprocess.CalledProcessError(status, push_cmd)
165
166
class DistroPackageSuite(PackageSuite):
    """Base class for suites that copy packages to a repository host over SSH.

    Files are copied with scp into REMOTE_DEST_DIR/<target> on `ssh_host`;
    subclasses then run a script there from `post_uploads`.
    """
    NEED_SSH = True
    REMOTE_DEST_DIR = 'tmp'

    def __init__(self, glob_root, rel_globs, target, ssh_host, ssh_opts):
        """Store the target and SSH settings.

        Each entry of `ssh_opts` becomes a `-o<entry>` argument for ssh and
        scp.  `-q` is added when the suite logger is not enabled for INFO.
        """
        super().__init__(glob_root, rel_globs)
        self.target = target
        self.ssh_host = ssh_host
        self.ssh_opts = ['-o' + opt for opt in ssh_opts]
        if not self.logger.isEnabledFor(logging.INFO):
            self.ssh_opts.append('-q')

    def _build_cmd(self, base_cmd, *args):
        """Return [base_cmd, <ssh options...>, <args...>] as a list."""
        cmd = [base_cmd]
        cmd.extend(self.ssh_opts)
        cmd.extend(args)
        return cmd

    def _paths_basenames(self, paths):
        """Return an iterator over the basename of each path."""
        return (os.path.basename(path) for path in paths)

    def _run_script(self, script, *args):
        """Run `script` under `bash -ec` on the SSH host with `args`.

        Raises subprocess.CalledProcessError if ssh exits non-zero.
        """
        # SSH will use a shell to run our bash command, so we have to
        # quote our arguments.  shlex.quote is the supported spelling of
        # the old pipes.quote alias; the pipes module is removed in 3.13.
        # self.__class__.__name__ provides $0 for the script, which makes a
        # nicer message if there's an error.
        remote_cmd = ['bash', '-ec', shlex.quote(script),
                      self.__class__.__name__]
        remote_cmd.extend(shlex.quote(arg) for arg in args)
        subprocess.check_call(
            self._build_cmd('ssh', self.ssh_host, *remote_cmd))

    def upload_files(self, paths):
        """Create the remote directory and copy all `paths` there in one scp.

        Raises subprocess.CalledProcessError if ssh or scp exits non-zero.
        """
        # Materialize once: the paths are both logged and passed to scp.
        paths = list(paths)
        dest_dir = os.path.join(self.REMOTE_DEST_DIR, self.target)
        mkdir = self._build_cmd('ssh', self.ssh_host, 'install', '-d', dest_dir)
        subprocess.check_call(mkdir)
        # Same per-file log line the base class emits from upload_files.
        for path in paths:
            self.logger.info("Uploading %s", path)
        cmd = self._build_cmd('scp', *paths)
        cmd.append('{}:{}'.format(self.ssh_host, dest_dir))
        subprocess.check_call(cmd)
204
205
class DebianPackageSuite(DistroPackageSuite):
    """Publish uploaded .deb files into an aptly repository on the SSH host."""
    # Remote script.  $1 is the directory holding the uploaded files, $2 the
    # aptly repo name, and the rest are package file names.  A package that
    # `aptly repo search` already finds is deleted; any other is added with
    # -remove-files.  The published repo is then updated.
    APT_SCRIPT = """
cd "$1"; shift
DISTNAME=$1; shift
for package in "$@"; do
  set +e
  aptly repo search "$DISTNAME" "${package%.deb}" >/dev/null 2>&1
  RET=$?
  set -e
  if [[ $RET -eq 0 ]]; then
    echo "Not adding $package, it is already present in repo $DISTNAME"
    rm "$package"
  else
    aptly repo add -remove-files "$DISTNAME" "$package"
  fi
done
aptly publish update "$DISTNAME" filesystem:"${DISTNAME%-*}":
"""

    def __init__(self, glob_root, rel_globs, target, ssh_host, ssh_opts, repo):
        """Set up the suite and map each target to its `<codename>-<repo>` name."""
        super().__init__(glob_root, rel_globs, target, ssh_host, ssh_opts)
        codenames = {
            'debian10': 'buster',
            'debian11': 'bullseye',
            'ubuntu1804': 'bionic',
            'ubuntu2004': 'focal',
        }
        self.TARGET_DISTNAMES = {
            target_name: codename + '-' + repo
            for target_name, codename in codenames.items()
        }

    def post_uploads(self, paths):
        """Run APT_SCRIPT remotely for the basenames of `paths`."""
        remote_dir = self.REMOTE_DEST_DIR + '/' + self.target
        distname = self.TARGET_DISTNAMES[self.target]
        self._run_script(
            self.APT_SCRIPT, remote_dir, distname,
            *self._paths_basenames(paths))
238
239
class RedHatPackageSuite(DistroPackageSuite):
    """Sign uploaded .rpm files and add them to a createrepo repository."""
    # Remote script.  $1 is the directory holding the uploaded files, $2 the
    # repository directory, and the rest are package file names.  The
    # packages are signed, moved into the repository, and the repository
    # metadata is updated.
    CREATEREPO_SCRIPT = """
cd "$1"; shift
REPODIR=$1; shift
rpmsign --addsign "$@" </dev/null
mv "$@" "$REPODIR"
createrepo -c ~/.createrepo-cache --update "$REPODIR"
"""
    REPO_ROOT = '/var/www/rpm.arvados.org/'

    def __init__(self, glob_root, rel_globs, target, ssh_host, ssh_opts, repo):
        """Set up the suite and map each target to its directory under REPO_ROOT."""
        super().__init__(glob_root, rel_globs, target, ssh_host, ssh_opts)
        subdir_template = 'CentOS/%s/%s/x86_64/'
        # Both targets publish under the CentOS/ tree, rocky8 included.
        self.TARGET_REPODIRS = {
            'centos7': subdir_template % ('7', repo),
            'rocky8': subdir_template % ('8', repo),
        }

    def post_uploads(self, paths):
        """Run CREATEREPO_SCRIPT remotely for the basenames of `paths`."""
        repo_dir = os.path.join(
            self.REPO_ROOT, self.TARGET_REPODIRS[self.target])
        remote_dir = self.REMOTE_DEST_DIR + '/' + self.target
        self._run_script(
            self.CREATEREPO_SCRIPT, remote_dir, repo_dir,
            *self._paths_basenames(paths))
262
263
264 def _define_suite(suite_class, *rel_globs, **kwargs):
265     return functools.partial(suite_class, rel_globs=rel_globs, **kwargs)
266
# Suite factories keyed by target name.  Only the language-package suites
# are listed here; parse_arguments() adds the distribution targets.
PACKAGE_SUITES = dict(
    python=_define_suite(
        PythonPackageSuite,
        'sdk/python/dist/*.tar.gz',
        'sdk/cwl/dist/*.tar.gz',
        'services/fuse/dist/*.tar.gz',
        'tools/crunchstat-summary/dist/*.tar.gz',
    ),
    gems=_define_suite(
        GemPackageSuite,
        'sdk/ruby/*.gem',
        'sdk/cli/*.gem',
        'services/login-sync/*.gem',
    ),
)
280
def parse_arguments(arguments):
    """Parse and validate the command line.

    Arguments:
    * arguments: list of command-line strings, without the program name.

    Side effect: registers the Debian/Ubuntu and CentOS/Rocky suites in the
    module-level PACKAGE_SUITES, bound to the parsed --repo value.

    Exits through parser.error() when the workspace is unset, a target is
    unknown, or a distribution target is requested without --ssh-host or
    --repo.

    Returns the argparse namespace, with `targets` expanded if it
    contained 'all'.
    """
    parser = argparse.ArgumentParser(
        description="Upload Arvados packages to various repositories")
    parser.add_argument(
        '--workspace', '-W', default=os.environ.get('WORKSPACE'),
        help="Arvados source directory with built packages to upload")
    parser.add_argument(
        '--ssh-host', '-H',
        help="Host specification for distribution repository server")
    parser.add_argument('-o', action='append', default=[], dest='ssh_opts',
                         metavar='OPTION', help="Pass option to `ssh -o`")
    parser.add_argument('--verbose', '-v', action='count', default=0,
                        help="Log more information and subcommand output")
    parser.add_argument(
        '--repo', choices=['dev', 'testing'],
        help="Whether to upload to dev (nightly) or testing (release candidate) repository")

    parser.add_argument(
        'targets', nargs='*', default=['all'], metavar='target',
        help="Upload packages to these targets (default all)\nAvailable targets: " +
        ', '.join(sorted(PACKAGE_SUITES.keys())))
    args = parser.parse_args(arguments)
    # NOTE: 'all' is expanded before the distribution suites are registered
    # below, so it covers only the suites already in PACKAGE_SUITES here.
    if 'all' in args.targets:
        args.targets = list(PACKAGE_SUITES.keys())

    if args.workspace is None:
        parser.error("workspace not set from command line or environment")

    for target in ['debian10', 'debian11', 'ubuntu1804', 'ubuntu2004']:
        PACKAGE_SUITES[target] = _define_suite(
            DebianPackageSuite, os.path.join('packages', target, '*.deb'),
            target=target, repo=args.repo)
    for target in ['centos7', 'rocky8']:
        PACKAGE_SUITES[target] = _define_suite(
            RedHatPackageSuite, os.path.join('packages', target, '*.rpm'),
            target=target, repo=args.repo)

    for target in args.targets:
        try:
            suite_def = PACKAGE_SUITES[target]
        except KeyError:
            parser.error("unrecognized target {!r}".format(target))
        if suite_def.func.NEED_SSH and (args.ssh_host is None):
            parser.error(
                "--ssh-host must be specified to upload distribution packages")
        # --repo has no default.  Without this check a missing value reaches
        # the suite constructors as None: the Debian suite fails on string
        # concatenation and the RedHat suite builds a ".../None/..." path.
        if ('repo' in suite_def.keywords) and (args.repo is None):
            parser.error(
                "--repo must be specified to upload distribution packages")
    return args
327
def setup_logger(stream_dest, args):
    """Attach a stream handler to the 'arvados-dev.upload' logger.

    Arguments:
    * stream_dest: the stream that log records are written to.
    * args: parsed arguments; each unit of `args.verbose` lowers the level
      by 10 from WARNING, never going below 1.
    """
    formatter = logging.Formatter(
        '%(asctime)s %(name)s[%(process)d] %(levelname)s: %(message)s',
        '%Y-%m-%d %H:%M:%S')
    handler = logging.StreamHandler(stream_dest)
    handler.setFormatter(formatter)

    level = logging.WARNING - (10 * args.verbose)
    if level < 1:
        # Clamp to the lowest level that is still an explicit setting.
        level = 1

    upload_logger = logging.getLogger('arvados-dev.upload')
    upload_logger.addHandler(handler)
    upload_logger.setLevel(level)
336
def build_suite_and_upload(target, since_timestamp, args):
    """Instantiate the suite registered for `target` and upload its new files.

    SSH settings from `args` are passed to the suite only when its class
    sets NEED_SSH.  Raises KeyError if `target` is not in PACKAGE_SUITES.
    """
    factory = PACKAGE_SUITES[target]
    ssh_kwargs = (
        {'ssh_host': args.ssh_host, 'ssh_opts': args.ssh_opts}
        if factory.func.NEED_SSH else {})
    factory(args.workspace, **ssh_kwargs).update_packages(since_timestamp)
344
def main(arguments, stdout=sys.stdout, stderr=sys.stderr):
    """Upload packages for every requested target.

    For each target, the mtime of `<workspace>/packages/.last_upload_<target>`
    is used as the cutoff for which files to upload, and the file is stamped
    after the upload finishes.  `stdout` is accepted but not used here.
    """
    args = parse_arguments(arguments)
    setup_logger(stderr, args)

    packages_dir = os.path.join(args.workspace, 'packages')
    for target in args.targets:
        stamp = TimestampFile(
            os.path.join(packages_dir, '.last_upload_%s' % target))
        build_suite_and_upload(target, stamp.last_upload(), args)
        stamp.update()


if __name__ == '__main__':
    main(sys.argv[1:])