#!/usr/bin/env python3

# Copyright (C) The Arvados Authors. All rights reserved.
#
# SPDX-License-Identifier: AGPL-3.0

import argparse
import errno
import functools
import glob
import locale
import logging
import os
import re
import shlex
import shutil
import subprocess
import sys
import time

def run_and_grep(cmd, read_output, *regexps,
                 encoding=locale.getpreferredencoding(), **popen_kwargs):
    """Run a subprocess and capture output lines matching regexps.

    Arguments:
    * cmd: The command to run, as a list or string, as for subprocess.Popen.
    * read_output: 'stdout' or 'stderr', the name of the output stream to read.
    Remaining arguments are regexps to match output, as strings or compiled
    regexp objects.  Output lines matching any regexp will be captured.

    Keyword arguments:
    * encoding: The encoding used to decode the subprocess output.
    Remaining keyword arguments are passed directly to subprocess.Popen.

    Returns 2-tuple (subprocess returncode, list of matched output lines).
    """
    regexps = [regexp if hasattr(regexp, 'search') else re.compile(regexp)
               for regexp in regexps]
    popen_kwargs[read_output] = subprocess.PIPE
    proc = subprocess.Popen(cmd, **popen_kwargs)
    with open(getattr(proc, read_output).fileno(), encoding=encoding) as output:
        matched_lines = []
        for line in output:
            if any(regexp.search(line) for regexp in regexps):
                matched_lines.append(line)
            if read_output == 'stderr':
                print(line, file=sys.stderr, end='')
    return proc.wait(), matched_lines

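# A minimal usage sketch for run_and_grep(), mirroring how GemPackageSuite
# below uses it.  The gem filename is a hypothetical example; the command,
# stream name, and regexp follow the real call site.
#
#     cmd = ['gem', 'push', 'example-1.0.0.gem']
#     returncode, repushed = run_and_grep(
#         cmd, 'stdout', r'^Repushing of gem versions is not allowed\.$')
#     if returncode != 0 and not repushed:
#         raise subprocess.CalledProcessError(returncode, cmd)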

class TimestampFile:
    def __init__(self, path):
        self.path = path
        # Make sure the dirname for `path` exists
        p = os.path.dirname(path)
        try:
            os.makedirs(p)
        except OSError as exc:
            if exc.errno == errno.EEXIST and os.path.isdir(p):
                pass
            else:
                raise
        self.start_time = time.time()

    def last_upload(self):
        try:
            return os.path.getmtime(self.path)
        except EnvironmentError:
            return -1

    def update(self):
        try:
            os.close(os.open(self.path, os.O_CREAT | os.O_APPEND))
            # Set the mtime to the time this upload run started, so the next
            # run picks up anything built while this one was in progress.
            os.utime(self.path, (time.time(), self.start_time))
        except OSError:
            # When the packages directory is created/populated by a build in a
            # Docker container, as root, the script that runs the upload
            # doesn't always have permission to touch a timestamp file there.
            # In production, we build/upload from ephemeral machines, which
            # means that the timestamp mechanism is not used. We print a
            # warning and move on without erroring out.
            print("Warning: unable to update timestamp file", self.path,
                  "permission problem?")

class PackageSuite:
    NEED_SSH = False

    def __init__(self, glob_root, rel_globs):
        logger_part = getattr(self, 'LOGGER_PART', os.path.basename(glob_root))
        self.logger = logging.getLogger('arvados-dev.upload.' + logger_part)
        self.globs = [os.path.join(glob_root, rel_glob)
                      for rel_glob in rel_globs]

    def files_to_upload(self, since_timestamp):
        for abs_glob in self.globs:
            for path in glob.glob(abs_glob):
                if os.path.getmtime(path) >= since_timestamp:
                    yield path

    def upload_file(self, path):
        raise NotImplementedError("PackageSuite.upload_file")

    def upload_files(self, paths):
        for path in paths:
            self.logger.info("Uploading %s", path)
            self.upload_file(path)

    def post_uploads(self, paths):
        pass

    def update_packages(self, since_timestamp):
        upload_paths = list(self.files_to_upload(since_timestamp))
        if upload_paths:
            self.upload_files(upload_paths)
            self.post_uploads(upload_paths)


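# A minimal sketch of the PackageSuite contract, using a hypothetical
# "ExamplePackageSuite" and upload tool: subclasses override upload_file()
# (called once per new file) and optionally post_uploads() (called once with
# all uploaded paths); update_packages() drives both.
#
#     class ExamplePackageSuite(PackageSuite):
#         LOGGER_PART = 'example'
#
#         def upload_file(self, path):
#             subprocess.check_call(['example-upload-tool', path])
#
#     suite = ExamplePackageSuite('/path/to/workspace', ['dist/*.tar.gz'])
#     suite.update_packages(since_timestamp=0)
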
class PythonPackageSuite(PackageSuite):
    LOGGER_PART = 'python'
    REUPLOAD_REGEXPS = [
        re.compile(
            r'^error: Upload failed \(400\): A file named "[^"]+" already exists\b'),
        re.compile(
            r'^error: Upload failed \(400\): File already exists\b'),
        re.compile(
            r'^error: Upload failed \(400\): Only one sdist may be uploaded per release\b'),
    ]

    def __init__(self, glob_root, rel_globs):
        super().__init__(glob_root, rel_globs)
        self.seen_packages = set()

    def upload_file(self, path):
        src_dir = os.path.dirname(os.path.dirname(path))
        if src_dir in self.seen_packages:
            return
        self.seen_packages.add(src_dir)
        # We must run `bdist_wheel` and `sdist` before `upload`: `upload`
        # only uploads distributions generated earlier in the same command.
        # It doesn't know how to upload distributions already on disk.  We
        # write the results to a dedicated directory to avoid interfering
        # with our timestamp tracking.
        cmd = ['python3', 'setup.py']
        if not self.logger.isEnabledFor(logging.INFO):
            cmd.append('--quiet')
        cmd.extend(['bdist_wheel', '--dist-dir', '.upload_dist'])
        cmd.extend(['sdist', '--dist-dir', '.upload_dist'])
        cmd.extend(['upload'])
        upload_returncode, repushed = run_and_grep(
            cmd, 'stderr', *self.REUPLOAD_REGEXPS, cwd=src_dir)
        if (upload_returncode != 0) and not repushed:
            raise subprocess.CalledProcessError(upload_returncode, cmd)
        shutil.rmtree(os.path.join(src_dir, '.upload_dist'))


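# For illustration, the upload_file() above assembles a single setup.py
# invocation, run from the package's source directory, roughly equivalent to:
#
#     python3 setup.py [--quiet] bdist_wheel --dist-dir .upload_dist \
#         sdist --dist-dir .upload_dist upload
#
# (`--quiet` is only added when INFO logging is disabled.)
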
class GemPackageSuite(PackageSuite):
    LOGGER_PART = 'gems'
    REUPLOAD_REGEXP = re.compile(r'^Repushing of gem versions is not allowed\.$')

    def upload_file(self, path):
        cmd = ['gem', 'push', path]
        push_returncode, repushed = run_and_grep(cmd, 'stdout', self.REUPLOAD_REGEXP)
        if (push_returncode != 0) and not repushed:
            raise subprocess.CalledProcessError(push_returncode, cmd)


class DistroPackageSuite(PackageSuite):
    NEED_SSH = True
    REMOTE_DEST_DIR = 'tmp'

    def __init__(self, glob_root, rel_globs, target, ssh_host, ssh_opts):
        super().__init__(glob_root, rel_globs)
        self.target = target
        self.ssh_host = ssh_host
        self.ssh_opts = ['-o' + opt for opt in ssh_opts]
        if not self.logger.isEnabledFor(logging.INFO):
            self.ssh_opts.append('-q')

    def _build_cmd(self, base_cmd, *args):
        cmd = [base_cmd]
        cmd.extend(self.ssh_opts)
        cmd.extend(args)
        return cmd

    def _paths_basenames(self, paths):
        return (os.path.basename(path) for path in paths)

    def _run_script(self, script, *args):
        # SSH will use a shell to run our bash command, so we have to
        # quote our arguments.
        # self.__class__.__name__ provides $0 for the script, which makes a
        # nicer message if there's an error.
        subprocess.check_call(self._build_cmd(
                'ssh', self.ssh_host, 'bash', '-ec', shlex.quote(script),
                self.__class__.__name__, *(shlex.quote(s) for s in args)))

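    # A rough sketch of the command _run_script() executes, with a hypothetical
    # host, ssh option, and package name; shlex.quote() protects the script and
    # its arguments from the remote shell that ssh interposes, and the class
    # name becomes the script's $0:
    #
    #     ssh -oSomeOption upload@example.com bash -ec '<quoted script>' \
    #         DebianPackageSuite tmp/debian12 bookworm-dev package.deb
    #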
    def upload_files(self, paths):
        dest_dir = os.path.join(self.REMOTE_DEST_DIR, self.target)
        mkdir = self._build_cmd('ssh', self.ssh_host, 'install', '-d', dest_dir)
        subprocess.check_call(mkdir)
        cmd = self._build_cmd('scp', *paths)
        cmd.append('{}:{}'.format(self.ssh_host, dest_dir))
        subprocess.check_call(cmd)


class DebianPackageSuite(DistroPackageSuite):
    APT_SCRIPT = """
set -e
cd "$1"; shift
DISTNAME=$1; shift
# aptly implements its own locking, but its wait strategy as of April 2024 is
# not patient enough to accommodate multiple simultaneous uploads.
APTLY_LOCK="${XDG_RUNTIME_DIR:-/tmp}/aptly-upload.lock"
aptly() {
  flock --wait=300 "$APTLY_LOCK" aptly "$@"
}
for package in "$@"; do
  if aptly repo search "$DISTNAME" "${package%.deb}" >/dev/null 2>&1; then
    echo "Not adding $package, it is already present in repo $DISTNAME"
    rm "$package"
  else
    aptly repo add -remove-files "$DISTNAME" "$package"
  fi
done
aptly publish update "$DISTNAME" filesystem:"${DISTNAME%-*}":
"""

    def __init__(self, glob_root, rel_globs, target, ssh_host, ssh_opts, repo):
        super().__init__(glob_root, rel_globs, target, ssh_host, ssh_opts)
        self.TARGET_DISTNAMES = {
            'debian10': 'buster-'+repo,
            'debian11': 'bullseye-'+repo,
            'debian12': 'bookworm-'+repo,
            'ubuntu1804': 'bionic-'+repo,
            'ubuntu2004': 'focal-'+repo,
            'ubuntu2204': 'jammy-'+repo,
            }

    def post_uploads(self, paths):
        self._run_script(self.APT_SCRIPT, self.REMOTE_DEST_DIR + '/' + self.target,
                         self.TARGET_DISTNAMES[self.target],
                         *self._paths_basenames(paths))


class RedHatPackageSuite(DistroPackageSuite):
    CREATEREPO_SCRIPT = """
cd "$1"; shift
REPODIR=$1; shift
rpmsign --addsign "$@" </dev/null
mv "$@" "$REPODIR"
createrepo_c -c ~/.createrepo-cache --update "$REPODIR"
"""
    REPO_ROOT = '/var/www/rpm.arvados.org/'

    def __init__(self, glob_root, rel_globs, target, ssh_host, ssh_opts, repo):
        super().__init__(glob_root, rel_globs, target, ssh_host, ssh_opts)
        self.TARGET_REPODIRS = {
            'centos7': 'CentOS/7/%s/x86_64/' % repo,
            'rocky8': 'CentOS/8/%s/x86_64/' % repo,
        }

    def post_uploads(self, paths):
        repo_dir = os.path.join(self.REPO_ROOT,
                                self.TARGET_REPODIRS[self.target])
        self._run_script(self.CREATEREPO_SCRIPT, self.REMOTE_DEST_DIR + '/' + self.target,
                         repo_dir, *self._paths_basenames(paths))


def _define_suite(suite_class, *rel_globs, **kwargs):
    return functools.partial(suite_class, rel_globs=rel_globs, **kwargs)

PACKAGE_SUITES = {
    'python': _define_suite(PythonPackageSuite,
                            'sdk/python/dist/*.tar.gz',
                            'sdk/cwl/dist/*.tar.gz',
                            'services/fuse/dist/*.tar.gz',
                            'tools/crunchstat-summary/dist/*.tar.gz',
                        ),
    'gems': _define_suite(GemPackageSuite,
                          'sdk/ruby/*.gem',
                          'sdk/cli/*.gem',
                          'services/login-sync/*.gem',
                      ),
    }

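# Each PACKAGE_SUITES entry is a functools.partial with the glob patterns (and
# any distro-specific keyword arguments) pre-bound, so calling it with just the
# workspace root yields a ready-to-use suite.  A sketch, assuming a hypothetical
# workspace path:
#
#     suite = PACKAGE_SUITES['python']('/home/jenkins/workspace/arvados')
#     suite.update_packages(since_timestamp=0)
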
def parse_arguments(arguments):
    parser = argparse.ArgumentParser(
        description="Upload Arvados packages to various repositories")
    parser.add_argument(
        '--workspace', '-W', default=os.environ.get('WORKSPACE'),
        help="Arvados source directory with built packages to upload")
    parser.add_argument(
        '--ssh-host', '-H',
        help="Host specification for distribution repository server")
    parser.add_argument('-o', action='append', default=[], dest='ssh_opts',
                        metavar='OPTION', help="Pass option to `ssh -o`")
    parser.add_argument('--verbose', '-v', action='count', default=0,
                        help="Log more information and subcommand output")
    parser.add_argument(
        '--repo', choices=['dev', 'testing'],
        help="Whether to upload to dev (nightly) or testing (release candidate) repository")

    parser.add_argument(
        'targets', nargs='*', default=['all'], metavar='target',
        help="Upload packages to these targets (default all)\nAvailable targets: " +
        ', '.join(sorted(PACKAGE_SUITES.keys())))
    args = parser.parse_args(arguments)
    if 'all' in args.targets:
        args.targets = list(PACKAGE_SUITES.keys())

    if args.workspace is None:
        parser.error("workspace not set from command line or environment")

    for target in [
            'debian10', 'debian11', 'debian12',
            'ubuntu1804', 'ubuntu2004', 'ubuntu2204',
    ]:
        PACKAGE_SUITES[target] = _define_suite(
            DebianPackageSuite, os.path.join('packages', target, '*.deb'),
            target=target, repo=args.repo)
    for target in ['centos7', 'rocky8']:
        PACKAGE_SUITES[target] = _define_suite(
            RedHatPackageSuite, os.path.join('packages', target, '*.rpm'),
            target=target, repo=args.repo)

    for target in args.targets:
        try:
            suite_class = PACKAGE_SUITES[target].func
        except KeyError:
            parser.error("unrecognized target {!r}".format(target))
        if suite_class.NEED_SSH and (args.ssh_host is None):
            parser.error(
                "--ssh-host must be specified to upload distribution packages")
    return args

def setup_logger(stream_dest, args):
    log_handler = logging.StreamHandler(stream_dest)
    log_handler.setFormatter(logging.Formatter(
            '%(asctime)s %(name)s[%(process)d] %(levelname)s: %(message)s',
            '%Y-%m-%d %H:%M:%S'))
    logger = logging.getLogger('arvados-dev.upload')
    logger.addHandler(log_handler)
    logger.setLevel(max(1, logging.WARNING - (10 * args.verbose)))

def build_suite_and_upload(target, since_timestamp, args):
    suite_def = PACKAGE_SUITES[target]
    kwargs = {}
    if suite_def.func.NEED_SSH:
        kwargs.update(ssh_host=args.ssh_host, ssh_opts=args.ssh_opts)
    suite = suite_def(args.workspace, **kwargs)
    suite.update_packages(since_timestamp)

def main(arguments, stdout=sys.stdout, stderr=sys.stderr):
    args = parse_arguments(arguments)
    setup_logger(stderr, args)

    for target in args.targets:
        ts_file = TimestampFile(os.path.join(args.workspace, 'packages',
                                             '.last_upload_%s' % target))
        last_upload_ts = ts_file.last_upload()
        build_suite_and_upload(target, last_upload_ts, args)
        ts_file.update()

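# An example invocation sketch (hostname, ssh option, and workspace path are
# hypothetical; `dev` and the target names are real choices defined above):
#
#     python3 run_upload_packages.py -v --repo dev \
#         --ssh-host upload@apt.example.com -o StrictHostKeyChecking=no \
#         --workspace /home/jenkins/workspace/arvados debian12 python gems
#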
if __name__ == '__main__':
    main(sys.argv[1:])