#!/usr/bin/env python3

# Copyright (C) The Arvados Authors. All rights reserved.
#
# SPDX-License-Identifier: AGPL-3.0

import argparse
import errno
import functools
import glob
import locale
import logging
import os
import re
import shlex
import shutil
import subprocess
import sys
import time

def run_and_grep(cmd, read_output, *regexps,
                 encoding=locale.getpreferredencoding(), **popen_kwargs):
    """Run a subprocess and capture output lines matching regexps.

    Arguments:
    * cmd: The command to run, as a list or string, as for subprocess.Popen.
    * read_output: 'stdout' or 'stderr', the name of the output stream to read.
    Remaining arguments are regexps to match output, as strings or compiled
    regexp objects.  Output lines matching any regexp will be captured.

    Keyword arguments:
    * encoding: The encoding used to decode the subprocess output.
    Remaining keyword arguments are passed directly to subprocess.Popen.

    Returns 2-tuple (subprocess returncode, list of matched output lines).
    """
    regexps = [regexp if hasattr(regexp, 'search') else re.compile(regexp)
               for regexp in regexps]
    popen_kwargs[read_output] = subprocess.PIPE
    proc = subprocess.Popen(cmd, **popen_kwargs)
    with open(getattr(proc, read_output).fileno(), encoding=encoding) as output:
        matched_lines = []
        for line in output:
            if any(regexp.search(line) for regexp in regexps):
                matched_lines.append(line)
            if read_output == 'stderr':
                print(line, file=sys.stderr, end='')
    return proc.wait(), matched_lines

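# Illustrative sketch only (nothing in this script calls it): run_and_grep is
# useful for commands whose diagnostic output should downgrade certain
# failures. The command and pattern below are placeholders for demonstration.
def _example_run_and_grep_usage():
    returncode, matched = run_and_grep(
        ['echo', 'this version already exists'], 'stdout',
        r'already exists')
    # A caller typically treats a nonzero exit as fatal only when none of the
    # tolerated messages appeared, as GemPackageSuite does below.
    if returncode != 0 and not matched:
        raise subprocess.CalledProcessError(returncode, 'echo')
    return matched
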
class TimestampFile:
    def __init__(self, path):
        self.path = path
        # Make sure the dirname for `path` exists
        p = os.path.dirname(path)
        try:
            os.makedirs(p)
        except OSError as exc:
            if exc.errno == errno.EEXIST and os.path.isdir(p):
                pass
            else:
                raise
        self.start_time = time.time()

    def last_upload(self):
        try:
            return os.path.getmtime(self.path)
        except OSError:
            return -1

    def update(self):
        try:
            os.close(os.open(self.path, os.O_CREAT | os.O_APPEND))
            # Record the start of this run as the file's mtime, so packages
            # that appear while an upload is in progress are not skipped by
            # the next run.
            os.utime(self.path, (time.time(), self.start_time))
        except Exception:
            # When the packages directory is created/populated by a build in a
            # Docker container, as root, the script that runs the upload
            # doesn't always have permission to touch a timestamp file there.
            # In production we build/upload from ephemeral machines, so the
            # timestamp mechanism is not used. Print a warning and move on
            # without erroring out.
            print("Warning: unable to update timestamp file", self.path,
                  "(permission problem?)", file=sys.stderr)


class PackageSuite:
    NEED_SSH = False

    def __init__(self, glob_root, rel_globs):
        logger_part = getattr(self, 'LOGGER_PART', os.path.basename(glob_root))
        self.logger = logging.getLogger('arvados-dev.upload.' + logger_part)
        self.globs = [os.path.join(glob_root, rel_glob)
                      for rel_glob in rel_globs]

    def files_to_upload(self, since_timestamp):
        for abs_glob in self.globs:
            for path in glob.glob(abs_glob):
                if os.path.getmtime(path) >= since_timestamp:
                    yield path

    def upload_file(self, path):
        raise NotImplementedError("PackageSuite.upload_file")

    def upload_files(self, paths):
        for path in paths:
            self.logger.info("Uploading %s", path)
            self.upload_file(path)

    def post_uploads(self, paths):
        pass

    def update_packages(self, since_timestamp):
        upload_paths = list(self.files_to_upload(since_timestamp))
        if upload_paths:
            self.upload_files(upload_paths)
            self.post_uploads(upload_paths)


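# Illustrative sketch of the PackageSuite extension points. This class is not
# registered in PACKAGE_SUITES and nothing in this script instantiates it.
class _ExamplePackageSuite(PackageSuite):
    LOGGER_PART = 'example'

    def upload_file(self, path):
        # A real suite pushes `path` to its repository here (see the concrete
        # suites below); this sketch only logs what it would have uploaded.
        self.logger.info("would upload %s", path)

    def post_uploads(self, paths):
        # Optional hook: concrete suites use this for repository-level work,
        # such as republishing indexes after all files have been copied.
        self.logger.info("would refresh repository metadata for %d files",
                         len(paths))
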
class PythonPackageSuite(PackageSuite):
    LOGGER_PART = 'python'

    def upload_file(self, path):
        subprocess.run([
            'twine', 'upload',
            '--disable-progress-bar',
            '--non-interactive',
            '--skip-existing',
            path,
        ], stdin=subprocess.DEVNULL, check=True)


class GemPackageSuite(PackageSuite):
    LOGGER_PART = 'gems'
    REUPLOAD_REGEXP = re.compile(r'^Repushing of gem versions is not allowed\.$')

    def upload_file(self, path):
        cmd = ['gem', 'push', path]
        push_returncode, repushed = run_and_grep(cmd, 'stdout', self.REUPLOAD_REGEXP)
        if (push_returncode != 0) and not repushed:
            raise subprocess.CalledProcessError(push_returncode, cmd)


class DistroPackageSuite(PackageSuite):
    NEED_SSH = True
    REMOTE_DEST_DIR = 'tmp'

    def __init__(self, glob_root, rel_globs, target, ssh_host, ssh_opts):
        super().__init__(glob_root, rel_globs)
        self.target = target
        self.ssh_host = ssh_host
        self.ssh_opts = ['-o' + opt for opt in ssh_opts]
        if not self.logger.isEnabledFor(logging.INFO):
            self.ssh_opts.append('-q')

    def _build_cmd(self, base_cmd, *args):
        cmd = [base_cmd]
        cmd.extend(self.ssh_opts)
        cmd.extend(args)
        return cmd

    def _paths_basenames(self, paths):
        return (os.path.basename(path) for path in paths)

    def _run_script(self, script, *args):
        # SSH will use a shell to run our bash command, so we have to
        # quote our arguments.
        # self.__class__.__name__ provides $0 for the script, which makes a
        # nicer message if there's an error.
        subprocess.check_call(self._build_cmd(
                'ssh', self.ssh_host, 'bash', '-ec', shlex.quote(script),
                self.__class__.__name__, *(shlex.quote(s) for s in args)))

    def upload_files(self, paths):
        dest_dir = os.path.join(self.REMOTE_DEST_DIR, self.target)
        mkdir = self._build_cmd('ssh', self.ssh_host, 'install', '-d', dest_dir)
        subprocess.check_call(mkdir)
        cmd = self._build_cmd('scp', *paths)
        cmd.append('{}:{}'.format(self.ssh_host, dest_dir))
        subprocess.check_call(cmd)


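# For illustration only (the host, option, and file name are placeholders, not
# values used by this script): with ssh_host 'user@repo.example' and ssh_opts
# ['BatchMode=yes'], DistroPackageSuite._run_script builds an argv roughly like
#
#     ['ssh', '-oBatchMode=yes', 'user@repo.example', 'bash', '-ec',
#      "<script text, shell-quoted by shlex.quote>",
#      'DebianPackageSuite', 'tmp/debian12', 'example.deb']
#
# so the script text and its arguments reach the remote bash as single words,
# and the class name becomes $0 in any error message the script prints.
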
class DebianPackageSuite(DistroPackageSuite):
    APT_SCRIPT = """
set -e
cd "$1"; shift
DISTNAME=$1; shift
# aptly implements its own locking, but its wait strategy as of April 2024 is
# not patient enough to accommodate multiple simultaneous uploads.
APTLY_LOCK="${XDG_RUNTIME_DIR:-/tmp}/aptly-upload.lock"
aptly() {
  flock --wait=300 "$APTLY_LOCK" aptly "$@"
}
for package in "$@"; do
  if aptly repo search "$DISTNAME" "${package%.deb}" >/dev/null 2>&1; then
    echo "Not adding $package, it is already present in repo $DISTNAME"
    rm "$package"
  else
    aptly repo add -remove-files "$DISTNAME" "$package"
  fi
done
aptly publish update "$DISTNAME" filesystem:"${DISTNAME%-*}":
"""

    def __init__(self, glob_root, rel_globs, target, ssh_host, ssh_opts, repo):
        super().__init__(glob_root, rel_globs, target, ssh_host, ssh_opts)
        self.TARGET_DISTNAMES = {
            'debian10': 'buster-'+repo,
            'debian11': 'bullseye-'+repo,
            'debian12': 'bookworm-'+repo,
            'ubuntu1804': 'bionic-'+repo,
            'ubuntu2004': 'focal-'+repo,
            'ubuntu2204': 'jammy-'+repo,
            'ubuntu2404': 'noble-'+repo,
            }

    def post_uploads(self, paths):
        self._run_script(self.APT_SCRIPT, self.REMOTE_DEST_DIR + '/' + self.target,
                         self.TARGET_DISTNAMES[self.target],
                         *self._paths_basenames(paths))


class RedHatPackageSuite(DistroPackageSuite):
    CREATEREPO_SCRIPT = """
cd "$1"; shift
REPODIR=$1; shift
rpmsign --addsign "$@" </dev/null
mv "$@" "$REPODIR"
createrepo_c -c ~/.createrepo-cache --update "$REPODIR"
"""
    REPO_ROOT = '/var/www/rpm.arvados.org/'

    def __init__(self, glob_root, rel_globs, target, ssh_host, ssh_opts, repo):
        super().__init__(glob_root, rel_globs, target, ssh_host, ssh_opts)
        self.TARGET_REPODIRS = {
            'centos7': 'RHEL/7/%s/x86_64/' % repo,
            'rocky8': 'RHEL/8/%s/x86_64/' % repo,
        }

    def post_uploads(self, paths):
        repo_dir = os.path.join(self.REPO_ROOT,
                                self.TARGET_REPODIRS[self.target])
        self._run_script(self.CREATEREPO_SCRIPT, self.REMOTE_DEST_DIR + '/' + self.target,
                         repo_dir, *self._paths_basenames(paths))


def _define_suite(suite_class, *rel_globs, **kwargs):
    return functools.partial(suite_class, rel_globs=rel_globs, **kwargs)

PACKAGE_SUITES = {
    'python': _define_suite(PythonPackageSuite,
                            'sdk/cwl/dist/*.tar.gz',
                            'sdk/cwl/dist/*.whl',
                            'sdk/python/dist/*.tar.gz',
                            'sdk/python/dist/*.whl',
                            'services/fuse/dist/*.tar.gz',
                            'services/fuse/dist/*.whl',
                            'tools/crunchstat-summary/dist/*.tar.gz',
                            'tools/crunchstat-summary/dist/*.whl',
                            ),
    'gems': _define_suite(GemPackageSuite,
                          'sdk/ruby-google-api-client/*.gem',
                          'sdk/ruby/*.gem',
                          'sdk/cli/*.gem',
                          'services/login-sync/*.gem',
                          ),
    }

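# For illustration: each entry above is a functools.partial that still needs
# the workspace root (and, for the SSH-based suites defined in
# parse_arguments(), the SSH details) before it can upload anything.
# build_suite_and_upload() below completes it along these lines, where
# '/path/to/workspace' and the timestamp are placeholders:
#
#     suite = PACKAGE_SUITES['python']('/path/to/workspace')
#     suite.update_packages(since_timestamp=1700000000)
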
def parse_arguments(arguments):
    parser = argparse.ArgumentParser(
        description="Upload Arvados packages to various repositories")
    parser.add_argument(
        '--workspace', '-W', default=os.environ.get('WORKSPACE'),
        help="Arvados source directory with built packages to upload")
    parser.add_argument(
        '--ssh-host', '-H',
        help="Host specification for distribution repository server")
    parser.add_argument('-o', action='append', default=[], dest='ssh_opts',
                        metavar='OPTION', help="Pass option to `ssh -o`")
    parser.add_argument('--verbose', '-v', action='count', default=0,
                        help="Log more information and subcommand output")
    parser.add_argument(
        '--repo', choices=['dev', 'testing'],
        help="Whether to upload to dev (nightly) or testing (release candidate) repository")
    parser.add_argument(
        'targets', nargs='*', default=['all'], metavar='target',
        help="Upload packages to these targets (default all)\nAvailable targets: " +
        ', '.join(sorted(PACKAGE_SUITES.keys())))
    args = parser.parse_args(arguments)
    if 'all' in args.targets:
        args.targets = list(PACKAGE_SUITES.keys())

    if args.workspace is None:
        parser.error("workspace not set from command line or environment")

    for target in [
            'debian10', 'debian11', 'debian12',
            'ubuntu1804', 'ubuntu2004', 'ubuntu2204', 'ubuntu2404',
    ]:
        PACKAGE_SUITES[target] = _define_suite(
            DebianPackageSuite, os.path.join('packages', target, '*.deb'),
            target=target, repo=args.repo)
    for target in ['centos7', 'rocky8']:
        PACKAGE_SUITES[target] = _define_suite(
            RedHatPackageSuite, os.path.join('packages', target, '*.rpm'),
            target=target, repo=args.repo)

    for target in args.targets:
        try:
            suite_class = PACKAGE_SUITES[target].func
        except KeyError:
            parser.error("unrecognized target {!r}".format(target))
        if suite_class.NEED_SSH and (args.ssh_host is None):
            parser.error(
                "--ssh-host must be specified to upload distribution packages")
    return args

def setup_logger(stream_dest, args):
    log_handler = logging.StreamHandler(stream_dest)
    log_handler.setFormatter(logging.Formatter(
            '%(asctime)s %(name)s[%(process)d] %(levelname)s: %(message)s',
            '%Y-%m-%d %H:%M:%S'))
    logger = logging.getLogger('arvados-dev.upload')
    logger.addHandler(log_handler)
    # Each -v lowers the threshold by one level: WARNING by default, INFO with
    # -v, DEBUG with -vv, clamped so the level never drops below 1.
    logger.setLevel(max(1, logging.WARNING - (10 * args.verbose)))

def build_suite_and_upload(target, since_timestamp, args):
    suite_def = PACKAGE_SUITES[target]
    kwargs = {}
    if suite_def.func.NEED_SSH:
        kwargs.update(ssh_host=args.ssh_host, ssh_opts=args.ssh_opts)
    suite = suite_def(args.workspace, **kwargs)
    suite.update_packages(since_timestamp)

def main(arguments, stdout=sys.stdout, stderr=sys.stderr):
    args = parse_arguments(arguments)
    setup_logger(stderr, args)

    for target in args.targets:
        ts_file = TimestampFile(os.path.join(args.workspace, 'packages',
                                             '.last_upload_%s' % target))
        last_upload_ts = ts_file.last_upload()
        build_suite_and_upload(target, last_upload_ts, args)
        ts_file.update()

if __name__ == '__main__':
    main(sys.argv[1:])
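
# Example invocation (all values are placeholders): upload fresh Debian 12 and
# Rocky 8 packages plus the Python and gem suites, accepting the repository
# host's key on first connection.
#
#     run_upload_packages.py --workspace "$WORKSPACE" --repo dev \
#         -H upload@apt.example.org -o StrictHostKeyChecking=accept-new \
#         debian12 rocky8 python gems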