Passes arvados git hash as an envvar to the packer script. Refs #21461
[arvados-dev.git] / jenkins / run_upload_packages.py
1 #!/usr/bin/env python3
2
3 # Copyright (C) The Arvados Authors. All rights reserved.
4 #
5 # SPDX-License-Identifier: AGPL-3.0
6
import argparse
import errno
import functools
import glob
import locale
import logging
import os
import pipes
import re
import shlex
import shutil
import subprocess
import sys
import time
20
def run_and_grep(cmd, read_output, *regexps,
                 encoding=locale.getpreferredencoding(), **popen_kwargs):
    """Run a subprocess and capture output lines matching regexps.

    Arguments:
    * cmd: The command to run, as a list or string, as for subprocess.Popen.
    * read_output: 'stdout' or 'stderr', the name of the output stream to read.
    Remaining arguments are regexps to match output, as strings or compiled
    regexp objects.  Output lines matching any regexp will be captured.

    Keyword arguments:
    * encoding: The encoding used to decode the subprocess output.
    Remaining keyword arguments are passed directly to subprocess.Popen.

    Lines read from 'stderr' are echoed to this process's stderr as they
    arrive.  Lines read from 'stdout' are consumed without being echoed.

    Returns 2-tuple (subprocess returncode, list of matched output lines).
    """
    regexps = [regexp if hasattr(regexp, 'search') else re.compile(regexp)
               for regexp in regexps]
    popen_kwargs[read_output] = subprocess.PIPE
    proc = subprocess.Popen(cmd, **popen_kwargs)
    pipe = getattr(proc, read_output)
    matched_lines = []
    # The pipe object created by Popen owns the file descriptor.  The
    # decoding wrapper is opened with closefd=False so the descriptor is
    # closed exactly once (by `pipe`), instead of once by the wrapper and
    # again when the Popen pipe object is finalized.
    with pipe, open(pipe.fileno(), encoding=encoding, closefd=False) as output:
        for line in output:
            if any(regexp.search(line) for regexp in regexps):
                matched_lines.append(line)
            if read_output == 'stderr':
                print(line, file=sys.stderr, end='')
    return proc.wait(), matched_lines
49
50
class TimestampFile:
    """Record the time of the last upload as a file's modification time.

    The object remembers when it was created (`start_time`).  `update`
    stamps the file with that time rather than the time the upload
    finished, so the recorded timestamp is the moment this upload run
    began.
    """

    def __init__(self, path):
        """Remember `path` and make sure its parent directory exists.

        Raises OSError if the parent directory cannot be created (for
        example because a non-directory already exists at that path).
        """
        self.path = path
        # Make sure the dirname for `path` exists.  A bare filename has an
        # empty dirname, which os.makedirs would reject, so skip it.
        parent = os.path.dirname(path)
        if parent:
            os.makedirs(parent, exist_ok=True)
        self.start_time = time.time()

    def last_upload(self):
        """Return the file's mtime, or -1 if it cannot be read."""
        try:
            return os.path.getmtime(self.path)
        except OSError:
            return -1

    def update(self):
        """Create the file if needed and set its mtime to `start_time`.

        Failures are reported as a warning on stdout and otherwise ignored.
        """
        try:
            os.close(os.open(self.path, os.O_CREAT | os.O_APPEND))
            os.utime(self.path, (time.time(), self.start_time))
        except OSError:
            # when the packages directory is created/populated by a build in a
            # docker container, as root, the script that runs the upload
            # doesn't always have permission to touch a timestamp file there.
            # In production, we build/upload from ephemeral machines, which
            # means that the timestamp mechanism is not used. We print a
            # warning and move on without erroring out.
            # Only OSError is caught so that KeyboardInterrupt and SystemExit
            # still propagate.
            print("Warning: unable to update timestamp file",self.path,"permission problem?")
84
class PackageSuite:
    """Base class for a group of package files that are uploaded together.

    A suite is described by glob patterns relative to a root directory.
    Subclasses implement `upload_file` (or override `upload_files`) and may
    override `post_uploads` to run a step after all files are sent.
    """
    # Whether building this suite requires ssh_host/ssh_opts arguments.
    NEED_SSH = False

    def __init__(self, glob_root, rel_globs):
        """Resolve `rel_globs` against `glob_root` and set up a logger.

        The logger name ends with the class's LOGGER_PART when it defines
        one, and with the basename of `glob_root` otherwise.
        """
        root_name = os.path.basename(glob_root)
        suffix = getattr(self, 'LOGGER_PART', root_name)
        self.logger = logging.getLogger('arvados-dev.upload.' + suffix)
        self.globs = []
        for pattern in rel_globs:
            self.globs.append(os.path.join(glob_root, pattern))

    def files_to_upload(self, since_timestamp):
        """Yield matching paths whose mtime is at or after `since_timestamp`."""
        for pattern in self.globs:
            yield from (
                candidate for candidate in glob.glob(pattern)
                if os.path.getmtime(candidate) >= since_timestamp)

    def upload_file(self, path):
        """Upload one file.  Must be provided by subclasses."""
        raise NotImplementedError("PackageSuite.upload_file")

    def upload_files(self, paths):
        """Log and upload each path in turn."""
        for item in paths:
            self.logger.info("Uploading %s", item)
            self.upload_file(item)

    def post_uploads(self, paths):
        """Hook run after a successful `upload_files`; does nothing here."""
        return None

    def update_packages(self, since_timestamp):
        """Upload every file newer than `since_timestamp`, then run the hook.

        Nothing is called when no file qualifies.
        """
        pending = list(self.files_to_upload(since_timestamp))
        if not pending:
            return
        self.upload_files(pending)
        self.post_uploads(pending)
116
117
class PythonPackageSuite(PackageSuite):
    """Upload Python packages by running `setup.py ... upload`.

    The matched file is only used to locate the package source directory
    (two directory levels above it); the distributions that get uploaded
    are rebuilt from that directory.
    """
    LOGGER_PART = 'python'
    # stderr lines that mean the upload failed only because this version
    # was already uploaded; such failures are not treated as errors.
    REUPLOAD_REGEXPS = [
        re.compile(
            r'^error: Upload failed \(400\): A file named "[^"]+" already exists\b'),
        re.compile(
            r'^error: Upload failed \(400\): File already exists\b'),
        re.compile(
            r'^error: Upload failed \(400\): Only one sdist may be uploaded per release\b'),
    ]

    def __init__(self, glob_root, rel_globs):
        """Set up the suite and the set of source directories already handled."""
        super().__init__(glob_root, rel_globs)
        self.seen_packages = set()

    def upload_file(self, path):
        """Build and upload the package that `path` belongs to, once.

        Raises subprocess.CalledProcessError when the command fails and no
        stderr line matches REUPLOAD_REGEXPS.
        """
        src_dir = os.path.dirname(os.path.dirname(path))
        if src_dir in self.seen_packages:
            # Several matched files can share one source directory; it only
            # needs to be built and uploaded a single time.
            return
        self.seen_packages.add(src_dir)
        # `upload` only uploads distributions generated earlier in the same
        # command; it cannot upload distributions already on disk.  So the
        # build steps run first, writing to a dedicated directory to avoid
        # interfering with our timestamp tracking.
        quiet = [] if self.logger.isEnabledFor(logging.INFO) else ['--quiet']
        cmd = [
            'python3', 'setup.py', *quiet,
            'bdist_wheel', '--dist-dir', '.upload_dist',
            'sdist', '--dist-dir', '.upload_dist',
            'upload',
        ]
        status, already_uploaded = run_and_grep(
            cmd, 'stderr', *self.REUPLOAD_REGEXPS, cwd=src_dir)
        if status != 0 and not already_uploaded:
            raise subprocess.CalledProcessError(status, cmd)
        shutil.rmtree(os.path.join(src_dir, '.upload_dist'))
154
155
class GemPackageSuite(PackageSuite):
    """Upload Ruby gems with `gem push`."""
    LOGGER_PART = 'gems'
    # stdout line that means this gem version was already pushed; a failed
    # push that prints it is not treated as an error.
    REUPLOAD_REGEXP = re.compile(r'^Repushing of gem versions is not allowed\.$')

    def upload_file(self, path):
        """Push one gem file.

        Raises subprocess.CalledProcessError when the push fails and its
        stdout has no line matching REUPLOAD_REGEXP.
        """
        push_cmd = ['gem', 'push', path]
        exit_status, repush_lines = run_and_grep(
            push_cmd, 'stdout', self.REUPLOAD_REGEXP)
        acceptable = (exit_status == 0) or bool(repush_lines)
        if not acceptable:
            raise subprocess.CalledProcessError(exit_status, push_cmd)
165
166
class DistroPackageSuite(PackageSuite):
    """Base class for suites copied to a repository server over SSH.

    Files are copied with scp into REMOTE_DEST_DIR/<target> on `ssh_host`.
    `_run_script` is available to run a bash script on that host.
    """
    NEED_SSH = True
    # Directory on the remote host, as passed to ssh/scp, that receives
    # the uploaded files (one subdirectory per target).
    REMOTE_DEST_DIR = 'tmp'

    def __init__(self, glob_root, rel_globs, target, ssh_host, ssh_opts):
        """Set up the suite.

        Arguments:
        * glob_root, rel_globs: As for PackageSuite.
        * target: Name of the distribution target.
        * ssh_host: Host specification passed to ssh and scp.
        * ssh_opts: Option strings, each passed to ssh and scp as `-o<opt>`.
        """
        super().__init__(glob_root, rel_globs)
        self.target = target
        self.ssh_host = ssh_host
        self.ssh_opts = ['-o' + opt for opt in ssh_opts]
        if not self.logger.isEnabledFor(logging.INFO):
            self.ssh_opts.append('-q')

    def _build_cmd(self, base_cmd, *args):
        """Return [base_cmd, <ssh options>..., <args>...] as a list."""
        cmd = [base_cmd]
        cmd.extend(self.ssh_opts)
        cmd.extend(args)
        return cmd

    def _paths_basenames(self, paths):
        """Return an iterator over the basename of each path."""
        return (os.path.basename(path) for path in paths)

    def _run_script(self, script, *args):
        """Run the bash `script` on the SSH host with `args` as "$1", "$2"...

        Raises subprocess.CalledProcessError if ssh exits non-zero.
        """
        # SSH will use a shell to run our bash command, so we have to
        # quote our arguments.
        # self.__class__.__name__ provides $0 for the script, which makes a
        # nicer message if there's an error.
        # shlex.quote is the documented quoting function; pipes.quote was an
        # undocumented alias for it in the deprecated `pipes` module.
        subprocess.check_call(self._build_cmd(
                'ssh', self.ssh_host, 'bash', '-ec', shlex.quote(script),
                self.__class__.__name__, *(shlex.quote(s) for s in args)))

    def upload_files(self, paths):
        """Create the remote directory and copy all `paths` there with scp.

        Raises subprocess.CalledProcessError if ssh or scp exits non-zero.
        """
        # Materialize once so the paths can be both logged and copied even
        # if the caller passed a one-shot iterable.
        paths = list(paths)
        dest_dir = os.path.join(self.REMOTE_DEST_DIR, self.target)
        mkdir = self._build_cmd('ssh', self.ssh_host, 'install', '-d', dest_dir)
        subprocess.check_call(mkdir)
        # Log each file, as PackageSuite.upload_files does, even though the
        # copy itself is a single scp invocation.
        for path in paths:
            self.logger.info("Uploading %s", path)
        cmd = self._build_cmd('scp', *paths)
        cmd.append('{}:{}'.format(self.ssh_host, dest_dir))
        subprocess.check_call(cmd)
204
205
class DebianPackageSuite(DistroPackageSuite):
    """Upload .deb packages and add them to an aptly repository."""
    # Run on the SSH host with arguments: directory, aptly repo name, then
    # package file names.  Packages that `aptly repo search` already finds
    # are deleted instead of added; the repo is published at the end.
    APT_SCRIPT = """
cd "$1"; shift
DISTNAME=$1; shift
for package in "$@"; do
  set +e
  aptly repo search "$DISTNAME" "${package%.deb}" >/dev/null 2>&1
  RET=$?
  set -e
  if [[ $RET -eq 0 ]]; then
    echo "Not adding $package, it is already present in repo $DISTNAME"
    rm "$package"
  else
    aptly repo add -remove-files "$DISTNAME" "$package"
  fi
done
aptly publish update "$DISTNAME" filesystem:"${DISTNAME%-*}":
"""

    def __init__(self, glob_root, rel_globs, target, ssh_host, ssh_opts, repo):
        """Set up the suite; `repo` is the suffix of each aptly repo name."""
        super().__init__(glob_root, rel_globs, target, ssh_host, ssh_opts)
        release_names = (
            ('debian10', 'buster'),
            ('debian11', 'bullseye'),
            ('debian12', 'bookworm'),
            ('ubuntu1804', 'bionic'),
            ('ubuntu2004', 'focal'),
            ('ubuntu2204', 'jammy'),
        )
        # Maps each supported target to "<release name>-<repo>".
        self.TARGET_DISTNAMES = {
            distro: release + '-' + repo
            for distro, release in release_names
        }

    def post_uploads(self, paths):
        """Run APT_SCRIPT on the SSH host for the files just uploaded."""
        remote_dir = self.REMOTE_DEST_DIR + '/' + self.target
        distname = self.TARGET_DISTNAMES[self.target]
        package_names = self._paths_basenames(paths)
        self._run_script(self.APT_SCRIPT, remote_dir, distname, *package_names)
240
241
class RedHatPackageSuite(DistroPackageSuite):
    """Upload .rpm packages, sign them, and refresh the repository metadata."""
    # Run on the SSH host with arguments: directory, repository directory,
    # then package file names.
    CREATEREPO_SCRIPT = """
cd "$1"; shift
REPODIR=$1; shift
rpmsign --addsign "$@" </dev/null
mv "$@" "$REPODIR"
createrepo -c ~/.createrepo-cache --update "$REPODIR"
"""
    REPO_ROOT = '/var/www/rpm.arvados.org/'

    def __init__(self, glob_root, rel_globs, target, ssh_host, ssh_opts, repo):
        """Set up the suite; `repo` names a directory level in the repo path."""
        super().__init__(glob_root, rel_globs, target, ssh_host, ssh_opts)
        # Maps each supported target to its directory under REPO_ROOT.
        repodirs = {}
        repodirs['centos7'] = 'CentOS/7/%s/x86_64/' % repo
        repodirs['rocky8'] = 'CentOS/8/%s/x86_64/' % repo
        self.TARGET_REPODIRS = repodirs

    def post_uploads(self, paths):
        """Run CREATEREPO_SCRIPT on the SSH host for the files just uploaded."""
        target_subdir = self.TARGET_REPODIRS[self.target]
        repo_dir = os.path.join(self.REPO_ROOT, target_subdir)
        upload_dir = self.REMOTE_DEST_DIR + '/' + self.target
        rpm_names = self._paths_basenames(paths)
        self._run_script(self.CREATEREPO_SCRIPT, upload_dir, repo_dir, *rpm_names)
264
265
266 def _define_suite(suite_class, *rel_globs, **kwargs):
267     return functools.partial(suite_class, rel_globs=rel_globs, **kwargs)
268
# Registry of upload targets: target name -> partial that builds its suite.
# Only the suites that need no SSH host are listed here.
PACKAGE_SUITES = dict(
    python=_define_suite(
        PythonPackageSuite,
        'sdk/python/dist/*.tar.gz',
        'sdk/cwl/dist/*.tar.gz',
        'services/fuse/dist/*.tar.gz',
        'tools/crunchstat-summary/dist/*.tar.gz',
    ),
    gems=_define_suite(
        GemPackageSuite,
        'sdk/ruby/*.gem',
        'sdk/cli/*.gem',
        'services/login-sync/*.gem',
    ),
)
282
def parse_arguments(arguments):
    """Parse the command line and register the distribution suites.

    Arguments:
    * arguments: List of command-line argument strings (without argv[0]).

    Side effect: adds one entry per Debian/Ubuntu and CentOS/Rocky target
    to PACKAGE_SUITES, bound to the parsed --repo value.

    Returns the argparse namespace.  Exits through parser.error when the
    workspace is unset, a target is unrecognized, or a target that needs
    SSH is requested without --ssh-host or --repo.
    """
    parser = argparse.ArgumentParser(
        description="Upload Arvados packages to various repositories")
    parser.add_argument(
        '--workspace', '-W', default=os.environ.get('WORKSPACE'),
        help="Arvados source directory with built packages to upload")
    parser.add_argument(
        '--ssh-host', '-H',
        help="Host specification for distribution repository server")
    parser.add_argument('-o', action='append', default=[], dest='ssh_opts',
                         metavar='OPTION', help="Pass option to `ssh -o`")
    parser.add_argument('--verbose', '-v', action='count', default=0,
                        help="Log more information and subcommand output")
    parser.add_argument(
        '--repo', choices=['dev', 'testing'],
        help="Whether to upload to dev (nightly) or testing (release candidate) repository")

    parser.add_argument(
        'targets', nargs='*', default=['all'], metavar='target',
        help="Upload packages to these targets (default all)\nAvailable targets: " +
        ', '.join(sorted(PACKAGE_SUITES.keys())))
    args = parser.parse_args(arguments)
    # 'all' is expanded before the distribution suites are registered
    # below, so it covers only the targets already in PACKAGE_SUITES.
    if 'all' in args.targets:
        args.targets = list(PACKAGE_SUITES.keys())

    if args.workspace is None:
        parser.error("workspace not set from command line or environment")

    for target in [
            'debian10', 'debian11', 'debian12',
            'ubuntu1804', 'ubuntu2004', 'ubuntu2204',
    ]:
        PACKAGE_SUITES[target] = _define_suite(
            DebianPackageSuite, os.path.join('packages', target, '*.deb'),
            target=target, repo=args.repo)
    for target in ['centos7', 'rocky8']:
        PACKAGE_SUITES[target] = _define_suite(
            RedHatPackageSuite, os.path.join('packages', target, '*.rpm'),
            target=target, repo=args.repo)

    for target in args.targets:
        try:
            suite_class = PACKAGE_SUITES[target].func
        except KeyError:
            parser.error("unrecognized target {!r}".format(target))
        if suite_class.NEED_SSH:
            if args.ssh_host is None:
                parser.error(
                    "--ssh-host must be specified to upload distribution packages")
            # The distribution suites build repository names and paths from
            # --repo.  Without it the Debian suite fails with a TypeError
            # and the RedHat suite targets a ".../None/..." directory.
            if args.repo is None:
                parser.error(
                    "--repo must be specified to upload distribution packages")
    return args
332
def setup_logger(stream_dest, args):
    """Attach a stream handler to the 'arvados-dev.upload' logger.

    Arguments:
    * stream_dest: Stream that log records are written to.
    * args: Object with a `verbose` count.  The level starts at WARNING
      and drops by 10 per count, never going below 1.
    """
    formatter = logging.Formatter(
        '%(asctime)s %(name)s[%(process)d] %(levelname)s: %(message)s',
        '%Y-%m-%d %H:%M:%S')
    handler = logging.StreamHandler(stream_dest)
    handler.setFormatter(formatter)
    upload_logger = logging.getLogger('arvados-dev.upload')
    upload_logger.addHandler(handler)
    requested_level = logging.WARNING - (10 * args.verbose)
    upload_logger.setLevel(requested_level if requested_level > 1 else 1)
341
def build_suite_and_upload(target, since_timestamp, args):
    """Build the suite registered for `target` and upload its new files.

    Arguments:
    * target: Key into PACKAGE_SUITES.
    * since_timestamp: Passed to the suite's update_packages.
    * args: Parsed arguments; `workspace` is the suite's glob root, and
      `ssh_host`/`ssh_opts` are passed along when the suite needs SSH.
    """
    make_suite = PACKAGE_SUITES[target]
    ssh_kwargs = (
        {'ssh_host': args.ssh_host, 'ssh_opts': args.ssh_opts}
        if make_suite.func.NEED_SSH else {})
    make_suite(args.workspace, **ssh_kwargs).update_packages(since_timestamp)
349
def main(arguments, stdout=sys.stdout, stderr=sys.stderr):
    """Upload the packages for every requested target.

    Arguments:
    * arguments: List of command-line argument strings (without argv[0]).
    * stdout: Accepted but not used by this function.
    * stderr: Stream that log output is written to.

    Each target has its own `.last_upload_<target>` timestamp file in the
    workspace's `packages` directory.  Its previous value selects which
    files to upload, and it is updated after the target's upload returns.
    """
    args = parse_arguments(arguments)
    setup_logger(stderr, args)

    packages_dir = os.path.join(args.workspace, 'packages')
    for target in args.targets:
        stamp = TimestampFile(
            os.path.join(packages_dir, '.last_upload_%s' % target))
        previous_upload = stamp.last_upload()
        build_suite_and_upload(target, previous_upload, args)
        stamp.update()
360
# Run the uploader only when executed as a script, passing the
# command-line arguments without the program name.
if __name__ == '__main__':
    main(sys.argv[1:])