19092: upload crunchstat_summary to PyPI
[arvados-dev.git] / jenkins / run_upload_packages.py
1 #!/usr/bin/env python3
2
3 # Copyright (C) The Arvados Authors. All rights reserved.
4 #
5 # SPDX-License-Identifier: AGPL-3.0
6
7 import argparse
8 import errno
9 import functools
10 import glob
11 import locale
12 import logging
13 import os
14 import pipes
15 import re
16 import shutil
17 import subprocess
18 import sys
19 import time
20
def run_and_grep(cmd, read_output, *regexps,
                 encoding=locale.getpreferredencoding(), **popen_kwargs):
    """Run a subprocess and capture output lines matching regexps.

    Arguments:
    * cmd: The command to run, as a list or string, as for subprocess.Popen.
    * read_output: 'stdout' or 'stderr', the name of the output stream to read.
    Remaining arguments are regexps to match output, as strings or compiled
    regexp objects.  Output lines matching any regexp will be captured.

    Keyword arguments:
    * encoding: The encoding used to decode the subprocess output.
    Remaining keyword arguments are passed directly to subprocess.Popen.

    When read_output is 'stderr', every line read is also echoed to this
    process's stderr, so the subprocess's diagnostics stay visible even
    though they are being piped.

    Returns 2-tuple (subprocess returncode, list of matched output lines).
    """
    patterns = [regexp if hasattr(regexp, 'search') else re.compile(regexp)
                for regexp in regexps]
    popen_kwargs[read_output] = subprocess.PIPE
    proc = subprocess.Popen(cmd, **popen_kwargs)
    pipe = getattr(proc, read_output)
    matched_lines = []
    # The Popen object owns the pipe's file descriptor.  Wrap it with
    # closefd=False so the decoding wrapper does not close the descriptor
    # behind the pipe object's back (which would lead to a second close of
    # the same fd number later); the pipe itself is closed exactly once
    # when the `with` block exits.
    with pipe, open(pipe.fileno(), encoding=encoding, closefd=False) as output:
        for line in output:
            if any(pattern.search(line) for pattern in patterns):
                matched_lines.append(line)
            if read_output == 'stderr':
                print(line, file=sys.stderr, end='')
    return proc.wait(), matched_lines
49
50
class TimestampFile:
    """Record when packages were last uploaded, using a file's mtime.

    The file at `path` carries no content; only its modification time is
    meaningful.  `start_time` is captured when the object is constructed,
    and `update()` stamps the file with that time.
    """

    def __init__(self, path):
        """Remember `path` and make sure its parent directory exists.

        Raises OSError if the parent directory cannot be created (including
        when something that is not a directory already occupies that name).
        """
        self.path = path
        # Make sure the dirname for `path` exists.  A bare filename has an
        # empty dirname, which os.makedirs() rejects, so skip it then.
        parent = os.path.dirname(path)
        if parent:
            os.makedirs(parent, exist_ok=True)
        self.start_time = time.time()

    def last_upload(self):
        """Return the timestamp file's mtime, or -1 if it cannot be read."""
        try:
            return os.path.getmtime(self.path)
        except OSError:
            return -1

    def update(self):
        """Create the timestamp file if needed and set its mtime to start_time.

        Failures to touch the file are reported with a warning on stdout and
        otherwise ignored.
        """
        try:
            os.close(os.open(self.path, os.O_CREAT | os.O_APPEND))
            # (atime, mtime): access time is "now", modification time is the
            # moment this object was created.
            os.utime(self.path, (time.time(), self.start_time))
        except OSError:
            # when the packages directory is created/populated by a build in a
            # docker container, as root, the script that runs the upload
            # doesn't always have permission to touch a timestamp file there.
            # In production, we build/upload from ephemeral machines, which
            # means that the timestamp mechanism is not used. We print a
            # warning and move on without erroring out.
            #
            # Only OSError is tolerated here: interrupts and programming
            # errors must still propagate.
            print("Warning: unable to update timestamp file",self.path,"permission problem?")
84
class PackageSuite:
    """Base class for a group of built package files uploaded together.

    A suite is described by glob patterns rooted at `glob_root`.  Subclasses
    implement `upload_file()` (or override `upload_files()`), and may
    override `post_uploads()` to run work after a batch has been uploaded.
    """

    # Subclasses set this to True when they need SSH connection settings;
    # parse_arguments() and build_suite_and_upload() in this file read it.
    NEED_SSH = False

    def __init__(self, glob_root, rel_globs):
        """Set up the suite's logger and its absolute glob patterns.

        The logger name ends with the class's LOGGER_PART attribute when one
        is defined, otherwise with the basename of `glob_root`.
        """
        default_part = os.path.basename(glob_root)
        name_suffix = getattr(self, 'LOGGER_PART', default_part)
        self.logger = logging.getLogger('arvados-dev.upload.' + name_suffix)
        self.globs = [os.path.join(glob_root, pattern) for pattern in rel_globs]

    def files_to_upload(self, since_timestamp):
        """Yield each matching path whose mtime is >= `since_timestamp`."""
        for pattern in self.globs:
            yield from (
                candidate for candidate in glob.glob(pattern)
                if os.path.getmtime(candidate) >= since_timestamp)

    def upload_file(self, path):
        """Upload a single file; must be provided by subclasses."""
        raise NotImplementedError("PackageSuite.upload_file")

    def upload_files(self, paths):
        """Log and upload every path in `paths`, one at a time."""
        for each_path in paths:
            self.logger.info("Uploading %s", each_path)
            self.upload_file(each_path)

    def post_uploads(self, paths):
        """Hook called after `upload_files()`; does nothing by default."""
        return None

    def update_packages(self, since_timestamp):
        """Upload everything newer than `since_timestamp`, then run the hook.

        Nothing is called at all when no file qualifies.
        """
        pending = list(self.files_to_upload(since_timestamp))
        if not pending:
            return
        self.upload_files(pending)
        self.post_uploads(pending)
114             self.upload_files(upload_paths)
115             self.post_uploads(upload_paths)
116
117
class PythonPackageSuite(PackageSuite):
    """Upload Python packages by running `setup.py ... upload` in their source tree."""

    LOGGER_PART = 'python'
    # stderr lines showing the upload was refused only because this
    # distribution already exists on the index.  upload_file() treats a
    # failed run that printed one of these as a successful no-op.
    REUPLOAD_REGEXPS = [
        re.compile(
            r'^error: Upload failed \(400\): A file named "[^"]+" already exists\b'),
        re.compile(
            r'^error: Upload failed \(400\): File already exists\b'),
        re.compile(
            r'^error: Upload failed \(400\): Only one sdist may be uploaded per release\b'),
    ]

    def __init__(self, glob_root, rel_globs):
        super().__init__(glob_root, rel_globs)
        # Source directories already processed by upload_file().
        self.seen_packages = set()

    def upload_file(self, path):
        """Build and upload the package whose source tree contains `path`.

        The source directory is two levels above `path` (for a path like
        `<src>/dist/<file>` that is `<src>`).  Each source directory is
        processed at most once per suite instance.

        Raises subprocess.CalledProcessError when the command fails for any
        reason other than the distribution already being on the index.
        """
        src_dir = os.path.dirname(os.path.dirname(path))
        if src_dir in self.seen_packages:
            return
        self.seen_packages.add(src_dir)
        # We also must run `sdist` before `upload`: `upload` uploads any
        # distributions previously generated in the command.  It doesn't
        # know how to upload distributions already on disk.  We write the
        # result to a dedicated directory to avoid interfering with our
        # timestamp tracking.
        cmd = ['python3', 'setup.py']
        if not self.logger.isEnabledFor(logging.INFO):
            cmd.append('--quiet')
        cmd.extend(['bdist_wheel', '--dist-dir', '.upload_dist'])
        cmd.extend(['sdist', '--dist-dir', '.upload_dist'])
        cmd.extend(['upload'])
        dist_dir = os.path.join(src_dir, '.upload_dist')
        upload_returncode, repushed = run_and_grep(
            cmd, 'stderr', *self.REUPLOAD_REGEXPS, cwd=src_dir)
        if (upload_returncode != 0) and not repushed:
            # Don't leave the temporary build output behind on failure.
            # Best effort only: the directory may never have been created,
            # and a cleanup problem must not hide the real error.
            shutil.rmtree(dist_dir, ignore_errors=True)
            raise subprocess.CalledProcessError(upload_returncode, cmd)
        shutil.rmtree(dist_dir)
154
155
class GemPackageSuite(PackageSuite):
    """Upload Ruby gems with `gem push`."""

    LOGGER_PART = 'gems'
    # stdout line from `gem push` meaning this gem version was already
    # pushed; upload_file() does not treat that as an error.
    REUPLOAD_REGEXP = re.compile(r'^Repushing of gem versions is not allowed\.$')

    def upload_file(self, path):
        """Push the gem at `path`.

        Raises subprocess.CalledProcessError if `gem push` exits non-zero
        without reporting that the version had already been pushed.
        """
        push_cmd = ['gem', 'push', path]
        exit_status, already_pushed = run_and_grep(
            push_cmd, 'stdout', self.REUPLOAD_REGEXP)
        if exit_status == 0 or already_pushed:
            return
        raise subprocess.CalledProcessError(exit_status, push_cmd)
165
166
class DistroPackageSuite(PackageSuite):
    """Base for suites that copy package files to a repository host over SSH.

    Files are copied with scp into REMOTE_DEST_DIR/<target> on `ssh_host`.
    Subclasses typically finish the job in `post_uploads()` by running a
    script on that host through `_run_script()`.
    """

    NEED_SSH = True
    # Remote staging directory; a relative path, so presumably resolved
    # against the SSH login directory -- confirm against the server setup.
    REMOTE_DEST_DIR = 'tmp'

    def __init__(self, glob_root, rel_globs, target, ssh_host, ssh_opts):
        """Store connection settings; each entry of `ssh_opts` becomes `-o<opt>`.

        `-q` is appended to the options unless INFO logging is enabled for
        this suite's logger.
        """
        super().__init__(glob_root, rel_globs)
        self.target = target
        self.ssh_host = ssh_host
        option_args = ['-o' + opt for opt in ssh_opts]
        if not self.logger.isEnabledFor(logging.INFO):
            option_args.append('-q')
        self.ssh_opts = option_args

    def _build_cmd(self, base_cmd, *args):
        """Return the argv list: `base_cmd`, the SSH options, then `args`."""
        return [base_cmd, *self.ssh_opts, *args]

    def _paths_basenames(self, paths):
        """Return an iterator over the basename of each path in `paths`."""
        return map(os.path.basename, paths)

    def _run_script(self, script, *args):
        """Run the bash `script` on the SSH host with `args` as $1, $2, ...

        Raises subprocess.CalledProcessError if the ssh command fails.
        """
        # SSH will use a shell to run our bash command, so we have to
        # quote our arguments.
        quoted_args = [pipes.quote(arg) for arg in args]
        # The class name provides $0 for the script, which makes a nicer
        # message if there's an error.
        remote_cmd = self._build_cmd(
            'ssh', self.ssh_host, 'bash', '-ec', pipes.quote(script),
            type(self).__name__, *quoted_args)
        subprocess.check_call(remote_cmd)

    def upload_files(self, paths):
        """Create the remote staging directory and scp all `paths` into it."""
        remote_dir = os.path.join(self.REMOTE_DEST_DIR, self.target)
        subprocess.check_call(
            self._build_cmd('ssh', self.ssh_host, 'install', '-d', remote_dir))
        scp_cmd = self._build_cmd('scp', *paths)
        scp_cmd += ['{}:{}'.format(self.ssh_host, remote_dir)]
        subprocess.check_call(scp_cmd)
204
205
class DebianPackageSuite(DistroPackageSuite):
    """Add uploaded .deb files to an aptly repository on the SSH host."""

    # Remote bash script.  $1 is the staging directory, $2 the aptly repo
    # name, and the remaining arguments are package file names.  A package
    # that `aptly repo search` already finds is deleted instead of added;
    # afterwards the repo is republished.
    APT_SCRIPT = """
cd "$1"; shift
DISTNAME=$1; shift
for package in "$@"; do
  set +e
  aptly repo search "$DISTNAME" "${package%.deb}" >/dev/null 2>&1
  RET=$?
  set -e
  if [[ $RET -eq 0 ]]; then
    echo "Not adding $package, it is already present in repo $DISTNAME"
    rm "$package"
  else
    aptly repo add -remove-files "$DISTNAME" "$package"
  fi
done
aptly publish update "$DISTNAME" filesystem:"${DISTNAME%-*}":
"""

    def __init__(self, glob_root, rel_globs, target, ssh_host, ssh_opts, repo):
        """As DistroPackageSuite, plus `repo`, the suffix of the aptly repo name."""
        super().__init__(glob_root, rel_globs, target, ssh_host, ssh_opts)
        # Build target -> release codename; the aptly repo name is
        # "<codename>-<repo>".
        codenames = (
            ('debian10', 'buster'),
            ('debian11', 'bullseye'),
            ('ubuntu1804', 'bionic'),
            ('ubuntu2004', 'focal'),
        )
        self.TARGET_DISTNAMES = {
            distro: codename + '-' + repo for distro, codename in codenames}

    def post_uploads(self, paths):
        """Run APT_SCRIPT remotely on the just-uploaded files."""
        staging_dir = self.REMOTE_DEST_DIR + '/' + self.target
        distname = self.TARGET_DISTNAMES[self.target]
        self._run_script(
            self.APT_SCRIPT, staging_dir, distname,
            *self._paths_basenames(paths))
238
239
class RedHatPackageSuite(DistroPackageSuite):
    """Sign uploaded RPMs and add them to a createrepo-managed directory."""

    # Remote bash script.  $1 is the staging directory, $2 the repository
    # directory, and the remaining arguments are package file names, which
    # are signed, moved into the repository, and indexed.
    CREATEREPO_SCRIPT = """
cd "$1"; shift
REPODIR=$1; shift
rpmsign --addsign "$@" </dev/null
mv "$@" "$REPODIR"
createrepo -c ~/.createrepo-cache --update "$REPODIR"
"""
    REPO_ROOT = '/var/www/rpm.arvados.org/'

    def __init__(self, glob_root, rel_globs, target, ssh_host, ssh_opts, repo):
        """As DistroPackageSuite, plus `repo`, a path component of the repo dir."""
        super().__init__(glob_root, rel_globs, target, ssh_host, ssh_opts)
        # Build target -> repository directory, relative to REPO_ROOT.
        centos7_dir = 'CentOS/7/%s/x86_64/' % repo
        self.TARGET_REPODIRS = {'centos7': centos7_dir}

    def post_uploads(self, paths):
        """Run CREATEREPO_SCRIPT remotely on the just-uploaded files."""
        relative_dir = self.TARGET_REPODIRS[self.target]
        repo_dir = os.path.join(self.REPO_ROOT, relative_dir)
        staging_dir = self.REMOTE_DEST_DIR + '/' + self.target
        self._run_script(
            self.CREATEREPO_SCRIPT, staging_dir, repo_dir,
            *self._paths_basenames(paths))
261
262
263 def _define_suite(suite_class, *rel_globs, **kwargs):
264     return functools.partial(suite_class, rel_globs=rel_globs, **kwargs)
265
# Target name -> suite factory.  Each factory is called with the workspace
# directory (plus SSH settings for suites that need them) to build a suite.
# parse_arguments() adds the distribution targets to this table.
PACKAGE_SUITES = {
    'python': _define_suite(
        PythonPackageSuite,
        'sdk/python/dist/*.tar.gz',
        'sdk/cwl/dist/*.tar.gz',
        'services/fuse/dist/*.tar.gz',
        'tools/crunchstat-summary/dist/*.tar.gz',
    ),
    'gems': _define_suite(
        GemPackageSuite,
        'sdk/ruby/*.gem',
        'sdk/cli/*.gem',
        'services/login-sync/*.gem',
    ),
}
279
def parse_arguments(arguments):
    """Parse and validate command-line arguments.

    Arguments:
    * arguments: list of argument strings, excluding the program name.

    Side effect: registers the Debian/Ubuntu and CentOS targets in the
    module-level PACKAGE_SUITES table, bound to the requested --repo.

    Returns the argparse namespace.  On invalid input the parser reports
    the error and exits (SystemExit) via `parser.error()`.
    """
    parser = argparse.ArgumentParser(
        description="Upload Arvados packages to various repositories")
    parser.add_argument(
        '--workspace', '-W', default=os.environ.get('WORKSPACE'),
        help="Arvados source directory with built packages to upload")
    parser.add_argument(
        '--ssh-host', '-H',
        help="Host specification for distribution repository server")
    parser.add_argument('-o', action='append', default=[], dest='ssh_opts',
                         metavar='OPTION', help="Pass option to `ssh -o`")
    parser.add_argument('--verbose', '-v', action='count', default=0,
                        help="Log more information and subcommand output")
    parser.add_argument(
        '--repo', choices=['dev', 'testing'],
        help="Whether to upload to dev (nightly) or testing (release candidate) repository")

    parser.add_argument(
        'targets', nargs='*', default=['all'], metavar='target',
        help="Upload packages to these targets (default all)\nAvailable targets: " +
        ', '.join(sorted(PACKAGE_SUITES.keys())))
    args = parser.parse_args(arguments)
    # 'all' expands to whatever is registered in PACKAGE_SUITES right now,
    # i.e. before the distribution targets are added below.
    if 'all' in args.targets:
        args.targets = list(PACKAGE_SUITES.keys())

    if args.workspace is None:
        parser.error("workspace not set from command line or environment")

    for target in ['debian10', 'debian11', 'ubuntu1804', 'ubuntu2004']:
        PACKAGE_SUITES[target] = _define_suite(
            DebianPackageSuite, os.path.join('packages', target, '*.deb'),
            target=target, repo=args.repo)
    for target in ['centos7']:
        PACKAGE_SUITES[target] = _define_suite(
            RedHatPackageSuite, os.path.join('packages', target, '*.rpm'),
            target=target, repo=args.repo)

    for target in args.targets:
        try:
            suite_def = PACKAGE_SUITES[target]
        except KeyError:
            parser.error("unrecognized target {!r}".format(target))
        if suite_def.func.NEED_SSH and (args.ssh_host is None):
            parser.error(
                "--ssh-host must be specified to upload distribution packages")
        # Suites bound to a repo build their repository name/path from it.
        # Without this check a missing --repo only surfaces later, as a
        # TypeError or as a bogus ".../None/..." repository path.
        if 'repo' in suite_def.keywords and (args.repo is None):
            parser.error(
                "--repo must be specified to upload distribution packages")
    return args
326
def setup_logger(stream_dest, args):
    """Attach a stream handler to the 'arvados-dev.upload' logger.

    Arguments:
    * stream_dest: the stream that log records are written to.
    * args: object with a `verbose` count.  The level starts at WARNING and
      drops by 10 for each count, but never goes below 1.
    """
    formatter = logging.Formatter(
        fmt='%(asctime)s %(name)s[%(process)d] %(levelname)s: %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S')
    handler = logging.StreamHandler(stream_dest)
    handler.setFormatter(formatter)

    level = logging.WARNING - 10 * args.verbose
    if level < 1:
        # Keep the level positive: 0 is NOTSET, which would defer to the
        # parent logger's level.
        level = 1

    upload_logger = logging.getLogger('arvados-dev.upload')
    upload_logger.addHandler(handler)
    upload_logger.setLevel(level)
335
def build_suite_and_upload(target, since_timestamp, args):
    """Instantiate the suite registered for `target` and upload its new files.

    Arguments:
    * target: a key of PACKAGE_SUITES.
    * since_timestamp: passed to the suite's update_packages().
    * args: parsed arguments; `workspace` is always used, `ssh_host` and
      `ssh_opts` only for suites whose class sets NEED_SSH.
    """
    suite_def = PACKAGE_SUITES[target]
    ssh_kwargs = (
        {'ssh_host': args.ssh_host, 'ssh_opts': args.ssh_opts}
        if suite_def.func.NEED_SSH else {})
    suite_def(args.workspace, **ssh_kwargs).update_packages(since_timestamp)
343
def main(arguments, stdout=sys.stdout, stderr=sys.stderr):
    """Upload new packages for every requested target.

    Arguments:
    * arguments: command-line arguments, excluding the program name.
    * stdout: accepted but not used by this function.
    * stderr: stream that receives log output.

    For each target, the mtime of `<workspace>/packages/.last_upload_<target>`
    selects which files count as new, and that file is touched again once
    the target's upload has finished.
    """
    args = parse_arguments(arguments)
    setup_logger(stderr, args)

    for target in args.targets:
        stamp_path = os.path.join(
            args.workspace, 'packages', '.last_upload_{}'.format(target))
        ts_file = TimestampFile(stamp_path)
        build_suite_and_upload(target, ts_file.last_upload(), args)
        ts_file.update()
354
# Run the uploader only when executed as a script, not when imported.
if __name__ == "__main__":
    main(arguments=sys.argv[1:])