9309: run_upload_packages checks for re-upload errors from PyPI.
[arvados-dev.git] / jenkins / run_upload_packages.py
1 #!/usr/bin/env python3
2
3 import argparse
4 import functools
5 import glob
6 import locale
7 import logging
8 import os
9 import pipes
10 import re
11 import shutil
12 import subprocess
13 import sys
14 import time
15
def run_and_scan_output(cmd, read_output, *line_matchers,
                        encoding=locale.getpreferredencoding(), **popen_kwargs):
    """Run a subprocess and capture output lines matching regexps.

    Arguments:
    * cmd: The command to run, as a list or string, as for subprocess.Popen.
    * read_output: 'stdout' or 'stderr', the name of the output stream to read.
    Remaining arguments are regexps to match output, as strings or compiled
    regexp objects.  Output lines matching any regexp will be captured.

    Keyword arguments:
    * encoding: The encoding used to decode the subprocess output.
    Remaining keyword arguments are passed directly to subprocess.Popen.

    Returns 2-tuple (subprocess returncode, list of matched output lines).
    Matched lines keep their line ending.  Lines from the scanned stream
    that match no regexp are read and discarded.
    """
    line_matchers = [matcher if hasattr(matcher, 'search') else re.compile(matcher)
                     for matcher in line_matchers]
    # Force the scanned stream to be a pipe, whatever the caller passed.
    popen_kwargs[read_output] = subprocess.PIPE
    proc = subprocess.Popen(cmd, **popen_kwargs)
    pipe = getattr(proc, read_output)
    try:
        # The descriptor belongs to `pipe`.  Wrap it with closefd=False so
        # the text wrapper does not close it behind the pipe object's back;
        # otherwise the same descriptor number would be closed a second
        # time when the pipe object is finalized.
        with open(pipe.fileno(), encoding=encoding, closefd=False) as output:
            matched_lines = [line for line in output
                             if any(regexp.search(line) for regexp in line_matchers)]
    finally:
        # Close the descriptor exactly once, through its real owner.
        pipe.close()
    return proc.wait(), matched_lines
40
41
class TimestampFile:
    """Marker file whose modification time records a previous run.

    The time this object was created is remembered as start_time, and
    update() stamps that time onto the file as its modification time.
    """

    def __init__(self, path):
        self.path = path
        self.start_time = time.time()

    def last_upload(self):
        """Return the marker file's modification time, or -1 if unreadable."""
        try:
            recorded = os.path.getmtime(self.path)
        except EnvironmentError:
            return -1
        return recorded

    def update(self):
        """Create the marker file if needed, then set its times.

        The access time becomes the current time and the modification time
        becomes start_time.
        """
        marker_fd = os.open(self.path, os.O_CREAT | os.O_APPEND)
        os.close(marker_fd)
        access_time = time.time()
        os.utime(self.path, (access_time, self.start_time))
56
57
class PackageSuite:
    """Base class for a set of built package files uploaded together.

    Subclasses implement upload_file() (or override upload_files()), may
    override post_uploads(), and may define LOGGER_PART to name their logger.
    """
    # Subclasses set this to True when they need SSH settings to be built.
    NEED_SSH = False

    def __init__(self, glob_root, rel_globs):
        # The logger is named after LOGGER_PART when the class defines it,
        # otherwise after the last component of glob_root.
        fallback_part = os.path.basename(glob_root)
        suffix = getattr(self, 'LOGGER_PART', fallback_part)
        self.logger = logging.getLogger('arvados-dev.upload.' + suffix)
        absolute_globs = []
        for rel_glob in rel_globs:
            absolute_globs.append(os.path.join(glob_root, rel_glob))
        self.globs = absolute_globs

    def files_to_upload(self, since_timestamp):
        """Yield matching paths whose mtime is at or after since_timestamp."""
        for pattern in self.globs:
            for candidate in glob.glob(pattern):
                modified = os.path.getmtime(candidate)
                if modified >= since_timestamp:
                    yield candidate

    def upload_file(self, path):
        """Upload one file.  Subclasses must provide this."""
        raise NotImplementedError("PackageSuite.upload_file")

    def upload_files(self, paths):
        """Log and upload each path in turn with upload_file()."""
        for each_path in paths:
            self.logger.info("Uploading %s", each_path)
            self.upload_file(each_path)

    def post_uploads(self, paths):
        """Hook run after a batch of uploads; does nothing by default."""
        return None

    def update_packages(self, since_timestamp):
        """Upload every file modified since since_timestamp, then run the hook.

        Nothing is called when there are no files to upload.
        """
        pending = list(self.files_to_upload(since_timestamp))
        if not pending:
            return
        self.upload_files(pending)
        self.post_uploads(pending)
89
90
class PythonPackageSuite(PackageSuite):
    """Upload Python packages by running `setup.py sdist upload`.

    Each source directory is processed at most once per suite instance, and
    a failure caused by the file already existing on the server is tolerated.
    """
    LOGGER_PART = 'python'
    # Matches the error line reported when the uploaded file already exists.
    REUPLOAD_REGEXP = re.compile(
        r'^error: Upload failed \(400\): A file named "[^"]+" already exists\b')

    def __init__(self, glob_root, rel_globs):
        super().__init__(glob_root, rel_globs)
        # Source directories already handled by upload_file().
        self.seen_packages = set()

    def upload_file(self, path):
        """Build and upload the package that `path` belongs to.

        The source directory is taken to be two levels above `path`.  If it
        has already been processed by this suite, nothing happens.

        Raises subprocess.CalledProcessError when the command exits nonzero
        and no re-upload error line was seen on its stderr.
        """
        src_dir = os.path.dirname(os.path.dirname(path))
        if src_dir in self.seen_packages:
            return
        self.seen_packages.add(src_dir)
        # NOTE: If we ever start uploading Python 3 packages, we'll need to
        # figure out some way to adapt cmd to match.  It might be easiest
        # to give all our setup.py files the executable bit, and run that
        # directly.
        # We also must run `sdist` before `upload`: `upload` uploads any
        # distributions previously generated in the command.  It doesn't
        # know how to upload distributions already on disk.  We write the
        # result to a dedicated directory to avoid interfering with our
        # timestamp tracking.
        cmd = ['python2.7', 'setup.py']
        if not self.logger.isEnabledFor(logging.INFO):
            cmd.append('--quiet')
        cmd.extend(['sdist', '--dist-dir', '.upload_dist', 'upload'])
        try:
            upload_returncode, repushed = run_and_scan_output(
                cmd, 'stderr', self.REUPLOAD_REGEXP, cwd=src_dir)
        finally:
            # Always remove the scratch directory, including when the upload
            # fails.  ignore_errors covers the case where the command died
            # before creating it, so cleanup never masks the real failure.
            shutil.rmtree(os.path.join(src_dir, '.upload_dist'),
                          ignore_errors=True)
        if (upload_returncode != 0) and not repushed:
            raise subprocess.CalledProcessError(upload_returncode, cmd)
123
124
class GemPackageSuite(PackageSuite):
    """Upload gem files with `gem push`, tolerating refused re-pushes."""
    LOGGER_PART = 'gems'
    # Matches the line printed when the gem version was already pushed.
    REUPLOAD_REGEXP = re.compile(r'^Repushing of gem versions is not allowed\.$')

    def upload_file(self, path):
        """Push one gem file.

        Raises subprocess.CalledProcessError when the push exits nonzero and
        its stdout contained no repush notice.
        """
        push_cmd = ['gem', 'push', path]
        exit_status, repush_notices = run_and_scan_output(
            push_cmd, 'stdout', self.REUPLOAD_REGEXP)
        if exit_status == 0 or repush_notices:
            return
        raise subprocess.CalledProcessError(exit_status, push_cmd)
135
136
class DistroPackageSuite(PackageSuite):
    """Base class for suites that copy packages to a remote host over SSH.

    Files are copied with scp into REMOTE_DEST_DIR/<target> on ssh_host.
    Subclasses use _run_script() to process them there.
    """
    NEED_SSH = True
    # Directory on the remote host that holds one subdirectory per target.
    REMOTE_DEST_DIR = 'tmp'

    def __init__(self, glob_root, rel_globs, target, ssh_host, ssh_opts):
        super().__init__(glob_root, rel_globs)
        self.target = target
        self.ssh_host = ssh_host
        # Each option is passed to ssh/scp in the `-oOPTION` form.
        option_flags = []
        for opt in ssh_opts:
            option_flags.append('-o' + opt)
        # Quiet the SSH tools unless INFO logging is enabled.
        if not self.logger.isEnabledFor(logging.INFO):
            option_flags.append('-q')
        self.ssh_opts = option_flags

    def _build_cmd(self, base_cmd, *args):
        """Return [base_cmd, <ssh options>, <args>] as a new list."""
        return [base_cmd] + list(self.ssh_opts) + list(args)

    def _paths_basenames(self, paths):
        """Return an iterator over the base name of each path."""
        return map(os.path.basename, paths)

    def _run_script(self, script, *args):
        """Run a bash script on the remote host with the given arguments."""
        # SSH will use a shell to run our bash command, so we have to
        # quote our arguments.
        quoted_script = pipes.quote(script)
        quoted_args = [pipes.quote(arg) for arg in args]
        # The class name provides $0 for the script, which makes a nicer
        # message if there's an error.
        script_name = self.__class__.__name__
        ssh_cmd = self._build_cmd(
            'ssh', self.ssh_host, 'bash', '-ec', quoted_script,
            script_name, *quoted_args)
        subprocess.check_call(ssh_cmd)

    def upload_files(self, paths):
        """Copy all paths to the remote target directory in one scp call."""
        scp_cmd = self._build_cmd('scp', *paths)
        destination = '{self.ssh_host}:{self.REMOTE_DEST_DIR}/{self.target}'.format(self=self)
        scp_cmd.append(destination)
        subprocess.check_call(scp_cmd)
171
172
class DebianPackageSuite(DistroPackageSuite):
    """Add uploaded packages to a freight-managed apt repository."""
    # Run remotely by post_uploads().  $1 is the directory holding the
    # uploaded files, $2 the distribution name, and the remaining arguments
    # are the package file names.
    FREIGHT_SCRIPT = """
cd "$1"; shift
DISTNAME=$1; shift
freight add "$@" "apt/$DISTNAME"
freight cache "apt/$DISTNAME"
rm "$@"
"""
    # Upload target name -> distribution name used under apt/.
    TARGET_DISTNAMES = dict([
        ('debian7', 'wheezy'),
        ('debian8', 'jessie'),
        ('ubuntu1204', 'precise'),
        ('ubuntu1404', 'trusty'),
    ])

    def post_uploads(self, paths):
        """Run FREIGHT_SCRIPT remotely on the files that were just copied."""
        remote_dir = '/'.join([self.REMOTE_DEST_DIR, self.target])
        distname = self.TARGET_DISTNAMES[self.target]
        package_names = self._paths_basenames(paths)
        self._run_script(self.FREIGHT_SCRIPT, remote_dir, distname,
                         *package_names)
192
193
class RedHatPackageSuite(DistroPackageSuite):
    """Sign uploaded packages and move them into a createrepo repository."""
    # Run remotely by post_uploads().  $1 is the directory holding the
    # uploaded files, $2 the repository directory, and the remaining
    # arguments are the package file names.
    CREATEREPO_SCRIPT = """
cd "$1"; shift
REPODIR=$1; shift
rpmsign --addsign "$@" </dev/null
mv "$@" "$REPODIR"
createrepo "$REPODIR"
"""
    REPO_ROOT = '/var/www/rpm.arvados.org/'
    # Upload target name -> repository directory relative to REPO_ROOT.
    TARGET_REPODIRS = dict([
        ('centos6', 'CentOS/6/os/x86_64/'),
        ('centos7', 'CentOS/7/os/x86_64/'),
    ])

    def post_uploads(self, paths):
        """Run CREATEREPO_SCRIPT remotely on the files that were just copied."""
        target_subdir = self.TARGET_REPODIRS[self.target]
        repo_dir = os.path.join(self.REPO_ROOT, target_subdir)
        remote_dir = '/'.join([self.REMOTE_DEST_DIR, self.target])
        package_names = self._paths_basenames(paths)
        self._run_script(self.CREATEREPO_SCRIPT, remote_dir, repo_dir,
                         *package_names)
213
214
215 def _define_suite(suite_class, *rel_globs, **kwargs):
216     return functools.partial(suite_class, rel_globs=rel_globs, **kwargs)
217
# Registry of upload targets: maps each target name to a partial that builds
# the matching suite once it is given the workspace root (plus SSH settings
# for the distribution suites).
PACKAGE_SUITES = {}
PACKAGE_SUITES['python'] = _define_suite(
    PythonPackageSuite,
    'sdk/pam/dist/*.tar.gz',
    'sdk/python/dist/*.tar.gz',
    'sdk/cwl/dist/*.tar.gz',
    'services/nodemanager/dist/*.tar.gz',
    'services/fuse/dist/*.tar.gz',
)
PACKAGE_SUITES['gems'] = _define_suite(
    GemPackageSuite,
    'sdk/ruby/*.gem',
    'sdk/cli/*.gem',
    'services/login-sync/*.gem',
)
# Distribution packages live under packages/<target>/ in the workspace.
for target in ('debian7', 'debian8', 'ubuntu1204', 'ubuntu1404'):
    deb_glob = os.path.join('packages', target, '*.deb')
    PACKAGE_SUITES[target] = _define_suite(
        DebianPackageSuite, deb_glob, target=target)
for target in ('centos6', 'centos7'):
    rpm_glob = os.path.join('packages', target, '*.rpm')
    PACKAGE_SUITES[target] = _define_suite(
        RedHatPackageSuite, rpm_glob, target=target)
240
def parse_arguments(arguments):
    """Parse and validate the command line, returning the argparse namespace.

    The target 'all' expands to every key of PACKAGE_SUITES.  The parser
    reports an error when no workspace is set, when a target is unknown, or
    when a target whose suite has NEED_SSH set is requested without
    --ssh-host.
    """
    available_targets = ', '.join(sorted(PACKAGE_SUITES))
    targets_help = (
        "Upload packages to these targets (default all)\nAvailable targets: " +
        available_targets)

    parser = argparse.ArgumentParser(
        prog="run_upload_packages.py",
        description="Upload Arvados packages to various repositories")
    parser.add_argument(
        '--workspace', '-W',
        default=os.environ.get('WORKSPACE'),
        help="Arvados source directory with built packages to upload")
    parser.add_argument(
        '--ssh-host', '-H',
        help="Host specification for distribution repository server")
    parser.add_argument(
        '-o',
        action='append', default=[], dest='ssh_opts', metavar='OPTION',
        help="Pass option to `ssh -o`")
    parser.add_argument(
        '--verbose', '-v',
        action='count', default=0,
        help="Log more information and subcommand output")
    parser.add_argument(
        'targets',
        nargs='*', default=['all'], metavar='target',
        help=targets_help)

    args = parser.parse_args(arguments)
    if 'all' in args.targets:
        args.targets = list(PACKAGE_SUITES)

    if args.workspace is None:
        parser.error("workspace not set from command line or environment")
    for target in args.targets:
        if target not in PACKAGE_SUITES:
            parser.error("unrecognized target {!r}".format(target))
        suite_class = PACKAGE_SUITES[target].func
        if (args.ssh_host is None) and suite_class.NEED_SSH:
            parser.error(
                "--ssh-host must be specified to upload distribution packages")
    return args
274
def setup_logger(stream_dest, args):
    """Attach a stream handler to the upload logger and set its level.

    Arguments:
    * stream_dest: The stream that receives formatted log records.
    * args: An object whose `verbose` attribute is an integer count.
    """
    formatter = logging.Formatter(
        '%(asctime)s %(name)s[%(process)d] %(levelname)s: %(message)s',
        '%Y-%m-%d %H:%M:%S')
    handler = logging.StreamHandler(stream_dest)
    handler.setFormatter(formatter)
    # Start at WARNING and drop 10 per verbose count, never going below 1.
    level = logging.WARNING - (10 * args.verbose)
    if level < 1:
        level = 1
    upload_logger = logging.getLogger('arvados-dev.upload')
    upload_logger.addHandler(handler)
    upload_logger.setLevel(level)
283
def build_suite_and_upload(target, since_timestamp, args):
    """Build the suite registered for `target` and upload its new packages.

    The suite is constructed with args.workspace as its glob root.  Suites
    whose class has NEED_SSH set also receive args.ssh_host and
    args.ssh_opts.
    """
    suite_factory = PACKAGE_SUITES[target]
    if suite_factory.func.NEED_SSH:
        extra_kwargs = {'ssh_host': args.ssh_host, 'ssh_opts': args.ssh_opts}
    else:
        extra_kwargs = {}
    package_suite = suite_factory(args.workspace, **extra_kwargs)
    package_suite.update_packages(since_timestamp)
291
def main(arguments, stdout=sys.stdout, stderr=sys.stderr):
    """Parse arguments, then upload new packages for each requested target.

    Each target has its own timestamp file under <workspace>/packages.  The
    file is updated only after that target's uploads return without raising.
    Log output goes to `stderr`; `stdout` is accepted but not used here.
    """
    args = parse_arguments(arguments)
    setup_logger(stderr, args)
    packages_dir = os.path.join(args.workspace, 'packages')
    for target in args.targets:
        marker_path = os.path.join(packages_dir, '.last_upload_%s' % target)
        ts_file = TimestampFile(marker_path)
        previous_upload = ts_file.last_upload()
        build_suite_and_upload(target, previous_upload, args)
        ts_file.update()
301
if __name__ == '__main__':
    # Script entry point: pass along every argument after the program name.
    cli_arguments = sys.argv[1:]
    main(cli_arguments)