#!/usr/bin/env python3
# Copyright (C) The Arvados Authors. All rights reserved.
#
# SPDX-License-Identifier: AGPL-3.0

import argparse
import errno
import functools
import glob
import locale
import logging
import os
import re
import shlex
import shutil
import subprocess
import sys
import time

try:
    # `pipes` was removed from the standard library in Python 3.13.  The
    # code in this section uses `shlex.quote` instead; the name is kept
    # importable where it still exists in case other code refers to it.
    import pipes
except ImportError:
    pipes = None


def run_and_grep(cmd, read_output, *regexps,
                 encoding=locale.getpreferredencoding(), **popen_kwargs):
    """Run a subprocess and capture output lines matching regexps.

    Arguments:
    * cmd: The command to run, as a list or string, as for subprocess.Popen.
    * read_output: 'stdout' or 'stderr', the name of the output stream
      to read.
    Remaining arguments are regexps to match output, as strings or compiled
    regexp objects.  Output lines matching any regexp will be captured.

    Keyword arguments:
    * encoding: The encoding used to decode the subprocess output.
    Remaining keyword arguments are passed directly to subprocess.Popen.

    When reading 'stderr', every line read is also echoed to this process's
    stderr, whether or not it matched.

    Returns 2-tuple (subprocess returncode, list of matched output lines).
    """
    regexps = [regexp if hasattr(regexp, 'search') else re.compile(regexp)
               for regexp in regexps]
    popen_kwargs[read_output] = subprocess.PIPE
    proc = subprocess.Popen(cmd, **popen_kwargs)
    pipe = getattr(proc, read_output)
    matched_lines = []
    try:
        # The Popen stream object owns the descriptor.  Open the decoding
        # wrapper with closefd=False so the descriptor is closed exactly
        # once (by pipe.close() below) instead of once per wrapper.
        with open(pipe.fileno(), encoding=encoding, closefd=False) as output:
            for line in output:
                if any(regexp.search(line) for regexp in regexps):
                    matched_lines.append(line)
                if read_output == 'stderr':
                    print(line, file=sys.stderr, end='')
    finally:
        pipe.close()
    return proc.wait(), matched_lines


class TimestampFile:
    """Track the time of the last upload through a file's mtime.

    The directory that contains `path` is created on construction if it
    does not exist yet.  `start_time` records when this object was created
    and is the modification time written by `update()`.
    """

    def __init__(self, path):
        self.path = path
        # Make sure the dirname for `path` exists.  A bare filename has an
        # empty dirname, and os.makedirs('') would raise, so skip it then.
        parent = os.path.dirname(path)
        if parent:
            os.makedirs(parent, exist_ok=True)
        self.start_time = time.time()

    def last_upload(self):
        """Return the timestamp file's mtime, or -1 if it cannot be read."""
        try:
            return os.path.getmtime(self.path)
        except OSError:
            return -1

    def update(self):
        """Create the timestamp file if needed and set its mtime to start_time.

        Failures are reported with a warning on stdout and otherwise ignored.
        """
        try:
            os.close(os.open(self.path, os.O_CREAT | os.O_APPEND))
            os.utime(self.path, (time.time(), self.start_time))
        except OSError:
            # when the packages directory is created/populated by a build in a
            # docker container, as root, the script that runs the upload
            # doesn't always have permission to touch a timestamp file there.
            # In production, we build/upload from ephemeral machines, which
            # means that the timestamp mechanism is not used. We print a
            # warning and move on without erroring out.
            print("Warning: unable to update timestamp file",self.path,"permission problem?")


class PackageSuite:
    """Base class for a group of package files uploaded the same way.

    Subclasses implement `upload_file()` and may override `upload_files()`
    and `post_uploads()`.  A subclass may define `LOGGER_PART` to choose the
    logger name suffix; otherwise the basename of `glob_root` is used.
    """

    NEED_SSH = False

    def __init__(self, glob_root, rel_globs):
        logger_part = getattr(self, 'LOGGER_PART', os.path.basename(glob_root))
        self.logger = logging.getLogger('arvados-dev.upload.' + logger_part)
        self.globs = [os.path.join(glob_root, rel_glob)
                      for rel_glob in rel_globs]

    def files_to_upload(self, since_timestamp):
        """Yield paths matching our globs with mtime >= since_timestamp."""
        for abs_glob in self.globs:
            for path in glob.glob(abs_glob):
                if os.path.getmtime(path) >= since_timestamp:
                    yield path

    def upload_file(self, path):
        """Upload a single file.  Must be implemented by subclasses."""
        raise NotImplementedError("PackageSuite.upload_file")

    def upload_files(self, paths):
        """Log and upload each path in turn via `upload_file()`."""
        for path in paths:
            self.logger.info("Uploading %s", path)
            self.upload_file(path)

    def post_uploads(self, paths):
        """Hook run after `upload_files()`; does nothing by default."""
        pass

    def update_packages(self, since_timestamp):
        """Upload everything newer than since_timestamp, then run the hook.

        Nothing is called when there are no files to upload.
        """
        upload_paths = list(self.files_to_upload(since_timestamp))
        if upload_paths:
            self.upload_files(upload_paths)
            self.post_uploads(upload_paths)


class PythonPackageSuite(PackageSuite):
    """Upload Python packages by running `setup.py ... upload`."""

    LOGGER_PART = 'python'
    # stderr lines that mean the distribution was already uploaded; a
    # non-zero exit status accompanied by one of these is not an error.
    REUPLOAD_REGEXPS = [
        re.compile(
            r'^error: Upload failed \(400\): A file named "[^"]+" already exists\b'),
        re.compile(
            r'^error: Upload failed \(400\): File already exists\b'),
        re.compile(
            r'^error: Upload failed \(400\): Only one sdist may be uploaded per release\b'),
    ]

    def __init__(self, glob_root, rel_globs):
        super().__init__(glob_root, rel_globs)
        # Source directories that have already been processed.
        self.seen_packages = set()

    def upload_file(self, path):
        """Build and upload the package whose source tree contains `path`.

        The source directory is taken to be two levels above `path`.  Each
        source directory is processed at most once per suite instance.

        Raises subprocess.CalledProcessError if the command fails and none
        of REUPLOAD_REGEXPS matched its stderr.
        """
        src_dir = os.path.dirname(os.path.dirname(path))
        if src_dir in self.seen_packages:
            return
        self.seen_packages.add(src_dir)
        # We also must run `sdist` before `upload`: `upload` uploads any
        # distributions previously generated in the command.  It doesn't
        # know how to upload distributions already on disk.  We write the
        # result to a dedicated directory to avoid interfering with our
        # timestamp tracking.
        cmd = ['python3', 'setup.py']
        if not self.logger.isEnabledFor(logging.INFO):
            cmd.append('--quiet')
        cmd.extend(['bdist_wheel', '--dist-dir', '.upload_dist'])
        cmd.extend(['sdist', '--dist-dir', '.upload_dist'])
        cmd.extend(['upload'])
        dist_dir = os.path.join(src_dir, '.upload_dist')
        try:
            upload_returncode, repushed = run_and_grep(
                cmd, 'stderr', *self.REUPLOAD_REGEXPS, cwd=src_dir)
            if (upload_returncode != 0) and not repushed:
                raise subprocess.CalledProcessError(upload_returncode, cmd)
        finally:
            # Always remove the scratch directory, including when the upload
            # failed or the build never got far enough to create it.
            shutil.rmtree(dist_dir, ignore_errors=True)


class GemPackageSuite(PackageSuite):
    """Upload Ruby gems with `gem push`."""

    LOGGER_PART = 'gems'
    # stdout line that means this gem version was pushed before.
    REUPLOAD_REGEXP = re.compile(r'^Repushing of gem versions is not allowed\.$')

    def upload_file(self, path):
        """Push one gem.

        Raises subprocess.CalledProcessError if `gem push` fails and its
        stdout did not contain the "repushing" message.
        """
        cmd = ['gem', 'push', path]
        push_returncode, repushed = run_and_grep(cmd, 'stdout', self.REUPLOAD_REGEXP)
        if (push_returncode != 0) and not repushed:
            raise subprocess.CalledProcessError(push_returncode, cmd)


class DistroPackageSuite(PackageSuite):
    """Base class for suites that copy packages to a remote host over SSH.

    Files are copied with scp into `REMOTE_DEST_DIR/<target>` on `ssh_host`.
    Each entry of `ssh_opts` is passed to ssh/scp as a `-o` option; `-q` is
    added when INFO logging is not enabled for this suite's logger.
    """

    NEED_SSH = True
    REMOTE_DEST_DIR = 'tmp'

    def __init__(self, glob_root, rel_globs, target, ssh_host, ssh_opts):
        super().__init__(glob_root, rel_globs)
        self.target = target
        self.ssh_host = ssh_host
        self.ssh_opts = ['-o' + opt for opt in ssh_opts]
        if not self.logger.isEnabledFor(logging.INFO):
            self.ssh_opts.append('-q')

    def _build_cmd(self, base_cmd, *args):
        """Return [base_cmd, *ssh options, *args] as a list."""
        cmd = [base_cmd]
        cmd.extend(self.ssh_opts)
        cmd.extend(args)
        return cmd

    def _paths_basenames(self, paths):
        """Return a generator of the basename of each path."""
        return (os.path.basename(path) for path in paths)

    def _run_script(self, script, *args):
        """Run a bash script on the remote host with the given arguments.

        Raises subprocess.CalledProcessError if ssh exits non-zero.
        """
        # SSH will use a shell to run our bash command, so we have to
        # quote our arguments.
        # self.__class__.__name__ provides $0 for the script, which makes a
        # nicer message if there's an error.
        subprocess.check_call(self._build_cmd(
            'ssh', self.ssh_host, 'bash', '-ec', shlex.quote(script),
            self.__class__.__name__, *(shlex.quote(s) for s in args)))

    def upload_files(self, paths):
        """Create the remote destination directory and scp all paths to it."""
        dest_dir = os.path.join(self.REMOTE_DEST_DIR, self.target)
        mkdir = self._build_cmd('ssh', self.ssh_host, 'install', '-d', dest_dir)
        subprocess.check_call(mkdir)
        cmd = self._build_cmd('scp', *paths)
        cmd.append('{}:{}'.format(self.ssh_host, dest_dir))
        subprocess.check_call(cmd)


class DebianPackageSuite(DistroPackageSuite):
    """Upload .deb packages and add them to an aptly repository."""

    # Remote script.  $1 is the directory holding the uploaded packages,
    # $2 the aptly repo name, the rest are package file names.  A package
    # that `aptly repo search` already finds is deleted instead of added;
    # finally the published repo is updated.
    APT_SCRIPT = """
cd "$1"; shift
DISTNAME=$1; shift
for package in "$@"; do
  set +e
  aptly repo search "$DISTNAME" "${package%.deb}" >/dev/null 2>&1
  RET=$?
  set -e
  if [[ $RET -eq 0 ]]; then
    echo "Not adding $package, it is already present in repo $DISTNAME"
    rm "$package"
  else
    aptly repo add -remove-files "$DISTNAME" "$package"
  fi
done
aptly publish update "$DISTNAME" filesystem:"${DISTNAME%-*}":
"""

    def __init__(self, glob_root, rel_globs, target, ssh_host, ssh_opts, repo):
        super().__init__(glob_root, rel_globs, target, ssh_host, ssh_opts)
        # Maps a build target name to "<codename>-<repo>".
        self.TARGET_DISTNAMES = {
            'debian10': 'buster-'+repo,
            'debian11': 'bullseye-'+repo,
            'ubuntu1804': 'bionic-'+repo,
            'ubuntu2004': 'focal-'+repo,
        }

    def post_uploads(self, paths):
        """Run APT_SCRIPT remotely for the uploaded packages.

        Raises KeyError if `self.target` has no entry in TARGET_DISTNAMES.
        """
        self._run_script(self.APT_SCRIPT, self.REMOTE_DEST_DIR + '/' + self.target,
                         self.TARGET_DISTNAMES[self.target],
                         *self._paths_basenames(paths))


# NOTE(review): the definition of RedHatPackageSuite begins at this point but
# continues past the end of the section under review, so it was not rewritten.
# Its visible opening is reproduced verbatim here and must be re-joined with
# the remainder of the definition:
#
#   class RedHatPackageSuite(DistroPackageSuite):
#       CREATEREPO_SCRIPT = """
#   cd "$1"; shift
#   REPODIR=$1; shift
#   rpmsign --addsign "$@"