19092: upload crunchstat_summary to PyPI
[arvados-dev.git] / jenkins / run_upload_packages.py
index b0a97065380f604552186ad23c65882cc45a47be..6f695b7fc73b1acd41ecf219c36b9a6b8df7d1c0 100755 (executable)
@@ -1,19 +1,65 @@
 #!/usr/bin/env python3
 
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
 import argparse
+import errno
 import functools
 import glob
+import locale
 import logging
 import os
 import pipes
+import re
 import shutil
 import subprocess
 import sys
 import time
 
+def run_and_grep(cmd, read_output, *regexps,
+                 encoding=locale.getpreferredencoding(), **popen_kwargs):
+    """Run a subprocess and capture output lines matching regexps.
+
+    Arguments:
+    * cmd: The command to run, as a list or string, as for subprocess.Popen.
+    * read_output: 'stdout' or 'stderr', the name of the output stream to read.
+    Remaining arguments are regexps to match output, as strings or compiled
+    regexp objects.  Output lines matching any regexp will be captured.
+
+    Keyword arguments:
+    * encoding: The encoding used to decode the subprocess output.
+    Remaining keyword arguments are passed directly to subprocess.Popen.
+
+    Returns a 2-tuple (subprocess returncode, list of matched output lines).
+    """
+    regexps = [regexp if hasattr(regexp, 'search') else re.compile(regexp)
+               for regexp in regexps]
+    popen_kwargs[read_output] = subprocess.PIPE
+    proc = subprocess.Popen(cmd, **popen_kwargs)
+    with open(getattr(proc, read_output).fileno(), encoding=encoding) as output:
+        matched_lines = []
+        for line in output:
+            if any(regexp.search(line) for regexp in regexps):
+                matched_lines.append(line)
+            if read_output == 'stderr':
+                print(line, file=sys.stderr, end='')
+    return proc.wait(), matched_lines
+
+
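A minimal sketch of how this helper is intended to be called (the command, pattern, and paths below are illustrative only, not taken from the script):

    # Hypothetical example: tolerate "already exists" errors from an upload
    # command, but fail on anything else.
    returncode, matched = run_and_grep(
        ['some-upload-tool', 'dist/package-1.0.tar.gz'], 'stderr',
        r'already exists',
        cwd='/tmp/build')
    if returncode != 0 and not matched:
        raise subprocess.CalledProcessError(returncode, 'some-upload-tool')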
 class TimestampFile:
     def __init__(self, path):
         self.path = path
+        # Make sure the dirname for `path` exists
+        p = os.path.dirname(path)
+        try:
+            os.makedirs(p)
+        except OSError as exc:
+            if exc.errno == errno.EEXIST and os.path.isdir(p):
+                pass
+            else:
+                raise
         self.start_time = time.time()
 
     def last_upload(self):
@@ -23,9 +69,18 @@ class TimestampFile:
             return -1
 
     def update(self):
-        os.close(os.open(self.path, os.O_CREAT | os.O_APPEND))
-        os.utime(self.path, (time.time(), self.start_time))
-
+        try:
+            os.close(os.open(self.path, os.O_CREAT | os.O_APPEND))
+            os.utime(self.path, (time.time(), self.start_time))
+        except OSError:
+            # When the packages directory is created/populated as root by a
+            # build inside a Docker container, the script that runs the upload
+            # doesn't always have permission to touch a timestamp file there.
+            # In production we build/upload from ephemeral machines, so the
+            # timestamp mechanism is not used.  Print a warning and move on
+            # without erroring out.
+            print("Warning: unable to update timestamp file", self.path,
+                  "(permission problem?)")
 
 class PackageSuite:
     NEED_SSH = False
@@ -62,6 +117,14 @@ class PackageSuite:
 
 class PythonPackageSuite(PackageSuite):
     LOGGER_PART = 'python'
+    REUPLOAD_REGEXPS = [
+        re.compile(
+            r'^error: Upload failed \(400\): A file named "[^"]+" already exists\b'),
+        re.compile(
+            r'^error: Upload failed \(400\): File already exists\b'),
+        re.compile(
+            r'^error: Upload failed \(400\): Only one sdist may be uploaded per release\b'),
+    ]
 
     def __init__(self, glob_root, rel_globs):
         super().__init__(glob_root, rel_globs)
@@ -72,37 +135,33 @@ class PythonPackageSuite(PackageSuite):
         if src_dir in self.seen_packages:
             return
         self.seen_packages.add(src_dir)
-        # NOTE: If we ever start uploading Python 3 packages, we'll need to
-        # figure out some way to adapt cmd to match.  It might be easiest
-        # to give all our setup.py files the executable bit, and run that
-        # directly.
         # We also must run `sdist` before `upload`: `upload` uploads any
         # distributions previously generated in the command.  It doesn't
         # know how to upload distributions already on disk.  We write the
         # result to a dedicated directory to avoid interfering with our
         # timestamp tracking.
-        cmd = ['python2.7', 'setup.py']
+        cmd = ['python3', 'setup.py']
         if not self.logger.isEnabledFor(logging.INFO):
             cmd.append('--quiet')
-        cmd.extend(['sdist', '--dist-dir', '.upload_dist', 'upload'])
-        subprocess.check_call(cmd, cwd=src_dir)
+        cmd.extend(['bdist_wheel', '--dist-dir', '.upload_dist'])
+        cmd.extend(['sdist', '--dist-dir', '.upload_dist'])
+        cmd.extend(['upload'])
+        upload_returncode, repushed = run_and_grep(
+            cmd, 'stderr', *self.REUPLOAD_REGEXPS, cwd=src_dir)
+        if (upload_returncode != 0) and not repushed:
+            raise subprocess.CalledProcessError(upload_returncode, cmd)
         shutil.rmtree(os.path.join(src_dir, '.upload_dist'))
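With INFO logging disabled, the command assembled above is equivalent to running the following in each package's source directory:

    python3 setup.py --quiet bdist_wheel --dist-dir .upload_dist sdist --dist-dir .upload_dist upload

run_and_grep watches the upload's stderr for the REUPLOAD_REGEXPS patterns, so re-uploading an already-published version is tolerated while any other failure still raises CalledProcessError.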
 
 
 class GemPackageSuite(PackageSuite):
     LOGGER_PART = 'gems'
+    REUPLOAD_REGEXP = re.compile(r'^Repushing of gem versions is not allowed\.$')
 
     def upload_file(self, path):
         cmd = ['gem', 'push', path]
-        push_proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
-        repushed = any(line == b'Repushing of gem versions is not allowed.\n'
-                       for line in push_proc.stdout)
-        # Read any remaining stdout before closing.
-        for line in push_proc.stdout:
-            pass
-        push_proc.stdout.close()
-        if (push_proc.wait() != 0) and not repushed:
-            raise subprocess.CalledProcessError(push_proc.returncode, cmd)
+        push_returncode, repushed = run_and_grep(cmd, 'stdout', self.REUPLOAD_REGEXP)
+        if (push_returncode != 0) and not repushed:
+            raise subprocess.CalledProcessError(push_returncode, cmd)
 
 
 class DistroPackageSuite(PackageSuite):
@@ -136,28 +195,44 @@ class DistroPackageSuite(PackageSuite):
                 self.__class__.__name__, *(pipes.quote(s) for s in args)))
 
     def upload_files(self, paths):
+        dest_dir = os.path.join(self.REMOTE_DEST_DIR, self.target)
+        mkdir = self._build_cmd('ssh', self.ssh_host, 'install', '-d', dest_dir)
+        subprocess.check_call(mkdir)
         cmd = self._build_cmd('scp', *paths)
-        cmd.append('{self.ssh_host}:{self.REMOTE_DEST_DIR}'.format(self=self))
+        cmd.append('{}:{}'.format(self.ssh_host, dest_dir))
         subprocess.check_call(cmd)
 
 
 class DebianPackageSuite(DistroPackageSuite):
-    FREIGHT_SCRIPT = """
+    APT_SCRIPT = """
 cd "$1"; shift
 DISTNAME=$1; shift
-freight add "$@" "apt/$DISTNAME"
-freight cache
-rm "$@"
+for package in "$@"; do
+  set +e
+  aptly repo search "$DISTNAME" "${package%.deb}" >/dev/null 2>&1
+  RET=$?
+  set -e
+  if [[ $RET -eq 0 ]]; then
+    echo "Not adding $package, it is already present in repo $DISTNAME"
+    rm "$package"
+  else
+    aptly repo add -remove-files "$DISTNAME" "$package"
+  fi
+done
+aptly publish update "$DISTNAME" filesystem:"${DISTNAME%-*}":
 """
-    TARGET_DISTNAMES = {
-        'debian7': 'wheezy',
-        'debian8': 'jessie',
-        'ubuntu1204': 'precise',
-        'ubuntu1404': 'trusty',
-        }
+
+    def __init__(self, glob_root, rel_globs, target, ssh_host, ssh_opts, repo):
+        super().__init__(glob_root, rel_globs, target, ssh_host, ssh_opts)
+        self.TARGET_DISTNAMES = {
+            'debian10': 'buster-'+repo,
+            'debian11': 'bullseye-'+repo,
+            'ubuntu1804': 'bionic-'+repo,
+            'ubuntu2004': 'focal-'+repo,
+            }
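As a worked example of the `${DISTNAME%-*}` expansion in APT_SCRIPT above: for target 'debian11' with --repo dev, DISTNAME is 'bullseye-dev', so the final publish step becomes:

    aptly publish update bullseye-dev filesystem:bullseye: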
 
     def post_uploads(self, paths):
-        self._run_script(self.FREIGHT_SCRIPT, self.REMOTE_DEST_DIR,
+        self._run_script(self.APT_SCRIPT, os.path.join(self.REMOTE_DEST_DIR, self.target),
                          self.TARGET_DISTNAMES[self.target],
                          *self._paths_basenames(paths))
 
@@ -168,17 +243,20 @@ cd "$1"; shift
 REPODIR=$1; shift
 rpmsign --addsign "$@" </dev/null
 mv "$@" "$REPODIR"
-createrepo "$REPODIR"
+createrepo -c ~/.createrepo-cache --update "$REPODIR"
 """
     REPO_ROOT = '/var/www/rpm.arvados.org/'
-    TARGET_REPODIRS = {
-        'centos6': 'CentOS/6/os/x86_64/'
+
+    def __init__(self, glob_root, rel_globs, target, ssh_host, ssh_opts, repo):
+        super().__init__(glob_root, rel_globs, target, ssh_host, ssh_opts)
+        self.TARGET_REPODIRS = {
+            'centos7': 'CentOS/7/%s/x86_64/' % repo,
         }
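For example, with --repo testing the repo_dir computed in post_uploads below resolves to '/var/www/rpm.arvados.org/CentOS/7/testing/x86_64/'.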
 
     def post_uploads(self, paths):
         repo_dir = os.path.join(self.REPO_ROOT,
                                 self.TARGET_REPODIRS[self.target])
-        self._run_script(self.CREATEREPO_SCRIPT, self.REMOTE_DEST_DIR,
+        self._run_script(self.CREATEREPO_SCRIPT, os.path.join(self.REMOTE_DEST_DIR, self.target),
                          repo_dir, *self._paths_basenames(paths))
 
 
@@ -188,22 +266,19 @@ def _define_suite(suite_class, *rel_globs, **kwargs):
 PACKAGE_SUITES = {
     'python': _define_suite(PythonPackageSuite,
                             'sdk/python/dist/*.tar.gz',
+                            'sdk/cwl/dist/*.tar.gz',
                             'services/fuse/dist/*.tar.gz',
-                            'services/nodemanager/dist/*.tar.gz'),
-    'gems': _define_suite(GemPackageSuite, 'sdk/ruby/*.gem', 'sdk/cli/*.gem'),
+                            'tools/crunchstat-summary/dist/*.tar.gz',
+                        ),
+    'gems': _define_suite(GemPackageSuite,
+                          'sdk/ruby/*.gem',
+                          'sdk/cli/*.gem',
+                          'services/login-sync/*.gem',
+                      ),
     }
-for target in ['debian7', 'debian8', 'ubuntu1204', 'ubuntu1404']:
-    PACKAGE_SUITES[target] = _define_suite(
-        DebianPackageSuite, os.path.join('packages', target, '*.deb'),
-        target=target)
-for target in ['centos6']:
-    PACKAGE_SUITES[target] = _define_suite(
-        RedHatPackageSuite, os.path.join('packages', target, '*.rpm'),
-        target=target)
 
 def parse_arguments(arguments):
     parser = argparse.ArgumentParser(
-        prog="run_upload_packages.py",
         description="Upload Arvados packages to various repositories")
     parser.add_argument(
         '--workspace', '-W', default=os.environ.get('WORKSPACE'),
@@ -215,6 +290,10 @@ def parse_arguments(arguments):
                          metavar='OPTION', help="Pass option to `ssh -o`")
     parser.add_argument('--verbose', '-v', action='count', default=0,
                         help="Log more information and subcommand output")
+    parser.add_argument(
+        '--repo', choices=['dev', 'testing'],
+        help="Whether to upload to dev (nightly) or testing (release candidate) repository")
+
     parser.add_argument(
         'targets', nargs='*', default=['all'], metavar='target',
         help="Upload packages to these targets (default all)\nAvailable targets: " +
@@ -225,6 +304,16 @@ def parse_arguments(arguments):
 
     if args.workspace is None:
         parser.error("workspace not set from command line or environment")
+
+    for target in ['debian10', 'debian11', 'ubuntu1804', 'ubuntu2004']:
+        PACKAGE_SUITES[target] = _define_suite(
+            DebianPackageSuite, os.path.join('packages', target, '*.deb'),
+            target=target, repo=args.repo)
+    for target in ['centos7']:
+        PACKAGE_SUITES[target] = _define_suite(
+            RedHatPackageSuite, os.path.join('packages', target, '*.rpm'),
+            target=target, repo=args.repo)
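An illustrative invocation (the workspace path is hypothetical, and the ssh-related options the distro suites need are omitted), uploading nightly Debian 11 and CentOS 7 packages built under the workspace's packages/ directory:

    run_upload_packages.py --workspace /home/jenkins/workspace/arvados --repo dev debian11 centos7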
+
     for target in args.targets:
         try:
             suite_class = PACKAGE_SUITES[target].func
@@ -255,12 +344,13 @@ def build_suite_and_upload(target, since_timestamp, args):
 def main(arguments, stdout=sys.stdout, stderr=sys.stderr):
     args = parse_arguments(arguments)
     setup_logger(stderr, args)
-    ts_file = TimestampFile(os.path.join(args.workspace, 'packages',
-                                         '.last_upload'))
-    last_upload_ts = ts_file.last_upload()
+
     for target in args.targets:
+        ts_file = TimestampFile(os.path.join(args.workspace, 'packages',
+                                             '.last_upload_%s' % target))
+        last_upload_ts = ts_file.last_upload()
         build_suite_and_upload(target, last_upload_ts, args)
-    ts_file.update()
+        ts_file.update()
 
 if __name__ == '__main__':
     main(sys.argv[1:])