3 # Copyright (C) The Arvados Authors. All rights reserved.
5 # SPDX-License-Identifier: AGPL-3.0
def run_and_grep(cmd, read_output, *regexps,
                 encoding=locale.getpreferredencoding(), **popen_kwargs):
    """Run a subprocess and capture output lines matching regexps.

    Arguments:
    * cmd: The command to run, as a list or string, as for subprocess.Popen.
    * read_output: 'stdout' or 'stderr', the name of the output stream to read.
    Remaining arguments are regexps to match output, as strings or compiled
    regexp objects.  Output lines matching any regexp will be captured.

    Keyword arguments:
    * encoding: The encoding used to decode the subprocess output.
    Remaining keyword arguments are passed directly to subprocess.Popen.

    Returns 2-tuple (subprocess returncode, list of matched output lines).
    """
    # Accept both plain strings and already-compiled patterns.
    regexps = [regexp if hasattr(regexp, 'search') else re.compile(regexp)
               for regexp in regexps]
    popen_kwargs[read_output] = subprocess.PIPE
    proc = subprocess.Popen(cmd, **popen_kwargs)
    # Wrap the pipe fd in a text-mode file object so we iterate decoded lines.
    with open(getattr(proc, read_output).fileno(), encoding=encoding) as output:
        matched_lines = []
        for line in output:
            if any(regexp.search(line) for regexp in regexps):
                matched_lines.append(line)
            # Echo captured stderr through so the operator still sees it.
            if read_output == 'stderr':
                print(line, file=sys.stderr, end='')
    return proc.wait(), matched_lines
52 def __init__(self, path):
54 # Make sure the dirname for `path` exists
55 p = os.path.dirname(path)
58 except OSError as exc:
59 if exc.errno == errno.EEXIST and os.path.isdir(p):
63 self.start_time = time.time()
65 def last_upload(self):
67 return os.path.getmtime(self.path)
68 except EnvironmentError:
73 os.close(os.open(self.path, os.O_CREAT | os.O_APPEND))
74 os.utime(self.path, (time.time(), self.start_time))
76 # when the packages directory is created/populated by a build in a
77 # docker container, as root, the script that runs the upload
78 # doesn't always have permission to touch a timestamp file there.
79 # In production, we build/upload from ephemeral machines, which
80 # means that the timestamp mechanism is not used. We print a
81 # warning and move on without erroring out.
82 print("Warning: unable to update timestamp file",self.path,"permission problem?")
88 def __init__(self, glob_root, rel_globs):
89 logger_part = getattr(self, 'LOGGER_PART', os.path.basename(glob_root))
90 self.logger = logging.getLogger('arvados-dev.upload.' + logger_part)
91 self.globs = [os.path.join(glob_root, rel_glob)
92 for rel_glob in rel_globs]
94 def files_to_upload(self, since_timestamp):
95 for abs_glob in self.globs:
96 for path in glob.glob(abs_glob):
97 if os.path.getmtime(path) >= since_timestamp:
100 def upload_file(self, path):
101 raise NotImplementedError("PackageSuite.upload_file")
103 def upload_files(self, paths):
105 self.logger.info("Uploading %s", path)
106 self.upload_file(path)
108 def post_uploads(self, paths):
111 def update_packages(self, since_timestamp):
112 upload_paths = list(self.files_to_upload(since_timestamp))
114 self.upload_files(upload_paths)
115 self.post_uploads(upload_paths)
class PythonPackageSuite(PackageSuite):
    """Upload Python source/wheel distributions with `python3 setup.py upload`."""
    LOGGER_PART = 'python'
    # Stderr patterns meaning this version was already uploaded; when one
    # matches, a nonzero exit from `upload` is not treated as a failure.
    REUPLOAD_REGEXPS = [
        re.compile(
            r'^error: Upload failed \(400\): A file named "[^"]+" already exists\b'),
        re.compile(
            r'^error: Upload failed \(400\): File already exists\b'),
        re.compile(
            r'^error: Upload failed \(400\): Only one sdist may be uploaded per release\b'),
    ]

    def __init__(self, glob_root, rel_globs):
        super().__init__(glob_root, rel_globs)
        # Source directories already handled this run: upload once per package
        # even if several dist files match the globs.
        self.seen_packages = set()

    def upload_file(self, path):
        # `path` is <src_dir>/dist/<file>; work at the package source dir level.
        src_dir = os.path.dirname(os.path.dirname(path))
        if src_dir in self.seen_packages:
            return
        self.seen_packages.add(src_dir)
        # We also must run `sdist` before `upload`: `upload` uploads any
        # distributions previously generated in the command.  It doesn't
        # know how to upload distributions already on disk.  We write the
        # result to a dedicated directory to avoid interfering with our
        # timestamp tracking.
        cmd = ['python3', 'setup.py']
        if not self.logger.isEnabledFor(logging.INFO):
            cmd.append('--quiet')
        cmd.extend(['bdist_wheel', '--dist-dir', '.upload_dist'])
        cmd.extend(['sdist', '--dist-dir', '.upload_dist'])
        cmd.extend(['upload'])
        upload_returncode, repushed = run_and_grep(
            cmd, 'stderr', *self.REUPLOAD_REGEXPS, cwd=src_dir)
        if (upload_returncode != 0) and not repushed:
            raise subprocess.CalledProcessError(upload_returncode, cmd)
        shutil.rmtree(os.path.join(src_dir, '.upload_dist'))
class GemPackageSuite(PackageSuite):
    """Upload Ruby gem packages with the `gem push` command."""
    # `gem push` output line meaning this exact gem version is already on the
    # server; treated as success rather than failure (see upload_file).
    REUPLOAD_REGEXP = re.compile(r'^Repushing of gem versions is not allowed\.$')
160 def upload_file(self, path):
161 cmd = ['gem', 'push', path]
162 push_returncode, repushed = run_and_grep(cmd, 'stdout', self.REUPLOAD_REGEXP)
163 if (push_returncode != 0) and not repushed:
164 raise subprocess.CalledProcessError(push_returncode, cmd)
class DistroPackageSuite(PackageSuite):
    """Base class for suites that copy packages to a repo host over SSH."""
    # Tells parse_arguments() that --ssh-host is required for this suite.
    NEED_SSH = True
    # Remote directory (relative to the SSH user's home) to receive uploads.
    REMOTE_DEST_DIR = 'tmp'

    def __init__(self, glob_root, rel_globs, target, ssh_host, ssh_opts):
        super().__init__(glob_root, rel_globs)
        self.target = target
        self.ssh_host = ssh_host
        # Each user-supplied option becomes an ssh/scp `-oOPTION` argument.
        self.ssh_opts = ['-o' + opt for opt in ssh_opts]
        if not self.logger.isEnabledFor(logging.INFO):
            self.ssh_opts.append('-q')

    def _build_cmd(self, base_cmd, *args):
        """Return [base_cmd, *ssh_opts, *args] as an argv list."""
        cmd = [base_cmd]
        cmd.extend(self.ssh_opts)
        cmd.extend(args)
        return cmd

    def _paths_basenames(self, paths):
        """Generate the basename of each path (the remote-side filenames)."""
        return (os.path.basename(path) for path in paths)

    def _run_script(self, script, *args):
        # SSH will use a shell to run our bash command, so we have to
        # quote our arguments.
        # self.__class__.__name__ provides $0 for the script, which makes a
        # nicer message if there's an error.
        subprocess.check_call(self._build_cmd(
            'ssh', self.ssh_host, 'bash', '-ec', shlex.quote(script),
            self.__class__.__name__, *(shlex.quote(s) for s in args)))

    def upload_files(self, paths):
        """scp the package files into REMOTE_DEST_DIR/<target> on the repo host."""
        dest_dir = os.path.join(self.REMOTE_DEST_DIR, self.target)
        mkdir = self._build_cmd('ssh', self.ssh_host, 'install', '-d', dest_dir)
        subprocess.check_call(mkdir)
        cmd = self._build_cmd('scp', *paths)
        cmd.append('{}:{}'.format(self.ssh_host, dest_dir))
        subprocess.check_call(cmd)
class DebianPackageSuite(DistroPackageSuite):
    """Upload .deb packages and publish them to an aptly-managed repository."""
# aptly implements its own locking, but its wait strategy as of April 2024 is
# not patient enough to accommodate multiple simultaneous uploads.
APTLY_LOCK="${XDG_RUNTIME_DIR:-/tmp}/aptly-upload.lock"
  flock --wait=300 "$APTLY_LOCK" aptly "$@"
for package in "$@"; do
  if aptly repo search "$DISTNAME" "${package%.deb}" >/dev/null 2>&1; then
    echo "Not adding $package, it is already present in repo $DISTNAME"
    aptly repo add -remove-files "$DISTNAME" "$package"
aptly publish update "$DISTNAME" filesystem:"${DISTNAME%-*}":

    def __init__(self, glob_root, rel_globs, target, ssh_host, ssh_opts, repo):
        super().__init__(glob_root, rel_globs, target, ssh_host, ssh_opts)
        # Maps each build target name to the aptly distribution it publishes
        # into; `repo` selects the 'dev' or 'testing' flavor.
        self.TARGET_DISTNAMES = {
            'debian10': 'buster-'+repo,
            'debian11': 'bullseye-'+repo,
            'debian12': 'bookworm-'+repo,
            'ubuntu1804': 'bionic-'+repo,
            'ubuntu2004': 'focal-'+repo,
            'ubuntu2204': 'jammy-'+repo,

    def post_uploads(self, paths):
        """Run APT_SCRIPT on the repo host to add the uploaded .debs and republish."""
        self._run_script(self.APT_SCRIPT, self.REMOTE_DEST_DIR + '/' + self.target,
                         self.TARGET_DISTNAMES[self.target],
                         *self._paths_basenames(paths))
class RedHatPackageSuite(DistroPackageSuite):
    """Upload .rpm packages: sign them and rebuild repo metadata remotely."""
    CREATEREPO_SCRIPT = """
rpmsign --addsign "$@" </dev/null
createrepo_c -c ~/.createrepo-cache --update "$REPODIR"
    REPO_ROOT = '/var/www/rpm.arvados.org/'

    def __init__(self, glob_root, rel_globs, target, ssh_host, ssh_opts, repo):
        super().__init__(glob_root, rel_globs, target, ssh_host, ssh_opts)
        # Maps each build target name to its repo subdirectory under REPO_ROOT;
        # `repo` selects the 'dev' or 'testing' flavor.
        self.TARGET_REPODIRS = {
            'centos7': 'CentOS/7/%s/x86_64/' % repo,
            'rocky8': 'CentOS/8/%s/x86_64/' % repo,

    def post_uploads(self, paths):
        """Run CREATEREPO_SCRIPT remotely: sign uploaded .rpms, refresh metadata."""
        repo_dir = os.path.join(self.REPO_ROOT,
                                self.TARGET_REPODIRS[self.target])
        self._run_script(self.CREATEREPO_SCRIPT, self.REMOTE_DEST_DIR + '/' + self.target,
                         repo_dir, *self._paths_basenames(paths))
def _define_suite(suite_class, *rel_globs, **kwargs):
    """Return a factory that builds suite_class with rel_globs and kwargs preset.

    The returned callable only needs glob_root (plus any late keyword
    arguments), matching the PackageSuite constructor signature.
    """
    factory = functools.partial(suite_class, rel_globs=rel_globs, **kwargs)
    return factory
    # Statically-defined suites, keyed by upload target name.  Distro targets
    # (DebianPackageSuite / RedHatPackageSuite) are added by parse_arguments().
    'python': _define_suite(PythonPackageSuite,
                            'sdk/python/dist/*.tar.gz',
                            'sdk/cwl/dist/*.tar.gz',
                            'services/fuse/dist/*.tar.gz',
                            'tools/crunchstat-summary/dist/*.tar.gz',
    'gems': _define_suite(GemPackageSuite,
                          'services/login-sync/*.gem',
def parse_arguments(arguments):
    """Parse command-line args and register distro suites in PACKAGE_SUITES.

    Exits with a usage error (via parser.error) when the workspace, a target
    name, or a required --ssh-host is missing.
    """
    parser = argparse.ArgumentParser(
        description="Upload Arvados packages to various repositories")
        '--workspace', '-W', default=os.environ.get('WORKSPACE'),
        help="Arvados source directory with built packages to upload")
        help="Host specification for distribution repository server")
    parser.add_argument('-o', action='append', default=[], dest='ssh_opts',
                        metavar='OPTION', help="Pass option to `ssh -o`")
    parser.add_argument('--verbose', '-v', action='count', default=0,
                        help="Log more information and subcommand output")
        '--repo', choices=['dev', 'testing'],
        help="Whether to upload to dev (nightly) or testing (release candidate) repository")
        'targets', nargs='*', default=['all'], metavar='target',
        help="Upload packages to these targets (default all)\nAvailable targets: " +
        ', '.join(sorted(PACKAGE_SUITES.keys())))
    args = parser.parse_args(arguments)
    # 'all' expands to every registered suite name.
    if 'all' in args.targets:
        args.targets = list(PACKAGE_SUITES.keys())
    if args.workspace is None:
        parser.error("workspace not set from command line or environment")
    # Register one Debian-family suite per supported distro release.
        'debian10', 'debian11', 'debian12',
        'ubuntu1804', 'ubuntu2004', 'ubuntu2204',
        PACKAGE_SUITES[target] = _define_suite(
            DebianPackageSuite, os.path.join('packages', target, '*.deb'),
            target=target, repo=args.repo)
    # Register one RPM suite per supported distro release.
    for target in ['centos7', 'rocky8']:
        PACKAGE_SUITES[target] = _define_suite(
            RedHatPackageSuite, os.path.join('packages', target, '*.rpm'),
            target=target, repo=args.repo)
    # Validate every requested target name and its SSH requirements up front.
    for target in args.targets:
        suite_class = PACKAGE_SUITES[target].func
            parser.error("unrecognized target {!r}".format(target))
        if suite_class.NEED_SSH and (args.ssh_host is None):
            "--ssh-host must be specified to upload distribution packages")
def setup_logger(stream_dest, args):
    """Attach a timestamped handler on stream_dest to the arvados-dev.upload logger.

    Each -v on the command line lowers the threshold by one level
    (WARNING -> INFO -> DEBUG -> ...), floored at 1 so records always flow.
    """
    formatter = logging.Formatter(
        '%(asctime)s %(name)s[%(process)d] %(levelname)s: %(message)s',
        '%Y-%m-%d %H:%M:%S')
    handler = logging.StreamHandler(stream_dest)
    handler.setFormatter(formatter)
    upload_logger = logging.getLogger('arvados-dev.upload')
    upload_logger.addHandler(handler)
    verbosity = logging.WARNING - (10 * args.verbose)
    upload_logger.setLevel(max(1, verbosity))
def build_suite_and_upload(target, since_timestamp, args):
    """Instantiate the suite registered for `target` and upload its packages.

    Suites that talk to a repository host over SSH additionally receive the
    ssh_host/ssh_opts settings from the parsed arguments.
    """
    suite_def = PACKAGE_SUITES[target]
    kwargs = {}
    if suite_def.func.NEED_SSH:
        kwargs.update(ssh_host=args.ssh_host, ssh_opts=args.ssh_opts)
    suite = suite_def(args.workspace, **kwargs)
    suite.update_packages(since_timestamp)
def main(arguments, stdout=sys.stdout, stderr=sys.stderr):
    """Entry point: parse arguments, configure logging, upload each target."""
    args = parse_arguments(arguments)
    setup_logger(stderr, args)
    for target in args.targets:
        # Per-target timestamp file records when this target last uploaded,
        # so only packages modified since then are considered (see
        # TimestampFile.last_upload and PackageSuite.files_to_upload).
        ts_file = TimestampFile(os.path.join(args.workspace, 'packages',
                                             '.last_upload_%s' % target))
        last_upload_ts = ts_file.last_upload()
        build_suite_and_upload(target, last_upload_ts, args)
364 if __name__ == '__main__':