3 # Copyright (C) The Arvados Authors. All rights reserved.
5 # SPDX-License-Identifier: AGPL-3.0
def run_and_grep(cmd, read_output, *regexps,
                 encoding=None, **popen_kwargs):
    """Run a subprocess and capture output lines matching regexps.

    Arguments:
    * cmd: The command to run, as a list or string, as for subprocess.Popen.
    * read_output: 'stdout' or 'stderr', the name of the output stream to read.
    Remaining arguments are regexps to match output, as strings or compiled
    regexp objects.  Output lines matching any regexp will be captured.

    Keyword arguments:
    * encoding: The encoding used to decode the subprocess output.  Defaults
      to the locale's preferred encoding, resolved at call time (the prior
      signature evaluated locale.getpreferredencoding() once at import time).
    Remaining keyword arguments are passed directly to subprocess.Popen.

    Returns 2-tuple (subprocess returncode, list of matched output lines).
    """
    if encoding is None:
        encoding = locale.getpreferredencoding()
    # Accept both plain strings and pre-compiled patterns (anything with
    # a .search method passes through unchanged).
    regexps = [regexp if hasattr(regexp, 'search') else re.compile(regexp)
               for regexp in regexps]
    popen_kwargs[read_output] = subprocess.PIPE
    proc = subprocess.Popen(cmd, **popen_kwargs)
    matched_lines = []
    # Wrap the pipe's file descriptor so we read decoded text line by line.
    with open(getattr(proc, read_output).fileno(), encoding=encoding) as output:
        for line in output:
            if any(regexp.search(line) for regexp in regexps):
                matched_lines.append(line)
            # When capturing stderr, echo every line so diagnostics still
            # reach the console.
            if read_output == 'stderr':
                print(line, file=sys.stderr, end='')
    return proc.wait(), matched_lines
52 def __init__(self, path):
54 # Make sure the dirname for `path` exists
55 p = os.path.dirname(path)
58 except OSError as exc:
59 if exc.errno == errno.EEXIST and os.path.isdir(p):
63 self.start_time = time.time()
65 def last_upload(self):
67 return os.path.getmtime(self.path)
68 except EnvironmentError:
73 os.close(os.open(self.path, os.O_CREAT | os.O_APPEND))
74 os.utime(self.path, (time.time(), self.start_time))
76 # when the packages directory is created/populated by a build in a
77 # docker container, as root, the script that runs the upload
78 # doesn't always have permission to touch a timestamp file there.
79 # In production, we build/upload from ephemeral machines, which
80 # means that the timestamp mechanism is not used. We print a
81 # warning and move on without erroring out.
82 print("Warning: unable to update timestamp file",self.path,"permission problem?")
88 def __init__(self, glob_root, rel_globs):
89 logger_part = getattr(self, 'LOGGER_PART', os.path.basename(glob_root))
90 self.logger = logging.getLogger('arvados-dev.upload.' + logger_part)
91 self.globs = [os.path.join(glob_root, rel_glob)
92 for rel_glob in rel_globs]
94 def files_to_upload(self, since_timestamp):
95 for abs_glob in self.globs:
96 for path in glob.glob(abs_glob):
97 if os.path.getmtime(path) >= since_timestamp:
100 def upload_file(self, path):
101 raise NotImplementedError("PackageSuite.upload_file")
103 def upload_files(self, paths):
105 self.logger.info("Uploading %s", path)
106 self.upload_file(path)
108 def post_uploads(self, paths):
111 def update_packages(self, since_timestamp):
112 upload_paths = list(self.files_to_upload(since_timestamp))
114 self.upload_files(upload_paths)
115 self.post_uploads(upload_paths)
class PythonPackageSuite(PackageSuite):
    """Upload Python distribution files (see the 'python' PACKAGE_SUITES globs)."""
    LOGGER_PART = 'python'

    def upload_file(self, path):
        # NOTE(review): the start of the upload command is elided from this
        # excerpt.  The visible tail shows a subprocess call that passes no
        # stdin and raises CalledProcessError on a nonzero exit (check=True).
            '--disable-progress-bar',
        ], stdin=subprocess.DEVNULL, check=True)
class GemPackageSuite(PackageSuite):
    """Upload Ruby gem files with `gem push` (see upload_file)."""
    # Line `gem push` prints when this exact gem version was already uploaded;
    # upload_file treats a failure accompanied by this message as success.
    REUPLOAD_REGEXP = re.compile(r'^Repushing of gem versions is not allowed\.$')
135 def upload_file(self, path):
136 cmd = ['gem', 'push', path]
137 push_returncode, repushed = run_and_grep(cmd, 'stdout', self.REUPLOAD_REGEXP)
138 if (push_returncode != 0) and not repushed:
139 raise subprocess.CalledProcessError(push_returncode, cmd)
class DistroPackageSuite(PackageSuite):
    """Shared machinery for suites that scp packages to a repository server
    and finish publication over ssh (see upload_files and _run_script).
    """
    # Remote directory (relative to the ssh user's home) that receives
    # uploads; upload_files creates a per-target subdirectory under it.
    REMOTE_DEST_DIR = 'tmp'
146 def __init__(self, glob_root, rel_globs, target, ssh_host, ssh_opts):
147 super().__init__(glob_root, rel_globs)
149 self.ssh_host = ssh_host
150 self.ssh_opts = ['-o' + opt for opt in ssh_opts]
151 if not self.logger.isEnabledFor(logging.INFO):
152 self.ssh_opts.append('-q')
154 def _build_cmd(self, base_cmd, *args):
156 cmd.extend(self.ssh_opts)
160 def _paths_basenames(self, paths):
161 return (os.path.basename(path) for path in paths)
163 def _run_script(self, script, *args):
164 # SSH will use a shell to run our bash command, so we have to
165 # quote our arguments.
166 # self.__class__.__name__ provides $0 for the script, which makes a
167 # nicer message if there's an error.
168 subprocess.check_call(self._build_cmd(
169 'ssh', self.ssh_host, 'bash', '-ec', shlex.quote(script),
170 self.__class__.__name__, *(shlex.quote(s) for s in args)))
172 def upload_files(self, paths):
173 dest_dir = os.path.join(self.REMOTE_DEST_DIR, self.target)
174 mkdir = self._build_cmd('ssh', self.ssh_host, 'install', '-d', dest_dir)
175 subprocess.check_call(mkdir)
176 cmd = self._build_cmd('scp', *paths)
177 cmd.append('{}:{}'.format(self.ssh_host, dest_dir))
178 subprocess.check_call(cmd)
class DebianPackageSuite(DistroPackageSuite):
    """Upload .deb packages and publish them with aptly on the repo server.

    NOTE(review): the lines below are the interior of the APT_SCRIPT bash
    string literal (its opening assignment is elided from this excerpt), so
    no annotations are added among them — any change would alter the script.
    """
    # aptly implements its own locking, but its wait strategy as of April 2024 is
    # not patient enough to accommodate multiple simultaneous uploads.
    APTLY_LOCK="${XDG_RUNTIME_DIR:-/tmp}/aptly-upload.lock"
    flock --wait=300 "$APTLY_LOCK" aptly "$@"
    for package in "$@"; do
    if aptly repo search "$DISTNAME" "${package%.deb}" >/dev/null 2>&1; then
    echo "Not adding $package, it is already present in repo $DISTNAME"
    aptly repo add -remove-files "$DISTNAME" "$package"
    aptly publish update "$DISTNAME" filesystem:"${DISTNAME%-*}":
203 def __init__(self, glob_root, rel_globs, target, ssh_host, ssh_opts, repo):
204 super().__init__(glob_root, rel_globs, target, ssh_host, ssh_opts)
205 self.TARGET_DISTNAMES = {
206 'debian10': 'buster-'+repo,
207 'debian11': 'bullseye-'+repo,
208 'debian12': 'bookworm-'+repo,
209 'ubuntu1804': 'bionic-'+repo,
210 'ubuntu2004': 'focal-'+repo,
211 'ubuntu2204': 'jammy-'+repo,
212 'ubuntu2404': 'noble-'+repo,
215 def post_uploads(self, paths):
216 self._run_script(self.APT_SCRIPT, self.REMOTE_DEST_DIR + '/' + self.target,
217 self.TARGET_DISTNAMES[self.target],
218 *self._paths_basenames(paths))
class RedHatPackageSuite(DistroPackageSuite):
    """Upload .rpm packages to a Red Hat-style repository server."""
    # Bash script run on the repo server via _run_script (see post_uploads):
    # signs the uploaded rpms, then refreshes metadata with createrepo_c.
    # NOTE(review): interior lines and the closing quotes of this string are
    # elided from this excerpt; nothing may be inserted among them.
    CREATEREPO_SCRIPT = """
rpmsign --addsign "$@" </dev/null
createrepo_c -c ~/.createrepo-cache --update "$REPODIR"
    REPO_ROOT = '/var/www/rpm.arvados.org/'
231 def __init__(self, glob_root, rel_globs, target, ssh_host, ssh_opts, repo):
232 super().__init__(glob_root, rel_globs, target, ssh_host, ssh_opts)
233 self.TARGET_REPODIRS = {
234 'centos7': 'RHEL/7/%s/x86_64/' % repo,
235 'rocky8': 'RHEL/8/%s/x86_64/' % repo,
238 def post_uploads(self, paths):
239 repo_dir = os.path.join(self.REPO_ROOT,
240 self.TARGET_REPODIRS[self.target])
241 self._run_script(self.CREATEREPO_SCRIPT, self.REMOTE_DEST_DIR + '/' + self.target,
242 repo_dir, *self._paths_basenames(paths))
def _define_suite(suite_class, *rel_globs, **kwargs):
    """Return a factory for suite_class with rel_globs and kwargs pre-bound.

    The result is a functools.partial, so callers can still reach the
    underlying class through its .func attribute (parse_arguments does).
    """
    bound_kwargs = dict(kwargs, rel_globs=rel_globs)
    return functools.partial(suite_class, **bound_kwargs)
    # Upload-target name -> suite factory (from _define_suite); globs are
    # relative to the workspace root passed to each suite.
    # NOTE(review): the dict's opening/closing lines and some gem globs are
    # elided from this excerpt.
    'python': _define_suite(PythonPackageSuite,
                            'sdk/cwl/dist/*.tar.gz',
                            'sdk/cwl/dist/*.whl',
                            'sdk/python/dist/*.tar.gz',
                            'sdk/python/dist/*.whl',
                            'services/fuse/dist/*.tar.gz',
                            'services/fuse/dist/*.whl',
                            'tools/crunchstat-summary/dist/*.tar.gz',
                            'tools/crunchstat-summary/dist/*.whl',
    'gems': _define_suite(GemPackageSuite,
                          'sdk/ruby-google-api-client/*.gem',
                          'services/login-sync/*.gem',
def parse_arguments(arguments):
    """Parse the command line, expand 'all', and pre-validate each target.

    Exits via parser.error() on a bad workspace, unknown target, or a
    distro target given without --ssh-host.

    NOTE(review): several statement openers (parser.add_argument(...) calls,
    loop headers, and the try/except around the target lookup) are elided
    from this excerpt; only the visible lines are reproduced below.
    """
    parser = argparse.ArgumentParser(
        description="Upload Arvados packages to various repositories")
        # Workspace defaults to the WORKSPACE environment variable
        # (Jenkins-style).
        '--workspace', '-W', default=os.environ.get('WORKSPACE'),
        help="Arvados source directory with built packages to upload")
        help="Host specification for distribution repository server")
    parser.add_argument('-o', action='append', default=[], dest='ssh_opts',
                        metavar='OPTION', help="Pass option to `ssh -o`")
    # Repeatable -v raises log verbosity (consumed by setup_logger).
    parser.add_argument('--verbose', '-v', action='count', default=0,
                        help="Log more information and subcommand output")
        '--repo', choices=['dev', 'testing'],
        help="Whether to upload to dev (nightly) or testing (release candidate) repository")
        'targets', nargs='*', default=['all'], metavar='target',
        help="Upload packages to these targets (default all)\nAvailable targets: " +
        ', '.join(sorted(PACKAGE_SUITES.keys())))
    args = parser.parse_args(arguments)
    # 'all' expands to every registered suite.
    if 'all' in args.targets:
        args.targets = list(PACKAGE_SUITES.keys())

    if args.workspace is None:
        parser.error("workspace not set from command line or environment")

    # Register one Debian-family suite per supported distro release; the
    # repo flavor (dev/testing) comes from --repo.
        'debian10', 'debian11', 'debian12',
        'ubuntu1804', 'ubuntu2004', 'ubuntu2204', 'ubuntu2404',
        PACKAGE_SUITES[target] = _define_suite(
            DebianPackageSuite, os.path.join('packages', target, '*.deb'),
            target=target, repo=args.repo)
    for target in ['centos7', 'rocky8']:
        PACKAGE_SUITES[target] = _define_suite(
            RedHatPackageSuite, os.path.join('packages', target, '*.rpm'),
            target=target, repo=args.repo)
    # Validate every requested target and its ssh requirements up front,
    # before any uploads start.
    for target in args.targets:
        suite_class = PACKAGE_SUITES[target].func
        parser.error("unrecognized target {!r}".format(target))
        if suite_class.NEED_SSH and (args.ssh_host is None):
            "--ssh-host must be specified to upload distribution packages")
def setup_logger(stream_dest, args):
    """Attach a stream handler to the upload logger and set its level.

    Each -v on the command line lowers the threshold by one level (10),
    bottoming out at 1 so nothing is ever fully silenced.
    """
    formatter = logging.Formatter(
        '%(asctime)s %(name)s[%(process)d] %(levelname)s: %(message)s',
        '%Y-%m-%d %H:%M:%S')
    handler = logging.StreamHandler(stream_dest)
    handler.setFormatter(formatter)
    upload_logger = logging.getLogger('arvados-dev.upload')
    upload_logger.addHandler(handler)
    threshold = logging.WARNING - (10 * args.verbose)
    upload_logger.setLevel(max(1, threshold))
def build_suite_and_upload(target, since_timestamp, args):
    """Instantiate `target`'s suite and upload packages newer than since_timestamp."""
    suite_def = PACKAGE_SUITES[target]
    # Restored: the kwargs initialization was elided in the excerpt.
    kwargs = {}
    # Distro suites need connection details for the repository server
    # (see parse_arguments' NEED_SSH validation).
    if suite_def.func.NEED_SSH:
        kwargs.update(ssh_host=args.ssh_host, ssh_opts=args.ssh_opts)
    suite = suite_def(args.workspace, **kwargs)
    suite.update_packages(since_timestamp)
def main(arguments, stdout=sys.stdout, stderr=sys.stderr):
    """Script entry point: parse args, configure logging, upload each target.

    NOTE(review): a trailing ts_file.update() call after a successful upload
    appears to be elided from this excerpt — confirm against the full file.
    """
    args = parse_arguments(arguments)
    setup_logger(stderr, args)

    for target in args.targets:
        # One timestamp file per target; its mtime is the cutoff so only
        # packages built since the last upload are considered.
        ts_file = TimestampFile(os.path.join(args.workspace, 'packages',
                                             '.last_upload_%s' % target))
        last_upload_ts = ts_file.last_upload()
        build_suite_and_upload(target, last_upload_ts, args)
345 if __name__ == '__main__':