+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
import fcntl
import hashlib
+import httplib2
import os
import re
import subprocess
import errno
import sys
-from arvados.collection import *
+
+import arvados
+from arvados.collection import CollectionReader
# Module-level compiled patterns.  Only HEX_RE is anchored at both ends;
# every other pattern is unanchored, so callers get "starts with" semantics
# from .match() and "contains" semantics from .search().

# One or more hexadecimal digits (either case), and nothing else.
HEX_RE = re.compile(r'^[0-9a-fA-F]+$')

# 32 lowercase hex digits, '+', a decimal number, then any number of
# '+'-prefixed non-space suffixes.
keep_locator_pattern = re.compile(r'[0-9a-f]{32}\+\d+(\+\S+)*')
# Same shape as keep_locator_pattern, but at least one of the '+' suffixes
# must start with 'A' (presumably the permission signature, going by the
# name -- confirm against the Keep locator format).
signed_locator_pattern = re.compile(r'[0-9a-f]{32}\+\d+(\+\S+)*\+A\S+(\+\S+)*')
# Just the 32 hex digits, '+', and the decimal number, with no suffixes.
portable_data_hash_pattern = re.compile(r'[0-9a-f]{32}\+\d+')

# Three dash-separated groups of lowercase alphanumerics: 5, 5 and 15 chars.
uuid_pattern = re.compile(r'[a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15}')
# Variants of uuid_pattern with the middle group fixed to one literal value;
# the object type each literal stands for is taken from the variable name.
collection_uuid_pattern = re.compile(r'[a-z0-9]{5}-4zz18-[a-z0-9]{15}')
group_uuid_pattern = re.compile(r'[a-z0-9]{5}-j7d0g-[a-z0-9]{15}')
user_uuid_pattern = re.compile(r'[a-z0-9]{5}-tpzed-[a-z0-9]{15}')
link_uuid_pattern = re.compile(r'[a-z0-9]{5}-o0j2j-[a-z0-9]{15}')

# One or more lines, each made of: a non-space token, one or more
# space-separated "<32 hex>+<digits>[+suffix...]" tokens, then one or more
# space-separated "<digits>:<digits>:<non-space>" tokens up to end of line.
# re.MULTILINE makes '$' match at every line end, not only at end of input.
manifest_pattern = re.compile(r'((\S+)( +[a-f0-9]{32}(\+\d+)(\+\S+)*)+( +\d+:\d+:\S+)+$)+', flags=re.MULTILINE)
+
def clear_tmpdir(path=None):
"""
Ensure the given directory (or TASK_TMPDIR if none given)
exists and is empty.
"""
- if path == None:
+ if path is None:
path = arvados.current_task().tmpdir
if os.path.exists(path):
p = subprocess.Popen(['rm', '-rf', path])
p = subprocess.Popen(execargs, **kwargs)
stdoutdata, stderrdata = p.communicate(None)
if p.returncode != 0:
- raise errors.CommandFailedError(
+ raise arvados.errors.CommandFailedError(
"run_command %s exit %d:\n%s" %
(execargs, p.returncode, stderrdata))
return stdoutdata, stderrdata
elif re.search('\.tar$', f.name()):
p = tar_extractor(path, '')
else:
- raise errors.AssertionError(
+ raise arvados.errors.AssertionError(
"tarball_extract cannot handle filename %s" % f.name())
while True:
buf = f.read(2**20)
p.wait()
if p.returncode != 0:
lockfile.close()
- raise errors.CommandFailedError(
+ raise arvados.errors.CommandFailedError(
"tar exited %d" % p.returncode)
os.symlink(tarball, os.path.join(path, '.locator'))
- tld_extracts = filter(lambda f: f != '.locator', os.listdir(path))
+ tld_extracts = [f for f in os.listdir(path) if f != '.locator']
lockfile.close()
if len(tld_extracts) == 1:
return os.path.join(path, tld_extracts[0])
for f in CollectionReader(zipball).all_files():
if not re.search('\.zip$', f.name()):
- raise errors.NotImplementedError(
+ raise arvados.errors.NotImplementedError(
"zipball_extract cannot handle filename %s" % f.name())
zip_filename = os.path.join(path, os.path.basename(f.name()))
zip_file = open(zip_filename, 'wb')
p.wait()
if p.returncode != 0:
lockfile.close()
- raise errors.CommandFailedError(
+ raise arvados.errors.CommandFailedError(
"unzip exited %d" % p.returncode)
os.unlink(zip_filename)
os.symlink(zipball, os.path.join(path, '.locator'))
- tld_extracts = filter(lambda f: f != '.locator', os.listdir(path))
+ tld_extracts = [f for f in os.listdir(path) if f != '.locator']
lockfile.close()
if len(tld_extracts) == 1:
return os.path.join(path, tld_extracts[0])
outfile.write(buf)
outfile.close()
if len(files_got) < len(files):
- raise errors.AssertionError(
+ raise arvados.errors.AssertionError(
"Wanted files %s but only got %s from %s" %
(files, files_got,
[z.name() for z in CollectionReader(collection).all_files()]))
outfile.write(buf)
outfile.close()
if len(files_got) < len(files):
- raise errors.AssertionError(
+ raise arvados.errors.AssertionError(
"Wanted files %s but only got %s from %s" %
(files, files_got, [z.name() for z in stream.all_files()]))
lockfile.close()
return path
def listdir_recursive(dirname, base=None, max_depth=None):
    """listdir_recursive(dirname, base, max_depth)

    Return a list of file and directory names found under dirname.

    Entries of each directory are visited in sorted order, so the
    result is deterministic for a given tree.

    If base is not None, prepend "{base}/" to each returned name.

    If max_depth is None, descend into directories and return only the
    names of files found in the directory tree.  (An empty directory
    therefore contributes nothing to the result.)

    If max_depth is a non-negative integer, stop descending into
    directories at the given depth, and at that point return directory
    names instead.

    If max_depth==0 (and base is None) this is equivalent to
    sorted(os.listdir(dirname)).
    """
    allfiles = []
    for ent in sorted(os.listdir(dirname)):
        ent_path = os.path.join(dirname, ent)
        # A falsy base (None or '') means "no prefix".
        ent_base = os.path.join(base, ent) if base else ent
        if os.path.isdir(ent_path) and max_depth != 0:
            # max_depth is either None (unlimited) or non-zero here, so
            # the truthiness test below only distinguishes None from a
            # remaining depth budget that must be decremented.
            allfiles += listdir_recursive(
                ent_path, base=ent_base,
                max_depth=(max_depth-1 if max_depth else None))
        else:
            # Plain file, or a directory we are not allowed to enter.
            allfiles.append(ent_base)
    return allfiles
"""
num_length_args = len(length_args)
if num_length_args > 2:
- raise ArgumentError("is_hex accepts up to 3 arguments ({} given)".
- format(1 + num_length_args))
+ raise arvados.errors.ArgumentError(
+ "is_hex accepts up to 3 arguments ({} given)".format(1 + num_length_args))
elif num_length_args == 2:
good_len = (length_args[0] <= len(s) <= length_args[1])
elif num_length_args == 1:
good_len = True
return bool(good_len and HEX_RE.match(s))
def list_all(fn, num_retries=0, **kwargs):
    """Collect every item from a paged list call into one list.

    fn is called repeatedly as fn(offset=..., **kwargs); the object it
    returns must have an execute(num_retries=...) method returning a
    mapping with the keys 'items', 'items_available' and 'offset'.

    num_retries is passed through to every execute() call.  Any other
    keyword arguments are passed through to fn.  If the caller does not
    supply 'limit', it defaults to sys.maxsize.

    Paging continues until as many items as the most recent response's
    'items_available' have been collected, or until a response carries
    no items at all.  Returns the concatenated list of items.
    """
    # Default limit to (effectively) api server's MAX_LIMIT
    kwargs.setdefault('limit', sys.maxsize)
    items = []
    offset = 0
    # Placeholder until the first response tells us the real total.
    items_available = sys.maxsize
    while len(items) < items_available:
        c = fn(offset=offset, **kwargs).execute(num_retries=num_retries)
        page = c['items']
        if not page:
            # An empty page cannot advance the offset; without this
            # guard the same request would be repeated forever whenever
            # 'items_available' overstates what can be fetched.
            break
        items += page
        items_available = c['items_available']
        offset = c['offset'] + len(page)
    return items
+
def ca_certs_path(fallback=httplib2.CA_CERTS):
    """Return the path of the best available CA certs source.

    This function searches for various distribution sources of CA
    certificates, and returns the first it finds.  If it doesn't find any,
    it returns the value of `fallback` (httplib2's CA certs by default).

    The candidates are checked with os.path.exists() in the order listed
    below.  The default for `fallback` is read from httplib2 once, when
    this function is defined.
    """
    candidates = (
        # Arvados specific:
        '/etc/arvados/ca-certificates.crt',
        # Debian:
        '/etc/ssl/certs/ca-certificates.crt',
        # Red Hat:
        '/etc/pki/tls/certs/ca-bundle.crt',
    )
    # The loop variable is deliberately not called ca_certs_path, so the
    # function's own name is not rebound inside its body.
    for candidate in candidates:
        if os.path.exists(candidate):
            return candidate
    return fallback