import re
import subprocess
import errno
+import sys
+from arvados.collection import *
+
+# Matches any nonempty string of hex digits (used by is_hex below).
+HEX_RE = re.compile(r'^[0-9a-fA-F]+$')
+
+# Keep block locators: 32 hex digits (md5) "+" size, plus optional
+# "+hint" suffixes; the signed variant additionally requires a "+A..."
+# permission hint.
+# NOTE(review): these patterns are unanchored — presumably used via
+# re.match/fullmatch by callers; confirm before relying on search().
+keep_locator_pattern = re.compile(r'[0-9a-f]{32}\+\d+(\+\S+)*')
+signed_locator_pattern = re.compile(r'[0-9a-f]{32}\+\d+(\+\S+)*\+A\S+(\+\S+)*')
+portable_data_hash_pattern = re.compile(r'[0-9a-f]{32}\+\d+')
+# UUIDs: <5-char prefix>-<5-char type infix>-<15-char id>.  The infix
+# encodes the object class (4zz18 collection, j7d0g group, tpzed user,
+# o0j2j link), per the specialized patterns below.
+uuid_pattern = re.compile(r'[a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15}')
+collection_uuid_pattern = re.compile(r'[a-z0-9]{5}-4zz18-[a-z0-9]{15}')
+group_uuid_pattern = re.compile(r'[a-z0-9]{5}-j7d0g-[a-z0-9]{15}')
+user_uuid_pattern = re.compile(r'[a-z0-9]{5}-tpzed-[a-z0-9]{15}')
+link_uuid_pattern = re.compile(r'[a-z0-9]{5}-o0j2j-[a-z0-9]{15}')
+# One manifest stream per line: stream name, one or more block locators,
+# then one or more "pos:size:name" file tokens.
+manifest_pattern = re.compile(r'((\S+)( +[a-f0-9]{32}(\+\d+)(\+\S+)*)+( +\d+:\d+:\S+)+$)+', flags=re.MULTILINE)
 def clear_tmpdir(path=None):
     """
     Ensure the given directory (or TASK_TMPDIR if none given)
     exists and is empty.
     """
-    if path == None:
-        path = current_task().tmpdir
+    if path is None:
+        path = arvados.current_task().tmpdir
     if os.path.exists(path):
         # Delete the existing tree with an external `rm -rf`.
         # NOTE(review): handling of p.returncode is not visible in this
         # hunk — confirm failures are detected in the full function.
         p = subprocess.Popen(['rm', '-rf', path])
         stdout, stderr = p.communicate(None)
 def git_checkout(url, version, path):
     # A relative path is anchored under the current job's scratch dir.
     if not re.search('^/', path):
-        path = os.path.join(current_job().tmpdir, path)
+        path = os.path.join(arvados.current_job().tmpdir, path)
     # Clone only if the checkout does not exist yet; then check out the
     # requested version (commit/branch/tag).
     if not os.path.exists(path):
-        util.run_command(["git", "clone", url, path],
-                         cwd=os.path.dirname(path))
-        util.run_command(["git", "checkout", version],
-                         cwd=path)
+        run_command(["git", "clone", url, path],
+                    cwd=os.path.dirname(path))
+        run_command(["git", "checkout", version],
+                    cwd=path)
     return path
def tar_extractor(path, decompress_flag):
path -- where to extract the tarball: absolute, or relative to job tmp
"""
if not re.search('^/', path):
- path = os.path.join(current_job().tmpdir, path)
+ path = os.path.join(arvados.current_job().tmpdir, path)
lockfile = open(path + '.lock', 'w')
fcntl.flock(lockfile, fcntl.LOCK_EX)
try:
for f in CollectionReader(tarball).all_files():
if re.search('\.(tbz|tar.bz2)$', f.name()):
- p = util.tar_extractor(path, 'j')
+ p = tar_extractor(path, 'j')
elif re.search('\.(tgz|tar.gz)$', f.name()):
- p = util.tar_extractor(path, 'z')
+ p = tar_extractor(path, 'z')
elif re.search('\.tar$', f.name()):
- p = util.tar_extractor(path, '')
+ p = tar_extractor(path, '')
else:
raise errors.AssertionError(
"tarball_extract cannot handle filename %s" % f.name())
path -- where to extract the archive: absolute, or relative to job tmp
"""
if not re.search('^/', path):
- path = os.path.join(current_job().tmpdir, path)
+ path = os.path.join(arvados.current_job().tmpdir, path)
lockfile = open(path + '.lock', 'w')
fcntl.flock(lockfile, fcntl.LOCK_EX)
try:
break
zip_file.write(buf)
zip_file.close()
-
+
p = subprocess.Popen(["unzip",
"-q", "-o",
"-d", path,
else:
collection_hash = hashlib.md5(collection).hexdigest()
if not re.search('^/', path):
- path = os.path.join(current_job().tmpdir, path)
+ path = os.path.join(arvados.current_job().tmpdir, path)
lockfile = open(path + '.lock', 'w')
fcntl.flock(lockfile, fcntl.LOCK_EX)
try:
path -- where to extract: absolute, or relative to job tmp
"""
if not re.search('^/', path):
- path = os.path.join(current_job().tmpdir, path)
+ path = os.path.join(arvados.current_job().tmpdir, path)
lockfile = open(path + '.lock', 'w')
fcntl.flock(lockfile, fcntl.LOCK_EX)
try:
files_got += [outname]
if os.path.exists(os.path.join(path, outname)):
os.unlink(os.path.join(path, outname))
- util.mkdir_dash_p(os.path.dirname(os.path.join(path, outname)))
+ mkdir_dash_p(os.path.dirname(os.path.join(path, outname)))
outfile = open(os.path.join(path, outname), 'wb')
for buf in (f.readall_decompressed() if decompress
else f.readall()):
lockfile.close()
return path
-def listdir_recursive(dirname, base=None):
+def listdir_recursive(dirname, base=None, max_depth=None):
+    """listdir_recursive(dirname, base, max_depth)
+
+    Return a list of file and directory names found under dirname.
+
+    If base is not None, prepend "{base}/" to each returned name.
+
+    If max_depth is None, descend into directories and return only the
+    names of files found in the directory tree.
+
+    If max_depth is a non-negative integer, stop descending into
+    directories at the given depth, and at that point return directory
+    names instead.
+
+    If max_depth==0 (and base is None) this is equivalent to
+    sorted(os.listdir(dirname)).
+    """
     allfiles = []
     for ent in sorted(os.listdir(dirname)):
         ent_path = os.path.join(dirname, ent)
         ent_base = os.path.join(base, ent) if base else ent
-        if os.path.isdir(ent_path):
-            allfiles += util.listdir_recursive(ent_path, ent_base)
+        # Recurse unless the depth budget is exhausted.  A truthy
+        # max_depth is decremented per level; None propagates as
+        # "unlimited".  (max_depth==1 passes 0, stopping one level down.)
+        if os.path.isdir(ent_path) and max_depth != 0:
+            allfiles += listdir_recursive(
+                ent_path, base=ent_base,
+                max_depth=(max_depth-1 if max_depth else None))
         else:
             allfiles += [ent_base]
     return allfiles
+
+def is_hex(s, *length_args):
+    """is_hex(s[, length[, max_length]]) -> boolean
+
+    Return True if s is a string of hexadecimal digits.
+    If one length argument is given, the string must contain exactly
+    that number of digits.
+    If two length arguments are given, the string must contain a number of
+    digits between those two lengths, inclusive.
+    Return False otherwise.
+
+    Raises ValueError if more than two length arguments are given.
+    """
+    num_length_args = len(length_args)
+    if num_length_args > 2:
+        # ValueError, not ArgumentError: there is no builtin named
+        # ArgumentError, so the original raise would itself have
+        # crashed with NameError.
+        raise ValueError("is_hex accepts up to 3 arguments ({} given)".
+                         format(1 + num_length_args))
+    elif num_length_args == 2:
+        good_len = (length_args[0] <= len(s) <= length_args[1])
+    elif num_length_args == 1:
+        good_len = (len(s) == length_args[0])
+    else:
+        good_len = True
+    return bool(good_len and HEX_RE.match(s))
+
+def list_all(fn, num_retries=0, **kwargs):
+    """Call an API list method repeatedly and return all items.
+
+    fn is an API list method (e.g. api.collections().list); it is called
+    with increasing offset= until every available item has been fetched.
+    Extra keyword arguments are passed through to fn on every page.
+    """
+    items = []
+    offset = 0
+    # sys.maxsize exists on both Python 2.6+ and 3 (sys.maxint is
+    # Python-2-only); any huge sentinel works for the first iteration.
+    items_available = sys.maxsize
+    while len(items) < items_available:
+        c = fn(offset=offset, **kwargs).execute(num_retries=num_retries)
+        if not c['items']:
+            # Defensive: an empty page with a stale items_available
+            # would otherwise loop forever.
+            break
+        items += c['items']
+        items_available = c['items_available']
+        offset = c['offset'] + len(c['items'])
+    return items