X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/9e3bb9b984ff700fc3455f87437a8f1ac5841f0e..8c492b3940f398dddea54792f67a7a0e9d2383a6:/sdk/python/arvados/util.py diff --git a/sdk/python/arvados/util.py b/sdk/python/arvados/util.py index 552fbbe31f..2609f112fe 100644 --- a/sdk/python/arvados/util.py +++ b/sdk/python/arvados/util.py @@ -4,6 +4,17 @@ import os import re import subprocess import errno +import sys +from arvados.collection import * + +HEX_RE = re.compile(r'^[0-9a-fA-F]+$') + +portable_data_hash_pattern = re.compile(r'[0-9a-f]{32}\+\d+') +uuid_pattern = re.compile(r'[a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15}') +collection_uuid_pattern = re.compile(r'[a-z0-9]{5}-4zz18-[a-z0-9]{15}') +group_uuid_pattern = re.compile(r'[a-z0-9]{5}-j7d0g-[a-z0-9]{15}') +user_uuid_pattern = re.compile(r'[a-z0-9]{5}-tpzed-[a-z0-9]{15}') +link_uuid_pattern = re.compile(r'[a-z0-9]{5}-o0j2j-[a-z0-9]{15}') def clear_tmpdir(path=None): """ @@ -11,7 +22,7 @@ def clear_tmpdir(path=None): exists and is empty. """ if path == None: - path = current_task().tmpdir + path = arvados.current_task().tmpdir if os.path.exists(path): p = subprocess.Popen(['rm', '-rf', path]) stdout, stderr = p.communicate(None) @@ -35,12 +46,12 @@ def run_command(execargs, **kwargs): def git_checkout(url, version, path): if not re.search('^/', path): - path = os.path.join(current_job().tmpdir, path) + path = os.path.join(arvados.current_job().tmpdir, path) if not os.path.exists(path): - util.run_command(["git", "clone", url, path], - cwd=os.path.dirname(path)) - util.run_command(["git", "checkout", version], - cwd=path) + run_command(["git", "clone", url, path], + cwd=os.path.dirname(path)) + run_command(["git", "checkout", version], + cwd=path) return path def tar_extractor(path, decompress_flag): @@ -63,7 +74,7 @@ def tarball_extract(tarball, path): path -- where to extract the tarball: absolute, or relative to job tmp """ if not re.search('^/', path): - path = os.path.join(current_job().tmpdir, path) + path = os.path.join(arvados.current_job().tmpdir, path) lockfile = open(path + '.lock', 'w') fcntl.flock(lockfile, fcntl.LOCK_EX) try: @@ -87,11 +98,11 @@ def tarball_extract(tarball, path): for f in CollectionReader(tarball).all_files(): if re.search('\.(tbz|tar.bz2)$', f.name()): - p = util.tar_extractor(path, 'j') + p = tar_extractor(path, 'j') elif re.search('\.(tgz|tar.gz)$', f.name()): - p = util.tar_extractor(path, 'z') + p = tar_extractor(path, 'z') elif re.search('\.tar$', f.name()): - p = util.tar_extractor(path, '') + p = tar_extractor(path, '') else: raise errors.AssertionError( "tarball_extract cannot handle filename %s" % f.name()) @@ -124,7 +135,7 @@ def zipball_extract(zipball, path): path -- where to extract the archive: absolute, or relative to job tmp """ if not re.search('^/', path): - path = os.path.join(current_job().tmpdir, path) + path = os.path.join(arvados.current_job().tmpdir, path) lockfile = open(path + '.lock', 'w') fcntl.flock(lockfile, fcntl.LOCK_EX) try: @@ -158,7 +169,7 @@ def zipball_extract(zipball, path): break zip_file.write(buf) zip_file.close() - + p = subprocess.Popen(["unzip", "-q", "-o", "-d", path, @@ -193,7 +204,7 @@ def collection_extract(collection, path, files=[], decompress=True): else: collection_hash = hashlib.md5(collection).hexdigest() if not re.search('^/', path): - path = os.path.join(current_job().tmpdir, path) + path = os.path.join(arvados.current_job().tmpdir, path) lockfile = open(path + '.lock', 'w') fcntl.flock(lockfile, fcntl.LOCK_EX) try: @@ -263,7 +274,7 @@ def stream_extract(stream, path, files=[], decompress=True): path -- where to extract: absolute, or relative to job tmp """ if not re.search('^/', path): - path = os.path.join(current_job().tmpdir, path) + path = os.path.join(arvados.current_job().tmpdir, path) lockfile = open(path + '.lock', 'w') fcntl.flock(lockfile, fcntl.LOCK_EX) try: @@ -281,7 +292,7 @@ def stream_extract(stream, path, files=[], decompress=True): files_got += [outname] if os.path.exists(os.path.join(path, outname)): os.unlink(os.path.join(path, outname)) - util.mkdir_dash_p(os.path.dirname(os.path.join(path, outname))) + mkdir_dash_p(os.path.dirname(os.path.join(path, outname))) outfile = open(os.path.join(path, outname), 'wb') for buf in (f.readall_decompressed() if decompress else f.readall()): @@ -300,7 +311,40 @@ def listdir_recursive(dirname, base=None): ent_path = os.path.join(dirname, ent) ent_base = os.path.join(base, ent) if base else ent if os.path.isdir(ent_path): - allfiles += util.listdir_recursive(ent_path, ent_base) + allfiles += listdir_recursive(ent_path, ent_base) else: allfiles += [ent_base] return allfiles + +def is_hex(s, *length_args): + """is_hex(s[, length[, max_length]]) -> boolean + + Return True if s is a string of hexadecimal digits. + If one length argument is given, the string must contain exactly + that number of digits. + If two length arguments are given, the string must contain a number of + digits between those two lengths, inclusive. + Return False otherwise. + """ + num_length_args = len(length_args) + if num_length_args > 2: + raise ArgumentError("is_hex accepts up to 3 arguments ({} given)". + format(1 + num_length_args)) + elif num_length_args == 2: + good_len = (length_args[0] <= len(s) <= length_args[1]) + elif num_length_args == 1: + good_len = (len(s) == length_args[0]) + else: + good_len = True + return bool(good_len and HEX_RE.match(s)) + +def list_all(fn, num_retries=0, **kwargs): + items = [] + offset = 0 + items_available = sys.maxint + while len(items) < items_available: + c = fn(offset=offset, **kwargs).execute(num_retries=num_retries) + items += c['items'] + items_available = c['items_available'] + offset = c['offset'] + len(c['items']) + return items