4297: Merge branch 'master' into 4297-dispatch-load
[arvados.git] / sdk / python / arvados / util.py
index 9286795e5dc5879160244d94dccc91b95251870a..2609f112fe31d050a5a1db094e32ca8087774c5c 100644 (file)
@@ -3,6 +3,18 @@ import hashlib
 import os
 import re
 import subprocess
+import errno
+import sys
+from arvados.collection import *
+
+HEX_RE = re.compile(r'^[0-9a-fA-F]+$')
+
+portable_data_hash_pattern = re.compile(r'[0-9a-f]{32}\+\d+')
+uuid_pattern = re.compile(r'[a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15}')
+collection_uuid_pattern = re.compile(r'[a-z0-9]{5}-4zz18-[a-z0-9]{15}')
+group_uuid_pattern = re.compile(r'[a-z0-9]{5}-j7d0g-[a-z0-9]{15}')
+user_uuid_pattern = re.compile(r'[a-z0-9]{5}-tpzed-[a-z0-9]{15}')
+link_uuid_pattern = re.compile(r'[a-z0-9]{5}-o0j2j-[a-z0-9]{15}')
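+
+# Example matches for the patterns above (the UUID and hash values are
+# hypothetical, shown only to illustrate the expected shapes):
+#
+#     >>> bool(collection_uuid_pattern.match('zzzzz-4zz18-0123456789abcde'))
+#     True
+#     >>> bool(portable_data_hash_pattern.match('d41d8cd98f00b204e9800998ecf8427e+0'))
+#     True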
 
 def clear_tmpdir(path=None):
     """
@@ -10,7 +22,7 @@ def clear_tmpdir(path=None):
     exists and is empty.
     """
     if path == None:
-        path = current_task().tmpdir
+        path = arvados.current_task().tmpdir
     if os.path.exists(path):
         p = subprocess.Popen(['rm', '-rf', path])
         stdout, stderr = p.communicate(None)
@@ -34,12 +46,12 @@ def run_command(execargs, **kwargs):
 
 def git_checkout(url, version, path):
     if not re.search('^/', path):
-        path = os.path.join(current_job().tmpdir, path)
+        path = os.path.join(arvados.current_job().tmpdir, path)
     if not os.path.exists(path):
-        util.run_command(["git", "clone", url, path],
-                         cwd=os.path.dirname(path))
-    util.run_command(["git", "checkout", version],
-                     cwd=path)
+        run_command(["git", "clone", url, path],
+                    cwd=os.path.dirname(path))
+    run_command(["git", "checkout", version],
+                cwd=path)
     return path
 
 def tar_extractor(path, decompress_flag):
@@ -62,7 +74,7 @@ def tarball_extract(tarball, path):
     path -- where to extract the tarball: absolute, or relative to job tmp
     """
     if not re.search('^/', path):
-        path = os.path.join(current_job().tmpdir, path)
+        path = os.path.join(arvados.current_job().tmpdir, path)
     lockfile = open(path + '.lock', 'w')
     fcntl.flock(lockfile, fcntl.LOCK_EX)
     try:
@@ -86,11 +98,11 @@ def tarball_extract(tarball, path):
 
         for f in CollectionReader(tarball).all_files():
             if re.search('\.(tbz|tar.bz2)$', f.name()):
-                p = util.tar_extractor(path, 'j')
+                p = tar_extractor(path, 'j')
             elif re.search('\.(tgz|tar.gz)$', f.name()):
-                p = util.tar_extractor(path, 'z')
+                p = tar_extractor(path, 'z')
             elif re.search('\.tar$', f.name()):
-                p = util.tar_extractor(path, '')
+                p = tar_extractor(path, '')
             else:
                 raise errors.AssertionError(
                     "tarball_extract cannot handle filename %s" % f.name())
@@ -123,7 +135,7 @@ def zipball_extract(zipball, path):
     path -- where to extract the archive: absolute, or relative to job tmp
     """
     if not re.search('^/', path):
-        path = os.path.join(current_job().tmpdir, path)
+        path = os.path.join(arvados.current_job().tmpdir, path)
     lockfile = open(path + '.lock', 'w')
     fcntl.flock(lockfile, fcntl.LOCK_EX)
     try:
@@ -157,7 +169,7 @@ def zipball_extract(zipball, path):
                     break
                 zip_file.write(buf)
             zip_file.close()
-            
+
             p = subprocess.Popen(["unzip",
                                   "-q", "-o",
                                   "-d", path,
@@ -192,7 +204,7 @@ def collection_extract(collection, path, files=[], decompress=True):
     else:
         collection_hash = hashlib.md5(collection).hexdigest()
     if not re.search('^/', path):
-        path = os.path.join(current_job().tmpdir, path)
+        path = os.path.join(arvados.current_job().tmpdir, path)
     lockfile = open(path + '.lock', 'w')
     fcntl.flock(lockfile, fcntl.LOCK_EX)
     try:
@@ -225,7 +237,7 @@ def collection_extract(collection, path, files=[], decompress=True):
                 files_got += [outname]
                 if os.path.exists(os.path.join(path, stream_name, outname)):
                     continue
-                util.mkdir_dash_p(os.path.dirname(os.path.join(path, stream_name, outname)))
+                mkdir_dash_p(os.path.dirname(os.path.join(path, stream_name, outname)))
                 outfile = open(os.path.join(path, stream_name, outname), 'wb')
                 for buf in (f.readall_decompressed() if decompress
                             else f.readall()):
@@ -242,13 +254,16 @@ def collection_extract(collection, path, files=[], decompress=True):
     return path
 
 def mkdir_dash_p(path):
-    if not os.path.exists(path):
-        util.mkdir_dash_p(os.path.dirname(path))
+    if not os.path.isdir(path):
         try:
-            os.mkdir(path)
-        except OSError:
-            if not os.path.exists(path):
-                os.mkdir(path)
+            os.makedirs(path)
+        except OSError as e:
+            if e.errno == errno.EEXIST and os.path.isdir(path):
+                # It is not an error if someone else creates the
+                # directory between our isdir() and makedirs() calls.
+                pass
+            else:
+                raise
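+
+# Like `mkdir -p`, mkdir_dash_p creates intermediate directories as
+# needed and is safe to call repeatedly (the path below is hypothetical):
+#
+#     mkdir_dash_p('/tmp/scratch/a/b')
+#     mkdir_dash_p('/tmp/scratch/a/b')  # no-op, raises nothing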
 
 def stream_extract(stream, path, files=[], decompress=True):
     """Retrieve a stream from Keep and extract it to a local
@@ -259,7 +274,7 @@ def stream_extract(stream, path, files=[], decompress=True):
     path -- where to extract: absolute, or relative to job tmp
     """
     if not re.search('^/', path):
-        path = os.path.join(current_job().tmpdir, path)
+        path = os.path.join(arvados.current_job().tmpdir, path)
     lockfile = open(path + '.lock', 'w')
     fcntl.flock(lockfile, fcntl.LOCK_EX)
     try:
@@ -277,7 +292,7 @@ def stream_extract(stream, path, files=[], decompress=True):
             files_got += [outname]
             if os.path.exists(os.path.join(path, outname)):
                 os.unlink(os.path.join(path, outname))
-            util.mkdir_dash_p(os.path.dirname(os.path.join(path, outname)))
+            mkdir_dash_p(os.path.dirname(os.path.join(path, outname)))
             outfile = open(os.path.join(path, outname), 'wb')
             for buf in (f.readall_decompressed() if decompress
                         else f.readall()):
@@ -296,7 +311,40 @@ def listdir_recursive(dirname, base=None):
         ent_path = os.path.join(dirname, ent)
         ent_base = os.path.join(base, ent) if base else ent
         if os.path.isdir(ent_path):
-            allfiles += util.listdir_recursive(ent_path, ent_base)
+            allfiles += listdir_recursive(ent_path, ent_base)
         else:
             allfiles += [ent_base]
     return allfiles
+
+def is_hex(s, *length_args):
+    """is_hex(s[, length[, max_length]]) -> boolean
+
+    Return True if s is a string of hexadecimal digits.
+    If one length argument is given, the string must contain exactly
+    that number of digits.
+    If two length arguments are given, the string must contain a number of
+    digits between those two lengths, inclusive.
+    Return False otherwise.
+    """
+    num_length_args = len(length_args)
+    if num_length_args > 2:
+        raise ValueError("is_hex accepts up to 3 arguments ({} given)".
+                         format(1 + num_length_args))
+    elif num_length_args == 2:
+        good_len = (length_args[0] <= len(s) <= length_args[1])
+    elif num_length_args == 1:
+        good_len = (len(s) == length_args[0])
+    else:
+        good_len = True
+    return bool(good_len and HEX_RE.match(s))
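+
+# Expected behavior (doctest-style illustration):
+#
+#     >>> is_hex('89fa')
+#     True
+#     >>> is_hex('89fa', 4)
+#     True
+#     >>> is_hex('89fa', 5)
+#     False
+#     >>> is_hex('89fa', 2, 8)
+#     True
+#     >>> is_hex('89fg')
+#     False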
+
+def list_all(fn, num_retries=0, **kwargs):
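+    """Call fn() repeatedly, paging through results, and return all items.
+
+    fn is expected to be an API list method whose responses include
+    'items', 'items_available', and 'offset' fields; this helper uses
+    those fields to advance through the pages until every available
+    item has been collected.
+    """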
+    items = []
+    offset = 0
+    items_available = sys.maxint
+    while len(items) < items_available:
+        c = fn(offset=offset, **kwargs).execute(num_retries=num_retries)
+        items += c['items']
+        items_available = c['items_available']
+        offset = c['offset'] + len(c['items'])
+    return items
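+
+# Usage sketch (illustrative; assumes a configured Arvados API client):
+#
+#     api = arvados.api('v1')
+#     every_collection = list_all(api.collections().list, num_retries=3)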