import sys
import logging
import tempfile
+import urlparse
import arvados
import arvados.config
help='Perform copy even if the object appears to exist at the remote destination.')
copy_opts.add_argument(
'--src', dest='source_arvados', required=True,
- help='The name of the source Arvados instance (required). May be either a pathname to a config file, or the basename of a file in $HOME/.config/arvados/instance_name.conf.')
+ help='The name of the source Arvados instance (required) - points at an Arvados config file. May be either a pathname to a config file, or (for example) "foo" as shorthand for $HOME/.config/arvados/foo.conf.')
copy_opts.add_argument(
'--dst', dest='destination_arvados', required=True,
- help='The name of the destination Arvados instance (required). May be either a pathname to a config file, or the basename of a file in $HOME/.config/arvados/instance_name.conf.')
+ help='The name of the destination Arvados instance (required) - points at an Arvados config file. May be either a pathname to a config file, or (for example) "foo" as shorthand for $HOME/.config/arvados/foo.conf.')
copy_opts.add_argument(
'--recursive', dest='recursive', action='store_true',
help='Recursively copy any dependencies for this object. (default)')
copy_opts.add_argument(
'--project-uuid', dest='project_uuid',
help='The UUID of the project at the destination to which the pipeline should be copied.')
+ copy_opts.add_argument(
+ '--allow-git-http-src', action="store_true",
+ help='Allow cloning git repositories over insecure http')
+ copy_opts.add_argument(
+ '--allow-git-http-dst', action="store_true",
+ help='Allow pushing git repositories over insecure http')
+
copy_opts.add_argument(
'object_uuid',
help='The UUID of the object to be copied.')
abort('need ARVADOS_API_HOST and ARVADOS_API_TOKEN for {}'.format(instance_name))
return client
+# Check if git is available
+def check_git_availability():
+ try:
+ arvados.util.run_command(['git', '--help'])
+ except Exception:
+ abort('git command is not available. Please ensure git is installed.')
+
# copy_pipeline_instance(pi_uuid, src, dst, args)
#
# Copies a pipeline instance identified by pi_uuid from src to dst.
pi = src.pipeline_instances().get(uuid=pi_uuid).execute(num_retries=args.retries)
if args.recursive:
+ check_git_availability()
+
if not args.dst_git_repo:
abort('--dst-git-repo is required when copying a pipeline recursively.')
# Copy the pipeline template and save the copied template.
pt = src.pipeline_templates().get(uuid=pt_uuid).execute(num_retries=args.retries)
if args.recursive:
+ check_git_availability()
+
if not args.dst_git_repo:
abort('--dst-git-repo is required when copying a pipeline recursively.')
# Copy input collections, docker images and git repos.
obj = arvados.util.portable_data_hash_pattern.sub(copy_collection_fn, obj)
obj = arvados.util.collection_uuid_pattern.sub(copy_collection_fn, obj)
return obj
- elif type(obj) == dict:
- return {v: copy_collections(obj[v], src, dst, args) for v in obj}
- elif type(obj) == list:
- return [copy_collections(v, src, dst, args) for v in obj]
+ elif isinstance(obj, dict):
+ return type(obj)((v, copy_collections(obj[v], src, dst, args))
+ for v in obj)
+ elif isinstance(obj, list):
+ return type(obj)(copy_collections(v, src, dst, args) for v in obj)
return obj
def migrate_jobspec(jobspec, src, dst, dst_repo, args):
else:
progress_writer = None
- for line in manifest.splitlines(True):
+ for line in manifest.splitlines():
words = line.split()
- dst_manifest_line = words[0]
+ dst_manifest += words[0]
for word in words[1:]:
try:
loc = arvados.KeepLocator(word)
- blockhash = loc.md5sum
- # copy this block if we haven't seen it before
- # (otherwise, just reuse the existing dst_locator)
- if blockhash not in dst_locators:
- logger.debug("Copying block %s (%s bytes)", blockhash, loc.size)
- if progress_writer:
- progress_writer.report(obj_uuid, bytes_written, bytes_expected)
- data = src_keep.get(word)
- dst_locator = dst_keep.put(data)
- dst_locators[blockhash] = dst_locator
- bytes_written += loc.size
- dst_manifest_line += ' ' + dst_locators[blockhash]
except ValueError:
# If 'word' can't be parsed as a locator,
# presume it's a filename.
- dst_manifest_line += ' ' + word
- dst_manifest += dst_manifest_line
- if line.endswith("\n"):
- dst_manifest += "\n"
+ dst_manifest += ' ' + word
+ continue
+ blockhash = loc.md5sum
+ # copy this block if we haven't seen it before
+ # (otherwise, just reuse the existing dst_locator)
+ if blockhash not in dst_locators:
+ logger.debug("Copying block %s (%s bytes)", blockhash, loc.size)
+ if progress_writer:
+ progress_writer.report(obj_uuid, bytes_written, bytes_expected)
+ data = src_keep.get(word)
+ dst_locator = dst_keep.put(data)
+ dst_locators[blockhash] = dst_locator
+ bytes_written += loc.size
+ dst_manifest += ' ' + dst_locators[blockhash]
+ dst_manifest += "\n"
if progress_writer:
progress_writer.report(obj_uuid, bytes_written, bytes_expected)
# Copy the manifest and save the collection.
logger.debug('saving %s with manifest: <%s>', obj_uuid, dst_manifest)
- dst_keep.put(dst_manifest.encode('utf-8'))
c['manifest_text'] = dst_manifest
return create_collection_from(c, src, dst, args)
+def select_git_url(api, repo_name, retries, allow_insecure_http, allow_insecure_http_opt):
+ r = api.repositories().list(
+ filters=[['name', '=', repo_name]]).execute(num_retries=retries)
+ if r['items_available'] != 1:
+ raise Exception('cannot identify repo {}; {} repos found'
+ .format(repo_name, r['items_available']))
+
+ https_url = [c for c in r['items'][0]["clone_urls"] if c.startswith("https:")]
+ http_url = [c for c in r['items'][0]["clone_urls"] if c.startswith("http:")]
+ other_url = [c for c in r['items'][0]["clone_urls"] if not c.startswith("http")]
+
+ priority = https_url + other_url + http_url
+
+ git_config = []
+ git_url = None
+ for url in priority:
+ if url.startswith("http"):
+ u = urlparse.urlsplit(url)
+ baseurl = urlparse.urlunsplit((u.scheme, u.netloc, "", "", ""))
+ git_config = ["-c", "credential.%s/.username=none" % baseurl,
+ "-c", "credential.%s/.helper=!cred(){ cat >/dev/null; if [ \"$1\" = get ]; then echo password=$ARVADOS_API_TOKEN; fi; };cred" % baseurl]
+ else:
+ git_config = []
+
+ try:
+ logger.debug("trying %s", url)
+ arvados.util.run_command(["git"] + git_config + ["ls-remote", url],
+ env={"HOME": os.environ["HOME"],
+ "ARVADOS_API_TOKEN": api.api_token,
+ "GIT_ASKPASS": "/bin/false"})
+ except arvados.errors.CommandFailedError:
+ pass
+ else:
+ git_url = url
+ break
+
+ if not git_url:
+ raise Exception('Cannot access git repository, tried {}'
+ .format(priority))
+
+ if git_url.startswith("http:"):
+ if allow_insecure_http:
+ logger.warn("Using insecure git url %s but will allow this because %s", git_url, allow_insecure_http_opt)
+ else:
+ raise Exception("Refusing to use insecure git url %s, use %s if you really want this." % (git_url, allow_insecure_http_opt))
+
+ return (git_url, git_config)
+
+
# copy_git_repo(src_git_repo, src, dst, dst_git_repo, script_version, args)
#
# Copies commits from git repository 'src_git_repo' on Arvados
#
def copy_git_repo(src_git_repo, src, dst, dst_git_repo, script_version, args):
# Identify the fetch and push URLs for the git repositories.
- r = src.repositories().list(
- filters=[['name', '=', src_git_repo]]).execute(num_retries=args.retries)
- if r['items_available'] != 1:
- raise Exception('cannot identify source repo {}; {} repos found'
- .format(src_git_repo, r['items_available']))
- src_git_url = r['items'][0]['fetch_url']
- logger.debug('src_git_url: {}'.format(src_git_url))
- r = dst.repositories().list(
- filters=[['name', '=', dst_git_repo]]).execute(num_retries=args.retries)
- if r['items_available'] != 1:
- raise Exception('cannot identify destination repo {}; {} repos found'
- .format(dst_git_repo, r['items_available']))
- dst_git_push_url = r['items'][0]['push_url']
- logger.debug('dst_git_push_url: {}'.format(dst_git_push_url))
+ (src_git_url, src_git_config) = select_git_url(src, src_git_repo, args.retries, args.allow_git_http_src, "--allow-git-http-src")
+ (dst_git_url, dst_git_config) = select_git_url(dst, dst_git_repo, args.retries, args.allow_git_http_dst, "--allow-git-http-dst")
+
+ logger.debug('src_git_url: {}'.format(src_git_url))
+ logger.debug('dst_git_url: {}'.format(dst_git_url))
dst_branch = re.sub(r'\W+', '_', "{}_{}".format(src_git_url, script_version))
if src_git_repo not in local_repo_dir:
local_repo_dir[src_git_repo] = tempfile.mkdtemp()
arvados.util.run_command(
- ["git", "clone", "--bare", src_git_url,
+ ["git"] + src_git_config + ["clone", "--bare", src_git_url,
local_repo_dir[src_git_repo]],
- cwd=os.path.dirname(local_repo_dir[src_git_repo]))
+ cwd=os.path.dirname(local_repo_dir[src_git_repo]),
+ env={"HOME": os.environ["HOME"],
+ "ARVADOS_API_TOKEN": src.api_token,
+ "GIT_ASKPASS": "/bin/false"})
arvados.util.run_command(
- ["git", "remote", "add", "dst", dst_git_push_url],
+ ["git", "remote", "add", "dst", dst_git_url],
cwd=local_repo_dir[src_git_repo])
arvados.util.run_command(
["git", "branch", dst_branch, script_version],
cwd=local_repo_dir[src_git_repo])
- arvados.util.run_command(["git", "push", "dst", dst_branch],
- cwd=local_repo_dir[src_git_repo])
+ arvados.util.run_command(["git"] + dst_git_config + ["push", "dst", dst_branch],
+ cwd=local_repo_dir[src_git_repo],
+ env={"HOME": os.environ["HOME"],
+ "ARVADOS_API_TOKEN": dst.api_token,
+ "GIT_ASKPASS": "/bin/false"})
def copy_docker_images(pipeline, src, dst, args):
"""Copy any docker images named in the pipeline components'