X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/fb429aa6a8dd1d28d08038abd8de8b9206a1d51e..HEAD:/sdk/python/arvados/commands/put.py diff --git a/sdk/python/arvados/commands/put.py b/sdk/python/arvados/commands/put.py index be7cd629c9..65e0fcce1d 100644 --- a/sdk/python/arvados/commands/put.py +++ b/sdk/python/arvados/commands/put.py @@ -2,10 +2,6 @@ # # SPDX-License-Identifier: Apache-2.0 -from __future__ import division -from future.utils import listitems, listvalues -from builtins import str -from builtins import object import argparse import arvados import arvados.collection @@ -30,12 +26,15 @@ import threading import time import traceback -from apiclient import errors as apiclient_errors -from arvados._version import __version__ -from arvados.util import keep_locator_pattern +from pathlib import Path +import arvados.util import arvados.commands._util as arv_cmd +from apiclient import errors as apiclient_errors +from arvados._internal import basedirs +from arvados._version import __version__ + api_client = None upload_opts = argparse.ArgumentParser(add_help=False) @@ -141,7 +140,10 @@ physical storage devices (e.g., disks) should have a copy of each data block. Default is to use the server-provided default (if any) or 2. """) -upload_opts.add_argument('--storage-classes', help=""" +upload_opts.add_argument( + '--storage-classes', + type=arv_cmd.UniqueSplit(), + help=""" Specify comma separated list of storage classes to be used when saving data to Keep. """) @@ -355,7 +357,7 @@ class ArvPutLogFormatter(logging.Formatter): class ResumeCache(object): - CACHE_DIR = '.cache/arvados/arv-put' + CACHE_DIR = 'arv-put' def __init__(self, file_spec): self.cache_file = open(file_spec, 'a+') @@ -372,9 +374,14 @@ class ResumeCache(object): md5.update(b'-1') elif args.filename: md5.update(args.filename.encode()) - return os.path.join( - arv_cmd.make_home_conf_dir(cls.CACHE_DIR, 0o700, 'raise'), - md5.hexdigest()) + cache_path = Path(cls.CACHE_DIR) + if len(cache_path.parts) == 1: + cache_path = basedirs.BaseDirectories('CACHE').storage_path(cache_path) + else: + # Note this is a noop if cache_path is absolute, which is what we want. + cache_path = Path.home() / cache_path + cache_path.mkdir(parents=True, exist_ok=True, mode=0o700) + return str(cache_path / md5.hexdigest()) def _lock_file(self, fileobj): try: @@ -437,7 +444,7 @@ class ResumeCache(object): class ArvPutUploadJob(object): - CACHE_DIR = '.cache/arvados/arv-put' + CACHE_DIR = 'arv-put' EMPTY_STATE = { 'manifest' : None, # Last saved manifest checkpoint 'files' : {} # Previous run file list: {path : {size, mtime}} @@ -696,7 +703,7 @@ class ArvPutUploadJob(object): Recursively get the total size of the collection """ size = 0 - for item in listvalues(collection): + for item in collection.values(): if isinstance(item, arvados.collection.Collection) or isinstance(item, arvados.collection.Subcollection): size += self._collection_size(item) else: @@ -863,11 +870,14 @@ class ArvPutUploadJob(object): md5.update(b'\0'.join([p.encode() for p in realpaths])) if self.filename: md5.update(self.filename.encode()) - cache_filename = md5.hexdigest() - cache_filepath = os.path.join( - arv_cmd.make_home_conf_dir(self.CACHE_DIR, 0o700, 'raise'), - cache_filename) - return cache_filepath + cache_path = Path(self.CACHE_DIR) + if len(cache_path.parts) == 1: + cache_path = basedirs.BaseDirectories('CACHE').storage_path(cache_path) + else: + # Note this is a noop if cache_path is absolute, which is what we want. + cache_path = Path.home() / cache_path + cache_path.mkdir(parents=True, exist_ok=True, mode=0o700) + return str(cache_path / md5.hexdigest()) def _setup_state(self, update_collection): """ @@ -946,7 +956,7 @@ class ArvPutUploadJob(object): oldest_exp = None oldest_loc = None block_found = False - for m in keep_locator_pattern.finditer(self._state['manifest']): + for m in arvados.util.keep_locator_pattern.finditer(self._state['manifest']): loc = m.group(0) try: exp = datetime.datetime.utcfromtimestamp(int(loc.split('@')[1], 16)) @@ -978,7 +988,7 @@ class ArvPutUploadJob(object): def collection_file_paths(self, col, path_prefix='.'): """Return a list of file paths by recursively go through the entire collection `col`""" file_paths = [] - for name, item in listitems(col): + for name, item in col.items(): if isinstance(item, arvados.arvfile.ArvadosFile): file_paths.append(os.path.join(path_prefix, name)) elif isinstance(item, arvados.collection.Subcollection): @@ -1058,7 +1068,7 @@ class ArvPutUploadJob(object): locators.append(loc) return locators elif isinstance(item, arvados.collection.Collection): - l = [self._datablocks_on_item(x) for x in listvalues(item)] + l = [self._datablocks_on_item(x) for x in item.values()] # Fast list flattener method taken from: # http://stackoverflow.com/questions/952914/making-a-flat-list-out-of-list-of-lists-in-python return [loc for sublist in l for loc in sublist] @@ -1136,7 +1146,7 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr, logging.getLogger('arvados').handlers[0].setFormatter(formatter) if api_client is None: - api_client = arvados.api('v1', request_id=request_id) + api_client = arvados.api('v1', request_id=request_id, num_retries=args.retries) if install_sig_handlers: arv_cmd.install_signal_handlers() @@ -1213,11 +1223,6 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr, else: reporter = None - # Split storage-classes argument - storage_classes = None - if args.storage_classes: - storage_classes = args.storage_classes.strip().replace(' ', '').split(',') - # Setup exclude regex from all the --exclude arguments provided name_patterns = [] exclude_paths = [] @@ -1276,7 +1281,7 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr, owner_uuid = project_uuid, ensure_unique_name = True, update_collection = args.update_collection, - storage_classes=storage_classes, + storage_classes=args.storage_classes, logger=logger, dry_run=args.dry_run, follow_links=args.follow_links,