X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/4de2c25d6bd0974b456cbe1f5c2f0ff91a15f087..42377686908783fb9d043e616f099e153f7834bc:/sdk/python/bin/arv-put diff --git a/sdk/python/bin/arv-put b/sdk/python/bin/arv-put index a47de3063e..eaeecfb2f1 100755 --- a/sdk/python/bin/arv-put +++ b/sdk/python/bin/arv-put @@ -1,199 +1,7 @@ #!/usr/bin/env python +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 -# TODO: -# --md5sum - display md5 of each file as read from disk - -import argparse -import os -import sys - -parser = argparse.ArgumentParser( - description='Copy data from the local filesystem to Keep.') -parser.add_argument('paths', metavar='path', type=str, nargs='*', - help=""" -Local file or directory. Default: read from standard input. -""") -parser.add_argument('--max-manifest-depth', type=int, metavar='N', default=-1, - help=""" -Maximum depth of directory tree to represent in the manifest -structure. A directory structure deeper than this will be represented -as a single stream in the manifest. If N=0, the manifest will contain -a single stream. Default: -1 (unlimited), i.e., exactly one manifest -stream per filesystem directory that contains files. -""") -group = parser.add_mutually_exclusive_group() -group.add_argument('--as-stream', action='store_true', dest='stream', - help=""" -Synonym for --stream. -""") -group.add_argument('--stream', action='store_true', - help=""" -Store the file content and display the resulting manifest on -stdout. Do not write the manifest to Keep or save a Collection object -in Arvados. -""") -group.add_argument('--as-manifest', action='store_true', dest='manifest', - help=""" -Synonym for --manifest. -""") -group.add_argument('--in-manifest', action='store_true', dest='manifest', - help=""" -Synonym for --manifest. -""") -group.add_argument('--manifest', action='store_true', - help=""" -Store the file data and resulting manifest in Keep, save a Collection -object in Arvados, and display the manifest locator (Collection uuid) -on stdout. This is the default behavior. -""") -group.add_argument('--as-raw', action='store_true', dest='raw', - help=""" -Synonym for --raw. -""") -group.add_argument('--raw', action='store_true', - help=""" -Store the file content and display the data block locators on stdout, -separated by commas, with a trailing newline. Do not store a -manifest. -""") -parser.add_argument('--use-filename', type=str, default=None, dest='filename', - help=""" -Synonym for --filename. -""") -parser.add_argument('--filename', type=str, default=None, - help=""" -Use the given filename in the manifest, instead of the name of the -local file. This is useful when "-" or "/dev/stdin" is given as an -input file. It can be used only if there is exactly one path given and -it is not a directory. Implies --manifest. -""") -group = parser.add_mutually_exclusive_group() -group.add_argument('--progress', action='store_true', - help=""" -Display human-readable progress on stderr (bytes and, if possible, -percentage of total data size). This is the default behavior when -stderr is a tty. -""") -group.add_argument('--no-progress', action='store_true', - help=""" -Do not display human-readable progress on stderr, even if stderr is a -tty. -""") -group.add_argument('--batch-progress', action='store_true', - help=""" -Display machine-readable progress on stderr (bytes and, if known, -total data size). -""") - -args = parser.parse_args() - -if len(args.paths) == 0: - args.paths += ['/dev/stdin'] - -if len(args.paths) != 1 or os.path.isdir(args.paths[0]): - if args.filename: - parser.error(""" ---filename argument cannot be used when storing a directory or -multiple files. -""") - -# Turn on --progress by default if stderr is a tty. -if (not (args.batch_progress or args.no_progress) - and os.isatty(sys.stderr.fileno())): - args.progress = True - - -import arvados -import re -import string - -class CollectionWriterWithProgress(arvados.CollectionWriter): - def flush_data(self, *args, **kwargs): - if not getattr(self, 'display_type', None): - return - if not hasattr(self, 'bytes_flushed'): - self.bytes_flushed = 0 - self.bytes_flushed += self._data_buffer_len - super(CollectionWriterWithProgress, self).flush_data(*args, **kwargs) - self.bytes_flushed -= self._data_buffer_len - if self.display_type == 'machine': - sys.stderr.write('%s %d: %d written %d total\n' % - (sys.argv[0], - os.getpid(), - self.bytes_flushed, - getattr(self, 'bytes_expected', -1))) - elif getattr(self, 'bytes_expected', 0) > 0: - pct = 100.0 * self.bytes_flushed / self.bytes_expected - sys.stderr.write('\r%dM / %dM %.1f%% ' % - (self.bytes_flushed >> 20, - self.bytes_expected >> 20, pct)) - else: - sys.stderr.write('\r%d ' % self.bytes_flushed) - def manifest_text(self, *args, **kwargs): - manifest_text = (super(CollectionWriterWithProgress, self) - .manifest_text(*args, **kwargs)) - if getattr(self, 'display_type', None): - if self.display_type == 'human': - sys.stderr.write('\n') - self.display_type = None - return manifest_text - -if args.progress: - writer = CollectionWriterWithProgress() - writer.display_type = 'human' -elif args.batch_progress: - writer = CollectionWriterWithProgress() - writer.display_type = 'machine' -else: - writer = arvados.CollectionWriter() - -if args.paths == ['-']: - args.paths = ['/dev/stdin'] - if not args.filename: - args.filename = '-' - -# Walk the given directory trees and stat files, adding up file sizes, -# so we can display progress as percent -writer.bytes_expected = 0 -for path in args.paths: - if os.path.isdir(path): - for filename in arvados.util.listdir_recursive(path): - writer.bytes_expected += os.path.getsize( - os.path.join(path, filename)) - elif not os.path.isfile(path): - del writer.bytes_expected - break - else: - writer.bytes_expected += os.path.getsize(path) - -# Copy file data to Keep. -for path in args.paths: - if os.path.isdir(path): - writer.write_directory_tree(path, - max_manifest_depth=args.max_manifest_depth) - else: - writer.start_new_stream() - writer.start_new_file(args.filename or os.path.split(path)[1]) - with open(path, 'rb') as f: - while True: - buf = f.read(2**26) - if len(buf) == 0: - break - writer.write(buf) - -if args.stream: - print writer.manifest_text(), -elif args.raw: - writer.finish_current_stream() - print string.join(writer.data_locators(), ',') -else: - # Register the resulting collection in Arvados. - arvados.api().collections().create( - body={ - 'uuid': writer.finish(), - 'manifest_text': writer.manifest_text(), - }, - ).execute() - - # Print the locator (uuid) of the new collection. - print writer.finish() +from arvados.commands.put import main +main()