#!/usr/bin/env python
"""arv-put: copy files or directory trees from the local filesystem to Keep.

Depending on the options given, either the resulting manifest text is
written to stdout (--as-stream), or a Collection record is created
through the Arvados API and its locator is printed (the default).
"""

# TODO:
# --md5sum - display md5 of each file as read from disk

# NOTE: this text was recovered from a git diff in which the script was
# deleted from sdk/cli/bin/arv-put and replaced by a symlink pointing at
# ../../python/bin/arv-put.

import argparse
import os
import sys

parser = argparse.ArgumentParser(
    description='Copy data from the local filesystem to Keep.')
parser.add_argument('paths', metavar='path', type=str, nargs='+',
                    help='Local file or directory.')
parser.add_argument('--max-manifest-depth', type=int, metavar='N', default=-1,
                    help="""
Maximum depth of directory tree to represent in the manifest
structure. A directory structure deeper than this will be represented
as a single stream in the manifest. If N=0, the manifest will contain
a single stream. Default: -1 (unlimited), i.e., exactly one manifest
stream per filesystem directory that contains files.
""")
parser.add_argument('--as-stream', action='store_true',
                    help="""
Store the file content and display the resulting manifest on
stdout. Do not write the manifest to Keep or save a Collection object
in Arvados.
""")
# --as-manifest shares its destination with --as-stream and simply resets
# it to False, so the last of the two flags on the command line wins.
parser.add_argument('--as-manifest', action='store_const',
                    dest='as_stream', const=False, default=False,
                    help="""
Store the file data and resulting manifest in Keep, save a Collection
object in Arvados, and display the manifest locator (Collection uuid)
on stdout. This is the default behavior.
""")
parser.add_argument('--filename', type=str, default=None,
                    help="""
Use the given filename in the manifest, instead of the name of the
local file. This is useful when "-" or "/dev/stdin" is given as an
input file. It can be used only if there is exactly one path given and
it is not a directory.
""")
group = parser.add_mutually_exclusive_group()
group.add_argument('--progress', action='store_true',
                   help="""
Display human-readable progress on stderr (bytes and, if possible,
percentage of total data size). This is the default behavior when
stderr is a tty.
""")
group.add_argument('--no-progress', action='store_true',
                   help="""
Do not display human-readable progress on stderr, even if stderr is a
tty.
""")
group.add_argument('--batch-progress', action='store_true',
                   help="""
Display machine-readable progress on stderr (bytes and, if known,
total data size).
""")

args = parser.parse_args()

if args.filename and (len(args.paths) != 1 or os.path.isdir(args.paths[0])):
    parser.error("""
--filename argument cannot be used when storing a directory or
multiple files.
""")

# Turn on --progress by default if stderr is a tty.
if (not (args.batch_progress or args.no_progress)
        and os.isatty(sys.stderr.fileno())):
    args.progress = True


# NOTE(review): these imports sit after argument parsing in the original;
# presumably so that --help and usage errors do not depend on importing
# the SDK -- confirm before moving them to the top of the file.
import re
import arvados


class CollectionWriterWithProgress(arvados.CollectionWriter):
    """CollectionWriter that reports upload progress on stderr.

    Reporting is driven by two attributes the caller assigns after
    construction:

    display_type   -- 'human' or 'machine'; any false value (or a
                      missing attribute) disables reporting.
    bytes_expected -- total number of bytes about to be written, when
                      known.  If the attribute is missing, machine
                      output reports -1 and human output shows only the
                      running byte count.
    """

    def flush_data(self, *args, **kwargs):
        """Flush through the parent class, then report progress.

        The parent's flush_data() is always invoked.  The original
        implementation returned before delegating whenever display_type
        was unset, which silently skipped the flush itself once
        manifest_text() had switched reporting off.
        """
        if not hasattr(self, 'bytes_flushed'):
            self.bytes_flushed = 0
        # Count the whole buffer as flushed, then give back whatever the
        # parent left in the buffer after its flush.
        self.bytes_flushed += self._data_buffer_len
        super(CollectionWriterWithProgress, self).flush_data(*args, **kwargs)
        self.bytes_flushed -= self._data_buffer_len

        display_type = getattr(self, 'display_type', None)
        if not display_type:
            return
        if display_type == 'machine':
            sys.stderr.write('%s %d: %d written %d total\n' %
                             (sys.argv[0],
                              os.getpid(),
                              self.bytes_flushed,
                              getattr(self, 'bytes_expected', -1)))
        elif getattr(self, 'bytes_expected', 0) > 0:
            pct = 100.0 * self.bytes_flushed / self.bytes_expected
            sys.stderr.write('\r%dM / %dM %.1f%% ' %
                             (self.bytes_flushed >> 20,
                              self.bytes_expected >> 20, pct))
        else:
            sys.stderr.write('\r%d ' % self.bytes_flushed)

    def manifest_text(self, *args, **kwargs):
        """Return the parent's manifest text and end progress reporting.

        On the first call with reporting enabled, the human-readable
        progress line (which is redrawn with '\\r') is terminated with a
        newline, and display_type is cleared so later calls stay quiet.
        """
        manifest_text = (super(CollectionWriterWithProgress, self)
                         .manifest_text(*args, **kwargs))
        if getattr(self, 'display_type', None):
            if self.display_type == 'human':
                sys.stderr.write('\n')
            self.display_type = None
        return manifest_text


if args.progress:
    writer = CollectionWriterWithProgress()
    writer.display_type = 'human'
elif args.batch_progress:
    writer = CollectionWriterWithProgress()
    writer.display_type = 'machine'
else:
    writer = arvados.CollectionWriter()

args.paths = [('/dev/stdin' if p == '-' else p) for p in args.paths]

# Walk the given directory trees and stat files, adding up file sizes,
# so we can display progress as percent
writer.bytes_expected = 0
for path in args.paths:
    if os.path.isdir(path):
        for filename in arvados.util.listdir_recursive(path):
            writer.bytes_expected += os.path.getsize(
                os.path.join(path, filename))
    elif not os.path.isfile(path):
        # Neither a directory nor a regular file (e.g. /dev/stdin): the
        # total size cannot be known, so drop the estimate altogether.
        del writer.bytes_expected
        break
    else:
        writer.bytes_expected += os.path.getsize(path)

# Copy file data to Keep.
for path in args.paths:
    if os.path.isdir(path):
        writer.write_directory_tree(path,
                                    max_manifest_depth=args.max_manifest_depth)
    else:
        writer.start_new_stream()
        writer.start_new_file(args.filename or os.path.split(path)[1])
        with open(path, 'rb') as f:
            while True:
                buf = f.read(2**26)
                if not buf:
                    break
                writer.write(buf)

if args.as_stream:
    # Emit the manifest exactly as returned, with no newline appended
    # (matches the original "print ...," statement).
    sys.stdout.write(writer.manifest_text())
else:
    # Register the resulting collection in Arvados.  The API client is
    # obtained before finish() is called, as in the original expression.
    collections = arvados.api().collections()
    locator = writer.finish()
    collections.create(
        body={
            'uuid': locator,
            'manifest_text': writer.manifest_text(),
        },
    ).execute()

    # Print the locator (uuid) of the new collection.
    print(locator)