+++ /dev/null
-#!/usr/bin/env python
-
-import argparse
-import hashlib
-import os
-import re
-import string
-import sys
-import logging
-
-logger = logging.getLogger(os.path.basename(sys.argv[0]))
-
-parser = argparse.ArgumentParser(
- description='Copy data from Keep to a local file or pipe.')
-parser.add_argument('locator', type=str,
- help="""
-Collection locator, optionally with a file path or prefix.
-""")
-parser.add_argument('destination', type=str, nargs='?', default='/dev/stdout',
- help="""
-Local file or directory where the data is to be written. Default:
-/dev/stdout.
-""")
-group = parser.add_mutually_exclusive_group()
-group.add_argument('--progress', action='store_true',
- help="""
-Display human-readable progress on stderr (bytes and, if possible,
-percentage of total data size). This is the default behavior when
-stderr is a tty and stdout is not a tty.
-""")
-group.add_argument('--no-progress', action='store_true',
- help="""
-Do not display human-readable progress on stderr.
-""")
-group.add_argument('--batch-progress', action='store_true',
- help="""
-Display machine-readable progress on stderr (bytes and, if known,
-total data size).
-""")
-group = parser.add_mutually_exclusive_group()
-group.add_argument('--hash',
- help="""
-Display the hash of each file as it is read from Keep, using the given
-hash algorithm. Supported algorithms include md5, sha1, sha224,
-sha256, sha384, and sha512.
-""")
-group.add_argument('--md5sum', action='store_const',
- dest='hash', const='md5',
- help="""
-Display the MD5 hash of each file as it is read from Keep.
-""")
-parser.add_argument('-n', action='store_true',
- help="""
-Do not write any data -- just read from Keep, and report md5sums if
-requested.
-""")
-parser.add_argument('-r', action='store_true',
- help="""
-Retrieve all files in the specified collection/prefix. This is the
-default behavior if the "locator" argument ends with a forward slash.
-""")
-group = parser.add_mutually_exclusive_group()
-group.add_argument('-f', action='store_true',
- help="""
-Overwrite existing files while writing. The default behavior is to
-refuse to write *anything* if any of the output files already
-exist. As a special case, -f is not needed to write to /dev/stdout.
-""")
-group.add_argument('--skip-existing', action='store_true',
- help="""
-Skip files that already exist. The default behavior is to refuse to
-write *anything* if any files exist that would have to be
-overwritten. This option causes even devices, sockets, and fifos to be
-skipped.
-""")
-
-args = parser.parse_args()
-
-if args.locator[-1] == os.sep:
- args.r = True
-if (args.r and
- not args.n and
- not (args.destination and
- os.path.isdir(args.destination))):
- parser.error('Destination is not a directory.')
-if not args.r and (os.path.isdir(args.destination) or
- args.destination[-1] == os.path.sep):
- args.destination = os.path.join(args.destination,
- os.path.basename(args.locator))
- logger.debug("Appended source file name to destination directory: %s" %
- args.destination)
-
-# Turn on --progress by default if stderr is a tty and stdout isn't.
-if (not (args.batch_progress or args.no_progress)
- and os.isatty(sys.stderr.fileno())
- and not os.isatty(sys.stdout.fileno())):
- args.progress = True
-
-if args.destination == '-':
- args.destination = '/dev/stdout'
-if args.destination == '/dev/stdout':
- # Normally you have to use -f to write to a file (or device) that
- # already exists, but "-" and "/dev/stdout" are common enough to
- # merit a special exception.
- args.f = True
-else:
- args.destination = args.destination.rstrip(os.sep)
-
-
-import arvados
-
-r = re.search(r'^(.*?)(/.*)?$', args.locator)
-collection = r.group(1)
-get_prefix = r.group(2)
-if args.r and not get_prefix:
- get_prefix = os.sep
-
-todo = []
-todo_bytes = 0
-if not get_prefix:
- try:
- if not args.n:
- if not args.f and os.path.exists(args.destination):
- logger.error('Local file %s already exists' % args.destination)
- sys.exit(1)
- with open(args.destination, 'wb') as f:
- f.write(arvados.Keep.get(collection))
- sys.exit(0)
- except arvados.errors.NotFoundError as e:
- logger.error(e)
- sys.exit(1)
-
-reader = arvados.CollectionReader(collection)
-
-# Scan the collection. Make an array of (stream, file, local
-# destination filename) tuples, and add up total size to extract.
-
-try:
- for s in reader.all_streams():
- for f in s.all_files():
- if get_prefix and get_prefix[-1] == os.sep:
- if 0 != string.find(os.path.join(s.name(), f.name()),
- '.' + get_prefix):
- continue
- dest_path = os.path.join(
- args.destination,
- os.path.join(s.name(), f.name())[len(get_prefix)+1:])
- if (not (args.f or args.skip_existing) and
- os.path.exists(dest_path)):
- logger.error('Local file %s already exists' % dest_path)
- sys.exit(1)
- else:
- if os.path.join(s.name(), f.name()) != '.' + get_prefix:
- continue
- dest_path = args.destination
- todo += [(s, f, dest_path)]
- todo_bytes += f.size()
-except arvados.errors.NotFoundError as e:
- logger.error(e)
- sys.exit(1)
-
-# Read data, and (if not -n) write to local file(s) or pipe.
-
-out_bytes = 0
-for s,f,outfilename in todo:
- outfile = None
- digestor = None
- if not args.n:
- if args.skip_existing and os.path.exists(outfilename):
- logger.debug('Local file %s exists. Skipping.' % outfilename)
- continue
- elif not args.f and (os.path.isfile(outfilename) or
- os.path.isdir(outfilename)):
- # Good thing we looked again: apparently this file wasn't
- # here yet when we checked earlier.
- logger.error('Local file %s already exists' % outfilename)
- sys.exit(1)
- if args.r:
- arvados.util.mkdir_dash_p(os.path.dirname(outfilename))
- try:
- outfile = open(outfilename, 'wb')
- except Exception as e:
- logger.error('Open(%s) failed: %s' % (outfilename, e))
- sys.exit(1)
- if args.hash:
- digestor = hashlib.new(args.hash)
- try:
- for data in f.readall():
- if outfile:
- outfile.write(data)
- if digestor:
- digestor.update(data)
- out_bytes += len(data)
- if args.progress:
- sys.stderr.write('\r%d MiB / %d MiB %.1f%%' %
- (out_bytes >> 20,
- todo_bytes >> 20,
- (100
- if todo_bytes==0
- else 100.0*out_bytes/todo_bytes)))
- elif args.batch_progress:
- sys.stderr.write('%s %d read %d total\n' %
- (sys.argv[0], os.getpid(),
- out_bytes, todo_bytes))
- if digestor:
- sys.stderr.write("%s %s/%s\n"
- % (digestor.hexdigest(), s.name(), f.name()))
- except KeyboardInterrupt:
- if outfile:
- os.unlink(outfilename)
- break
-
-if args.progress:
- sys.stderr.write('\n')
--- /dev/null
+../../python/bin/arv-get
\ No newline at end of file
+++ /dev/null
-#!/usr/bin/env python
-
-# TODO:
-# --md5sum - display md5 of each file as read from disk
-
-import argparse
-import os
-import sys
-
-parser = argparse.ArgumentParser(
- description='Copy data from the local filesystem to Keep.')
-parser.add_argument('paths', metavar='path', type=str, nargs='*',
- help="""
-Local file or directory. Default: read from standard input.
-""")
-parser.add_argument('--max-manifest-depth', type=int, metavar='N', default=-1,
- help="""
-Maximum depth of directory tree to represent in the manifest
-structure. A directory structure deeper than this will be represented
-as a single stream in the manifest. If N=0, the manifest will contain
-a single stream. Default: -1 (unlimited), i.e., exactly one manifest
-stream per filesystem directory that contains files.
-""")
-group = parser.add_mutually_exclusive_group()
-group.add_argument('--as-stream', action='store_true', dest='stream',
- help="""
-Synonym for --stream.
-""")
-group.add_argument('--stream', action='store_true',
- help="""
-Store the file content and display the resulting manifest on
-stdout. Do not write the manifest to Keep or save a Collection object
-in Arvados.
-""")
-group.add_argument('--as-manifest', action='store_true', dest='manifest',
- help="""
-Synonym for --manifest.
-""")
-group.add_argument('--in-manifest', action='store_true', dest='manifest',
- help="""
-Synonym for --manifest.
-""")
-group.add_argument('--manifest', action='store_true',
- help="""
-Store the file data and resulting manifest in Keep, save a Collection
-object in Arvados, and display the manifest locator (Collection uuid)
-on stdout. This is the default behavior if more than one path argument
-is given, or the path given is a directory, or a --filename argument
-is given.
-""")
-group.add_argument('--as-raw', action='store_true', dest='raw',
- help="""
-Synonym for --raw.
-""")
-group.add_argument('--raw', action='store_true',
- help="""
-Store the file content and display the data block locators on stdout,
-separated by spaces, with a trailing newline. Do not store a
-manifest. This is the default behavior when reading data from a single
-file or standard input.
-""")
-parser.add_argument('--use-filename', type=str, default=None, dest='filename',
- help="""
-Synonym for --filename.
-""")
-parser.add_argument('--filename', type=str, default=None,
- help="""
-Use the given filename in the manifest, instead of the name of the
-local file. This is useful when "-" or "/dev/stdin" is given as an
-input file. It can be used only if there is exactly one path given and
-it is not a directory. Implies --manifest.
-""")
-group = parser.add_mutually_exclusive_group()
-group.add_argument('--progress', action='store_true',
- help="""
-Display human-readable progress on stderr (bytes and, if possible,
-percentage of total data size). This is the default behavior when
-stderr is a tty.
-""")
-group.add_argument('--no-progress', action='store_true',
- help="""
-Do not display human-readable progress on stderr, even if stderr is a
-tty.
-""")
-group.add_argument('--batch-progress', action='store_true',
- help="""
-Display machine-readable progress on stderr (bytes and, if known,
-total data size).
-""")
-
-args = parser.parse_args()
-
-if len(args.paths) == 0:
- args.paths += ['/dev/stdin']
-
-if len(args.paths) != 1 or os.path.isdir(args.paths[0]):
- if args.filename:
- parser.error("""
---filename argument cannot be used when storing a directory or
-multiple files.
-""")
-elif not args.filename and not args.stream and not args.manifest:
- # When reading from a single non-directory, and no --filename is
- # given, default to writing raw blocks rather than a manifest.
- args.raw = True
-
-# Turn on --progress by default if stderr is a tty.
-if (not (args.batch_progress or args.no_progress)
- and os.isatty(sys.stderr.fileno())):
- args.progress = True
-
-
-import arvados
-import re
-import string
-
-class CollectionWriterWithProgress(arvados.CollectionWriter):
- def flush_data(self, *args, **kwargs):
- if not getattr(self, 'display_type', None):
- return
- if not hasattr(self, 'bytes_flushed'):
- self.bytes_flushed = 0
- self.bytes_flushed += self._data_buffer_len
- super(CollectionWriterWithProgress, self).flush_data(*args, **kwargs)
- self.bytes_flushed -= self._data_buffer_len
- if self.display_type == 'machine':
- sys.stderr.write('%s %d: %d written %d total\n' %
- (sys.argv[0],
- os.getpid(),
- self.bytes_flushed,
- getattr(self, 'bytes_expected', -1)))
- elif getattr(self, 'bytes_expected', 0) > 0:
- pct = 100.0 * self.bytes_flushed / self.bytes_expected
- sys.stderr.write('\r%dM / %dM %.1f%% ' %
- (self.bytes_flushed >> 20,
- self.bytes_expected >> 20, pct))
- else:
- sys.stderr.write('\r%d ' % self.bytes_flushed)
- def manifest_text(self, *args, **kwargs):
- manifest_text = (super(CollectionWriterWithProgress, self)
- .manifest_text(*args, **kwargs))
- if getattr(self, 'display_type', None):
- if self.display_type == 'human':
- sys.stderr.write('\n')
- self.display_type = None
- return manifest_text
-
-if args.progress:
- writer = CollectionWriterWithProgress()
- writer.display_type = 'human'
-elif args.batch_progress:
- writer = CollectionWriterWithProgress()
- writer.display_type = 'machine'
-else:
- writer = arvados.CollectionWriter()
-
-args.paths = [('/dev/stdin' if p=='-' else p) for p in args.paths]
-
-# Walk the given directory trees and stat files, adding up file sizes,
-# so we can display progress as percent
-writer.bytes_expected = 0
-for path in args.paths:
- if os.path.isdir(path):
- for filename in arvados.util.listdir_recursive(path):
- writer.bytes_expected += os.path.getsize(
- os.path.join(path, filename))
- elif not os.path.isfile(path):
- del writer.bytes_expected
- break
- else:
- writer.bytes_expected += os.path.getsize(path)
-
-# Copy file data to Keep.
-for path in args.paths:
- if os.path.isdir(path):
- writer.write_directory_tree(path,
- max_manifest_depth=args.max_manifest_depth)
- else:
- writer.start_new_stream()
- writer.start_new_file(args.filename or os.path.split(path)[1])
- with open(path, 'rb') as f:
- while True:
- buf = f.read(2**26)
- if len(buf) == 0:
- break
- writer.write(buf)
-
-if args.stream:
- print writer.manifest_text(),
-elif args.raw:
- writer.finish_current_stream()
- print string.join(writer.data_locators(), ',') + '\n'
-else:
- # Register the resulting collection in Arvados.
- arvados.api().collections().create(
- body={
- 'uuid': writer.finish(),
- 'manifest_text': writer.manifest_text(),
- },
- ).execute()
-
- # Print the locator (uuid) of the new collection.
- print writer.finish()
--- /dev/null
+../../python/bin/arv-put
\ No newline at end of file
--- /dev/null
+/build/
+/dist/
+/*.egg-info
--- /dev/null
+#!/usr/bin/env python
+
+import argparse
+import hashlib
+import os
+import re
+import string
+import sys
+import logging
+
+logger = logging.getLogger(os.path.basename(sys.argv[0]))
+
+parser = argparse.ArgumentParser(
+ description='Copy data from Keep to a local file or pipe.')
+parser.add_argument('locator', type=str,
+ help="""
+Collection locator, optionally with a file path or prefix.
+""")
+parser.add_argument('destination', type=str, nargs='?', default='/dev/stdout',
+ help="""
+Local file or directory where the data is to be written. Default:
+/dev/stdout.
+""")
+group = parser.add_mutually_exclusive_group()
+group.add_argument('--progress', action='store_true',
+ help="""
+Display human-readable progress on stderr (bytes and, if possible,
+percentage of total data size). This is the default behavior when
+stderr is a tty and stdout is not a tty.
+""")
+group.add_argument('--no-progress', action='store_true',
+ help="""
+Do not display human-readable progress on stderr.
+""")
+group.add_argument('--batch-progress', action='store_true',
+ help="""
+Display machine-readable progress on stderr (bytes and, if known,
+total data size).
+""")
+group = parser.add_mutually_exclusive_group()
+group.add_argument('--hash',
+ help="""
+Display the hash of each file as it is read from Keep, using the given
+hash algorithm. Supported algorithms include md5, sha1, sha224,
+sha256, sha384, and sha512.
+""")
+group.add_argument('--md5sum', action='store_const',
+ dest='hash', const='md5',
+ help="""
+Display the MD5 hash of each file as it is read from Keep.
+""")
+parser.add_argument('-n', action='store_true',
+ help="""
+Do not write any data -- just read from Keep, and report md5sums if
+requested.
+""")
+parser.add_argument('-r', action='store_true',
+ help="""
+Retrieve all files in the specified collection/prefix. This is the
+default behavior if the "locator" argument ends with a forward slash.
+""")
+group = parser.add_mutually_exclusive_group()
+group.add_argument('-f', action='store_true',
+ help="""
+Overwrite existing files while writing. The default behavior is to
+refuse to write *anything* if any of the output files already
+exist. As a special case, -f is not needed to write to /dev/stdout.
+""")
+group.add_argument('--skip-existing', action='store_true',
+ help="""
+Skip files that already exist. The default behavior is to refuse to
+write *anything* if any files exist that would have to be
+overwritten. This option causes even devices, sockets, and fifos to be
+skipped.
+""")
+
+args = parser.parse_args()
+
+if args.locator[-1] == os.sep:
+ args.r = True
+if (args.r and
+ not args.n and
+ not (args.destination and
+ os.path.isdir(args.destination))):
+ parser.error('Destination is not a directory.')
+if not args.r and (os.path.isdir(args.destination) or
+ args.destination[-1] == os.path.sep):
+ args.destination = os.path.join(args.destination,
+ os.path.basename(args.locator))
+ logger.debug("Appended source file name to destination directory: %s" %
+ args.destination)
+
+# Turn on --progress by default if stderr is a tty and stdout isn't.
+if (not (args.batch_progress or args.no_progress)
+ and os.isatty(sys.stderr.fileno())
+ and not os.isatty(sys.stdout.fileno())):
+ args.progress = True
+
+if args.destination == '-':
+ args.destination = '/dev/stdout'
+if args.destination == '/dev/stdout':
+ # Normally you have to use -f to write to a file (or device) that
+ # already exists, but "-" and "/dev/stdout" are common enough to
+ # merit a special exception.
+ args.f = True
+else:
+ args.destination = args.destination.rstrip(os.sep)
+
+
+import arvados
+
+r = re.search(r'^(.*?)(/.*)?$', args.locator)
+collection = r.group(1)
+get_prefix = r.group(2)
+if args.r and not get_prefix:
+ get_prefix = os.sep
+
+todo = []
+todo_bytes = 0
+if not get_prefix:
+ try:
+ if not args.n:
+ if not args.f and os.path.exists(args.destination):
+ logger.error('Local file %s already exists' % args.destination)
+ sys.exit(1)
+ with open(args.destination, 'wb') as f:
+ f.write(arvados.Keep.get(collection))
+ sys.exit(0)
+ except arvados.errors.NotFoundError as e:
+ logger.error(e)
+ sys.exit(1)
+
+reader = arvados.CollectionReader(collection)
+
+# Scan the collection. Make an array of (stream, file, local
+# destination filename) tuples, and add up total size to extract.
+
+try:
+ for s in reader.all_streams():
+ for f in s.all_files():
+ if get_prefix and get_prefix[-1] == os.sep:
+ if 0 != string.find(os.path.join(s.name(), f.name()),
+ '.' + get_prefix):
+ continue
+ dest_path = os.path.join(
+ args.destination,
+ os.path.join(s.name(), f.name())[len(get_prefix)+1:])
+ if (not (args.f or args.skip_existing) and
+ os.path.exists(dest_path)):
+ logger.error('Local file %s already exists' % dest_path)
+ sys.exit(1)
+ else:
+ if os.path.join(s.name(), f.name()) != '.' + get_prefix:
+ continue
+ dest_path = args.destination
+ todo += [(s, f, dest_path)]
+ todo_bytes += f.size()
+except arvados.errors.NotFoundError as e:
+ logger.error(e)
+ sys.exit(1)
+
+# Read data, and (if not -n) write to local file(s) or pipe.
+
+out_bytes = 0
+for s,f,outfilename in todo:
+ outfile = None
+ digestor = None
+ if not args.n:
+ if args.skip_existing and os.path.exists(outfilename):
+ logger.debug('Local file %s exists. Skipping.' % outfilename)
+ continue
+ elif not args.f and (os.path.isfile(outfilename) or
+ os.path.isdir(outfilename)):
+ # Good thing we looked again: apparently this file wasn't
+ # here yet when we checked earlier.
+ logger.error('Local file %s already exists' % outfilename)
+ sys.exit(1)
+ if args.r:
+ arvados.util.mkdir_dash_p(os.path.dirname(outfilename))
+ try:
+ outfile = open(outfilename, 'wb')
+ except Exception as e:
+ logger.error('Open(%s) failed: %s' % (outfilename, e))
+ sys.exit(1)
+ if args.hash:
+ digestor = hashlib.new(args.hash)
+ try:
+ for data in f.readall():
+ if outfile:
+ outfile.write(data)
+ if digestor:
+ digestor.update(data)
+ out_bytes += len(data)
+ if args.progress:
+ sys.stderr.write('\r%d MiB / %d MiB %.1f%%' %
+ (out_bytes >> 20,
+ todo_bytes >> 20,
+ (100
+ if todo_bytes==0
+ else 100.0*out_bytes/todo_bytes)))
+ elif args.batch_progress:
+ sys.stderr.write('%s %d read %d total\n' %
+ (sys.argv[0], os.getpid(),
+ out_bytes, todo_bytes))
+ if digestor:
+ sys.stderr.write("%s %s/%s\n"
+ % (digestor.hexdigest(), s.name(), f.name()))
+ except KeyboardInterrupt:
+ if outfile:
+ os.unlink(outfilename)
+ break
+
+if args.progress:
+ sys.stderr.write('\n')
--- /dev/null
+#!/usr/bin/env python
+
+# TODO:
+# --md5sum - display md5 of each file as read from disk
+
+import argparse
+import os
+import sys
+
+parser = argparse.ArgumentParser(
+ description='Copy data from the local filesystem to Keep.')
+parser.add_argument('paths', metavar='path', type=str, nargs='*',
+ help="""
+Local file or directory. Default: read from standard input.
+""")
+parser.add_argument('--max-manifest-depth', type=int, metavar='N', default=-1,
+ help="""
+Maximum depth of directory tree to represent in the manifest
+structure. A directory structure deeper than this will be represented
+as a single stream in the manifest. If N=0, the manifest will contain
+a single stream. Default: -1 (unlimited), i.e., exactly one manifest
+stream per filesystem directory that contains files.
+""")
+group = parser.add_mutually_exclusive_group()
+group.add_argument('--as-stream', action='store_true', dest='stream',
+ help="""
+Synonym for --stream.
+""")
+group.add_argument('--stream', action='store_true',
+ help="""
+Store the file content and display the resulting manifest on
+stdout. Do not write the manifest to Keep or save a Collection object
+in Arvados.
+""")
+group.add_argument('--as-manifest', action='store_true', dest='manifest',
+ help="""
+Synonym for --manifest.
+""")
+group.add_argument('--in-manifest', action='store_true', dest='manifest',
+ help="""
+Synonym for --manifest.
+""")
+group.add_argument('--manifest', action='store_true',
+ help="""
+Store the file data and resulting manifest in Keep, save a Collection
+object in Arvados, and display the manifest locator (Collection uuid)
+on stdout. This is the default behavior if more than one path argument
+is given, or the path given is a directory, or a --filename argument
+is given.
+""")
+group.add_argument('--as-raw', action='store_true', dest='raw',
+ help="""
+Synonym for --raw.
+""")
+group.add_argument('--raw', action='store_true',
+ help="""
+Store the file content and display the data block locators on stdout,
+separated by spaces, with a trailing newline. Do not store a
+manifest. This is the default behavior when reading data from a single
+file or standard input.
+""")
+parser.add_argument('--use-filename', type=str, default=None, dest='filename',
+ help="""
+Synonym for --filename.
+""")
+parser.add_argument('--filename', type=str, default=None,
+ help="""
+Use the given filename in the manifest, instead of the name of the
+local file. This is useful when "-" or "/dev/stdin" is given as an
+input file. It can be used only if there is exactly one path given and
+it is not a directory. Implies --manifest.
+""")
+group = parser.add_mutually_exclusive_group()
+group.add_argument('--progress', action='store_true',
+ help="""
+Display human-readable progress on stderr (bytes and, if possible,
+percentage of total data size). This is the default behavior when
+stderr is a tty.
+""")
+group.add_argument('--no-progress', action='store_true',
+ help="""
+Do not display human-readable progress on stderr, even if stderr is a
+tty.
+""")
+group.add_argument('--batch-progress', action='store_true',
+ help="""
+Display machine-readable progress on stderr (bytes and, if known,
+total data size).
+""")
+
+args = parser.parse_args()
+
+if len(args.paths) == 0:
+ args.paths += ['/dev/stdin']
+
+if len(args.paths) != 1 or os.path.isdir(args.paths[0]):
+ if args.filename:
+ parser.error("""
+--filename argument cannot be used when storing a directory or
+multiple files.
+""")
+elif not args.filename and not args.stream and not args.manifest:
+ # When reading from a single non-directory, and no --filename is
+ # given, default to writing raw blocks rather than a manifest.
+ args.raw = True
+
+# Turn on --progress by default if stderr is a tty.
+if (not (args.batch_progress or args.no_progress)
+ and os.isatty(sys.stderr.fileno())):
+ args.progress = True
+
+
+import arvados
+import re
+import string
+
+class CollectionWriterWithProgress(arvados.CollectionWriter):
+ def flush_data(self, *args, **kwargs):
+ if not getattr(self, 'display_type', None):
+ return
+ if not hasattr(self, 'bytes_flushed'):
+ self.bytes_flushed = 0
+ self.bytes_flushed += self._data_buffer_len
+ super(CollectionWriterWithProgress, self).flush_data(*args, **kwargs)
+ self.bytes_flushed -= self._data_buffer_len
+ if self.display_type == 'machine':
+ sys.stderr.write('%s %d: %d written %d total\n' %
+ (sys.argv[0],
+ os.getpid(),
+ self.bytes_flushed,
+ getattr(self, 'bytes_expected', -1)))
+ elif getattr(self, 'bytes_expected', 0) > 0:
+ pct = 100.0 * self.bytes_flushed / self.bytes_expected
+ sys.stderr.write('\r%dM / %dM %.1f%% ' %
+ (self.bytes_flushed >> 20,
+ self.bytes_expected >> 20, pct))
+ else:
+ sys.stderr.write('\r%d ' % self.bytes_flushed)
+ def manifest_text(self, *args, **kwargs):
+ manifest_text = (super(CollectionWriterWithProgress, self)
+ .manifest_text(*args, **kwargs))
+ if getattr(self, 'display_type', None):
+ if self.display_type == 'human':
+ sys.stderr.write('\n')
+ self.display_type = None
+ return manifest_text
+
+if args.progress:
+ writer = CollectionWriterWithProgress()
+ writer.display_type = 'human'
+elif args.batch_progress:
+ writer = CollectionWriterWithProgress()
+ writer.display_type = 'machine'
+else:
+ writer = arvados.CollectionWriter()
+
+args.paths = [('/dev/stdin' if p=='-' else p) for p in args.paths]
+
+# Walk the given directory trees and stat files, adding up file sizes,
+# so we can display progress as percent
+writer.bytes_expected = 0
+for path in args.paths:
+ if os.path.isdir(path):
+ for filename in arvados.util.listdir_recursive(path):
+ writer.bytes_expected += os.path.getsize(
+ os.path.join(path, filename))
+ elif not os.path.isfile(path):
+ del writer.bytes_expected
+ break
+ else:
+ writer.bytes_expected += os.path.getsize(path)
+
+# Copy file data to Keep.
+for path in args.paths:
+ if os.path.isdir(path):
+ writer.write_directory_tree(path,
+ max_manifest_depth=args.max_manifest_depth)
+ else:
+ writer.start_new_stream()
+ writer.start_new_file(args.filename or os.path.split(path)[1])
+ with open(path, 'rb') as f:
+ while True:
+ buf = f.read(2**26)
+ if len(buf) == 0:
+ break
+ writer.write(buf)
+
+if args.stream:
+ print writer.manifest_text(),
+elif args.raw:
+ writer.finish_current_stream()
+ print string.join(writer.data_locators(), ',') + '\n'
+else:
+ # Register the resulting collection in Arvados.
+ arvados.api().collections().create(
+ body={
+ 'uuid': writer.finish(),
+ 'manifest_text': writer.manifest_text(),
+ },
+ ).execute()
+
+ # Print the locator (uuid) of the new collection.
+ print writer.finish()
--- /dev/null
+from setuptools import setup
+import subprocess
+
+minor_version = subprocess.check_output(
+ ['git', 'log', '--format=format:%ct.%h', '-n1', '.'])
+
+setup(name='arvados-python-client',
+ version='0.1.' + minor_version,
+ description='Arvados client library',
+ url='https://arvados.org',
+ author='Arvados',
+ author_email='info@arvados.org',
+ license='Apache 2.0',
+ packages=['arvados'],
+ scripts=[
+ 'bin/arv-get',
+ 'bin/arv-put',
+ ],
+ install_requires=[
+ 'python-gflags',
+ 'google-api-python-client',
+ ],
+ zip_safe=False)