# --- Command-line interface -------------------------------------------
# Defines the CLI for copying data out of Keep to a local file or pipe.
# NOTE(review): each line below carries the original file's line number
# fused into the text ("10 ", "11 ", ...) and several lines are elided
# (help-string bodies are cut mid-literal) — this looks like extraction
# garbling; reconcile against the canonical source before editing.
10 parser = argparse.ArgumentParser(
11 description='Copy data from Keep to a local file or pipe.')
# Positional: the Keep locator to fetch (collection hash, optionally
# followed by a file path or prefix inside the collection).
12 parser.add_argument('locator', type=str,
14 Collection locator, optionally with a file path or prefix.
# Positional (optional): where to write; defaults to stdout.
16 parser.add_argument('destination', type=str, nargs='?', default='/dev/stdout',
18 Local file or directory where the data is to be written. Default:
# Progress-reporting options are mutually exclusive: exactly one of
# --progress / --no-progress / --batch-progress may be given.
21 group = parser.add_mutually_exclusive_group()
22 group.add_argument('--progress', action='store_true',
24 Display human-readable progress on stderr (bytes and, if possible,
25 percentage of total data size). This is the default behavior when
26 stderr is a tty and stdout is not a tty.
28 group.add_argument('--no-progress', action='store_true',
30 Do not display human-readable progress on stderr.
32 group.add_argument('--batch-progress', action='store_true',
34 Display machine-readable progress on stderr (bytes and, if known,
# Hashing options are also mutually exclusive; --md5sum is shorthand
# for --hash md5 (both store into args.hash).
37 group = parser.add_mutually_exclusive_group()
38 group.add_argument('--hash',
40 Display the hash of each file as it is read from Keep, using the given
41 hash algorithm. Supported algorithms include md5, sha1, sha224,
42 sha256, sha384, and sha512.
44 group.add_argument('--md5sum', action='store_const',
45 dest='hash', const='md5',
47 Display the MD5 hash of each file as it is read from Keep.
# -n: dry run — read (and optionally hash) but write nothing.
49 parser.add_argument('-n', action='store_true',
51 Do not write any data -- just read from Keep, and report md5sums if
# -r: recursive retrieval of everything under the locator/prefix.
54 parser.add_argument('-r', action='store_true',
56 Retrieve all files in the specified collection/prefix. This is the
57 default behavior if the "locator" argument ends with a forward slash.
60 args = parser.parse_args()
# --- Post-parse validation and normalization --------------------------
# A locator ending in the path separator implies recursive mode.
# NOTE(review): the body of this `if` is elided in this extract —
# presumably it sets args.r = True; confirm against the original.
62 if args.locator[-1] == os.sep:
# Recursive mode requires the destination to be an existing directory
# (the enclosing condition's first lines are elided here).
66 not (args.destination and
67 os.path.isdir(args.destination))):
68 parser.error('Destination is not a directory.')
# Conversely, non-recursive mode must not target a directory.
69 if not args.r and (os.path.isdir(args.destination) or
70 args.destination[-1] == os.path.sep):
71 parser.error('Destination is a directory.')
73 # Turn on --progress by default if stderr is a tty and stdout isn't.
# NOTE(review): the assignment this condition guards (presumably
# args.progress = True) is elided from this extract.
74 if (not (args.batch_progress or args.no_progress)
75 and os.isatty(sys.stderr.fileno())
76 and not os.isatty(sys.stdout.fileno())):
# '-' is conventional shorthand for stdout; normalize it, then strip
# any trailing separator so path joins below behave consistently.
79 if args.destination == '-':
80 args.destination = '/dev/stdout'
81 args.destination = args.destination.rstrip(os.sep)
# --- Split the locator into collection hash and in-collection prefix --
# group(1) = everything before the first '/', group(2) = the '/...'
# remainder (or None when no path was given).
86 r = re.search(r'^(.*?)(/.*)?$', args.locator)
87 collection = r.group(1)
88 get_prefix = r.group(2)
# NOTE(review): the body of this branch is elided — presumably it
# defaults get_prefix to os.sep for recursive whole-collection fetches.
89 if args.r and not get_prefix:
# Fast path (enclosing condition elided in this extract): with no
# prefix, dump the raw collection data straight from Keep in one call.
96 with open(args.destination, 'wb') as f:
97 f.write(arvados.Keep.get(collection))
# --- Scan the collection and plan the work ----------------------------
100 reader = arvados.CollectionReader(collection)
102 # Scan the collection. Make an array of (stream, file, local
103 # destination filename) tuples, and add up total size to extract.
# NOTE(review): the initializations of `todo` and `todo_bytes`
# (presumably todo = [] and todo_bytes = 0) are elided in this extract.
105 for s in reader.all_streams():
106 for f in s.all_files():
# Prefix mode: only take files whose stream/file path starts with
# the requested prefix.
107 if get_prefix and get_prefix[-1] == os.sep:
# NOTE(review): `string.find` is the long-deprecated Python 2
# `string` module function — this script predates Python 3.
108 if 0 != string.find(os.path.join(s.name(), f.name()),
# Destination path preserves the file's path relative to the prefix.
111 dest_path = os.path.join(
113 os.path.join(s.name(), f.name())[len(get_prefix)+1:])
# Exact-match mode: skip anything that isn't precisely the
# requested file ('.' + get_prefix matches the stream-root form).
115 if os.path.join(s.name(), f.name()) != '.' + get_prefix:
117 dest_path = args.destination
118 todo += [(s, f, dest_path)]
119 todo_bytes += f.size()
121 # Read data, and (if not -n) write to local file(s) or pipe.
# NOTE(review): the initialization of out_bytes and the try: opening of
# this loop's exception scope are elided in this extract.
124 for s,f,outfilename in todo:
# Create parent directories as needed before opening the output file
# (guarded by branches elided here — e.g. the -n dry-run check).
129 arvados.util.mkdir_dash_p(os.path.dirname(outfilename))
130 outfile = open(outfilename, 'wb')
# One digestor per file when --hash/--md5sum was requested.
132 digestor = hashlib.new(args.hash)
# Stream the file chunk by chunk; per-chunk writes/updates below are
# each guarded by conditions partially elided in this extract.
134 for data in f.readall():
138 digestor.update(data)
139 out_bytes += len(data)
# --progress: carriage-return-overwritten human-readable line.
141 sys.stderr.write('\r%d MiB / %d MiB %.1f%%' %
146 else 100.0*out_bytes/todo_bytes)))
# --batch-progress: one machine-readable line per chunk.
147 elif args.batch_progress:
148 sys.stderr.write('%s %d read %d total\n' %
149 (sys.argv[0], os.getpid(),
150 out_bytes, todo_bytes))
# After each file: report its digest in md5sum-style format.
152 sys.stderr.write("%s %s/%s\n"
153 % (digestor.hexdigest(), s.name(), f.name()))
# On Ctrl-C, remove the partially-written output file before exiting.
154 except KeyboardInterrupt:
156 os.unlink(outfilename)
# Finish the \r-style progress line with a newline (enclosing
# condition elided in this extract).
160 sys.stderr.write('\n')