# Module-level logger, named after the basename of the invoking script
# (sys.argv[0]).
11 logger = logging.getLogger(os.path.basename(sys.argv[0]))
# Command-line interface: a `locator` positional plus an optional
# `destination` positional (nargs='?') that falls back to /dev/stdout when
# it is omitted.
13 parser = argparse.ArgumentParser(
14 description='Copy data from Keep to a local file or pipe.')
15 parser.add_argument('locator', type=str,
17 Collection locator, optionally with a file path or prefix.
19 parser.add_argument('destination', type=str, nargs='?', default='/dev/stdout',
21 Local file or directory where the data is to be written. Default:
# Progress reporting: --progress, --no-progress and --batch-progress live in
# one mutually exclusive group, so at most one of them may be given.
24 group = parser.add_mutually_exclusive_group()
25 group.add_argument('--progress', action='store_true',
27 Display human-readable progress on stderr (bytes and, if possible,
28 percentage of total data size). This is the default behavior when
29 stderr is a tty and stdout is not a tty.
31 group.add_argument('--no-progress', action='store_true',
33 Do not display human-readable progress on stderr.
35 group.add_argument('--batch-progress', action='store_true',
37 Display machine-readable progress on stderr (bytes and, if known,
# Hashing: --md5sum stores the constant 'md5' into the same `hash`
# destination that --hash fills; the two options are mutually exclusive.
40 group = parser.add_mutually_exclusive_group()
41 group.add_argument('--hash',
43 Display the hash of each file as it is read from Keep, using the given
44 hash algorithm. Supported algorithms include md5, sha1, sha224,
45 sha256, sha384, and sha512.
47 group.add_argument('--md5sum', action='store_const',
48 dest='hash', const='md5',
50 Display the MD5 hash of each file as it is read from Keep.
# -n: read from Keep without writing any data.
# NOTE(review): the help text below mentions only md5sums although --hash
# accepts other algorithms; the end of the sentence is not visible here, so
# confirm before rewording.
52 parser.add_argument('-n', action='store_true',
54 Do not write any data -- just read from Keep, and report md5sums if
# -r: retrieve every file under the given collection/prefix.
57 parser.add_argument('-r', action='store_true',
59 Retrieve all files in the specified collection/prefix. This is the
60 default behavior if the "locator" argument ends with a forward slash.
# Parse the command line, then normalize `args` before any data is read.
63 args = parser.parse_args()

# A locator ending in the path separator gets special handling; the body of
# this branch is not visible here (presumably it turns on -r, as the -r help
# text describes -- TODO confirm).
# NOTE(review): indexing [-1] raises IndexError if locator is the empty
# string; args.locator.endswith(os.sep) would avoid that.
65 if args.locator[-1] == os.sep:
69 not (args.destination and
70 os.path.isdir(args.destination))):
71 parser.error('Destination is not a directory.')
# Single-file mode (no -r) with a destination that is an existing directory
# or ends in a path separator: write to <destination>/<basename of locator>.
72 if not args.r and (os.path.isdir(args.destination) or
73 args.destination[-1] == os.path.sep):
74 args.destination = os.path.join(args.destination,
75 os.path.basename(args.locator))
76 logger.debug("Appended source file name to destination directory: %s" %
79 # Turn on --progress by default if stderr is a tty and stdout isn't.
80 if (not (args.batch_progress or args.no_progress)
81 and os.isatty(sys.stderr.fileno())
82 and not os.isatty(sys.stdout.fileno())):
# '-' as the destination is an alias for standard output.
85 if args.destination == '-':
86 args.destination = '/dev/stdout'
# Drop trailing path separators from the destination.
# NOTE(review): a destination consisting only of separators (e.g. '/')
# becomes the empty string here -- confirm that this is acceptable.
87 args.destination = args.destination.rstrip(os.sep)
# Split the locator at its first '/': group 1 is everything before it (the
# collection), group 2 is the remainder including the leading '/', or None
# when the locator contains no '/'.
92 r = re.search(r'^(.*?)(/.*)?$', args.locator)
93 collection = r.group(1)
94 get_prefix = r.group(2)
# -r with no path/prefix part; the body of this branch is not visible here.
95 if args.r and not get_prefix:
# Write the value returned by arvados.Keep.get(collection) to the
# destination in binary mode. The conditions guarding this write are not
# visible here -- presumably it applies only when no file path was given;
# TODO confirm.
103 with open(args.destination, 'wb') as f:
104 f.write(arvados.Keep.get(collection))
# Handler for a locator that Keep cannot find; its body is not visible here.
106 except arvados.errors.NotFoundError as e:
# Reader used by the code that follows to enumerate streams and files.
110 reader = arvados.CollectionReader(collection)
# Build the work list for the transfer: `todo` collects one tuple per file
# to extract and `todo_bytes` accumulates their sizes. The initializers for
# both names are not visible here.
112 # Scan the collection. Make an array of (stream, file, local
113 # destination filename) tuples, and add up total size to extract.
116 for s in reader.all_streams():
117 for f in s.all_files():
# Prefix mode (get_prefix ends with the path separator): the comparison with
# 0 tests where string.find() locates its second argument (not visible here)
# in "<stream>/<file>". The action taken on a mismatch is also not visible.
# NOTE(review): string.find() exists only in the Python 2 `string` module;
# str.startswith() is the portable spelling.
118 if get_prefix and get_prefix[-1] == os.sep:
119 if 0 != string.find(os.path.join(s.name(), f.name()),
# The local path is built from "<stream>/<file>" with its first
# len(get_prefix)+1 characters removed. Presumably the +1 accounts for the
# leading '.' of stream names -- TODO confirm.
122 dest_path = os.path.join(
124 os.path.join(s.name(), f.name())[len(get_prefix)+1:])
# Otherwise "<stream>/<file>" is compared against '.' + get_prefix and the
# destination is used as given. The `else:` and the action taken on a
# mismatch are not visible here.
126 if os.path.join(s.name(), f.name()) != '.' + get_prefix:
128 dest_path = args.destination
# Record the file and add its size to the running total.
129 todo += [(s, f, dest_path)]
130 todo_bytes += f.size()
# Handler for a collection that cannot be found; body not visible here.
131 except arvados.errors.NotFoundError as e:
# Transfer loop: process each (stream, file, output filename) entry of
# `todo`, counting the bytes read in `out_bytes`. Several guards (for
# example around -n, --hash and --progress) are not visible here.
135 # Read data, and (if not -n) write to local file(s) or pipe.
138 for s,f,outfilename in todo:
# Create the parent directory of the output file (mkdir -p semantics are
# implied by the helper's name -- TODO confirm), then open it for binary
# writing.
# NOTE(review): the broad `except Exception` logs any failure to open; what
# happens after the log line is not visible here. `outfile` is opened
# without a context manager, so check that every path closes it.
143 arvados.util.mkdir_dash_p(os.path.dirname(outfilename))
145 outfile = open(outfilename, 'wb')
146 except Exception as e:
147 logger.error('Open(%s) failed: %s' % (outfilename, e))
# Digest object for the algorithm named by --hash / --md5sum.
149 digestor = hashlib.new(args.hash)
# Consume the file piece by piece, feeding each piece to the digest and
# adding its length to the running byte count.
151 for data in f.readall():
155 digestor.update(data)
156 out_bytes += len(data)
# Human-readable progress: the leading '\r' rewrites the same stderr line.
# The argument expressions (unit conversion, zero-total guard) are only
# partly visible here.
158 sys.stderr.write('\r%d MiB / %d MiB %.1f%%' %
163 else 100.0*out_bytes/todo_bytes)))
# Machine-readable progress: "<argv0> <pid> read <bytes> total <bytes>",
# one line per update.
164 elif args.batch_progress:
165 sys.stderr.write('%s %d read %d total\n' %
166 (sys.argv[0], os.getpid(),
167 out_bytes, todo_bytes))
# Report "<hexdigest> <stream>/<file>" on stderr.
169 sys.stderr.write("%s %s/%s\n"
170 % (digestor.hexdigest(), s.name(), f.name()))
# On Ctrl-C, remove the output file. Any guard around the unlink and
# whatever follows it are not visible here.
171 except KeyboardInterrupt:
173 os.unlink(outfilename)
# Emit a newline on stderr -- presumably to end the '\r' progress line;
# its guard is not visible here.
177 sys.stderr.write('\n')