# NOTE(review): this is a partially-elided numbered excerpt of the arv-get
# script; the leading integers are original-file line numbers, and the module
# docstring plus most stdlib imports (sys, os, re, logging, hashlib, string,
# argparse, arvados) are on elided lines — presumably imported above; confirm
# against the full source.
12 import arvados.commands._util as arv_cmd
14 from arvados._version import __version__
16 logger = logging.getLogger('arvados.arv-get')
# Print an error message prefixed with the program name to stderr.
# NOTE(review): Python 2 print-chevron syntax. The rest of the body (original
# line 20+) is elided here; given the `code=1` default it presumably calls
# sys.exit(code) — TODO confirm against the full source.
18 def abort(msg, code=1):
19     print >>sys.stderr, "arv-get:", msg
# Command-line interface for arv-get. The help texts visible below are
# fragments of triple-quoted strings whose opening/closing quotes sit on
# elided lines, so no code may safely be inserted between the fragments.
22 parser = argparse.ArgumentParser(
23     description='Copy data from Keep to a local file or pipe.',
24     parents=[arv_cmd.retry_opt])
25 parser.add_argument('--version', action='version',
26                     version="%s %s" % (sys.argv[0], __version__),
27                     help='Print version and exit.')
28 parser.add_argument('locator', type=str,
30 Collection locator, optionally with a file path or prefix.
32 parser.add_argument('destination', type=str, nargs='?', default='-',
34 Local file or directory where the data is to be written. Default: stdout.
# Progress reporting: --progress / --no-progress / --batch-progress are
# mutually exclusive; the default is decided later from tty detection.
36 group = parser.add_mutually_exclusive_group()
37 group.add_argument('--progress', action='store_true',
39 Display human-readable progress on stderr (bytes and, if possible,
40 percentage of total data size). This is the default behavior when it
41 is not expected to interfere with the output: specifically, stderr is
42 a tty _and_ either stdout is not a tty, or output is being written to
43 named files rather than stdout.
45 group.add_argument('--no-progress', action='store_true',
47 Do not display human-readable progress on stderr.
49 group.add_argument('--batch-progress', action='store_true',
51 Display machine-readable progress on stderr (bytes and, if known,
# Hash reporting: --hash ALGO and its --md5sum shorthand (store_const into
# the same `hash` dest) are mutually exclusive.
54 group = parser.add_mutually_exclusive_group()
55 group.add_argument('--hash',
57 Display the hash of each file as it is read from Keep, using the given
58 hash algorithm. Supported algorithms include md5, sha1, sha224,
59 sha256, sha384, and sha512.
61 group.add_argument('--md5sum', action='store_const',
62                    dest='hash', const='md5',
64 Display the MD5 hash of each file as it is read from Keep.
# -n: dry run (read/verify only); -r: recursive retrieval of a whole
# collection or prefix.
66 parser.add_argument('-n', action='store_true',
68 Do not write any data -- just read from Keep, and report md5sums if
71 parser.add_argument('-r', action='store_true',
73 Retrieve all files in the specified collection/prefix. This is the
74 default behavior if the "locator" argument ends with a forward slash.
# Existing-file policy: -f (overwrite) vs --skip-existing are mutually
# exclusive; the default refuses to clobber anything.
76 group = parser.add_mutually_exclusive_group()
77 group.add_argument('-f', action='store_true',
79 Overwrite existing files while writing. The default behavior is to
80 refuse to write *anything* if any of the output files already
81 exist. As a special case, -f is not needed to write to stdout.
83 group.add_argument('--skip-existing', action='store_true',
85 Skip files that already exist. The default behavior is to refuse to
86 write *anything* if any files exist that would have to be
87 overwritten. This option causes even devices, sockets, and fifos to be
91 args = parser.parse_args()
# Normalize the parsed arguments: a trailing separator on the locator implies
# recursive mode (interior lines elided here), the destination must then be a
# directory, and single-file fetches into a directory get the source file
# name appended.
93 if args.locator[-1] == os.sep:
# NOTE(review): this condition's opening line (original 96) is elided;
# presumably it also checks that the destination is not stdout — confirm.
97     not (args.destination and
98          os.path.isdir(args.destination))):
99     parser.error('Destination is not a directory.')
100 if not args.r and (os.path.isdir(args.destination) or
101                    args.destination[-1] == os.path.sep):
102     args.destination = os.path.join(args.destination,
103                                     os.path.basename(args.locator))
104     logger.debug("Appended source file name to destination directory: %s",
# Treat /dev/stdout as an alias for "-" so the stdout special-casing below
# applies to both spellings.
107 if args.destination == '/dev/stdout':
108     args.destination = "-"
110 if args.destination == '-':
111     # Normally you have to use -f to write to a file (or device) that
112     # already exists, but "-" and "/dev/stdout" are common enough to
113     # merit a special exception.
# Strip trailing separators so later os.path.join calls behave predictably.
116     args.destination = args.destination.rstrip(os.sep)
118 # Turn on --progress by default if stderr is a tty and output is
119 # either going to a named file, or going (via stdout) to something
# (continuation of the comment above is elided — original line 120.)
# Only applies when the user passed neither --batch-progress nor
# --no-progress; the assignment itself (presumably args.progress = True)
# is on an elided line.
121 if (not (args.batch_progress or args.no_progress)
122     and sys.stderr.isatty()
123     and (args.destination != '-'
124          or not sys.stdout.isatty())):
# Split the locator into the collection identifier (everything before the
# first '/') and an optional '/'-prefixed file path or prefix within it.
128 r = re.search(r'^(.*?)(/.*)?$', args.locator)
129 collection = r.group(1)
130 get_prefix = r.group(2)
131 if args.r and not get_prefix:
133 api_client = arvados.api('v1')
134 reader = arvados.CollectionReader(collection, num_retries=args.retries)
# Manifest-only path: with no file prefix, emit the collection's manifest
# text to stdout or to the named destination. O_EXCL is added (on an elided
# condition — presumably when -f was not given) so an existing file is not
# clobbered. NOTE(review): the `try:` headers matching the except clauses
# below are on elided lines.
138 open_flags = os.O_CREAT | os.O_WRONLY
140     open_flags |= os.O_EXCL
142     if args.destination == "-":
143         sys.stdout.write(reader.manifest_text())
145         out_fd = os.open(args.destination, open_flags)
146         with os.fdopen(out_fd, 'wb') as out_file:
147             out_file.write(reader.manifest_text())
148 except (IOError, OSError) as error:
149     abort("can't write to '{}': {}".format(args.destination, error))
150 except (arvados.errors.ApiError, arvados.errors.KeepReadError) as error:
151     abort("failed to download '{}': {}".format(collection, error))
156 # Scan the collection. Make an array of (stream, file, local
157 # destination filename) tuples, and add up total size to extract.
# NOTE(review): the `try:` header and the `todo = []` / `todo_bytes = 0`
# initializations are on elided lines.
161 for s in reader.all_streams():
162     for f in s.all_files():
163         if get_prefix and get_prefix[-1] == os.sep:
# Prefix-match each stream-relative path against get_prefix.
# NOTE(review): `string.find(...)` is the deprecated Python 2 module-level
# form of str.find; the comparison's second argument is on an elided line.
164             if 0 != string.find(os.path.join(s.name(), f.name()),
167             if args.destination == "-":
# For directory output, place the file under the destination keeping its
# path relative to the requested prefix.
170                 dest_path = os.path.join(
172                     os.path.join(s.name(), f.name())[len(get_prefix)+1:])
# Pre-flight existence check; rechecked again at write time below.
173                 if (not (args.n or args.f or args.skip_existing) and
174                     os.path.exists(dest_path)):
175                     abort('Local file %s already exists.' % (dest_path,))
177             if os.path.join(s.name(), f.name()) != '.' + get_prefix:
179                 dest_path = args.destination
180         todo += [(s, f, dest_path)]
181         todo_bytes += f.size()
182 except arvados.errors.NotFoundError as e:
185 # Read data, and (if not -n) write to local file(s) or pipe.
# Main transfer loop: for each (stream, file, destination) tuple, open the
# output, stream data from Keep, update the optional hash digest, and emit
# progress. NOTE(review): several structural lines (the `try:` headers, the
# stdout branch body, `out_bytes` initialization, and some progress
# formatting) are elided in this excerpt.
188 for s,f,outfilename in todo:
192     if outfilename == "-":
195         if args.skip_existing and os.path.exists(outfilename):
196             logger.debug('Local file %s exists. Skipping.', outfilename)
# Re-check existence at write time: -f permits overwrite, otherwise abort.
198         elif not args.f and (os.path.isfile(outfilename) or
199                              os.path.isdir(outfilename)):
200             # Good thing we looked again: apparently this file wasn't
201             # here yet when we checked earlier.
202             abort('Local file %s already exists.' % (outfilename,))
204             arvados.util.mkdir_dash_p(os.path.dirname(outfilename))
206             outfile = open(outfilename, 'wb')
207         except Exception as error:
208             abort('Open(%s) failed: %s' % (outfilename, error))
# Optional digest of the file contents as they stream through.
210         digestor = hashlib.new(args.hash)
212         for data in f.readall():
216                 digestor.update(data)
217             out_bytes += len(data)
# Human-readable progress: carriage-return-overwritten MiB counter with a
# percentage (guarded against todo_bytes == 0 on an elided line).
219                 sys.stderr.write('\r%d MiB / %d MiB %.1f%%' %
224                                   else 100.0*out_bytes/todo_bytes)))
225             elif args.batch_progress:
# Machine-readable progress: "<argv0> <pid> read <bytes> total <bytes>".
226                 sys.stderr.write('%s %d read %d total\n' %
227                                  (sys.argv[0], os.getpid(),
228                                   out_bytes, todo_bytes))
230             sys.stderr.write("%s %s/%s\n"
231                              % (digestor.hexdigest(), s.name(), f.name()))
# On interrupt, remove a partially-written named output file (fileno > 2
# excludes stdin/stdout/stderr) so no truncated file is left behind.
232     except KeyboardInterrupt:
233         if outfile and (outfile.fileno() > 2) and not outfile.closed:
234             os.unlink(outfile.name)
238     sys.stderr.write('\n')