12 import arvados.commands._util as arv_cmd
14 logger = logging.getLogger('arvados.arv-get')
def abort(msg, code=1):
    """Report a fatal error on stderr and terminate the program.

    The message is written as a single line prefixed with "arv-get:".

    Args:
        msg: Text (or any object convertible with str(), such as an
            exception) describing the failure.
        code: Process exit status to terminate with. Defaults to 1.

    Raises:
        SystemExit: always, carrying `code`.
    """
    # sys.stderr.write() behaves the same on Python 2 and Python 3, unlike
    # the Python-2-only "print >>sys.stderr" statement.  The 1-tuple keeps
    # "%s" formatting correct even if msg itself happens to be a tuple.
    sys.stderr.write("arv-get: %s\n" % (msg,))
    sys.exit(code)
# Command-line parser for arv-get.  Any options defined by
# arv_cmd.retry_opt are inherited through `parents`.
parser = argparse.ArgumentParser(
    parents=[arv_cmd.retry_opt],
    description='Copy data from Keep to a local file or pipe.',
)
23 parser.add_argument('locator', type=str,
25 Collection locator, optionally with a file path or prefix.
27 parser.add_argument('destination', type=str, nargs='?', default='-',
29 Local file or directory where the data is to be written. Default: stdout.
31 group = parser.add_mutually_exclusive_group()
32 group.add_argument('--progress', action='store_true',
34 Display human-readable progress on stderr (bytes and, if possible,
35 percentage of total data size). This is the default behavior when it
36 is not expected to interfere with the output: specifically, stderr is
37 a tty _and_ either stdout is not a tty, or output is being written to
38 named files rather than stdout.
40 group.add_argument('--no-progress', action='store_true',
42 Do not display human-readable progress on stderr.
44 group.add_argument('--batch-progress', action='store_true',
46 Display machine-readable progress on stderr (bytes and, if known,
49 group = parser.add_mutually_exclusive_group()
50 group.add_argument('--hash',
52 Display the hash of each file as it is read from Keep, using the given
53 hash algorithm. Supported algorithms include md5, sha1, sha224,
54 sha256, sha384, and sha512.
56 group.add_argument('--md5sum', action='store_const',
57 dest='hash', const='md5',
59 Display the MD5 hash of each file as it is read from Keep.
61 parser.add_argument('-n', action='store_true',
63 Do not write any data -- just read from Keep, and report md5sums if
66 parser.add_argument('-r', action='store_true',
68 Retrieve all files in the specified collection/prefix. This is the
69 default behavior if the "locator" argument ends with a forward slash.
71 group = parser.add_mutually_exclusive_group()
72 group.add_argument('-f', action='store_true',
74 Overwrite existing files while writing. The default behavior is to
75 refuse to write *anything* if any of the output files already
76 exist. As a special case, -f is not needed to write to stdout.
78 group.add_argument('--skip-existing', action='store_true',
80 Skip files that already exist. The default behavior is to refuse to
81 write *anything* if any files exist that would have to be
82 overwritten. This option causes even devices, sockets, and fifos to be
86 args = parser.parse_args()
88 if args.locator[-1] == os.sep:
92 not (args.destination and
93 os.path.isdir(args.destination))):
94 parser.error('Destination is not a directory.')
95 if not args.r and (os.path.isdir(args.destination) or
96 args.destination[-1] == os.path.sep):
97 args.destination = os.path.join(args.destination,
98 os.path.basename(args.locator))
99 logger.debug("Appended source file name to destination directory: %s",
102 if args.destination == '/dev/stdout':
103 args.destination = "-"
105 if args.destination == '-':
106 # Normally you have to use -f to write to a file (or device) that
107 # already exists, but "-" and "/dev/stdout" are common enough to
108 # merit a special exception.
111 args.destination = args.destination.rstrip(os.sep)
113 # Turn on --progress by default if stderr is a tty and output is
114 # either going to a named file, or going (via stdout) to something
116 if (not (args.batch_progress or args.no_progress)
117 and sys.stderr.isatty()
118 and (args.destination != '-'
119 or not sys.stdout.isatty())):
# Split the locator at its first "/": the text before it identifies the
# collection, and the remainder (leading slash included) is the file path
# or prefix to fetch.  get_prefix is None when the locator has no slash.
r = re.search(r'^(.*?)(/.*)?$', args.locator)
collection, get_prefix = r.groups()
126 if args.r and not get_prefix:
128 api_client = arvados.api('v1')
129 reader = arvados.CollectionReader(collection, num_retries=args.retries)
133 open_flags = os.O_CREAT | os.O_WRONLY
135 open_flags |= os.O_EXCL
137 if args.destination == "-":
138 sys.stdout.write(reader.manifest_text())
140 out_fd = os.open(args.destination, open_flags)
141 with os.fdopen(out_fd, 'wb') as out_file:
142 out_file.write(reader.manifest_text())
143 except (IOError, OSError) as error:
144 abort("can't write to '{}': {}".format(args.destination, error))
145 except (arvados.errors.ApiError, arvados.errors.KeepReadError) as error:
146 abort("failed to download '{}': {}".format(collection, error))
151 # Scan the collection. Make an array of (stream, file, local
152 # destination filename) tuples, and add up total size to extract.
156 for s in reader.all_streams():
157 for f in s.all_files():
158 if get_prefix and get_prefix[-1] == os.sep:
159 if 0 != string.find(os.path.join(s.name(), f.name()),
162 if args.destination == "-":
165 dest_path = os.path.join(
167 os.path.join(s.name(), f.name())[len(get_prefix)+1:])
168 if (not (args.n or args.f or args.skip_existing) and
169 os.path.exists(dest_path)):
170 abort('Local file %s already exists.' % (dest_path,))
172 if os.path.join(s.name(), f.name()) != '.' + get_prefix:
174 dest_path = args.destination
175 todo += [(s, f, dest_path)]
176 todo_bytes += f.size()
177 except arvados.errors.NotFoundError as e:
180 # Read data, and (if not -n) write to local file(s) or pipe.
183 for s,f,outfilename in todo:
187 if outfilename == "-":
190 if args.skip_existing and os.path.exists(outfilename):
191 logger.debug('Local file %s exists. Skipping.', outfilename)
193 elif not args.f and (os.path.isfile(outfilename) or
194 os.path.isdir(outfilename)):
195 # Good thing we looked again: apparently this file wasn't
196 # here yet when we checked earlier.
197 abort('Local file %s already exists.' % (outfilename,))
199 arvados.util.mkdir_dash_p(os.path.dirname(outfilename))
201 outfile = open(outfilename, 'wb')
202 except Exception as error:
203 abort('Open(%s) failed: %s' % (outfilename, error))
205 digestor = hashlib.new(args.hash)
207 for data in f.readall():
211 digestor.update(data)
212 out_bytes += len(data)
214 sys.stderr.write('\r%d MiB / %d MiB %.1f%%' %
219 else 100.0*out_bytes/todo_bytes)))
220 elif args.batch_progress:
221 sys.stderr.write('%s %d read %d total\n' %
222 (sys.argv[0], os.getpid(),
223 out_bytes, todo_bytes))
225 sys.stderr.write("%s %s/%s\n"
226 % (digestor.hexdigest(), s.name(), f.name()))
227 except KeyboardInterrupt:
228 if outfile and (outfile.fileno() > 2) and not outfile.closed:
229 os.unlink(outfile.name)
233 sys.stderr.write('\n')