import argparse
import hashlib
import logging
import os
import re
import string
import sys

import arvados
import arvados.commands._util as arv_cmd

logger = logging.getLogger('arvados.arv-get')

def abort(msg, code=1):
    print >>sys.stderr, "arv-get:", msg
    sys.exit(code)

parser = argparse.ArgumentParser(
    description='Copy data from Keep to a local file or pipe.',
    parents=[arv_cmd.retry_opt])
parser.add_argument('locator', type=str,
                    help="""
Collection locator, optionally with a file path or prefix.
""")
parser.add_argument('destination', type=str, nargs='?', default='/dev/stdout',
                    help="""
Local file or directory where the data is to be written. Default:
/dev/stdout.
""")
group = parser.add_mutually_exclusive_group()
group.add_argument('--progress', action='store_true',
                   help="""
Display human-readable progress on stderr (bytes and, if possible,
percentage of total data size). This is the default behavior when it
is not expected to interfere with the output: specifically, stderr is
a tty _and_ either stdout is not a tty, or output is being written to
named files rather than stdout.
""")
group.add_argument('--no-progress', action='store_true',
                   help="""
Do not display human-readable progress on stderr.
""")
group.add_argument('--batch-progress', action='store_true',
                   help="""
Display machine-readable progress on stderr (bytes and, if known,
total data size).
""")
group = parser.add_mutually_exclusive_group()
group.add_argument('--hash',
                   help="""
Display the hash of each file as it is read from Keep, using the given
hash algorithm. Supported algorithms include md5, sha1, sha224,
sha256, sha384, and sha512.
""")
group.add_argument('--md5sum', action='store_const',
                   dest='hash', const='md5',
                   help="""
Display the MD5 hash of each file as it is read from Keep.
""")
parser.add_argument('-n', action='store_true',
                    help="""
Do not write any data -- just read from Keep, and report md5sums if
requested.
""")
parser.add_argument('-r', action='store_true',
                    help="""
Retrieve all files in the specified collection/prefix. This is the
default behavior if the "locator" argument ends with a forward slash.
""")
group = parser.add_mutually_exclusive_group()
group.add_argument('-f', action='store_true',
                   help="""
Overwrite existing files while writing. The default behavior is to
refuse to write *anything* if any of the output files already
exist. As a special case, -f is not needed to write to /dev/stdout.
""")
group.add_argument('--skip-existing', action='store_true',
                   help="""
Skip files that already exist. The default behavior is to refuse to
write *anything* if any files exist that would have to be
overwritten. This option causes even devices, sockets, and fifos to be
skipped.
""")

args = parser.parse_args()
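
# Hypothetical example invocations (the collection UUID below is made up,
# for illustration only):
#   arv-get 962eh-4zz18-xxxxxxxxxxxxxxx                   # write manifest text to stdout
#   arv-get 962eh-4zz18-xxxxxxxxxxxxxxx/foo.txt foo.txt   # fetch a single file
#   arv-get -r 962eh-4zz18-xxxxxxxxxxxxxxx/ ./outdir/     # fetch everything into an existing directory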

if args.locator[-1] == os.sep:
    args.r = True
if (args.r and
    not args.n and
    not (args.destination and
         os.path.isdir(args.destination))):
    parser.error('Destination is not a directory.')
if not args.r and (os.path.isdir(args.destination) or
                   args.destination[-1] == os.path.sep):
    args.destination = os.path.join(args.destination,
                                    os.path.basename(args.locator))
    logger.debug("Appended source file name to destination directory: %s",
                 args.destination)

if args.destination == '-':
    args.destination = '/dev/stdout'
if args.destination == '/dev/stdout':
    # Normally you have to use -f to write to a file (or device) that
    # already exists, but "-" and "/dev/stdout" are common enough to
    # merit a special exception.
    args.f = True
else:
    args.destination = args.destination.rstrip(os.sep)

# Turn on --progress by default if stderr is a tty and output is
# either going to a named file, or going (via stdout) to something
# that is not a tty.
if (not (args.batch_progress or args.no_progress)
    and sys.stderr.isatty()
    and (args.destination != '/dev/stdout'
         or not sys.stdout.isatty())):
    args.progress = True
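
# Split the locator into the collection identifier and an optional
# "/path" suffix naming a file (or directory prefix) inside it,
# e.g. "<uuid>/subdir/file.txt" -> "<uuid>" and "/subdir/file.txt".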
r = re.search(r'^(.*?)(/.*)?$', args.locator)
collection = r.group(1)
get_prefix = r.group(2)
if args.r and not get_prefix:
    get_prefix = os.sep
api_client = arvados.api('v1')
reader = arvados.CollectionReader(collection, num_retries=args.retries)
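
# With no file path or prefix, just write the collection's manifest
# text to the destination and exit.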
if not get_prefix:
    if not args.n:
        open_flags = os.O_CREAT | os.O_WRONLY
        if not args.f:
            open_flags |= os.O_EXCL
        try:
            out_fd = os.open(args.destination, open_flags)
            with os.fdopen(out_fd, 'wb') as out_file:
                out_file.write(reader.manifest_text())
        except (IOError, OSError) as error:
            abort("can't write to '{}': {}".format(args.destination, error))
        except (arvados.errors.ApiError, arvados.errors.KeepReadError) as error:
            abort("failed to download '{}': {}".format(collection, error))
    sys.exit(0)

reader.normalize()

# Scan the collection. Make an array of (stream, file, local
# destination filename) tuples, and add up total size to extract.
todo = []
todo_bytes = 0
try:
    for s in reader.all_streams():
        for f in s.all_files():
            if get_prefix and get_prefix[-1] == os.sep:
                if 0 != string.find(os.path.join(s.name(), f.name()),
                                    '.' + get_prefix):
                    continue
                dest_path = os.path.join(
                    args.destination,
                    os.path.join(s.name(), f.name())[len(get_prefix)+1:])
                if (not (args.n or args.f or args.skip_existing) and
                    os.path.exists(dest_path)):
                    abort('Local file %s already exists.' % (dest_path,))
            else:
                if os.path.join(s.name(), f.name()) != '.' + get_prefix:
                    continue
                dest_path = args.destination
            todo += [(s, f, dest_path)]
            todo_bytes += f.size()
except arvados.errors.NotFoundError as e:
    abort(e)

# Read data, and (if not -n) write to local file(s) or pipe.

out_bytes = 0
for s,f,outfilename in todo:
    outfile = None
    digestor = None
    if not args.n:
        if args.skip_existing and os.path.exists(outfilename):
            logger.debug('Local file %s exists. Skipping.', outfilename)
            continue
        elif not args.f and (os.path.isfile(outfilename) or
                             os.path.isdir(outfilename)):
            # Good thing we looked again: apparently this file wasn't
            # here yet when we checked earlier.
            abort('Local file %s already exists.' % (outfilename,))
        if args.r:
            arvados.util.mkdir_dash_p(os.path.dirname(outfilename))
        try:
            outfile = open(outfilename, 'wb')
        except Exception as error:
            abort('Open(%s) failed: %s' % (outfilename, error))
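    # Optionally compute a running digest (--hash / --md5sum) of each
    # file as it is read from Keep.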
    if args.hash:
        digestor = hashlib.new(args.hash)
    try:
        for data in f.readall():
            if outfile:
                outfile.write(data)
            if digestor:
                digestor.update(data)
            out_bytes += len(data)
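            # Progress goes to stderr so it does not corrupt the data
            # being written; byte counts are reported in MiB.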
            if args.progress:
                sys.stderr.write('\r%d MiB / %d MiB %.1f%%' %
                                 (out_bytes >> 20,
                                  todo_bytes >> 20,
                                  (100
                                   if todo_bytes==0
                                   else 100.0*out_bytes/todo_bytes)))
            elif args.batch_progress:
                sys.stderr.write('%s %d: %d read %d total\n' %
                                 (sys.argv[0], os.getpid(),
                                  out_bytes, todo_bytes))
        if digestor:
            sys.stderr.write("%s %s/%s\n"
                             % (digestor.hexdigest(), s.name(), f.name()))
    except KeyboardInterrupt:
        if outfile and outfilename != '/dev/stdout':
            os.unlink(outfilename)
        break

if args.progress:
    sys.stderr.write('\n')