import argparse
import hashlib
import logging
import os
import re
import string
import sys

import arvados
import arvados.commands._util as arv_cmd

logger = logging.getLogger('arvados.arv-get')
def abort(msg, code=1):
    print >>sys.stderr, "arv-get:", msg
    exit(code)
parser = argparse.ArgumentParser(
    description='Copy data from Keep to a local file or pipe.',
    parents=[arv_cmd.retry_opt])
parser.add_argument('locator', type=str,
                    help="""
Collection locator, optionally with a file path or prefix.
""")
parser.add_argument('destination', type=str, nargs='?', default='/dev/stdout',
                    help="""
Local file or directory where the data is to be written. Default:
/dev/stdout.
""")
group = parser.add_mutually_exclusive_group()
group.add_argument('--progress', action='store_true',
                   help="""
Display human-readable progress on stderr (bytes and, if possible,
percentage of total data size). This is the default behavior when it
is not expected to interfere with the output: specifically, stderr is
a tty _and_ either stdout is not a tty, or output is being written to
named files rather than stdout.
""")
group.add_argument('--no-progress', action='store_true',
                   help="""
Do not display human-readable progress on stderr.
""")
group.add_argument('--batch-progress', action='store_true',
                   help="""
Display machine-readable progress on stderr (bytes and, if known,
total data size).
""")
group = parser.add_mutually_exclusive_group()
group.add_argument('--hash',
                   help="""
Display the hash of each file as it is read from Keep, using the given
hash algorithm. Supported algorithms include md5, sha1, sha224,
sha256, sha384, and sha512.
""")
group.add_argument('--md5sum', action='store_const',
                   dest='hash', const='md5',
                   help="""
Display the MD5 hash of each file as it is read from Keep.
""")
parser.add_argument('-n', action='store_true',
                    help="""
Do not write any data -- just read from Keep, and report md5sums if
requested.
""")
parser.add_argument('-r', action='store_true',
                    help="""
Retrieve all files in the specified collection/prefix. This is the
default behavior if the "locator" argument ends with a forward slash.
""")
group = parser.add_mutually_exclusive_group()
group.add_argument('-f', action='store_true',
                   help="""
Overwrite existing files while writing. The default behavior is to
refuse to write *anything* if any of the output files already
exist. As a special case, -f is not needed to write to /dev/stdout.
""")
group.add_argument('--skip-existing', action='store_true',
                   help="""
Skip files that already exist. The default behavior is to refuse to
write *anything* if any files exist that would have to be
overwritten. This option causes even devices, sockets, and fifos to be
skipped.
""")
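
# Example invocations (the collection locator below is an illustrative
# placeholder, not a real collection):
#   arv-get 1f4b0bc7583c2a7f9102c395f4ffc5e3+45/foo.txt .
#   arv-get 1f4b0bc7583c2a7f9102c395f4ffc5e3+45/ ./output-dir/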

args = parser.parse_args()
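
# Normalize the parsed arguments: a trailing slash on the locator implies
# recursive mode, and a recursive download needs an existing directory as
# its destination.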
if args.locator[-1] == os.sep:
    args.r = True
if (args.r and
    not args.n and
    not (args.destination and
         os.path.isdir(args.destination))):
    parser.error('Destination is not a directory.')
if not args.r and (os.path.isdir(args.destination) or
                   args.destination[-1] == os.path.sep):
    args.destination = os.path.join(args.destination,
                                    os.path.basename(args.locator))
    logger.debug("Appended source file name to destination directory: %s",
                 args.destination)
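
# "-" is accepted as an alias for /dev/stdout, and writing to stdout never
# requires -f.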
if args.destination == '-':
    args.destination = '/dev/stdout'
if args.destination == '/dev/stdout':
    # Normally you have to use -f to write to a file (or device) that
    # already exists, but "-" and "/dev/stdout" are common enough to
    # merit a special exception.
    args.f = True
else:
    args.destination = args.destination.rstrip(os.sep)

# Turn on --progress by default if stderr is a tty and output is
# either going to a named file, or going (via stdout) to something
# that is not a tty.
if (not (args.batch_progress or args.no_progress)
    and sys.stderr.isatty()
    and (args.destination != '/dev/stdout'
         or not sys.stdout.isatty())):
    args.progress = True
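
# Split the locator into the collection identifier and an optional file
# path or prefix within the collection, then open the collection for
# reading (retrying according to args.retries).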
r = re.search(r'^(.*?)(/.*)?$', args.locator)
collection = r.group(1)
get_prefix = r.group(2)
if args.r and not get_prefix:
    get_prefix = os.sep
api_client = arvados.api('v1')
reader = arvados.CollectionReader(collection, num_retries=args.retries)
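
# When no file path or prefix was given, write the collection's manifest
# text to the destination instead of file contents.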
if not get_prefix:
    if not args.n:
        open_flags = os.O_CREAT | os.O_WRONLY
        if not args.f:
            open_flags |= os.O_EXCL
        try:
            out_fd = os.open(args.destination, open_flags)
            with os.fdopen(out_fd, 'wb') as out_file:
                out_file.write(reader.manifest_text())
        except (IOError, OSError) as error:
            abort("can't write to '{}': {}".format(args.destination, error))
        except (arvados.errors.ApiError, arvados.errors.KeepReadError) as error:
            abort("failed to download '{}': {}".format(collection, error))
    sys.exit(0)

# Scan the collection. Make an array of (stream, file, local
# destination filename) tuples, and add up total size to extract.
todo = []
todo_bytes = 0
try:
    for s in reader.all_streams():
        for f in s.all_files():
            if get_prefix and get_prefix[-1] == os.sep:
                if 0 != string.find(os.path.join(s.name(), f.name()),
                                    '.' + get_prefix):
                    continue
                dest_path = os.path.join(
                    args.destination,
                    os.path.join(s.name(), f.name())[len(get_prefix)+1:])
                if (not (args.n or args.f or args.skip_existing) and
                    os.path.exists(dest_path)):
                    abort('Local file %s already exists.' % (dest_path,))
            else:
                if os.path.join(s.name(), f.name()) != '.' + get_prefix:
                    continue
                dest_path = args.destination
            todo += [(s, f, dest_path)]
            todo_bytes += f.size()
except arvados.errors.NotFoundError as e:
    abort(e)

# Read data, and (if not -n) write to local file(s) or pipe.

out_bytes = 0
for s,f,outfilename in todo:
    outfile = None
    digestor = None
    if not args.n:
        if args.skip_existing and os.path.exists(outfilename):
            logger.debug('Local file %s exists. Skipping.', outfilename)
            continue
        elif not args.f and (os.path.isfile(outfilename) or
                             os.path.isdir(outfilename)):
            # Good thing we looked again: apparently this file wasn't
            # here yet when we checked earlier.
            abort('Local file %s already exists.' % (outfilename,))
        if args.r:
            arvados.util.mkdir_dash_p(os.path.dirname(outfilename))
        try:
            outfile = open(outfilename, 'wb')
        except Exception as error:
            abort('Open(%s) failed: %s' % (outfilename, error))
    if args.hash:
        digestor = hashlib.new(args.hash)
    try:
        for data in f.readall():
            if outfile:
                outfile.write(data)
            if digestor:
                digestor.update(data)
            out_bytes += len(data)
            if args.progress:
                sys.stderr.write('\r%d MiB / %d MiB %.1f%%' %
                                 (out_bytes >> 20,
                                  todo_bytes >> 20,
                                  (1.0 if todo_bytes==0
                                   else 100.0*out_bytes/todo_bytes)))
            elif args.batch_progress:
                sys.stderr.write('%s %d read %d total\n' %
                                 (sys.argv[0], os.getpid(),
                                  out_bytes, todo_bytes))
        if digestor:
            sys.stderr.write("%s %s/%s\n"
                             % (digestor.hexdigest(), s.name(), f.name()))
    except KeyboardInterrupt:
        # Clean up the partially-written file, unless we were writing to
        # stdout (compare the destination name, not the file object).
        if outfile and outfilename != '/dev/stdout':
            os.unlink(outfilename)
        break

if args.progress:
    sys.stderr.write('\n')