# Module-level logger for the arv-get CLI; messages are tagged 'arvados.arv-get'.
13 logger = logging.getLogger('arvados.arv-get')
# abort(msg, code=1): report a fatal error on stderr as "arv-get: <msg>".
# Presumably terminates the process with exit status `code` — the exit
# statement (original line 17) falls outside this excerpt; TODO confirm.
15 def abort(msg, code=1):
16 print >>sys.stderr, "arv-get:", msg
# Command-line interface definition. Positional args: `locator` (collection
# locator, optionally followed by a file path or prefix) and `destination`
# (local file/dir, default /dev/stdout). Mutually exclusive groups:
#   --progress / --no-progress / --batch-progress  (progress reporting style)
#   --hash ALGO / --md5sum                         (per-file digest display)
#   -f / --skip-existing                           (existing-file policy)
# Plus -n (read/verify only, write nothing) and -r (recursive retrieval).
# NOTE(review): the help="""...""" delimiter lines are elided in this excerpt,
# so the bare text lines below are fragments of those help strings.
19 parser = argparse.ArgumentParser(
20 description='Copy data from Keep to a local file or pipe.')
21 parser.add_argument('locator', type=str,
23 Collection locator, optionally with a file path or prefix.
25 parser.add_argument('destination', type=str, nargs='?', default='/dev/stdout',
27 Local file or directory where the data is to be written. Default:
30 group = parser.add_mutually_exclusive_group()
31 group.add_argument('--progress', action='store_true',
33 Display human-readable progress on stderr (bytes and, if possible,
34 percentage of total data size). This is the default behavior when
35 stderr is a tty and stdout is not a tty.
37 group.add_argument('--no-progress', action='store_true',
39 Do not display human-readable progress on stderr.
41 group.add_argument('--batch-progress', action='store_true',
43 Display machine-readable progress on stderr (bytes and, if known,
46 group = parser.add_mutually_exclusive_group()
47 group.add_argument('--hash',
49 Display the hash of each file as it is read from Keep, using the given
50 hash algorithm. Supported algorithms include md5, sha1, sha224,
51 sha256, sha384, and sha512.
53 group.add_argument('--md5sum', action='store_const',
54 dest='hash', const='md5',
56 Display the MD5 hash of each file as it is read from Keep.
58 parser.add_argument('-n', action='store_true',
60 Do not write any data -- just read from Keep, and report md5sums if
63 parser.add_argument('-r', action='store_true',
65 Retrieve all files in the specified collection/prefix. This is the
66 default behavior if the "locator" argument ends with a forward slash.
68 group = parser.add_mutually_exclusive_group()
69 group.add_argument('-f', action='store_true',
71 Overwrite existing files while writing. The default behavior is to
72 refuse to write *anything* if any of the output files already
73 exist. As a special case, -f is not needed to write to /dev/stdout.
75 group.add_argument('--skip-existing', action='store_true',
77 Skip files that already exist. The default behavior is to refuse to
78 write *anything* if any files exist that would have to be
79 overwritten. This option causes even devices, sockets, and fifos to be
83 args = parser.parse_args()
# Argument validation and destination normalization:
# - a locator ending in os.sep implies recursive mode (its body, original
#   lines 86-88, is elided here — presumably sets args.r; TODO confirm);
# - recursive mode requires `destination` to be an existing directory;
# - in non-recursive mode, if destination is a directory (or ends in a
#   path separator) the source's basename is appended, mimicking `cp`.
# NOTE(review): args.locator[-1] / args.destination[-1] would raise
# IndexError on empty strings; argparse makes empty values unlikely but
# not impossible here.
85 if args.locator[-1] == os.sep:
89 not (args.destination and
90 os.path.isdir(args.destination))):
91 parser.error('Destination is not a directory.')
92 if not args.r and (os.path.isdir(args.destination) or
93 args.destination[-1] == os.path.sep):
94 args.destination = os.path.join(args.destination,
95 os.path.basename(args.locator))
96 logger.debug("Appended source file name to destination directory: %s",
# Progress default and stdout special-casing. The branch bodies (e.g. the
# line enabling args.progress, and the one following the /dev/stdout
# comment, original lines 103-104 and 111-112) are elided in this excerpt.
# Trailing path separators are stripped so later os.path.join calls behave.
99 # Turn on --progress by default if stderr is a tty and stdout isn't.
100 if (not (args.batch_progress or args.no_progress)
101 and os.isatty(sys.stderr.fileno())
102 and not os.isatty(sys.stdout.fileno())):
105 if args.destination == '-':
106 args.destination = '/dev/stdout'
107 if args.destination == '/dev/stdout':
108 # Normally you have to use -f to write to a file (or device) that
109 # already exists, but "-" and "/dev/stdout" are common enough to
110 # merit a special exception.
113 args.destination = args.destination.rstrip(os.sep)
# Split the locator into the collection identifier (everything before the
# first '/') and an optional file path/prefix (the '/'-led remainder, or
# None when absent). The non-greedy first group guarantees the split
# happens at the FIRST slash. The -r branch body is elided here.
116 r = re.search(r'^(.*?)(/.*)?$', args.locator)
117 collection = r.group(1)
118 get_prefix = r.group(2)
119 if args.r and not get_prefix:
# Fetch the collection manifest: try the API server first, then fall back
# to reading the manifest block directly from Keep (deprecated path).
# The `try:` headers and some logger-argument lines are elided in this
# excerpt, so the except clauses below appear without their try bodies.
# NOTE(review): the bare `except Exception` intentionally treats ANY API
# failure as "not found" to trigger the Keep fallback.
127 if not args.f and os.path.exists(args.destination):
128 abort('Local file %s already exists.' % (args.destination,))
129 with open(args.destination, 'wb') as f:
131 c = arvados.api('v1').collections().get(
132 uuid=collection).execute()
133 manifest = c['manifest_text']
134 except Exception as e:
136 "Collection %s not found. " +
137 "Trying to fetch directly from Keep (deprecated).",
139 manifest = arvados.Keep.get(collection)
142 except arvados.errors.NotFoundError as e:
145 reader = arvados.CollectionReader(collection)
# Pre-scan pass: walk every stream/file in the collection, filter by
# get_prefix, map each match to its local destination path, refuse early
# if a destination already exists (unless -n/-f/--skip-existing), and
# accumulate (stream, file, dest) tuples plus the total byte count used
# for progress percentages. Uses the deprecated `string.find` module
# function for the prefix test. Several lines (the `try:`, the prefix
# argument to string.find, dest-path components) are elided here.
147 # Scan the collection. Make an array of (stream, file, local
148 # destination filename) tuples, and add up total size to extract.
151 for s in reader.all_streams():
152 for f in s.all_files():
153 if get_prefix and get_prefix[-1] == os.sep:
154 if 0 != string.find(os.path.join(s.name(), f.name()),
157 dest_path = os.path.join(
159 os.path.join(s.name(), f.name())[len(get_prefix)+1:])
160 if (not (args.n or args.f or args.skip_existing) and
161 os.path.exists(dest_path)):
162 abort('Local file %s already exists.' % (dest_path,))
164 if os.path.join(s.name(), f.name()) != '.' + get_prefix:
166 dest_path = args.destination
167 todo += [(s, f, dest_path)]
168 todo_bytes += f.size()
169 except arvados.errors.NotFoundError as e:
# Main transfer loop: for each queued (stream, file, dest) tuple, re-check
# the existing-file policy (the file may have appeared since the scan),
# create parent directories, stream the file contents from Keep in chunks,
# optionally write them out, feed an incremental hashlib digest (--hash /
# --md5sum), and emit human-readable or machine-readable progress on
# stderr. On Ctrl-C, the partially written file is unlinked — except when
# writing to stdout. Several lines (write calls, the progress-percentage
# expression, outfile selection for stdout, final cleanup) are elided in
# this excerpt.
172 # Read data, and (if not -n) write to local file(s) or pipe.
175 for s,f,outfilename in todo:
179 if args.skip_existing and os.path.exists(outfilename):
180 logger.debug('Local file %s exists. Skipping.', outfilename)
182 elif not args.f and (os.path.isfile(outfilename) or
183 os.path.isdir(outfilename)):
184 # Good thing we looked again: apparently this file wasn't
185 # here yet when we checked earlier.
186 abort('Local file %s already exists.' % (outfilename,))
188 arvados.util.mkdir_dash_p(os.path.dirname(outfilename))
190 outfile = open(outfilename, 'wb')
191 except Exception as e:
192 abort('Open(%s) failed: %s' % (outfilename, e))
194 digestor = hashlib.new(args.hash)
196 for data in f.readall():
200 digestor.update(data)
201 out_bytes += len(data)
203 sys.stderr.write('\r%d MiB / %d MiB %.1f%%' %
208 else 100.0*out_bytes/todo_bytes)))
209 elif args.batch_progress:
210 sys.stderr.write('%s %d read %d total\n' %
211 (sys.argv[0], os.getpid(),
212 out_bytes, todo_bytes))
214 sys.stderr.write("%s %s/%s\n"
215 % (digestor.hexdigest(), s.name(), f.name()))
216 except KeyboardInterrupt:
217 if outfile and outfile != '/dev/stdout':
218 os.unlink(outfilename)
222 sys.stderr.write('\n')