# Module-level logger, named after the basename of the invoked script.
11 logger = logging.getLogger(os.path.basename(sys.argv[0]))
# Command-line interface: two positionals (locator, optional destination)
# followed by progress, hashing, dry-run, recursion and overwrite options.
13 parser = argparse.ArgumentParser(
14 description='Copy data from Keep to a local file or pipe.')
15 parser.add_argument('locator', type=str,
17 Collection locator, optionally with a file path or prefix.
# 'destination' is optional (nargs='?') and defaults to /dev/stdout.
19 parser.add_argument('destination', type=str, nargs='?', default='/dev/stdout',
21 Local file or directory where the data is to be written. Default:
# Progress reporting: --progress, --no-progress and --batch-progress are
# mutually exclusive.
24 group = parser.add_mutually_exclusive_group()
25 group.add_argument('--progress', action='store_true',
27 Display human-readable progress on stderr (bytes and, if possible,
28 percentage of total data size). This is the default behavior when
29 stderr is a tty and stdout is not a tty.
31 group.add_argument('--no-progress', action='store_true',
33 Do not display human-readable progress on stderr.
35 group.add_argument('--batch-progress', action='store_true',
37 Display machine-readable progress on stderr (bytes and, if known,
# Hashing: --hash and --md5sum are mutually exclusive; both populate
# args.hash (--md5sum stores the constant 'md5' there).
40 group = parser.add_mutually_exclusive_group()
41 group.add_argument('--hash',
43 Display the hash of each file as it is read from Keep, using the given
44 hash algorithm. Supported algorithms include md5, sha1, sha224,
45 sha256, sha384, and sha512.
47 group.add_argument('--md5sum', action='store_const',
48 dest='hash', const='md5',
50 Display the MD5 hash of each file as it is read from Keep.
# -n: read-only / dry-run flag (stored as args.n).
52 parser.add_argument('-n', action='store_true',
54 Do not write any data -- just read from Keep, and report md5sums if
# -r: retrieve every file under the given collection/prefix (args.r).
57 parser.add_argument('-r', action='store_true',
59 Retrieve all files in the specified collection/prefix. This is the
60 default behavior if the "locator" argument ends with a forward slash.
# Overwrite policy: -f and --skip-existing are mutually exclusive.
62 group = parser.add_mutually_exclusive_group()
63 group.add_argument('-f', action='store_true',
65 Overwrite existing files while writing. The default behavior is to
66 refuse to write *anything* if any of the output files already
67 exist. As a special case, -f is not needed to write to /dev/stdout.
69 group.add_argument('--skip-existing', action='store_true',
71 Skip files that already exist. The default behavior is to refuse to
72 write *anything* if any files exist that would have to be
73 overwritten. This option causes even devices, sockets, and fifos to be
# Parse sys.argv; argparse exits with a usage error on invalid input.
77 args = parser.parse_args()
# Post-parse normalization of args.locator / args.destination and of the
# progress and overwrite defaults.
#
# A locator ending in the path separator is special-cased here; per the -r
# help text this presumably turns on recursive mode (the body of this `if`
# is not visible in this excerpt -- confirm).
# NOTE(review): args.locator[-1] raises IndexError if the locator is an
# empty string -- confirm whether that input needs a friendlier error.
79 if args.locator[-1] == os.sep:
83 not (args.destination and
84 os.path.isdir(args.destination))):
85 parser.error('Destination is not a directory.')
# Single-file mode writing into a directory (or a path ending in the
# separator): append the locator's basename so the destination names a file.
86 if not args.r and (os.path.isdir(args.destination) or
87 args.destination[-1] == os.path.sep):
88 args.destination = os.path.join(args.destination,
89 os.path.basename(args.locator))
90 logger.debug("Appended source file name to destination directory: %s" %
93 # Turn on --progress by default if stderr is a tty and stdout isn't.
94 if (not (args.batch_progress or args.no_progress)
95 and os.isatty(sys.stderr.fileno())
96 and not os.isatty(sys.stdout.fileno())):
# "-" is accepted as an alias for /dev/stdout.
99 if args.destination == '-':
100 args.destination = '/dev/stdout'
101 if args.destination == '/dev/stdout':
102 # Normally you have to use -f to write to a file (or device) that
103 # already exists, but "-" and "/dev/stdout" are common enough to
104 # merit a special exception.
# Drop any trailing path separators from the destination.
107 args.destination = args.destination.rstrip(os.sep)
# Split the locator at its first '/': group 1 is the collection part,
# group 2 is the remainder including the leading slash (None when the
# locator contains no '/').
112 r = re.search(r'^(.*?)(/.*)?$', args.locator)
113 collection = r.group(1)
114 get_prefix = r.group(2)
# Recursive mode with no path component (the handling of this case is not
# visible in this excerpt).
115 if args.r and not get_prefix:
# Direct fetch path: write the bytes returned by arvados.Keep.get(collection)
# straight to args.destination. An existing destination is reported as an
# error unless -f was given. The condition that selects this path and the
# enclosing `try:` are not visible in this excerpt (presumably taken when the
# locator has no file path component -- confirm).
123 if not args.f and os.path.exists(args.destination):
124 logger.error('Local file %s already exists' % args.destination)
126 with open(args.destination, 'wb') as f:
127 f.write(arvados.Keep.get(collection))
# Handles the case where the requested object cannot be found in Keep.
129 except arvados.errors.NotFoundError as e:
# Open the collection and plan the transfer: every selected file becomes a
# (stream, file, dest_path) entry in `todo`, and `todo_bytes` accumulates
# the sizes of the selected files.
133 reader = arvados.CollectionReader(collection)
135 # Scan the collection. Make an array of (stream, file, local
136 # destination filename) tuples, and add up total size to extract.
139 for s in reader.all_streams():
140 for f in s.all_files():
# Prefix mode: get_prefix ends with the path separator, so files are
# selected by comparing the start of their "<stream>/<file>" path.
# NOTE(review): string.find() is the Python 2 `string` module function and
# does not exist in Python 3 -- confirm the target interpreter.
141 if get_prefix and get_prefix[-1] == os.sep:
142 if 0 != string.find(os.path.join(s.name(), f.name()),
# Local path = destination joined with the file's path after dropping its
# first len(get_prefix)+1 characters.
145 dest_path = os.path.join(
147 os.path.join(s.name(), f.name())[len(get_prefix)+1:])
# Refuse up front to clobber an existing local file unless -n, -f or
# --skip-existing was given.
148 if (not (args.n or args.f or args.skip_existing) and
149 os.path.exists(dest_path)):
150 logger.error('Local file %s already exists' % dest_path)
# Exact-file mode (presumably the `else` branch of the prefix test): only
# the file whose path equals '.' + get_prefix is selected, and it is
# written to args.destination itself.
153 if os.path.join(s.name(), f.name()) != '.' + get_prefix:
155 dest_path = args.destination
156 todo += [(s, f, dest_path)]
157 todo_bytes += f.size()
# Handles the case where the collection cannot be found.
158 except arvados.errors.NotFoundError as e:
# Transfer phase: walk the planned (stream, file, outfilename) entries, open
# each output file, stream the file's data, and optionally hash it and
# report progress on stderr. Several guarding lines (e.g. the -n, hash and
# progress conditions) are not visible in this excerpt.
162 # Read data, and (if not -n) write to local file(s) or pipe.
165 for s,f,outfilename in todo:
# With --skip-existing, files already present locally are left untouched.
169 if args.skip_existing and os.path.exists(outfilename):
170 logger.debug('Local file %s exists. Skipping.' % outfilename)
# Second existence check, made just before writing: without -f, an existing
# regular file or directory at the output path is an error.
172 elif not args.f and (os.path.isfile(outfilename) or
173 os.path.isdir(outfilename)):
174 # Good thing we looked again: apparently this file wasn't
175 # here yet when we checked earlier.
176 logger.error('Local file %s already exists' % outfilename)
# Create the output file's parent directory (mkdir -p style), then open the
# file for binary writing; any failure to open is logged.
179 arvados.util.mkdir_dash_p(os.path.dirname(outfilename))
181 outfile = open(outfilename, 'wb')
182 except Exception as e:
183 logger.error('Open(%s) failed: %s' % (outfilename, e))
# Hash object for the algorithm named by --hash / --md5sum (presumably only
# created when args.hash is set -- the guard is not visible here).
186 digestor = hashlib.new(args.hash)
# Stream the file chunk by chunk, feeding the digest and counting bytes.
188 for data in f.readall():
192 digestor.update(data)
193 out_bytes += len(data)
# Human-readable progress: '\r' rewrites the same stderr line in place.
195 sys.stderr.write('\r%d MiB / %d MiB %.1f%%' %
200 else 100.0*out_bytes/todo_bytes)))
# Machine-readable progress: one "<argv0> <pid> read <bytes> total <bytes>"
# line per update.
201 elif args.batch_progress:
202 sys.stderr.write('%s %d read %d total\n' %
203 (sys.argv[0], os.getpid(),
204 out_bytes, todo_bytes))
# Report the digest as "<hexdigest> <stream name>/<file name>" on stderr.
206 sys.stderr.write("%s %s/%s\n"
207 % (digestor.hexdigest(), s.name(), f.name()))
# On Ctrl-C the output file is removed (the condition guarding the unlink is
# not visible in this excerpt).
208 except KeyboardInterrupt:
210 os.unlink(outfilename)
# Terminating newline on stderr (presumably to finish the '\r' progress
# line -- confirm the guarding condition).
214 sys.stderr.write('\n')