#!/usr/bin/env python3
"""arv-get: copy data from Arvados Keep to a local file or pipe.

Fetches either a whole collection blob or the files matching a path /
prefix inside a collection, writing them to a local destination and
optionally reporting progress and per-file hashes on stderr.

NOTE(review): this chunk reached review with many lines elided; the
statements marked NOTE(review) below are conservative reconstructions
of the missing code and should be confirmed against upstream history.
"""

import argparse
import hashlib
import logging
import os
import re
import sys

import arvados  # project library: Keep client, CollectionReader, util helpers

logger = logging.getLogger(os.path.basename(sys.argv[0]))

parser = argparse.ArgumentParser(
    description='Copy data from Keep to a local file or pipe.')
parser.add_argument('locator', type=str,
                    help="""
Collection locator, optionally with a file path or prefix.
""")
parser.add_argument('destination', type=str, nargs='?', default='/dev/stdout',
                    help="""
Local file or directory where the data is to be written. Default:
/dev/stdout.
""")
group = parser.add_mutually_exclusive_group()
group.add_argument('--progress', action='store_true',
                   help="""
Display human-readable progress on stderr (bytes and, if possible,
percentage of total data size). This is the default behavior when
stderr is a tty and stdout is not a tty.
""")
group.add_argument('--no-progress', action='store_true',
                   help="""
Do not display human-readable progress on stderr.
""")
group.add_argument('--batch-progress', action='store_true',
                   help="""
Display machine-readable progress on stderr (bytes and, if known,
total data size).
""")
group = parser.add_mutually_exclusive_group()
group.add_argument('--hash',
                   help="""
Display the hash of each file as it is read from Keep, using the given
hash algorithm. Supported algorithms include md5, sha1, sha224,
sha256, sha384, and sha512.
""")
group.add_argument('--md5sum', action='store_const',
                   dest='hash', const='md5',
                   help="""
Display the MD5 hash of each file as it is read from Keep.
""")
parser.add_argument('-n', action='store_true',
                    help="""
Do not write any data -- just read from Keep, and report md5sums if
requested.
""")
parser.add_argument('-r', action='store_true',
                    help="""
Retrieve all files in the specified collection/prefix. This is the
default behavior if the "locator" argument ends with a forward slash.
""")

args = parser.parse_args()

# A trailing separator on the locator implies recursive retrieval.
# FIX: endswith() instead of args.locator[-1] — no IndexError on ''.
if args.locator.endswith(os.sep):
    args.r = True  # NOTE(review): reconstructed; body of this `if` was elided
if (args.r and
    not args.n and
    not (args.destination and
         os.path.isdir(args.destination))):
    parser.error('Destination is not a directory.')
# FIX: [-1:] slice instead of [-1] index — safe for an empty destination.
if not args.r and (os.path.isdir(args.destination) or
                   args.destination[-1:] == os.path.sep):
    parser.error('Destination is a directory.')

# Turn on --progress by default if stderr is a tty and stdout isn't.
if (not (args.batch_progress or args.no_progress)
        and os.isatty(sys.stderr.fileno())
        and not os.isatty(sys.stdout.fileno())):
    args.progress = True  # NOTE(review): reconstructed elided statement

if args.destination == '-':
    args.destination = '/dev/stdout'
args.destination = args.destination.rstrip(os.sep)

# Split the locator into the collection id and an optional "/path"
# (or "/prefix/") inside the collection.
r = re.search(r'^(.*?)(/.*)?$', args.locator)
collection = r.group(1)
get_prefix = r.group(2)
if args.r and not get_prefix:
    # -r with a bare collection id means "everything in the collection".
    get_prefix = os.sep  # NOTE(review): reconstructed elided statement

if not get_prefix:
    # Whole-collection fetch: write the raw block for this locator.
    try:
        if not args.n:
            with open(args.destination, 'wb') as f:
                f.write(arvados.Keep.get(collection))
    except arvados.errors.NotFoundError as e:
        logger.error(e)
        sys.exit(1)
    sys.exit(0)

reader = arvados.CollectionReader(collection)

# Scan the collection. Make an array of (stream, file, local
# destination filename) tuples, and add up total size to extract.
todo = []
todo_bytes = 0
try:
    for s in reader.all_streams():
        for f in s.all_files():
            if get_prefix and get_prefix[-1] == os.sep:
                # Prefix mode: keep files under the prefix and map the
                # remainder of their path under args.destination.
                # FIX: string.find() (Python 2 `string` module function)
                # no longer exists in Python 3; the original
                # `0 != string.find(x, p)` means "x does not start with p".
                if not os.path.join(s.name(), f.name()).startswith(
                        '.' + get_prefix):
                    continue
                dest_path = os.path.join(
                    args.destination,
                    os.path.join(s.name(), f.name())[len(get_prefix)+1:])
            else:
                # Exact-path mode: only the single named file matches.
                if os.path.join(s.name(), f.name()) != '.' + get_prefix:
                    continue
                dest_path = args.destination
            todo += [(s, f, dest_path)]
            todo_bytes += f.size()
except arvados.errors.NotFoundError as e:
    logger.error(e)
    sys.exit(1)

# Read data, and (if not -n) write to local file(s) or pipe.
out_bytes = 0
for s, f, outfilename in todo:
    outfile = None
    digestor = None
    if not args.n:
        if args.r:
            # Recursive extraction: create intermediate directories.
            arvados.util.mkdir_dash_p(os.path.dirname(outfilename))
        try:
            outfile = open(outfilename, 'wb')
        except Exception as e:
            logger.error('Open(%s) failed: %s' % (outfilename, e))
            sys.exit(1)
    if args.hash:
        digestor = hashlib.new(args.hash)
    try:
        for data in f.readall():
            if outfile:
                outfile.write(data)
            if digestor:
                digestor.update(data)
            out_bytes += len(data)
            if args.progress:
                # FIX: guard the percentage against todo_bytes == 0
                # (empty collection) to avoid ZeroDivisionError.
                sys.stderr.write('\r%d MiB / %d MiB %.1f%%' %
                                 (out_bytes >> 20,
                                  todo_bytes >> 20,
                                  (100.0 if todo_bytes == 0
                                   else 100.0*out_bytes/todo_bytes)))
            elif args.batch_progress:
                # FIX: the original format string had three conversion
                # specifiers but four arguments, raising TypeError at
                # runtime; add the missing %d for todo_bytes.
                sys.stderr.write('%s %d read %d total %d\n' %
                                 (sys.argv[0], os.getpid(),
                                  out_bytes, todo_bytes))
        if digestor:
            sys.stderr.write("%s %s/%s\n"
                             % (digestor.hexdigest(), s.name(), f.name()))
    except KeyboardInterrupt:
        # Remove the partial output rather than leave truncated data.
        if outfile and outfilename != '/dev/stdout':
            os.unlink(outfilename)
        break
    finally:
        # FIX: close the descriptor on every path (was leaked before).
        if outfile:
            outfile.close()

if args.progress:
    sys.stderr.write('\n')