4 # --md5sum - display md5 of each file as read from disk
# Command-line interface for copying local files or directories into Keep.
# NOTE(review): this listing carries embedded line numbers with gaps; the
# statements that fall in the gaps (for example the help= openers and the
# bodies of the post-parse conditionals) are not visible here.
10 parser = argparse.ArgumentParser(
11 description='Copy data from the local filesystem to Keep.')
# Zero or more positional paths (nargs='*'); an empty list is replaced with
# /dev/stdin after parsing, further down.
12 parser.add_argument('paths', metavar='path', type=str, nargs='*',
14 Local file or directory. Default: read from standard input.
# Integer depth limit for the manifest structure; defaults to -1.
16 parser.add_argument('--max-manifest-depth', type=int, metavar='N', default=-1,
18 Maximum depth of directory tree to represent in the manifest
19 structure. A directory structure deeper than this will be represented
20 as a single stream in the manifest. If N=0, the manifest will contain
21 a single stream. Default: -1 (unlimited), i.e., exactly one manifest
22 stream per filesystem directory that contains files.
# Output-mode flags: at most one option from this group may be given.
24 group = parser.add_mutually_exclusive_group()
# Alias: dest='stream' makes this set args.stream, the attribute --stream sets.
25 group.add_argument('--as-stream', action='store_true', dest='stream',
29 group.add_argument('--stream', action='store_true',
31 Store the file content and display the resulting manifest on
32 stdout. Do not write the manifest to Keep or save a Collection object
# The next two options are aliases: both set args.manifest.
35 group.add_argument('--as-manifest', action='store_true', dest='manifest',
37 Synonym for --manifest.
39 group.add_argument('--in-manifest', action='store_true', dest='manifest',
41 Synonym for --manifest.
43 group.add_argument('--manifest', action='store_true',
45 Store the file data and resulting manifest in Keep, save a Collection
46 object in Arvados, and display the manifest locator (Collection uuid)
47 on stdout. This is the default behavior if more than one path argument
48 is given, or the path given is a directory, or a --filename argument
# Alias: dest='raw' makes this set args.raw, the attribute --raw sets.
51 group.add_argument('--as-raw', action='store_true', dest='raw',
55 group.add_argument('--raw', action='store_true',
57 Store the file content and display the data block locators on stdout,
58 separated by spaces, with a trailing newline. Do not store a
59 manifest. This is the default behavior when reading data from a single
60 file or standard input.
# Alias: dest='filename' makes this set args.filename, like --filename.
62 parser.add_argument('--use-filename', type=str, default=None, dest='filename',
64 Synonym for --filename.
66 parser.add_argument('--filename', type=str, default=None,
68 Use the given filename in the manifest, instead of the name of the
69 local file. This is useful when "-" or "/dev/stdin" is given as an
70 input file. It can be used only if there is exactly one path given and
71 it is not a directory. Implies --manifest.
# Progress-reporting flags: a second, independent mutually exclusive group
# (the name `group` is rebound here).
73 group = parser.add_mutually_exclusive_group()
74 group.add_argument('--progress', action='store_true',
76 Display human-readable progress on stderr (bytes and, if possible,
77 percentage of total data size). This is the default behavior when
80 group.add_argument('--no-progress', action='store_true',
82 Do not display human-readable progress on stderr, even if stderr is a
85 group.add_argument('--batch-progress', action='store_true',
87 Display machine-readable progress on stderr (bytes and, if known,
91 args = parser.parse_args()
# No paths on the command line: fall back to reading standard input.
93 if len(args.paths) == 0:
94 args.paths += ['/dev/stdin']
# Case 1: several paths, or a single path that is a directory.
# Case 2 (the elif): a single non-directory path with none of --filename,
# --stream or --manifest given.
96 if len(args.paths) != 1 or os.path.isdir(args.paths[0]):
99 --filename argument cannot be used when storing a directory or
102 elif not args.filename and not args.stream and not args.manifest:
103 # When reading from a single non-directory, and no --filename is
104 # given, default to writing raw blocks rather than a manifest.
107 # Turn on --progress by default if stderr is a tty.
# The default applies only when neither --batch-progress nor --no-progress
# was requested.
108 if (not (args.batch_progress or args.no_progress)
109 and os.isatty(sys.stderr.fileno())):
class CollectionWriterWithProgress(arvados.CollectionWriter):
    """CollectionWriter that reports upload progress on stderr.

    Reporting is controlled by attributes the caller assigns on the
    instance:

    display_type   -- 'machine' writes one line per flush; any other
                      truthy value (the script uses 'human') rewrites a
                      single status line in place using '\\r'. A falsy
                      value (the default) disables reporting.
    bytes_expected -- optional total size in bytes. When present and
                      positive, human output includes a percentage;
                      machine output prints it, or -1 when it is absent.

    bytes_flushed is maintained by flush_data() and holds the number of
    bytes handed to the base class so far.
    """

    # Class-level defaults replace per-call hasattr()/getattr() probing.
    # bytes_expected deliberately has no default: callers delete it to
    # signal "total size unknown".
    display_type = None
    bytes_flushed = 0

    def flush_data(self, *args, **kwargs):
        """Flush buffered data through the base class, then report progress.

        All arguments are forwarded unchanged to
        arvados.CollectionWriter.flush_data().
        """
        parent = super(CollectionWriterWithProgress, self)
        if not self.display_type:
            # Reporting is off, but the data still has to be flushed;
            # returning without delegating would leave it in the buffer.
            return parent.flush_data(*args, **kwargs)
        # Count the whole buffer as flushed up front, then subtract
        # whatever the base class left in the buffer after flushing.
        self.bytes_flushed += self._data_buffer_len
        parent.flush_data(*args, **kwargs)
        self.bytes_flushed -= self._data_buffer_len
        self._report_progress()

    def _report_progress(self):
        """Write one progress update to stderr in the selected format."""
        if self.display_type == 'machine':
            # NOTE(review): the first two fields (program name and pid)
            # are reconstructed from the '%s %d:' prefix of the format
            # string -- confirm against the consumer of this output.
            sys.stderr.write('%s %d: %d written %d total\n' %
                             (sys.argv[0],
                              os.getpid(),
                              self.bytes_flushed,
                              getattr(self, 'bytes_expected', -1)))
        elif getattr(self, 'bytes_expected', 0) > 0:
            pct = 100.0 * self.bytes_flushed / self.bytes_expected
            # ">> 20" converts bytes to whole mebibytes for display.
            sys.stderr.write('\r%dM / %dM %.1f%% ' %
                             (self.bytes_flushed >> 20,
                              self.bytes_expected >> 20, pct))
        else:
            # Total unknown: show only the running byte count.
            sys.stderr.write('\r%d ' % self.bytes_flushed)

    def manifest_text(self, *args, **kwargs):
        """Return the base-class manifest text and end progress reporting.

        In 'human' mode a newline is written to stderr to terminate the
        in-place status line. display_type is then cleared, so later
        flushes are silent.
        """
        text = (super(CollectionWriterWithProgress, self)
                .manifest_text(*args, **kwargs))
        if self.display_type:
            if self.display_type == 'human':
                sys.stderr.write('\n')
            self.display_type = None
        return text
# Writer selection: the progress-reporting subclass is tagged with
# display_type 'human' or 'machine'; otherwise a plain CollectionWriter is
# used. NOTE(review): the conditions guarding the first and last assignment
# fall in gaps of this listing and are not visible here.
149 writer = CollectionWriterWithProgress()
150 writer.display_type = 'human'
151 elif args.batch_progress:
152 writer = CollectionWriterWithProgress()
153 writer.display_type = 'machine'
155 writer = arvados.CollectionWriter()
# A path of "-" is rewritten to /dev/stdin.
157 args.paths = [('/dev/stdin' if p=='-' else p) for p in args.paths]
159 # Walk the given directory trees and stat files, adding up file sizes,
160 # so we can display progress as percent
161 writer.bytes_expected = 0
162 for path in args.paths:
163 if os.path.isdir(path):
164 for filename in arvados.util.listdir_recursive(path):
165 writer.bytes_expected += os.path.getsize(
166 os.path.join(path, filename))
# Neither a directory nor a regular file: the running total is removed from
# the writer. CollectionWriterWithProgress.flush_data reads bytes_expected
# through getattr() defaults, so it tolerates the attribute being absent.
167 elif not os.path.isfile(path):
168 del writer.bytes_expected
171 writer.bytes_expected += os.path.getsize(path)
173 # Copy file data to Keep.
174 for path in args.paths:
175 if os.path.isdir(path):
176 writer.write_directory_tree(path,
177 max_manifest_depth=args.max_manifest_depth)
# Non-directory input: start a new stream and name the file after --filename
# when given, otherwise after the last component of the path.
179 writer.start_new_stream()
180 writer.start_new_file(args.filename or os.path.split(path)[1])
181 with open(path, 'rb') as f:
# Python 2 print statement; the trailing comma suppresses the newline that
# print would otherwise append.
189 print writer.manifest_text(),
191 writer.finish_current_stream()
# NOTE(review): locators are joined with ',' and print appends its own
# newline after the explicit '\n', giving two newlines. The --raw help text
# says "separated by spaces, with a trailing newline" -- confirm which is
# intended.
192 print string.join(writer.data_locators(), ',') + '\n'
194 # Register the resulting collection in Arvados.
195 arvados.api().collections().create(
# NOTE(review): writer.finish() is called here and again for the final print;
# presumably it returns the same locator each time -- confirm.
197 'uuid': writer.finish(),
198 'manifest_text': writer.manifest_text(),
202 # Print the locator (uuid) of the new collection.
203 print writer.finish()