2752: Refactor progress reporting in arv-put's CollectionWriter.
[arvados.git] / sdk / python / tests / test_arv-put.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 import os
5 import re
6 import shutil
7 import tempfile
8 import unittest
9
10 import arvados
11 import arvados.commands.put as arv_put
12 from arvados_testutil import ArvadosBaseTestCase, ArvadosKeepLocalStoreTestCase
13
class ArvadosPutResumeCacheTest(ArvadosBaseTestCase):
    """Tests for arv_put.ResumeCache: path naming, storage, and locking.

    Tests that keep a cache open past their own body store it in
    self.last_cache so that tearDown can destroy it.
    """

    # Argument lists used by the cache-name tests below.  Each entry is
    # expected to map to its own cache path (see test_cache_names_unique).
    CACHE_ARGSET = [
        [],
        ['/dev/null'],
        ['/dev/null', '--filename', 'empty'],
        ['/tmp'],
        ['/tmp', '--max-manifest-depth', '0'],
        ['/tmp', '--max-manifest-depth', '1']
        ]

    def tearDown(self):
        super(ArvadosPutResumeCacheTest, self).tearDown()
        # Not every test sets self.last_cache; a missing attribute just
        # means there is nothing to clean up.
        try:
            self.last_cache.destroy()
        except AttributeError:
            pass

    def cache_path_from_arglist(self, arglist):
        # Helper: parse a command-line argument list and return the cache
        # path ResumeCache.make_path derives from the parsed arguments.
        return arv_put.ResumeCache.make_path(arv_put.parse_arguments(arglist))

    def test_cache_names_stable(self):
        # Computing the path twice from the same arguments must agree.
        for argset in self.CACHE_ARGSET:
            self.assertEqual(self.cache_path_from_arglist(argset),
                             self.cache_path_from_arglist(argset),
                             "cache name changed for {}".format(argset))

    def test_cache_names_unique(self):
        # No two entries of CACHE_ARGSET may share a cache path.
        results = []
        for argset in self.CACHE_ARGSET:
            path = self.cache_path_from_arglist(argset)
            self.assertNotIn(path, results)
            results.append(path)

    def test_cache_names_simple(self):
        # The goal here is to make sure the filename doesn't use characters
        # reserved by the filesystem.  Feel free to adjust this regexp as
        # long as it still does that.
        bad_chars = re.compile(r'[^-\.\w]')
        for argset in self.CACHE_ARGSET:
            path = self.cache_path_from_arglist(argset)
            self.assertFalse(bad_chars.search(os.path.basename(path)),
                             "path too exotic: {}".format(path))

    def test_cache_names_ignore_argument_order(self):
        # Reordering paths, or moving options around them, must not change
        # the cache path.
        self.assertEqual(
            self.cache_path_from_arglist(['a', 'b', 'c']),
            self.cache_path_from_arglist(['c', 'a', 'b']))
        self.assertEqual(
            self.cache_path_from_arglist(['-', '--filename', 'stdin']),
            self.cache_path_from_arglist(['--filename', 'stdin', '-']))

    def test_cache_names_ignore_irrelevant_arguments(self):
        # Workaround: parse_arguments bails on --filename with a directory.
        # Set the attribute on the parsed result instead.
        args1 = arv_put.parse_arguments(['/tmp'])
        args2 = arv_put.parse_arguments(['/tmp'])
        args2.filename = 'tmp'
        self.assertEqual(arv_put.ResumeCache.make_path(args1),
                         arv_put.ResumeCache.make_path(args2),
                         "cache path considered --filename for directory")
        self.assertEqual(
            self.cache_path_from_arglist(['-']),
            self.cache_path_from_arglist(['-', '--max-manifest-depth', '1']),
            "cache path considered --max-manifest-depth for file")

    def test_cache_names_treat_negative_manifest_depths_identically(self):
        # Depths of -1 and -2 must produce the same cache path.
        base_args = ['/tmp', '--max-manifest-depth']
        self.assertEqual(
            self.cache_path_from_arglist(base_args + ['-1']),
            self.cache_path_from_arglist(base_args + ['-2']))

    def test_cache_names_treat_stdin_consistently(self):
        # '-' and '/dev/stdin' must be treated as the same input.
        self.assertEqual(
            self.cache_path_from_arglist(['-', '--filename', 'test']),
            self.cache_path_from_arglist(['/dev/stdin', '--filename', 'test']))

    def test_cache_names_identical_for_synonymous_names(self):
        # A relative path and its real path must share a cache path...
        self.assertEqual(
            self.cache_path_from_arglist(['.']),
            self.cache_path_from_arglist([os.path.realpath('.')]))
        # ...and so must a directory and a symlink pointing back at it.
        testdir = self.make_tmpdir()
        looplink = os.path.join(testdir, 'loop')
        os.symlink(testdir, looplink)
        self.assertEqual(
            self.cache_path_from_arglist([testdir]),
            self.cache_path_from_arglist([looplink]))

    def test_cache_names_different_by_api_host(self):
        # Changing ARVADOS_API_HOST in the settings must change the cache
        # path for otherwise identical arguments.
        config = arvados.config.settings()
        orig_host = config.get('ARVADOS_API_HOST')
        try:
            name1 = self.cache_path_from_arglist(['.'])
            config['ARVADOS_API_HOST'] = 'x' + (orig_host or 'localhost')
            self.assertNotEqual(name1, self.cache_path_from_arglist(['.']))
        finally:
            # Restore the original setting.  pop() with a default is used
            # rather than del so that a failure before the key was set
            # cannot raise KeyError here and hide the real error.
            if orig_host is None:
                config.pop('ARVADOS_API_HOST', None)
            else:
                config['ARVADOS_API_HOST'] = orig_host

    def test_basic_cache_storage(self):
        # What is saved must be what is loaded.  The cache is used after
        # the `with` block exits, i.e. after NamedTemporaryFile has removed
        # the file it created.
        thing = ['test', 'list']
        with tempfile.NamedTemporaryFile() as cachefile:
            self.last_cache = arv_put.ResumeCache(cachefile.name)
        self.last_cache.save(thing)
        self.assertEqual(thing, self.last_cache.load())

    def test_empty_cache(self):
        # Loading from a cache that was never saved to raises ValueError.
        with tempfile.NamedTemporaryFile() as cachefile:
            cache = arv_put.ResumeCache(cachefile.name)
        self.assertRaises(ValueError, cache.load)

    def test_cache_persistent(self):
        # Data saved and closed through one ResumeCache must be readable
        # through a second one opened on the same path.
        thing = ['test', 'list']
        path = os.path.join(self.make_tmpdir(), 'cache')
        cache = arv_put.ResumeCache(path)
        cache.save(thing)
        cache.close()
        self.last_cache = arv_put.ResumeCache(path)
        self.assertEqual(thing, self.last_cache.load())

    def test_multiple_cache_writes(self):
        thing = ['short', 'list']
        with tempfile.NamedTemporaryFile() as cachefile:
            self.last_cache = arv_put.ResumeCache(cachefile.name)
        # Start writing an object longer than the one we test, to make
        # sure the cache file gets truncated.
        self.last_cache.save(['long', 'long', 'list'])
        self.last_cache.save(thing)
        self.assertEqual(thing, self.last_cache.load())

    def test_cache_is_locked(self):
        # While `cache` is open, opening the same path again must raise
        # ResumeCacheConflict.  The local reference keeps the first cache
        # alive for the duration of the check.
        with tempfile.NamedTemporaryFile() as cachefile:
            cache = arv_put.ResumeCache(cachefile.name)
            self.assertRaises(arv_put.ResumeCacheConflict,
                              arv_put.ResumeCache, cachefile.name)

    def test_cache_stays_locked(self):
        # The conflict must still be reported after a save().
        with tempfile.NamedTemporaryFile() as cachefile:
            self.last_cache = arv_put.ResumeCache(cachefile.name)
            path = cachefile.name
        self.last_cache.save('test')
        self.assertRaises(arv_put.ResumeCacheConflict,
                          arv_put.ResumeCache, path)

    def test_destroy_cache(self):
        # After destroy(), the same path can be opened again without a
        # conflict, and the destroyed cache can no longer be loaded.
        cachefile = tempfile.NamedTemporaryFile(delete=False)
        try:
            cache = arv_put.ResumeCache(cachefile.name)
            cache.save('test')
            cache.destroy()
            try:
                arv_put.ResumeCache(cachefile.name)
            except arv_put.ResumeCacheConflict:
                self.fail("could not load cache after destroying it")
            self.assertRaises(ValueError, cache.load)
        finally:
            # delete=False means the temporary file is ours to remove.
            if os.path.exists(cachefile.name):
                os.unlink(cachefile.name)
172
173
class ArvadosPutCollectionWriterTest(ArvadosKeepLocalStoreTestCase):
    """Tests for arv_put.ArvPutCollectionWriter, with and without a cache.

    Each test gets a fresh arv_put.ResumeCache in self.cache, backed by a
    temporary file whose name is kept in self.cache_filename.
    """

    def setUp(self):
        super(ArvadosPutCollectionWriterTest, self).setUp()
        # delete=False: the file must outlive this `with` block so the
        # cache path can be checked in tearDown.
        with tempfile.NamedTemporaryFile(delete=False) as cachefile:
            self.cache = arv_put.ResumeCache(cachefile.name)
            self.cache_filename = cachefile.name

    def tearDown(self):
        super(ArvadosPutCollectionWriterTest, self).tearDown()
        # Only destroy the cache while its file is still on disk; close()
        # is called unconditionally afterwards.
        if os.path.exists(self.cache_filename):
            self.cache.destroy()
        self.cache.close()

    def test_writer_caches(self):
        # Writing a file through a cache-backed writer must leave loadable
        # (truthy) state in the cache.
        cwriter = arv_put.ArvPutCollectionWriter(self.cache)
        cwriter.write_file('/dev/null')
        self.assertTrue(self.cache.load())
        self.assertEqual(". 0:0:null\n", cwriter.manifest_text())

    def test_writer_works_without_cache(self):
        # The cache argument is optional.
        cwriter = arv_put.ArvPutCollectionWriter()
        cwriter.write_file('/dev/null')
        self.assertEqual(". 0:0:null\n", cwriter.manifest_text())

    def test_writer_resumes_from_cache(self):
        # A writer rebuilt from the cache while the source file still
        # exists must report the manifest of the file already written.
        cwriter = arv_put.ArvPutCollectionWriter(self.cache)
        with self.make_test_file() as testfile:
            cwriter.write_file(testfile.name, 'test')
            new_writer = arv_put.ArvPutCollectionWriter.from_cache(
                self.cache)
            self.assertEqual(
                ". 098f6bcd4621d373cade4e832627b4f6+4 0:4:test\n",
                new_writer.manifest_text())

    def test_new_writer_from_stale_cache(self):
        # Here the writer is rebuilt only after the make_test_file()
        # context has exited; its manifest must then contain just the
        # newly written /dev/null entry, not the cached one.
        cwriter = arv_put.ArvPutCollectionWriter(self.cache)
        with self.make_test_file() as testfile:
            cwriter.write_file(testfile.name, 'test')
        new_writer = arv_put.ArvPutCollectionWriter.from_cache(self.cache)
        new_writer.write_file('/dev/null')
        self.assertEqual(". 0:0:null\n", new_writer.manifest_text())

    def test_new_writer_from_empty_cache(self):
        # from_cache on a cache that was never written must still return a
        # usable writer.
        cwriter = arv_put.ArvPutCollectionWriter.from_cache(self.cache)
        cwriter.write_file('/dev/null')
        self.assertEqual(". 0:0:null\n", cwriter.manifest_text())

    def test_writer_resumable_after_arbitrary_bytes(self):
        cwriter = arv_put.ArvPutCollectionWriter(self.cache)
        # These bytes are intentionally not valid UTF-8.
        with self.make_test_file('\x00\x07\xe2') as testfile:
            cwriter.write_file(testfile.name, 'test')
            new_writer = arv_put.ArvPutCollectionWriter.from_cache(
                self.cache)
        self.assertEqual(cwriter.manifest_text(), new_writer.manifest_text())

    def test_progress_reporting(self):
        # Run once with an unknown expected size (None) and once with a
        # known one (8).
        for expect_count in (None, 8):
            progression = []
            # The reporter records every argument tuple it is called with.
            cwriter = arv_put.ArvPutCollectionWriter(
                reporter=lambda *args: progression.append(args),
                bytes_expected=expect_count)
            with self.make_test_file() as testfile:
                cwriter.write_file(testfile.name, 'test')
            cwriter.finish_current_stream()
            # Some report must carry 4 bytes written alongside the
            # expected count that was passed in.
            self.assertIn((4, expect_count), progression)
240
241
class ArvadosExpectedBytesTest(ArvadosBaseTestCase):
    """Tests for arv_put.expected_bytes_for on files, trees, and devices."""

    # Size in bytes of this test module's own file, used as a known
    # quantity.
    TEST_SIZE = os.path.getsize(__file__)

    def test_expected_bytes_for_file(self):
        # A single regular file counts for its own size.
        self.assertEqual(self.TEST_SIZE,
                         arv_put.expected_bytes_for([__file__]))

    def test_expected_bytes_for_tree(self):
        # A directory counts for the sum of the files inside it, and
        # several arguments are added together.
        tree = self.make_tmpdir()
        shutil.copyfile(__file__, os.path.join(tree, 'one'))
        shutil.copyfile(__file__, os.path.join(tree, 'two'))
        self.assertEqual(self.TEST_SIZE * 2,
                         arv_put.expected_bytes_for([tree]))
        self.assertEqual(self.TEST_SIZE * 3,
                         arv_put.expected_bytes_for([tree, __file__]))

    def test_expected_bytes_for_device(self):
        # /dev/null anywhere in the argument list makes the result None.
        self.assertIsNone(arv_put.expected_bytes_for(['/dev/null']))
        self.assertIsNone(arv_put.expected_bytes_for([__file__, '/dev/null']))
261
262
class ArvadosPutReportTest(ArvadosBaseTestCase):
    """Checks the text produced by arv_put's progress formatters."""

    def test_machine_progress(self):
        # The report must end with both byte counts; an unknown total
        # (None) is expected to be shown as -1.
        cases = [(0, 1), (0, None), (1, None), (235, 9283)]
        for written, expected in cases:
            if expected is None:
                shown_total = -1
            else:
                shown_total = expected
            suffix = ": {} written {} total\n".format(written, shown_total)
            report = arv_put.machine_progress(written, expected)
            self.assertTrue(report.endswith(suffix))

    def test_known_human_progress(self):
        # With a known total, the report must start with a carriage return
        # and contain the formatted ratio followed by '%'.
        # NOTE(review): `/` on two ints floors under Python 2, so every
        # case here would expect '0.0%' there -- confirm this matches what
        # human_progress is meant to print.
        for written, expected in [(0, 1), (2, 4), (45, 60)]:
            ratio = written / expected
            fragment = '{:.1f}%'.format(ratio)
            report = arv_put.human_progress(written, expected)
            self.assertTrue(report.startswith('\r'))
            self.assertIn(fragment, report)

    def test_unknown_human_progress(self):
        # With no total, the byte count itself must appear in the report
        # as a whole word.
        for written in [1, 20, 300, 4000, 50000]:
            pattern = r'\b{}\b'.format(written)
            report = arv_put.human_progress(written, None)
            self.assertTrue(re.search(pattern, report))
282
283
class ArvadosPutTest(ArvadosKeepLocalStoreTestCase):
    """Runs arv_put.main and checks the result in KEEP_LOCAL_STORE."""

    def test_simple_file_put(self):
        # Put one test file with --stream and --no-progress, then look for
        # the expected block in the directory named by KEEP_LOCAL_STORE.
        with self.make_test_file() as testfile:
            arv_put.main(['--stream', '--no-progress', testfile.name])
        store_dir = os.environ['KEEP_LOCAL_STORE']
        block_path = os.path.join(
            store_dir, '098f6bcd4621d373cade4e832627b4f6')
        self.assertTrue(os.path.exists(block_path),
                        "did not find file stream in Keep store")
293
294
# Run every test in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()