Merge branch 'master' into 2755-python-sdk-permissions
[arvados.git] / sdk / python / tests / test_arv-put.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 import os
5 import re
6 import shutil
7 import subprocess
8 import sys
9 import tempfile
10 import time
11 import unittest
12
13 import arvados
14 import arvados.commands.put as arv_put
15 from arvados_testutil import ArvadosBaseTestCase, ArvadosKeepLocalStoreTestCase
16
class ArvadosPutResumeCacheTest(ArvadosBaseTestCase):
    """Exercise arv_put.ResumeCache: cache path naming, storage and locking.

    Tests that leave a ResumeCache open store it in ``self.last_cache`` so
    that tearDown() can destroy it.
    """

    # Argument lists used by the cache-name tests below; each one is
    # expected to produce its own distinct cache path.
    CACHE_ARGSET = [
        [],
        ['/dev/null'],
        ['/dev/null', '--filename', 'empty'],
        ['/tmp'],
        ['/tmp', '--max-manifest-depth', '0'],
        ['/tmp', '--max-manifest-depth', '1']
        ]

    def tearDown(self):
        """Destroy the cache registered by the test, if there is one."""
        super(ArvadosPutResumeCacheTest, self).tearDown()
        # Look the attribute up explicitly instead of catching
        # AttributeError, so that an AttributeError raised inside
        # destroy() itself is reported rather than silently swallowed.
        cache = getattr(self, 'last_cache', None)
        if cache is not None:
            cache.destroy()

    def cache_path_from_arglist(self, arglist):
        """Return the cache path arv-put computes for a command line."""
        return arv_put.ResumeCache.make_path(arv_put.parse_arguments(arglist))

    def test_cache_names_stable(self):
        """The same arguments must always yield the same cache path."""
        for argset in self.CACHE_ARGSET:
            self.assertEqual(self.cache_path_from_arglist(argset),
                             self.cache_path_from_arglist(argset),
                             "cache name changed for {}".format(argset))

    def test_cache_names_unique(self):
        """Every entry of CACHE_ARGSET must yield a different cache path."""
        results = []
        for argset in self.CACHE_ARGSET:
            path = self.cache_path_from_arglist(argset)
            self.assertNotIn(path, results)
            results.append(path)

    def test_cache_names_simple(self):
        # The goal here is to make sure the filename doesn't use characters
        # reserved by the filesystem.  Feel free to adjust this regexp as
        # long as it still does that.
        bad_chars = re.compile(r'[^-\.\w]')
        for argset in self.CACHE_ARGSET:
            path = self.cache_path_from_arglist(argset)
            self.assertFalse(bad_chars.search(os.path.basename(path)),
                             "path too exotic: {}".format(path))

    def test_cache_names_ignore_argument_order(self):
        """Reordering the command line must not change the cache path."""
        self.assertEqual(
            self.cache_path_from_arglist(['a', 'b', 'c']),
            self.cache_path_from_arglist(['c', 'a', 'b']))
        self.assertEqual(
            self.cache_path_from_arglist(['-', '--filename', 'stdin']),
            self.cache_path_from_arglist(['--filename', 'stdin', '-']))

    def test_cache_names_differ_for_similar_paths(self):
        # This test needs names at / that don't exist on the real filesystem.
        self.assertNotEqual(
            self.cache_path_from_arglist(['/_arvputtest1', '/_arvputtest2']),
            self.cache_path_from_arglist(['/_arvputtest1/_arvputtest2']))

    def test_cache_names_ignore_irrelevant_arguments(self):
        """Options that do not apply to the input must not affect the path."""
        # Workaround: parse_arguments bails on --filename with a directory.
        path1 = self.cache_path_from_arglist(['/tmp'])
        args = arv_put.parse_arguments(['/tmp'])
        args.filename = 'tmp'
        path2 = arv_put.ResumeCache.make_path(args)
        self.assertEqual(path1, path2,
                         "cache path considered --filename for directory")
        self.assertEqual(
            self.cache_path_from_arglist(['-']),
            self.cache_path_from_arglist(['-', '--max-manifest-depth', '1']),
            "cache path considered --max-manifest-depth for file")

    def test_cache_names_treat_negative_manifest_depths_identically(self):
        """Different negative --max-manifest-depth values share one path."""
        base_args = ['/tmp', '--max-manifest-depth']
        self.assertEqual(
            self.cache_path_from_arglist(base_args + ['-1']),
            self.cache_path_from_arglist(base_args + ['-2']))

    def test_cache_names_treat_stdin_consistently(self):
        """'-' and '/dev/stdin' must map to the same cache path."""
        self.assertEqual(
            self.cache_path_from_arglist(['-', '--filename', 'test']),
            self.cache_path_from_arglist(['/dev/stdin', '--filename', 'test']))

    def test_cache_names_identical_for_synonymous_names(self):
        """Relative paths and symlinks resolve to the real path's cache."""
        self.assertEqual(
            self.cache_path_from_arglist(['.']),
            self.cache_path_from_arglist([os.path.realpath('.')]))
        testdir = self.make_tmpdir()
        looplink = os.path.join(testdir, 'loop')
        os.symlink(testdir, looplink)
        self.assertEqual(
            self.cache_path_from_arglist([testdir]),
            self.cache_path_from_arglist([looplink]))

    def test_cache_names_different_by_api_host(self):
        """Changing ARVADOS_API_HOST must change the cache path."""
        config = arvados.config.settings()
        orig_host = config.get('ARVADOS_API_HOST')
        try:
            name1 = self.cache_path_from_arglist(['.'])
            config['ARVADOS_API_HOST'] = 'x' + (orig_host or 'localhost')
            self.assertNotEqual(name1, self.cache_path_from_arglist(['.']))
        finally:
            if orig_host is None:
                # pop() rather than del: if the test failed before the key
                # was set, del would raise KeyError here and hide the
                # original failure.
                config.pop('ARVADOS_API_HOST', None)
            else:
                config['ARVADOS_API_HOST'] = orig_host

    def test_basic_cache_storage(self):
        """Data passed to save() is what load() returns."""
        thing = ['test', 'list']
        with tempfile.NamedTemporaryFile() as cachefile:
            self.last_cache = arv_put.ResumeCache(cachefile.name)
        self.last_cache.save(thing)
        self.assertEqual(thing, self.last_cache.load())

    def test_empty_cache(self):
        """Loading a cache that was never saved raises ValueError."""
        with tempfile.NamedTemporaryFile() as cachefile:
            # Registered as last_cache so tearDown() releases it.
            self.last_cache = arv_put.ResumeCache(cachefile.name)
        self.assertRaises(ValueError, self.last_cache.load)

    def test_cache_persistent(self):
        """Saved data can be read back by a new ResumeCache on the path."""
        thing = ['test', 'list']
        path = os.path.join(self.make_tmpdir(), 'cache')
        cache = arv_put.ResumeCache(path)
        cache.save(thing)
        cache.close()
        self.last_cache = arv_put.ResumeCache(path)
        self.assertEqual(thing, self.last_cache.load())

    def test_multiple_cache_writes(self):
        """A later, shorter save() fully replaces an earlier, longer one."""
        thing = ['short', 'list']
        with tempfile.NamedTemporaryFile() as cachefile:
            self.last_cache = arv_put.ResumeCache(cachefile.name)
        # Start writing an object longer than the one we test, to make
        # sure the cache file gets truncated.
        self.last_cache.save(['long', 'long', 'list'])
        self.last_cache.save(thing)
        self.assertEqual(thing, self.last_cache.load())

    def test_cache_is_locked(self):
        """Opening a second ResumeCache on the same file must conflict."""
        with tempfile.NamedTemporaryFile() as cachefile:
            # Registered as last_cache so tearDown() releases it instead
            # of leaving the open cache to the garbage collector.
            self.last_cache = arv_put.ResumeCache(cachefile.name)
            self.assertRaises(arv_put.ResumeCacheConflict,
                              arv_put.ResumeCache, cachefile.name)

    def test_cache_stays_locked(self):
        """The conflict must still be raised after save() has been called."""
        with tempfile.NamedTemporaryFile() as cachefile:
            self.last_cache = arv_put.ResumeCache(cachefile.name)
            path = cachefile.name
        self.last_cache.save('test')
        self.assertRaises(arv_put.ResumeCacheConflict,
                          arv_put.ResumeCache, path)

    def test_destroy_cache(self):
        """After destroy(), the path can be reopened and load() fails."""
        cachefile = tempfile.NamedTemporaryFile(delete=False)
        try:
            cache = arv_put.ResumeCache(cachefile.name)
            cache.save('test')
            cache.destroy()
            try:
                # Keep a reference so tearDown() releases the new cache.
                self.last_cache = arv_put.ResumeCache(cachefile.name)
            except arv_put.ResumeCacheConflict:
                self.fail("could not load cache after destroying it")
            self.assertRaises(ValueError, cache.load)
        finally:
            # delete=False only skips the unlink; the handle still has to
            # be closed explicitly.
            cachefile.close()
            if os.path.exists(cachefile.name):
                os.unlink(cachefile.name)

    def test_restart_cache(self):
        """restart() discards saved data but keeps the cache locked."""
        path = os.path.join(self.make_tmpdir(), 'cache')
        # Registered as last_cache so tearDown() releases it.
        self.last_cache = arv_put.ResumeCache(path)
        self.last_cache.save('test')
        self.last_cache.restart()
        self.assertRaises(ValueError, self.last_cache.load)
        self.assertRaises(arv_put.ResumeCacheConflict,
                          arv_put.ResumeCache, path)
190
191
class ArvadosPutCollectionWriterTest(ArvadosKeepLocalStoreTestCase):
    """Tests for arv_put.ArvPutCollectionWriter and its use of a ResumeCache.

    Each test gets a fresh ResumeCache in ``self.cache``, backed by a
    temporary file whose name is kept in ``self.cache_filename``.
    """

    def setUp(self):
        super(ArvadosPutCollectionWriterTest, self).setUp()
        # delete=False keeps the file on disk after the with block, so the
        # cache path stays valid for the duration of the test.
        with tempfile.NamedTemporaryFile(delete=False) as cachefile:
            self.cache = arv_put.ResumeCache(cachefile.name)
            self.cache_filename = cachefile.name

    def tearDown(self):
        super(ArvadosPutCollectionWriterTest, self).tearDown()
        if os.path.exists(self.cache_filename):
            self.cache.destroy()
        self.cache.close()

    def test_writer_caches(self):
        """cache_state() must leave loadable data in the cache."""
        cwriter = arv_put.ArvPutCollectionWriter(self.cache)
        cwriter.write_file('/dev/null')
        cwriter.cache_state()
        self.assertTrue(self.cache.load())
        self.assertEqual(". 0:0:null\n", cwriter.manifest_text())

    def test_writer_works_without_cache(self):
        """A writer constructed without a cache still builds a manifest."""
        cwriter = arv_put.ArvPutCollectionWriter()
        cwriter.write_file('/dev/null')
        self.assertEqual(". 0:0:null\n", cwriter.manifest_text())

    def test_writer_resumes_from_cache(self):
        """from_cache() reproduces the state saved by cache_state()."""
        cwriter = arv_put.ArvPutCollectionWriter(self.cache)
        with self.make_test_file() as testfile:
            cwriter.write_file(testfile.name, 'test')
            cwriter.cache_state()
            new_writer = arv_put.ArvPutCollectionWriter.from_cache(
                self.cache)
            self.assertEqual(
                ". 098f6bcd4621d373cade4e832627b4f6+4 0:4:test\n",
                new_writer.manifest_text())

    def test_new_writer_from_stale_cache(self):
        """Without a cache_state() call, from_cache() starts a fresh writer."""
        cwriter = arv_put.ArvPutCollectionWriter(self.cache)
        with self.make_test_file() as testfile:
            cwriter.write_file(testfile.name, 'test')
        new_writer = arv_put.ArvPutCollectionWriter.from_cache(self.cache)
        new_writer.write_file('/dev/null')
        self.assertEqual(". 0:0:null\n", new_writer.manifest_text())

    def test_new_writer_from_empty_cache(self):
        """from_cache() on a never-written cache yields a usable writer."""
        cwriter = arv_put.ArvPutCollectionWriter.from_cache(self.cache)
        cwriter.write_file('/dev/null')
        self.assertEqual(". 0:0:null\n", cwriter.manifest_text())

    def test_writer_resumable_after_arbitrary_bytes(self):
        """State can be cached and resumed even for non-UTF-8 file data."""
        cwriter = arv_put.ArvPutCollectionWriter(self.cache)
        # These bytes are intentionally not valid UTF-8.
        with self.make_test_file('\x00\x07\xe2') as testfile:
            cwriter.write_file(testfile.name, 'test')
            cwriter.cache_state()
            new_writer = arv_put.ArvPutCollectionWriter.from_cache(
                self.cache)
        self.assertEqual(cwriter.manifest_text(), new_writer.manifest_text())

    def make_progress_tester(self):
        """Return (list, callback); the callback appends its args to the list."""
        progression = []
        def record_func(written, expected):
            progression.append((written, expected))
        return progression, record_func

    def test_progress_reporting(self):
        """The reporter is called with (bytes written, bytes expected)."""
        for expect_count in (None, 8):
            progression, reporter = self.make_progress_tester()
            cwriter = arv_put.ArvPutCollectionWriter(
                reporter=reporter, bytes_expected=expect_count)
            with self.make_test_file() as testfile:
                cwriter.write_file(testfile.name, 'test')
            cwriter.finish_current_stream()
            self.assertIn((4, expect_count), progression)

    def test_resume_progress(self):
        """A writer restored from the cache remembers bytes_written."""
        cwriter = arv_put.ArvPutCollectionWriter(self.cache, bytes_expected=4)
        with self.make_test_file() as testfile:
            # Set up a writer with some flushed bytes.
            cwriter.write_file(testfile.name, 'test')
            cwriter.finish_current_stream()
            cwriter.cache_state()
            new_writer = arv_put.ArvPutCollectionWriter.from_cache(self.cache)
            self.assertEqual(new_writer.bytes_written, 4)
276
277
class ArvadosExpectedBytesTest(ArvadosBaseTestCase):
    """Tests for arv_put.expected_bytes_for(), using this file as fixture."""

    # Size in bytes of this test module's own file, used as a known quantity.
    TEST_SIZE = os.path.getsize(__file__)

    def test_expected_bytes_for_file(self):
        """A single regular file counts for its own size."""
        self.assertEqual(self.TEST_SIZE,
                         arv_put.expected_bytes_for([__file__]))

    def test_expected_bytes_for_tree(self):
        """A directory counts for the files in it, plus any other paths."""
        tree = self.make_tmpdir()
        shutil.copyfile(__file__, os.path.join(tree, 'one'))
        shutil.copyfile(__file__, os.path.join(tree, 'two'))
        self.assertEqual(self.TEST_SIZE * 2,
                         arv_put.expected_bytes_for([tree]))
        self.assertEqual(self.TEST_SIZE * 3,
                         arv_put.expected_bytes_for([tree, __file__]))

    def test_expected_bytes_for_device(self):
        """Any device among the paths makes the expected total None."""
        self.assertIsNone(arv_put.expected_bytes_for(['/dev/null']))
        self.assertIsNone(arv_put.expected_bytes_for([__file__, '/dev/null']))
297
298
class ArvadosPutReportTest(ArvadosBaseTestCase):
    """Checks on the text returned by arv_put's progress formatters."""

    def test_machine_progress(self):
        """machine_progress() ends with the counts; None is shown as -1."""
        cases = [(0, 1), (0, None), (1, None), (235, 9283)]
        for written, expected_total in cases:
            if expected_total is None:
                shown_total = -1
            else:
                shown_total = expected_total
            suffix = ": {} written {} total\n".format(written, shown_total)
            report = arv_put.machine_progress(written, expected_total)
            self.assertTrue(report.endswith(suffix))

    def test_known_human_progress(self):
        """With a known total, the report starts with CR and has a percent."""
        for written, expected_total in [(0, 1), (2, 4), (45, 60)]:
            percentage = '{:.1%}'.format(float(written) / expected_total)
            report = arv_put.human_progress(written, expected_total)
            self.assertTrue(report.startswith('\r'))
            self.assertIn(percentage, report)

    def test_unknown_human_progress(self):
        """With no total, the report contains the byte count as a word."""
        for written in [1, 20, 300, 4000, 50000]:
            pattern = r'\b{}\b'.format(written)
            report = arv_put.human_progress(written, None)
            self.assertTrue(re.search(pattern, report))
318
319
class ArvadosPutTest(ArvadosKeepLocalStoreTestCase):
    """End-to-end tests that run arv-put against the local Keep store."""

    def test_simple_file_put(self):
        """Putting a file with --stream stores its block in the Keep store."""
        with self.make_test_file() as testfile:
            path = testfile.name
            arv_put.main(['--stream', '--no-progress', path])
        self.assertTrue(
            os.path.exists(os.path.join(os.environ['KEEP_LOCAL_STORE'],
                                        '098f6bcd4621d373cade4e832627b4f6')),
            "did not find file stream in Keep store")

    def test_short_put_from_stdin(self):
        """arv-put --stream reads piped stdin and prints the block locator."""
        # Have to run this separately since arv-put can't read from the
        # tests' stdin.
        # arv-put usually can't stat(os.path.realpath('/dev/stdin')) in this
        # case, because the /proc entry is already gone by the time it tries.
        # The with block closes the /dev/null handle once the child is done.
        with open('/dev/null', 'w') as devnull:
            pipe = subprocess.Popen(
                [sys.executable, arv_put.__file__, '--stream'],
                stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                stderr=devnull)
            try:
                pipe.stdin.write('stdin test\n')
                pipe.stdin.close()
                deadline = time.time() + 5
                while (pipe.poll() is None) and (time.time() < deadline):
                    time.sleep(.1)
                if pipe.returncode is None:
                    pipe.terminate()
                    # Reap the child so it does not linger as a zombie.
                    pipe.wait()
                    self.fail(
                        "arv-put did not PUT from stdin within 5 seconds")
                self.assertEqual(pipe.returncode, 0)
                self.assertIn('4a9c8b735dce4b5fa3acf221a0b13628+11',
                              pipe.stdout.read())
            finally:
                # Release our end of the child's stdout pipe on every path.
                pipe.stdout.close()
349
350
# Run the whole suite when this module is executed directly as a script.
if __name__ == '__main__':
    unittest.main()