2752: Add ResumableCollectionWriter serialization to arv-put.
[arvados.git] / sdk / python / tests / test_arv-put.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 import os
5 import re
6 import tempfile
7 import unittest
8
9 import arvados
10 import arvados.commands.put as arv_put
11 from arvados_testutil import ArvadosBaseTestCase, ArvadosKeepLocalStoreTestCase
12
class ArvadosPutResumeCacheTest(ArvadosBaseTestCase):
    """Tests for arv_put.ResumeCache: path naming, storage, and locking.

    Tests that create a cache which should be cleaned up afterwards
    store it in self.last_cache; tearDown destroys it.
    """

    # Argument lists used by the cache naming tests.  test_cache_names_unique
    # expects every entry to produce a different cache path.
    CACHE_ARGSET = [
        [],
        ['/dev/null'],
        ['/dev/null', '--filename', 'empty'],
        ['/tmp'],
        ['/tmp', '--max-manifest-depth', '0'],
        ['/tmp', '--max-manifest-depth', '1']
        ]

    def tearDown(self):
        super(ArvadosPutResumeCacheTest, self).tearDown()
        # Not every test creates a cache.  Look the attribute up explicitly
        # rather than catching AttributeError around destroy(), which would
        # also hide an AttributeError raised from inside destroy() itself.
        last_cache = getattr(self, 'last_cache', None)
        if last_cache is not None:
            last_cache.destroy()

    def cache_path_from_arglist(self, arglist):
        # Parse a command-line argument list the way arv-put would, and
        # return the resume cache path chosen for those arguments.
        return arv_put.ResumeCache.make_path(arv_put.parse_arguments(arglist))

    def test_cache_names_stable(self):
        for argset in self.CACHE_ARGSET:
            self.assertEqual(self.cache_path_from_arglist(argset),
                             self.cache_path_from_arglist(argset),
                             "cache name changed for {}".format(argset))

    def test_cache_names_unique(self):
        results = []
        for argset in self.CACHE_ARGSET:
            path = self.cache_path_from_arglist(argset)
            self.assertNotIn(path, results)
            results.append(path)

    def test_cache_names_simple(self):
        # The goal here is to make sure the filename doesn't use characters
        # reserved by the filesystem.  Feel free to adjust this regexp as
        # long as it still does that.
        bad_chars = re.compile(r'[^-\.\w]')
        for argset in self.CACHE_ARGSET:
            path = self.cache_path_from_arglist(argset)
            self.assertFalse(bad_chars.search(os.path.basename(path)),
                             "path too exotic: {}".format(path))

    def test_cache_names_ignore_argument_order(self):
        self.assertEqual(
            self.cache_path_from_arglist(['a', 'b', 'c']),
            self.cache_path_from_arglist(['c', 'a', 'b']))
        self.assertEqual(
            self.cache_path_from_arglist(['-', '--filename', 'stdin']),
            self.cache_path_from_arglist(['--filename', 'stdin', '-']))

    def test_cache_names_ignore_irrelevant_arguments(self):
        # Workaround: parse_arguments bails on --filename with a directory.
        args1 = arv_put.parse_arguments(['/tmp'])
        args2 = arv_put.parse_arguments(['/tmp'])
        args2.filename = 'tmp'
        self.assertEqual(arv_put.ResumeCache.make_path(args1),
                         arv_put.ResumeCache.make_path(args2),
                         "cache path considered --filename for directory")
        self.assertEqual(
            self.cache_path_from_arglist(['-']),
            self.cache_path_from_arglist(['-', '--max-manifest-depth', '1']),
            "cache path considered --max-manifest-depth for file")

    def test_cache_names_treat_negative_manifest_depths_identically(self):
        base_args = ['/tmp', '--max-manifest-depth']
        self.assertEqual(
            self.cache_path_from_arglist(base_args + ['-1']),
            self.cache_path_from_arglist(base_args + ['-2']))

    def test_cache_names_treat_stdin_consistently(self):
        self.assertEqual(
            self.cache_path_from_arglist(['-', '--filename', 'test']),
            self.cache_path_from_arglist(['/dev/stdin', '--filename', 'test']))

    def test_cache_names_identical_for_synonymous_names(self):
        self.assertEqual(
            self.cache_path_from_arglist(['.']),
            self.cache_path_from_arglist([os.path.realpath('.')]))
        # A symlink pointing back at its own directory is another name for
        # that directory, so both must map to the same cache path.
        testdir = self.make_tmpdir()
        looplink = os.path.join(testdir, 'loop')
        os.symlink(testdir, looplink)
        self.assertEqual(
            self.cache_path_from_arglist([testdir]),
            self.cache_path_from_arglist([looplink]))

    def test_cache_names_different_by_api_host(self):
        config = arvados.config.settings()
        orig_host = config.get('ARVADOS_API_HOST')
        try:
            name1 = self.cache_path_from_arglist(['.'])
            config['ARVADOS_API_HOST'] = 'x' + (orig_host or 'localhost')
            self.assertNotEqual(name1, self.cache_path_from_arglist(['.']))
        finally:
            # Restore the shared settings.  pop() with a default cannot
            # raise if the try block failed before the key was ever set,
            # so a cleanup KeyError never masks the original failure.
            if orig_host is None:
                config.pop('ARVADOS_API_HOST', None)
            else:
                config['ARVADOS_API_HOST'] = orig_host

    def test_basic_cache_storage(self):
        thing = ['test', 'list']
        with tempfile.NamedTemporaryFile() as cachefile:
            self.last_cache = arv_put.ResumeCache(cachefile.name)
        self.last_cache.save(thing)
        self.assertEqual(thing, self.last_cache.load())

    def test_empty_cache(self):
        with tempfile.NamedTemporaryFile() as cachefile:
            # Stored in last_cache so tearDown releases it; a plain local
            # would stay open until garbage collection.
            self.last_cache = arv_put.ResumeCache(cachefile.name)
        self.assertRaises(ValueError, self.last_cache.load)

    def test_cache_persistent(self):
        thing = ['test', 'list']
        path = os.path.join(self.make_tmpdir(), 'cache')
        cache = arv_put.ResumeCache(path)
        cache.save(thing)
        cache.close()
        # Reopen the same path with a fresh object to check the data
        # survived the close.
        self.last_cache = arv_put.ResumeCache(path)
        self.assertEqual(thing, self.last_cache.load())

    def test_multiple_cache_writes(self):
        thing = ['short', 'list']
        with tempfile.NamedTemporaryFile() as cachefile:
            self.last_cache = arv_put.ResumeCache(cachefile.name)
        # Start writing an object longer than the one we test, to make
        # sure the cache file gets truncated.
        self.last_cache.save(['long', 'long', 'list'])
        self.last_cache.save(thing)
        self.assertEqual(thing, self.last_cache.load())

    def test_cache_is_locked(self):
        with tempfile.NamedTemporaryFile() as cachefile:
            # Keep a reference to the first cache while the second open is
            # attempted; storing it in last_cache also gets it destroyed
            # in tearDown instead of being leaked.
            self.last_cache = arv_put.ResumeCache(cachefile.name)
            self.assertRaises(arv_put.ResumeCacheConflict,
                              arv_put.ResumeCache, cachefile.name)

    def test_cache_stays_locked(self):
        with tempfile.NamedTemporaryFile() as cachefile:
            self.last_cache = arv_put.ResumeCache(cachefile.name)
            path = cachefile.name
        self.last_cache.save('test')
        self.assertRaises(arv_put.ResumeCacheConflict,
                          arv_put.ResumeCache, path)

    def test_destroy_cache(self):
        # delete=False: the file must outlive the temporary handle so this
        # test controls its removal.  The with block closes that handle
        # right away so the test does not leak a file descriptor.
        with tempfile.NamedTemporaryFile(delete=False) as cachefile:
            cache_path = cachefile.name
        try:
            cache = arv_put.ResumeCache(cache_path)
            cache.save('test')
            cache.destroy()
            try:
                # Stored in last_cache so tearDown cleans up the re-opened
                # cache.
                self.last_cache = arv_put.ResumeCache(cache_path)
            except arv_put.ResumeCacheConflict:
                self.fail("could not load cache after destroying it")
            self.assertRaises(ValueError, cache.load)
        finally:
            if os.path.exists(cache_path):
                os.unlink(cache_path)
171
172
class ArvadosPutResumeCacheCollectionWriterTest(ArvadosKeepLocalStoreTestCase):
    """Tests for arv_put.ResumeCacheCollectionWriter and its ResumeCache use.

    Each test gets a fresh ResumeCache (self.cache) backed by a temporary
    file whose path is kept in self.cache_filename.
    """

    def setUp(self):
        super(ArvadosPutResumeCacheCollectionWriterTest, self).setUp()
        # delete=False keeps the file on disk once the with block closes
        # the temporary handle; tearDown is responsible for removing it.
        with tempfile.NamedTemporaryFile(delete=False) as cachefile:
            self.cache = arv_put.ResumeCache(cachefile.name)
            self.cache_filename = cachefile.name

    def tearDown(self):
        super(ArvadosPutResumeCacheCollectionWriterTest, self).tearDown()
        if os.path.exists(self.cache_filename):
            self.cache.destroy()
        self.cache.close()

    def test_writer_caches(self):
        cwriter = arv_put.ResumeCacheCollectionWriter(self.cache)
        cwriter.write_file('/dev/null')
        # A truthy load() result means the writer saved something.
        self.assertTrue(self.cache.load())
        self.assertEqual(". 0:0:null\n", cwriter.manifest_text())

    def test_writer_works_without_cache(self):
        cwriter = arv_put.ResumeCacheCollectionWriter()
        cwriter.write_file('/dev/null')
        self.assertEqual(". 0:0:null\n", cwriter.manifest_text())

    def test_writer_resumes_from_cache(self):
        cwriter = arv_put.ResumeCacheCollectionWriter(self.cache)
        with self.make_test_file() as testfile:
            cwriter.write_file(testfile.name, 'test')
            # The second writer is built while the source file still
            # exists, and is expected to pick up the cached state.
            new_writer = arv_put.ResumeCacheCollectionWriter.from_cache(
                self.cache)
            self.assertEqual(
                ". 098f6bcd4621d373cade4e832627b4f6+4 0:4:test\n",
                new_writer.manifest_text())

    def test_new_writer_from_stale_cache(self):
        cwriter = arv_put.ResumeCacheCollectionWriter(self.cache)
        with self.make_test_file() as testfile:
            cwriter.write_file(testfile.name, 'test')
        # from_cache runs after the with block has finished with the
        # source file, so the new writer is expected to start from scratch.
        new_writer = arv_put.ResumeCacheCollectionWriter.from_cache(self.cache)
        new_writer.write_file('/dev/null')
        self.assertEqual(". 0:0:null\n", new_writer.manifest_text())

    def test_new_writer_from_empty_cache(self):
        cwriter = arv_put.ResumeCacheCollectionWriter.from_cache(self.cache)
        cwriter.write_file('/dev/null')
        self.assertEqual(". 0:0:null\n", cwriter.manifest_text())

    def test_writer_resumable_after_arbitrary_bytes(self):
        cwriter = arv_put.ResumeCacheCollectionWriter(self.cache)
        # These bytes are intentionally not valid UTF-8.
        with self.make_test_file('\x00\x07\xe2') as testfile:
            cwriter.write_file(testfile.name, 'test')
            new_writer = arv_put.ResumeCacheCollectionWriter.from_cache(
                self.cache)
        self.assertEqual(cwriter.manifest_text(), new_writer.manifest_text())
228
229
class ArvadosPutTest(ArvadosKeepLocalStoreTestCase):
    """Exercises arv_put.main() and inspects the KEEP_LOCAL_STORE directory."""

    def test_simple_file_put(self):
        # Upload one test file through the command-line entry point.
        with self.make_test_file() as source_file:
            arv_put.main(['--stream', '--no-progress', source_file.name])
        # The uploaded data should appear in the local store under this
        # name (presumably the hash of make_test_file's default content --
        # confirm against arvados_testutil).
        stored_block = os.path.join(os.environ['KEEP_LOCAL_STORE'],
                                    '098f6bcd4621d373cade4e832627b4f6')
        self.assertTrue(os.path.exists(stored_block),
                        "did not find file stream in Keep store")
239
240
# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()