Merge branch 'master' into 4823-python-sdk-writable-collection-api
[arvados.git] / sdk / python / tests / test_arv_put.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 import apiclient
5 import os
6 import pwd
7 import re
8 import shutil
9 import subprocess
10 import sys
11 import tempfile
12 import time
13 import unittest
14 import yaml
15
16 from cStringIO import StringIO
17
18 import arvados
19 import arvados.commands.put as arv_put
20
21 from arvados_testutil import ArvadosBaseTestCase
22 import run_test_server
23
class ArvadosPutResumeCacheTest(ArvadosBaseTestCase):
    """Exercise arv_put.ResumeCache: cache path naming, storage and locking.

    A test that wants its cache removed afterwards stores it in
    ``self.last_cache``; tearDown destroys it when that attribute exists.
    """

    # Argument lists that must each map to a different cache path
    # (checked by test_cache_names_unique).
    CACHE_ARGSET = [
        [],
        ['/dev/null'],
        ['/dev/null', '--filename', 'empty'],
        ['/tmp'],
        ['/tmp', '--max-manifest-depth', '0'],
        ['/tmp', '--max-manifest-depth', '1']
        ]

    def tearDown(self):
        """Destroy the cache a test left in ``self.last_cache``, if any."""
        super(ArvadosPutResumeCacheTest, self).tearDown()
        # Look the attribute up explicitly instead of catching AttributeError
        # around the call, so an AttributeError raised inside destroy()
        # itself is reported rather than silently swallowed.
        last_cache = getattr(self, 'last_cache', None)
        if last_cache is not None:
            last_cache.destroy()

    def cache_path_from_arglist(self, arglist):
        """Return the ResumeCache path computed for the given arv-put args."""
        return arv_put.ResumeCache.make_path(arv_put.parse_arguments(arglist))

    def test_cache_names_stable(self):
        for argset in self.CACHE_ARGSET:
            self.assertEqual(self.cache_path_from_arglist(argset),
                             self.cache_path_from_arglist(argset),
                             "cache name changed for {}".format(argset))

    def test_cache_names_unique(self):
        results = []
        for argset in self.CACHE_ARGSET:
            path = self.cache_path_from_arglist(argset)
            self.assertNotIn(path, results)
            results.append(path)

    def test_cache_names_simple(self):
        # The goal here is to make sure the filename doesn't use characters
        # reserved by the filesystem.  Feel free to adjust this regexp as
        # long as it still does that.
        bad_chars = re.compile(r'[^-\.\w]')
        for argset in self.CACHE_ARGSET:
            path = self.cache_path_from_arglist(argset)
            self.assertFalse(bad_chars.search(os.path.basename(path)),
                             "path too exotic: {}".format(path))

    def test_cache_names_ignore_argument_order(self):
        self.assertEqual(
            self.cache_path_from_arglist(['a', 'b', 'c']),
            self.cache_path_from_arglist(['c', 'a', 'b']))
        self.assertEqual(
            self.cache_path_from_arglist(['-', '--filename', 'stdin']),
            self.cache_path_from_arglist(['--filename', 'stdin', '-']))

    def test_cache_names_differ_for_similar_paths(self):
        # This test needs names at / that don't exist on the real filesystem.
        self.assertNotEqual(
            self.cache_path_from_arglist(['/_arvputtest1', '/_arvputtest2']),
            self.cache_path_from_arglist(['/_arvputtest1/_arvputtest2']))

    def test_cache_names_ignore_irrelevant_arguments(self):
        # Workaround: parse_arguments bails on --filename with a directory.
        path1 = self.cache_path_from_arglist(['/tmp'])
        args = arv_put.parse_arguments(['/tmp'])
        args.filename = 'tmp'
        path2 = arv_put.ResumeCache.make_path(args)
        self.assertEqual(path1, path2,
                         "cache path considered --filename for directory")
        self.assertEqual(
            self.cache_path_from_arglist(['-']),
            self.cache_path_from_arglist(['-', '--max-manifest-depth', '1']),
            "cache path considered --max-manifest-depth for file")

    def test_cache_names_treat_negative_manifest_depths_identically(self):
        base_args = ['/tmp', '--max-manifest-depth']
        self.assertEqual(
            self.cache_path_from_arglist(base_args + ['-1']),
            self.cache_path_from_arglist(base_args + ['-2']))

    def test_cache_names_treat_stdin_consistently(self):
        self.assertEqual(
            self.cache_path_from_arglist(['-', '--filename', 'test']),
            self.cache_path_from_arglist(['/dev/stdin', '--filename', 'test']))

    def test_cache_names_identical_for_synonymous_names(self):
        self.assertEqual(
            self.cache_path_from_arglist(['.']),
            self.cache_path_from_arglist([os.path.realpath('.')]))
        # A symlink pointing back at its own directory names the same tree.
        testdir = self.make_tmpdir()
        looplink = os.path.join(testdir, 'loop')
        os.symlink(testdir, looplink)
        self.assertEqual(
            self.cache_path_from_arglist([testdir]),
            self.cache_path_from_arglist([looplink]))

    def test_cache_names_different_by_api_host(self):
        config = arvados.config.settings()
        orig_host = config.get('ARVADOS_API_HOST')
        try:
            name1 = self.cache_path_from_arglist(['.'])
            config['ARVADOS_API_HOST'] = 'x' + (orig_host or 'localhost')
            self.assertNotEqual(name1, self.cache_path_from_arglist(['.']))
        finally:
            if orig_host is None:
                # pop() rather than del: if the first path computation above
                # raised, the key was never set, and a KeyError here would
                # hide the real failure.
                config.pop('ARVADOS_API_HOST', None)
            else:
                config['ARVADOS_API_HOST'] = orig_host

    def test_basic_cache_storage(self):
        thing = ['test', 'list']
        with tempfile.NamedTemporaryFile() as cachefile:
            self.last_cache = arv_put.ResumeCache(cachefile.name)
        self.last_cache.save(thing)
        self.assertEqual(thing, self.last_cache.load())

    def test_empty_cache(self):
        with tempfile.NamedTemporaryFile() as cachefile:
            cache = arv_put.ResumeCache(cachefile.name)
        self.assertRaises(ValueError, cache.load)

    def test_cache_persistent(self):
        thing = ['test', 'list']
        path = os.path.join(self.make_tmpdir(), 'cache')
        cache = arv_put.ResumeCache(path)
        cache.save(thing)
        cache.close()
        # Reopen the same path: the saved data must still be readable.
        self.last_cache = arv_put.ResumeCache(path)
        self.assertEqual(thing, self.last_cache.load())

    def test_multiple_cache_writes(self):
        thing = ['short', 'list']
        with tempfile.NamedTemporaryFile() as cachefile:
            self.last_cache = arv_put.ResumeCache(cachefile.name)
        # Start writing an object longer than the one we test, to make
        # sure the cache file gets truncated.
        self.last_cache.save(['long', 'long', 'list'])
        self.last_cache.save(thing)
        self.assertEqual(thing, self.last_cache.load())

    def test_cache_is_locked(self):
        with tempfile.NamedTemporaryFile() as cachefile:
            # The first cache object must stay referenced while the second
            # one is attempted, so the name is bound even though unused.
            cache = arv_put.ResumeCache(cachefile.name)
            self.assertRaises(arv_put.ResumeCacheConflict,
                              arv_put.ResumeCache, cachefile.name)

    def test_cache_stays_locked(self):
        with tempfile.NamedTemporaryFile() as cachefile:
            self.last_cache = arv_put.ResumeCache(cachefile.name)
            path = cachefile.name
        self.last_cache.save('test')
        self.assertRaises(arv_put.ResumeCacheConflict,
                          arv_put.ResumeCache, path)

    def test_destroy_cache(self):
        # delete=False keeps the file on disk after the handle is closed;
        # closing the handle right away avoids leaking it for the whole test.
        with tempfile.NamedTemporaryFile(delete=False) as cachefile:
            cache_path = cachefile.name
        try:
            cache = arv_put.ResumeCache(cache_path)
            cache.save('test')
            cache.destroy()
            try:
                arv_put.ResumeCache(cache_path)
            except arv_put.ResumeCacheConflict:
                self.fail("could not load cache after destroying it")
            self.assertRaises(ValueError, cache.load)
        finally:
            if os.path.exists(cache_path):
                os.unlink(cache_path)

    def test_restart_cache(self):
        path = os.path.join(self.make_tmpdir(), 'cache')
        cache = arv_put.ResumeCache(path)
        cache.save('test')
        cache.restart()
        # After restart() the saved data is gone but the path is still held.
        self.assertRaises(ValueError, cache.load)
        self.assertRaises(arv_put.ResumeCacheConflict,
                          arv_put.ResumeCache, path)
197
198
class ArvadosPutCollectionWriterTest(run_test_server.TestCaseWithServers,
                                     ArvadosBaseTestCase):
    """Tests for arv_put.ArvPutCollectionWriter, with and without a cache.

    setUp gives every test a fresh ResumeCache in ``self.cache`` backed by
    the temporary file named in ``self.cache_filename``.
    """

    # Manifest expected after writing only /dev/null: one zero-length file
    # named "null" (d41d8cd9... is the MD5 of empty input).
    _NULL_MANIFEST = ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:null\n"

    def setUp(self):
        super(ArvadosPutCollectionWriterTest, self).setUp()
        run_test_server.authorize_with('active')
        # delete=False: the file has to survive the with block because the
        # cache keeps using it for the rest of the test.
        with tempfile.NamedTemporaryFile(delete=False) as cachefile:
            self.cache = arv_put.ResumeCache(cachefile.name)
            self.cache_filename = cachefile.name

    def tearDown(self):
        super(ArvadosPutCollectionWriterTest, self).tearDown()
        if os.path.exists(self.cache_filename):
            self.cache.destroy()
        self.cache.close()

    def test_writer_caches(self):
        cwriter = arv_put.ArvPutCollectionWriter(self.cache)
        cwriter.write_file('/dev/null')
        cwriter.cache_state()
        self.assertTrue(self.cache.load())
        self.assertEqual(self._NULL_MANIFEST, cwriter.manifest_text())

    def test_writer_works_without_cache(self):
        cwriter = arv_put.ArvPutCollectionWriter()
        cwriter.write_file('/dev/null')
        self.assertEqual(self._NULL_MANIFEST, cwriter.manifest_text())

    def test_writer_resumes_from_cache(self):
        cwriter = arv_put.ArvPutCollectionWriter(self.cache)
        with self.make_test_file() as testfile:
            cwriter.write_file(testfile.name, 'test')
            cwriter.cache_state()
            new_writer = arv_put.ArvPutCollectionWriter.from_cache(
                self.cache)
            self.assertEqual(
                ". 098f6bcd4621d373cade4e832627b4f6+4 0:4:test\n",
                new_writer.manifest_text())

    def test_new_writer_from_stale_cache(self):
        cwriter = arv_put.ArvPutCollectionWriter(self.cache)
        with self.make_test_file() as testfile:
            cwriter.write_file(testfile.name, 'test')
        # cache_state() was never called, so the writer built from the cache
        # must not contain the file written above.
        new_writer = arv_put.ArvPutCollectionWriter.from_cache(self.cache)
        new_writer.write_file('/dev/null')
        self.assertEqual(self._NULL_MANIFEST, new_writer.manifest_text())

    def test_new_writer_from_empty_cache(self):
        cwriter = arv_put.ArvPutCollectionWriter.from_cache(self.cache)
        cwriter.write_file('/dev/null')
        self.assertEqual(self._NULL_MANIFEST, cwriter.manifest_text())

    def test_writer_resumable_after_arbitrary_bytes(self):
        cwriter = arv_put.ArvPutCollectionWriter(self.cache)
        # These bytes are intentionally not valid UTF-8.
        with self.make_test_file('\x00\x07\xe2') as testfile:
            cwriter.write_file(testfile.name, 'test')
            cwriter.cache_state()
            new_writer = arv_put.ArvPutCollectionWriter.from_cache(
                self.cache)
        self.assertEqual(cwriter.manifest_text(), new_writer.manifest_text())

    def make_progress_tester(self):
        """Return ``(progression, record_func)``.

        ``record_func(written, expected)`` appends its two arguments as a
        tuple to the ``progression`` list, so a test can inspect every
        progress report a writer made.
        """
        progression = []
        def record_func(written, expected):
            progression.append((written, expected))
        return progression, record_func

    def test_progress_reporting(self):
        for expect_count in (None, 8):
            progression, reporter = self.make_progress_tester()
            cwriter = arv_put.ArvPutCollectionWriter(
                reporter=reporter, bytes_expected=expect_count)
            with self.make_test_file() as testfile:
                cwriter.write_file(testfile.name, 'test')
            cwriter.finish_current_stream()
            self.assertIn((4, expect_count), progression)

    def test_resume_progress(self):
        cwriter = arv_put.ArvPutCollectionWriter(self.cache, bytes_expected=4)
        with self.make_test_file() as testfile:
            # Set up a writer with some flushed bytes.
            cwriter.write_file(testfile.name, 'test')
            cwriter.finish_current_stream()
            cwriter.cache_state()
            new_writer = arv_put.ArvPutCollectionWriter.from_cache(self.cache)
            self.assertEqual(new_writer.bytes_written, 4)
285
286
class ArvadosExpectedBytesTest(ArvadosBaseTestCase):
    """Tests for arv_put.expected_bytes_for()."""

    # Size in bytes of this test module's own file; it serves as a file of
    # known size in the tests below.
    TEST_SIZE = os.path.getsize(__file__)

    def test_expected_bytes_for_file(self):
        self.assertEqual(self.TEST_SIZE,
                         arv_put.expected_bytes_for([__file__]))

    def test_expected_bytes_for_tree(self):
        # Two copies of this file in a directory, then the directory plus
        # the original file: sizes are expected to add up.
        tree = self.make_tmpdir()
        shutil.copyfile(__file__, os.path.join(tree, 'one'))
        shutil.copyfile(__file__, os.path.join(tree, 'two'))
        self.assertEqual(self.TEST_SIZE * 2,
                         arv_put.expected_bytes_for([tree]))
        self.assertEqual(self.TEST_SIZE * 3,
                         arv_put.expected_bytes_for([tree, __file__]))

    def test_expected_bytes_for_device(self):
        # Any argument list containing a device yields None, even when a
        # regular file is listed alongside it.
        self.assertIsNone(arv_put.expected_bytes_for(['/dev/null']))
        self.assertIsNone(arv_put.expected_bytes_for([__file__, '/dev/null']))
306
307
class ArvadosPutReportTest(ArvadosBaseTestCase):
    """Tests for arv_put's machine_progress and human_progress formatters."""

    def test_machine_progress(self):
        # An unknown total (None) is expected to be reported as -1.
        cases = [(0, 1), (0, None), (1, None), (235, 9283)]
        for written, expected in cases:
            if expected is None:
                shown_total = -1
            else:
                shown_total = expected
            suffix = ": {} written {} total\n".format(written, shown_total)
            report = arv_put.machine_progress(written, expected)
            self.assertTrue(report.endswith(suffix))

    def test_known_human_progress(self):
        # With a known total the report starts with a carriage return and
        # contains the completed fraction as a one-decimal percentage.
        cases = [(0, 1), (2, 4), (45, 60)]
        for written, expected in cases:
            percentage = '{:.1%}'.format(float(written) / expected)
            report = arv_put.human_progress(written, expected)
            self.assertTrue(report.startswith('\r'))
            self.assertIn(percentage, report)

    def test_unknown_human_progress(self):
        # With no total, the byte count itself must appear as a whole word.
        for written in [1, 20, 300, 4000, 50000]:
            pattern = r'\b{}\b'.format(written)
            report = arv_put.human_progress(written, None)
            self.assertTrue(re.search(pattern, report))
327
328
class ArvadosPutTest(run_test_server.TestCaseWithServers, ArvadosBaseTestCase):
    """Tests that call arv_put.main() in-process with captured output."""

    MAIN_SERVER = {}
    Z_UUID = 'zzzzz-zzzzz-zzzzzzzzzzzzzzz'

    def call_main_with_args(self, args):
        """Run arv_put.main(args) with fresh StringIO stdout/stderr buffers.

        The buffers are kept in ``self.main_stdout`` and ``self.main_stderr``
        (tearDown closes them).  Returns whatever arv_put.main returns.
        """
        out_buffer = StringIO()
        err_buffer = StringIO()
        self.main_stdout = out_buffer
        self.main_stderr = err_buffer
        return arv_put.main(args, out_buffer, err_buffer)

    def call_main_on_test_file(self):
        """Put a test file with --stream and check its block reached Keep."""
        with self.make_test_file() as testfile:
            self.call_main_with_args(
                ['--stream', '--no-progress', testfile.name])
        stored_block = os.path.join(os.environ['KEEP_LOCAL_STORE'],
                                    '098f6bcd4621d373cade4e832627b4f6')
        self.assertTrue(os.path.exists(stored_block),
                        "did not find file stream in Keep store")

    def setUp(self):
        super(ArvadosPutTest, self).setUp()
        run_test_server.authorize_with('active')
        arv_put.api_client = None

    def tearDown(self):
        # Close and forget the output buffers of the last main() call, if
        # this test made one.
        for buffer_attr in ['main_stdout', 'main_stderr']:
            if not hasattr(self, buffer_attr):
                continue
            getattr(self, buffer_attr).close()
            delattr(self, buffer_attr)
        super(ArvadosPutTest, self).tearDown()

    def test_simple_file_put(self):
        self.call_main_on_test_file()

    def _put_with_locked_cache(self, locked_dir, cache_dir):
        """Put a test file while locked_dir has all permissions removed.

        ResumeCache.CACHE_DIR points at cache_dir for the duration of the
        put; both it and locked_dir's mode (reset to 0o700) are restored
        afterwards, even when the put fails.
        """
        saved_cache_dir = arv_put.ResumeCache.CACHE_DIR
        os.chmod(locked_dir, 0o0)
        arv_put.ResumeCache.CACHE_DIR = cache_dir
        try:
            self.call_main_on_test_file()
        finally:
            arv_put.ResumeCache.CACHE_DIR = saved_cache_dir
            os.chmod(locked_dir, 0o700)

    def test_put_with_unwriteable_cache_dir(self):
        # The cache directory itself is inaccessible.
        locked_dir = self.make_tmpdir()
        self._put_with_locked_cache(locked_dir, locked_dir)

    def test_put_with_unwritable_cache_subdir(self):
        # The cache directory would live inside an inaccessible parent.
        locked_dir = self.make_tmpdir()
        self._put_with_locked_cache(locked_dir,
                                    os.path.join(locked_dir, 'cachedir'))

    def test_error_name_without_collection(self):
        bad_args = ['--name', 'test without Collection',
                    '--stream', '/dev/null']
        self.assertRaises(SystemExit, self.call_main_with_args, bad_args)

    def test_error_when_project_not_found(self):
        bad_args = ['--project-uuid', self.Z_UUID]
        self.assertRaises(SystemExit, self.call_main_with_args, bad_args)

    def test_error_bad_project_uuid(self):
        bad_args = ['--project-uuid', self.Z_UUID, '--stream']
        self.assertRaises(SystemExit, self.call_main_with_args, bad_args)
398
class ArvPutIntegrationTest(run_test_server.TestCaseWithServers,
                            ArvadosBaseTestCase):
    """Integration tests that run arv-put against the test servers.

    Several tests launch arv-put as a subprocess, using the environment
    prepared in ``ENVIRON`` (see setUpClass and authorize_with).
    """

    def _getKeepServerConfig():
        """Build the KEEP_SERVER settings from the API server's Rails config.

        Looks for a ``blob_signing_key`` in the ``test`` and then ``common``
        sections of application.yml, then application.default.yml.  Returns a
        dict with that key and ``enforce_permissions`` True, or a dict with a
        None key and ``enforce_permissions`` False when no key is configured.

        Called once from the class body below, hence no ``self`` parameter.
        """
        for config_file in ['application.yml', 'application.default.yml']:
            with open(os.path.join(run_test_server.SERVICES_SRC_DIR,
                                   "api", "config", config_file)) as f:
                # NOTE(review): yaml.load can construct arbitrary objects.
                # These files come from the source tree, not from users, but
                # yaml.safe_load would be the safer choice -- confirm the
                # config files only use plain YAML before switching.
                rails_config = yaml.load(f.read())
                for config_section in ['test', 'common']:
                    try:
                        key = rails_config[config_section]["blob_signing_key"]
                    except (KeyError, TypeError):
                        # Section missing, not a mapping, or has no key.
                        pass
                    else:
                        return {'blob_signing_key': key,
                                'enforce_permissions': True}
        # Same key names as the dict returned above (this used to be
        # misspelled 'blog_signing_key').
        return {'blob_signing_key': None, 'enforce_permissions': False}

    MAIN_SERVER = {}
    KEEP_SERVER = _getKeepServerConfig()
    PROJECT_UUID = run_test_server.fixture('groups')['aproject']['uuid']

    @classmethod
    def setUpClass(cls):
        super(ArvPutIntegrationTest, cls).setUpClass()
        # Environment for arv-put subprocesses: a copy of ours, with
        # PYTHONPATH set so the child can import what this process can.
        cls.ENVIRON = os.environ.copy()
        cls.ENVIRON['PYTHONPATH'] = ':'.join(sys.path)

    def setUp(self):
        super(ArvPutIntegrationTest, self).setUp()
        arv_put.api_client = None

    def authorize_with(self, token_name):
        """Authorize as token_name in this process and in ENVIRON.

        Copies the resulting API host, insecure flag and token settings into
        the subprocess environment and installs a new API client in arv_put.
        """
        run_test_server.authorize_with(token_name)
        for v in ["ARVADOS_API_HOST",
                  "ARVADOS_API_HOST_INSECURE",
                  "ARVADOS_API_TOKEN"]:
            self.ENVIRON[v] = arvados.config.settings()[v]
        arv_put.api_client = arvados.api('v1')

    def current_user(self):
        """Return the API server's record for the currently authorized user."""
        return arv_put.api_client.users().current().execute()

    def test_check_real_project_found(self):
        self.authorize_with('active')
        self.assertTrue(
            arv_put.desired_project_uuid(arv_put.api_client,
                                         self.PROJECT_UUID, 0),
            "did not correctly find test fixture project")

    def test_check_error_finding_nonexistent_uuid(self):
        BAD_UUID = 'zzzzz-zzzzz-zzzzzzzzzzzzzzz'
        self.authorize_with('active')
        try:
            result = arv_put.desired_project_uuid(arv_put.api_client, BAD_UUID,
                                                  0)
        except ValueError as error:
            # str(error) instead of error.message: the latter is a
            # Python 2-only attribute.
            self.assertIn(BAD_UUID, str(error))
        else:
            self.assertFalse(result, "incorrectly found nonexistent project")

    def test_check_error_finding_nonexistent_project(self):
        BAD_UUID = 'zzzzz-tpzed-zzzzzzzzzzzzzzz'
        self.authorize_with('active')
        with self.assertRaises(apiclient.errors.HttpError):
            arv_put.desired_project_uuid(arv_put.api_client, BAD_UUID, 0)

    def test_short_put_from_stdin(self):
        # Have to run this as an integration test since arv-put can't
        # read from the tests' stdin.
        # arv-put usually can't stat(os.path.realpath('/dev/stdin')) in this
        # case, because the /proc entry is already gone by the time it tries.
        pipe = subprocess.Popen(
            [sys.executable, arv_put.__file__, '--stream'],
            stdin=subprocess.PIPE, stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT, env=self.ENVIRON)
        pipe.stdin.write('stdin test\n')
        pipe.stdin.close()
        # Poll instead of wait() so a hung arv-put fails the test after
        # five seconds rather than blocking the suite.
        deadline = time.time() + 5
        while (pipe.poll() is None) and (time.time() < deadline):
            time.sleep(.1)
        returncode = pipe.poll()
        if returncode is None:
            pipe.terminate()
            self.fail("arv-put did not PUT from stdin within 5 seconds")
        elif returncode != 0:
            # Show arv-put's combined output to help diagnose the failure.
            sys.stdout.write(pipe.stdout.read())
            self.fail("arv-put returned exit code {}".format(returncode))
        self.assertIn('4a9c8b735dce4b5fa3acf221a0b13628+11', pipe.stdout.read())

    def test_ArvPutSignedManifest(self):
        # ArvPutSignedManifest runs "arv-put foo" and then attempts to get
        # the newly created manifest from the API server, testing to confirm
        # that the block locators in the returned manifest are signed.
        self.authorize_with('active')

        # Before doing anything, demonstrate that the collection
        # we're about to create is not present in our test fixture.
        manifest_uuid = "00b4e9f40ac4dd432ef89749f1c01e74+47"
        with self.assertRaises(apiclient.errors.HttpError):
            arv_put.api_client.collections().get(
                uuid=manifest_uuid).execute()

        datadir = self.make_tmpdir()
        with open(os.path.join(datadir, "foo"), "w") as f:
            f.write("The quick brown fox jumped over the lazy dog")
        p = subprocess.Popen([sys.executable, arv_put.__file__, datadir],
                             stdout=subprocess.PIPE, env=self.ENVIRON)
        (arvout, arverr) = p.communicate()
        # stderr is not piped above, so communicate() reports it as None.
        self.assertIsNone(arverr)
        self.assertEqual(p.returncode, 0)

        # The manifest text stored in the API server under the same
        # manifest UUID must use signed locators.
        c = arv_put.api_client.collections().get(uuid=manifest_uuid).execute()
        self.assertRegexpMatches(
            c['manifest_text'],
            r'^\. 08a008a01d498c404b0c30852b39d3b8\+44\+A[0-9a-f]+@[0-9a-f]+ 0:44:foo\n')

        os.remove(os.path.join(datadir, "foo"))
        os.rmdir(datadir)

    def run_and_find_collection(self, text, extra_args=None):
        """Feed text to an arv-put subprocess and return the new collection.

        Args:
            text: data written to arv-put's stdin.
            extra_args: optional sequence of additional command-line
                arguments for arv-put.

        Returns:
            The single collection whose portable_data_hash equals what
            arv-put printed on stdout; the test fails unless exactly one
            such collection exists.
        """
        self.authorize_with('active')
        # None instead of a shared mutable [] default; copy into a new list
        # so the caller's sequence is never touched.
        command = [sys.executable, arv_put.__file__] + list(extra_args or [])
        pipe = subprocess.Popen(
            command,
            stdin=subprocess.PIPE, stdout=subprocess.PIPE,
            stderr=subprocess.PIPE, env=self.ENVIRON)
        stdout, stderr = pipe.communicate(text)
        collection_list = arvados.api('v1').collections().list(
            filters=[['portable_data_hash', '=', stdout.strip()]]
            ).execute().get('items', [])
        self.assertEqual(1, len(collection_list))
        return collection_list[0]

    def test_put_collection_with_unnamed_project_link(self):
        collection = self.run_and_find_collection(
            "Test unnamed collection",
            ['--portable-data-hash', '--project-uuid', self.PROJECT_UUID])
        username = pwd.getpwuid(os.getuid()).pw_name
        self.assertRegexpMatches(
            collection['name'],
            r'^Saved at .* by {}@'.format(re.escape(username)))

    def test_put_collection_with_name_and_no_project(self):
        link_name = 'Test Collection Link in home project'
        collection = self.run_and_find_collection(
            "Test named collection in home project",
            ['--portable-data-hash', '--name', link_name])
        self.assertEqual(link_name, collection['name'])
        my_user_uuid = self.current_user()['uuid']
        self.assertEqual(my_user_uuid, collection['owner_uuid'])

    def test_put_collection_with_named_project_link(self):
        link_name = 'Test auto Collection Link'
        collection = self.run_and_find_collection(
            "Test named collection",
            ['--portable-data-hash',
             '--name', link_name,
             '--project-uuid', self.PROJECT_UUID])
        self.assertEqual(link_name, collection['name'])
554
555
# Run every test case in this module when the file is executed directly.
if __name__ == '__main__':
    unittest.main()