Merge branch '13330-intermediates-test' of git.curoverse.com:arvados into 13330-cwl...
[arvados.git] / sdk / python / tests / test_arv_put.py
1 # Copyright (C) The Arvados Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: Apache-2.0
4
5 from __future__ import absolute_import
6 from __future__ import division
7 from future import standard_library
8 standard_library.install_aliases()
9 from builtins import str
10 from builtins import range
11 import apiclient
12 import datetime
13 import hashlib
14 import json
15 import logging
16 import mock
17 import os
18 import pwd
19 import random
20 import re
21 import select
22 import shutil
23 import signal
24 import subprocess
25 import sys
26 import tempfile
27 import time
28 import unittest
29 import uuid
30 import yaml
31
32 import arvados
33 import arvados.commands.put as arv_put
34 from . import arvados_testutil as tutil
35
36 from .arvados_testutil import ArvadosBaseTestCase, fake_httplib2_response
37 from . import run_test_server
38
class ArvadosPutResumeCacheTest(ArvadosBaseTestCase):
    """Exercise arv_put.ResumeCache: cache path naming and cache storage.

    Tests that leave a cache open at the end of their body store it in
    ``self.last_cache`` so that ``tearDown`` can destroy it.
    """

    # Argument lists that test_cache_names_unique expects to map to
    # pairwise distinct cache paths.
    CACHE_ARGSET = [
        [],
        ['/dev/null'],
        ['/dev/null', '--filename', 'empty'],
        ['/tmp']
        ]

    def tearDown(self):
        """Destroy the cache recorded in ``self.last_cache``, if any."""
        super(ArvadosPutResumeCacheTest, self).tearDown()
        try:
            self.last_cache.destroy()
        except AttributeError:
            # This test never assigned self.last_cache.
            pass

    def cache_path_from_arglist(self, arglist):
        """Return the cache path computed from the parsed *arglist*."""
        return arv_put.ResumeCache.make_path(arv_put.parse_arguments(arglist))

    def _cache_on_deleted_tempfile(self):
        """Return a ResumeCache opened on a temp file that is then removed.

        The cache is constructed while the temporary file still exists;
        leaving the ``with`` block deletes the file, and the cache object
        is handed back to the caller afterwards.
        """
        with tempfile.NamedTemporaryFile() as cachefile:
            cache = arv_put.ResumeCache(cachefile.name)
        return cache

    def _save_and_reopen(self, thing):
        """Save *thing* in a new cache, close it, and reopen its file.

        The first cache is recorded in ``self.last_cache``; the reopened
        ResumeCache (built from ``self.last_cache.filename``) is returned.
        """
        self.last_cache = self._cache_on_deleted_tempfile()
        self.last_cache.save(thing)
        self.last_cache.close()
        return arv_put.ResumeCache(self.last_cache.filename)

    def test_cache_names_stable(self):
        """The same arguments always produce the same cache path."""
        for argset in self.CACHE_ARGSET:
            self.assertEqual(self.cache_path_from_arglist(argset),
                             self.cache_path_from_arglist(argset),
                             "cache name changed for {}".format(argset))

    def test_cache_names_unique(self):
        """Every entry of CACHE_ARGSET gets its own cache path."""
        results = []
        for argset in self.CACHE_ARGSET:
            path = self.cache_path_from_arglist(argset)
            self.assertNotIn(path, results)
            results.append(path)

    def test_cache_names_simple(self):
        """Cache file names contain only word characters, '-' and '.'."""
        # The goal here is to make sure the filename doesn't use characters
        # reserved by the filesystem.  Feel free to adjust this regexp as
        # long as it still does that.
        bad_chars = re.compile(r'[^-\.\w]')
        for argset in self.CACHE_ARGSET:
            path = self.cache_path_from_arglist(argset)
            self.assertFalse(bad_chars.search(os.path.basename(path)),
                             "path too exotic: {}".format(path))

    def test_cache_names_ignore_argument_order(self):
        """Reordering the command line arguments keeps the cache path."""
        self.assertEqual(
            self.cache_path_from_arglist(['a', 'b', 'c']),
            self.cache_path_from_arglist(['c', 'a', 'b']))
        self.assertEqual(
            self.cache_path_from_arglist(['-', '--filename', 'stdin']),
            self.cache_path_from_arglist(['--filename', 'stdin', '-']))

    def test_cache_names_differ_for_similar_paths(self):
        """Two separate paths and their concatenation get different names."""
        # This test needs names at / that don't exist on the real filesystem.
        self.assertNotEqual(
            self.cache_path_from_arglist(['/_arvputtest1', '/_arvputtest2']),
            self.cache_path_from_arglist(['/_arvputtest1/_arvputtest2']))

    def test_cache_names_ignore_irrelevant_arguments(self):
        """Options that do not apply to the input leave the path unchanged."""
        # Workaround: parse_arguments bails on --filename with a directory.
        path1 = self.cache_path_from_arglist(['/tmp'])
        args = arv_put.parse_arguments(['/tmp'])
        args.filename = 'tmp'
        path2 = arv_put.ResumeCache.make_path(args)
        self.assertEqual(path1, path2,
                         "cache path considered --filename for directory")
        self.assertEqual(
            self.cache_path_from_arglist(['-']),
            self.cache_path_from_arglist(['-', '--max-manifest-depth', '1']),
            "cache path considered --max-manifest-depth for file")

    def test_cache_names_treat_negative_manifest_depths_identically(self):
        """Manifest depths -1 and -2 produce the same cache path."""
        base_args = ['/tmp', '--max-manifest-depth']
        self.assertEqual(
            self.cache_path_from_arglist(base_args + ['-1']),
            self.cache_path_from_arglist(base_args + ['-2']))

    def test_cache_names_treat_stdin_consistently(self):
        """'-' and '/dev/stdin' produce the same cache path."""
        self.assertEqual(
            self.cache_path_from_arglist(['-', '--filename', 'test']),
            self.cache_path_from_arglist(['/dev/stdin', '--filename', 'test']))

    def test_cache_names_identical_for_synonymous_names(self):
        """A relative path, its real path and a symlink share a cache path."""
        self.assertEqual(
            self.cache_path_from_arglist(['.']),
            self.cache_path_from_arglist([os.path.realpath('.')]))
        testdir = self.make_tmpdir()
        looplink = os.path.join(testdir, 'loop')
        os.symlink(testdir, looplink)
        self.assertEqual(
            self.cache_path_from_arglist([testdir]),
            self.cache_path_from_arglist([looplink]))

    def test_cache_names_different_by_api_host(self):
        """Changing ARVADOS_API_HOST changes the cache path."""
        config = arvados.config.settings()
        orig_host = config.get('ARVADOS_API_HOST')
        try:
            name1 = self.cache_path_from_arglist(['.'])
            config['ARVADOS_API_HOST'] = 'x' + (orig_host or 'localhost')
            self.assertNotEqual(name1, self.cache_path_from_arglist(['.']))
        finally:
            # Put the settings back exactly as they were found.
            if orig_host is None:
                del config['ARVADOS_API_HOST']
            else:
                config['ARVADOS_API_HOST'] = orig_host

    @mock.patch('arvados.keep.KeepClient.head')
    def test_resume_cache_with_current_stream_locators(self, keep_client_head):
        """A cache holding '_current_stream_locators' can be reopened."""
        keep_client_head.side_effect = [True]
        thing = {}
        thing['_current_stream_locators'] = ['098f6bcd4621d373cade4e832627b4f6+4', '1f253c60a2306e0ee12fb6ce0c587904+6']
        resume_cache = self._save_and_reopen(thing)
        self.assertNotEqual(None, resume_cache)

    @mock.patch('arvados.keep.KeepClient.head')
    def test_resume_cache_with_finished_streams(self, keep_client_head):
        """A cache holding '_finished_streams' can be reopened."""
        keep_client_head.side_effect = [True]
        thing = {}
        thing['_finished_streams'] = [['.', ['098f6bcd4621d373cade4e832627b4f6+4', '1f253c60a2306e0ee12fb6ce0c587904+6']]]
        resume_cache = self._save_and_reopen(thing)
        self.assertNotEqual(None, resume_cache)

    @mock.patch('arvados.keep.KeepClient.head')
    def test_resume_cache_with_finished_streams_error_on_head(self, keep_client_head):
        """check_cache() does not raise when KeepClient.head fails."""
        keep_client_head.side_effect = Exception('Locator not found')
        thing = {}
        thing['_finished_streams'] = [['.', ['098f6bcd4621d373cade4e832627b4f6+4', '1f253c60a2306e0ee12fb6ce0c587904+6']]]
        resume_cache = self._save_and_reopen(thing)
        self.assertNotEqual(None, resume_cache)
        resume_cache.check_cache()

    def test_basic_cache_storage(self):
        """load() returns what save() stored."""
        thing = ['test', 'list']
        self.last_cache = self._cache_on_deleted_tempfile()
        self.last_cache.save(thing)
        self.assertEqual(thing, self.last_cache.load())

    def test_empty_cache(self):
        """load() on a cache that was never saved raises ValueError."""
        cache = self._cache_on_deleted_tempfile()
        self.assertRaises(ValueError, cache.load)

    def test_cache_persistent(self):
        """Data saved by one cache object is loaded by a later one."""
        thing = ['test', 'list']
        path = os.path.join(self.make_tmpdir(), 'cache')
        cache = arv_put.ResumeCache(path)
        cache.save(thing)
        cache.close()
        self.last_cache = arv_put.ResumeCache(path)
        self.assertEqual(thing, self.last_cache.load())

    def test_multiple_cache_writes(self):
        """A second, shorter save() fully replaces the first one."""
        thing = ['short', 'list']
        self.last_cache = self._cache_on_deleted_tempfile()
        # Start writing an object longer than the one we test, to make
        # sure the cache file gets truncated.
        self.last_cache.save(['long', 'long', 'list'])
        self.last_cache.save(thing)
        self.assertEqual(thing, self.last_cache.load())

    def test_cache_is_locked(self):
        """Opening a path already held by a live cache raises a conflict."""
        with tempfile.NamedTemporaryFile() as cachefile:
            # The first cache must stay referenced while the second
            # open is attempted.
            cache = arv_put.ResumeCache(cachefile.name)
            self.assertRaises(arv_put.ResumeCacheConflict,
                              arv_put.ResumeCache, cachefile.name)

    def test_cache_stays_locked(self):
        """The conflict is still reported after the cache has been saved."""
        with tempfile.NamedTemporaryFile() as cachefile:
            self.last_cache = arv_put.ResumeCache(cachefile.name)
            path = cachefile.name
        self.last_cache.save('test')
        self.assertRaises(arv_put.ResumeCacheConflict,
                          arv_put.ResumeCache, path)

    def test_destroy_cache(self):
        """After destroy(), the path can be reopened and load() fails."""
        cachefile = tempfile.NamedTemporaryFile(delete=False)
        # Only the path is needed; close our own handle right away so the
        # file object is not leaked.  delete=False keeps the file on disk.
        cachefile.close()
        try:
            cache = arv_put.ResumeCache(cachefile.name)
            cache.save('test')
            cache.destroy()
            try:
                arv_put.ResumeCache(cachefile.name)
            except arv_put.ResumeCacheConflict:
                self.fail("could not load cache after destroying it")
            self.assertRaises(ValueError, cache.load)
        finally:
            if os.path.exists(cachefile.name):
                os.unlink(cachefile.name)

    def test_restart_cache(self):
        """restart() empties the cache but the path stays in conflict."""
        path = os.path.join(self.make_tmpdir(), 'cache')
        cache = arv_put.ResumeCache(path)
        cache.save('test')
        cache.restart()
        self.assertRaises(ValueError, cache.load)
        self.assertRaises(arv_put.ResumeCacheConflict,
                          arv_put.ResumeCache, path)
247
248
class ArvPutUploadJobTest(run_test_server.TestCaseWithServers,
                          ArvadosBaseTestCase):
    """Tests for arv_put.ArvPutUploadJob: uploads, resume cache and dry run."""

    def setUp(self):
        """Create the temporary files and directories used by the tests.

        Sets ``self.tempdir`` (files '1'..'4' plus 'subdir/otherfile'),
        ``self.large_file_name``, ``self.small_files_dir``,
        ``self.tempdir_with_symlink`` and ``self.arvfile_write`` (the
        unpatched ``ArvadosFileWriter.write``).
        """
        super(ArvPutUploadJobTest, self).setUp()
        run_test_server.authorize_with('active')
        # Temp files creation
        self.tempdir = tempfile.mkdtemp()
        subdir = os.path.join(self.tempdir, 'subdir')
        os.mkdir(subdir)
        data = "x" * 1024 # 1 KiB
        for i in range(1, 5):
            with open(os.path.join(self.tempdir, str(i)), 'w') as f:
                f.write(data * i)
        with open(os.path.join(subdir, 'otherfile'), 'w') as f:
            f.write(data * 5)
        # Large temp file for resume test.  mkstemp() returns an open file
        # descriptor; wrap it so it gets closed instead of being leaked.
        large_fd, self.large_file_name = tempfile.mkstemp()
        with os.fdopen(large_fd, 'w') as fileobj:
            # Make sure to write just a little more than one block
            for _ in range((arvados.config.KEEP_BLOCK_SIZE>>20)+1):
                data = random.choice(['x', 'y', 'z']) * 1024 * 1024 # 1 MiB
                fileobj.write(data)
        # Temp dir containing small files to be repacked
        self.small_files_dir = tempfile.mkdtemp()
        data = 'y' * 1024 * 1024 # 1 MiB
        for i in range(1, 70):
            with open(os.path.join(self.small_files_dir, str(i)), 'w') as f:
                f.write(data + str(i))
        # Keep the real write method so wrappers can delegate to it while
        # ArvadosFileWriter.write is patched.
        self.arvfile_write = getattr(arvados.arvfile.ArvadosFileWriter, 'write')
        # Temp dir to hold a symlink to other temp dir
        self.tempdir_with_symlink = tempfile.mkdtemp()
        os.symlink(self.tempdir, os.path.join(self.tempdir_with_symlink, 'linkeddir'))
        os.symlink(os.path.join(self.tempdir, '1'),
                   os.path.join(self.tempdir_with_symlink, 'linkedfile'))

    def tearDown(self):
        """Remove everything that setUp created on disk."""
        super(ArvPutUploadJobTest, self).tearDown()
        shutil.rmtree(self.tempdir)
        os.unlink(self.large_file_name)
        shutil.rmtree(self.small_files_dir)
        shutil.rmtree(self.tempdir_with_symlink)

    def _start_interrupted_upload(self, final_checkpoint=False):
        """Upload the large file, aborting with SystemExit part way through.

        ``ArvadosFileWriter.write`` is patched so that the first write
        shorter than KEEP_BLOCK_SIZE triggers a checkpoint on the job and
        then raises SystemExit.  The helper asserts that some, but not all,
        of the file was written.

        Args:
            final_checkpoint: when true the checkpoint is requested with
                ``_update(final=True)``, otherwise with a plain
                ``_update()``.

        Returns:
            The interrupted ArvPutUploadJob; its cache is left in place.
        """
        def wrapped_write(*args, **kwargs):
            data = args[1]
            # Exit only on last block
            if len(data) < arvados.config.KEEP_BLOCK_SIZE:
                # Simulate a checkpoint before quitting.  ``writer`` is
                # bound below, before any write can happen.
                if final_checkpoint:
                    writer._update(final=True)
                else:
                    writer._update()
                raise SystemExit("Simulated error")
            return self.arvfile_write(*args, **kwargs)

        with mock.patch('arvados.arvfile.ArvadosFileWriter.write',
                        autospec=True) as mocked_write:
            mocked_write.side_effect = wrapped_write
            writer = arv_put.ArvPutUploadJob([self.large_file_name],
                                             replication_desired=1)
            with self.assertRaises(SystemExit):
                writer.start(save_collection=False)
            # Confirm that the file was partially uploaded
            self.assertGreater(writer.bytes_written, 0)
            self.assertLess(writer.bytes_written,
                            os.path.getsize(self.large_file_name))
        return writer

    def test_symlinks_are_followed_by_default(self):
        """Symlinked files and directories show up in the manifest."""
        cwriter = arv_put.ArvPutUploadJob([self.tempdir_with_symlink])
        cwriter.start(save_collection=False)
        self.assertIn('linkeddir', cwriter.manifest_text())
        self.assertIn('linkedfile', cwriter.manifest_text())
        cwriter.destroy_cache()

    def test_symlinks_are_not_followed_when_requested(self):
        """With follow_links=False the symlinks are left out."""
        cwriter = arv_put.ArvPutUploadJob([self.tempdir_with_symlink],
                                          follow_links=False)
        cwriter.start(save_collection=False)
        self.assertNotIn('linkeddir', cwriter.manifest_text())
        self.assertNotIn('linkedfile', cwriter.manifest_text())
        cwriter.destroy_cache()

    def test_passing_nonexistant_path_raise_exception(self):
        """Constructing a job for a missing path raises PathDoesNotExistError."""
        uuid_str = str(uuid.uuid4())
        with self.assertRaises(arv_put.PathDoesNotExistError):
            arv_put.ArvPutUploadJob(["/this/path/does/not/exist/{}".format(uuid_str)])

    def test_writer_works_without_cache(self):
        """Uploading /dev/null with resume=False yields the empty manifest."""
        cwriter = arv_put.ArvPutUploadJob(['/dev/null'], resume=False)
        cwriter.start(save_collection=False)
        self.assertEqual(". d41d8cd98f00b204e9800998ecf8427e+0 0:0:null\n", cwriter.manifest_text())

    def test_writer_works_with_cache(self):
        """A second job on the same file reports its bytes as skipped."""
        with tempfile.NamedTemporaryFile() as f:
            f.write(b'foo')
            f.flush()
            cwriter = arv_put.ArvPutUploadJob([f.name])
            cwriter.start(save_collection=False)
            self.assertEqual(0, cwriter.bytes_skipped)
            self.assertEqual(3, cwriter.bytes_written)
            # Don't destroy the cache, and start another upload
            cwriter_new = arv_put.ArvPutUploadJob([f.name])
            cwriter_new.start(save_collection=False)
            cwriter_new.destroy_cache()
            self.assertEqual(3, cwriter_new.bytes_skipped)
            self.assertEqual(3, cwriter_new.bytes_written)

    def make_progress_tester(self):
        """Return ``(progression, record_func)``.

        ``record_func(written, expected)`` appends its arguments as a
        tuple to the ``progression`` list.
        """
        progression = []
        def record_func(written, expected):
            progression.append((written, expected))
        return progression, record_func

    def test_progress_reporting(self):
        """The reporter is called with (bytes written, bytes expected)."""
        with tempfile.NamedTemporaryFile() as f:
            f.write(b'foo')
            f.flush()
            for expect_count in (None, 8):
                progression, reporter = self.make_progress_tester()
                cwriter = arv_put.ArvPutUploadJob([f.name],
                                                  reporter=reporter)
                cwriter.bytes_expected = expect_count
                cwriter.start(save_collection=False)
                cwriter.destroy_cache()
                self.assertIn((3, expect_count), progression)

    def test_writer_upload_directory(self):
        """Uploading the temp dir writes all five files (15 KiB in total)."""
        cwriter = arv_put.ArvPutUploadJob([self.tempdir])
        cwriter.start(save_collection=False)
        cwriter.destroy_cache()
        self.assertEqual(1024*(1+2+3+4+5), cwriter.bytes_written)

    def test_resume_large_file_upload(self):
        """A retried upload accounts for the part uploaded before the abort."""
        # Ensure block commit by requesting a final checkpoint.
        writer = self._start_interrupted_upload(final_checkpoint=True)
        # Retry the upload
        writer2 = arv_put.ArvPutUploadJob([self.large_file_name],
                                          replication_desired=1)
        writer2.start(save_collection=False)
        self.assertEqual(writer.bytes_written + writer2.bytes_written - writer2.bytes_skipped,
                         os.path.getsize(self.large_file_name))
        writer2.destroy_cache()

    # Test for bug #11002
    def test_graceful_exit_while_repacking_small_blocks(self):
        """A failing block commit must not surface as UnownedBlockError."""
        def wrapped_commit(*args, **kwargs):
            raise SystemExit("Simulated error")

        with mock.patch('arvados.arvfile._BlockManager.commit_bufferblock',
                        autospec=True) as mocked_commit:
            mocked_commit.side_effect = wrapped_commit
            # Upload a little more than 1 block; wrapped_commit makes the
            # first block commit fail.
            # arv-put should not then fail with an exception caused by
            # trying to commit the collection, as it's in an inconsistent
            # state.
            writer = arv_put.ArvPutUploadJob([self.small_files_dir],
                                             replication_desired=1)
            try:
                with self.assertRaises(SystemExit):
                    writer.start(save_collection=False)
            except arvados.arvfile.UnownedBlockError:
                self.fail("arv-put command is trying to use a corrupted BlockManager. See https://dev.arvados.org/issues/11002")
        writer.destroy_cache()

    def test_no_resume_when_asked(self):
        """With resume=False nothing is skipped on the retry."""
        self._start_interrupted_upload()
        # Retry the upload, this time without resume
        writer2 = arv_put.ArvPutUploadJob([self.large_file_name],
                                          replication_desired=1,
                                          resume=False)
        writer2.start(save_collection=False)
        self.assertEqual(writer2.bytes_skipped, 0)
        self.assertEqual(writer2.bytes_written,
                         os.path.getsize(self.large_file_name))
        writer2.destroy_cache()

    def test_no_resume_when_no_cache(self):
        """With use_cache=False nothing is skipped on the retry."""
        self._start_interrupted_upload()
        # Retry the upload, this time without cache usage
        writer2 = arv_put.ArvPutUploadJob([self.large_file_name],
                                          replication_desired=1,
                                          resume=False,
                                          use_cache=False)
        writer2.start(save_collection=False)
        self.assertEqual(writer2.bytes_skipped, 0)
        self.assertEqual(writer2.bytes_written,
                         os.path.getsize(self.large_file_name))
        writer2.destroy_cache()

    def test_dry_run_feature(self):
        """dry_run=True reports whether an upload is pending."""
        self._start_interrupted_upload()
        with self.assertRaises(arv_put.ArvPutUploadIsPending):
            # Retry the upload using dry_run to check if there is a pending upload
            arv_put.ArvPutUploadJob([self.large_file_name],
                                    replication_desired=1,
                                    dry_run=True)
        # Complete the pending upload
        writer3 = arv_put.ArvPutUploadJob([self.large_file_name],
                                          replication_desired=1)
        writer3.start(save_collection=False)
        with self.assertRaises(arv_put.ArvPutUploadNotPending):
            # Confirm there's no pending upload with dry_run=True
            arv_put.ArvPutUploadJob([self.large_file_name],
                                    replication_desired=1,
                                    dry_run=True)
        # Test obvious cases
        with self.assertRaises(arv_put.ArvPutUploadIsPending):
            arv_put.ArvPutUploadJob([self.large_file_name],
                                    replication_desired=1,
                                    dry_run=True,
                                    resume=False,
                                    use_cache=False)
        with self.assertRaises(arv_put.ArvPutUploadIsPending):
            arv_put.ArvPutUploadJob([self.large_file_name],
                                    replication_desired=1,
                                    dry_run=True,
                                    resume=False)
530
class ArvadosExpectedBytesTest(ArvadosBaseTestCase):
    """Check ArvPutUploadJob.bytes_expected for files, trees and devices."""

    # Size in bytes of this test module; used as a known file size.
    TEST_SIZE = os.path.getsize(__file__)

    def test_expected_bytes_for_file(self):
        """A single regular file is expected to upload its own size."""
        job = arv_put.ArvPutUploadJob([__file__])
        self.assertEqual(self.TEST_SIZE, job.bytes_expected)

    def test_expected_bytes_for_tree(self):
        """Directory contents, and extra files, add up."""
        tree = self.make_tmpdir()
        for copy_name in ('one', 'two'):
            shutil.copyfile(__file__, os.path.join(tree, copy_name))

        # Two copies inside the directory.
        tree_only = arv_put.ArvPutUploadJob([tree])
        self.assertEqual(self.TEST_SIZE * 2, tree_only.bytes_expected)
        # The same directory plus the original file.
        tree_and_file = arv_put.ArvPutUploadJob([tree, __file__])
        self.assertEqual(self.TEST_SIZE * 3, tree_and_file.bytes_expected)

    def test_expected_bytes_for_device(self):
        """Any device among the paths makes the expected size None."""
        for paths in (['/dev/null'], [__file__, '/dev/null']):
            job = arv_put.ArvPutUploadJob(paths)
            self.assertIsNone(job.bytes_expected)
556
557
class ArvadosPutReportTest(ArvadosBaseTestCase):
    """Check the text produced by arv_put's progress formatters."""

    def test_machine_progress(self):
        """machine_progress ends with the counts; an unknown total is -1."""
        cases = [(0, 1), (0, None), (1, None), (235, 9283)]
        for written, total in cases:
            if total is None:
                shown_total = -1
            else:
                shown_total = total
            expected_tail = ": {} written {} total\n".format(
                written, shown_total)
            report = arv_put.machine_progress(written, total)
            self.assertTrue(report.endswith(expected_tail))

    def test_known_human_progress(self):
        """human_progress starts with '\\r' and contains the percentage."""
        for written, total in [(0, 1), (2, 4), (45, 60)]:
            percentage = '{:.1%}'.format(1.0*written/total)
            report = arv_put.human_progress(written, total)
            self.assertTrue(report.startswith('\r'))
            self.assertIn(percentage, report)

    def test_unknown_human_progress(self):
        """With no total, the byte count appears as a standalone number."""
        for written in [1, 20, 300, 4000, 50000]:
            pattern = r'\b{}\b'.format(written)
            report = arv_put.human_progress(written, None)
            self.assertTrue(re.search(pattern, report))
577
578
class ArvPutLogFormatterTest(ArvadosBaseTestCase):
    """Check when ArvPutLogFormatter adds the request id to log lines."""

    # Pattern of the request id suffix looked for in formatted log lines.
    matcher = r'\(X-Request-Id: req-[a-z0-9]{20}\)'

    def setUp(self):
        """Attach a handler using ArvPutLogFormatter to the root logger."""
        super(ArvPutLogFormatterTest, self).setUp()
        self.stderr = tutil.StringIO()
        self.loggingHandler = logging.StreamHandler(self.stderr)
        self.loggingHandler.setFormatter(
            arv_put.ArvPutLogFormatter(arvados.util.new_request_id()))
        self.logger = logging.getLogger()
        # Remember the root logger level so tearDown can restore it; the
        # DEBUG level set here must not leak into other tests.
        self._saved_level = self.logger.level
        self.logger.addHandler(self.loggingHandler)
        self.logger.setLevel(logging.DEBUG)

    def tearDown(self):
        """Undo the root logger changes and release the capture buffer."""
        self.logger.setLevel(self._saved_level)
        self.logger.removeHandler(self.loggingHandler)
        self.stderr.close()
        self.stderr = None
        super(ArvPutLogFormatterTest, self).tearDown()

    def _log_lines(self):
        """Return the captured log lines, dropping the trailing empty piece."""
        return self.stderr.getvalue().split('\n')[:-1]

    def test_request_id_logged_only_once_on_error(self):
        """Only the first of two error messages carries the request id."""
        self.logger.error('Ooops, something bad happened.')
        self.logger.error('Another bad thing just happened.')
        log_lines = self._log_lines()
        self.assertEqual(2, len(log_lines))
        self.assertRegex(log_lines[0], self.matcher)
        self.assertNotRegex(log_lines[1], self.matcher)

    def test_request_id_logged_only_once_on_debug(self):
        """Only the first of two debug messages carries the request id."""
        self.logger.debug('This is just a debug message.')
        self.logger.debug('Another message, move along.')
        log_lines = self._log_lines()
        self.assertEqual(2, len(log_lines))
        self.assertRegex(log_lines[0], self.matcher)
        self.assertNotRegex(log_lines[1], self.matcher)

    def test_request_id_not_logged_on_info(self):
        """An info message is logged without the request id."""
        self.logger.info('This should be a useful message')
        log_lines = self._log_lines()
        self.assertEqual(1, len(log_lines))
        self.assertNotRegex(log_lines[0], self.matcher)
619
620 class ArvadosPutTest(run_test_server.TestCaseWithServers,
621                      ArvadosBaseTestCase,
622                      tutil.VersionChecker):
623     MAIN_SERVER = {}
624     Z_UUID = 'zzzzz-zzzzz-zzzzzzzzzzzzzzz'
625
626     def call_main_with_args(self, args):
627         self.main_stdout.seek(0, 0)
628         self.main_stdout.truncate(0)
629         self.main_stderr.seek(0, 0)
630         self.main_stderr.truncate(0)
631         return arv_put.main(args, self.main_stdout, self.main_stderr)
632
633     def call_main_on_test_file(self, args=[]):
634         with self.make_test_file() as testfile:
635             path = testfile.name
636             self.call_main_with_args(['--stream', '--no-progress'] + args + [path])
637         self.assertTrue(
638             os.path.exists(os.path.join(os.environ['KEEP_LOCAL_STORE'],
639                                         '098f6bcd4621d373cade4e832627b4f6')),
640             "did not find file stream in Keep store")
641
642     def setUp(self):
643         super(ArvadosPutTest, self).setUp()
644         run_test_server.authorize_with('active')
645         arv_put.api_client = None
646         self.main_stdout = tutil.StringIO()
647         self.main_stderr = tutil.StringIO()
648         self.loggingHandler = logging.StreamHandler(self.main_stderr)
649         self.loggingHandler.setFormatter(
650             arv_put.ArvPutLogFormatter(arvados.util.new_request_id()))
651         logging.getLogger().addHandler(self.loggingHandler)
652
653     def tearDown(self):
654         logging.getLogger().removeHandler(self.loggingHandler)
655         for outbuf in ['main_stdout', 'main_stderr']:
656             if hasattr(self, outbuf):
657                 getattr(self, outbuf).close()
658                 delattr(self, outbuf)
659         super(ArvadosPutTest, self).tearDown()
660
661     def test_version_argument(self):
662         with tutil.redirected_streams(
663                 stdout=tutil.StringIO, stderr=tutil.StringIO) as (out, err):
664             with self.assertRaises(SystemExit):
665                 self.call_main_with_args(['--version'])
666         self.assertVersionOutput(out, err)
667
    def test_simple_file_put(self):
        # Upload with no extra arguments; the helper itself asserts that
        # the data reached the local Keep store.
        self.call_main_on_test_file()
670
671     def test_put_with_unwriteable_cache_dir(self):
672         orig_cachedir = arv_put.ResumeCache.CACHE_DIR
673         cachedir = self.make_tmpdir()
674         os.chmod(cachedir, 0o0)
675         arv_put.ResumeCache.CACHE_DIR = cachedir
676         try:
677             self.call_main_on_test_file()
678         finally:
679             arv_put.ResumeCache.CACHE_DIR = orig_cachedir
680             os.chmod(cachedir, 0o700)
681
682     def test_put_with_unwritable_cache_subdir(self):
683         orig_cachedir = arv_put.ResumeCache.CACHE_DIR
684         cachedir = self.make_tmpdir()
685         os.chmod(cachedir, 0o0)
686         arv_put.ResumeCache.CACHE_DIR = os.path.join(cachedir, 'cachedir')
687         try:
688             self.call_main_on_test_file()
689         finally:
690             arv_put.ResumeCache.CACHE_DIR = orig_cachedir
691             os.chmod(cachedir, 0o700)
692
693     def test_put_block_replication(self):
694         self.call_main_on_test_file()
695         with mock.patch('arvados.collection.KeepClient.local_store_put') as put_mock:
696             put_mock.return_value = 'acbd18db4cc2f85cedef654fccc4a4d8+3'
697             self.call_main_on_test_file(['--replication', '1'])
698             self.call_main_on_test_file(['--replication', '4'])
699             self.call_main_on_test_file(['--replication', '5'])
700             self.assertEqual(
701                 [x[-1].get('copies') for x in put_mock.call_args_list],
702                 [1, 4, 5])
703
704     def test_normalize(self):
705         testfile1 = self.make_test_file()
706         testfile2 = self.make_test_file()
707         test_paths = [testfile1.name, testfile2.name]
708         # Reverse-sort the paths, so normalization must change their order.
709         test_paths.sort(reverse=True)
710         self.call_main_with_args(['--stream', '--no-progress', '--normalize'] +
711                                  test_paths)
712         manifest = self.main_stdout.getvalue()
713         # Assert the second file we specified appears first in the manifest.
714         file_indices = [manifest.find(':' + os.path.basename(path))
715                         for path in test_paths]
716         self.assertGreater(*file_indices)
717
718     def test_error_name_without_collection(self):
719         self.assertRaises(SystemExit, self.call_main_with_args,
720                           ['--name', 'test without Collection',
721                            '--stream', '/dev/null'])
722
723     def test_error_when_project_not_found(self):
724         self.assertRaises(SystemExit,
725                           self.call_main_with_args,
726                           ['--project-uuid', self.Z_UUID])
727
728     def test_error_bad_project_uuid(self):
729         self.assertRaises(SystemExit,
730                           self.call_main_with_args,
731                           ['--project-uuid', self.Z_UUID, '--stream'])
732
733     def test_error_when_multiple_storage_classes_specified(self):
734         self.assertRaises(SystemExit,
735                           self.call_main_with_args,
736                           ['--storage-classes', 'hot,cold'])
737
738     def test_error_when_excluding_absolute_path(self):
739         tmpdir = self.make_tmpdir()
740         self.assertRaises(SystemExit,
741                           self.call_main_with_args,
742                           ['--exclude', '/some/absolute/path/*',
743                            tmpdir])
744
745     def test_api_error_handling(self):
746         coll_save_mock = mock.Mock(name='arv.collection.Collection().save_new()')
747         coll_save_mock.side_effect = arvados.errors.ApiError(
748             fake_httplib2_response(403), b'{}')
749         with mock.patch('arvados.collection.Collection.save_new',
750                         new=coll_save_mock):
751             with self.assertRaises(SystemExit) as exc_test:
752                 self.call_main_with_args(['/dev/null'])
753             self.assertLess(0, exc_test.exception.args[0])
754             self.assertLess(0, coll_save_mock.call_count)
755             self.assertEqual("", self.main_stdout.getvalue())
756
757     def test_request_id_logging_on_error(self):
758         matcher = r'\(X-Request-Id: req-[a-z0-9]{20}\)\n'
759         coll_save_mock = mock.Mock(name='arv.collection.Collection().save_new()')
760         coll_save_mock.side_effect = arvados.errors.ApiError(
761             fake_httplib2_response(403), b'{}')
762         with mock.patch('arvados.collection.Collection.save_new',
763                         new=coll_save_mock):
764             with self.assertRaises(SystemExit) as exc_test:
765                 self.call_main_with_args(['/dev/null'])
766             self.assertRegex(
767                 self.main_stderr.getvalue(), matcher)
768
769
770 class ArvPutIntegrationTest(run_test_server.TestCaseWithServers,
771                             ArvadosBaseTestCase):
772     def _getKeepServerConfig():
773         for config_file, mandatory in [
774                 ['application.yml', False], ['application.default.yml', True]]:
775             path = os.path.join(run_test_server.SERVICES_SRC_DIR,
776                                 "api", "config", config_file)
777             if not mandatory and not os.path.exists(path):
778                 continue
779             with open(path) as f:
780                 rails_config = yaml.load(f.read())
781                 for config_section in ['test', 'common']:
782                     try:
783                         key = rails_config[config_section]["blob_signing_key"]
784                     except (KeyError, TypeError):
785                         pass
786                     else:
787                         return {'blob_signing_key': key,
788                                 'enforce_permissions': True}
789         return {'blog_signing_key': None, 'enforce_permissions': False}
790
    # Server settings; presumably read by
    # run_test_server.TestCaseWithServers -- TODO confirm.
    MAIN_SERVER = {}
    # Computed once, while the class body executes.
    KEEP_SERVER = _getKeepServerConfig()
    # UUID of the 'aproject' entry in the 'groups' test fixture.
    PROJECT_UUID = run_test_server.fixture('groups')['aproject']['uuid']
794
795     @classmethod
796     def setUpClass(cls):
797         super(ArvPutIntegrationTest, cls).setUpClass()
798         cls.ENVIRON = os.environ.copy()
799         cls.ENVIRON['PYTHONPATH'] = ':'.join(sys.path)
800
801     def datetime_to_hex(self, dt):
802         return hex(int(time.mktime(dt.timetuple())))[2:]
803
    def setUp(self):
        """Run the base setUp, then clear the API client held by arv_put."""
        super(ArvPutIntegrationTest, self).setUp()
        # Tests that need a client call authorize_with(), which installs
        # a new one.
        arv_put.api_client = None
807
808     def authorize_with(self, token_name):
809         run_test_server.authorize_with(token_name)
810         for v in ["ARVADOS_API_HOST",
811                   "ARVADOS_API_HOST_INSECURE",
812                   "ARVADOS_API_TOKEN"]:
813             self.ENVIRON[v] = arvados.config.settings()[v]
814         arv_put.api_client = arvados.api('v1')
815
816     def current_user(self):
817         return arv_put.api_client.users().current().execute()
818
819     def test_check_real_project_found(self):
820         self.authorize_with('active')
821         self.assertTrue(arv_put.desired_project_uuid(arv_put.api_client, self.PROJECT_UUID, 0),
822                         "did not correctly find test fixture project")
823
824     def test_check_error_finding_nonexistent_uuid(self):
825         BAD_UUID = 'zzzzz-zzzzz-zzzzzzzzzzzzzzz'
826         self.authorize_with('active')
827         try:
828             result = arv_put.desired_project_uuid(arv_put.api_client, BAD_UUID,
829                                                   0)
830         except ValueError as error:
831             self.assertIn(BAD_UUID, str(error))
832         else:
833             self.assertFalse(result, "incorrectly found nonexistent project")
834
835     def test_check_error_finding_nonexistent_project(self):
836         BAD_UUID = 'zzzzz-tpzed-zzzzzzzzzzzzzzz'
837         self.authorize_with('active')
838         with self.assertRaises(apiclient.errors.HttpError):
839             result = arv_put.desired_project_uuid(arv_put.api_client, BAD_UUID,
840                                                   0)
841
842     def test_short_put_from_stdin(self):
843         # Have to run this as an integration test since arv-put can't
844         # read from the tests' stdin.
845         # arv-put usually can't stat(os.path.realpath('/dev/stdin')) in this
846         # case, because the /proc entry is already gone by the time it tries.
847         pipe = subprocess.Popen(
848             [sys.executable, arv_put.__file__, '--stream'],
849             stdin=subprocess.PIPE, stdout=subprocess.PIPE,
850             stderr=subprocess.STDOUT, env=self.ENVIRON)
851         pipe.stdin.write(b'stdin test\n')
852         pipe.stdin.close()
853         deadline = time.time() + 5
854         while (pipe.poll() is None) and (time.time() < deadline):
855             time.sleep(.1)
856         returncode = pipe.poll()
857         if returncode is None:
858             pipe.terminate()
859             self.fail("arv-put did not PUT from stdin within 5 seconds")
860         elif returncode != 0:
861             sys.stdout.write(pipe.stdout.read())
862             self.fail("arv-put returned exit code {}".format(returncode))
863         self.assertIn('4a9c8b735dce4b5fa3acf221a0b13628+11',
864                       pipe.stdout.read().decode())
865
866     def test_sigint_logs_request_id(self):
867         # Start arv-put, give it a chance to start up, send SIGINT,
868         # and check that its output includes the X-Request-Id.
869         input_stream = subprocess.Popen(
870             ['sleep', '10'],
871             stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
872         pipe = subprocess.Popen(
873             [sys.executable, arv_put.__file__, '--stream'],
874             stdin=input_stream.stdout, stdout=subprocess.PIPE,
875             stderr=subprocess.STDOUT, env=self.ENVIRON)
876         # Wait for arv-put child process to print something (i.e., a
877         # log message) so we know its signal handler is installed.
878         select.select([pipe.stdout], [], [], 10)
879         pipe.send_signal(signal.SIGINT)
880         deadline = time.time() + 5
881         while (pipe.poll() is None) and (time.time() < deadline):
882             time.sleep(.1)
883         returncode = pipe.poll()
884         input_stream.terminate()
885         if returncode is None:
886             pipe.terminate()
887             self.fail("arv-put did not exit within 5 seconds")
888         self.assertRegex(pipe.stdout.read().decode(), r'\(X-Request-Id: req-[a-z0-9]{20}\)')
889
890     def test_ArvPutSignedManifest(self):
891         # ArvPutSignedManifest runs "arv-put foo" and then attempts to get
892         # the newly created manifest from the API server, testing to confirm
893         # that the block locators in the returned manifest are signed.
894         self.authorize_with('active')
895
896         # Before doing anything, demonstrate that the collection
897         # we're about to create is not present in our test fixture.
898         manifest_uuid = "00b4e9f40ac4dd432ef89749f1c01e74+47"
899         with self.assertRaises(apiclient.errors.HttpError):
900             notfound = arv_put.api_client.collections().get(
901                 uuid=manifest_uuid).execute()
902
903         datadir = self.make_tmpdir()
904         with open(os.path.join(datadir, "foo"), "w") as f:
905             f.write("The quick brown fox jumped over the lazy dog")
906         p = subprocess.Popen([sys.executable, arv_put.__file__,
907                               os.path.join(datadir, 'foo')],
908                              stdout=subprocess.PIPE,
909                              stderr=subprocess.PIPE,
910                              env=self.ENVIRON)
911         (out, err) = p.communicate()
912         self.assertRegex(err.decode(), r'INFO: Collection saved as ')
913         self.assertEqual(p.returncode, 0)
914
915         # The manifest text stored in the API server under the same
916         # manifest UUID must use signed locators.
917         c = arv_put.api_client.collections().get(uuid=manifest_uuid).execute()
918         self.assertRegex(
919             c['manifest_text'],
920             r'^\. 08a008a01d498c404b0c30852b39d3b8\+44\+A[0-9a-f]+@[0-9a-f]+ 0:44:foo\n')
921
922         os.remove(os.path.join(datadir, "foo"))
923         os.rmdir(datadir)
924
925     def run_and_find_collection(self, text, extra_args=[]):
926         self.authorize_with('active')
927         pipe = subprocess.Popen(
928             [sys.executable, arv_put.__file__] + extra_args,
929             stdin=subprocess.PIPE, stdout=subprocess.PIPE,
930             stderr=subprocess.PIPE, env=self.ENVIRON)
931         stdout, stderr = pipe.communicate(text.encode())
932         self.assertRegex(stderr.decode(), r'INFO: Collection (updated:|saved as)')
933         search_key = ('portable_data_hash'
934                       if '--portable-data-hash' in extra_args else 'uuid')
935         collection_list = arvados.api('v1').collections().list(
936             filters=[[search_key, '=', stdout.decode().strip()]]
937         ).execute().get('items', [])
938         self.assertEqual(1, len(collection_list))
939         return collection_list[0]
940
941     def test_expired_token_invalidates_cache(self):
942         self.authorize_with('active')
943         tmpdir = self.make_tmpdir()
944         with open(os.path.join(tmpdir, 'somefile.txt'), 'w') as f:
945             f.write('foo')
946         # Upload a directory and get the cache file name
947         p = subprocess.Popen([sys.executable, arv_put.__file__, tmpdir],
948                              stdout=subprocess.PIPE,
949                              stderr=subprocess.PIPE,
950                              env=self.ENVIRON)
951         (out, err) = p.communicate()
952         self.assertRegex(err.decode(), r'INFO: Creating new cache file at ')
953         self.assertEqual(p.returncode, 0)
954         cache_filepath = re.search(r'INFO: Creating new cache file at (.*)',
955                                    err.decode()).groups()[0]
956         self.assertTrue(os.path.isfile(cache_filepath))
957         # Load the cache file contents and modify the manifest to simulate
958         # an expired access token
959         with open(cache_filepath, 'r') as c:
960             cache = json.load(c)
961         self.assertRegex(cache['manifest'], r'\+A\S+\@')
962         a_month_ago = datetime.datetime.now() - datetime.timedelta(days=30)
963         cache['manifest'] = re.sub(
964             r'\@.*? ',
965             "@{} ".format(self.datetime_to_hex(a_month_ago)),
966             cache['manifest'])
967         with open(cache_filepath, 'w') as c:
968             c.write(json.dumps(cache))
969         # Re-run the upload and expect to get an invalid cache message
970         p = subprocess.Popen([sys.executable, arv_put.__file__, tmpdir],
971                              stdout=subprocess.PIPE,
972                              stderr=subprocess.PIPE,
973                              env=self.ENVIRON)
974         (out, err) = p.communicate()
975         self.assertRegex(
976             err.decode(),
977             r'WARNING: Uploaded file .* access token expired, will re-upload it from scratch')
978         self.assertEqual(p.returncode, 0)
979         # Confirm that the resulting cache is different from the last run.
980         with open(cache_filepath, 'r') as c2:
981             new_cache = json.load(c2)
982         self.assertNotEqual(cache['manifest'], new_cache['manifest'])
983
984     def test_put_collection_with_later_update(self):
985         tmpdir = self.make_tmpdir()
986         with open(os.path.join(tmpdir, 'file1'), 'w') as f:
987             f.write('Relaxing in basins at the end of inlets terminates the endless tests from the box')
988         col = self.run_and_find_collection("", ['--no-progress', tmpdir])
989         self.assertNotEqual(None, col['uuid'])
990         # Add a new file to the directory
991         with open(os.path.join(tmpdir, 'file2'), 'w') as f:
992             f.write('The quick brown fox jumped over the lazy dog')
993         updated_col = self.run_and_find_collection("", ['--no-progress', '--update-collection', col['uuid'], tmpdir])
994         self.assertEqual(col['uuid'], updated_col['uuid'])
995         # Get the manifest and check that the new file is being included
996         c = arv_put.api_client.collections().get(uuid=updated_col['uuid']).execute()
997         self.assertRegex(c['manifest_text'], r'^\..* .*:44:file2\n')
998
999     def test_upload_directory_reference_without_trailing_slash(self):
1000         tmpdir1 = self.make_tmpdir()
1001         tmpdir2 = self.make_tmpdir()
1002         with open(os.path.join(tmpdir1, 'foo'), 'w') as f:
1003             f.write('This is foo')
1004         with open(os.path.join(tmpdir2, 'bar'), 'w') as f:
1005             f.write('This is not foo')
1006         # Upload one directory and one file
1007         col = self.run_and_find_collection("", ['--no-progress',
1008                                                 tmpdir1,
1009                                                 os.path.join(tmpdir2, 'bar')])
1010         self.assertNotEqual(None, col['uuid'])
1011         c = arv_put.api_client.collections().get(uuid=col['uuid']).execute()
1012         # Check that 'foo' was written inside a subcollection
1013         # OTOH, 'bar' should have been directly uploaded on the root collection
1014         self.assertRegex(c['manifest_text'], r'^\. .*:15:bar\n\./.+ .*:11:foo\n')
1015
1016     def test_upload_directory_reference_with_trailing_slash(self):
1017         tmpdir1 = self.make_tmpdir()
1018         tmpdir2 = self.make_tmpdir()
1019         with open(os.path.join(tmpdir1, 'foo'), 'w') as f:
1020             f.write('This is foo')
1021         with open(os.path.join(tmpdir2, 'bar'), 'w') as f:
1022             f.write('This is not foo')
1023         # Upload one directory (with trailing slash) and one file
1024         col = self.run_and_find_collection("", ['--no-progress',
1025                                                 tmpdir1 + os.sep,
1026                                                 os.path.join(tmpdir2, 'bar')])
1027         self.assertNotEqual(None, col['uuid'])
1028         c = arv_put.api_client.collections().get(uuid=col['uuid']).execute()
1029         # Check that 'foo' and 'bar' were written at the same level
1030         self.assertRegex(c['manifest_text'], r'^\. .*:15:bar .*:11:foo\n')
1031
1032     def test_put_collection_with_high_redundancy(self):
1033         # Write empty data: we're not testing CollectionWriter, just
1034         # making sure collections.create tells the API server what our
1035         # desired replication level is.
1036         collection = self.run_and_find_collection("", ['--replication', '4'])
1037         self.assertEqual(4, collection['replication_desired'])
1038
1039     def test_put_collection_with_default_redundancy(self):
1040         collection = self.run_and_find_collection("")
1041         self.assertEqual(None, collection['replication_desired'])
1042
1043     def test_put_collection_with_unnamed_project_link(self):
1044         link = self.run_and_find_collection(
1045             "Test unnamed collection",
1046             ['--portable-data-hash', '--project-uuid', self.PROJECT_UUID])
1047         username = pwd.getpwuid(os.getuid()).pw_name
1048         self.assertRegex(
1049             link['name'],
1050             r'^Saved at .* by {}@'.format(re.escape(username)))
1051
1052     def test_put_collection_with_name_and_no_project(self):
1053         link_name = 'Test Collection Link in home project'
1054         collection = self.run_and_find_collection(
1055             "Test named collection in home project",
1056             ['--portable-data-hash', '--name', link_name])
1057         self.assertEqual(link_name, collection['name'])
1058         my_user_uuid = self.current_user()['uuid']
1059         self.assertEqual(my_user_uuid, collection['owner_uuid'])
1060
1061     def test_put_collection_with_named_project_link(self):
1062         link_name = 'Test auto Collection Link'
1063         collection = self.run_and_find_collection("Test named collection",
1064                                       ['--portable-data-hash',
1065                                        '--name', link_name,
1066                                        '--project-uuid', self.PROJECT_UUID])
1067         self.assertEqual(link_name, collection['name'])
1068
1069     def test_put_collection_with_storage_classes_specified(self):
1070         collection = self.run_and_find_collection("", ['--storage-classes', 'hot'])
1071
1072         self.assertEqual(len(collection['storage_classes_desired']), 1)
1073         self.assertEqual(collection['storage_classes_desired'][0], 'hot')
1074
1075     def test_put_collection_without_storage_classes_specified(self):
1076         collection = self.run_and_find_collection("")
1077
1078         self.assertEqual(len(collection['storage_classes_desired']), 1)
1079         self.assertEqual(collection['storage_classes_desired'][0], 'default')
1080
1081     def test_exclude_filename_pattern(self):
1082         tmpdir = self.make_tmpdir()
1083         tmpsubdir = os.path.join(tmpdir, 'subdir')
1084         os.mkdir(tmpsubdir)
1085         for fname in ['file1', 'file2', 'file3']:
1086             with open(os.path.join(tmpdir, "%s.txt" % fname), 'w') as f:
1087                 f.write("This is %s" % fname)
1088             with open(os.path.join(tmpsubdir, "%s.txt" % fname), 'w') as f:
1089                 f.write("This is %s" % fname)
1090         col = self.run_and_find_collection("", ['--no-progress',
1091                                                 '--exclude', '*2.txt',
1092                                                 '--exclude', 'file3.*',
1093                                                  tmpdir])
1094         self.assertNotEqual(None, col['uuid'])
1095         c = arv_put.api_client.collections().get(uuid=col['uuid']).execute()
1096         # None of the file2.txt & file3.txt should have been uploaded
1097         self.assertRegex(c['manifest_text'], r'^.*:file1.txt')
1098         self.assertNotRegex(c['manifest_text'], r'^.*:file2.txt')
1099         self.assertNotRegex(c['manifest_text'], r'^.*:file3.txt')
1100
1101     def test_exclude_filepath_pattern(self):
1102         tmpdir = self.make_tmpdir()
1103         tmpsubdir = os.path.join(tmpdir, 'subdir')
1104         os.mkdir(tmpsubdir)
1105         for fname in ['file1', 'file2', 'file3']:
1106             with open(os.path.join(tmpdir, "%s.txt" % fname), 'w') as f:
1107                 f.write("This is %s" % fname)
1108             with open(os.path.join(tmpsubdir, "%s.txt" % fname), 'w') as f:
1109                 f.write("This is %s" % fname)
1110         col = self.run_and_find_collection("", ['--no-progress',
1111                                                 '--exclude', 'subdir/*2.txt',
1112                                                 '--exclude', './file1.*',
1113                                                  tmpdir])
1114         self.assertNotEqual(None, col['uuid'])
1115         c = arv_put.api_client.collections().get(uuid=col['uuid']).execute()
1116         # Only tmpdir/file1.txt & tmpdir/subdir/file2.txt should have been excluded
1117         self.assertNotRegex(c['manifest_text'],
1118                             r'^\./%s.*:file1.txt' % os.path.basename(tmpdir))
1119         self.assertNotRegex(c['manifest_text'],
1120                             r'^\./%s/subdir.*:file2.txt' % os.path.basename(tmpdir))
1121         self.assertRegex(c['manifest_text'],
1122                          r'^\./%s.*:file2.txt' % os.path.basename(tmpdir))
1123         self.assertRegex(c['manifest_text'], r'^.*:file3.txt')
1124
1125     def test_silent_mode_no_errors(self):
1126         self.authorize_with('active')
1127         tmpdir = self.make_tmpdir()
1128         with open(os.path.join(tmpdir, 'test.txt'), 'w') as f:
1129             f.write('hello world')
1130         pipe = subprocess.Popen(
1131             [sys.executable, arv_put.__file__] + ['--silent', tmpdir],
1132             stdin=subprocess.PIPE, stdout=subprocess.PIPE,
1133             stderr=subprocess.PIPE, env=self.ENVIRON)
1134         stdout, stderr = pipe.communicate()
1135         # No console output should occur on normal operations
1136         self.assertNotRegex(stderr.decode(), r'.+')
1137         self.assertNotRegex(stdout.decode(), r'.+')
1138
1139     def test_silent_mode_does_not_avoid_error_messages(self):
1140         self.authorize_with('active')
1141         pipe = subprocess.Popen(
1142             [sys.executable, arv_put.__file__] + ['--silent',
1143                                                   '/path/not/existant'],
1144             stdin=subprocess.PIPE, stdout=subprocess.PIPE,
1145             stderr=subprocess.PIPE, env=self.ENVIRON)
1146         stdout, stderr = pipe.communicate()
1147         # Error message should be displayed when errors happen
1148         self.assertRegex(stderr.decode(), r'.*ERROR:.*')
1149         self.assertNotRegex(stdout.decode(), r'.+')
1150
1151
# Run the tests in this module when the file is executed directly.
if __name__ == '__main__':
    unittest.main()