def run_and_find_collection(self, text, extra_args=None):
    """Run arv-put in a subprocess and return the collection it reports.

    ``text`` is encoded and written to the child's stdin. ``extra_args``
    is an optional sequence of extra command-line arguments appended
    after the arv-put script path.

    The child's stderr must contain the "Collection saved as" or
    "Collection updated:" INFO line. The stripped stdout is then used to
    look the collection up through the API: by ``portable_data_hash``
    when ``--portable-data-hash`` is among ``extra_args``, by ``uuid``
    otherwise. Exactly one match is required, and that record is returned.
    """
    # A None sentinel avoids sharing one mutable default list across
    # calls; copying also lets callers pass any sequence (e.g. a tuple).
    extra_args = [] if extra_args is None else list(extra_args)
    self.authorize_with('active')
    pipe = subprocess.Popen(
        [sys.executable, arv_put.__file__] + extra_args,
        stdin=subprocess.PIPE, stdout=subprocess.PIPE,
        stderr=subprocess.PIPE, env=self.ENVIRON)
    stdout, stderr = pipe.communicate(text.encode())
    self.assertRegex(stderr.decode(), r'INFO: Collection (updated:|saved as)')
    # arv-put's stdout carries the identifier to search for; which field
    # it corresponds to depends on the flag passed by the caller.
    search_key = ('portable_data_hash'
                  if '--portable-data-hash' in extra_args else 'uuid')
    collection_list = arvados.api('v1').collections().list(
        filters=[[search_key, '=', stdout.decode().strip()]]
    ).execute().get('items', [])
    self.assertEqual(1, len(collection_list))
    return collection_list[0]
+
def test_expired_token_invalidates_cache(self):
    # A cached manifest whose access token has expired must be reported
    # as unusable, and the upload must start over with a fresh cache.
    self.authorize_with('active')
    upload_dir = self.make_tmpdir()
    with open(os.path.join(upload_dir, 'somefile.txt'), 'w') as src:
        src.write('foo')
    command = [sys.executable, arv_put.__file__, upload_dir]

    # First run: upload the directory and learn the cache file name
    # from the log written to stderr.
    first_run = subprocess.Popen(command,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 env=self.ENVIRON)
    _, first_err = first_run.communicate()
    self.assertRegex(first_err.decode(), r'INFO: Creating new cache file at ')
    self.assertEqual(first_run.returncode, 0)
    cache_match = re.search(r'INFO: Creating new cache file at (.*)',
                            first_err.decode())
    cache_filepath = cache_match.group(1)
    self.assertTrue(os.path.isfile(cache_filepath))

    # Rewrite the cached manifest so that the timestamp following each
    # '@' is a month old, simulating an expired access token.
    with open(cache_filepath, 'r') as cache_file:
        cache = json.load(cache_file)
    self.assertRegex(cache['manifest'], r'\+A\S+\@')
    a_month_ago = datetime.datetime.now() - datetime.timedelta(days=30)
    stale_stamp = "@{} ".format(self.datetime_to_hex(a_month_ago))
    cache['manifest'] = re.sub(r'\@.*? ', stale_stamp, cache['manifest'])
    with open(cache_filepath, 'w') as cache_file:
        json.dump(cache, cache_file)

    # Second run: the same upload must warn about the expired token
    # and still finish successfully.
    second_run = subprocess.Popen(command,
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE,
                                  env=self.ENVIRON)
    _, second_err = second_run.communicate()
    self.assertRegex(
        second_err.decode(),
        r'WARNING: Uploaded file .* access token expired, will re-upload it from scratch')
    self.assertEqual(second_run.returncode, 0)

    # The cache left behind must differ from the tampered one.
    with open(cache_filepath, 'r') as cache_file:
        new_cache = json.load(cache_file)
    self.assertNotEqual(cache['manifest'], new_cache['manifest'])
+
def test_put_collection_with_later_update(self):
    # Upload a directory, add a file to it, then update the same
    # collection in place with --update-collection.
    workdir = self.make_tmpdir()
    first_path = os.path.join(workdir, 'file1')
    with open(first_path, 'w') as fh:
        fh.write('Relaxing in basins at the end of inlets terminates the endless tests from the box')
    initial_args = ['--no-progress', workdir]
    col = self.run_and_find_collection("", initial_args)
    self.assertNotEqual(None, col['uuid'])

    # Grow the directory by one file before the second upload.
    second_path = os.path.join(workdir, 'file2')
    with open(second_path, 'w') as fh:
        fh.write('The quick brown fox jumped over the lazy dog')
    update_args = ['--no-progress', '--update-collection', col['uuid'], workdir]
    updated_col = self.run_and_find_collection("", update_args)
    self.assertEqual(col['uuid'], updated_col['uuid'])

    # The stored manifest must now list the 44-byte file2.
    request = arv_put.api_client.collections().get(uuid=updated_col['uuid'])
    stored = request.execute()
    self.assertRegex(stored['manifest_text'], r'^\..* .*:44:file2\n')
+
def test_upload_directory_reference_without_trailing_slash(self):
    # A directory argument given without a trailing slash is uploaded
    # as a subcollection; a plain file argument lands at the root.
    dir_with_foo = self.make_tmpdir()
    dir_with_bar = self.make_tmpdir()
    bar_path = os.path.join(dir_with_bar, 'bar')
    with open(os.path.join(dir_with_foo, 'foo'), 'w') as fh:
        fh.write('This is foo')
    with open(bar_path, 'w') as fh:
        fh.write('This is not foo')

    # One directory plus one individual file.
    put_args = ['--no-progress', dir_with_foo, bar_path]
    col = self.run_and_find_collection("", put_args)
    self.assertNotEqual(None, col['uuid'])
    stored = arv_put.api_client.collections().get(uuid=col['uuid']).execute()

    # 'bar' sits in the root stream, 'foo' in a nested stream after it.
    self.assertRegex(stored['manifest_text'],
                     r'^\. .*:15:bar\n\./.+ .*:11:foo\n')
+
def test_upload_directory_reference_with_trailing_slash(self):
    # A directory argument ending in the path separator has its
    # contents uploaded directly into the root of the collection.
    dir_with_foo = self.make_tmpdir()
    dir_with_bar = self.make_tmpdir()
    bar_path = os.path.join(dir_with_bar, 'bar')
    with open(os.path.join(dir_with_foo, 'foo'), 'w') as fh:
        fh.write('This is foo')
    with open(bar_path, 'w') as fh:
        fh.write('This is not foo')

    # One directory (trailing separator appended) plus one file.
    put_args = ['--no-progress', dir_with_foo + os.sep, bar_path]
    col = self.run_and_find_collection("", put_args)
    self.assertNotEqual(None, col['uuid'])
    stored = arv_put.api_client.collections().get(uuid=col['uuid']).execute()

    # Both files must appear side by side in the root stream.
    self.assertRegex(stored['manifest_text'],
                     r'^\. .*:15:bar .*:11:foo\n')
+
def test_put_collection_with_high_redundancy(self):
    # Empty input is enough here: the point is not to exercise
    # CollectionWriter, only to confirm that collections.create is
    # told which replication level we asked for.
    replication_args = ['--replication', '4']
    saved = self.run_and_find_collection("", replication_args)
    self.assertEqual(4, saved['replication_desired'])
+
def test_put_collection_with_default_redundancy(self):
    # Without --replication the saved record carries no desired level.
    saved = self.run_and_find_collection("")
    self.assertEqual(None, saved['replication_desired'])
+
def test_put_collection_with_unnamed_project_link(self):
    # With a project but no --name, the record gets an automatic
    # "Saved at ... by <user>@..." name.
    put_args = ['--portable-data-hash', '--project-uuid', self.PROJECT_UUID]
    saved = self.run_and_find_collection("Test unnamed collection", put_args)
    username = pwd.getpwuid(os.getuid()).pw_name
    # The login name is escaped so it is matched literally.
    expected_name = r'^Saved at .* by {}@'.format(re.escape(username))
    self.assertRegex(saved['name'], expected_name)
+
def test_put_collection_with_name_and_no_project(self):
    # A named upload without --project-uuid keeps the given name and
    # is owned by the current user.
    link_name = 'Test Collection Link in home project'
    put_args = ['--portable-data-hash', '--name', link_name]
    saved = self.run_and_find_collection(
        "Test named collection in home project", put_args)
    self.assertEqual(link_name, saved['name'])
    owner = self.current_user()['uuid']
    self.assertEqual(owner, saved['owner_uuid'])
+
def test_put_collection_with_named_project_link(self):
    # A named upload into a project keeps the requested name.
    link_name = 'Test auto Collection Link'
    put_args = [
        '--portable-data-hash',
        '--name', link_name,
        '--project-uuid', self.PROJECT_UUID,
    ]
    saved = self.run_and_find_collection("Test named collection", put_args)
    self.assertEqual(link_name, saved['name'])
+
def test_exclude_filename_pattern(self):
    # File-name exclusion patterns (no path separator) are expected to
    # apply at every level of the uploaded tree.
    tmpdir = self.make_tmpdir()
    tmpsubdir = os.path.join(tmpdir, 'subdir')
    os.mkdir(tmpsubdir)
    # Create file1..file3 both in the top directory and in subdir.
    for fname in ['file1', 'file2', 'file3']:
        for directory in (tmpdir, tmpsubdir):
            with open(os.path.join(directory, "%s.txt" % fname), 'w') as f:
                f.write("This is %s" % fname)
    col = self.run_and_find_collection("", ['--no-progress',
                                            '--exclude', '*2.txt',
                                            '--exclude', 'file3.*',
                                            tmpdir])
    self.assertNotEqual(None, col['uuid'])
    c = arv_put.api_client.collections().get(uuid=col['uuid']).execute()
    manifest = c['manifest_text']
    # None of the file2.txt & file3.txt should have been uploaded.
    # The patterns are deliberately unanchored: the manifest has one line
    # per stream, and a leading '^.*' would only ever inspect the first
    # line, leaving the copies under subdir unchecked.
    self.assertRegex(manifest, r':file1\.txt')
    self.assertNotRegex(manifest, r':file2\.txt')
    self.assertNotRegex(manifest, r':file3\.txt')
+
def test_exclude_filepath_pattern(self):
    # Path exclusion patterns (containing a separator) should only
    # remove the files at the exact relative location they name.
    tmpdir = self.make_tmpdir()
    tmpsubdir = os.path.join(tmpdir, 'subdir')
    os.mkdir(tmpsubdir)
    # Create file1..file3 both in the top directory and in subdir.
    for fname in ['file1', 'file2', 'file3']:
        for directory in (tmpdir, tmpsubdir):
            with open(os.path.join(directory, "%s.txt" % fname), 'w') as f:
                f.write("This is %s" % fname)
    col = self.run_and_find_collection("", ['--no-progress',
                                            '--exclude', 'subdir/*2.txt',
                                            '--exclude', './file1.*',
                                            tmpdir])
    self.assertNotEqual(None, col['uuid'])
    c = arv_put.api_client.collections().get(uuid=col['uuid']).execute()
    manifest = c['manifest_text']
    # Escape the directory name so it is matched literally.
    base = re.escape(os.path.basename(tmpdir))
    # Only tmpdir/file1.txt & tmpdir/subdir/file2.txt should have been
    # excluded. Each stream is its own manifest line, so the patterns use
    # (?m) to anchor at every line start, and the space after the stream
    # name keeps the top-level stream from also matching the subdir one.
    self.assertNotRegex(manifest,
                        r'(?m)^\./%s .*:file1\.txt' % base)
    self.assertNotRegex(manifest,
                        r'(?m)^\./%s/subdir .*:file2\.txt' % base)
    self.assertRegex(manifest,
                     r'(?m)^\./%s .*:file2\.txt' % base)
    self.assertRegex(manifest, r':file3\.txt')
+
def test_silent_mode_no_errors(self):
    # With --silent, a successful upload prints nothing at all.
    self.authorize_with('active')
    upload_dir = self.make_tmpdir()
    with open(os.path.join(upload_dir, 'test.txt'), 'w') as fh:
        fh.write('hello world')
    command = [sys.executable, arv_put.__file__, '--silent', upload_dir]
    proc = subprocess.Popen(
        command,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        env=self.ENVIRON)
    captured_out, captured_err = proc.communicate()
    # No console output should occur on normal operations
    self.assertNotRegex(captured_err.decode(), r'.+')
    self.assertNotRegex(captured_out.decode(), r'.+')
+
def test_silent_mode_does_not_avoid_error_messages(self):
    # --silent must not hide errors: a bad path still logs to stderr
    # while stdout stays empty.
    self.authorize_with('active')
    command = [sys.executable, arv_put.__file__,
               '--silent', '/path/not/existant']
    proc = subprocess.Popen(
        command,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        env=self.ENVIRON)
    captured_out, captured_err = proc.communicate()
    # Error message should be displayed when errors happen
    self.assertRegex(captured_err.decode(), r'.*ERROR:.*')
    self.assertNotRegex(captured_out.decode(), r'.+')
+