X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/e627df2797dae0d6fa95da61f1a58bb9fafe8240..HEAD:/sdk/python/tests/test_arv_put.py diff --git a/sdk/python/tests/test_arv_put.py b/sdk/python/tests/test_arv_put.py index fac970c95a..3b8269f99c 100644 --- a/sdk/python/tests/test_arv_put.py +++ b/sdk/python/tests/test_arv_put.py @@ -4,20 +4,13 @@ # # SPDX-License-Identifier: Apache-2.0 -from __future__ import absolute_import -from __future__ import division -from future import standard_library -standard_library.install_aliases() -from builtins import str -from builtins import range -from functools import partial import apiclient import ciso8601 +import copy import datetime -import hashlib import json import logging -import mock +import multiprocessing import os import pwd import random @@ -31,10 +24,15 @@ import tempfile import time import unittest import uuid -import yaml + +import pytest +from functools import partial +from pathlib import Path +from unittest import mock import arvados import arvados.commands.put as arv_put +import arvados.util from . import arvados_testutil as tutil from .arvados_testutil import ArvadosBaseTestCase, fake_httplib2_response @@ -250,6 +248,76 @@ class ArvadosPutResumeCacheTest(ArvadosBaseTestCase): arv_put.ResumeCache, path) +class TestArvadosPutResumeCacheDir: + @pytest.fixture + def args(self, tmp_path): + return arv_put.parse_arguments([str(tmp_path)]) + + @pytest.mark.parametrize('cache_dir', [None, 'test-put']) + def test_cache_subdir(self, tmp_path, monkeypatch, cache_dir, args): + if cache_dir is None: + cache_dir = arv_put.ResumeCache.CACHE_DIR + else: + monkeypatch.setattr(arv_put.ResumeCache, 'CACHE_DIR', cache_dir) + monkeypatch.setattr(arvados.util._BaseDirectories, 'storage_path', tmp_path.__truediv__) + actual = arv_put.ResumeCache.make_path(args) + assert isinstance(actual, str) + assert Path(actual).parent == (tmp_path / cache_dir) + + def test_cache_relative_dir(self, tmp_path, monkeypatch, args): + expected = Path('rel', 'dir') + monkeypatch.setattr(Path, 'home', lambda: tmp_path) + monkeypatch.setattr(arv_put.ResumeCache, 'CACHE_DIR', str(expected)) + actual = arv_put.ResumeCache.make_path(args) + assert isinstance(actual, str) + parent = Path(actual).parent + assert parent == (tmp_path / expected) + assert parent.is_dir() + + def test_cache_absolute_dir(self, tmp_path, monkeypatch, args): + expected = tmp_path / 'arv-put' + monkeypatch.setattr(Path, 'home', lambda: tmp_path / 'home') + monkeypatch.setattr(arv_put.ResumeCache, 'CACHE_DIR', str(expected)) + actual = arv_put.ResumeCache.make_path(args) + assert isinstance(actual, str) + parent = Path(actual).parent + assert parent == expected + assert parent.is_dir() + + +class TestArvadosPutUploadJobCacheDir: + @pytest.mark.parametrize('cache_dir', [None, 'test-put']) + def test_cache_subdir(self, tmp_path, monkeypatch, cache_dir): + def storage_path(self, subdir='.', mode=0o700): + path = tmp_path / subdir + path.mkdir(mode=mode) + return path + if cache_dir is None: + cache_dir = arv_put.ArvPutUploadJob.CACHE_DIR + else: + monkeypatch.setattr(arv_put.ArvPutUploadJob, 'CACHE_DIR', cache_dir) + monkeypatch.setattr(arvados.util._BaseDirectories, 'storage_path', storage_path) + job = arv_put.ArvPutUploadJob([str(tmp_path)], use_cache=True) + job.destroy_cache() + assert Path(job._cache_filename).parent == (tmp_path / cache_dir) + + def test_cache_relative_dir(self, tmp_path, monkeypatch): + expected = Path('rel', 'dir') + monkeypatch.setattr(Path, 'home', lambda: tmp_path) + monkeypatch.setattr(arv_put.ArvPutUploadJob, 'CACHE_DIR', str(expected)) + job = arv_put.ArvPutUploadJob([str(tmp_path)], use_cache=True) + job.destroy_cache() + assert Path(job._cache_filename).parent == (tmp_path / expected) + + def test_cache_absolute_dir(self, tmp_path, monkeypatch): + expected = tmp_path / 'arv-put' + monkeypatch.setattr(Path, 'home', lambda: tmp_path / 'home') + monkeypatch.setattr(arv_put.ArvPutUploadJob, 'CACHE_DIR', str(expected)) + job = arv_put.ArvPutUploadJob([str(tmp_path)], use_cache=True) + job.destroy_cache() + assert Path(job._cache_filename).parent == expected + + class ArvPutUploadJobTest(run_test_server.TestCaseWithServers, ArvadosBaseTestCase): @@ -294,6 +362,26 @@ class ArvPutUploadJobTest(run_test_server.TestCaseWithServers, shutil.rmtree(self.small_files_dir) shutil.rmtree(self.tempdir_with_symlink) + def test_non_regular_files_are_ignored_except_symlinks_to_dirs(self): + def pfunc(x): + with open(x, 'w') as f: + f.write('test') + fifo_filename = 'fifo-file' + fifo_path = os.path.join(self.tempdir_with_symlink, fifo_filename) + self.assertTrue(os.path.islink(os.path.join(self.tempdir_with_symlink, 'linkeddir'))) + os.mkfifo(fifo_path) + producer = multiprocessing.Process(target=pfunc, args=(fifo_path,)) + producer.start() + cwriter = arv_put.ArvPutUploadJob([self.tempdir_with_symlink]) + cwriter.start(save_collection=False) + if producer.exitcode is None: + # If the producer is still running, kill it. This should always be + # before any assertion that may fail. + producer.terminate() + producer.join(1) + self.assertIn('linkeddir', cwriter.manifest_text()) + self.assertNotIn(fifo_filename, cwriter.manifest_text()) + def test_symlinks_are_followed_by_default(self): self.assertTrue(os.path.islink(os.path.join(self.tempdir_with_symlink, 'linkeddir'))) self.assertTrue(os.path.islink(os.path.join(self.tempdir_with_symlink, 'linkedfile'))) @@ -554,7 +642,7 @@ class ArvPutUploadJobTest(run_test_server.TestCaseWithServers, class CachedManifestValidationTest(ArvadosBaseTestCase): class MockedPut(arv_put.ArvPutUploadJob): def __init__(self, cached_manifest=None): - self._state = arv_put.ArvPutUploadJob.EMPTY_STATE + self._state = copy.deepcopy(arv_put.ArvPutUploadJob.EMPTY_STATE) self._state['manifest'] = cached_manifest self._api_client = mock.MagicMock() self.logger = mock.MagicMock() @@ -794,6 +882,7 @@ class ArvadosPutTest(run_test_server.TestCaseWithServers, def test_put_block_replication(self): self.call_main_on_test_file() + arv_put.api_client = None with mock.patch('arvados.collection.KeepClient.local_store_put') as put_mock: put_mock.return_value = 'acbd18db4cc2f85cedef654fccc4a4d8+3' self.call_main_on_test_file(['--replication', '1']) @@ -1055,43 +1144,53 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers, r'INFO: Cache expired, starting from scratch.*') self.assertEqual(p.returncode, 0) - def test_invalid_signature_invalidates_cache(self): - self.authorize_with('active') - tmpdir = self.make_tmpdir() - with open(os.path.join(tmpdir, 'somefile.txt'), 'w') as f: - f.write('foo') - # Upload a directory and get the cache file name - p = subprocess.Popen([sys.executable, arv_put.__file__, tmpdir], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - env=self.ENVIRON) - (_, err) = p.communicate() - self.assertRegex(err.decode(), r'INFO: Creating new cache file at ') - self.assertEqual(p.returncode, 0) - cache_filepath = re.search(r'INFO: Creating new cache file at (.*)', - err.decode()).groups()[0] - self.assertTrue(os.path.isfile(cache_filepath)) - # Load the cache file contents and modify the manifest to simulate - # an invalid access token - with open(cache_filepath, 'r') as c: - cache = json.load(c) - self.assertRegex(cache['manifest'], r'\+A\S+\@') - cache['manifest'] = re.sub( - r'\+A.*\@', - "+Aabcdef0123456789abcdef0123456789abcdef01@", - cache['manifest']) - with open(cache_filepath, 'w') as c: - c.write(json.dumps(cache)) - # Re-run the upload and expect to get an invalid cache message - p = subprocess.Popen([sys.executable, arv_put.__file__, tmpdir], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - env=self.ENVIRON) - (_, err) = p.communicate() - self.assertRegex( - err.decode(), - r'ERROR: arv-put: Resume cache contains invalid signature.*') - self.assertEqual(p.returncode, 1) + def test_invalid_signature_in_cache(self): + for batch_mode in [False, True]: + self.authorize_with('active') + tmpdir = self.make_tmpdir() + with open(os.path.join(tmpdir, 'somefile.txt'), 'w') as f: + f.write('foo') + # Upload a directory and get the cache file name + arv_put_args = [tmpdir] + if batch_mode: + arv_put_args = ['--batch'] + arv_put_args + p = subprocess.Popen([sys.executable, arv_put.__file__] + arv_put_args, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env=self.ENVIRON) + (_, err) = p.communicate() + self.assertRegex(err.decode(), r'INFO: Creating new cache file at ') + self.assertEqual(p.returncode, 0) + cache_filepath = re.search(r'INFO: Creating new cache file at (.*)', + err.decode()).groups()[0] + self.assertTrue(os.path.isfile(cache_filepath)) + # Load the cache file contents and modify the manifest to simulate + # an invalid access token + with open(cache_filepath, 'r') as c: + cache = json.load(c) + self.assertRegex(cache['manifest'], r'\+A\S+\@') + cache['manifest'] = re.sub( + r'\+A.*\@', + "+Aabcdef0123456789abcdef0123456789abcdef01@", + cache['manifest']) + with open(cache_filepath, 'w') as c: + c.write(json.dumps(cache)) + # Re-run the upload and expect to get an invalid cache message + p = subprocess.Popen([sys.executable, arv_put.__file__] + arv_put_args, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env=self.ENVIRON) + (_, err) = p.communicate() + if not batch_mode: + self.assertRegex( + err.decode(), + r'ERROR: arv-put: Resume cache contains invalid signature.*') + self.assertEqual(p.returncode, 1) + else: + self.assertRegex( + err.decode(), + r'Invalid signatures on cache file \'.*\' while being run in \'batch mode\' -- continuing anyways.*') + self.assertEqual(p.returncode, 0) def test_single_expired_signature_reuploads_file(self): self.authorize_with('active')