sdk/python/tests/test_arv_get.py

   1 # Copyright (C) The Arvados Authors. All rights reserved.
   2 #
   3 # SPDX-License-Identifier: Apache-2.0
   4
   5 from __future__ import absolute_import
   6 from future.utils import listitems
   7 import io
   8 import logging
   9 import os
  10 import re
  11 import shutil
  12 import tempfile
  13
  14 from unittest import mock
  15
  16 import arvados
  17 import arvados.collection as collection
  18 import arvados.commands.get as arv_get
  19 from . import run_test_server
  20
  21 from . import arvados_testutil as tutil
  22 from .arvados_testutil import ArvadosBaseTestCase
  23
  24 class ArvadosGetTestCase(run_test_server.TestCaseWithServers,
  25                          tutil.VersionChecker,
  26                          ArvadosBaseTestCase):
  27     MAIN_SERVER = {}
  28     KEEP_SERVER = {}
  29
  30     def setUp(self):
  31         super(ArvadosGetTestCase, self).setUp()
  32         self.tempdir = tempfile.mkdtemp()
  33         self.col_loc, self.col_pdh, self.col_manifest = self.write_test_collection()
  34
  35         self.stdout = tutil.BytesIO()
  36         self.stderr = tutil.StringIO()
  37         self.loggingHandler = logging.StreamHandler(self.stderr)
  38         self.loggingHandler.setFormatter(logging.Formatter('%(levelname)s: %(message)s'))
  39         logging.getLogger().addHandler(self.loggingHandler)
  40
  41     def tearDown(self):
  42         logging.getLogger().removeHandler(self.loggingHandler)
  43         super(ArvadosGetTestCase, self).tearDown()
  44         shutil.rmtree(self.tempdir)
  45
  46     def write_test_collection(self,
  47                               strip_manifest=False,
  48                               contents = {
  49                                   'foo.txt' : 'foo',
  50                                   'bar.txt' : 'bar',
  51                                   'subdir/baz.txt' : 'baz',
  52                               }):
  53         api = arvados.api()
  54         c = collection.Collection(api_client=api)
  55         for path, data in listitems(contents):
  56             with c.open(path, 'wb') as f:
  57                 f.write(data)
  58         c.save_new()
  59
  60         api.close_connections()
  61
  62         return (c.manifest_locator(),
  63                 c.portable_data_hash(),
  64                 c.manifest_text(strip=strip_manifest))
  65
  66     def run_get(self, args):
  67         self.stdout.seek(0, 0)
  68         self.stdout.truncate(0)
  69         self.stderr.seek(0, 0)
  70         self.stderr.truncate(0)
  71         return arv_get.main(args, self.stdout, self.stderr)
  72
  73     def test_version_argument(self):
  74         with tutil.redirected_streams(
  75                 stdout=tutil.StringIO, stderr=tutil.StringIO) as (out, err):
  76             with self.assertRaises(SystemExit):
  77                 self.run_get(['--version'])
  78         self.assertVersionOutput(out, err)
  79
  80     def test_get_single_file(self):
  81         # Get the file using the collection's locator
  82         r = self.run_get(["{}/subdir/baz.txt".format(self.col_loc), '-'])
  83         self.assertEqual(0, r)
  84         self.assertEqual(b'baz', self.stdout.getvalue())
  85         # Then, try by PDH
  86         r = self.run_get(["{}/subdir/baz.txt".format(self.col_pdh), '-'])
  87         self.assertEqual(0, r)
  88         self.assertEqual(b'baz', self.stdout.getvalue())
  89
  90     def test_get_block(self):
  91         # Get raw data using a block locator
  92         blk = re.search(r' (acbd18\S+\+A\S+) ', self.col_manifest).group(1)
  93         r = self.run_get([blk, '-'])
  94         self.assertEqual(0, r)
  95         self.assertEqual(b'foo', self.stdout.getvalue())
  96
  97     def test_get_multiple_files(self):
  98         # Download the entire collection to the temp directory
  99         r = self.run_get(["{}/".format(self.col_loc), self.tempdir])
 100         self.assertEqual(0, r)
 101         with open(os.path.join(self.tempdir, "foo.txt"), "r") as f:
 102             self.assertEqual("foo", f.read())
 103         with open(os.path.join(self.tempdir, "bar.txt"), "r") as f:
 104             self.assertEqual("bar", f.read())
 105         with open(os.path.join(self.tempdir, "subdir", "baz.txt"), "r") as f:
 106             self.assertEqual("baz", f.read())
 107
 108     def test_get_collection_unstripped_manifest(self):
 109         dummy_token = "+Axxxxxxx"
 110         # Get the collection manifest by UUID
 111         r = self.run_get([self.col_loc, self.tempdir])
 112         self.assertEqual(0, r)
 113         m_from_collection = re.sub(r"\+A[0-9a-f@]+", dummy_token, self.col_manifest)
 114         with open(os.path.join(self.tempdir, self.col_loc), "r") as f:
 115             # Replace manifest tokens before comparison to avoid races
 116             m_from_file = re.sub(r"\+A[0-9a-f@]+", dummy_token, f.read())
 117             self.assertEqual(m_from_collection, m_from_file)
 118         # Get the collection manifest by PDH
 119         r = self.run_get([self.col_pdh, self.tempdir])
 120         self.assertEqual(0, r)
 121         with open(os.path.join(self.tempdir, self.col_pdh), "r") as f:
 122             # Replace manifest tokens before comparison to avoid races
 123             m_from_file = re.sub(r"\+A[0-9a-f@]+", dummy_token, f.read())
 124             self.assertEqual(m_from_collection, m_from_file)
 125
 126     def test_get_collection_stripped_manifest(self):
 127         col_loc, col_pdh, col_manifest = self.write_test_collection(
 128             strip_manifest=True)
 129         # Get the collection manifest by UUID
 130         r = self.run_get(['--strip-manifest', col_loc, self.tempdir])
 131         self.assertEqual(0, r)
 132         with open(os.path.join(self.tempdir, col_loc), "r") as f:
 133             self.assertEqual(col_manifest, f.read())
 134         # Get the collection manifest by PDH
 135         r = self.run_get(['--strip-manifest', col_pdh, self.tempdir])
 136         self.assertEqual(0, r)
 137         with open(os.path.join(self.tempdir, col_pdh), "r") as f:
 138             self.assertEqual(col_manifest, f.read())
 139
 140     def test_invalid_collection(self):
 141         # Asking for an invalid collection should generate an error.
 142         r = self.run_get(['this-uuid-seems-to-be-fake', self.tempdir])
 143         self.assertNotEqual(0, r)
 144
 145     def test_invalid_file_request(self):
 146         # Asking for an inexistant file within a collection should generate an error.
 147         r = self.run_get(["{}/im-not-here.txt".format(self.col_loc), self.tempdir])
 148         self.assertNotEqual(0, r)
 149
 150     def test_invalid_destination(self):
 151         # Asking to place the collection's files on a non existant directory
 152         # should generate an error.
 153         r = self.run_get([self.col_loc, "/fake/subdir/"])
 154         self.assertNotEqual(0, r)
 155
 156     def test_preexistent_destination(self):
 157         # Asking to place a file with the same path as a local one should
 158         # generate an error and avoid overwrites.
 159         with open(os.path.join(self.tempdir, "foo.txt"), "w") as f:
 160             f.write("another foo")
 161         r = self.run_get(["{}/foo.txt".format(self.col_loc), self.tempdir])
 162         self.assertNotEqual(0, r)
 163         with open(os.path.join(self.tempdir, "foo.txt"), "r") as f:
 164             self.assertEqual("another foo", f.read())
 165
 166     def test_no_progress_when_stderr_not_a_tty(self):
 167         # Create a collection with a big file (>64MB) to force the progress
 168         # to be printed
 169         c = collection.Collection()
 170         with c.open('bigfile.txt', 'wb') as f:
 171             for _ in range(65):
 172                 f.write("x" * 1024 * 1024)
 173         c.save_new()
 174         tmpdir = self.make_tmpdir()
 175         # Simulate a TTY stderr
 176         stderr = mock.MagicMock()
 177         stdout = tutil.BytesIO()
 178
 179         # Confirm that progress is written to stderr when is a tty
 180         stderr.isatty.return_value = True
 181         r = arv_get.main(['{}/bigfile.txt'.format(c.manifest_locator()),
 182                           '{}/bigfile.txt'.format(tmpdir)],
 183                          stdout, stderr)
 184         self.assertEqual(0, r)
 185         self.assertEqual(b'', stdout.getvalue())
 186         self.assertTrue(stderr.write.called)
 187
 188         # Clean up and reset stderr mock
 189         os.remove('{}/bigfile.txt'.format(tmpdir))
 190         stderr = mock.MagicMock()
 191         stdout = tutil.BytesIO()
 192
 193         # Confirm that progress is not written to stderr when isn't a tty
 194         stderr.isatty.return_value = False
 195         r = arv_get.main(['{}/bigfile.txt'.format(c.manifest_locator()),
 196                           '{}/bigfile.txt'.format(tmpdir)],
 197                          stdout, stderr)
 198         self.assertEqual(0, r)
 199         self.assertEqual(b'', stdout.getvalue())
 200         self.assertFalse(stderr.write.called)
 201
 202     request_id_regex = r'INFO: X-Request-Id: req-[a-z0-9]{20}\n'
 203
 204     def test_request_id_logging_on(self):
 205         r = self.run_get(["-v", "{}/".format(self.col_loc), self.tempdir])
 206         self.assertEqual(0, r)
 207         self.assertRegex(self.stderr.getvalue(), self.request_id_regex)
 208
 209     def test_request_id_logging_off(self):
 210         r = self.run_get(["{}/".format(self.col_loc), self.tempdir])
 211         self.assertEqual(0, r)
 212         self.assertNotRegex(self.stderr.getvalue(), self.request_id_regex)