17004: Fix lingering resource error
[arvados.git] / sdk / python / tests / test_arv_get.py
index d89231776683cf82c1037bdbc52a79d7916c72b2..73ef2475b98a8bd24a5ba9d9cc067f667bde895d 100644 (file)
@@ -1,18 +1,28 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
 
+from __future__ import absolute_import
+from future.utils import listitems
 import io
+import logging
+import mock
+import os
+import re
 import shutil
 import tempfile
 
 import arvados
 import arvados.collection as collection
 import arvados.commands.get as arv_get
-import run_test_server
+from . import run_test_server
 
-from arvados_testutil import redirected_streams
+from . import arvados_testutil as tutil
+from .arvados_testutil import ArvadosBaseTestCase
 
-class ArvadosGetTestCase(run_test_server.TestCaseWithServers):
+class ArvadosGetTestCase(run_test_server.TestCaseWithServers,
+                         tutil.VersionChecker,
+                         ArvadosBaseTestCase):
     MAIN_SERVER = {}
     KEEP_SERVER = {}
 
@@ -21,64 +31,110 @@ class ArvadosGetTestCase(run_test_server.TestCaseWithServers):
         self.tempdir = tempfile.mkdtemp()
         self.col_loc, self.col_pdh, self.col_manifest = self.write_test_collection()
 
+        self.stdout = tutil.BytesIO()
+        self.stderr = tutil.StringIO()
+        self.loggingHandler = logging.StreamHandler(self.stderr)
+        self.loggingHandler.setFormatter(logging.Formatter('%(levelname)s: %(message)s'))
+        logging.getLogger().addHandler(self.loggingHandler)
+
     def tearDown(self):
+        logging.getLogger().removeHandler(self.loggingHandler)
         super(ArvadosGetTestCase, self).tearDown()
         shutil.rmtree(self.tempdir)
 
     def write_test_collection(self,
+                              strip_manifest=False,
                               contents = {
                                   'foo.txt' : 'foo',
                                   'bar.txt' : 'bar',
                                   'subdir/baz.txt' : 'baz',
                               }):
-        c = collection.Collection()
-        for path, data in contents.items():
-            with c.open(path, 'w') as f:
+        api = arvados.api()
+        c = collection.Collection(api_client=api)
+        for path, data in listitems(contents):
+            with c.open(path, 'wb') as f:
                 f.write(data)
         c.save_new()
-        return (c.manifest_locator(), c.portable_data_hash(), c.manifest_text())
+
+        api.close_connections()
+
+        return (c.manifest_locator(),
+                c.portable_data_hash(),
+                c.manifest_text(strip=strip_manifest))
 
     def run_get(self, args):
-        self.stdout = io.BytesIO()
-        self.stderr = io.BytesIO()
+        self.stdout.seek(0, 0)
+        self.stdout.truncate(0)
+        self.stderr.seek(0, 0)
+        self.stderr.truncate(0)
         return arv_get.main(args, self.stdout, self.stderr)
 
     def test_version_argument(self):
-        err = io.BytesIO()
-        out = io.BytesIO()
-        with redirected_streams(stdout=out, stderr=err):
+        with tutil.redirected_streams(
+                stdout=tutil.StringIO, stderr=tutil.StringIO) as (out, err):
             with self.assertRaises(SystemExit):
                 self.run_get(['--version'])
-        self.assertEqual(out.getvalue(), '')
-        self.assertRegexpMatches(err.getvalue(), "[0-9]+\.[0-9]+\.[0-9]+")
+        self.assertVersionOutput(out, err)
 
     def test_get_single_file(self):
         # Get the file using the collection's locator
         r = self.run_get(["{}/subdir/baz.txt".format(self.col_loc), '-'])
         self.assertEqual(0, r)
-        self.assertEqual('baz', self.stdout.getvalue())
+        self.assertEqual(b'baz', self.stdout.getvalue())
         # Then, try by PDH
         r = self.run_get(["{}/subdir/baz.txt".format(self.col_pdh), '-'])
         self.assertEqual(0, r)
-        self.assertEqual('baz', self.stdout.getvalue())
+        self.assertEqual(b'baz', self.stdout.getvalue())
+
+    def test_get_block(self):
+        # Get raw data using a block locator
+        blk = re.search(' (acbd18\S+\+A\S+) ', self.col_manifest).group(1)
+        r = self.run_get([blk, '-'])
+        self.assertEqual(0, r)
+        self.assertEqual(b'foo', self.stdout.getvalue())
 
     def test_get_multiple_files(self):
         # Download the entire collection to the temp directory
         r = self.run_get(["{}/".format(self.col_loc), self.tempdir])
         self.assertEqual(0, r)
-        with open("{}/foo.txt".format(self.tempdir), "r") as f:
+        with open(os.path.join(self.tempdir, "foo.txt"), "r") as f:
             self.assertEqual("foo", f.read())
-        with open("{}/bar.txt".format(self.tempdir), "r") as f:
+        with open(os.path.join(self.tempdir, "bar.txt"), "r") as f:
             self.assertEqual("bar", f.read())
-        with open("{}/subdir/baz.txt".format(self.tempdir), "r") as f:
+        with open(os.path.join(self.tempdir, "subdir", "baz.txt"), "r") as f:
             self.assertEqual("baz", f.read())
 
-    def test_get_collection_manifest(self):
-        # Get the collection manifest
+    def test_get_collection_unstripped_manifest(self):
+        dummy_token = "+Axxxxxxx"
+        # Get the collection manifest by UUID
         r = self.run_get([self.col_loc, self.tempdir])
         self.assertEqual(0, r)
-        with open("{}/{}".format(self.tempdir, self.col_loc), "r") as f:
-            self.assertEqual(self.col_manifest, f.read())
+        m_from_collection = re.sub(r"\+A[0-9a-f@]+", dummy_token, self.col_manifest)
+        with open(os.path.join(self.tempdir, self.col_loc), "r") as f:
+            # Replace manifest tokens before comparison to avoid races
+            m_from_file = re.sub(r"\+A[0-9a-f@]+", dummy_token, f.read())
+            self.assertEqual(m_from_collection, m_from_file)
+        # Get the collection manifest by PDH
+        r = self.run_get([self.col_pdh, self.tempdir])
+        self.assertEqual(0, r)
+        with open(os.path.join(self.tempdir, self.col_pdh), "r") as f:
+            # Replace manifest tokens before comparison to avoid races
+            m_from_file = re.sub(r"\+A[0-9a-f@]+", dummy_token, f.read())
+            self.assertEqual(m_from_collection, m_from_file)
+
+    def test_get_collection_stripped_manifest(self):
+        col_loc, col_pdh, col_manifest = self.write_test_collection(
+            strip_manifest=True)
+        # Get the collection manifest by UUID
+        r = self.run_get(['--strip-manifest', col_loc, self.tempdir])
+        self.assertEqual(0, r)
+        with open(os.path.join(self.tempdir, col_loc), "r") as f:
+            self.assertEqual(col_manifest, f.read())
+        # Get the collection manifest by PDH
+        r = self.run_get(['--strip-manifest', col_pdh, self.tempdir])
+        self.assertEqual(0, r)
+        with open(os.path.join(self.tempdir, col_pdh), "r") as f:
+            self.assertEqual(col_manifest, f.read())
 
     def test_invalid_collection(self):
         # Asking for an invalid collection should generate an error.
@@ -99,10 +155,57 @@ class ArvadosGetTestCase(run_test_server.TestCaseWithServers):
     def test_preexistent_destination(self):
         # Asking to place a file with the same path as a local one should
         # generate an error and avoid overwrites.
-        with open("{}/foo.txt".format(self.tempdir), "w") as f:
+        with open(os.path.join(self.tempdir, "foo.txt"), "w") as f:
             f.write("another foo")
         r = self.run_get(["{}/foo.txt".format(self.col_loc), self.tempdir])
         self.assertNotEqual(0, r)
-        with open("{}/foo.txt".format(self.tempdir), "r") as f:
+        with open(os.path.join(self.tempdir, "foo.txt"), "r") as f:
             self.assertEqual("another foo", f.read())
 
+    def test_no_progress_when_stderr_not_a_tty(self):
+        # Create a collection with a big file (>64MB) to force the progress
+        # to be printed
+        c = collection.Collection()
+        with c.open('bigfile.txt', 'wb') as f:
+            for _ in range(65):
+                f.write("x" * 1024 * 1024)
+        c.save_new()
+        tmpdir = self.make_tmpdir()
+        # Simulate a TTY stderr
+        stderr = mock.MagicMock()
+        stdout = tutil.BytesIO()
+
+        # Confirm that progress is written to stderr when it is a tty
+        stderr.isatty.return_value = True
+        r = arv_get.main(['{}/bigfile.txt'.format(c.manifest_locator()),
+                          '{}/bigfile.txt'.format(tmpdir)],
+                         stdout, stderr)
+        self.assertEqual(0, r)
+        self.assertEqual(b'', stdout.getvalue())
+        self.assertTrue(stderr.write.called)
+
+        # Clean up and reset stderr mock
+        os.remove('{}/bigfile.txt'.format(tmpdir))
+        stderr = mock.MagicMock()
+        stdout = tutil.BytesIO()
+
+        # Confirm that progress is not written to stderr when it isn't a tty
+        stderr.isatty.return_value = False
+        r = arv_get.main(['{}/bigfile.txt'.format(c.manifest_locator()),
+                          '{}/bigfile.txt'.format(tmpdir)],
+                         stdout, stderr)
+        self.assertEqual(0, r)
+        self.assertEqual(b'', stdout.getvalue())
+        self.assertFalse(stderr.write.called)
+
+    request_id_regex = r'INFO: X-Request-Id: req-[a-z0-9]{20}\n'
+
+    def test_request_id_logging_on(self):
+        r = self.run_get(["-v", "{}/".format(self.col_loc), self.tempdir])
+        self.assertEqual(0, r)
+        self.assertRegex(self.stderr.getvalue(), self.request_id_regex)
+
+    def test_request_id_logging_off(self):
+        r = self.run_get(["{}/".format(self.col_loc), self.tempdir])
+        self.assertEqual(0, r)
+        self.assertNotRegex(self.stderr.getvalue(), self.request_id_regex)