15954: Fix api pidfile race.
[arvados.git] / sdk / python / tests / test_collections.py
index 4e777bdf5f94c12992bcb9eb8bf61d264fbce364..66f062c167250b2873ea6a047554b5986f17ee46 100644 (file)
@@ -14,6 +14,8 @@ import random
 import re
 import sys
 import tempfile
 import re
 import sys
 import tempfile
+import datetime
+import ciso8601
 import time
 import unittest
 
 import time
 import unittest
 
@@ -802,6 +804,18 @@ class CollectionMethods(run_test_server.TestCaseWithServers):
         self.assertEqual(fn0, c.items()[0][0])
         self.assertEqual(fn1, c.items()[1][0])
 
         self.assertEqual(fn0, c.items()[0][0])
         self.assertEqual(fn1, c.items()[1][0])
 
+    def test_get_properties(self):
+        c = Collection()
+        self.assertEqual(c.get_properties(), {})
+        c.save_new(properties={"foo":"bar"})
+        self.assertEqual(c.get_properties(), {"foo":"bar"})
+
+    def test_get_trash_at(self):
+        c = Collection()
+        self.assertEqual(c.get_trash_at(), None)
+        c.save_new(trash_at=datetime.datetime(2111, 1, 1, 11, 11, 11, 111111))
+        self.assertEqual(c.get_trash_at(), ciso8601.parse_datetime('2111-01-01T11:11:11.111111000Z'))
+
 
 class CollectionOpenModes(run_test_server.TestCaseWithServers):
 
 
 class CollectionOpenModes(run_test_server.TestCaseWithServers):
 
@@ -822,17 +836,58 @@ class CollectionOpenModes(run_test_server.TestCaseWithServers):
         with c.open('foo', 'wb') as f:
             f.write('foo')
         for mode in ['r', 'rt', 'r+', 'rt+', 'w', 'wt', 'a', 'at']:
         with c.open('foo', 'wb') as f:
             f.write('foo')
         for mode in ['r', 'rt', 'r+', 'rt+', 'w', 'wt', 'a', 'at']:
-            if sys.version_info >= (3, 0):
-                with self.assertRaises(NotImplementedError):
-                    c.open('foo', mode)
-            else:
-                with c.open('foo', mode) as f:
-                    if mode[0] == 'r' and '+' not in mode:
-                        self.assertEqual('foo', f.read(3))
-                    else:
-                        f.write('bar')
-                        f.seek(-3, os.SEEK_CUR)
-                        self.assertEqual('bar', f.read(3))
+            with c.open('foo', mode) as f:
+                if mode[0] == 'r' and '+' not in mode:
+                    self.assertEqual('foo', f.read(3))
+                else:
+                    f.write('bar')
+                    f.seek(0, os.SEEK_SET)
+                    self.assertEqual('bar', f.read(3))
+
+
+class TextModes(run_test_server.TestCaseWithServers):
+
+    def setUp(self):
+        arvados.config.KEEP_BLOCK_SIZE = 4
+        if sys.version_info < (3, 0):
+            import unicodedata
+            self.sailboat = unicodedata.lookup('SAILBOAT')
+            self.snowman = unicodedata.lookup('SNOWMAN')
+        else:
+            self.sailboat = '\N{SAILBOAT}'
+            self.snowman = '\N{SNOWMAN}'
+
+    def tearDown(self):
+        arvados.config.KEEP_BLOCK_SIZE = 2 ** 26
+
+    def test_read_sailboat_across_block_boundary(self):
+        c = Collection()
+        f = c.open('sailboats', 'wb')
+        data = self.sailboat.encode('utf-8')
+        f.write(data)
+        f.write(data[:1])
+        f.write(data[1:])
+        f.write(b'\n')
+        f.close()
+        self.assertRegex(c.portable_manifest_text(), r'\+4 .*\+3 ')
+
+        f = c.open('sailboats', 'r')
+        string = f.readline()
+        self.assertEqual(string, self.sailboat+self.sailboat+'\n')
+        f.close()
+
+    def test_write_snowman_across_block_boundary(self):
+        c = Collection()
+        f = c.open('snowmany', 'w')
+        data = self.snowman
+        f.write(data+data+'\n'+data+'\n')
+        f.close()
+        self.assertRegex(c.portable_manifest_text(), r'\+4 .*\+4 .*\+3 ')
+
+        f = c.open('snowmany', 'r')
+        self.assertEqual(f.readline(), self.snowman+self.snowman+'\n')
+        self.assertEqual(f.readline(), self.snowman+'\n')
+        f.close()
 
 
 class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
 
 
 class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
@@ -897,10 +952,49 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
         self.assertIs(c.find("./nonexistant.txt"), None)
         self.assertIs(c.find("./nonexistantsubdir/nonexistant.txt"), None)
 
         self.assertIs(c.find("./nonexistant.txt"), None)
         self.assertIs(c.find("./nonexistantsubdir/nonexistant.txt"), None)
 
+    def test_escaped_paths_dont_get_unescaped_on_manifest(self):
+        # Dir & file names are literally '\056' (escaped form: \134056)
+        manifest = './\\134056\\040Test d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\134056\n'
+        c = Collection(manifest)
+        self.assertEqual(c.portable_manifest_text(), manifest)
+
+    def test_other_special_chars_on_file_token(self):
+        cases = [
+            ('\\000', '\0'),
+            ('\\011', '\t'),
+            ('\\012', '\n'),
+            ('\\072', ':'),
+            ('\\134400', '\\400'),
+        ]
+        for encoded, decoded in cases:
+            manifest = '. d41d8cd98f00b204e9800998ecf8427e+0 0:0:some%sfile.txt\n' % encoded
+            c = Collection(manifest)
+            self.assertEqual(c.portable_manifest_text(), manifest)
+            self.assertIn('some%sfile.txt' % decoded, c.keys())
+
+    def test_escaped_paths_do_get_unescaped_on_listing(self):
+        # Dir & file names are literally '\056' (escaped form: \134056)
+        manifest = './\\134056\\040Test d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\134056\n'
+        c = Collection(manifest)
+        self.assertIn('\\056 Test', c.keys())
+        self.assertIn('\\056', c['\\056 Test'].keys())
+
+    def test_make_empty_dir_with_escaped_chars(self):
+        c = Collection()
+        c.mkdirs('./Empty\\056Dir')
+        self.assertEqual(c.portable_manifest_text(),
+                         './Empty\\134056Dir d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n')
+
+    def test_make_empty_dir_with_spaces(self):
+        c = Collection()
+        c.mkdirs('./foo bar/baz waz')
+        self.assertEqual(c.portable_manifest_text(),
+                         './foo\\040bar/baz\\040waz d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n')
+
     def test_remove_in_subdir(self):
         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
         c.remove("foo/count2.txt")
     def test_remove_in_subdir(self):
         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
         c.remove("foo/count2.txt")
-        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.portable_manifest_text())
+        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n", c.portable_manifest_text())
 
     def test_remove_empty_subdir(self):
         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
 
     def test_remove_empty_subdir(self):
         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
@@ -1148,11 +1242,14 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
 class NewCollectionTestCaseWithServersAndTokens(run_test_server.TestCaseWithServers):
     MAIN_SERVER = {}
     KEEP_SERVER = {}
 class NewCollectionTestCaseWithServersAndTokens(run_test_server.TestCaseWithServers):
     MAIN_SERVER = {}
     KEEP_SERVER = {}
+    local_locator_re = r"[0-9a-f]{32}\+\d+\+A[a-f0-9]{40}@[a-f0-9]{8}"
+    remote_locator_re = r"[0-9a-f]{32}\+\d+\+R[a-z]{5}-[a-f0-9]{40}@[a-f0-9]{8}"
 
     def setUp(self):
         self.keep_put = getattr(arvados.keep.KeepClient, 'put')
 
 
     def setUp(self):
         self.keep_put = getattr(arvados.keep.KeepClient, 'put')
 
-    def test_repacked_block_submission_get_permission_token(self):
+    @mock.patch('arvados.keep.KeepClient.put', autospec=True)
+    def test_repacked_block_submission_get_permission_token(self, mocked_put):
         '''
         Make sure that those blocks that are committed after repacking small ones,
         get their permission tokens assigned on the collection manifest.
         '''
         Make sure that those blocks that are committed after repacking small ones,
         get their permission tokens assigned on the collection manifest.
@@ -1162,19 +1259,65 @@ class NewCollectionTestCaseWithServersAndTokens(run_test_server.TestCaseWithServ
             time.sleep(1)
             return self.keep_put(*args, **kwargs)
 
             time.sleep(1)
             return self.keep_put(*args, **kwargs)
 
-        re_locator = "[0-9a-f]{32}\+\d+\+A[a-f0-9]{40}@[a-f0-9]{8}"
-
-        with mock.patch('arvados.keep.KeepClient.put', autospec=True) as mocked_put:
-            mocked_put.side_effect = wrapped_keep_put
-            c = Collection()
-            # Write 70 files ~1MiB each so we force to produce 1 big block by repacking
-            # small ones before finishing the upload.
-            for i in range(70):
-                f = c.open("file_{}.txt".format(i), 'wb')
-                f.write(random.choice('abcdefghijklmnopqrstuvwxyz') * (2**20+i))
-                f.close(flush=False)
-            # We should get 2 blocks with their tokens
-            self.assertEqual(len(re.findall(re_locator, c.manifest_text())), 2)
+        mocked_put.side_effect = wrapped_keep_put
+        c = Collection()
+        # Write 70 files ~1MiB each so we force to produce 1 big block by repacking
+        # small ones before finishing the upload.
+        for i in range(70):
+            f = c.open("file_{}.txt".format(i), 'wb')
+            f.write(random.choice('abcdefghijklmnopqrstuvwxyz') * (2**20+i))
+            f.close(flush=False)
+        # We should get 2 blocks with their tokens
+        self.assertEqual(len(re.findall(self.local_locator_re, c.manifest_text())), 2)
+
+    @mock.patch('arvados.keep.KeepClient.refresh_signature')
+    def test_copy_remote_blocks_on_save_new(self, rs_mock):
+        remote_block_loc = "acbd18db4cc2f85cedef654fccc4a4d8+3+Remote-" + "a" * 40 + "@abcdef01"
+        local_block_loc = "acbd18db4cc2f85cedef654fccc4a4d8+3+A" + "b" * 40 + "@abcdef01"
+        rs_mock.return_value = local_block_loc
+        c = Collection(". " + remote_block_loc + " 0:3:foofile.txt\n")
+        self.assertEqual(
+            len(re.findall(self.remote_locator_re, c.manifest_text())), 1)
+        self.assertEqual(
+            len(re.findall(self.local_locator_re, c.manifest_text())), 0)
+        c.save_new()
+        rs_mock.assert_called()
+        self.assertEqual(
+            len(re.findall(self.remote_locator_re, c.manifest_text())), 0)
+        self.assertEqual(
+            len(re.findall(self.local_locator_re, c.manifest_text())), 1)
+
+    @mock.patch('arvados.keep.KeepClient.refresh_signature')
+    def test_copy_remote_blocks_on_save(self, rs_mock):
+        remote_block_loc = "acbd18db4cc2f85cedef654fccc4a4d8+3+Remote-" + "a" * 40 + "@abcdef01"
+        local_block_loc = "acbd18db4cc2f85cedef654fccc4a4d8+3+A" + "b" * 40 + "@abcdef01"
+        rs_mock.return_value = local_block_loc
+        # Remote collection
+        remote_c = Collection(". " + remote_block_loc + " 0:3:foofile.txt\n")
+        self.assertEqual(
+            len(re.findall(self.remote_locator_re, remote_c.manifest_text())), 1)
+        # Local collection
+        local_c = Collection()
+        with local_c.open('barfile.txt', 'wb') as f:
+            f.write('bar')
+        local_c.save_new()
+        self.assertEqual(
+            len(re.findall(self.local_locator_re, local_c.manifest_text())), 1)
+        self.assertEqual(
+            len(re.findall(self.remote_locator_re, local_c.manifest_text())), 0)
+        # Copy remote file to local collection
+        local_c.copy('./foofile.txt', './copied/foofile.txt', remote_c)
+        self.assertEqual(
+            len(re.findall(self.local_locator_re, local_c.manifest_text())), 1)
+        self.assertEqual(
+            len(re.findall(self.remote_locator_re, local_c.manifest_text())), 1)
+        # Save local collection: remote block should be copied
+        local_c.save()
+        rs_mock.assert_called()
+        self.assertEqual(
+            len(re.findall(self.local_locator_re, local_c.manifest_text())), 2)
+        self.assertEqual(
+            len(re.findall(self.remote_locator_re, local_c.manifest_text())), 0)
 
 
 class NewCollectionTestCaseWithServers(run_test_server.TestCaseWithServers):
 
 
 class NewCollectionTestCaseWithServers(run_test_server.TestCaseWithServers):
@@ -1273,9 +1416,9 @@ class NewCollectionTestCaseWithServers(run_test_server.TestCaseWithServers):
 
         c["b1"].writeto(0, b"1b", 0)
 
 
         c["b1"].writeto(0, b"1b", 0)
 
-        self.assertEquals(c.manifest_text(), ". ed4f3f67c70b02b29c50ce1ea26666bd+4 0:2:b1 2:2:b2\n")
-        self.assertEquals(c["b1"].manifest_text(), ". ed4f3f67c70b02b29c50ce1ea26666bd+4 0:2:b1\n")
-        self.assertEquals(c["b2"].manifest_text(), ". ed4f3f67c70b02b29c50ce1ea26666bd+4 2:2:b2\n")
+        self.assertEqual(c.manifest_text(), ". ed4f3f67c70b02b29c50ce1ea26666bd+4 0:2:b1 2:2:b2\n")
+        self.assertEqual(c["b1"].manifest_text(), ". ed4f3f67c70b02b29c50ce1ea26666bd+4 0:2:b1\n")
+        self.assertEqual(c["b2"].manifest_text(), ". ed4f3f67c70b02b29c50ce1ea26666bd+4 2:2:b2\n")
 
 
 class CollectionCreateUpdateTest(run_test_server.TestCaseWithServers):
 
 
 class CollectionCreateUpdateTest(run_test_server.TestCaseWithServers):
@@ -1300,17 +1443,43 @@ class CollectionCreateUpdateTest(run_test_server.TestCaseWithServers):
 
     def test_create_and_save(self):
         c = self.create_count_txt()
 
     def test_create_and_save(self):
         c = self.create_count_txt()
-        c.save()
+        c.save(properties={'type' : 'Intermediate'},
+               storage_classes=['archive'],
+               trash_at=datetime.datetime(2111, 1, 1, 11, 11, 11, 111111))
+
         self.assertRegex(
             c.manifest_text(),
             r"^\. 781e5e245d69b566979b86e28d23f2c7\+10\+A[a-f0-9]{40}@[a-f0-9]{8} 0:10:count\.txt$",)
         self.assertRegex(
             c.manifest_text(),
             r"^\. 781e5e245d69b566979b86e28d23f2c7\+10\+A[a-f0-9]{40}@[a-f0-9]{8} 0:10:count\.txt$",)
+        self.assertEqual(c.api_response()["storage_classes_desired"], ['archive'])
+        self.assertEqual(c.api_response()["properties"], {'type' : 'Intermediate'})
+        self.assertEqual(c.api_response()["trash_at"], '2111-01-01T11:11:11.111111000Z')
+
 
     def test_create_and_save_new(self):
         c = self.create_count_txt()
 
     def test_create_and_save_new(self):
         c = self.create_count_txt()
-        c.save_new()
+        c.save_new(properties={'type' : 'Intermediate'},
+                   storage_classes=['archive'],
+                   trash_at=datetime.datetime(2111, 1, 1, 11, 11, 11, 111111))
+
         self.assertRegex(
             c.manifest_text(),
             r"^\. 781e5e245d69b566979b86e28d23f2c7\+10\+A[a-f0-9]{40}@[a-f0-9]{8} 0:10:count\.txt$",)
         self.assertRegex(
             c.manifest_text(),
             r"^\. 781e5e245d69b566979b86e28d23f2c7\+10\+A[a-f0-9]{40}@[a-f0-9]{8} 0:10:count\.txt$",)
+        self.assertEqual(c.api_response()["storage_classes_desired"], ['archive'])
+        self.assertEqual(c.api_response()["properties"], {'type' : 'Intermediate'})
+        self.assertEqual(c.api_response()["trash_at"], '2111-01-01T11:11:11.111111000Z')
+
+    def test_create_and_save_after_commiting(self):
+        c = self.create_count_txt()
+        c.save(properties={'type' : 'Intermediate'},
+               storage_classes=['hot'],
+               trash_at=datetime.datetime(2111, 1, 1, 11, 11, 11, 111111))
+        c.save(properties={'type' : 'Output'},
+               storage_classes=['cold'],
+               trash_at=datetime.datetime(2222, 2, 2, 22, 22, 22, 222222))
+
+        self.assertEqual(c.api_response()["storage_classes_desired"], ['cold'])
+        self.assertEqual(c.api_response()["properties"], {'type' : 'Output'})
+        self.assertEqual(c.api_response()["trash_at"], '2222-02-02T22:22:22.222222000Z')
 
     def test_create_diff_apply(self):
         c1 = self.create_count_txt()
 
     def test_create_diff_apply(self):
         c1 = self.create_count_txt()