Fix manifest comparisons in new collection tests
[arvados.git] / sdk / python / tests / test_collections.py
index a56d4f68f157e0e76534da441c9707a8670960e7..b3df8803400f75f5684b7b0ee78bed91637ba530 100644 (file)
@@ -9,11 +9,11 @@ import arvados
 import copy
 import mock
 import os
-import pprint
 import random
 import re
 import sys
-import tempfile
+import datetime
+import ciso8601
 import time
 import unittest
 
@@ -320,7 +320,7 @@ class ArvadosCollectionsTest(run_test_server.TestCaseWithServers,
         def __init__(self, content, num_retries=0):
             self.content = content
 
-        def get(self, locator, num_retries=0):
+        def get(self, locator, num_retries=0, prefetch=False):
             return self.content[locator]
 
     def test_stream_reader(self):
@@ -802,6 +802,18 @@ class CollectionMethods(run_test_server.TestCaseWithServers):
         self.assertEqual(fn0, c.items()[0][0])
         self.assertEqual(fn1, c.items()[1][0])
 
+    def test_get_properties(self):
+        c = Collection()
+        self.assertEqual(c.get_properties(), {})
+        c.save_new(properties={"foo":"bar"})
+        self.assertEqual(c.get_properties(), {"foo":"bar"})
+
+    def test_get_trash_at(self):
+        c = Collection()
+        self.assertEqual(c.get_trash_at(), None)
+        c.save_new(trash_at=datetime.datetime(2111, 1, 1, 11, 11, 11, 111111))
+        self.assertEqual(c.get_trash_at(), ciso8601.parse_datetime('2111-01-01T11:11:11.111111000Z'))
+
 
 class CollectionOpenModes(run_test_server.TestCaseWithServers):
 
@@ -822,17 +834,58 @@ class CollectionOpenModes(run_test_server.TestCaseWithServers):
         with c.open('foo', 'wb') as f:
             f.write('foo')
         for mode in ['r', 'rt', 'r+', 'rt+', 'w', 'wt', 'a', 'at']:
-            if sys.version_info >= (3, 0):
-                with self.assertRaises(NotImplementedError):
-                    c.open('foo', mode)
-            else:
-                with c.open('foo', mode) as f:
-                    if mode[0] == 'r' and '+' not in mode:
-                        self.assertEqual('foo', f.read(3))
-                    else:
-                        f.write('bar')
-                        f.seek(-3, os.SEEK_CUR)
-                        self.assertEqual('bar', f.read(3))
+            with c.open('foo', mode) as f:
+                if mode[0] == 'r' and '+' not in mode:
+                    self.assertEqual('foo', f.read(3))
+                else:
+                    f.write('bar')
+                    f.seek(0, os.SEEK_SET)
+                    self.assertEqual('bar', f.read(3))
+
+
+class TextModes(run_test_server.TestCaseWithServers):
+
+    def setUp(self):
+        arvados.config.KEEP_BLOCK_SIZE = 4
+        if sys.version_info < (3, 0):
+            import unicodedata
+            self.sailboat = unicodedata.lookup('SAILBOAT')
+            self.snowman = unicodedata.lookup('SNOWMAN')
+        else:
+            self.sailboat = '\N{SAILBOAT}'
+            self.snowman = '\N{SNOWMAN}'
+
+    def tearDown(self):
+        arvados.config.KEEP_BLOCK_SIZE = 2 ** 26
+
+    def test_read_sailboat_across_block_boundary(self):
+        c = Collection()
+        f = c.open('sailboats', 'wb')
+        data = self.sailboat.encode('utf-8')
+        f.write(data)
+        f.write(data[:1])
+        f.write(data[1:])
+        f.write(b'\n')
+        f.close()
+        self.assertRegex(c.portable_manifest_text(), r'\+4 .*\+3 ')
+
+        f = c.open('sailboats', 'r')
+        string = f.readline()
+        self.assertEqual(string, self.sailboat+self.sailboat+'\n')
+        f.close()
+
+    def test_write_snowman_across_block_boundary(self):
+        c = Collection()
+        f = c.open('snowmany', 'w')
+        data = self.snowman
+        f.write(data+data+'\n'+data+'\n')
+        f.close()
+        self.assertRegex(c.portable_manifest_text(), r'\+4 .*\+4 .*\+3 ')
+
+        f = c.open('snowmany', 'r')
+        self.assertEqual(f.readline(), self.snowman+self.snowman+'\n')
+        self.assertEqual(f.readline(), self.snowman+'\n')
+        f.close()
 
 
 class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
@@ -843,7 +896,7 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
         c1.save_new()
         loc = c1.manifest_locator()
         c2 = Collection(loc)
-        self.assertEqual(c1.manifest_text, c2.manifest_text)
+        self.assertEqual(c1.manifest_text(), c2.manifest_text())
         self.assertEqual(c1.replication_desired, c2.replication_desired)
 
     def test_replication_desired_not_loaded_if_provided(self):
@@ -852,9 +905,40 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
         c1.save_new()
         loc = c1.manifest_locator()
         c2 = Collection(loc, replication_desired=2)
-        self.assertEqual(c1.manifest_text, c2.manifest_text)
+        self.assertEqual(c1.manifest_text(), c2.manifest_text())
         self.assertNotEqual(c1.replication_desired, c2.replication_desired)
 
+    def test_storage_classes_desired_kept_on_load(self):
+        m = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n'
+        c1 = Collection(m, storage_classes_desired=['archival'])
+        c1.save_new()
+        loc = c1.manifest_locator()
+        c2 = Collection(loc)
+        self.assertEqual(c1.manifest_text(), c2.manifest_text())
+        self.assertEqual(c1.storage_classes_desired(), c2.storage_classes_desired())
+
+    def test_storage_classes_change_after_save(self):
+        m = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n'
+        c1 = Collection(m, storage_classes_desired=['archival'])
+        c1.save_new()
+        loc = c1.manifest_locator()
+        c2 = Collection(loc)
+        self.assertEqual(['archival'], c2.storage_classes_desired())
+        c2.save(storage_classes=['highIO'])
+        self.assertEqual(['highIO'], c2.storage_classes_desired())
+        c3 = Collection(loc)
+        self.assertEqual(c1.manifest_text(), c3.manifest_text())
+        self.assertEqual(['highIO'], c3.storage_classes_desired())
+
+    def test_storage_classes_desired_not_loaded_if_provided(self):
+        m = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n'
+        c1 = Collection(m, storage_classes_desired=['archival'])
+        c1.save_new()
+        loc = c1.manifest_locator()
+        c2 = Collection(loc, storage_classes_desired=['default'])
+        self.assertEqual(c1.manifest_text(), c2.manifest_text())
+        self.assertNotEqual(c1.storage_classes_desired(), c2.storage_classes_desired())
+
     def test_init_manifest(self):
         m1 = """. 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt
 . 085c37f02916da1cad16f93c54d899b7+41 0:41:md5sum.txt
@@ -885,6 +969,20 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
         with self.assertRaises(arvados.errors.ArgumentError):
             c.remove("")
 
+    def test_remove_recursive(self):
+        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:a/b/c/d/efg.txt 0:10:xyz.txt\n')
+        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:xyz.txt\n./a/b/c/d 781e5e245d69b566979b86e28d23f2c7+10 0:10:efg.txt\n", c.portable_manifest_text())
+        self.assertIn("a", c)
+        self.assertEqual(1, len(c["a"].keys()))
+        # cannot remove non-empty directory with default recursive=False
+        with self.assertRaises(OSError):
+            c.remove("a/b")
+        with self.assertRaises(OSError):
+            c.remove("a/b/c/d")
+        c.remove("a/b", recursive=True)
+        self.assertEqual(0, len(c["a"].keys()))
+        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:xyz.txt\n./a d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n", c.portable_manifest_text())
+
     def test_find(self):
         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n')
         self.assertIs(c.find("."), c)
@@ -897,10 +995,49 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
         self.assertIs(c.find("./nonexistant.txt"), None)
         self.assertIs(c.find("./nonexistantsubdir/nonexistant.txt"), None)
 
+    def test_escaped_paths_dont_get_unescaped_on_manifest(self):
+        # Dir & file names are literally '\056' (escaped form: \134056)
+        manifest = './\\134056\\040Test d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\134056\n'
+        c = Collection(manifest)
+        self.assertEqual(c.portable_manifest_text(), manifest)
+
+    def test_other_special_chars_on_file_token(self):
+        cases = [
+            ('\\000', '\0'),
+            ('\\011', '\t'),
+            ('\\012', '\n'),
+            ('\\072', ':'),
+            ('\\134400', '\\400'),
+        ]
+        for encoded, decoded in cases:
+            manifest = '. d41d8cd98f00b204e9800998ecf8427e+0 0:0:some%sfile.txt\n' % encoded
+            c = Collection(manifest)
+            self.assertEqual(c.portable_manifest_text(), manifest)
+            self.assertIn('some%sfile.txt' % decoded, c.keys())
+
+    def test_escaped_paths_do_get_unescaped_on_listing(self):
+        # Dir & file names are literally '\056' (escaped form: \134056)
+        manifest = './\\134056\\040Test d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\134056\n'
+        c = Collection(manifest)
+        self.assertIn('\\056 Test', c.keys())
+        self.assertIn('\\056', c['\\056 Test'].keys())
+
+    def test_make_empty_dir_with_escaped_chars(self):
+        c = Collection()
+        c.mkdirs('./Empty\\056Dir')
+        self.assertEqual(c.portable_manifest_text(),
+                         './Empty\\134056Dir d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n')
+
+    def test_make_empty_dir_with_spaces(self):
+        c = Collection()
+        c.mkdirs('./foo bar/baz waz')
+        self.assertEqual(c.portable_manifest_text(),
+                         './foo\\040bar/baz\\040waz d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n')
+
     def test_remove_in_subdir(self):
         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
         c.remove("foo/count2.txt")
-        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.portable_manifest_text())
+        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n", c.portable_manifest_text())
 
     def test_remove_empty_subdir(self):
         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
@@ -1148,11 +1285,24 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
 class NewCollectionTestCaseWithServersAndTokens(run_test_server.TestCaseWithServers):
     MAIN_SERVER = {}
     KEEP_SERVER = {}
+    local_locator_re = r"[0-9a-f]{32}\+\d+\+A[a-f0-9]{40}@[a-f0-9]{8}"
+    remote_locator_re = r"[0-9a-f]{32}\+\d+\+R[a-z]{5}-[a-f0-9]{40}@[a-f0-9]{8}"
 
     def setUp(self):
         self.keep_put = getattr(arvados.keep.KeepClient, 'put')
 
-    def test_repacked_block_submission_get_permission_token(self):
+    @mock.patch('arvados.keep.KeepClient.put', autospec=True)
+    def test_storage_classes_desired(self, put_mock):
+        put_mock.side_effect = self.keep_put
+        c = Collection(storage_classes_desired=['default'])
+        with c.open("file.txt", 'wb') as f:
+            f.write('content')
+        c.save_new()
+        _, kwargs = put_mock.call_args
+        self.assertEqual(['default'], kwargs['classes'])
+
+    @mock.patch('arvados.keep.KeepClient.put', autospec=True)
+    def test_repacked_block_submission_get_permission_token(self, mocked_put):
         '''
         Make sure that those blocks that are committed after repacking small ones,
         get their permission tokens assigned on the collection manifest.
@@ -1162,22 +1312,87 @@ class NewCollectionTestCaseWithServersAndTokens(run_test_server.TestCaseWithServ
             time.sleep(1)
             return self.keep_put(*args, **kwargs)
 
-        re_locator = "[0-9a-f]{32}\+\d+\+A[a-f0-9]{40}@[a-f0-9]{8}"
-
-        with mock.patch('arvados.keep.KeepClient.put', autospec=True) as mocked_put:
-            mocked_put.side_effect = wrapped_keep_put
-            c = Collection()
-            # Write 70 files ~1MiB each so we force to produce 1 big block by repacking
-            # small ones before finishing the upload.
-            for i in range(70):
-                f = c.open("file_{}.txt".format(i), 'wb')
-                f.write(random.choice('abcdefghijklmnopqrstuvwxyz') * (2**20+i))
-                f.close(flush=False)
-            # We should get 2 blocks with their tokens
-            self.assertEqual(len(re.findall(re_locator, c.manifest_text())), 2)
+        mocked_put.side_effect = wrapped_keep_put
+        c = Collection()
+        # Write 70 files ~1MiB each so we force to produce 1 big block by repacking
+        # small ones before finishing the upload.
+        for i in range(70):
+            f = c.open("file_{}.txt".format(i), 'wb')
+            f.write(random.choice('abcdefghijklmnopqrstuvwxyz') * (2**20+i))
+            f.close(flush=False)
+        # We should get 2 blocks with their tokens
+        self.assertEqual(len(re.findall(self.local_locator_re, c.manifest_text())), 2)
+
+    @mock.patch('arvados.keep.KeepClient.refresh_signature')
+    def test_copy_remote_blocks_on_save_new(self, rs_mock):
+        remote_block_loc = "acbd18db4cc2f85cedef654fccc4a4d8+3+Remote-" + "a" * 40 + "@abcdef01"
+        local_block_loc = "acbd18db4cc2f85cedef654fccc4a4d8+3+A" + "b" * 40 + "@abcdef01"
+        rs_mock.return_value = local_block_loc
+        c = Collection(". " + remote_block_loc + " 0:3:foofile.txt\n")
+        self.assertEqual(
+            len(re.findall(self.remote_locator_re, c.manifest_text())), 1)
+        self.assertEqual(
+            len(re.findall(self.local_locator_re, c.manifest_text())), 0)
+        c.save_new()
+        rs_mock.assert_called()
+        self.assertEqual(
+            len(re.findall(self.remote_locator_re, c.manifest_text())), 0)
+        self.assertEqual(
+            len(re.findall(self.local_locator_re, c.manifest_text())), 1)
+
+    @mock.patch('arvados.keep.KeepClient.refresh_signature')
+    def test_copy_remote_blocks_on_save(self, rs_mock):
+        remote_block_loc = "acbd18db4cc2f85cedef654fccc4a4d8+3+Remote-" + "a" * 40 + "@abcdef01"
+        local_block_loc = "acbd18db4cc2f85cedef654fccc4a4d8+3+A" + "b" * 40 + "@abcdef01"
+        rs_mock.return_value = local_block_loc
+        # Remote collection
+        remote_c = Collection(". " + remote_block_loc + " 0:3:foofile.txt\n")
+        self.assertEqual(
+            len(re.findall(self.remote_locator_re, remote_c.manifest_text())), 1)
+        # Local collection
+        local_c = Collection()
+        with local_c.open('barfile.txt', 'wb') as f:
+            f.write('bar')
+        local_c.save_new()
+        self.assertEqual(
+            len(re.findall(self.local_locator_re, local_c.manifest_text())), 1)
+        self.assertEqual(
+            len(re.findall(self.remote_locator_re, local_c.manifest_text())), 0)
+        # Copy remote file to local collection
+        local_c.copy('./foofile.txt', './copied/foofile.txt', remote_c)
+        self.assertEqual(
+            len(re.findall(self.local_locator_re, local_c.manifest_text())), 1)
+        self.assertEqual(
+            len(re.findall(self.remote_locator_re, local_c.manifest_text())), 1)
+        # Save local collection: remote block should be copied
+        local_c.save()
+        rs_mock.assert_called()
+        self.assertEqual(
+            len(re.findall(self.local_locator_re, local_c.manifest_text())), 2)
+        self.assertEqual(
+            len(re.findall(self.remote_locator_re, local_c.manifest_text())), 0)
 
 
 class NewCollectionTestCaseWithServers(run_test_server.TestCaseWithServers):
+    def test_preserve_version_on_save(self):
+        c = Collection()
+        c.save_new(preserve_version=True)
+        coll_record = arvados.api().collections().get(uuid=c.manifest_locator()).execute()
+        self.assertEqual(coll_record['version'], 1)
+        self.assertEqual(coll_record['preserve_version'], True)
+        with c.open("foo.txt", "wb") as foo:
+            foo.write(b"foo")
+        c.save(preserve_version=True)
+        coll_record = arvados.api().collections().get(uuid=c.manifest_locator()).execute()
+        self.assertEqual(coll_record['version'], 2)
+        self.assertEqual(coll_record['preserve_version'], True)
+        with c.open("bar.txt", "wb") as foo:
+            foo.write(b"bar")
+        c.save(preserve_version=False)
+        coll_record = arvados.api().collections().get(uuid=c.manifest_locator()).execute()
+        self.assertEqual(coll_record['version'], 3)
+        self.assertEqual(coll_record['preserve_version'], False)
+
     def test_get_manifest_text_only_committed(self):
         c = Collection()
         with c.open("count.txt", "wb") as f:
@@ -1300,29 +1515,43 @@ class CollectionCreateUpdateTest(run_test_server.TestCaseWithServers):
 
     def test_create_and_save(self):
         c = self.create_count_txt()
-        c.save(storage_classes=['archive'])
+        c.save(properties={'type' : 'Intermediate'},
+               storage_classes=['archive'],
+               trash_at=datetime.datetime(2111, 1, 1, 11, 11, 11, 111111))
 
         self.assertRegex(
             c.manifest_text(),
             r"^\. 781e5e245d69b566979b86e28d23f2c7\+10\+A[a-f0-9]{40}@[a-f0-9]{8} 0:10:count\.txt$",)
         self.assertEqual(c.api_response()["storage_classes_desired"], ['archive'])
+        self.assertEqual(c.api_response()["properties"], {'type' : 'Intermediate'})
+        self.assertEqual(c.api_response()["trash_at"], '2111-01-01T11:11:11.111111000Z')
 
 
     def test_create_and_save_new(self):
         c = self.create_count_txt()
-        c.save_new(storage_classes=['archive'])
+        c.save_new(properties={'type' : 'Intermediate'},
+                   storage_classes=['archive'],
+                   trash_at=datetime.datetime(2111, 1, 1, 11, 11, 11, 111111))
 
         self.assertRegex(
             c.manifest_text(),
             r"^\. 781e5e245d69b566979b86e28d23f2c7\+10\+A[a-f0-9]{40}@[a-f0-9]{8} 0:10:count\.txt$",)
         self.assertEqual(c.api_response()["storage_classes_desired"], ['archive'])
+        self.assertEqual(c.api_response()["properties"], {'type' : 'Intermediate'})
+        self.assertEqual(c.api_response()["trash_at"], '2111-01-01T11:11:11.111111000Z')
 
-    def test_update_storage_classes_desired_if_collection_is_commited(self):
+    def test_create_and_save_after_commiting(self):
         c = self.create_count_txt()
-        c.save(storage_classes=['hot'])
-        c.save(storage_classes=['cold'])
+        c.save(properties={'type' : 'Intermediate'},
+               storage_classes=['hot'],
+               trash_at=datetime.datetime(2111, 1, 1, 11, 11, 11, 111111))
+        c.save(properties={'type' : 'Output'},
+               storage_classes=['cold'],
+               trash_at=datetime.datetime(2222, 2, 2, 22, 22, 22, 222222))
 
         self.assertEqual(c.api_response()["storage_classes_desired"], ['cold'])
+        self.assertEqual(c.api_response()["properties"], {'type' : 'Output'})
+        self.assertEqual(c.api_response()["trash_at"], '2222-02-02T22:22:22.222222000Z')
 
     def test_create_diff_apply(self):
         c1 = self.create_count_txt()