+ self.assertRegex(
+ c1.portable_manifest_text(),
+ r"\. 5348b82a029fd9e971a811ce1f71360b\+43 0:10:count1\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")
+
+ def test_notify(self):
+ c1 = Collection()
+ events = []
+ c1.subscribe(lambda event, collection, name, item: events.append((event, collection, name, item)))
+ f = c1.open("foo.txt", "wb")
+ self.assertEqual(events[0], (arvados.collection.ADD, c1, "foo.txt", f.arvadosfile))
+
+ def test_open_w(self):
+ c1 = Collection(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n")
+ self.assertEqual(c1["count1.txt"].size(), 10)
+ c1.open("count1.txt", "wb").close()
+ self.assertEqual(c1["count1.txt"].size(), 0)
+
+
class NewCollectionTestCaseWithServersAndTokens(run_test_server.TestCaseWithServers):
    MAIN_SERVER = {}
    KEEP_SERVER = {}

    def setUp(self):
        # Keep a reference to the real put() so the mock below can delegate
        # to it after adding an artificial delay.
        self.keep_put = getattr(arvados.keep.KeepClient, 'put')

    def test_repacked_block_submission_get_permission_token(self):
        '''
        Make sure that those blocks that are committed after repacking small ones,
        get their permission tokens assigned on the collection manifest.
        '''
        def wrapped_keep_put(*args, **kwargs):
            # Simulate slow put operations
            time.sleep(1)
            return self.keep_put(*args, **kwargs)

        # Signed-locator pattern (hash+size+A<token>@<expiry>). Must be a raw
        # string: '\+' and '\d' are regex escapes, and non-raw they are
        # invalid Python string escapes (SyntaxWarning since Python 3.12).
        re_locator = r"[0-9a-f]{32}\+\d+\+A[a-f0-9]{40}@[a-f0-9]{8}"

        with mock.patch('arvados.keep.KeepClient.put', autospec=True) as mocked_put:
            mocked_put.side_effect = wrapped_keep_put
            c = Collection()
            # Write 70 files ~1MiB each so we force to produce 1 big block by repacking
            # small ones before finishing the upload.
            for i in range(70):
                f = c.open("file_{}.txt".format(i), 'wb')
                f.write(random.choice('abcdefghijklmnopqrstuvwxyz') * (2**20+i))
                f.close(flush=False)
            # We should get 2 blocks with their tokens
            self.assertEqual(len(re.findall(re_locator, c.manifest_text())), 2)
+
+
class NewCollectionTestCaseWithServers(run_test_server.TestCaseWithServers):
    def test_get_manifest_text_only_committed(self):
        c = Collection()
        with c.open("count.txt", "wb") as f:
            # One file committed
            with c.open("foo.txt", "wb") as foo:
                foo.write(b"foo")
                foo.flush() # Force block commit
            f.write(b"0123456789")
            # Other file not committed. Block not written to keep yet.
            self.assertEqual(
                c._get_manifest_text(".",
                                     strip=False,
                                     normalize=False,
                                     only_committed=True),
                '. acbd18db4cc2f85cedef654fccc4a4d8+3 0:0:count.txt 0:3:foo.txt\n')
            # And now with the file closed...
            f.flush() # Force block commit
        self.assertEqual(
            c._get_manifest_text(".",
                                 strip=False,
                                 normalize=False,
                                 only_committed=True),
            ". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:10:count.txt 10:3:foo.txt\n")

    def test_only_small_blocks_are_packed_together(self):
        c = Collection()
        # Write a couple of small files,
        f = c.open("count.txt", "wb")
        f.write(b"0123456789")
        f.close(flush=False)
        foo = c.open("foo.txt", "wb")
        foo.write(b"foo")
        foo.close(flush=False)
        # Then, write a big file, it shouldn't be packed with the ones above
        big = c.open("bigfile.txt", "wb")
        big.write(b"x" * 1024 * 1024 * 33) # 33 MB > KEEP_BLOCK_SIZE/2
        big.close(flush=False)
        self.assertEqual(
            c.manifest_text("."),
            '. 2d303c138c118af809f39319e5d507e9+34603008 a8430a058b8fbf408e1931b794dbd6fb+13 0:34603008:bigfile.txt 34603008:10:count.txt 34603018:3:foo.txt\n')

    def test_flush_after_small_block_packing(self):
        c = Collection()
        # Write a couple of small files,
        f = c.open("count.txt", "wb")
        f.write(b"0123456789")
        f.close(flush=False)
        foo = c.open("foo.txt", "wb")
        foo.write(b"foo")
        foo.close(flush=False)

        self.assertEqual(
            c.manifest_text(),
            '. a8430a058b8fbf408e1931b794dbd6fb+13 0:10:count.txt 10:3:foo.txt\n')

        # Reopening and flushing without writing must not repack/change
        # the manifest.
        f = c.open("count.txt", "rb+")
        f.close(flush=True)

        self.assertEqual(
            c.manifest_text(),
            '. a8430a058b8fbf408e1931b794dbd6fb+13 0:10:count.txt 10:3:foo.txt\n')

    def test_write_after_small_block_packing2(self):
        c = Collection()
        # Write a couple of small files,
        f = c.open("count.txt", "wb")
        f.write(b"0123456789")
        f.close(flush=False)
        foo = c.open("foo.txt", "wb")
        foo.write(b"foo")
        foo.close(flush=False)

        self.assertEqual(
            c.manifest_text(),
            '. a8430a058b8fbf408e1931b794dbd6fb+13 0:10:count.txt 10:3:foo.txt\n')

        # Overwriting the head of a packed file adds a new block; the
        # remaining tail still points into the original packed block.
        f = c.open("count.txt", "rb+")
        f.write(b"abc")
        f.close(flush=False)

        self.assertEqual(
            c.manifest_text(),
            '. 900150983cd24fb0d6963f7d28e17f72+3 a8430a058b8fbf408e1931b794dbd6fb+13 0:3:count.txt 6:7:count.txt 13:3:foo.txt\n')


    def test_small_block_packing_with_overwrite(self):
        c = Collection()
        c.open("b1", "wb").close()
        c["b1"].writeto(0, b"b1", 0)

        c.open("b2", "wb").close()
        c["b2"].writeto(0, b"b2", 0)

        c["b1"].writeto(0, b"1b", 0)

        # assertEqual, not assertEquals: the alias was deprecated and is
        # removed in Python 3.12.
        self.assertEqual(c.manifest_text(), ". ed4f3f67c70b02b29c50ce1ea26666bd+4 0:2:b1 2:2:b2\n")
        self.assertEqual(c["b1"].manifest_text(), ". ed4f3f67c70b02b29c50ce1ea26666bd+4 0:2:b1\n")
        self.assertEqual(c["b2"].manifest_text(), ". ed4f3f67c70b02b29c50ce1ea26666bd+4 2:2:b2\n")
+
+
class CollectionCreateUpdateTest(run_test_server.TestCaseWithServers):
    MAIN_SERVER = {}
    KEEP_SERVER = {}

    def create_count_txt(self):
        """Save an empty collection to the API server, then write one file
        ("count.txt") without saving it, and return the collection."""
        coll = Collection()
        coll.save_new("CollectionCreateUpdateTest", ensure_unique_name=True)
        # Freshly saved empty collection: the well-known empty-manifest PDH.
        self.assertEqual(coll.portable_data_hash(), "d41d8cd98f00b204e9800998ecf8427e+0")
        self.assertEqual(coll.api_response()["portable_data_hash"], "d41d8cd98f00b204e9800998ecf8427e+0" )

        with coll.open("count.txt", "wb") as f:
            f.write(b"0123456789")

        # The new content is visible locally but not yet saved remotely.
        self.assertEqual(coll.portable_manifest_text(), ". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n")

        return coll

    def test_create_and_save(self):
        coll = self.create_count_txt()
        coll.save()
        # After save() the block locator carries a +A permission signature.
        self.assertRegex(
            coll.manifest_text(),
            r"^\. 781e5e245d69b566979b86e28d23f2c7\+10\+A[a-f0-9]{40}@[a-f0-9]{8} 0:10:count\.txt$",)

    def test_create_and_save_new(self):
        coll = self.create_count_txt()
        coll.save_new()
        # save_new() likewise yields a signed locator in the manifest.
        self.assertRegex(
            coll.manifest_text(),
            r"^\. 781e5e245d69b566979b86e28d23f2c7\+10\+A[a-f0-9]{40}@[a-f0-9]{8} 0:10:count\.txt$",)

    def test_create_diff_apply(self):
        left = self.create_count_txt()
        left.save()

        # Load a second copy and modify its count.txt.
        right = Collection(left.manifest_locator())
        with right.open("count.txt", "wb") as f:
            f.write(b"abcdefg")

        changes = left.diff(right)
        # The only change is a modification of count.txt.
        self.assertEqual(changes[0], (arvados.collection.MOD, u'./count.txt', left["count.txt"], right["count.txt"]))

        # Applying the diff makes both collections identical.
        left.apply(changes)
        self.assertEqual(left.portable_data_hash(), right.portable_data_hash())

    def test_diff_apply_with_token(self):
        baseline = CollectionReader(". 781e5e245d69b566979b86e28d23f2c7+10+A715fd31f8111894f717eb1003c1b0216799dd9ec@54f5dd1a 0:10:count.txt\n")
        target = Collection(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n")
        other = CollectionReader(". 7ac66c0f148de9519b8bd264312c4d64+7+A715fd31f8111894f717eb1003c1b0216799dd9ec@54f5dd1a 0:7:count.txt\n")

        changes = baseline.diff(other)
        self.assertEqual(changes, [('mod', u'./count.txt', target["count.txt"], other["count.txt"])])

        target.apply(changes)

        # The applied diff carries the +A permission token through.
        self.assertEqual(target.manifest_text(), ". 7ac66c0f148de9519b8bd264312c4d64+7+A715fd31f8111894f717eb1003c1b0216799dd9ec@54f5dd1a 0:7:count.txt\n")


    def test_create_and_update(self):
        original = self.create_count_txt()
        original.save()

        # A second client saves a modified copy.
        copy = arvados.collection.Collection(original.manifest_locator())
        with copy.open("count.txt", "wb") as f:
            f.write(b"abcdefg")

        copy.save()

        # update() pulls the saved changes into the stale original.
        self.assertNotEqual(original.portable_data_hash(), copy.portable_data_hash())
        original.update()
        self.assertEqual(original.portable_data_hash(), copy.portable_data_hash())


    def test_create_and_update_with_conflict(self):
        ours = self.create_count_txt()
        ours.save()

        # Local, unsaved modification...
        with ours.open("count.txt", "wb") as f:
            f.write(b"XYZ")

        # ...while another client saves a different version of the file.
        theirs = arvados.collection.Collection(ours.manifest_locator())
        with theirs.open("count.txt", "wb") as f:
            f.write(b"abcdefg")

        theirs.save()

        # update() keeps both: the local content as count.txt, the remote
        # version under a timestamped ~conflict~ name.
        ours.update()
        self.assertRegex(
            ours.manifest_text(),
            r"\. e65075d550f9b5bf9992fa1d71a131be\+3\S* 7ac66c0f148de9519b8bd264312c4d64\+7\S* 0:3:count\.txt 3:7:count\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")

    def test_pdh_is_native_str(self):
        coll = self.create_count_txt()
        pdh = coll.portable_data_hash()
        self.assertEqual(str, type(pdh))