Merge branch '12418-glob-empty-collection' closes #12418
[arvados.git] / sdk / python / tests / test_collections.py
1 # Copyright (C) The Arvados Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: Apache-2.0
4
5 from __future__ import absolute_import
6
7 from builtins import object
8 import arvados
9 import copy
10 import mock
11 import os
12 import pprint
13 import random
14 import re
15 import sys
16 import tempfile
17 import time
18 import unittest
19
20 from . import run_test_server
21 from arvados._ranges import Range, LocatorAndRange
22 from arvados.collection import Collection, CollectionReader
23 from . import arvados_testutil as tutil
24
class TestResumableWriter(arvados.ResumableCollectionWriter):
    """ResumableCollectionWriter variant used by the resume tests below."""

    # PUT to Keep every 1K.
    KEEP_BLOCK_SIZE = 1024

    def current_state(self):
        """Return the writer's dumped state, deep-copied via copy.deepcopy."""
        snapshot = self.dump_state(copy.deepcopy)
        return snapshot
30
31
32 class ArvadosCollectionsTest(run_test_server.TestCaseWithServers,
33                              tutil.ArvadosBaseTestCase):
34     MAIN_SERVER = {}
35
36     @classmethod
37     def setUpClass(cls):
38         super(ArvadosCollectionsTest, cls).setUpClass()
39         run_test_server.authorize_with('active')
40         cls.api_client = arvados.api('v1')
41         cls.keep_client = arvados.KeepClient(api_client=cls.api_client,
42                                              local_store=cls.local_store)
43
44     def write_foo_bar_baz(self):
45         cw = arvados.CollectionWriter(self.api_client)
46         self.assertEqual(cw.current_stream_name(), '.',
47                          'current_stream_name() should be "." now')
48         cw.set_current_file_name('foo.txt')
49         cw.write(b'foo')
50         self.assertEqual(cw.current_file_name(), 'foo.txt',
51                          'current_file_name() should be foo.txt now')
52         cw.start_new_file('bar.txt')
53         cw.write(b'bar')
54         cw.start_new_stream('baz')
55         cw.write(b'baz')
56         cw.set_current_file_name('baz.txt')
57         self.assertEqual(cw.manifest_text(),
58                          ". 3858f62230ac3c915f300c664312c63f+6 0:3:foo.txt 3:3:bar.txt\n" +
59                          "./baz 73feffa4b7f6bb68e44cf984c85f6e88+3 0:3:baz.txt\n",
60                          "wrong manifest: got {}".format(cw.manifest_text()))
61         cw.finish()
62         return cw.portable_data_hash()
63
64     def test_pdh_is_native_str(self):
65         pdh = self.write_foo_bar_baz()
66         self.assertEqual(type(''), type(pdh))
67
68     def test_keep_local_store(self):
69         self.assertEqual(self.keep_client.put(b'foo'), 'acbd18db4cc2f85cedef654fccc4a4d8+3', 'wrong md5 hash from Keep.put')
70         self.assertEqual(self.keep_client.get('acbd18db4cc2f85cedef654fccc4a4d8+3'), b'foo', 'wrong data from Keep.get')
71
72     def test_local_collection_writer(self):
73         self.assertEqual(self.write_foo_bar_baz(),
74                          '23ca013983d6239e98931cc779e68426+114',
75                          'wrong locator hash: ' + self.write_foo_bar_baz())
76
77     def test_local_collection_reader(self):
78         foobarbaz = self.write_foo_bar_baz()
79         cr = arvados.CollectionReader(
80             foobarbaz + '+Xzizzle', self.api_client)
81         got = []
82         for s in cr.all_streams():
83             for f in s.all_files():
84                 got += [[f.size(), f.stream_name(), f.name(), f.read(2**26)]]
85         expected = [[3, '.', 'foo.txt', b'foo'],
86                     [3, '.', 'bar.txt', b'bar'],
87                     [3, './baz', 'baz.txt', b'baz']]
88         self.assertEqual(got,
89                          expected)
90         stream0 = cr.all_streams()[0]
91         self.assertEqual(stream0.readfrom(0, 0),
92                          b'',
93                          'reading zero bytes should have returned empty string')
94         self.assertEqual(stream0.readfrom(0, 2**26),
95                          b'foobar',
96                          'reading entire stream failed')
97         self.assertEqual(stream0.readfrom(2**26, 0),
98                          b'',
99                          'reading zero bytes should have returned empty string')
100         self.assertEqual(3, len(cr))
101         self.assertTrue(cr)
102
103     def _test_subset(self, collection, expected):
104         cr = arvados.CollectionReader(collection, self.api_client)
105         for s in cr.all_streams():
106             for ex in expected:
107                 if ex[0] == s:
108                     f = s.files()[ex[2]]
109                     got = [f.size(), f.stream_name(), f.name(), "".join(f.readall(2**26))]
110                     self.assertEqual(got,
111                                      ex,
112                                      'all_files|as_manifest did not preserve manifest contents: got %s expected %s' % (got, ex))
113
114     def test_collection_manifest_subset(self):
115         foobarbaz = self.write_foo_bar_baz()
116         self._test_subset(foobarbaz,
117                           [[3, '.',     'bar.txt', b'bar'],
118                            [3, '.',     'foo.txt', b'foo'],
119                            [3, './baz', 'baz.txt', b'baz']])
120         self._test_subset((". %s %s 0:3:foo.txt 3:3:bar.txt\n" %
121                            (self.keep_client.put(b"foo"),
122                             self.keep_client.put(b"bar"))),
123                           [[3, '.', 'bar.txt', b'bar'],
124                            [3, '.', 'foo.txt', b'foo']])
125         self._test_subset((". %s %s 0:2:fo.txt 2:4:obar.txt\n" %
126                            (self.keep_client.put(b"foo"),
127                             self.keep_client.put(b"bar"))),
128                           [[2, '.', 'fo.txt', b'fo'],
129                            [4, '.', 'obar.txt', b'obar']])
130         self._test_subset((". %s %s 0:2:fo.txt 2:0:zero.txt 2:2:ob.txt 4:2:ar.txt\n" %
131                            (self.keep_client.put(b"foo"),
132                             self.keep_client.put(b"bar"))),
133                           [[2, '.', 'ar.txt', b'ar'],
134                            [2, '.', 'fo.txt', b'fo'],
135                            [2, '.', 'ob.txt', b'ob'],
136                            [0, '.', 'zero.txt', b'']])
137
138     def test_collection_empty_file(self):
139         cw = arvados.CollectionWriter(self.api_client)
140         cw.start_new_file('zero.txt')
141         cw.write(b'')
142
143         self.assertEqual(cw.manifest_text(), ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:zero.txt\n")
144         self.check_manifest_file_sizes(cw.manifest_text(), [0])
145         cw = arvados.CollectionWriter(self.api_client)
146         cw.start_new_file('zero.txt')
147         cw.write(b'')
148         cw.start_new_file('one.txt')
149         cw.write(b'1')
150         cw.start_new_stream('foo')
151         cw.start_new_file('zero.txt')
152         cw.write(b'')
153         self.check_manifest_file_sizes(cw.manifest_text(), [0,1,0])
154
155     def test_no_implicit_normalize(self):
156         cw = arvados.CollectionWriter(self.api_client)
157         cw.start_new_file('b')
158         cw.write(b'b')
159         cw.start_new_file('a')
160         cw.write(b'')
161         self.check_manifest_file_sizes(cw.manifest_text(), [1,0])
162         self.check_manifest_file_sizes(
163             arvados.CollectionReader(
164                 cw.manifest_text()).manifest_text(normalize=True),
165             [0,1])
166
167     def check_manifest_file_sizes(self, manifest_text, expect_sizes):
168         cr = arvados.CollectionReader(manifest_text, self.api_client)
169         got_sizes = []
170         for f in cr.all_files():
171             got_sizes += [f.size()]
172         self.assertEqual(got_sizes, expect_sizes, "got wrong file sizes %s, expected %s" % (got_sizes, expect_sizes))
173
174     def test_normalized_collection(self):
175         m1 = """. 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt
176 . 085c37f02916da1cad16f93c54d899b7+41 0:41:md5sum.txt
177 . 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md5sum.txt
178 """
179         self.assertEqual(arvados.CollectionReader(m1, self.api_client).manifest_text(normalize=True),
180                          """. 5348b82a029fd9e971a811ce1f71360b+43 085c37f02916da1cad16f93c54d899b7+41 8b22da26f9f433dea0a10e5ec66d73ba+43 0:127:md5sum.txt
181 """)
182
183         m2 = """. 204e43b8a1185621ca55a94839582e6f+67108864 b9677abbac956bd3e86b1deb28dfac03+67108864 fc15aff2a762b13f521baf042140acec+67108864 323d2a3ce20370c4ca1d3462a344f8fd+25885655 0:227212247:var-GS000016015-ASM.tsv.bz2
184 """
185         self.assertEqual(arvados.CollectionReader(m2, self.api_client).manifest_text(normalize=True), m2)
186
187         m3 = """. 5348b82a029fd9e971a811ce1f71360b+43 3:40:md5sum.txt
188 . 085c37f02916da1cad16f93c54d899b7+41 0:41:md5sum.txt
189 . 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md5sum.txt
190 """
191         self.assertEqual(arvados.CollectionReader(m3, self.api_client).manifest_text(normalize=True),
192                          """. 5348b82a029fd9e971a811ce1f71360b+43 085c37f02916da1cad16f93c54d899b7+41 8b22da26f9f433dea0a10e5ec66d73ba+43 3:124:md5sum.txt
193 """)
194
195         m4 = """. 204e43b8a1185621ca55a94839582e6f+67108864 0:3:foo/bar
196 ./zzz 204e43b8a1185621ca55a94839582e6f+67108864 0:999:zzz
197 ./foo 323d2a3ce20370c4ca1d3462a344f8fd+25885655 0:3:bar
198 """
199         self.assertEqual(arvados.CollectionReader(m4, self.api_client).manifest_text(normalize=True),
200                          """./foo 204e43b8a1185621ca55a94839582e6f+67108864 323d2a3ce20370c4ca1d3462a344f8fd+25885655 0:3:bar 67108864:3:bar
201 ./zzz 204e43b8a1185621ca55a94839582e6f+67108864 0:999:zzz
202 """)
203
204         m5 = """. 204e43b8a1185621ca55a94839582e6f+67108864 0:3:foo/bar
205 ./zzz 204e43b8a1185621ca55a94839582e6f+67108864 0:999:zzz
206 ./foo 204e43b8a1185621ca55a94839582e6f+67108864 3:3:bar
207 """
208         self.assertEqual(arvados.CollectionReader(m5, self.api_client).manifest_text(normalize=True),
209                          """./foo 204e43b8a1185621ca55a94839582e6f+67108864 0:6:bar
210 ./zzz 204e43b8a1185621ca55a94839582e6f+67108864 0:999:zzz
211 """)
212
213         with self.data_file('1000G_ref_manifest') as f6:
214             m6 = f6.read()
215             self.assertEqual(arvados.CollectionReader(m6, self.api_client).manifest_text(normalize=True), m6)
216
217         with self.data_file('jlake_manifest') as f7:
218             m7 = f7.read()
219             self.assertEqual(arvados.CollectionReader(m7, self.api_client).manifest_text(normalize=True), m7)
220
221         m8 = """./a\\040b\\040c 59ca0efa9f5633cb0371bbc0355478d8+13 0:13:hello\\040world.txt
222 """
223         self.assertEqual(arvados.CollectionReader(m8, self.api_client).manifest_text(normalize=True), m8)
224
225     def test_locators_and_ranges(self):
226         blocks2 = [Range('a', 0, 10),
227                    Range('b', 10, 10),
228                    Range('c', 20, 10),
229                    Range('d', 30, 10),
230                    Range('e', 40, 10),
231                    Range('f', 50, 10)]
232
233         self.assertEqual(arvados.locators_and_ranges(blocks2,  2,  2), [LocatorAndRange('a', 10, 2, 2)])
234         self.assertEqual(arvados.locators_and_ranges(blocks2, 12, 2), [LocatorAndRange('b', 10, 2, 2)])
235         self.assertEqual(arvados.locators_and_ranges(blocks2, 22, 2), [LocatorAndRange('c', 10, 2, 2)])
236         self.assertEqual(arvados.locators_and_ranges(blocks2, 32, 2), [LocatorAndRange('d', 10, 2, 2)])
237         self.assertEqual(arvados.locators_and_ranges(blocks2, 42, 2), [LocatorAndRange('e', 10, 2, 2)])
238         self.assertEqual(arvados.locators_and_ranges(blocks2, 52, 2), [LocatorAndRange('f', 10, 2, 2)])
239         self.assertEqual(arvados.locators_and_ranges(blocks2, 62, 2), [])
240         self.assertEqual(arvados.locators_and_ranges(blocks2, -2, 2), [])
241
242         self.assertEqual(arvados.locators_and_ranges(blocks2,  0,  2), [LocatorAndRange('a', 10, 0, 2)])
243         self.assertEqual(arvados.locators_and_ranges(blocks2, 10, 2), [LocatorAndRange('b', 10, 0, 2)])
244         self.assertEqual(arvados.locators_and_ranges(blocks2, 20, 2), [LocatorAndRange('c', 10, 0, 2)])
245         self.assertEqual(arvados.locators_and_ranges(blocks2, 30, 2), [LocatorAndRange('d', 10, 0, 2)])
246         self.assertEqual(arvados.locators_and_ranges(blocks2, 40, 2), [LocatorAndRange('e', 10, 0, 2)])
247         self.assertEqual(arvados.locators_and_ranges(blocks2, 50, 2), [LocatorAndRange('f', 10, 0, 2)])
248         self.assertEqual(arvados.locators_and_ranges(blocks2, 60, 2), [])
249         self.assertEqual(arvados.locators_and_ranges(blocks2, -2, 2), [])
250
251         self.assertEqual(arvados.locators_and_ranges(blocks2,  9,  2), [LocatorAndRange('a', 10, 9, 1), LocatorAndRange('b', 10, 0, 1)])
252         self.assertEqual(arvados.locators_and_ranges(blocks2, 19, 2), [LocatorAndRange('b', 10, 9, 1), LocatorAndRange('c', 10, 0, 1)])
253         self.assertEqual(arvados.locators_and_ranges(blocks2, 29, 2), [LocatorAndRange('c', 10, 9, 1), LocatorAndRange('d', 10, 0, 1)])
254         self.assertEqual(arvados.locators_and_ranges(blocks2, 39, 2), [LocatorAndRange('d', 10, 9, 1), LocatorAndRange('e', 10, 0, 1)])
255         self.assertEqual(arvados.locators_and_ranges(blocks2, 49, 2), [LocatorAndRange('e', 10, 9, 1), LocatorAndRange('f', 10, 0, 1)])
256         self.assertEqual(arvados.locators_and_ranges(blocks2, 59, 2), [LocatorAndRange('f', 10, 9, 1)])
257
258
259         blocks3 = [Range('a', 0, 10),
260                   Range('b', 10, 10),
261                   Range('c', 20, 10),
262                   Range('d', 30, 10),
263                   Range('e', 40, 10),
264                   Range('f', 50, 10),
265                    Range('g', 60, 10)]
266
267         self.assertEqual(arvados.locators_and_ranges(blocks3,  2,  2), [LocatorAndRange('a', 10, 2, 2)])
268         self.assertEqual(arvados.locators_and_ranges(blocks3, 12, 2), [LocatorAndRange('b', 10, 2, 2)])
269         self.assertEqual(arvados.locators_and_ranges(blocks3, 22, 2), [LocatorAndRange('c', 10, 2, 2)])
270         self.assertEqual(arvados.locators_and_ranges(blocks3, 32, 2), [LocatorAndRange('d', 10, 2, 2)])
271         self.assertEqual(arvados.locators_and_ranges(blocks3, 42, 2), [LocatorAndRange('e', 10, 2, 2)])
272         self.assertEqual(arvados.locators_and_ranges(blocks3, 52, 2), [LocatorAndRange('f', 10, 2, 2)])
273         self.assertEqual(arvados.locators_and_ranges(blocks3, 62, 2), [LocatorAndRange('g', 10, 2, 2)])
274
275
276         blocks = [Range('a', 0, 10),
277                   Range('b', 10, 15),
278                   Range('c', 25, 5)]
279         self.assertEqual(arvados.locators_and_ranges(blocks, 1, 0), [])
280         self.assertEqual(arvados.locators_and_ranges(blocks, 0, 5), [LocatorAndRange('a', 10, 0, 5)])
281         self.assertEqual(arvados.locators_and_ranges(blocks, 3, 5), [LocatorAndRange('a', 10, 3, 5)])
282         self.assertEqual(arvados.locators_and_ranges(blocks, 0, 10), [LocatorAndRange('a', 10, 0, 10)])
283
284         self.assertEqual(arvados.locators_and_ranges(blocks, 0, 11), [LocatorAndRange('a', 10, 0, 10),
285                                                                       LocatorAndRange('b', 15, 0, 1)])
286         self.assertEqual(arvados.locators_and_ranges(blocks, 1, 11), [LocatorAndRange('a', 10, 1, 9),
287                                                                       LocatorAndRange('b', 15, 0, 2)])
288         self.assertEqual(arvados.locators_and_ranges(blocks, 0, 25), [LocatorAndRange('a', 10, 0, 10),
289                                                                       LocatorAndRange('b', 15, 0, 15)])
290
291         self.assertEqual(arvados.locators_and_ranges(blocks, 0, 30), [LocatorAndRange('a', 10, 0, 10),
292                                                                       LocatorAndRange('b', 15, 0, 15),
293                                                                       LocatorAndRange('c', 5, 0, 5)])
294         self.assertEqual(arvados.locators_and_ranges(blocks, 1, 30), [LocatorAndRange('a', 10, 1, 9),
295                                                                       LocatorAndRange('b', 15, 0, 15),
296                                                                       LocatorAndRange('c', 5, 0, 5)])
297         self.assertEqual(arvados.locators_and_ranges(blocks, 0, 31), [LocatorAndRange('a', 10, 0, 10),
298                                                                       LocatorAndRange('b', 15, 0, 15),
299                                                                       LocatorAndRange('c', 5, 0, 5)])
300
301         self.assertEqual(arvados.locators_and_ranges(blocks, 15, 5), [LocatorAndRange('b', 15, 5, 5)])
302
303         self.assertEqual(arvados.locators_and_ranges(blocks, 8, 17), [LocatorAndRange('a', 10, 8, 2),
304                                                                       LocatorAndRange('b', 15, 0, 15)])
305
306         self.assertEqual(arvados.locators_and_ranges(blocks, 8, 20), [LocatorAndRange('a', 10, 8, 2),
307                                                                       LocatorAndRange('b', 15, 0, 15),
308                                                                       LocatorAndRange('c', 5, 0, 3)])
309
310         self.assertEqual(arvados.locators_and_ranges(blocks, 26, 2), [LocatorAndRange('c', 5, 1, 2)])
311
312         self.assertEqual(arvados.locators_and_ranges(blocks, 9, 15), [LocatorAndRange('a', 10, 9, 1),
313                                                                       LocatorAndRange('b', 15, 0, 14)])
314         self.assertEqual(arvados.locators_and_ranges(blocks, 10, 15), [LocatorAndRange('b', 15, 0, 15)])
315         self.assertEqual(arvados.locators_and_ranges(blocks, 11, 15), [LocatorAndRange('b', 15, 1, 14),
316                                                                        LocatorAndRange('c', 5, 0, 1)])
317
318     class MockKeep(object):
319         def __init__(self, content, num_retries=0):
320             self.content = content
321
322         def get(self, locator, num_retries=0):
323             return self.content[locator]
324
325     def test_stream_reader(self):
326         keepblocks = {
327             'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa+10': b'abcdefghij',
328             'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb+15': b'klmnopqrstuvwxy',
329             'cccccccccccccccccccccccccccccccc+5': b'z0123',
330         }
331         mk = self.MockKeep(keepblocks)
332
333         sr = arvados.StreamReader([".", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa+10", "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb+15", "cccccccccccccccccccccccccccccccc+5", "0:30:foo"], mk)
334
335         content = b'abcdefghijklmnopqrstuvwxyz0123456789'
336
337         self.assertEqual(sr.readfrom(0, 30), content[0:30])
338         self.assertEqual(sr.readfrom(2, 30), content[2:30])
339
340         self.assertEqual(sr.readfrom(2, 8), content[2:10])
341         self.assertEqual(sr.readfrom(0, 10), content[0:10])
342
343         self.assertEqual(sr.readfrom(0, 5), content[0:5])
344         self.assertEqual(sr.readfrom(5, 5), content[5:10])
345         self.assertEqual(sr.readfrom(10, 5), content[10:15])
346         self.assertEqual(sr.readfrom(15, 5), content[15:20])
347         self.assertEqual(sr.readfrom(20, 5), content[20:25])
348         self.assertEqual(sr.readfrom(25, 5), content[25:30])
349         self.assertEqual(sr.readfrom(30, 5), b'')
350
351     def test_extract_file(self):
352         m1 = """. 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt
353 . 085c37f02916da1cad16f93c54d899b7+41 0:41:md6sum.txt
354 . 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md7sum.txt
355 . 085c37f02916da1cad16f93c54d899b7+41 5348b82a029fd9e971a811ce1f71360b+43 8b22da26f9f433dea0a10e5ec66d73ba+43 47:80:md8sum.txt
356 . 085c37f02916da1cad16f93c54d899b7+41 5348b82a029fd9e971a811ce1f71360b+43 8b22da26f9f433dea0a10e5ec66d73ba+43 40:80:md9sum.txt
357 """
358
359         m2 = arvados.CollectionReader(m1, self.api_client).manifest_text(normalize=True)
360
361         self.assertEqual(m2,
362                          ". 5348b82a029fd9e971a811ce1f71360b+43 085c37f02916da1cad16f93c54d899b7+41 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md5sum.txt 43:41:md6sum.txt 84:43:md7sum.txt 6:37:md8sum.txt 84:43:md8sum.txt 83:1:md9sum.txt 0:43:md9sum.txt 84:36:md9sum.txt\n")
363         files = arvados.CollectionReader(
364             m2, self.api_client).all_streams()[0].files()
365
366         self.assertEqual(files['md5sum.txt'].as_manifest(),
367                          ". 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt\n")
368         self.assertEqual(files['md6sum.txt'].as_manifest(),
369                          ". 085c37f02916da1cad16f93c54d899b7+41 0:41:md6sum.txt\n")
370         self.assertEqual(files['md7sum.txt'].as_manifest(),
371                          ". 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md7sum.txt\n")
372         self.assertEqual(files['md9sum.txt'].as_manifest(),
373                          ". 085c37f02916da1cad16f93c54d899b7+41 5348b82a029fd9e971a811ce1f71360b+43 8b22da26f9f433dea0a10e5ec66d73ba+43 40:80:md9sum.txt\n")
374
375     def test_write_directory_tree(self):
376         cwriter = arvados.CollectionWriter(self.api_client)
377         cwriter.write_directory_tree(self.build_directory_tree(
378                 ['basefile', 'subdir/subfile']))
379         self.assertEqual(cwriter.manifest_text(),
380                          """. c5110c5ac93202d8e0f9e381f22bac0f+8 0:8:basefile
381 ./subdir 1ca4dec89403084bf282ad31e6cf7972+14 0:14:subfile\n""")
382
383     def test_write_named_directory_tree(self):
384         cwriter = arvados.CollectionWriter(self.api_client)
385         cwriter.write_directory_tree(self.build_directory_tree(
386                 ['basefile', 'subdir/subfile']), 'root')
387         self.assertEqual(
388             cwriter.manifest_text(),
389             """./root c5110c5ac93202d8e0f9e381f22bac0f+8 0:8:basefile
390 ./root/subdir 1ca4dec89403084bf282ad31e6cf7972+14 0:14:subfile\n""")
391
392     def test_write_directory_tree_in_one_stream(self):
393         cwriter = arvados.CollectionWriter(self.api_client)
394         cwriter.write_directory_tree(self.build_directory_tree(
395                 ['basefile', 'subdir/subfile']), max_manifest_depth=0)
396         self.assertEqual(cwriter.manifest_text(),
397                          """. 4ace875ffdc6824a04950f06858f4465+22 0:8:basefile 8:14:subdir/subfile\n""")
398
399     def test_write_directory_tree_with_limited_recursion(self):
400         cwriter = arvados.CollectionWriter(self.api_client)
401         cwriter.write_directory_tree(
402             self.build_directory_tree(['f1', 'd1/f2', 'd1/d2/f3']),
403             max_manifest_depth=1)
404         self.assertEqual(cwriter.manifest_text(),
405                          """. bd19836ddb62c11c55ab251ccaca5645+2 0:2:f1
406 ./d1 50170217e5b04312024aa5cd42934494+13 0:8:d2/f3 8:5:f2\n""")
407
408     def test_write_directory_tree_with_zero_recursion(self):
409         cwriter = arvados.CollectionWriter(self.api_client)
410         content = 'd1/d2/f3d1/f2f1'
411         blockhash = tutil.str_keep_locator(content)
412         cwriter.write_directory_tree(
413             self.build_directory_tree(['f1', 'd1/f2', 'd1/d2/f3']),
414             max_manifest_depth=0)
415         self.assertEqual(
416             cwriter.manifest_text(),
417             ". {} 0:8:d1/d2/f3 8:5:d1/f2 13:2:f1\n".format(blockhash))
418
419     def test_write_one_file(self):
420         cwriter = arvados.CollectionWriter(self.api_client)
421         with self.make_test_file() as testfile:
422             cwriter.write_file(testfile.name)
423             self.assertEqual(
424                 cwriter.manifest_text(),
425                 ". 098f6bcd4621d373cade4e832627b4f6+4 0:4:{}\n".format(
426                     os.path.basename(testfile.name)))
427
428     def test_write_named_file(self):
429         cwriter = arvados.CollectionWriter(self.api_client)
430         with self.make_test_file() as testfile:
431             cwriter.write_file(testfile.name, 'foo')
432             self.assertEqual(cwriter.manifest_text(),
433                              ". 098f6bcd4621d373cade4e832627b4f6+4 0:4:foo\n")
434
435     def test_write_multiple_files(self):
436         cwriter = arvados.CollectionWriter(self.api_client)
437         for letter in 'ABC':
438             with self.make_test_file(letter.encode()) as testfile:
439                 cwriter.write_file(testfile.name, letter)
440         self.assertEqual(
441             cwriter.manifest_text(),
442             ". 902fbdd2b1df0c4f70b4a5d23525e932+3 0:1:A 1:1:B 2:1:C\n")
443
444     def test_basic_resume(self):
445         cwriter = TestResumableWriter()
446         with self.make_test_file() as testfile:
447             cwriter.write_file(testfile.name, 'test')
448             resumed = TestResumableWriter.from_state(cwriter.current_state())
449         self.assertEqual(cwriter.manifest_text(), resumed.manifest_text(),
450                           "resumed CollectionWriter had different manifest")
451
452     def test_resume_fails_when_missing_dependency(self):
453         cwriter = TestResumableWriter()
454         with self.make_test_file() as testfile:
455             cwriter.write_file(testfile.name, 'test')
456         self.assertRaises(arvados.errors.StaleWriterStateError,
457                           TestResumableWriter.from_state,
458                           cwriter.current_state())
459
460     def test_resume_fails_when_dependency_mtime_changed(self):
461         cwriter = TestResumableWriter()
462         with self.make_test_file() as testfile:
463             cwriter.write_file(testfile.name, 'test')
464             os.utime(testfile.name, (0, 0))
465             self.assertRaises(arvados.errors.StaleWriterStateError,
466                               TestResumableWriter.from_state,
467                               cwriter.current_state())
468
469     def test_resume_fails_when_dependency_is_nonfile(self):
470         cwriter = TestResumableWriter()
471         cwriter.write_file('/dev/null', 'empty')
472         self.assertRaises(arvados.errors.StaleWriterStateError,
473                           TestResumableWriter.from_state,
474                           cwriter.current_state())
475
476     def test_resume_fails_when_dependency_size_changed(self):
477         cwriter = TestResumableWriter()
478         with self.make_test_file() as testfile:
479             cwriter.write_file(testfile.name, 'test')
480             orig_mtime = os.fstat(testfile.fileno()).st_mtime
481             testfile.write(b'extra')
482             testfile.flush()
483             os.utime(testfile.name, (orig_mtime, orig_mtime))
484             self.assertRaises(arvados.errors.StaleWriterStateError,
485                               TestResumableWriter.from_state,
486                               cwriter.current_state())
487
488     def test_resume_fails_with_expired_locator(self):
489         cwriter = TestResumableWriter()
490         state = cwriter.current_state()
491         # Add an expired locator to the state.
492         state['_current_stream_locators'].append(''.join([
493                     'a' * 32, '+1+A', 'b' * 40, '@', '10000000']))
494         self.assertRaises(arvados.errors.StaleWriterStateError,
495                           TestResumableWriter.from_state, state)
496
497     def test_arbitrary_objects_not_resumable(self):
498         cwriter = TestResumableWriter()
499         with open('/dev/null') as badfile:
500             self.assertRaises(arvados.errors.AssertionError,
501                               cwriter.write_file, badfile)
502
503     def test_arbitrary_writes_not_resumable(self):
504         cwriter = TestResumableWriter()
505         self.assertRaises(arvados.errors.AssertionError,
506                           cwriter.write, "badtext")
507
508     def test_read_arbitrary_data_with_collection_reader(self):
509         # arv-get relies on this to do "arv-get {keep-locator} -".
510         self.write_foo_bar_baz()
511         self.assertEqual(
512             'foobar',
513             arvados.CollectionReader(
514                 '3858f62230ac3c915f300c664312c63f+6'
515                 ).manifest_text())
516
517
class CollectionTestMixin(tutil.ApiClientMock):
    """Shared collection fixtures and an API client mock for collection tests."""

    # Collection fixtures loaded through run_test_server.
    API_COLLECTIONS = run_test_server.fixture('collections')

    # Default collection: the 'foo_file' fixture.
    DEFAULT_COLLECTION = API_COLLECTIONS['foo_file']
    DEFAULT_DATA_HASH = DEFAULT_COLLECTION['portable_data_hash']
    DEFAULT_MANIFEST = DEFAULT_COLLECTION['manifest_text']
    DEFAULT_UUID = DEFAULT_COLLECTION['uuid']

    # Alternate collection: the 'bar_file' fixture.
    ALT_COLLECTION = API_COLLECTIONS['bar_file']
    ALT_DATA_HASH = ALT_COLLECTION['portable_data_hash']
    ALT_MANIFEST = ALT_COLLECTION['manifest_text']

    def api_client_mock(self, status=200):
        """Return the parent's API client mock with Keep services mocked.

        ``status`` is passed to mock_keep_services() only; the parent's
        api_client_mock() is called without arguments.
        """
        mocked_client = super(CollectionTestMixin, self).api_client_mock()
        self.mock_keep_services(
            mocked_client, status=status, service_type='proxy', count=1)
        return mocked_client
532
533
534 @tutil.skip_sleep
535 class CollectionReaderTestCase(unittest.TestCase, CollectionTestMixin):
536     def mock_get_collection(self, api_mock, code, fixturename):
537         body = self.API_COLLECTIONS.get(fixturename)
538         self._mock_api_call(api_mock.collections().get, code, body)
539
540     def api_client_mock(self, status=200):
541         client = super(CollectionReaderTestCase, self).api_client_mock()
542         self.mock_get_collection(client, status, 'foo_file')
543         return client
544
545     def test_init_no_default_retries(self):
546         client = self.api_client_mock(200)
547         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
548         reader.manifest_text()
549         client.collections().get().execute.assert_called_with(num_retries=0)
550
551     def test_uuid_init_success(self):
552         client = self.api_client_mock(200)
553         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client,
554                                           num_retries=3)
555         self.assertEqual(self.DEFAULT_COLLECTION['manifest_text'],
556                          reader.manifest_text())
557         client.collections().get().execute.assert_called_with(num_retries=3)
558
559     def test_uuid_init_failure_raises_api_error(self):
560         client = self.api_client_mock(500)
561         with self.assertRaises(arvados.errors.ApiError):
562             reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
563
564     def test_locator_init(self):
565         client = self.api_client_mock(200)
566         # Ensure Keep will not return anything if asked.
567         with tutil.mock_keep_responses(None, 404):
568             reader = arvados.CollectionReader(self.DEFAULT_DATA_HASH,
569                                               api_client=client)
570             self.assertEqual(self.DEFAULT_MANIFEST, reader.manifest_text())
571
572     def test_locator_init_fallback_to_keep(self):
573         # crunch-job needs this to read manifests that have only ever
574         # been written to Keep.
575         client = self.api_client_mock(200)
576         self.mock_get_collection(client, 404, None)
577         with tutil.mock_keep_responses(self.DEFAULT_MANIFEST, 200):
578             reader = arvados.CollectionReader(self.DEFAULT_DATA_HASH,
579                                               api_client=client)
580             self.assertEqual(self.DEFAULT_MANIFEST, reader.manifest_text())
581
582     def test_uuid_init_no_fallback_to_keep(self):
583         # Do not look up a collection UUID in Keep.
584         client = self.api_client_mock(404)
585         with tutil.mock_keep_responses(self.DEFAULT_MANIFEST, 200):
586             with self.assertRaises(arvados.errors.ApiError):
587                 reader = arvados.CollectionReader(self.DEFAULT_UUID,
588                                                   api_client=client)
589
590     def test_try_keep_first_if_permission_hint(self):
591         # To verify that CollectionReader tries Keep first here, we
592         # mock API server to return the wrong data.
593         client = self.api_client_mock(200)
594         with tutil.mock_keep_responses(self.ALT_MANIFEST, 200):
595             self.assertEqual(
596                 self.ALT_MANIFEST,
597                 arvados.CollectionReader(
598                     self.ALT_DATA_HASH + '+Affffffffffffffffffffffffffffffffffffffff@fedcba98',
599                     api_client=client).manifest_text())
600
601     def test_init_num_retries_propagated(self):
602         # More of an integration test...
603         client = self.api_client_mock(200)
604         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client,
605                                           num_retries=3)
606         with tutil.mock_keep_responses('foo', 500, 500, 200):
607             self.assertEqual(b'foo',
608                              b''.join(f.read(9) for f in reader.all_files()))
609
610     def test_read_nonnormalized_manifest_with_collection_reader(self):
611         # client should be able to use CollectionReader on a manifest without normalizing it
612         client = self.api_client_mock(500)
613         nonnormal = ". acbd18db4cc2f85cedef654fccc4a4d8+3+Aabadbadbee@abeebdee 0:3:foo.txt 1:0:bar.txt 0:3:foo.txt\n"
614         reader = arvados.CollectionReader(
615             nonnormal,
616             api_client=client, num_retries=0)
617         # Ensure stripped_manifest() doesn't mangle our manifest in
618         # any way other than stripping hints.
619         self.assertEqual(
620             re.sub('\+[^\d\s\+]+', '', nonnormal),
621             reader.stripped_manifest())
622         # Ensure stripped_manifest() didn't mutate our reader.
623         self.assertEqual(nonnormal, reader.manifest_text())
624         # Ensure the files appear in the order given in the manifest.
625         self.assertEqual(
626             [[6, '.', 'foo.txt'],
627              [0, '.', 'bar.txt']],
628             [[f.size(), f.stream_name(), f.name()]
629              for f in reader.all_streams()[0].all_files()])
630
631     def test_read_empty_collection(self):
632         client = self.api_client_mock(200)
633         self.mock_get_collection(client, 200, 'empty')
634         reader = arvados.CollectionReader('d41d8cd98f00b204e9800998ecf8427e+0',
635                                           api_client=client)
636         self.assertEqual('', reader.manifest_text())
637         self.assertEqual(0, len(reader))
638         self.assertFalse(reader)
639
640     def test_api_response(self):
641         client = self.api_client_mock()
642         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
643         self.assertEqual(self.DEFAULT_COLLECTION, reader.api_response())
644
645     def test_api_response_with_collection_from_keep(self):
646         client = self.api_client_mock()
647         self.mock_get_collection(client, 404, 'foo')
648         with tutil.mock_keep_responses(self.DEFAULT_MANIFEST, 200):
649             reader = arvados.CollectionReader(self.DEFAULT_DATA_HASH,
650                                               api_client=client)
651             api_response = reader.api_response()
652         self.assertIsNone(api_response)
653
654     def check_open_file(self, coll_file, stream_name, file_name, file_size):
655         self.assertFalse(coll_file.closed, "returned file is not open")
656         self.assertEqual(stream_name, coll_file.stream_name())
657         self.assertEqual(file_name, coll_file.name)
658         self.assertEqual(file_size, coll_file.size())
659
660     def test_open_collection_file_one_argument(self):
661         client = self.api_client_mock(200)
662         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
663         cfile = reader.open('./foo', 'rb')
664         self.check_open_file(cfile, '.', 'foo', 3)
665
666     def test_open_deep_file(self):
667         coll_name = 'collection_with_files_in_subdir'
668         client = self.api_client_mock(200)
669         self.mock_get_collection(client, 200, coll_name)
670         reader = arvados.CollectionReader(
671             self.API_COLLECTIONS[coll_name]['uuid'], api_client=client)
672         cfile = reader.open('./subdir2/subdir3/file2_in_subdir3.txt', 'rb')
673         self.check_open_file(cfile, './subdir2/subdir3', 'file2_in_subdir3.txt',
674                              32)
675
676     def test_open_nonexistent_stream(self):
677         client = self.api_client_mock(200)
678         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
679         self.assertRaises(IOError, reader.open, './nonexistent/foo')
680
681     def test_open_nonexistent_file(self):
682         client = self.api_client_mock(200)
683         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
684         self.assertRaises(IOError, reader.open, 'nonexistent')
685
686
@tutil.skip_sleep
class CollectionWriterTestCase(unittest.TestCase, CollectionTestMixin):
    """Exercise arvados.CollectionWriter against mocked Keep responses."""

    def mock_keep(self, body, *codes, **headers):
        """Call tutil.mock_keep_responses, defaulting x-keep-replicas-stored to 2."""
        if 'x-keep-replicas-stored' not in headers:
            headers['x-keep-replicas-stored'] = 2
        return tutil.mock_keep_responses(body, *codes, **headers)

    def foo_writer(self, **kwargs):
        """Return a CollectionWriter with b'foo' written to a new file 'foo'.

        Keyword arguments go to the CollectionWriter constructor;
        api_client defaults to a fresh API client mock.
        """
        default_api = self.api_client_mock()
        kwargs.setdefault('api_client', default_api)
        writer = arvados.CollectionWriter(**kwargs)
        writer.start_new_file('foo')
        writer.write(b'foo')
        return writer

    def test_write_whole_collection(self):
        """finish() returns the locator served by the Keep mock."""
        writer = self.foo_writer()
        with self.mock_keep(self.DEFAULT_DATA_HASH, 200, 200):
            locator = writer.finish()
            self.assertEqual(self.DEFAULT_DATA_HASH, locator)

    def test_write_no_default(self):
        """finish() raises KeepWriteError when the Keep mock answers 500."""
        writer = self.foo_writer()
        with self.mock_keep(None, 500):
            self.assertRaises(arvados.errors.KeepWriteError, writer.finish)

    def test_write_insufficient_replicas_via_proxy(self):
        """Two stored replicas reported for replication=3 raises KeepWriteError."""
        writer = self.foo_writer(replication=3)
        two_replicas = {'x-keep-replicas-stored': 2}
        with self.mock_keep(None, 200, **two_replicas):
            self.assertRaises(
                arvados.errors.KeepWriteError, writer.manifest_text)

    def test_write_insufficient_replicas_via_disks(self):
        """Two disk services storing one replica each cannot satisfy replication=3."""
        api = mock.MagicMock(name='api_client')
        one_replica_each = {'x-keep-replicas-stored': 1}
        with self.mock_keep(None, 200, 200, **one_replica_each):
            self.mock_keep_services(
                api, status=200, service_type='disk', count=2)
            writer = self.foo_writer(api_client=api, replication=3)
            self.assertRaises(
                arvados.errors.KeepWriteError, writer.manifest_text)

    def test_write_three_replicas(self):
        """Three 500s followed by three 200s result in six Keep calls."""
        api = mock.MagicMock(name='api_client')
        one_replica_each = {'x-keep-replicas-stored': 1}
        response_codes = (500, 500, 500, 200, 200, 200)
        with self.mock_keep("", *response_codes, **one_replica_each) as keepmock:
            self.mock_keep_services(
                api, status=200, service_type='disk', count=6)
            writer = self.foo_writer(api_client=api, replication=3)
            writer.manifest_text()
            self.assertEqual(6, keepmock.call_count)

    def test_write_whole_collection_through_retries(self):
        """finish() succeeds with num_retries=2 despite interleaved 500 responses."""
        writer = self.foo_writer(num_retries=2)
        response_codes = (500, 500, 200, 500, 500, 200)
        with self.mock_keep(self.DEFAULT_DATA_HASH, *response_codes):
            locator = writer.finish()
            self.assertEqual(self.DEFAULT_DATA_HASH, locator)

    def test_flush_data_retries(self):
        """flush_data() retries past a 500 and the manifest matches DEFAULT_MANIFEST."""
        writer = self.foo_writer(num_retries=2)
        # The second whitespace-separated token of the manifest is the
        # block locator the Keep mock should hand back.
        manifest_tokens = self.DEFAULT_MANIFEST.split()
        foo_hash = manifest_tokens[1]
        with self.mock_keep(foo_hash, 500, 200):
            writer.flush_data()
        self.assertEqual(self.DEFAULT_MANIFEST, writer.manifest_text())

    def test_one_open(self):
        """A file from writer.open() tracks names, closes on exit, and lands in the manifest."""
        api = self.api_client_mock()
        writer = arvados.CollectionWriter(api)
        with writer.open('out') as out_file:
            self.assertEqual('.', writer.current_stream_name())
            self.assertEqual('out', writer.current_file_name())
            out_file.write(b'test data')
            block_locator = tutil.str_keep_locator('test data')
        self.assertTrue(out_file.closed, "writer file not closed after context")
        # Writing to the closed file must be rejected.
        with self.assertRaises(ValueError):
            out_file.write('extra text')
        with self.mock_keep(block_locator, 200):
            expected = ". {} 0:9:out\n".format(block_locator)
            self.assertEqual(expected, writer.manifest_text())

    def test_open_writelines(self):
        """writelines() concatenates its strings into a single 6-byte file."""
        api = self.api_client_mock()
        writer = arvados.CollectionWriter(api)
        with writer.open('six') as out_file:
            out_file.writelines(['12', '34', '56'])
            block_locator = tutil.str_keep_locator('123456')
        with self.mock_keep(block_locator, 200):
            expected = ". {} 0:6:six\n".format(block_locator)
            self.assertEqual(expected, writer.manifest_text())

    def test_open_flush(self):
        """flush() between writes gives the file two separate blocks."""
        api = self.api_client_mock()
        first_block = tutil.str_keep_locator('flush1')
        second_block = tutil.str_keep_locator('flush2')
        with self.mock_keep((first_block, 200), (second_block, 200)):
            writer = arvados.CollectionWriter(api)
            with writer.open('flush_test') as out_file:
                out_file.write(b'flush1')
                out_file.flush()
                out_file.write(b'flush2')
            expected = ". {} {} 0:12:flush_test\n".format(
                first_block, second_block)
            self.assertEqual(expected, writer.manifest_text())

    def test_two_opens_same_stream(self):
        """Two files opened in stream '.' share one block in the manifest."""
        api = self.api_client_mock()
        writer = arvados.CollectionWriter(api)
        for file_name, payload in (('1', b'1st'), ('2', b'2nd')):
            with writer.open('.', file_name) as out_file:
                out_file.write(payload)
        block_locator = tutil.str_keep_locator('1st2nd')
        with self.mock_keep(block_locator, 200):
            expected = ". {} 0:3:1 3:3:2\n".format(block_locator)
            self.assertEqual(expected, writer.manifest_text())

    def test_two_opens_two_streams(self):
        """Files opened in different streams get separate manifest lines."""
        api = self.api_client_mock()
        root_block = tutil.str_keep_locator('file')
        dir_block = tutil.str_keep_locator('indir')
        with self.mock_keep((root_block, 200), (dir_block, 200)):
            writer = arvados.CollectionWriter(api)
            with writer.open('file') as out_file:
                out_file.write(b'file')
            with writer.open('./dir', 'indir') as out_file:
                out_file.write(b'indir')
            expected = ". {} 0:4:file\n./dir {} 0:5:indir\n".format(
                root_block, dir_block)
            self.assertEqual(expected, writer.manifest_text())

    def test_dup_open_fails(self):
        """Opening a second file while the first is still open raises AssertionError."""
        api = self.api_client_mock()
        writer = arvados.CollectionWriter(api)
        # Hold a reference so the first file stays alive and unclosed.
        still_open = writer.open('one')
        with self.assertRaises(arvados.errors.AssertionError):
            writer.open('two')
819
820
class CollectionMethods(run_test_server.TestCaseWithServers):
    """Check the mapping-style accessors of Collection."""

    def test_keys_values_items_support_indexing(self):
        """keys(), values() and items() have a length and can be indexed."""
        coll = Collection()
        for file_name, payload in (('foo', b'foo'), ('bar', b'bar')):
            with coll.open(file_name, 'wb') as out_file:
                out_file.write(payload)

        self.assertEqual(2, len(coll.keys()))
        if sys.version_info < (3, 0):
            # keys() supports indexing only for python2 callers.
            first_name = coll.keys()[0]
            second_name = coll.keys()[1]
        else:
            first_name, second_name = coll.keys()

        self.assertEqual(2, len(coll.values()))
        # Results are unused: the indexing itself is what is exercised.
        first_value = coll.values()[0]
        second_value = coll.values()[1]

        self.assertEqual(2, len(coll.items()))
        for position, expected_name in enumerate((first_name, second_name)):
            self.assertEqual(expected_name, coll.items()[position][0])
842
843
class CollectionOpenModes(run_test_server.TestCaseWithServers):
    """Check which mode strings Collection.open() accepts."""

    def test_open_binary_modes(self):
        """Every binary write/append mode can be opened and written to."""
        coll = Collection()
        binary_modes = ['wb', 'wb+', 'ab', 'ab+']
        for mode in binary_modes:
            with coll.open('foo', mode) as out_file:
                out_file.write(b'foo')

    def test_open_invalid_modes(self):
        """Malformed mode values make open() raise."""
        coll = Collection()
        invalid_modes = ['+r', 'aa', '++', 'r+b', 'beer', '', None]
        for mode in invalid_modes:
            self.assertRaises(Exception, coll.open, 'foo', mode)

    def test_open_text_modes(self):
        """Text modes raise NotImplementedError on Python 3 and work on Python 2."""
        coll = Collection()
        with coll.open('foo', 'wb') as out_file:
            out_file.write('foo')
        on_python3 = sys.version_info >= (3, 0)
        text_modes = ['r', 'rt', 'r+', 'rt+', 'w', 'wt', 'a', 'at']
        for mode in text_modes:
            if on_python3:
                with self.assertRaises(NotImplementedError):
                    coll.open('foo', mode)
                continue
            with coll.open('foo', mode) as text_file:
                read_only = mode[0] == 'r' and '+' not in mode
                if read_only:
                    self.assertEqual('foo', text_file.read(3))
                else:
                    # Write, step back over what was written, read it back.
                    text_file.write('bar')
                    text_file.seek(-3, os.SEEK_CUR)
                    self.assertEqual('bar', text_file.read(3))
874
875
class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
    """Tests for Collection / CollectionReader manifest handling.

    Covers construction from manifest text, remove/find/copy/rename/clone,
    diff() and apply() including conflict handling, change notifications,
    and truncation on open for writing.
    """

    def test_replication_desired_kept_on_load(self):
        """A collection reloaded by locator keeps the saved replication_desired."""
        m = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n'
        c1 = Collection(m, replication_desired=1)
        c1.save_new()
        loc = c1.manifest_locator()
        c2 = Collection(loc)
        # Compare the manifests themselves; comparing the bound methods
        # (c1.manifest_text vs c2.manifest_text) never inspects any
        # manifest content.
        self.assertEqual(c1.portable_manifest_text(),
                         c2.portable_manifest_text())
        self.assertEqual(c1.replication_desired, c2.replication_desired)

    def test_replication_desired_not_loaded_if_provided(self):
        """An explicit replication_desired argument is not replaced by the saved value."""
        m = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n'
        c1 = Collection(m, replication_desired=1)
        c1.save_new()
        loc = c1.manifest_locator()
        c2 = Collection(loc, replication_desired=2)
        # Call the accessor rather than comparing bound methods.
        self.assertEqual(c1.portable_manifest_text(),
                         c2.portable_manifest_text())
        self.assertNotEqual(c1.replication_desired, c2.replication_desired)

    def test_init_manifest(self):
        """A multi-line manifest round-trips verbatim and merges when normalized."""
        m1 = """. 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt
. 085c37f02916da1cad16f93c54d899b7+41 0:41:md5sum.txt
. 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md5sum.txt
"""
        self.assertEqual(m1, CollectionReader(m1).manifest_text(normalize=False))
        self.assertEqual(". 5348b82a029fd9e971a811ce1f71360b+43 085c37f02916da1cad16f93c54d899b7+41 8b22da26f9f433dea0a10e5ec66d73ba+43 0:127:md5sum.txt\n", CollectionReader(m1).manifest_text(normalize=True))

    def test_init_manifest_with_collision(self):
        """A name used both as a file and as a stream raises ArgumentError."""
        m1 = """. 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt
./md5sum.txt 085c37f02916da1cad16f93c54d899b7+41 0:41:md5sum.txt
"""
        # The constructor itself must raise; there is nothing to compare.
        with self.assertRaises(arvados.errors.ArgumentError):
            CollectionReader(m1)

    def test_init_manifest_with_error(self):
        """A manifest line without a block locator raises ArgumentError."""
        m1 = """. 0:43:md5sum.txt"""
        # The constructor itself must raise; there is nothing to compare.
        with self.assertRaises(arvados.errors.ArgumentError):
            CollectionReader(m1)

    def test_remove(self):
        """remove() drops a file from the collection; an empty path raises ArgumentError."""
        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n')
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n", c.portable_manifest_text())
        self.assertIn("count1.txt", c)
        c.remove("count1.txt")
        self.assertNotIn("count1.txt", c)
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c.portable_manifest_text())
        with self.assertRaises(arvados.errors.ArgumentError):
            c.remove("")

    def test_find(self):
        """find() resolves relative paths, rejects '/.' and '', and returns None for misses."""
        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n')
        self.assertIs(c.find("."), c)
        self.assertIs(c.find("./count1.txt"), c["count1.txt"])
        self.assertIs(c.find("count1.txt"), c["count1.txt"])
        with self.assertRaises(IOError):
            c.find("/.")
        with self.assertRaises(arvados.errors.ArgumentError):
            c.find("")
        self.assertIs(c.find("./nonexistant.txt"), None)
        self.assertIs(c.find("./nonexistantsubdir/nonexistant.txt"), None)

    def test_remove_in_subdir(self):
        """Removing the only file of a subdirectory leaves just the root stream in the manifest."""
        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
        c.remove("foo/count2.txt")
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.portable_manifest_text())

    def test_remove_empty_subdir(self):
        """A subdirectory can be removed once its only file has been removed."""
        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
        c.remove("foo/count2.txt")
        c.remove("foo")
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.portable_manifest_text())

    def test_remove_nonempty_subdir(self):
        """Removing a non-empty subdirectory needs recursive=True, else IOError."""
        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
        with self.assertRaises(IOError):
            c.remove("foo")
        c.remove("foo", recursive=True)
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.portable_manifest_text())

    def test_copy_to_file_in_dir(self):
        """copy() to 'foo/count2.txt' adds stream ./foo holding the copy."""
        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        c.copy("count1.txt", "foo/count2.txt")
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c.portable_manifest_text())

    def test_copy_file(self):
        """copy() within the root stream adds a second name for the same data."""
        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        c.copy("count1.txt", "count2.txt")
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n", c.portable_manifest_text())

    def test_copy_to_existing_dir(self):
        """copy() onto an existing directory places the file inside it under its own name."""
        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
        c.copy("count1.txt", "foo")
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n", c.portable_manifest_text())

    def test_copy_to_new_dir(self):
        """copy() to a target ending in '/' creates that directory and copies into it."""
        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        c.copy("count1.txt", "foo/")
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.portable_manifest_text())

    def test_rename_file(self):
        """rename() within the root stream changes only the file name."""
        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        c.rename("count1.txt", "count2.txt")
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c.manifest_text())

    def test_move_file_to_dir(self):
        """rename() into a directory made with mkdirs() moves the file out of the root."""
        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        c.mkdirs("foo")
        c.rename("count1.txt", "foo/count2.txt")
        self.assertEqual("./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c.manifest_text())

    def test_move_file_to_other(self):
        """rename() with source_collection moves a file between collections."""
        c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        c2 = Collection()
        c2.rename("count1.txt", "count2.txt", source_collection=c1)
        self.assertEqual("", c1.manifest_text())
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c2.manifest_text())

    def test_clone(self):
        """clone() yields a collection with the same portable manifest."""
        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
        cl = c.clone()
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", cl.portable_manifest_text())

    def test_diff_del_add(self):
        """Collections with different file names diff as one add plus one del."""
        c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
        d = c2.diff(c1)
        self.assertEqual(sorted(d), [
            ('add', './count1.txt', c1["count1.txt"]),
            ('del', './count2.txt', c2["count2.txt"]),
        ])
        d = c1.diff(c2)
        self.assertEqual(sorted(d), [
            ('add', './count2.txt', c2["count2.txt"]),
            ('del', './count1.txt', c1["count1.txt"]),
        ])
        self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
        # Applying c1 -> c2 differences to c1 must make the two equal.
        c1.apply(d)
        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())

    def test_diff_same(self):
        """Identical collections diff as a single 'tok' entry and apply() changes nothing."""
        c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        c2 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        d = c2.diff(c1)
        self.assertEqual(d, [('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"])])
        d = c1.diff(c2)
        self.assertEqual(d, [('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"])])

        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
        c1.apply(d)
        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())

    def test_diff_mod(self):
        """The same file name backed by different blocks diffs as 'mod'."""
        c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count1.txt\n')
        d = c2.diff(c1)
        self.assertEqual(d, [('mod', './count1.txt', c2["count1.txt"], c1["count1.txt"])])
        d = c1.diff(c2)
        self.assertEqual(d, [('mod', './count1.txt', c1["count1.txt"], c2["count1.txt"])])

        self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
        c1.apply(d)
        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())

    def test_diff_add(self):
        """An extra file in the other collection diffs as 'add' (or 'del' when reversed)."""
        c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        c2 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 5348b82a029fd9e971a811ce1f71360b+43 0:10:count1.txt 10:20:count2.txt\n')
        d = c2.diff(c1)
        self.assertEqual(sorted(d), [
            ('del', './count2.txt', c2["count2.txt"]),
            ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
        ])
        d = c1.diff(c2)
        self.assertEqual(sorted(d), [
            ('add', './count2.txt', c2["count2.txt"]),
            ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
        ])

        self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
        c1.apply(d)
        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())

    def test_diff_add_in_subcollection(self):
        """A subdirectory present on only one side is reported as a whole under './foo'."""
        c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        c2 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
        d = c2.diff(c1)
        self.assertEqual(sorted(d), [
            ('del', './foo', c2["foo"]),
            ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
        ])
        d = c1.diff(c2)
        self.assertEqual(sorted(d), [
            ('add', './foo', c2["foo"]),
            ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
        ])
        self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
        c1.apply(d)
        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())

    def test_diff_del_add_in_subcollection(self):
        """Differences inside a shared subdirectory are reported per file."""
        c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
        c2 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 5348b82a029fd9e971a811ce1f71360b+43 0:3:count3.txt\n')
        d = c2.diff(c1)
        self.assertEqual(sorted(d), [
            ('add', './foo/count2.txt', c1.find("foo/count2.txt")),
            ('del', './foo/count3.txt', c2.find("foo/count3.txt")),
            ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
        ])
        d = c1.diff(c2)
        self.assertEqual(sorted(d), [
            ('add', './foo/count3.txt', c2.find("foo/count3.txt")),
            ('del', './foo/count2.txt', c1.find("foo/count2.txt")),
            ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
        ])

        self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
        c1.apply(d)
        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())

    def test_diff_mod_in_subcollection(self):
        """'foo' being a directory on one side and a file on the other diffs as 'mod'."""
        c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
        c2 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:3:foo\n')
        d = c2.diff(c1)
        self.assertEqual(sorted(d), [
            ('mod', './foo', c2["foo"], c1["foo"]),
            ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
        ])
        d = c1.diff(c2)
        self.assertEqual(sorted(d), [
            ('mod', './foo', c1["foo"], c2["foo"]),
            ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
        ])

        self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
        c1.apply(d)
        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())

    def test_conflict_keep_local_change(self):
        """apply() does not delete a file that was modified after the diff was taken."""
        c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
        d = c1.diff(c2)
        self.assertEqual(sorted(d), [
            ('add', './count2.txt', c2["count2.txt"]),
            ('del', './count1.txt', c1["count1.txt"]),
        ])
        f = c1.open("count1.txt", "wb")
        f.write(b"zzzzz")

        # c1 changed, so it should not be deleted.
        c1.apply(d)
        self.assertEqual(c1.portable_manifest_text(), ". 95ebc3c7b3b9f1d2c40fec14415d3cb8+5 5348b82a029fd9e971a811ce1f71360b+43 0:5:count1.txt 5:10:count2.txt\n")

    def test_conflict_mod(self):
        """A 'mod' applied over a locally modified file lands in a conflict file."""
        c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt')
        c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count1.txt')
        d = c1.diff(c2)
        self.assertEqual(d, [('mod', './count1.txt', c1["count1.txt"], c2["count1.txt"])])
        f = c1.open("count1.txt", "wb")
        f.write(b"zzzzz")

        # c1 changed, so c2 mod will go to a conflict file
        c1.apply(d)
        self.assertRegex(
            c1.portable_manifest_text(),
            r"\. 95ebc3c7b3b9f1d2c40fec14415d3cb8\+5 5348b82a029fd9e971a811ce1f71360b\+43 0:5:count1\.txt 5:10:count1\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")

    def test_conflict_add(self):
        """An 'add' applied over a locally created file of the same name lands in a conflict file."""
        c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
        c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count1.txt\n')
        d = c1.diff(c2)
        self.assertEqual(sorted(d), [
            ('add', './count1.txt', c2["count1.txt"]),
            ('del', './count2.txt', c1["count2.txt"]),
        ])
        f = c1.open("count1.txt", "wb")
        f.write(b"zzzzz")

        # c1 added count1.txt, so c2 add will go to a conflict file
        c1.apply(d)
        self.assertRegex(
            c1.portable_manifest_text(),
            r"\. 95ebc3c7b3b9f1d2c40fec14415d3cb8\+5 5348b82a029fd9e971a811ce1f71360b\+43 0:5:count1\.txt 5:10:count1\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")

    def test_conflict_del(self):
        """A 'mod' applied after the file was removed locally lands in a conflict file."""
        c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt')
        c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count1.txt')
        d = c1.diff(c2)
        self.assertEqual(d, [('mod', './count1.txt', c1["count1.txt"], c2["count1.txt"])])
        c1.remove("count1.txt")

        # c1 deleted, so c2 mod will go to a conflict file
        c1.apply(d)
        self.assertRegex(
            c1.portable_manifest_text(),
            r"\. 5348b82a029fd9e971a811ce1f71360b\+43 0:10:count1\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")

    def test_notify(self):
        """Creating a file notifies subscribers with an ADD event for that file."""
        c1 = Collection()
        events = []
        c1.subscribe(lambda event, collection, name, item: events.append((event, collection, name, item)))
        f = c1.open("foo.txt", "wb")
        self.assertEqual(events[0], (arvados.collection.ADD, c1, "foo.txt", f.arvadosfile))

    def test_open_w(self):
        """Opening an existing file with 'wb' truncates it to zero bytes."""
        c1 = Collection(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n")
        self.assertEqual(c1["count1.txt"].size(), 10)
        c1.open("count1.txt", "wb").close()
        self.assertEqual(c1["count1.txt"].size(), 0)
1184
1185
class NewCollectionTestCaseWithServersAndTokens(run_test_server.TestCaseWithServers):
    """Collection tests whose manifests must carry permission-signed locators."""
    # NOTE(review): presumably these ask the test harness for an API
    # server and Keep servers -- confirm against run_test_server.
    MAIN_SERVER = {}
    KEEP_SERVER = {}

    def setUp(self):
        """Remember the unpatched KeepClient.put so a test can delegate to it."""
        self.keep_put = arvados.keep.KeepClient.put

    def test_repacked_block_submission_get_permission_token(self):
        """
        Make sure that those blocks that are committed after repacking small ones,
        get their permission tokens assigned on the collection manifest.
        """
        def wrapped_keep_put(*args, **kwargs):
            # Simulate slow put operations
            time.sleep(1)
            return self.keep_put(*args, **kwargs)

        # Raw string: '\+' and '\d' are regex escapes, not valid Python
        # string escapes.
        re_locator = r"[0-9a-f]{32}\+\d+\+A[a-f0-9]{40}@[a-f0-9]{8}"

        with mock.patch('arvados.keep.KeepClient.put', autospec=True) as mocked_put:
            mocked_put.side_effect = wrapped_keep_put
            c = Collection()
            # Write 70 files ~1MiB each so we force to produce 1 big block by repacking
            # small ones before finishing the upload.
            for i in range(70):
                f = c.open("file_{}.txt".format(i), 'wb')
                f.write(random.choice('abcdefghijklmnopqrstuvwxyz') * (2**20+i))
                f.close(flush=False)
            # We should get 2 blocks with their tokens
            self.assertEqual(len(re.findall(re_locator, c.manifest_text())), 2)
1216
1217
class NewCollectionTestCaseWithServers(run_test_server.TestCaseWithServers):
    """Manifest-text checks for Collection writes.

    Covers which blocks appear in the manifest before and after a flush, and
    how small files written with close(flush=False) are packed together.
    """

    def test_get_manifest_text_only_committed(self):
        c = Collection()
        with c.open("count.txt", "wb") as f:
            # One file committed
            with c.open("foo.txt", "wb") as foo:
                foo.write(b"foo")
                foo.flush() # Force block commit
            f.write(b"0123456789")
            # Other file not committed. Block not written to keep yet.
            # count.txt therefore shows up as a zero-length segment (0:0).
            self.assertEqual(
                c._get_manifest_text(".",
                                     strip=False,
                                     normalize=False,
                                     only_committed=True),
                '. acbd18db4cc2f85cedef654fccc4a4d8+3 0:0:count.txt 0:3:foo.txt\n')
            # And now with the file closed...
            f.flush() # Force block commit
        self.assertEqual(
            c._get_manifest_text(".",
                                 strip=False,
                                 normalize=False,
                                 only_committed=True),
            ". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:10:count.txt 10:3:foo.txt\n")

    def test_only_small_blocks_are_packed_together(self):
        c = Collection()
        # Write a couple of small files,
        f = c.open("count.txt", "wb")
        f.write(b"0123456789")
        f.close(flush=False)
        foo = c.open("foo.txt", "wb")
        foo.write(b"foo")
        foo.close(flush=False)
        # Then, write a big file, it shouldn't be packed with the ones above
        big = c.open("bigfile.txt", "wb")
        big.write(b"x" * 1024 * 1024 * 33) # 33 MB > KEEP_BLOCK_SIZE/2
        big.close(flush=False)
        # Expect two blocks: the big file alone, and one 13-byte block that
        # holds both small files (10 + 3 bytes).
        self.assertEqual(
            c.manifest_text("."),
            '. 2d303c138c118af809f39319e5d507e9+34603008 a8430a058b8fbf408e1931b794dbd6fb+13 0:34603008:bigfile.txt 34603008:10:count.txt 34603018:3:foo.txt\n')

    def test_flush_after_small_block_packing(self):
        c = Collection()
        # Write a couple of small files,
        f = c.open("count.txt", "wb")
        f.write(b"0123456789")
        f.close(flush=False)
        foo = c.open("foo.txt", "wb")
        foo.write(b"foo")
        foo.close(flush=False)

        self.assertEqual(
            c.manifest_text(),
            '. a8430a058b8fbf408e1931b794dbd6fb+13 0:10:count.txt 10:3:foo.txt\n')

        # Re-open without writing and close with a flush: the manifest must
        # stay exactly as it was after packing.
        f = c.open("count.txt", "rb+")
        f.close(flush=True)

        self.assertEqual(
            c.manifest_text(),
            '. a8430a058b8fbf408e1931b794dbd6fb+13 0:10:count.txt 10:3:foo.txt\n')

    def test_write_after_small_block_packing2(self):
        c = Collection()
        # Write a couple of small files,
        f = c.open("count.txt", "wb")
        f.write(b"0123456789")
        f.close(flush=False)
        foo = c.open("foo.txt", "wb")
        foo.write(b"foo")
        foo.close(flush=False)

        self.assertEqual(
            c.manifest_text(),
            '. a8430a058b8fbf408e1931b794dbd6fb+13 0:10:count.txt 10:3:foo.txt\n')

        # Overwrite the first 3 bytes of count.txt after it was packed.
        f = c.open("count.txt", "rb+")
        f.write(b"abc")
        f.close(flush=False)

        # count.txt is now a new 3-byte block followed by the last 7 bytes of
        # the packed block; foo.txt still points into the packed block.
        self.assertEqual(
            c.manifest_text(),
            '. 900150983cd24fb0d6963f7d28e17f72+3 a8430a058b8fbf408e1931b794dbd6fb+13 0:3:count.txt 6:7:count.txt 13:3:foo.txt\n')

    def test_small_block_packing_with_overwrite(self):
        c = Collection()
        c.open("b1", "wb").close()
        c["b1"].writeto(0, b"b1", 0)

        c.open("b2", "wb").close()
        c["b2"].writeto(0, b"b2", 0)

        # Overwrite b1 in place before anything is committed.
        c["b1"].writeto(0, b"1b", 0)

        # assertEqual, not the deprecated assertEquals alias (removed in
        # Python 3.12); both files must share a single packed 4-byte block.
        self.assertEqual(c.manifest_text(), ". ed4f3f67c70b02b29c50ce1ea26666bd+4 0:2:b1 2:2:b2\n")
        self.assertEqual(c["b1"].manifest_text(), ". ed4f3f67c70b02b29c50ce1ea26666bd+4 0:2:b1\n")
        self.assertEqual(c["b2"].manifest_text(), ". ed4f3f67c70b02b29c50ce1ea26666bd+4 2:2:b2\n")
1317
1318
class CollectionCreateUpdateTest(run_test_server.TestCaseWithServers):
    """Save, diff, apply and update round trips on Collection objects."""

    MAIN_SERVER = {}
    KEEP_SERVER = {}

    def create_count_txt(self):
        """Return a saved, empty collection with an unsaved count.txt in it.

        The collection is created empty and stored with save_new(); count.txt
        is then written locally and deliberately not saved.
        """
        empty_pdh = "d41d8cd98f00b204e9800998ecf8427e+0"

        coll = Collection()
        coll.save_new("CollectionCreateUpdateTest", ensure_unique_name=True)
        self.assertEqual(coll.portable_data_hash(), empty_pdh)
        self.assertEqual(coll.api_response()["portable_data_hash"], empty_pdh)

        with coll.open("count.txt", "wb") as writer:
            writer.write(b"0123456789")

        expected_manifest = ". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n"
        self.assertEqual(coll.portable_manifest_text(), expected_manifest)

        return coll

    def test_create_and_save(self):
        # After save() the block locator must carry a permission signature.
        signed_manifest = r"^\. 781e5e245d69b566979b86e28d23f2c7\+10\+A[a-f0-9]{40}@[a-f0-9]{8} 0:10:count\.txt$"
        coll = self.create_count_txt()
        coll.save()
        self.assertRegex(coll.manifest_text(), signed_manifest)

    def test_create_and_save_new(self):
        # Same expectation as test_create_and_save, but through save_new().
        signed_manifest = r"^\. 781e5e245d69b566979b86e28d23f2c7\+10\+A[a-f0-9]{40}@[a-f0-9]{8} 0:10:count\.txt$"
        coll = self.create_count_txt()
        coll.save_new()
        self.assertRegex(coll.manifest_text(), signed_manifest)

    def test_create_diff_apply(self):
        local = self.create_count_txt()
        local.save()

        # Second handle on the same stored collection, with different content.
        remote = Collection(local.manifest_locator())
        with remote.open("count.txt", "wb") as writer:
            writer.write(b"abcdefg")

        changes = local.diff(remote)

        first_change = changes[0]
        expected_change = (
            arvados.collection.MOD,
            u'./count.txt',
            local["count.txt"],
            remote["count.txt"],
        )
        self.assertEqual(first_change, expected_change)

        # Applying the diff must bring both collections to the same content.
        local.apply(changes)
        self.assertEqual(local.portable_data_hash(), remote.portable_data_hash())

    def test_diff_apply_with_token(self):
        signed_old = ". 781e5e245d69b566979b86e28d23f2c7+10+A715fd31f8111894f717eb1003c1b0216799dd9ec@54f5dd1a 0:10:count.txt\n"
        unsigned_old = ". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n"
        signed_new = ". 7ac66c0f148de9519b8bd264312c4d64+7+A715fd31f8111894f717eb1003c1b0216799dd9ec@54f5dd1a 0:7:count.txt\n"

        baseline = CollectionReader(signed_old)
        target = Collection(unsigned_old)
        other = CollectionReader(signed_new)

        changes = baseline.diff(other)
        expected_changes = [
            ('mod', u'./count.txt', target["count.txt"], other["count.txt"]),
        ]
        self.assertEqual(changes, expected_changes)

        target.apply(changes)

        # The applied result keeps the signed locator from the other side.
        self.assertEqual(target.manifest_text(), signed_new)

    def test_create_and_update(self):
        local = self.create_count_txt()
        local.save()

        remote = arvados.collection.Collection(local.manifest_locator())
        with remote.open("count.txt", "wb") as writer:
            writer.write(b"abcdefg")

        remote.save()

        # The local copy differs until update() pulls in the saved change.
        self.assertNotEqual(local.portable_data_hash(), remote.portable_data_hash())
        local.update()
        self.assertEqual(local.portable_data_hash(), remote.portable_data_hash())

    def test_create_and_update_with_conflict(self):
        local = self.create_count_txt()
        local.save()

        # Unsaved local edit that will clash with the change saved below.
        with local.open("count.txt", "wb") as writer:
            writer.write(b"XYZ")

        remote = arvados.collection.Collection(local.manifest_locator())
        with remote.open("count.txt", "wb") as writer:
            writer.write(b"abcdefg")

        remote.save()

        local.update()
        # The local edit stays in count.txt; the other version is kept under
        # a timestamped "~conflict~" name.
        conflict_manifest = r"\. e65075d550f9b5bf9992fa1d71a131be\+3\S* 7ac66c0f148de9519b8bd264312c4d64\+7\S* 0:3:count\.txt 3:7:count\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$"
        self.assertRegex(local.manifest_text(), conflict_manifest)

    def test_pdh_is_native_str(self):
        # The portable data hash must be the interpreter's native str type.
        coll = self.create_count_txt()
        native_str_type = type('')
        self.assertEqual(native_str_type, type(coll.portable_data_hash()))
1418
1419
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()