Merge branch '8784-dir-listings'
[arvados.git] / sdk / python / tests / test_collections.py
1 # Copyright (C) The Arvados Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: Apache-2.0
4
5 from __future__ import absolute_import
6
7 from builtins import object
8 import arvados
9 import copy
10 import mock
11 import os
12 import pprint
13 import random
14 import re
15 import sys
16 import tempfile
17 import time
18 import unittest
19
20 from . import run_test_server
21 from arvados._ranges import Range, LocatorAndRange
22 from arvados.collection import Collection, CollectionReader
23 from . import arvados_testutil as tutil
24
class TestResumableWriter(arvados.ResumableCollectionWriter):
    """ResumableCollectionWriter subclass used by the resume tests."""

    # PUT to Keep every 1K.
    KEEP_BLOCK_SIZE = 1024

    def current_state(self):
        """Return the result of dump_state() called with copy.deepcopy."""
        snapshot = self.dump_state(copy.deepcopy)
        return snapshot
30
31
32 class ArvadosCollectionsTest(run_test_server.TestCaseWithServers,
33                              tutil.ArvadosBaseTestCase):
34     MAIN_SERVER = {}
35
36     @classmethod
37     def setUpClass(cls):
38         super(ArvadosCollectionsTest, cls).setUpClass()
39         run_test_server.authorize_with('active')
40         cls.api_client = arvados.api('v1')
41         cls.keep_client = arvados.KeepClient(api_client=cls.api_client,
42                                              local_store=cls.local_store)
43
44     def write_foo_bar_baz(self):
45         cw = arvados.CollectionWriter(self.api_client)
46         self.assertEqual(cw.current_stream_name(), '.',
47                          'current_stream_name() should be "." now')
48         cw.set_current_file_name('foo.txt')
49         cw.write(b'foo')
50         self.assertEqual(cw.current_file_name(), 'foo.txt',
51                          'current_file_name() should be foo.txt now')
52         cw.start_new_file('bar.txt')
53         cw.write(b'bar')
54         cw.start_new_stream('baz')
55         cw.write(b'baz')
56         cw.set_current_file_name('baz.txt')
57         self.assertEqual(cw.manifest_text(),
58                          ". 3858f62230ac3c915f300c664312c63f+6 0:3:foo.txt 3:3:bar.txt\n" +
59                          "./baz 73feffa4b7f6bb68e44cf984c85f6e88+3 0:3:baz.txt\n",
60                          "wrong manifest: got {}".format(cw.manifest_text()))
61         cw.finish()
62         return cw.portable_data_hash()
63
64     def test_pdh_is_native_str(self):
65         pdh = self.write_foo_bar_baz()
66         self.assertEqual(type(''), type(pdh))
67
68     def test_keep_local_store(self):
69         self.assertEqual(self.keep_client.put(b'foo'), 'acbd18db4cc2f85cedef654fccc4a4d8+3', 'wrong md5 hash from Keep.put')
70         self.assertEqual(self.keep_client.get('acbd18db4cc2f85cedef654fccc4a4d8+3'), b'foo', 'wrong data from Keep.get')
71
72     def test_local_collection_writer(self):
73         self.assertEqual(self.write_foo_bar_baz(),
74                          '23ca013983d6239e98931cc779e68426+114',
75                          'wrong locator hash: ' + self.write_foo_bar_baz())
76
77     def test_local_collection_reader(self):
78         foobarbaz = self.write_foo_bar_baz()
79         cr = arvados.CollectionReader(
80             foobarbaz + '+Xzizzle', self.api_client)
81         got = []
82         for s in cr.all_streams():
83             for f in s.all_files():
84                 got += [[f.size(), f.stream_name(), f.name(), f.read(2**26)]]
85         expected = [[3, '.', 'foo.txt', b'foo'],
86                     [3, '.', 'bar.txt', b'bar'],
87                     [3, './baz', 'baz.txt', b'baz']]
88         self.assertEqual(got,
89                          expected)
90         stream0 = cr.all_streams()[0]
91         self.assertEqual(stream0.readfrom(0, 0),
92                          b'',
93                          'reading zero bytes should have returned empty string')
94         self.assertEqual(stream0.readfrom(0, 2**26),
95                          b'foobar',
96                          'reading entire stream failed')
97         self.assertEqual(stream0.readfrom(2**26, 0),
98                          b'',
99                          'reading zero bytes should have returned empty string')
100
101     def _test_subset(self, collection, expected):
102         cr = arvados.CollectionReader(collection, self.api_client)
103         for s in cr.all_streams():
104             for ex in expected:
105                 if ex[0] == s:
106                     f = s.files()[ex[2]]
107                     got = [f.size(), f.stream_name(), f.name(), "".join(f.readall(2**26))]
108                     self.assertEqual(got,
109                                      ex,
110                                      'all_files|as_manifest did not preserve manifest contents: got %s expected %s' % (got, ex))
111
112     def test_collection_manifest_subset(self):
113         foobarbaz = self.write_foo_bar_baz()
114         self._test_subset(foobarbaz,
115                           [[3, '.',     'bar.txt', b'bar'],
116                            [3, '.',     'foo.txt', b'foo'],
117                            [3, './baz', 'baz.txt', b'baz']])
118         self._test_subset((". %s %s 0:3:foo.txt 3:3:bar.txt\n" %
119                            (self.keep_client.put(b"foo"),
120                             self.keep_client.put(b"bar"))),
121                           [[3, '.', 'bar.txt', b'bar'],
122                            [3, '.', 'foo.txt', b'foo']])
123         self._test_subset((". %s %s 0:2:fo.txt 2:4:obar.txt\n" %
124                            (self.keep_client.put(b"foo"),
125                             self.keep_client.put(b"bar"))),
126                           [[2, '.', 'fo.txt', b'fo'],
127                            [4, '.', 'obar.txt', b'obar']])
128         self._test_subset((". %s %s 0:2:fo.txt 2:0:zero.txt 2:2:ob.txt 4:2:ar.txt\n" %
129                            (self.keep_client.put(b"foo"),
130                             self.keep_client.put(b"bar"))),
131                           [[2, '.', 'ar.txt', b'ar'],
132                            [2, '.', 'fo.txt', b'fo'],
133                            [2, '.', 'ob.txt', b'ob'],
134                            [0, '.', 'zero.txt', b'']])
135
136     def test_collection_empty_file(self):
137         cw = arvados.CollectionWriter(self.api_client)
138         cw.start_new_file('zero.txt')
139         cw.write(b'')
140
141         self.assertEqual(cw.manifest_text(), ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:zero.txt\n")
142         self.check_manifest_file_sizes(cw.manifest_text(), [0])
143         cw = arvados.CollectionWriter(self.api_client)
144         cw.start_new_file('zero.txt')
145         cw.write(b'')
146         cw.start_new_file('one.txt')
147         cw.write(b'1')
148         cw.start_new_stream('foo')
149         cw.start_new_file('zero.txt')
150         cw.write(b'')
151         self.check_manifest_file_sizes(cw.manifest_text(), [0,1,0])
152
153     def test_no_implicit_normalize(self):
154         cw = arvados.CollectionWriter(self.api_client)
155         cw.start_new_file('b')
156         cw.write(b'b')
157         cw.start_new_file('a')
158         cw.write(b'')
159         self.check_manifest_file_sizes(cw.manifest_text(), [1,0])
160         self.check_manifest_file_sizes(
161             arvados.CollectionReader(
162                 cw.manifest_text()).manifest_text(normalize=True),
163             [0,1])
164
165     def check_manifest_file_sizes(self, manifest_text, expect_sizes):
166         cr = arvados.CollectionReader(manifest_text, self.api_client)
167         got_sizes = []
168         for f in cr.all_files():
169             got_sizes += [f.size()]
170         self.assertEqual(got_sizes, expect_sizes, "got wrong file sizes %s, expected %s" % (got_sizes, expect_sizes))
171
172     def test_normalized_collection(self):
173         m1 = """. 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt
174 . 085c37f02916da1cad16f93c54d899b7+41 0:41:md5sum.txt
175 . 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md5sum.txt
176 """
177         self.assertEqual(arvados.CollectionReader(m1, self.api_client).manifest_text(normalize=True),
178                          """. 5348b82a029fd9e971a811ce1f71360b+43 085c37f02916da1cad16f93c54d899b7+41 8b22da26f9f433dea0a10e5ec66d73ba+43 0:127:md5sum.txt
179 """)
180
181         m2 = """. 204e43b8a1185621ca55a94839582e6f+67108864 b9677abbac956bd3e86b1deb28dfac03+67108864 fc15aff2a762b13f521baf042140acec+67108864 323d2a3ce20370c4ca1d3462a344f8fd+25885655 0:227212247:var-GS000016015-ASM.tsv.bz2
182 """
183         self.assertEqual(arvados.CollectionReader(m2, self.api_client).manifest_text(normalize=True), m2)
184
185         m3 = """. 5348b82a029fd9e971a811ce1f71360b+43 3:40:md5sum.txt
186 . 085c37f02916da1cad16f93c54d899b7+41 0:41:md5sum.txt
187 . 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md5sum.txt
188 """
189         self.assertEqual(arvados.CollectionReader(m3, self.api_client).manifest_text(normalize=True),
190                          """. 5348b82a029fd9e971a811ce1f71360b+43 085c37f02916da1cad16f93c54d899b7+41 8b22da26f9f433dea0a10e5ec66d73ba+43 3:124:md5sum.txt
191 """)
192
193         m4 = """. 204e43b8a1185621ca55a94839582e6f+67108864 0:3:foo/bar
194 ./zzz 204e43b8a1185621ca55a94839582e6f+67108864 0:999:zzz
195 ./foo 323d2a3ce20370c4ca1d3462a344f8fd+25885655 0:3:bar
196 """
197         self.assertEqual(arvados.CollectionReader(m4, self.api_client).manifest_text(normalize=True),
198                          """./foo 204e43b8a1185621ca55a94839582e6f+67108864 323d2a3ce20370c4ca1d3462a344f8fd+25885655 0:3:bar 67108864:3:bar
199 ./zzz 204e43b8a1185621ca55a94839582e6f+67108864 0:999:zzz
200 """)
201
202         m5 = """. 204e43b8a1185621ca55a94839582e6f+67108864 0:3:foo/bar
203 ./zzz 204e43b8a1185621ca55a94839582e6f+67108864 0:999:zzz
204 ./foo 204e43b8a1185621ca55a94839582e6f+67108864 3:3:bar
205 """
206         self.assertEqual(arvados.CollectionReader(m5, self.api_client).manifest_text(normalize=True),
207                          """./foo 204e43b8a1185621ca55a94839582e6f+67108864 0:6:bar
208 ./zzz 204e43b8a1185621ca55a94839582e6f+67108864 0:999:zzz
209 """)
210
211         with self.data_file('1000G_ref_manifest') as f6:
212             m6 = f6.read()
213             self.assertEqual(arvados.CollectionReader(m6, self.api_client).manifest_text(normalize=True), m6)
214
215         with self.data_file('jlake_manifest') as f7:
216             m7 = f7.read()
217             self.assertEqual(arvados.CollectionReader(m7, self.api_client).manifest_text(normalize=True), m7)
218
219         m8 = """./a\\040b\\040c 59ca0efa9f5633cb0371bbc0355478d8+13 0:13:hello\\040world.txt
220 """
221         self.assertEqual(arvados.CollectionReader(m8, self.api_client).manifest_text(normalize=True), m8)
222
223     def test_locators_and_ranges(self):
224         blocks2 = [Range('a', 0, 10),
225                    Range('b', 10, 10),
226                    Range('c', 20, 10),
227                    Range('d', 30, 10),
228                    Range('e', 40, 10),
229                    Range('f', 50, 10)]
230
231         self.assertEqual(arvados.locators_and_ranges(blocks2,  2,  2), [LocatorAndRange('a', 10, 2, 2)])
232         self.assertEqual(arvados.locators_and_ranges(blocks2, 12, 2), [LocatorAndRange('b', 10, 2, 2)])
233         self.assertEqual(arvados.locators_and_ranges(blocks2, 22, 2), [LocatorAndRange('c', 10, 2, 2)])
234         self.assertEqual(arvados.locators_and_ranges(blocks2, 32, 2), [LocatorAndRange('d', 10, 2, 2)])
235         self.assertEqual(arvados.locators_and_ranges(blocks2, 42, 2), [LocatorAndRange('e', 10, 2, 2)])
236         self.assertEqual(arvados.locators_and_ranges(blocks2, 52, 2), [LocatorAndRange('f', 10, 2, 2)])
237         self.assertEqual(arvados.locators_and_ranges(blocks2, 62, 2), [])
238         self.assertEqual(arvados.locators_and_ranges(blocks2, -2, 2), [])
239
240         self.assertEqual(arvados.locators_and_ranges(blocks2,  0,  2), [LocatorAndRange('a', 10, 0, 2)])
241         self.assertEqual(arvados.locators_and_ranges(blocks2, 10, 2), [LocatorAndRange('b', 10, 0, 2)])
242         self.assertEqual(arvados.locators_and_ranges(blocks2, 20, 2), [LocatorAndRange('c', 10, 0, 2)])
243         self.assertEqual(arvados.locators_and_ranges(blocks2, 30, 2), [LocatorAndRange('d', 10, 0, 2)])
244         self.assertEqual(arvados.locators_and_ranges(blocks2, 40, 2), [LocatorAndRange('e', 10, 0, 2)])
245         self.assertEqual(arvados.locators_and_ranges(blocks2, 50, 2), [LocatorAndRange('f', 10, 0, 2)])
246         self.assertEqual(arvados.locators_and_ranges(blocks2, 60, 2), [])
247         self.assertEqual(arvados.locators_and_ranges(blocks2, -2, 2), [])
248
249         self.assertEqual(arvados.locators_and_ranges(blocks2,  9,  2), [LocatorAndRange('a', 10, 9, 1), LocatorAndRange('b', 10, 0, 1)])
250         self.assertEqual(arvados.locators_and_ranges(blocks2, 19, 2), [LocatorAndRange('b', 10, 9, 1), LocatorAndRange('c', 10, 0, 1)])
251         self.assertEqual(arvados.locators_and_ranges(blocks2, 29, 2), [LocatorAndRange('c', 10, 9, 1), LocatorAndRange('d', 10, 0, 1)])
252         self.assertEqual(arvados.locators_and_ranges(blocks2, 39, 2), [LocatorAndRange('d', 10, 9, 1), LocatorAndRange('e', 10, 0, 1)])
253         self.assertEqual(arvados.locators_and_ranges(blocks2, 49, 2), [LocatorAndRange('e', 10, 9, 1), LocatorAndRange('f', 10, 0, 1)])
254         self.assertEqual(arvados.locators_and_ranges(blocks2, 59, 2), [LocatorAndRange('f', 10, 9, 1)])
255
256
257         blocks3 = [Range('a', 0, 10),
258                   Range('b', 10, 10),
259                   Range('c', 20, 10),
260                   Range('d', 30, 10),
261                   Range('e', 40, 10),
262                   Range('f', 50, 10),
263                    Range('g', 60, 10)]
264
265         self.assertEqual(arvados.locators_and_ranges(blocks3,  2,  2), [LocatorAndRange('a', 10, 2, 2)])
266         self.assertEqual(arvados.locators_and_ranges(blocks3, 12, 2), [LocatorAndRange('b', 10, 2, 2)])
267         self.assertEqual(arvados.locators_and_ranges(blocks3, 22, 2), [LocatorAndRange('c', 10, 2, 2)])
268         self.assertEqual(arvados.locators_and_ranges(blocks3, 32, 2), [LocatorAndRange('d', 10, 2, 2)])
269         self.assertEqual(arvados.locators_and_ranges(blocks3, 42, 2), [LocatorAndRange('e', 10, 2, 2)])
270         self.assertEqual(arvados.locators_and_ranges(blocks3, 52, 2), [LocatorAndRange('f', 10, 2, 2)])
271         self.assertEqual(arvados.locators_and_ranges(blocks3, 62, 2), [LocatorAndRange('g', 10, 2, 2)])
272
273
274         blocks = [Range('a', 0, 10),
275                   Range('b', 10, 15),
276                   Range('c', 25, 5)]
277         self.assertEqual(arvados.locators_and_ranges(blocks, 1, 0), [])
278         self.assertEqual(arvados.locators_and_ranges(blocks, 0, 5), [LocatorAndRange('a', 10, 0, 5)])
279         self.assertEqual(arvados.locators_and_ranges(blocks, 3, 5), [LocatorAndRange('a', 10, 3, 5)])
280         self.assertEqual(arvados.locators_and_ranges(blocks, 0, 10), [LocatorAndRange('a', 10, 0, 10)])
281
282         self.assertEqual(arvados.locators_and_ranges(blocks, 0, 11), [LocatorAndRange('a', 10, 0, 10),
283                                                                       LocatorAndRange('b', 15, 0, 1)])
284         self.assertEqual(arvados.locators_and_ranges(blocks, 1, 11), [LocatorAndRange('a', 10, 1, 9),
285                                                                       LocatorAndRange('b', 15, 0, 2)])
286         self.assertEqual(arvados.locators_and_ranges(blocks, 0, 25), [LocatorAndRange('a', 10, 0, 10),
287                                                                       LocatorAndRange('b', 15, 0, 15)])
288
289         self.assertEqual(arvados.locators_and_ranges(blocks, 0, 30), [LocatorAndRange('a', 10, 0, 10),
290                                                                       LocatorAndRange('b', 15, 0, 15),
291                                                                       LocatorAndRange('c', 5, 0, 5)])
292         self.assertEqual(arvados.locators_and_ranges(blocks, 1, 30), [LocatorAndRange('a', 10, 1, 9),
293                                                                       LocatorAndRange('b', 15, 0, 15),
294                                                                       LocatorAndRange('c', 5, 0, 5)])
295         self.assertEqual(arvados.locators_and_ranges(blocks, 0, 31), [LocatorAndRange('a', 10, 0, 10),
296                                                                       LocatorAndRange('b', 15, 0, 15),
297                                                                       LocatorAndRange('c', 5, 0, 5)])
298
299         self.assertEqual(arvados.locators_and_ranges(blocks, 15, 5), [LocatorAndRange('b', 15, 5, 5)])
300
301         self.assertEqual(arvados.locators_and_ranges(blocks, 8, 17), [LocatorAndRange('a', 10, 8, 2),
302                                                                       LocatorAndRange('b', 15, 0, 15)])
303
304         self.assertEqual(arvados.locators_and_ranges(blocks, 8, 20), [LocatorAndRange('a', 10, 8, 2),
305                                                                       LocatorAndRange('b', 15, 0, 15),
306                                                                       LocatorAndRange('c', 5, 0, 3)])
307
308         self.assertEqual(arvados.locators_and_ranges(blocks, 26, 2), [LocatorAndRange('c', 5, 1, 2)])
309
310         self.assertEqual(arvados.locators_and_ranges(blocks, 9, 15), [LocatorAndRange('a', 10, 9, 1),
311                                                                       LocatorAndRange('b', 15, 0, 14)])
312         self.assertEqual(arvados.locators_and_ranges(blocks, 10, 15), [LocatorAndRange('b', 15, 0, 15)])
313         self.assertEqual(arvados.locators_and_ranges(blocks, 11, 15), [LocatorAndRange('b', 15, 1, 14),
314                                                                        LocatorAndRange('c', 5, 0, 1)])
315
316     class MockKeep(object):
317         def __init__(self, content, num_retries=0):
318             self.content = content
319
320         def get(self, locator, num_retries=0):
321             return self.content[locator]
322
323     def test_stream_reader(self):
324         keepblocks = {
325             'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa+10': b'abcdefghij',
326             'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb+15': b'klmnopqrstuvwxy',
327             'cccccccccccccccccccccccccccccccc+5': b'z0123',
328         }
329         mk = self.MockKeep(keepblocks)
330
331         sr = arvados.StreamReader([".", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa+10", "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb+15", "cccccccccccccccccccccccccccccccc+5", "0:30:foo"], mk)
332
333         content = b'abcdefghijklmnopqrstuvwxyz0123456789'
334
335         self.assertEqual(sr.readfrom(0, 30), content[0:30])
336         self.assertEqual(sr.readfrom(2, 30), content[2:30])
337
338         self.assertEqual(sr.readfrom(2, 8), content[2:10])
339         self.assertEqual(sr.readfrom(0, 10), content[0:10])
340
341         self.assertEqual(sr.readfrom(0, 5), content[0:5])
342         self.assertEqual(sr.readfrom(5, 5), content[5:10])
343         self.assertEqual(sr.readfrom(10, 5), content[10:15])
344         self.assertEqual(sr.readfrom(15, 5), content[15:20])
345         self.assertEqual(sr.readfrom(20, 5), content[20:25])
346         self.assertEqual(sr.readfrom(25, 5), content[25:30])
347         self.assertEqual(sr.readfrom(30, 5), b'')
348
349     def test_extract_file(self):
350         m1 = """. 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt
351 . 085c37f02916da1cad16f93c54d899b7+41 0:41:md6sum.txt
352 . 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md7sum.txt
353 . 085c37f02916da1cad16f93c54d899b7+41 5348b82a029fd9e971a811ce1f71360b+43 8b22da26f9f433dea0a10e5ec66d73ba+43 47:80:md8sum.txt
354 . 085c37f02916da1cad16f93c54d899b7+41 5348b82a029fd9e971a811ce1f71360b+43 8b22da26f9f433dea0a10e5ec66d73ba+43 40:80:md9sum.txt
355 """
356
357         m2 = arvados.CollectionReader(m1, self.api_client).manifest_text(normalize=True)
358
359         self.assertEqual(m2,
360                          ". 5348b82a029fd9e971a811ce1f71360b+43 085c37f02916da1cad16f93c54d899b7+41 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md5sum.txt 43:41:md6sum.txt 84:43:md7sum.txt 6:37:md8sum.txt 84:43:md8sum.txt 83:1:md9sum.txt 0:43:md9sum.txt 84:36:md9sum.txt\n")
361         files = arvados.CollectionReader(
362             m2, self.api_client).all_streams()[0].files()
363
364         self.assertEqual(files['md5sum.txt'].as_manifest(),
365                          ". 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt\n")
366         self.assertEqual(files['md6sum.txt'].as_manifest(),
367                          ". 085c37f02916da1cad16f93c54d899b7+41 0:41:md6sum.txt\n")
368         self.assertEqual(files['md7sum.txt'].as_manifest(),
369                          ". 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md7sum.txt\n")
370         self.assertEqual(files['md9sum.txt'].as_manifest(),
371                          ". 085c37f02916da1cad16f93c54d899b7+41 5348b82a029fd9e971a811ce1f71360b+43 8b22da26f9f433dea0a10e5ec66d73ba+43 40:80:md9sum.txt\n")
372
373     def test_write_directory_tree(self):
374         cwriter = arvados.CollectionWriter(self.api_client)
375         cwriter.write_directory_tree(self.build_directory_tree(
376                 ['basefile', 'subdir/subfile']))
377         self.assertEqual(cwriter.manifest_text(),
378                          """. c5110c5ac93202d8e0f9e381f22bac0f+8 0:8:basefile
379 ./subdir 1ca4dec89403084bf282ad31e6cf7972+14 0:14:subfile\n""")
380
381     def test_write_named_directory_tree(self):
382         cwriter = arvados.CollectionWriter(self.api_client)
383         cwriter.write_directory_tree(self.build_directory_tree(
384                 ['basefile', 'subdir/subfile']), 'root')
385         self.assertEqual(
386             cwriter.manifest_text(),
387             """./root c5110c5ac93202d8e0f9e381f22bac0f+8 0:8:basefile
388 ./root/subdir 1ca4dec89403084bf282ad31e6cf7972+14 0:14:subfile\n""")
389
390     def test_write_directory_tree_in_one_stream(self):
391         cwriter = arvados.CollectionWriter(self.api_client)
392         cwriter.write_directory_tree(self.build_directory_tree(
393                 ['basefile', 'subdir/subfile']), max_manifest_depth=0)
394         self.assertEqual(cwriter.manifest_text(),
395                          """. 4ace875ffdc6824a04950f06858f4465+22 0:8:basefile 8:14:subdir/subfile\n""")
396
397     def test_write_directory_tree_with_limited_recursion(self):
398         cwriter = arvados.CollectionWriter(self.api_client)
399         cwriter.write_directory_tree(
400             self.build_directory_tree(['f1', 'd1/f2', 'd1/d2/f3']),
401             max_manifest_depth=1)
402         self.assertEqual(cwriter.manifest_text(),
403                          """. bd19836ddb62c11c55ab251ccaca5645+2 0:2:f1
404 ./d1 50170217e5b04312024aa5cd42934494+13 0:8:d2/f3 8:5:f2\n""")
405
406     def test_write_directory_tree_with_zero_recursion(self):
407         cwriter = arvados.CollectionWriter(self.api_client)
408         content = 'd1/d2/f3d1/f2f1'
409         blockhash = tutil.str_keep_locator(content)
410         cwriter.write_directory_tree(
411             self.build_directory_tree(['f1', 'd1/f2', 'd1/d2/f3']),
412             max_manifest_depth=0)
413         self.assertEqual(
414             cwriter.manifest_text(),
415             ". {} 0:8:d1/d2/f3 8:5:d1/f2 13:2:f1\n".format(blockhash))
416
417     def test_write_one_file(self):
418         cwriter = arvados.CollectionWriter(self.api_client)
419         with self.make_test_file() as testfile:
420             cwriter.write_file(testfile.name)
421             self.assertEqual(
422                 cwriter.manifest_text(),
423                 ". 098f6bcd4621d373cade4e832627b4f6+4 0:4:{}\n".format(
424                     os.path.basename(testfile.name)))
425
426     def test_write_named_file(self):
427         cwriter = arvados.CollectionWriter(self.api_client)
428         with self.make_test_file() as testfile:
429             cwriter.write_file(testfile.name, 'foo')
430             self.assertEqual(cwriter.manifest_text(),
431                              ". 098f6bcd4621d373cade4e832627b4f6+4 0:4:foo\n")
432
433     def test_write_multiple_files(self):
434         cwriter = arvados.CollectionWriter(self.api_client)
435         for letter in 'ABC':
436             with self.make_test_file(letter.encode()) as testfile:
437                 cwriter.write_file(testfile.name, letter)
438         self.assertEqual(
439             cwriter.manifest_text(),
440             ". 902fbdd2b1df0c4f70b4a5d23525e932+3 0:1:A 1:1:B 2:1:C\n")
441
442     def test_basic_resume(self):
443         cwriter = TestResumableWriter()
444         with self.make_test_file() as testfile:
445             cwriter.write_file(testfile.name, 'test')
446             resumed = TestResumableWriter.from_state(cwriter.current_state())
447         self.assertEqual(cwriter.manifest_text(), resumed.manifest_text(),
448                           "resumed CollectionWriter had different manifest")
449
450     def test_resume_fails_when_missing_dependency(self):
451         cwriter = TestResumableWriter()
452         with self.make_test_file() as testfile:
453             cwriter.write_file(testfile.name, 'test')
454         self.assertRaises(arvados.errors.StaleWriterStateError,
455                           TestResumableWriter.from_state,
456                           cwriter.current_state())
457
458     def test_resume_fails_when_dependency_mtime_changed(self):
459         cwriter = TestResumableWriter()
460         with self.make_test_file() as testfile:
461             cwriter.write_file(testfile.name, 'test')
462             os.utime(testfile.name, (0, 0))
463             self.assertRaises(arvados.errors.StaleWriterStateError,
464                               TestResumableWriter.from_state,
465                               cwriter.current_state())
466
467     def test_resume_fails_when_dependency_is_nonfile(self):
468         cwriter = TestResumableWriter()
469         cwriter.write_file('/dev/null', 'empty')
470         self.assertRaises(arvados.errors.StaleWriterStateError,
471                           TestResumableWriter.from_state,
472                           cwriter.current_state())
473
474     def test_resume_fails_when_dependency_size_changed(self):
475         cwriter = TestResumableWriter()
476         with self.make_test_file() as testfile:
477             cwriter.write_file(testfile.name, 'test')
478             orig_mtime = os.fstat(testfile.fileno()).st_mtime
479             testfile.write(b'extra')
480             testfile.flush()
481             os.utime(testfile.name, (orig_mtime, orig_mtime))
482             self.assertRaises(arvados.errors.StaleWriterStateError,
483                               TestResumableWriter.from_state,
484                               cwriter.current_state())
485
486     def test_resume_fails_with_expired_locator(self):
487         cwriter = TestResumableWriter()
488         state = cwriter.current_state()
489         # Add an expired locator to the state.
490         state['_current_stream_locators'].append(''.join([
491                     'a' * 32, '+1+A', 'b' * 40, '@', '10000000']))
492         self.assertRaises(arvados.errors.StaleWriterStateError,
493                           TestResumableWriter.from_state, state)
494
495     def test_arbitrary_objects_not_resumable(self):
496         cwriter = TestResumableWriter()
497         with open('/dev/null') as badfile:
498             self.assertRaises(arvados.errors.AssertionError,
499                               cwriter.write_file, badfile)
500
501     def test_arbitrary_writes_not_resumable(self):
502         cwriter = TestResumableWriter()
503         self.assertRaises(arvados.errors.AssertionError,
504                           cwriter.write, "badtext")
505
506     def test_read_arbitrary_data_with_collection_reader(self):
507         # arv-get relies on this to do "arv-get {keep-locator} -".
508         self.write_foo_bar_baz()
509         self.assertEqual(
510             'foobar',
511             arvados.CollectionReader(
512                 '3858f62230ac3c915f300c664312c63f+6'
513                 ).manifest_text())
514
515
class CollectionTestMixin(tutil.ApiClientMock):
    """Shared collection fixtures and an API client mock for tests."""

    API_COLLECTIONS = run_test_server.fixture('collections')

    # Default fixture: 'foo_file'.
    DEFAULT_COLLECTION = API_COLLECTIONS['foo_file']
    DEFAULT_DATA_HASH = DEFAULT_COLLECTION['portable_data_hash']
    DEFAULT_MANIFEST = DEFAULT_COLLECTION['manifest_text']
    DEFAULT_UUID = DEFAULT_COLLECTION['uuid']

    # Alternate fixture: 'bar_file'.
    ALT_COLLECTION = API_COLLECTIONS['bar_file']
    ALT_DATA_HASH = ALT_COLLECTION['portable_data_hash']
    ALT_MANIFEST = ALT_COLLECTION['manifest_text']

    def api_client_mock(self, status=200):
        """Return the parent's API client mock with one proxy Keep service.

        `status` is forwarded to mock_keep_services().
        """
        client = super(CollectionTestMixin, self).api_client_mock()
        self.mock_keep_services(
            client,
            status=status,
            service_type='proxy',
            count=1)
        return client
530
531
532 @tutil.skip_sleep
533 class CollectionReaderTestCase(unittest.TestCase, CollectionTestMixin):
534     def mock_get_collection(self, api_mock, code, fixturename):
535         body = self.API_COLLECTIONS.get(fixturename)
536         self._mock_api_call(api_mock.collections().get, code, body)
537
538     def api_client_mock(self, status=200):
539         client = super(CollectionReaderTestCase, self).api_client_mock()
540         self.mock_get_collection(client, status, 'foo_file')
541         return client
542
543     def test_init_no_default_retries(self):
544         client = self.api_client_mock(200)
545         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
546         reader.manifest_text()
547         client.collections().get().execute.assert_called_with(num_retries=0)
548
549     def test_uuid_init_success(self):
550         client = self.api_client_mock(200)
551         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client,
552                                           num_retries=3)
553         self.assertEqual(self.DEFAULT_COLLECTION['manifest_text'],
554                          reader.manifest_text())
555         client.collections().get().execute.assert_called_with(num_retries=3)
556
557     def test_uuid_init_failure_raises_api_error(self):
558         client = self.api_client_mock(500)
559         with self.assertRaises(arvados.errors.ApiError):
560             reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
561
562     def test_locator_init(self):
563         client = self.api_client_mock(200)
564         # Ensure Keep will not return anything if asked.
565         with tutil.mock_keep_responses(None, 404):
566             reader = arvados.CollectionReader(self.DEFAULT_DATA_HASH,
567                                               api_client=client)
568             self.assertEqual(self.DEFAULT_MANIFEST, reader.manifest_text())
569
570     def test_locator_init_fallback_to_keep(self):
571         # crunch-job needs this to read manifests that have only ever
572         # been written to Keep.
573         client = self.api_client_mock(200)
574         self.mock_get_collection(client, 404, None)
575         with tutil.mock_keep_responses(self.DEFAULT_MANIFEST, 200):
576             reader = arvados.CollectionReader(self.DEFAULT_DATA_HASH,
577                                               api_client=client)
578             self.assertEqual(self.DEFAULT_MANIFEST, reader.manifest_text())
579
580     def test_uuid_init_no_fallback_to_keep(self):
581         # Do not look up a collection UUID in Keep.
582         client = self.api_client_mock(404)
583         with tutil.mock_keep_responses(self.DEFAULT_MANIFEST, 200):
584             with self.assertRaises(arvados.errors.ApiError):
585                 reader = arvados.CollectionReader(self.DEFAULT_UUID,
586                                                   api_client=client)
587
588     def test_try_keep_first_if_permission_hint(self):
589         # To verify that CollectionReader tries Keep first here, we
590         # mock API server to return the wrong data.
591         client = self.api_client_mock(200)
592         with tutil.mock_keep_responses(self.ALT_MANIFEST, 200):
593             self.assertEqual(
594                 self.ALT_MANIFEST,
595                 arvados.CollectionReader(
596                     self.ALT_DATA_HASH + '+Affffffffffffffffffffffffffffffffffffffff@fedcba98',
597                     api_client=client).manifest_text())
598
599     def test_init_num_retries_propagated(self):
600         # More of an integration test...
601         client = self.api_client_mock(200)
602         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client,
603                                           num_retries=3)
604         with tutil.mock_keep_responses('foo', 500, 500, 200):
605             self.assertEqual(b'foo',
606                              b''.join(f.read(9) for f in reader.all_files()))
607
608     def test_read_nonnormalized_manifest_with_collection_reader(self):
609         # client should be able to use CollectionReader on a manifest without normalizing it
610         client = self.api_client_mock(500)
611         nonnormal = ". acbd18db4cc2f85cedef654fccc4a4d8+3+Aabadbadbee@abeebdee 0:3:foo.txt 1:0:bar.txt 0:3:foo.txt\n"
612         reader = arvados.CollectionReader(
613             nonnormal,
614             api_client=client, num_retries=0)
615         # Ensure stripped_manifest() doesn't mangle our manifest in
616         # any way other than stripping hints.
617         self.assertEqual(
618             re.sub('\+[^\d\s\+]+', '', nonnormal),
619             reader.stripped_manifest())
620         # Ensure stripped_manifest() didn't mutate our reader.
621         self.assertEqual(nonnormal, reader.manifest_text())
622         # Ensure the files appear in the order given in the manifest.
623         self.assertEqual(
624             [[6, '.', 'foo.txt'],
625              [0, '.', 'bar.txt']],
626             [[f.size(), f.stream_name(), f.name()]
627              for f in reader.all_streams()[0].all_files()])
628
629     def test_read_empty_collection(self):
630         client = self.api_client_mock(200)
631         self.mock_get_collection(client, 200, 'empty')
632         reader = arvados.CollectionReader('d41d8cd98f00b204e9800998ecf8427e+0',
633                                           api_client=client)
634         self.assertEqual('', reader.manifest_text())
635
636     def test_api_response(self):
637         client = self.api_client_mock()
638         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
639         self.assertEqual(self.DEFAULT_COLLECTION, reader.api_response())
640
641     def test_api_response_with_collection_from_keep(self):
642         client = self.api_client_mock()
643         self.mock_get_collection(client, 404, 'foo')
644         with tutil.mock_keep_responses(self.DEFAULT_MANIFEST, 200):
645             reader = arvados.CollectionReader(self.DEFAULT_DATA_HASH,
646                                               api_client=client)
647             api_response = reader.api_response()
648         self.assertIsNone(api_response)
649
650     def check_open_file(self, coll_file, stream_name, file_name, file_size):
651         self.assertFalse(coll_file.closed, "returned file is not open")
652         self.assertEqual(stream_name, coll_file.stream_name())
653         self.assertEqual(file_name, coll_file.name)
654         self.assertEqual(file_size, coll_file.size())
655
656     def test_open_collection_file_one_argument(self):
657         client = self.api_client_mock(200)
658         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
659         cfile = reader.open('./foo', 'rb')
660         self.check_open_file(cfile, '.', 'foo', 3)
661
662     def test_open_deep_file(self):
663         coll_name = 'collection_with_files_in_subdir'
664         client = self.api_client_mock(200)
665         self.mock_get_collection(client, 200, coll_name)
666         reader = arvados.CollectionReader(
667             self.API_COLLECTIONS[coll_name]['uuid'], api_client=client)
668         cfile = reader.open('./subdir2/subdir3/file2_in_subdir3.txt', 'rb')
669         self.check_open_file(cfile, './subdir2/subdir3', 'file2_in_subdir3.txt',
670                              32)
671
672     def test_open_nonexistent_stream(self):
673         client = self.api_client_mock(200)
674         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
675         self.assertRaises(IOError, reader.open, './nonexistent/foo')
676
677     def test_open_nonexistent_file(self):
678         client = self.api_client_mock(200)
679         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
680         self.assertRaises(IOError, reader.open, 'nonexistent')
681
682
@tutil.skip_sleep
class CollectionWriterTestCase(unittest.TestCase, CollectionTestMixin):
    """Tests for arvados.CollectionWriter using mocked Keep responses."""

    def mock_keep(self, body, *codes, **headers):
        """Return tutil.mock_keep_responses(body, *codes, **headers).

        The 'x-keep-replicas-stored' header defaults to 2 unless the
        caller supplies its own value.
        """
        headers.setdefault('x-keep-replicas-stored', 2)
        return tutil.mock_keep_responses(body, *codes, **headers)

    def foo_writer(self, **kwargs):
        """Return a CollectionWriter holding a file 'foo' containing b'foo'.

        kwargs are passed to the CollectionWriter constructor; api_client
        defaults to self.api_client_mock().
        """
        kwargs.setdefault('api_client', self.api_client_mock())
        writer = arvados.CollectionWriter(**kwargs)
        writer.start_new_file('foo')
        writer.write(b'foo')
        return writer

    def test_write_whole_collection(self):
        # finish() should return the default data hash when both mocked
        # Keep requests succeed.
        writer = self.foo_writer()
        with self.mock_keep(self.DEFAULT_DATA_HASH, 200, 200):
            self.assertEqual(self.DEFAULT_DATA_HASH, writer.finish())

    def test_write_no_default(self):
        # A single mocked 500 response from Keep must surface as
        # KeepWriteError from finish().
        writer = self.foo_writer()
        with self.mock_keep(None, 500):
            with self.assertRaises(arvados.errors.KeepWriteError):
                writer.finish()

    def test_write_insufficient_replicas_via_proxy(self):
        # The writer wants 3 replicas but the mocked response reports
        # only 2 stored, so manifest_text() must raise KeepWriteError.
        writer = self.foo_writer(replication=3)
        with self.mock_keep(None, 200, **{'x-keep-replicas-stored': 2}):
            with self.assertRaises(arvados.errors.KeepWriteError):
                writer.manifest_text()

    def test_write_insufficient_replicas_via_disks(self):
        # Two mocked disk services each report one stored replica, which
        # is short of the 3 requested, so KeepWriteError is expected.
        client = mock.MagicMock(name='api_client')
        with self.mock_keep(
                None, 200, 200,
                **{'x-keep-replicas-stored': 1}):
            self.mock_keep_services(client, status=200, service_type='disk', count=2)
            writer = self.foo_writer(api_client=client, replication=3)
            with self.assertRaises(arvados.errors.KeepWriteError):
                writer.manifest_text()

    def test_write_three_replicas(self):
        # Three 500s followed by three 200s (one replica each) across six
        # mocked disk services: the write should succeed and the mock
        # should have been called six times.
        client = mock.MagicMock(name='api_client')
        with self.mock_keep(
                "", 500, 500, 500, 200, 200, 200,
                **{'x-keep-replicas-stored': 1}) as keepmock:
            self.mock_keep_services(client, status=200, service_type='disk', count=6)
            writer = self.foo_writer(api_client=client, replication=3)
            writer.manifest_text()
            self.assertEqual(6, keepmock.call_count)

    def test_write_whole_collection_through_retries(self):
        # With num_retries=2, finish() should succeed when each 200 is
        # preceded by two mocked 500 responses.
        writer = self.foo_writer(num_retries=2)
        with self.mock_keep(self.DEFAULT_DATA_HASH,
                            500, 500, 200, 500, 500, 200):
            self.assertEqual(self.DEFAULT_DATA_HASH, writer.finish())

    def test_flush_data_retries(self):
        # flush_data() should get past one mocked 500 before the 200,
        # after which the manifest matches the default manifest.
        writer = self.foo_writer(num_retries=2)
        foo_hash = self.DEFAULT_MANIFEST.split()[1]
        with self.mock_keep(foo_hash, 500, 200):
            writer.flush_data()
        self.assertEqual(self.DEFAULT_MANIFEST, writer.manifest_text())

    def test_one_open(self):
        # A file opened through the writer sets the current stream and
        # file names, is closed when the context exits, rejects further
        # writes with ValueError, and shows up in the manifest.
        client = self.api_client_mock()
        writer = arvados.CollectionWriter(client)
        with writer.open('out') as out_file:
            self.assertEqual('.', writer.current_stream_name())
            self.assertEqual('out', writer.current_file_name())
            out_file.write(b'test data')
            data_loc = tutil.str_keep_locator('test data')
        self.assertTrue(out_file.closed, "writer file not closed after context")
        self.assertRaises(ValueError, out_file.write, 'extra text')
        with self.mock_keep(data_loc, 200):
            self.assertEqual(". {} 0:9:out\n".format(data_loc),
                             writer.manifest_text())

    def test_open_writelines(self):
        # writelines() should concatenate the given pieces into a single
        # six-byte file.
        client = self.api_client_mock()
        writer = arvados.CollectionWriter(client)
        with writer.open('six') as out_file:
            out_file.writelines(['12', '34', '56'])
            data_loc = tutil.str_keep_locator('123456')
        with self.mock_keep(data_loc, 200):
            self.assertEqual(". {} 0:6:six\n".format(data_loc),
                             writer.manifest_text())

    def test_open_flush(self):
        # An explicit flush() between writes should leave two block
        # locators in the manifest for one 12-byte file.
        client = self.api_client_mock()
        data_loc1 = tutil.str_keep_locator('flush1')
        data_loc2 = tutil.str_keep_locator('flush2')
        with self.mock_keep((data_loc1, 200), (data_loc2, 200)):
            writer = arvados.CollectionWriter(client)
            with writer.open('flush_test') as out_file:
                out_file.write(b'flush1')
                out_file.flush()
                out_file.write(b'flush2')
            self.assertEqual(". {} {} 0:12:flush_test\n".format(data_loc1,
                                                                data_loc2),
                             writer.manifest_text())

    def test_two_opens_same_stream(self):
        # Two files opened one after the other in stream '.' should
        # share a single block in the manifest.
        client = self.api_client_mock()
        writer = arvados.CollectionWriter(client)
        with writer.open('.', '1') as out_file:
            out_file.write(b'1st')
        with writer.open('.', '2') as out_file:
            out_file.write(b'2nd')
        data_loc = tutil.str_keep_locator('1st2nd')
        with self.mock_keep(data_loc, 200):
            self.assertEqual(". {} 0:3:1 3:3:2\n".format(data_loc),
                             writer.manifest_text())

    def test_two_opens_two_streams(self):
        # Files opened in different streams should produce one manifest
        # line per stream, each with its own block locator.
        client = self.api_client_mock()
        data_loc1 = tutil.str_keep_locator('file')
        data_loc2 = tutil.str_keep_locator('indir')
        with self.mock_keep((data_loc1, 200), (data_loc2, 200)):
            writer = arvados.CollectionWriter(client)
            with writer.open('file') as out_file:
                out_file.write(b'file')
            with writer.open('./dir', 'indir') as out_file:
                out_file.write(b'indir')
            expected = ". {} 0:4:file\n./dir {} 0:5:indir\n".format(
                data_loc1, data_loc2)
            self.assertEqual(expected, writer.manifest_text())

    def test_dup_open_fails(self):
        # Opening a second file while the first is still open must raise
        # arvados.errors.AssertionError.
        client = self.api_client_mock()
        writer = arvados.CollectionWriter(client)
        # Held in a local so the first file object stays referenced (and
        # unclosed) while the second open is attempted.
        file1 = writer.open('one')
        self.assertRaises(arvados.errors.AssertionError, writer.open, 'two')
815
816
class CollectionMethods(run_test_server.TestCaseWithServers):
    """Checks the sequence-style accessors of Collection."""

    def test_keys_values_items_support_indexing(self):
        # Build a collection with two small files, then check that
        # keys(), values() and items() report two entries and (where
        # supported) can be indexed.
        c = Collection()
        for filename, payload in (('foo', b'foo'), ('bar', b'bar')):
            with c.open(filename, 'wb') as f:
                f.write(payload)
        self.assertEqual(2, len(c.keys()))
        if sys.version_info >= (3, 0):
            fn0, fn1 = c.keys()
        else:
            # keys() supports indexing only for python2 callers.
            fn0 = c.keys()[0]
            fn1 = c.keys()[1]
        self.assertEqual(2, len(c.values()))
        # These lookups only need to succeed; the values are not compared.
        first_value = c.values()[0]
        second_value = c.values()[1]
        self.assertEqual(2, len(c.items()))
        for position, expected_name in enumerate((fn0, fn1)):
            self.assertEqual(expected_name, c.items()[position][0])
838
839
class CollectionOpenModes(run_test_server.TestCaseWithServers):
    """Checks which mode strings Collection.open() accepts."""

    def test_open_binary_modes(self):
        # Every binary write/append mode should yield a writable file.
        c = Collection()
        binary_modes = ['wb', 'wb+', 'ab', 'ab+']
        for mode in binary_modes:
            with c.open('foo', mode) as f:
                f.write(b'foo')

    def test_open_invalid_modes(self):
        # Malformed mode values (including '' and None) must make open()
        # raise some exception.
        c = Collection()
        invalid_modes = ['+r', 'aa', '++', 'r+b', 'beer', '', None]
        for mode in invalid_modes:
            self.assertRaises(Exception, c.open, 'foo', mode)

    def test_open_text_modes(self):
        # Text modes raise NotImplementedError on Python 3; on Python 2
        # they must read back or write and re-read the expected text.
        c = Collection()
        with c.open('foo', 'wb') as f:
            f.write('foo')
        running_py3 = sys.version_info >= (3, 0)
        for mode in ['r', 'rt', 'r+', 'rt+', 'w', 'wt', 'a', 'at']:
            if running_py3:
                with self.assertRaises(NotImplementedError):
                    c.open('foo', mode)
                continue
            with c.open('foo', mode) as f:
                read_only = mode[0] == 'r' and '+' not in mode
                if read_only:
                    self.assertEqual('foo', f.read(3))
                    continue
                f.write('bar')
                f.seek(-3, os.SEEK_CUR)
                self.assertEqual('bar', f.read(3))
870
871
class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
    """Tests for Collection: loading, editing, diff/apply and notifications."""

    def test_replication_desired_kept_on_load(self):
        # A collection reloaded by its locator should keep the saved
        # replication_desired value when none is passed explicitly.
        m = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n'
        c1 = Collection(m, replication_desired=1)
        c1.save_new()
        loc = c1.manifest_locator()
        c2 = Collection(loc)
        # Compare the manifest strings themselves; comparing the bound
        # methods c1.manifest_text / c2.manifest_text would not inspect
        # the manifests at all.
        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
        self.assertEqual(c1.replication_desired, c2.replication_desired)

    def test_replication_desired_not_loaded_if_provided(self):
        # An explicit replication_desired passed at load time should win
        # over the value stored with the collection.
        m = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n'
        c1 = Collection(m, replication_desired=1)
        c1.save_new()
        loc = c1.manifest_locator()
        c2 = Collection(loc, replication_desired=2)
        # Compare the manifest strings themselves rather than the bound
        # manifest_text methods.
        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
        self.assertNotEqual(c1.replication_desired, c2.replication_desired)

    def test_init_manifest(self):
        # A multi-line manifest for one file round-trips unchanged with
        # normalize=False and collapses to one line with normalize=True.
        m1 = """. 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt
. 085c37f02916da1cad16f93c54d899b7+41 0:41:md5sum.txt
. 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md5sum.txt
"""
        self.assertEqual(m1, CollectionReader(m1).manifest_text(normalize=False))
        self.assertEqual(". 5348b82a029fd9e971a811ce1f71360b+43 085c37f02916da1cad16f93c54d899b7+41 8b22da26f9f433dea0a10e5ec66d73ba+43 0:127:md5sum.txt\n", CollectionReader(m1).manifest_text(normalize=True))

    def test_init_manifest_with_collision(self):
        # 'md5sum.txt' is used both as a file in '.' and as a stream
        # name, which must be rejected with ArgumentError.
        m1 = """. 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt
./md5sum.txt 085c37f02916da1cad16f93c54d899b7+41 0:41:md5sum.txt
"""
        with self.assertRaises(arvados.errors.ArgumentError):
            # Only the constructor call matters here; it is expected to raise.
            CollectionReader(m1)

    def test_init_manifest_with_error(self):
        # A manifest line without a block locator must be rejected with
        # ArgumentError.
        m1 = """. 0:43:md5sum.txt"""
        with self.assertRaises(arvados.errors.ArgumentError):
            # Only the constructor call matters here; it is expected to raise.
            CollectionReader(m1)

    def test_remove(self):
        # Removing a file drops it from membership tests and from the
        # portable manifest; removing "" raises ArgumentError.
        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n')
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n", c.portable_manifest_text())
        self.assertIn("count1.txt", c)
        c.remove("count1.txt")
        self.assertNotIn("count1.txt", c)
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c.portable_manifest_text())
        with self.assertRaises(arvados.errors.ArgumentError):
            c.remove("")

    def test_find(self):
        # find() returns the collection itself for ".", the same object
        # as item lookup for existing files, and None for missing paths;
        # "/." raises IOError and "" raises ArgumentError.
        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n')
        self.assertIs(c.find("."), c)
        self.assertIs(c.find("./count1.txt"), c["count1.txt"])
        self.assertIs(c.find("count1.txt"), c["count1.txt"])
        with self.assertRaises(IOError):
            c.find("/.")
        with self.assertRaises(arvados.errors.ArgumentError):
            c.find("")
        self.assertIs(c.find("./nonexistant.txt"), None)
        self.assertIs(c.find("./nonexistantsubdir/nonexistant.txt"), None)

    def test_remove_in_subdir(self):
        # Removing the only file of a subdirectory leaves just the
        # top-level stream in the portable manifest.
        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
        c.remove("foo/count2.txt")
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.portable_manifest_text())

    def test_remove_empty_subdir(self):
        # A subdirectory can be removed without recursive=True once it
        # has been emptied.
        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
        c.remove("foo/count2.txt")
        c.remove("foo")
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.portable_manifest_text())

    def test_remove_nonempty_subdir(self):
        # Removing a non-empty subdirectory raises IOError unless
        # recursive=True is given.
        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
        with self.assertRaises(IOError):
            c.remove("foo")
        c.remove("foo", recursive=True)
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.portable_manifest_text())

    def test_copy_to_file_in_dir(self):
        # Copying to a path inside a directory that does not exist yet
        # adds that stream with the new file name.
        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        c.copy("count1.txt", "foo/count2.txt")
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c.portable_manifest_text())

    def test_copy_file(self):
        # Copying within the same stream adds a second file entry that
        # refers to the same block.
        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        c.copy("count1.txt", "count2.txt")
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n", c.portable_manifest_text())

    def test_copy_to_existing_dir(self):
        # Copying onto an existing directory places the file inside it
        # under its original name.
        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
        c.copy("count1.txt", "foo")
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n", c.portable_manifest_text())

    def test_copy_to_new_dir(self):
        # A target ending in "/" names a new directory; the file keeps
        # its original name inside it.
        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        c.copy("count1.txt", "foo/")
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.portable_manifest_text())

    def test_rename_file(self):
        # Renaming replaces the old file name in the manifest.
        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        c.rename("count1.txt", "count2.txt")
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c.manifest_text())

    def test_move_file_to_dir(self):
        # Renaming into a directory created with mkdirs() moves the file
        # out of the top-level stream.
        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        c.mkdirs("foo")
        c.rename("count1.txt", "foo/count2.txt")
        self.assertEqual("./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c.manifest_text())

    def test_move_file_to_other(self):
        # rename() with source_collection moves the file between
        # collections, leaving the source empty.
        c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        c2 = Collection()
        c2.rename("count1.txt", "count2.txt", source_collection=c1)
        self.assertEqual("", c1.manifest_text())
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c2.manifest_text())

    def test_clone(self):
        # A clone should report the same portable manifest as the
        # manifest its source was built from.
        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
        cl = c.clone()
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", cl.portable_manifest_text())

    def test_diff_del_add(self):
        # Collections with disjoint file names diff as one 'add' plus one
        # 'del'; applying c1.diff(c2) to c1 makes it match c2.
        c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
        d = c2.diff(c1)
        self.assertEqual(sorted(d), [
            ('add', './count1.txt', c1["count1.txt"]),
            ('del', './count2.txt', c2["count2.txt"]),
        ])
        d = c1.diff(c2)
        self.assertEqual(sorted(d), [
            ('add', './count2.txt', c2["count2.txt"]),
            ('del', './count1.txt', c1["count1.txt"]),
        ])
        self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
        c1.apply(d)
        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())

    def test_diff_same(self):
        # Identical collections diff as a single 'tok' entry, and
        # applying that diff leaves the manifests equal.
        c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        c2 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        d = c2.diff(c1)
        self.assertEqual(d, [('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"])])
        d = c1.diff(c2)
        self.assertEqual(d, [('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"])])

        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
        c1.apply(d)
        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())

    def test_diff_mod(self):
        # The same file name backed by different blocks diffs as 'mod';
        # applying c1.diff(c2) to c1 makes it match c2.
        c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count1.txt\n')
        d = c2.diff(c1)
        self.assertEqual(d, [('mod', './count1.txt', c2["count1.txt"], c1["count1.txt"])])
        d = c1.diff(c2)
        self.assertEqual(d, [('mod', './count1.txt', c1["count1.txt"], c2["count1.txt"])])

        self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
        c1.apply(d)
        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())

    def test_diff_add(self):
        # c2 has one extra file: the shared file is reported as 'tok' and
        # the extra one as 'del' or 'add' depending on diff direction.
        c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        c2 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 5348b82a029fd9e971a811ce1f71360b+43 0:10:count1.txt 10:20:count2.txt\n')
        d = c2.diff(c1)
        self.assertEqual(sorted(d), [
            ('del', './count2.txt', c2["count2.txt"]),
            ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
        ])
        d = c1.diff(c2)
        self.assertEqual(sorted(d), [
            ('add', './count2.txt', c2["count2.txt"]),
            ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
        ])

        self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
        c1.apply(d)
        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())

    def test_diff_add_in_subcollection(self):
        # A subdirectory present only in c2 is reported as one entry for
        # './foo' rather than per file.
        c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        c2 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
        d = c2.diff(c1)
        self.assertEqual(sorted(d), [
            ('del', './foo', c2["foo"]),
            ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
        ])
        d = c1.diff(c2)
        self.assertEqual(sorted(d), [
            ('add', './foo', c2["foo"]),
            ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
        ])
        self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
        c1.apply(d)
        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())

    def test_diff_del_add_in_subcollection(self):
        # When both sides have './foo' with different files, the diff
        # lists the individual files inside it.
        c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
        c2 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 5348b82a029fd9e971a811ce1f71360b+43 0:3:count3.txt\n')
        d = c2.diff(c1)
        self.assertEqual(sorted(d), [
            ('add', './foo/count2.txt', c1.find("foo/count2.txt")),
            ('del', './foo/count3.txt', c2.find("foo/count3.txt")),
            ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
        ])
        d = c1.diff(c2)
        self.assertEqual(sorted(d), [
            ('add', './foo/count3.txt', c2.find("foo/count3.txt")),
            ('del', './foo/count2.txt', c1.find("foo/count2.txt")),
            ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
        ])

        self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
        c1.apply(d)
        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())

    def test_diff_mod_in_subcollection(self):
        # 'foo' is a subdirectory in c1 but a plain file in c2; the diff
        # reports that as 'mod' on './foo'.
        c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
        c2 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:3:foo\n')
        d = c2.diff(c1)
        self.assertEqual(sorted(d), [
            ('mod', './foo', c2["foo"], c1["foo"]),
            ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
        ])
        d = c1.diff(c2)
        self.assertEqual(sorted(d), [
            ('mod', './foo', c1["foo"], c2["foo"]),
            ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
        ])

        self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
        c1.apply(d)
        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())

    def test_conflict_keep_local_change(self):
        # A file rewritten locally after the diff was taken must survive
        # a 'del' for it when the diff is applied.
        c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
        d = c1.diff(c2)
        self.assertEqual(sorted(d), [
            ('add', './count2.txt', c2["count2.txt"]),
            ('del', './count1.txt', c1["count1.txt"]),
        ])
        f = c1.open("count1.txt", "wb")
        f.write(b"zzzzz")

        # c1 changed, so it should not be deleted.
        c1.apply(d)
        self.assertEqual(c1.portable_manifest_text(), ". 95ebc3c7b3b9f1d2c40fec14415d3cb8+5 5348b82a029fd9e971a811ce1f71360b+43 0:5:count1.txt 5:10:count2.txt\n")

    def test_conflict_mod(self):
        # A 'mod' for a file that was also rewritten locally keeps the
        # local content and stores the incoming version in a conflict file.
        c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt')
        c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count1.txt')
        d = c1.diff(c2)
        self.assertEqual(d, [('mod', './count1.txt', c1["count1.txt"], c2["count1.txt"])])
        f = c1.open("count1.txt", "wb")
        f.write(b"zzzzz")

        # c1 changed, so c2 mod will go to a conflict file
        c1.apply(d)
        self.assertRegex(
            c1.portable_manifest_text(),
            r"\. 95ebc3c7b3b9f1d2c40fec14415d3cb8\+5 5348b82a029fd9e971a811ce1f71360b\+43 0:5:count1\.txt 5:10:count1\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")

    def test_conflict_add(self):
        # An 'add' for a name that was created locally after the diff was
        # taken stores the incoming file in a conflict file.
        c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
        c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count1.txt\n')
        d = c1.diff(c2)
        self.assertEqual(sorted(d), [
            ('add', './count1.txt', c2["count1.txt"]),
            ('del', './count2.txt', c1["count2.txt"]),
        ])
        f = c1.open("count1.txt", "wb")
        f.write(b"zzzzz")

        # c1 added count1.txt, so c2 add will go to a conflict file
        c1.apply(d)
        self.assertRegex(
            c1.portable_manifest_text(),
            r"\. 95ebc3c7b3b9f1d2c40fec14415d3cb8\+5 5348b82a029fd9e971a811ce1f71360b\+43 0:5:count1\.txt 5:10:count1\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")

    def test_conflict_del(self):
        # A 'mod' for a file that was removed locally after the diff was
        # taken stores the incoming version in a conflict file.
        c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt')
        c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count1.txt')
        d = c1.diff(c2)
        self.assertEqual(d, [('mod', './count1.txt', c1["count1.txt"], c2["count1.txt"])])
        c1.remove("count1.txt")

        # c1 deleted, so c2 mod will go to a conflict file
        c1.apply(d)
        self.assertRegex(
            c1.portable_manifest_text(),
            r"\. 5348b82a029fd9e971a811ce1f71360b\+43 0:10:count1\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")

    def test_notify(self):
        # Opening a new file for writing should deliver an ADD event to
        # the subscribed callback, carrying the new file's arvadosfile.
        c1 = Collection()
        events = []
        c1.subscribe(lambda event, collection, name, item: events.append((event, collection, name, item)))
        f = c1.open("foo.txt", "wb")
        self.assertEqual(events[0], (arvados.collection.ADD, c1, "foo.txt", f.arvadosfile))

    def test_open_w(self):
        # Opening an existing file with mode "wb" and closing it leaves
        # the file with size 0.
        c1 = Collection(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n")
        self.assertEqual(c1["count1.txt"].size(), 10)
        c1.open("count1.txt", "wb").close()
        self.assertEqual(c1["count1.txt"].size(), 0)
1180
1181
class NewCollectionTestCaseWithServersAndTokens(run_test_server.TestCaseWithServers):
    # Empty dicts for both server settings, as in the original class.
    MAIN_SERVER = {}
    KEEP_SERVER = {}

    def setUp(self):
        # Keep a reference to the unpatched KeepClient.put so the test's
        # wrapper can delegate to it while the attribute is mocked.
        self.keep_put = getattr(arvados.keep.KeepClient, 'put')

    def test_repacked_block_submission_get_permission_token(self):
        '''
        Make sure that those blocks that are committed after repacking small ones,
        get their permission tokens assigned on the collection manifest.
        '''
        def wrapped_keep_put(*args, **kwargs):
            # Simulate slow put operations
            time.sleep(1)
            return self.keep_put(*args, **kwargs)

        # Raw string: \+ and \d are regex escapes, not valid Python
        # string escapes. Matches a block locator that carries a +A
        # permission hint.
        re_locator = r"[0-9a-f]{32}\+\d+\+A[a-f0-9]{40}@[a-f0-9]{8}"

        with mock.patch('arvados.keep.KeepClient.put', autospec=True) as mocked_put:
            mocked_put.side_effect = wrapped_keep_put
            c = Collection()
            # Write 70 files ~1MiB each so we force to produce 1 big block by repacking
            # small ones before finishing the upload.
            for i in range(70):
                f = c.open("file_{}.txt".format(i), 'wb')
                f.write(random.choice('abcdefghijklmnopqrstuvwxyz') * (2**20+i))
                f.close(flush=False)
            # We should get 2 blocks with their tokens
            self.assertEqual(len(re.findall(re_locator, c.manifest_text())), 2)
1212
1213
class NewCollectionTestCaseWithServers(run_test_server.TestCaseWithServers):
    # Tests covering which blocks show up in a collection's manifest text
    # before/after they are committed, and how small blocks get packed
    # together.

    def _write_small_files(self, c):
        # Write "count.txt" (10 bytes) and "foo.txt" (3 bytes) into
        # collection c, closing each writer without flushing.
        f = c.open("count.txt", "wb")
        f.write(b"0123456789")
        f.close(flush=False)
        foo = c.open("foo.txt", "wb")
        foo.write(b"foo")
        foo.close(flush=False)

    def test_get_manifest_text_only_committed(self):
        c = Collection()
        with c.open("count.txt", "wb") as f:
            # One file committed
            with c.open("foo.txt", "wb") as foo:
                foo.write(b"foo")
                foo.flush() # Force block commit
            f.write(b"0123456789")
            # Other file not committed. Block not written to keep yet.
            self.assertEqual(
                c._get_manifest_text(".",
                                     strip=False,
                                     normalize=False,
                                     only_committed=True),
                '. acbd18db4cc2f85cedef654fccc4a4d8+3 0:0:count.txt 0:3:foo.txt\n')
            # And now with the file closed...
            f.flush() # Force block commit
        self.assertEqual(
            c._get_manifest_text(".",
                                 strip=False,
                                 normalize=False,
                                 only_committed=True),
            ". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:10:count.txt 10:3:foo.txt\n")

    def test_only_small_blocks_are_packed_together(self):
        c = Collection()
        # Write a couple of small files,
        self._write_small_files(c)
        # Then, write a big file, it shouldn't be packed with the ones above
        big = c.open("bigfile.txt", "wb")
        big.write(b"x" * 1024 * 1024 * 33) # 33 MB > KEEP_BLOCK_SIZE/2
        big.close(flush=False)
        self.assertEqual(
            c.manifest_text("."),
            '. 2d303c138c118af809f39319e5d507e9+34603008 a8430a058b8fbf408e1931b794dbd6fb+13 0:34603008:bigfile.txt 34603008:10:count.txt 34603018:3:foo.txt\n')

    def test_flush_after_small_block_packing(self):
        c = Collection()
        # Write a couple of small files,
        self._write_small_files(c)

        self.assertEqual(
            c.manifest_text(),
            '. a8430a058b8fbf408e1931b794dbd6fb+13 0:10:count.txt 10:3:foo.txt\n')

        # Re-opening and flushing without writing must not change the manifest.
        f = c.open("count.txt", "rb+")
        f.close(flush=True)

        self.assertEqual(
            c.manifest_text(),
            '. a8430a058b8fbf408e1931b794dbd6fb+13 0:10:count.txt 10:3:foo.txt\n')

    def test_write_after_small_block_packing2(self):
        c = Collection()
        # Write a couple of small files,
        self._write_small_files(c)

        self.assertEqual(
            c.manifest_text(),
            '. a8430a058b8fbf408e1931b794dbd6fb+13 0:10:count.txt 10:3:foo.txt\n')

        # Overwrite the first 3 bytes of count.txt; the new data goes to its
        # own block while the rest still refers to the packed block.
        f = c.open("count.txt", "rb+")
        f.write(b"abc")
        f.close(flush=False)

        self.assertEqual(
            c.manifest_text(),
            '. 900150983cd24fb0d6963f7d28e17f72+3 a8430a058b8fbf408e1931b794dbd6fb+13 0:3:count.txt 6:7:count.txt 13:3:foo.txt\n')

    def test_small_block_packing_with_overwrite(self):
        c = Collection()
        c.open("b1", "wb").close()
        c["b1"].writeto(0, b"b1", 0)

        c.open("b2", "wb").close()
        c["b2"].writeto(0, b"b2", 0)

        # Overwrite b1's contents after b2 has been written.
        c["b1"].writeto(0, b"1b", 0)

        # assertEqual rather than the deprecated assertEquals alias.
        self.assertEqual(c.manifest_text(), ". ed4f3f67c70b02b29c50ce1ea26666bd+4 0:2:b1 2:2:b2\n")
        self.assertEqual(c["b1"].manifest_text(), ". ed4f3f67c70b02b29c50ce1ea26666bd+4 0:2:b1\n")
        self.assertEqual(c["b2"].manifest_text(), ". ed4f3f67c70b02b29c50ce1ea26666bd+4 2:2:b2\n")
1313
1314
class CollectionCreateUpdateTest(run_test_server.TestCaseWithServers):
    # Tests for saving collections and for diff/apply/update between two
    # Collection objects that refer to the same saved record.
    MAIN_SERVER = {}
    KEEP_SERVER = {}

    def create_count_txt(self):
        # Create an empty collection, save it to the API server, then write a
        # file, but don't save it.

        c = Collection()
        c.save_new("CollectionCreateUpdateTest", ensure_unique_name=True)
        self.assertEqual(c.portable_data_hash(), "d41d8cd98f00b204e9800998ecf8427e+0")
        self.assertEqual(c.api_response()["portable_data_hash"], "d41d8cd98f00b204e9800998ecf8427e+0")

        with c.open("count.txt", "wb") as f:
            f.write(b"0123456789")

        self.assertEqual(c.portable_manifest_text(), ". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n")

        return c

    def test_create_and_save(self):
        c = self.create_count_txt()
        c.save()
        # After saving, the manifest carries a signed locator.
        self.assertRegex(
            c.manifest_text(),
            r"^\. 781e5e245d69b566979b86e28d23f2c7\+10\+A[a-f0-9]{40}@[a-f0-9]{8} 0:10:count\.txt$")

    def test_create_and_save_new(self):
        c = self.create_count_txt()
        c.save_new()
        # After saving, the manifest carries a signed locator.
        self.assertRegex(
            c.manifest_text(),
            r"^\. 781e5e245d69b566979b86e28d23f2c7\+10\+A[a-f0-9]{40}@[a-f0-9]{8} 0:10:count\.txt$")

    def test_create_diff_apply(self):
        c1 = self.create_count_txt()
        c1.save()

        c2 = Collection(c1.manifest_locator())
        with c2.open("count.txt", "wb") as f:
            f.write(b"abcdefg")

        diff = c1.diff(c2)

        self.assertEqual(diff[0], (arvados.collection.MOD, u'./count.txt', c1["count.txt"], c2["count.txt"]))

        c1.apply(diff)
        self.assertEqual(c1.portable_data_hash(), c2.portable_data_hash())

    def test_diff_apply_with_token(self):
        baseline = CollectionReader(". 781e5e245d69b566979b86e28d23f2c7+10+A715fd31f8111894f717eb1003c1b0216799dd9ec@54f5dd1a 0:10:count.txt\n")
        c = Collection(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n")
        other = CollectionReader(". 7ac66c0f148de9519b8bd264312c4d64+7+A715fd31f8111894f717eb1003c1b0216799dd9ec@54f5dd1a 0:7:count.txt\n")

        diff = baseline.diff(other)
        # Use the MOD constant (as test_create_diff_apply does) rather than a
        # hard-coded event string.
        self.assertEqual(diff, [(arvados.collection.MOD, u'./count.txt', c["count.txt"], other["count.txt"])])

        c.apply(diff)

        self.assertEqual(c.manifest_text(), ". 7ac66c0f148de9519b8bd264312c4d64+7+A715fd31f8111894f717eb1003c1b0216799dd9ec@54f5dd1a 0:7:count.txt\n")

    def test_create_and_update(self):
        c1 = self.create_count_txt()
        c1.save()

        c2 = Collection(c1.manifest_locator())
        with c2.open("count.txt", "wb") as f:
            f.write(b"abcdefg")

        c2.save()

        # c1 only sees c2's saved change after an explicit update().
        self.assertNotEqual(c1.portable_data_hash(), c2.portable_data_hash())
        c1.update()
        self.assertEqual(c1.portable_data_hash(), c2.portable_data_hash())

    def test_create_and_update_with_conflict(self):
        c1 = self.create_count_txt()
        c1.save()

        # Local, unsaved modification on c1...
        with c1.open("count.txt", "wb") as f:
            f.write(b"XYZ")

        # ...and a different, saved modification of the same file through c2.
        c2 = Collection(c1.manifest_locator())
        with c2.open("count.txt", "wb") as f:
            f.write(b"abcdefg")

        c2.save()

        c1.update()
        # c1 keeps its own count.txt; the other version is expected under a
        # timestamped "~conflict~" name.
        self.assertRegex(
            c1.manifest_text(),
            r"\. e65075d550f9b5bf9992fa1d71a131be\+3\S* 7ac66c0f148de9519b8bd264312c4d64\+7\S* 0:3:count\.txt 3:7:count\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")

    def test_pdh_is_native_str(self):
        c1 = self.create_count_txt()
        pdh = c1.portable_data_hash()
        # type('') is the interpreter's native str type.
        self.assertEqual(type(''), type(pdh))
1414
1415
# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()