Add getters for properties and trash_at attributes and small bugfix for
[arvados.git] / sdk / python / tests / test_collections.py
1 # Copyright (C) The Arvados Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: Apache-2.0
4
5 from __future__ import absolute_import
6
7 from builtins import object
8 import arvados
9 import copy
10 import mock
11 import os
12 import pprint
13 import random
14 import re
15 import sys
16 import tempfile
17 import datetime
18 import time
19 import unittest
20
21 from . import run_test_server
22 from arvados._ranges import Range, LocatorAndRange
23 from arvados.collection import Collection, CollectionReader
24 from . import arvados_testutil as tutil
25
class TestResumableWriter(arvados.ResumableCollectionWriter):
    """ResumableCollectionWriter variant used by the resume tests."""

    # PUT to Keep every 1K.
    KEEP_BLOCK_SIZE = 1024

    def current_state(self):
        """Return dump_state() output, using copy.deepcopy as the copy function."""
        snapshot = self.dump_state(copy.deepcopy)
        return snapshot
31
32
33 class ArvadosCollectionsTest(run_test_server.TestCaseWithServers,
34                              tutil.ArvadosBaseTestCase):
35     MAIN_SERVER = {}
36
37     @classmethod
38     def setUpClass(cls):
39         super(ArvadosCollectionsTest, cls).setUpClass()
40         # need admin privileges to make collections with unsigned blocks
41         run_test_server.authorize_with('admin')
42         cls.api_client = arvados.api('v1')
43         cls.keep_client = arvados.KeepClient(api_client=cls.api_client,
44                                              local_store=cls.local_store)
45
46     def write_foo_bar_baz(self):
47         cw = arvados.CollectionWriter(self.api_client)
48         self.assertEqual(cw.current_stream_name(), '.',
49                          'current_stream_name() should be "." now')
50         cw.set_current_file_name('foo.txt')
51         cw.write(b'foo')
52         self.assertEqual(cw.current_file_name(), 'foo.txt',
53                          'current_file_name() should be foo.txt now')
54         cw.start_new_file('bar.txt')
55         cw.write(b'bar')
56         cw.start_new_stream('baz')
57         cw.write(b'baz')
58         cw.set_current_file_name('baz.txt')
59         self.assertEqual(cw.manifest_text(),
60                          ". 3858f62230ac3c915f300c664312c63f+6 0:3:foo.txt 3:3:bar.txt\n" +
61                          "./baz 73feffa4b7f6bb68e44cf984c85f6e88+3 0:3:baz.txt\n",
62                          "wrong manifest: got {}".format(cw.manifest_text()))
63         cw.save_new()
64         return cw.portable_data_hash()
65
66     def test_pdh_is_native_str(self):
67         pdh = self.write_foo_bar_baz()
68         self.assertEqual(type(''), type(pdh))
69
70     def test_keep_local_store(self):
71         self.assertEqual(self.keep_client.put(b'foo'), 'acbd18db4cc2f85cedef654fccc4a4d8+3', 'wrong md5 hash from Keep.put')
72         self.assertEqual(self.keep_client.get('acbd18db4cc2f85cedef654fccc4a4d8+3'), b'foo', 'wrong data from Keep.get')
73
74     def test_local_collection_writer(self):
75         self.assertEqual(self.write_foo_bar_baz(),
76                          '23ca013983d6239e98931cc779e68426+114',
77                          'wrong locator hash: ' + self.write_foo_bar_baz())
78
79     def test_local_collection_reader(self):
80         foobarbaz = self.write_foo_bar_baz()
81         cr = arvados.CollectionReader(
82             foobarbaz + '+Xzizzle', self.api_client)
83         got = []
84         for s in cr.all_streams():
85             for f in s.all_files():
86                 got += [[f.size(), f.stream_name(), f.name(), f.read(2**26)]]
87         expected = [[3, '.', 'foo.txt', b'foo'],
88                     [3, '.', 'bar.txt', b'bar'],
89                     [3, './baz', 'baz.txt', b'baz']]
90         self.assertEqual(got,
91                          expected)
92         stream0 = cr.all_streams()[0]
93         self.assertEqual(stream0.readfrom(0, 0),
94                          b'',
95                          'reading zero bytes should have returned empty string')
96         self.assertEqual(stream0.readfrom(0, 2**26),
97                          b'foobar',
98                          'reading entire stream failed')
99         self.assertEqual(stream0.readfrom(2**26, 0),
100                          b'',
101                          'reading zero bytes should have returned empty string')
102         self.assertEqual(3, len(cr))
103         self.assertTrue(cr)
104
105     def _test_subset(self, collection, expected):
106         cr = arvados.CollectionReader(collection, self.api_client)
107         for s in cr.all_streams():
108             for ex in expected:
109                 if ex[0] == s:
110                     f = s.files()[ex[2]]
111                     got = [f.size(), f.stream_name(), f.name(), "".join(f.readall(2**26))]
112                     self.assertEqual(got,
113                                      ex,
114                                      'all_files|as_manifest did not preserve manifest contents: got %s expected %s' % (got, ex))
115
116     def test_collection_manifest_subset(self):
117         foobarbaz = self.write_foo_bar_baz()
118         self._test_subset(foobarbaz,
119                           [[3, '.',     'bar.txt', b'bar'],
120                            [3, '.',     'foo.txt', b'foo'],
121                            [3, './baz', 'baz.txt', b'baz']])
122         self._test_subset((". %s %s 0:3:foo.txt 3:3:bar.txt\n" %
123                            (self.keep_client.put(b"foo"),
124                             self.keep_client.put(b"bar"))),
125                           [[3, '.', 'bar.txt', b'bar'],
126                            [3, '.', 'foo.txt', b'foo']])
127         self._test_subset((". %s %s 0:2:fo.txt 2:4:obar.txt\n" %
128                            (self.keep_client.put(b"foo"),
129                             self.keep_client.put(b"bar"))),
130                           [[2, '.', 'fo.txt', b'fo'],
131                            [4, '.', 'obar.txt', b'obar']])
132         self._test_subset((". %s %s 0:2:fo.txt 2:0:zero.txt 2:2:ob.txt 4:2:ar.txt\n" %
133                            (self.keep_client.put(b"foo"),
134                             self.keep_client.put(b"bar"))),
135                           [[2, '.', 'ar.txt', b'ar'],
136                            [2, '.', 'fo.txt', b'fo'],
137                            [2, '.', 'ob.txt', b'ob'],
138                            [0, '.', 'zero.txt', b'']])
139
140     def test_collection_empty_file(self):
141         cw = arvados.CollectionWriter(self.api_client)
142         cw.start_new_file('zero.txt')
143         cw.write(b'')
144
145         self.assertEqual(cw.manifest_text(), ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:zero.txt\n")
146         self.check_manifest_file_sizes(cw.manifest_text(), [0])
147         cw = arvados.CollectionWriter(self.api_client)
148         cw.start_new_file('zero.txt')
149         cw.write(b'')
150         cw.start_new_file('one.txt')
151         cw.write(b'1')
152         cw.start_new_stream('foo')
153         cw.start_new_file('zero.txt')
154         cw.write(b'')
155         self.check_manifest_file_sizes(cw.manifest_text(), [0,1,0])
156
157     def test_no_implicit_normalize(self):
158         cw = arvados.CollectionWriter(self.api_client)
159         cw.start_new_file('b')
160         cw.write(b'b')
161         cw.start_new_file('a')
162         cw.write(b'')
163         self.check_manifest_file_sizes(cw.manifest_text(), [1,0])
164         self.check_manifest_file_sizes(
165             arvados.CollectionReader(
166                 cw.manifest_text()).manifest_text(normalize=True),
167             [0,1])
168
169     def check_manifest_file_sizes(self, manifest_text, expect_sizes):
170         cr = arvados.CollectionReader(manifest_text, self.api_client)
171         got_sizes = []
172         for f in cr.all_files():
173             got_sizes += [f.size()]
174         self.assertEqual(got_sizes, expect_sizes, "got wrong file sizes %s, expected %s" % (got_sizes, expect_sizes))
175
176     def test_normalized_collection(self):
177         m1 = """. 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt
178 . 085c37f02916da1cad16f93c54d899b7+41 0:41:md5sum.txt
179 . 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md5sum.txt
180 """
181         self.assertEqual(arvados.CollectionReader(m1, self.api_client).manifest_text(normalize=True),
182                          """. 5348b82a029fd9e971a811ce1f71360b+43 085c37f02916da1cad16f93c54d899b7+41 8b22da26f9f433dea0a10e5ec66d73ba+43 0:127:md5sum.txt
183 """)
184
185         m2 = """. 204e43b8a1185621ca55a94839582e6f+67108864 b9677abbac956bd3e86b1deb28dfac03+67108864 fc15aff2a762b13f521baf042140acec+67108864 323d2a3ce20370c4ca1d3462a344f8fd+25885655 0:227212247:var-GS000016015-ASM.tsv.bz2
186 """
187         self.assertEqual(arvados.CollectionReader(m2, self.api_client).manifest_text(normalize=True), m2)
188
189         m3 = """. 5348b82a029fd9e971a811ce1f71360b+43 3:40:md5sum.txt
190 . 085c37f02916da1cad16f93c54d899b7+41 0:41:md5sum.txt
191 . 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md5sum.txt
192 """
193         self.assertEqual(arvados.CollectionReader(m3, self.api_client).manifest_text(normalize=True),
194                          """. 5348b82a029fd9e971a811ce1f71360b+43 085c37f02916da1cad16f93c54d899b7+41 8b22da26f9f433dea0a10e5ec66d73ba+43 3:124:md5sum.txt
195 """)
196
197         m4 = """. 204e43b8a1185621ca55a94839582e6f+67108864 0:3:foo/bar
198 ./zzz 204e43b8a1185621ca55a94839582e6f+67108864 0:999:zzz
199 ./foo 323d2a3ce20370c4ca1d3462a344f8fd+25885655 0:3:bar
200 """
201         self.assertEqual(arvados.CollectionReader(m4, self.api_client).manifest_text(normalize=True),
202                          """./foo 204e43b8a1185621ca55a94839582e6f+67108864 323d2a3ce20370c4ca1d3462a344f8fd+25885655 0:3:bar 67108864:3:bar
203 ./zzz 204e43b8a1185621ca55a94839582e6f+67108864 0:999:zzz
204 """)
205
206         m5 = """. 204e43b8a1185621ca55a94839582e6f+67108864 0:3:foo/bar
207 ./zzz 204e43b8a1185621ca55a94839582e6f+67108864 0:999:zzz
208 ./foo 204e43b8a1185621ca55a94839582e6f+67108864 3:3:bar
209 """
210         self.assertEqual(arvados.CollectionReader(m5, self.api_client).manifest_text(normalize=True),
211                          """./foo 204e43b8a1185621ca55a94839582e6f+67108864 0:6:bar
212 ./zzz 204e43b8a1185621ca55a94839582e6f+67108864 0:999:zzz
213 """)
214
215         with self.data_file('1000G_ref_manifest') as f6:
216             m6 = f6.read()
217             self.assertEqual(arvados.CollectionReader(m6, self.api_client).manifest_text(normalize=True), m6)
218
219         with self.data_file('jlake_manifest') as f7:
220             m7 = f7.read()
221             self.assertEqual(arvados.CollectionReader(m7, self.api_client).manifest_text(normalize=True), m7)
222
223         m8 = """./a\\040b\\040c 59ca0efa9f5633cb0371bbc0355478d8+13 0:13:hello\\040world.txt
224 """
225         self.assertEqual(arvados.CollectionReader(m8, self.api_client).manifest_text(normalize=True), m8)
226
227     def test_locators_and_ranges(self):
228         blocks2 = [Range('a', 0, 10),
229                    Range('b', 10, 10),
230                    Range('c', 20, 10),
231                    Range('d', 30, 10),
232                    Range('e', 40, 10),
233                    Range('f', 50, 10)]
234
235         self.assertEqual(arvados.locators_and_ranges(blocks2,  2,  2), [LocatorAndRange('a', 10, 2, 2)])
236         self.assertEqual(arvados.locators_and_ranges(blocks2, 12, 2), [LocatorAndRange('b', 10, 2, 2)])
237         self.assertEqual(arvados.locators_and_ranges(blocks2, 22, 2), [LocatorAndRange('c', 10, 2, 2)])
238         self.assertEqual(arvados.locators_and_ranges(blocks2, 32, 2), [LocatorAndRange('d', 10, 2, 2)])
239         self.assertEqual(arvados.locators_and_ranges(blocks2, 42, 2), [LocatorAndRange('e', 10, 2, 2)])
240         self.assertEqual(arvados.locators_and_ranges(blocks2, 52, 2), [LocatorAndRange('f', 10, 2, 2)])
241         self.assertEqual(arvados.locators_and_ranges(blocks2, 62, 2), [])
242         self.assertEqual(arvados.locators_and_ranges(blocks2, -2, 2), [])
243
244         self.assertEqual(arvados.locators_and_ranges(blocks2,  0,  2), [LocatorAndRange('a', 10, 0, 2)])
245         self.assertEqual(arvados.locators_and_ranges(blocks2, 10, 2), [LocatorAndRange('b', 10, 0, 2)])
246         self.assertEqual(arvados.locators_and_ranges(blocks2, 20, 2), [LocatorAndRange('c', 10, 0, 2)])
247         self.assertEqual(arvados.locators_and_ranges(blocks2, 30, 2), [LocatorAndRange('d', 10, 0, 2)])
248         self.assertEqual(arvados.locators_and_ranges(blocks2, 40, 2), [LocatorAndRange('e', 10, 0, 2)])
249         self.assertEqual(arvados.locators_and_ranges(blocks2, 50, 2), [LocatorAndRange('f', 10, 0, 2)])
250         self.assertEqual(arvados.locators_and_ranges(blocks2, 60, 2), [])
251         self.assertEqual(arvados.locators_and_ranges(blocks2, -2, 2), [])
252
253         self.assertEqual(arvados.locators_and_ranges(blocks2,  9,  2), [LocatorAndRange('a', 10, 9, 1), LocatorAndRange('b', 10, 0, 1)])
254         self.assertEqual(arvados.locators_and_ranges(blocks2, 19, 2), [LocatorAndRange('b', 10, 9, 1), LocatorAndRange('c', 10, 0, 1)])
255         self.assertEqual(arvados.locators_and_ranges(blocks2, 29, 2), [LocatorAndRange('c', 10, 9, 1), LocatorAndRange('d', 10, 0, 1)])
256         self.assertEqual(arvados.locators_and_ranges(blocks2, 39, 2), [LocatorAndRange('d', 10, 9, 1), LocatorAndRange('e', 10, 0, 1)])
257         self.assertEqual(arvados.locators_and_ranges(blocks2, 49, 2), [LocatorAndRange('e', 10, 9, 1), LocatorAndRange('f', 10, 0, 1)])
258         self.assertEqual(arvados.locators_and_ranges(blocks2, 59, 2), [LocatorAndRange('f', 10, 9, 1)])
259
260
261         blocks3 = [Range('a', 0, 10),
262                   Range('b', 10, 10),
263                   Range('c', 20, 10),
264                   Range('d', 30, 10),
265                   Range('e', 40, 10),
266                   Range('f', 50, 10),
267                    Range('g', 60, 10)]
268
269         self.assertEqual(arvados.locators_and_ranges(blocks3,  2,  2), [LocatorAndRange('a', 10, 2, 2)])
270         self.assertEqual(arvados.locators_and_ranges(blocks3, 12, 2), [LocatorAndRange('b', 10, 2, 2)])
271         self.assertEqual(arvados.locators_and_ranges(blocks3, 22, 2), [LocatorAndRange('c', 10, 2, 2)])
272         self.assertEqual(arvados.locators_and_ranges(blocks3, 32, 2), [LocatorAndRange('d', 10, 2, 2)])
273         self.assertEqual(arvados.locators_and_ranges(blocks3, 42, 2), [LocatorAndRange('e', 10, 2, 2)])
274         self.assertEqual(arvados.locators_and_ranges(blocks3, 52, 2), [LocatorAndRange('f', 10, 2, 2)])
275         self.assertEqual(arvados.locators_and_ranges(blocks3, 62, 2), [LocatorAndRange('g', 10, 2, 2)])
276
277
278         blocks = [Range('a', 0, 10),
279                   Range('b', 10, 15),
280                   Range('c', 25, 5)]
281         self.assertEqual(arvados.locators_and_ranges(blocks, 1, 0), [])
282         self.assertEqual(arvados.locators_and_ranges(blocks, 0, 5), [LocatorAndRange('a', 10, 0, 5)])
283         self.assertEqual(arvados.locators_and_ranges(blocks, 3, 5), [LocatorAndRange('a', 10, 3, 5)])
284         self.assertEqual(arvados.locators_and_ranges(blocks, 0, 10), [LocatorAndRange('a', 10, 0, 10)])
285
286         self.assertEqual(arvados.locators_and_ranges(blocks, 0, 11), [LocatorAndRange('a', 10, 0, 10),
287                                                                       LocatorAndRange('b', 15, 0, 1)])
288         self.assertEqual(arvados.locators_and_ranges(blocks, 1, 11), [LocatorAndRange('a', 10, 1, 9),
289                                                                       LocatorAndRange('b', 15, 0, 2)])
290         self.assertEqual(arvados.locators_and_ranges(blocks, 0, 25), [LocatorAndRange('a', 10, 0, 10),
291                                                                       LocatorAndRange('b', 15, 0, 15)])
292
293         self.assertEqual(arvados.locators_and_ranges(blocks, 0, 30), [LocatorAndRange('a', 10, 0, 10),
294                                                                       LocatorAndRange('b', 15, 0, 15),
295                                                                       LocatorAndRange('c', 5, 0, 5)])
296         self.assertEqual(arvados.locators_and_ranges(blocks, 1, 30), [LocatorAndRange('a', 10, 1, 9),
297                                                                       LocatorAndRange('b', 15, 0, 15),
298                                                                       LocatorAndRange('c', 5, 0, 5)])
299         self.assertEqual(arvados.locators_and_ranges(blocks, 0, 31), [LocatorAndRange('a', 10, 0, 10),
300                                                                       LocatorAndRange('b', 15, 0, 15),
301                                                                       LocatorAndRange('c', 5, 0, 5)])
302
303         self.assertEqual(arvados.locators_and_ranges(blocks, 15, 5), [LocatorAndRange('b', 15, 5, 5)])
304
305         self.assertEqual(arvados.locators_and_ranges(blocks, 8, 17), [LocatorAndRange('a', 10, 8, 2),
306                                                                       LocatorAndRange('b', 15, 0, 15)])
307
308         self.assertEqual(arvados.locators_and_ranges(blocks, 8, 20), [LocatorAndRange('a', 10, 8, 2),
309                                                                       LocatorAndRange('b', 15, 0, 15),
310                                                                       LocatorAndRange('c', 5, 0, 3)])
311
312         self.assertEqual(arvados.locators_and_ranges(blocks, 26, 2), [LocatorAndRange('c', 5, 1, 2)])
313
314         self.assertEqual(arvados.locators_and_ranges(blocks, 9, 15), [LocatorAndRange('a', 10, 9, 1),
315                                                                       LocatorAndRange('b', 15, 0, 14)])
316         self.assertEqual(arvados.locators_and_ranges(blocks, 10, 15), [LocatorAndRange('b', 15, 0, 15)])
317         self.assertEqual(arvados.locators_and_ranges(blocks, 11, 15), [LocatorAndRange('b', 15, 1, 14),
318                                                                        LocatorAndRange('c', 5, 0, 1)])
319
320     class MockKeep(object):
321         def __init__(self, content, num_retries=0):
322             self.content = content
323
324         def get(self, locator, num_retries=0):
325             return self.content[locator]
326
327     def test_stream_reader(self):
328         keepblocks = {
329             'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa+10': b'abcdefghij',
330             'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb+15': b'klmnopqrstuvwxy',
331             'cccccccccccccccccccccccccccccccc+5': b'z0123',
332         }
333         mk = self.MockKeep(keepblocks)
334
335         sr = arvados.StreamReader([".", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa+10", "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb+15", "cccccccccccccccccccccccccccccccc+5", "0:30:foo"], mk)
336
337         content = b'abcdefghijklmnopqrstuvwxyz0123456789'
338
339         self.assertEqual(sr.readfrom(0, 30), content[0:30])
340         self.assertEqual(sr.readfrom(2, 30), content[2:30])
341
342         self.assertEqual(sr.readfrom(2, 8), content[2:10])
343         self.assertEqual(sr.readfrom(0, 10), content[0:10])
344
345         self.assertEqual(sr.readfrom(0, 5), content[0:5])
346         self.assertEqual(sr.readfrom(5, 5), content[5:10])
347         self.assertEqual(sr.readfrom(10, 5), content[10:15])
348         self.assertEqual(sr.readfrom(15, 5), content[15:20])
349         self.assertEqual(sr.readfrom(20, 5), content[20:25])
350         self.assertEqual(sr.readfrom(25, 5), content[25:30])
351         self.assertEqual(sr.readfrom(30, 5), b'')
352
353     def test_extract_file(self):
354         m1 = """. 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt
355 . 085c37f02916da1cad16f93c54d899b7+41 0:41:md6sum.txt
356 . 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md7sum.txt
357 . 085c37f02916da1cad16f93c54d899b7+41 5348b82a029fd9e971a811ce1f71360b+43 8b22da26f9f433dea0a10e5ec66d73ba+43 47:80:md8sum.txt
358 . 085c37f02916da1cad16f93c54d899b7+41 5348b82a029fd9e971a811ce1f71360b+43 8b22da26f9f433dea0a10e5ec66d73ba+43 40:80:md9sum.txt
359 """
360
361         m2 = arvados.CollectionReader(m1, self.api_client).manifest_text(normalize=True)
362
363         self.assertEqual(m2,
364                          ". 5348b82a029fd9e971a811ce1f71360b+43 085c37f02916da1cad16f93c54d899b7+41 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md5sum.txt 43:41:md6sum.txt 84:43:md7sum.txt 6:37:md8sum.txt 84:43:md8sum.txt 83:1:md9sum.txt 0:43:md9sum.txt 84:36:md9sum.txt\n")
365         files = arvados.CollectionReader(
366             m2, self.api_client).all_streams()[0].files()
367
368         self.assertEqual(files['md5sum.txt'].as_manifest(),
369                          ". 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt\n")
370         self.assertEqual(files['md6sum.txt'].as_manifest(),
371                          ". 085c37f02916da1cad16f93c54d899b7+41 0:41:md6sum.txt\n")
372         self.assertEqual(files['md7sum.txt'].as_manifest(),
373                          ". 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md7sum.txt\n")
374         self.assertEqual(files['md9sum.txt'].as_manifest(),
375                          ". 085c37f02916da1cad16f93c54d899b7+41 5348b82a029fd9e971a811ce1f71360b+43 8b22da26f9f433dea0a10e5ec66d73ba+43 40:80:md9sum.txt\n")
376
377     def test_write_directory_tree(self):
378         cwriter = arvados.CollectionWriter(self.api_client)
379         cwriter.write_directory_tree(self.build_directory_tree(
380                 ['basefile', 'subdir/subfile']))
381         self.assertEqual(cwriter.manifest_text(),
382                          """. c5110c5ac93202d8e0f9e381f22bac0f+8 0:8:basefile
383 ./subdir 1ca4dec89403084bf282ad31e6cf7972+14 0:14:subfile\n""")
384
385     def test_write_named_directory_tree(self):
386         cwriter = arvados.CollectionWriter(self.api_client)
387         cwriter.write_directory_tree(self.build_directory_tree(
388                 ['basefile', 'subdir/subfile']), 'root')
389         self.assertEqual(
390             cwriter.manifest_text(),
391             """./root c5110c5ac93202d8e0f9e381f22bac0f+8 0:8:basefile
392 ./root/subdir 1ca4dec89403084bf282ad31e6cf7972+14 0:14:subfile\n""")
393
394     def test_write_directory_tree_in_one_stream(self):
395         cwriter = arvados.CollectionWriter(self.api_client)
396         cwriter.write_directory_tree(self.build_directory_tree(
397                 ['basefile', 'subdir/subfile']), max_manifest_depth=0)
398         self.assertEqual(cwriter.manifest_text(),
399                          """. 4ace875ffdc6824a04950f06858f4465+22 0:8:basefile 8:14:subdir/subfile\n""")
400
401     def test_write_directory_tree_with_limited_recursion(self):
402         cwriter = arvados.CollectionWriter(self.api_client)
403         cwriter.write_directory_tree(
404             self.build_directory_tree(['f1', 'd1/f2', 'd1/d2/f3']),
405             max_manifest_depth=1)
406         self.assertEqual(cwriter.manifest_text(),
407                          """. bd19836ddb62c11c55ab251ccaca5645+2 0:2:f1
408 ./d1 50170217e5b04312024aa5cd42934494+13 0:8:d2/f3 8:5:f2\n""")
409
410     def test_write_directory_tree_with_zero_recursion(self):
411         cwriter = arvados.CollectionWriter(self.api_client)
412         content = 'd1/d2/f3d1/f2f1'
413         blockhash = tutil.str_keep_locator(content)
414         cwriter.write_directory_tree(
415             self.build_directory_tree(['f1', 'd1/f2', 'd1/d2/f3']),
416             max_manifest_depth=0)
417         self.assertEqual(
418             cwriter.manifest_text(),
419             ". {} 0:8:d1/d2/f3 8:5:d1/f2 13:2:f1\n".format(blockhash))
420
421     def test_write_one_file(self):
422         cwriter = arvados.CollectionWriter(self.api_client)
423         with self.make_test_file() as testfile:
424             cwriter.write_file(testfile.name)
425             self.assertEqual(
426                 cwriter.manifest_text(),
427                 ". 098f6bcd4621d373cade4e832627b4f6+4 0:4:{}\n".format(
428                     os.path.basename(testfile.name)))
429
430     def test_write_named_file(self):
431         cwriter = arvados.CollectionWriter(self.api_client)
432         with self.make_test_file() as testfile:
433             cwriter.write_file(testfile.name, 'foo')
434             self.assertEqual(cwriter.manifest_text(),
435                              ". 098f6bcd4621d373cade4e832627b4f6+4 0:4:foo\n")
436
437     def test_write_multiple_files(self):
438         cwriter = arvados.CollectionWriter(self.api_client)
439         for letter in 'ABC':
440             with self.make_test_file(letter.encode()) as testfile:
441                 cwriter.write_file(testfile.name, letter)
442         self.assertEqual(
443             cwriter.manifest_text(),
444             ". 902fbdd2b1df0c4f70b4a5d23525e932+3 0:1:A 1:1:B 2:1:C\n")
445
446     def test_basic_resume(self):
447         cwriter = TestResumableWriter()
448         with self.make_test_file() as testfile:
449             cwriter.write_file(testfile.name, 'test')
450             resumed = TestResumableWriter.from_state(cwriter.current_state())
451         self.assertEqual(cwriter.manifest_text(), resumed.manifest_text(),
452                           "resumed CollectionWriter had different manifest")
453
454     def test_resume_fails_when_missing_dependency(self):
455         cwriter = TestResumableWriter()
456         with self.make_test_file() as testfile:
457             cwriter.write_file(testfile.name, 'test')
458         self.assertRaises(arvados.errors.StaleWriterStateError,
459                           TestResumableWriter.from_state,
460                           cwriter.current_state())
461
462     def test_resume_fails_when_dependency_mtime_changed(self):
463         cwriter = TestResumableWriter()
464         with self.make_test_file() as testfile:
465             cwriter.write_file(testfile.name, 'test')
466             os.utime(testfile.name, (0, 0))
467             self.assertRaises(arvados.errors.StaleWriterStateError,
468                               TestResumableWriter.from_state,
469                               cwriter.current_state())
470
471     def test_resume_fails_when_dependency_is_nonfile(self):
472         cwriter = TestResumableWriter()
473         cwriter.write_file('/dev/null', 'empty')
474         self.assertRaises(arvados.errors.StaleWriterStateError,
475                           TestResumableWriter.from_state,
476                           cwriter.current_state())
477
478     def test_resume_fails_when_dependency_size_changed(self):
479         cwriter = TestResumableWriter()
480         with self.make_test_file() as testfile:
481             cwriter.write_file(testfile.name, 'test')
482             orig_mtime = os.fstat(testfile.fileno()).st_mtime
483             testfile.write(b'extra')
484             testfile.flush()
485             os.utime(testfile.name, (orig_mtime, orig_mtime))
486             self.assertRaises(arvados.errors.StaleWriterStateError,
487                               TestResumableWriter.from_state,
488                               cwriter.current_state())
489
490     def test_resume_fails_with_expired_locator(self):
491         cwriter = TestResumableWriter()
492         state = cwriter.current_state()
493         # Add an expired locator to the state.
494         state['_current_stream_locators'].append(''.join([
495                     'a' * 32, '+1+A', 'b' * 40, '@', '10000000']))
496         self.assertRaises(arvados.errors.StaleWriterStateError,
497                           TestResumableWriter.from_state, state)
498
499     def test_arbitrary_objects_not_resumable(self):
500         cwriter = TestResumableWriter()
501         with open('/dev/null') as badfile:
502             self.assertRaises(arvados.errors.AssertionError,
503                               cwriter.write_file, badfile)
504
505     def test_arbitrary_writes_not_resumable(self):
506         cwriter = TestResumableWriter()
507         self.assertRaises(arvados.errors.AssertionError,
508                           cwriter.write, "badtext")
509
510
class CollectionTestMixin(tutil.ApiClientMock):
    """Shared collection fixtures and an API client mock for reader tests."""

    # All collection fixtures, keyed by fixture name.
    API_COLLECTIONS = run_test_server.fixture('collections')

    # Fields of the 'foo_file' fixture.
    DEFAULT_COLLECTION = API_COLLECTIONS['foo_file']
    DEFAULT_UUID = DEFAULT_COLLECTION['uuid']
    DEFAULT_DATA_HASH = DEFAULT_COLLECTION['portable_data_hash']
    DEFAULT_MANIFEST = DEFAULT_COLLECTION['manifest_text']

    # Fields of the 'bar_file' fixture.
    ALT_COLLECTION = API_COLLECTIONS['bar_file']
    ALT_DATA_HASH = ALT_COLLECTION['portable_data_hash']
    ALT_MANIFEST = ALT_COLLECTION['manifest_text']

    def api_client_mock(self, status=200):
        """Return the parent's API client mock with one proxy Keep service mocked.

        `status` is passed to mock_keep_services() only; the parent's
        api_client_mock() is called without arguments.
        """
        api_mock = super(CollectionTestMixin, self).api_client_mock()
        self.mock_keep_services(
            api_mock, status=status, service_type='proxy', count=1)
        return api_mock
525
526
527 @tutil.skip_sleep
528 class CollectionReaderTestCase(unittest.TestCase, CollectionTestMixin):
529     def mock_get_collection(self, api_mock, code, fixturename):
530         body = self.API_COLLECTIONS.get(fixturename)
531         self._mock_api_call(api_mock.collections().get, code, body)
532
533     def api_client_mock(self, status=200):
534         client = super(CollectionReaderTestCase, self).api_client_mock()
535         self.mock_get_collection(client, status, 'foo_file')
536         return client
537
538     def test_init_no_default_retries(self):
539         client = self.api_client_mock(200)
540         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
541         reader.manifest_text()
542         client.collections().get().execute.assert_called_with(num_retries=0)
543
544     def test_uuid_init_success(self):
545         client = self.api_client_mock(200)
546         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client,
547                                           num_retries=3)
548         self.assertEqual(self.DEFAULT_COLLECTION['manifest_text'],
549                          reader.manifest_text())
550         client.collections().get().execute.assert_called_with(num_retries=3)
551
552     def test_uuid_init_failure_raises_api_error(self):
553         client = self.api_client_mock(500)
554         with self.assertRaises(arvados.errors.ApiError):
555             reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
556
557     def test_locator_init(self):
558         client = self.api_client_mock(200)
559         # Ensure Keep will not return anything if asked.
560         with tutil.mock_keep_responses(None, 404):
561             reader = arvados.CollectionReader(self.DEFAULT_DATA_HASH,
562                                               api_client=client)
563             self.assertEqual(self.DEFAULT_MANIFEST, reader.manifest_text())
564
565     def test_init_no_fallback_to_keep(self):
566         # Do not look up a collection UUID or PDH in Keep.
567         for key in [self.DEFAULT_UUID, self.DEFAULT_DATA_HASH]:
568             client = self.api_client_mock(404)
569             with tutil.mock_keep_responses(self.DEFAULT_MANIFEST, 200):
570                 with self.assertRaises(arvados.errors.ApiError):
571                     reader = arvados.CollectionReader(key, api_client=client)
572
573     def test_init_num_retries_propagated(self):
574         # More of an integration test...
575         client = self.api_client_mock(200)
576         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client,
577                                           num_retries=3)
578         with tutil.mock_keep_responses('foo', 500, 500, 200):
579             self.assertEqual(b'foo',
580                              b''.join(f.read(9) for f in reader.all_files()))
581
582     def test_read_nonnormalized_manifest_with_collection_reader(self):
583         # client should be able to use CollectionReader on a manifest without normalizing it
584         client = self.api_client_mock(500)
585         nonnormal = ". acbd18db4cc2f85cedef654fccc4a4d8+3+Aabadbadbee@abeebdee 0:3:foo.txt 1:0:bar.txt 0:3:foo.txt\n"
586         reader = arvados.CollectionReader(
587             nonnormal,
588             api_client=client, num_retries=0)
589         # Ensure stripped_manifest() doesn't mangle our manifest in
590         # any way other than stripping hints.
591         self.assertEqual(
592             re.sub('\+[^\d\s\+]+', '', nonnormal),
593             reader.stripped_manifest())
594         # Ensure stripped_manifest() didn't mutate our reader.
595         self.assertEqual(nonnormal, reader.manifest_text())
596         # Ensure the files appear in the order given in the manifest.
597         self.assertEqual(
598             [[6, '.', 'foo.txt'],
599              [0, '.', 'bar.txt']],
600             [[f.size(), f.stream_name(), f.name()]
601              for f in reader.all_streams()[0].all_files()])
602
603     def test_read_empty_collection(self):
604         client = self.api_client_mock(200)
605         self.mock_get_collection(client, 200, 'empty')
606         reader = arvados.CollectionReader('d41d8cd98f00b204e9800998ecf8427e+0',
607                                           api_client=client)
608         self.assertEqual('', reader.manifest_text())
609         self.assertEqual(0, len(reader))
610         self.assertFalse(reader)
611
612     def test_api_response(self):
613         client = self.api_client_mock()
614         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
615         self.assertEqual(self.DEFAULT_COLLECTION, reader.api_response())
616
617     def check_open_file(self, coll_file, stream_name, file_name, file_size):
618         self.assertFalse(coll_file.closed, "returned file is not open")
619         self.assertEqual(stream_name, coll_file.stream_name())
620         self.assertEqual(file_name, coll_file.name)
621         self.assertEqual(file_size, coll_file.size())
622
623     def test_open_collection_file_one_argument(self):
624         client = self.api_client_mock(200)
625         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
626         cfile = reader.open('./foo', 'rb')
627         self.check_open_file(cfile, '.', 'foo', 3)
628
629     def test_open_deep_file(self):
630         coll_name = 'collection_with_files_in_subdir'
631         client = self.api_client_mock(200)
632         self.mock_get_collection(client, 200, coll_name)
633         reader = arvados.CollectionReader(
634             self.API_COLLECTIONS[coll_name]['uuid'], api_client=client)
635         cfile = reader.open('./subdir2/subdir3/file2_in_subdir3.txt', 'rb')
636         self.check_open_file(cfile, './subdir2/subdir3', 'file2_in_subdir3.txt',
637                              32)
638
639     def test_open_nonexistent_stream(self):
640         client = self.api_client_mock(200)
641         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
642         self.assertRaises(IOError, reader.open, './nonexistent/foo')
643
644     def test_open_nonexistent_file(self):
645         client = self.api_client_mock(200)
646         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
647         self.assertRaises(IOError, reader.open, 'nonexistent')
648
649
@tutil.skip_sleep
class CollectionWriterTestCase(unittest.TestCase, CollectionTestMixin):
    """Tests for arvados.CollectionWriter run against mocked Keep responses."""

    def mock_keep(self, body, *codes, **headers):
        """Return tutil.mock_keep_responses(...) with a default replica header.

        The 'x-keep-replicas-stored' header is set to 2 unless the caller
        passes its own value.
        """
        if 'x-keep-replicas-stored' not in headers:
            headers['x-keep-replicas-stored'] = 2
        return tutil.mock_keep_responses(body, *codes, **headers)

    def foo_writer(self, **kwargs):
        """Build a CollectionWriter holding a file 'foo' with b'foo' written.

        Keyword arguments are passed to the CollectionWriter constructor;
        api_client falls back to self.api_client_mock() when not given.
        """
        default_client = self.api_client_mock()
        if 'api_client' not in kwargs:
            kwargs['api_client'] = default_client
        foo = arvados.CollectionWriter(**kwargs)
        foo.start_new_file('foo')
        foo.write(b'foo')
        return foo

    def test_write_whole_collection(self):
        """finish() returns DEFAULT_DATA_HASH when Keep is mocked with 200, 200."""
        foo = self.foo_writer()
        with self.mock_keep(self.DEFAULT_DATA_HASH, 200, 200):
            finished = foo.finish()
            self.assertEqual(self.DEFAULT_DATA_HASH, finished)

    def test_write_no_default(self):
        """finish() raises KeepWriteError when Keep is mocked with a single 500."""
        foo = self.foo_writer()
        with self.mock_keep(None, 500):
            self.assertRaises(arvados.errors.KeepWriteError, foo.finish)

    def test_write_insufficient_replicas_via_proxy(self):
        """replication=3 fails when the mocked response reports 2 replicas stored."""
        stored_two = {'x-keep-replicas-stored': 2}
        foo = self.foo_writer(replication=3)
        with self.mock_keep(None, 200, **stored_two):
            self.assertRaises(
                arvados.errors.KeepWriteError, foo.manifest_text)

    def test_write_insufficient_replicas_via_disks(self):
        """replication=3 fails with two mocked disk services storing 1 replica each."""
        api = mock.MagicMock(name='api_client')
        stored_one = {'x-keep-replicas-stored': 1}
        with self.mock_keep(None, 200, 200, **stored_one):
            self.mock_keep_services(
                api, status=200, service_type='disk', count=2)
            foo = self.foo_writer(api_client=api, replication=3)
            self.assertRaises(
                arvados.errors.KeepWriteError, foo.manifest_text)

    def test_write_three_replicas(self):
        """With three 500s then three 200s mocked, the Keep mock is called 6 times."""
        api = mock.MagicMock(name='api_client')
        stored_one = {'x-keep-replicas-stored': 1}
        response_codes = (500, 500, 500, 200, 200, 200)
        with self.mock_keep("", *response_codes, **stored_one) as keep_calls:
            self.mock_keep_services(
                api, status=200, service_type='disk', count=6)
            foo = self.foo_writer(api_client=api, replication=3)
            foo.manifest_text()
            self.assertEqual(6, keep_calls.call_count)

    def test_write_whole_collection_through_retries(self):
        """finish() succeeds with num_retries=2 when 500s precede each 200."""
        foo = self.foo_writer(num_retries=2)
        response_codes = (500, 500, 200, 500, 500, 200)
        with self.mock_keep(self.DEFAULT_DATA_HASH, *response_codes):
            finished = foo.finish()
            self.assertEqual(self.DEFAULT_DATA_HASH, finished)

    def test_flush_data_retries(self):
        """flush_data() gets past a mocked 500 and the manifest matches DEFAULT_MANIFEST."""
        foo = self.foo_writer(num_retries=2)
        # Second whitespace-separated token of DEFAULT_MANIFEST.
        foo_hash = self.DEFAULT_MANIFEST.split()[1]
        with self.mock_keep(foo_hash, 500, 200):
            foo.flush_data()
        manifest = foo.manifest_text()
        self.assertEqual(self.DEFAULT_MANIFEST, manifest)

    def test_one_open(self):
        """open() sets the current stream/file names and closes the file on exit."""
        api = self.api_client_mock()
        writer = arvados.CollectionWriter(api)
        data_loc = tutil.str_keep_locator('test data')
        with writer.open('out') as handle:
            self.assertEqual('.', writer.current_stream_name())
            self.assertEqual('out', writer.current_file_name())
            handle.write(b'test data')
        self.assertTrue(handle.closed, "writer file not closed after context")
        # Writing to the closed file must be rejected.
        with self.assertRaises(ValueError):
            handle.write('extra text')
        with self.mock_keep(data_loc, 200):
            expected = ". {} 0:9:out\n".format(data_loc)
            self.assertEqual(expected, writer.manifest_text())

    def test_open_writelines(self):
        """writelines() data ends up as one 6-byte file in the manifest."""
        api = self.api_client_mock()
        writer = arvados.CollectionWriter(api)
        data_loc = tutil.str_keep_locator('123456')
        with writer.open('six') as handle:
            handle.writelines(['12', '34', '56'])
        with self.mock_keep(data_loc, 200):
            expected = ". {} 0:6:six\n".format(data_loc)
            self.assertEqual(expected, writer.manifest_text())

    def test_open_flush(self):
        """flush() between two writes yields two locators for a single file."""
        api = self.api_client_mock()
        first_loc = tutil.str_keep_locator('flush1')
        second_loc = tutil.str_keep_locator('flush2')
        with self.mock_keep((first_loc, 200), (second_loc, 200)):
            writer = arvados.CollectionWriter(api)
            with writer.open('flush_test') as handle:
                handle.write(b'flush1')
                handle.flush()
                handle.write(b'flush2')
            expected = ". {} {} 0:12:flush_test\n".format(first_loc,
                                                          second_loc)
            self.assertEqual(expected, writer.manifest_text())

    def test_two_opens_same_stream(self):
        """Two files opened one after another in '.' share one data block."""
        api = self.api_client_mock()
        writer = arvados.CollectionWriter(api)
        for file_name, payload in (('1', b'1st'), ('2', b'2nd')):
            with writer.open('.', file_name) as handle:
                handle.write(payload)
        data_loc = tutil.str_keep_locator('1st2nd')
        with self.mock_keep(data_loc, 200):
            expected = ". {} 0:3:1 3:3:2\n".format(data_loc)
            self.assertEqual(expected, writer.manifest_text())

    def test_two_opens_two_streams(self):
        """Files opened in '.' and './dir' produce two manifest stream lines."""
        api = self.api_client_mock()
        file_loc = tutil.str_keep_locator('file')
        indir_loc = tutil.str_keep_locator('indir')
        with self.mock_keep((file_loc, 200), (indir_loc, 200)):
            writer = arvados.CollectionWriter(api)
            targets = (
                (('file',), b'file'),
                (('./dir', 'indir'), b'indir'),
            )
            for open_args, payload in targets:
                with writer.open(*open_args) as handle:
                    handle.write(payload)
            expected = ". {} 0:4:file\n./dir {} 0:5:indir\n".format(
                file_loc, indir_loc)
            self.assertEqual(expected, writer.manifest_text())

    def test_dup_open_fails(self):
        """Opening a second file while the first is still open raises AssertionError."""
        api = self.api_client_mock()
        writer = arvados.CollectionWriter(api)
        # Keep a reference to the first file; it is left open on purpose.
        first_handle = writer.open('one')
        with self.assertRaises(arvados.errors.AssertionError):
            writer.open('two')
782
783
class CollectionMethods(run_test_server.TestCaseWithServers):
    """Tests for Collection container methods and attribute getters."""

    def test_keys_values_items_support_indexing(self):
        """keys(), values() and items() report two entries and can be indexed."""
        coll = Collection()
        for file_name, payload in (('foo', b'foo'), ('bar', b'bar')):
            with coll.open(file_name, 'wb') as handle:
                handle.write(payload)
        self.assertEqual(2, len(coll.keys()))
        if sys.version_info >= (3, 0):
            first_name, second_name = coll.keys()
        else:
            # keys() supports indexing only for python2 callers.
            first_name = coll.keys()[0]
            second_name = coll.keys()[1]
        self.assertEqual(2, len(coll.values()))
        # Indexing values() is itself part of what is being exercised.
        first_value = coll.values()[0]
        second_value = coll.values()[1]
        self.assertEqual(2, len(coll.items()))
        first_item = coll.items()[0]
        self.assertEqual(first_name, first_item[0])
        second_item = coll.items()[1]
        self.assertEqual(second_name, second_item[0])

    def test_get_properties(self):
        """get_properties() is {} at first and returns what save_new() was given."""
        coll = Collection()
        self.assertEqual(coll.get_properties(), {})
        saved_properties = {"foo": "bar"}
        coll.save_new(properties=saved_properties)
        self.assertEqual(coll.get_properties(), {"foo": "bar"})

    def test_get_trash_at(self):
        """get_trash_at() is None at first and returns what save_new() was given."""
        coll = Collection()
        self.assertEqual(coll.get_trash_at(), None)
        coll.save_new(
            trash_at=datetime.datetime(2111, 1, 1, 11, 11, 11, 111))
        expected = datetime.datetime(2111, 1, 1, 11, 11, 11, 111)
        self.assertEqual(coll.get_trash_at(), expected)
817
818
class CollectionOpenModes(run_test_server.TestCaseWithServers):
    """Tests for the mode strings accepted by Collection.open()."""

    def test_open_binary_modes(self):
        """Each binary write/append mode opens 'foo' and accepts a bytes write."""
        coll = Collection()
        for binary_mode in ('wb', 'wb+', 'ab', 'ab+'):
            with coll.open('foo', binary_mode) as handle:
                handle.write(b'foo')

    def test_open_invalid_modes(self):
        """Malformed, empty and None modes all make open() raise."""
        coll = Collection()
        for bad_mode in ('+r', 'aa', '++', 'r+b', 'beer', '', None):
            self.assertRaises(Exception, coll.open, 'foo', bad_mode)

    def test_open_text_modes(self):
        """Text modes raise NotImplementedError on Python 3 and work on Python 2."""
        coll = Collection()
        with coll.open('foo', 'wb') as handle:
            handle.write('foo')
        on_python3 = sys.version_info >= (3, 0)
        for text_mode in ('r', 'rt', 'r+', 'rt+', 'w', 'wt', 'a', 'at'):
            if on_python3:
                self.assertRaises(
                    NotImplementedError, coll.open, 'foo', text_mode)
                continue
            with coll.open('foo', text_mode) as handle:
                read_only = text_mode[0] == 'r' and '+' not in text_mode
                if read_only:
                    self.assertEqual('foo', handle.read(3))
                    continue
                # Writable mode: write, step back three bytes, read back.
                handle.write('bar')
                handle.seek(-3, os.SEEK_CUR)
                self.assertEqual('bar', handle.read(3))
849
850
class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
    """Tests for Collection manifest loading, editing, diff/apply and events."""

    def test_replication_desired_kept_on_load(self):
        """replication_desired saved with a collection is seen after reloading it."""
        m = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n'
        c1 = Collection(m, replication_desired=1)
        c1.save_new()
        loc = c1.manifest_locator()
        c2 = Collection(loc)
        # Compare the manifest text itself.  Comparing the bound
        # manifest_text methods would never look at the manifests.
        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
        self.assertEqual(c1.replication_desired, c2.replication_desired)

    def test_replication_desired_not_loaded_if_provided(self):
        """An explicit replication_desired is not replaced by the saved value on load."""
        m = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n'
        c1 = Collection(m, replication_desired=1)
        c1.save_new()
        loc = c1.manifest_locator()
        c2 = Collection(loc, replication_desired=2)
        # Compare manifest text, not the bound methods (see the test above).
        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
        self.assertNotEqual(c1.replication_desired, c2.replication_desired)

    def test_init_manifest(self):
        """manifest_text() returns the input as-is, or merged when normalize=True."""
        m1 = """. 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt
. 085c37f02916da1cad16f93c54d899b7+41 0:41:md5sum.txt
. 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md5sum.txt
"""
        self.assertEqual(m1, CollectionReader(m1).manifest_text(normalize=False))
        self.assertEqual(". 5348b82a029fd9e971a811ce1f71360b+43 085c37f02916da1cad16f93c54d899b7+41 8b22da26f9f433dea0a10e5ec66d73ba+43 0:127:md5sum.txt\n", CollectionReader(m1).manifest_text(normalize=True))

    def test_init_manifest_with_collision(self):
        """A manifest using md5sum.txt as both a file and a stream is rejected."""
        m1 = """. 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt
./md5sum.txt 085c37f02916da1cad16f93c54d899b7+41 0:41:md5sum.txt
"""
        # The constructor itself is expected to raise.
        with self.assertRaises(arvados.errors.ArgumentError):
            CollectionReader(m1)

    def test_init_manifest_with_error(self):
        """A manifest stream line without any block locator is rejected."""
        m1 = """. 0:43:md5sum.txt"""
        # The constructor itself is expected to raise.
        with self.assertRaises(arvados.errors.ArgumentError):
            CollectionReader(m1)

    def test_remove(self):
        """remove() drops a file from the collection; an empty path is an error."""
        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n')
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n", c.portable_manifest_text())
        self.assertIn("count1.txt", c)
        c.remove("count1.txt")
        self.assertNotIn("count1.txt", c)
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c.portable_manifest_text())
        with self.assertRaises(arvados.errors.ArgumentError):
            c.remove("")

    def test_find(self):
        """find() resolves '.' and file paths, returns None for missing entries."""
        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n')
        self.assertIs(c.find("."), c)
        self.assertIs(c.find("./count1.txt"), c["count1.txt"])
        self.assertIs(c.find("count1.txt"), c["count1.txt"])
        with self.assertRaises(IOError):
            c.find("/.")
        with self.assertRaises(arvados.errors.ArgumentError):
            c.find("")
        self.assertIs(c.find("./nonexistant.txt"), None)
        self.assertIs(c.find("./nonexistantsubdir/nonexistant.txt"), None)

    def test_remove_in_subdir(self):
        """Removing the only file in ./foo leaves just the top-level stream."""
        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
        c.remove("foo/count2.txt")
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.portable_manifest_text())

    def test_remove_empty_subdir(self):
        """An emptied subdirectory can be removed without recursive=True."""
        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
        c.remove("foo/count2.txt")
        c.remove("foo")
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.portable_manifest_text())

    def test_remove_nonempty_subdir(self):
        """Removing a non-empty subdirectory raises IOError unless recursive=True."""
        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
        with self.assertRaises(IOError):
            c.remove("foo")
        c.remove("foo", recursive=True)
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.portable_manifest_text())

    def test_copy_to_file_in_dir(self):
        """copy() to 'foo/count2.txt' creates the ./foo stream with the new name."""
        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        c.copy("count1.txt", "foo/count2.txt")
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c.portable_manifest_text())

    def test_copy_file(self):
        """copy() within the same stream adds a second name for the same data."""
        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        c.copy("count1.txt", "count2.txt")
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n", c.portable_manifest_text())

    def test_copy_to_existing_dir(self):
        """copy() to an existing directory keeps the source file name."""
        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
        c.copy("count1.txt", "foo")
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n", c.portable_manifest_text())

    def test_copy_to_new_dir(self):
        """copy() to 'foo/' creates the directory and keeps the source file name."""
        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        c.copy("count1.txt", "foo/")
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.portable_manifest_text())

    def test_rename_file(self):
        """rename() replaces the old file name with the new one."""
        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        c.rename("count1.txt", "count2.txt")
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c.manifest_text())

    def test_move_file_to_dir(self):
        """rename() into a directory made with mkdirs() moves the file there."""
        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        c.mkdirs("foo")
        c.rename("count1.txt", "foo/count2.txt")
        self.assertEqual("./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c.manifest_text())

    def test_move_file_to_other(self):
        """rename() with source_collection moves a file between two collections."""
        c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        c2 = Collection()
        c2.rename("count1.txt", "count2.txt", source_collection=c1)
        self.assertEqual("", c1.manifest_text())
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c2.manifest_text())

    def test_clone(self):
        """clone() yields a collection with the same portable manifest."""
        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
        cl = c.clone()
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", cl.portable_manifest_text())

    def test_diff_del_add(self):
        """diff() of disjoint collections gives add/del pairs; apply() converges c1 to c2."""
        c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
        d = c2.diff(c1)
        self.assertEqual(sorted(d), [
            ('add', './count1.txt', c1["count1.txt"]),
            ('del', './count2.txt', c2["count2.txt"]),
        ])
        d = c1.diff(c2)
        self.assertEqual(sorted(d), [
            ('add', './count2.txt', c2["count2.txt"]),
            ('del', './count1.txt', c1["count1.txt"]),
        ])
        self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
        c1.apply(d)
        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())

    def test_diff_same(self):
        """diff() of identical collections gives only 'tok' entries; apply() is a no-op."""
        c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        c2 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        d = c2.diff(c1)
        self.assertEqual(d, [('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"])])
        d = c1.diff(c2)
        self.assertEqual(d, [('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"])])

        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
        c1.apply(d)
        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())

    def test_diff_mod(self):
        """A same-named file with different data is reported as 'mod'."""
        c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count1.txt\n')
        d = c2.diff(c1)
        self.assertEqual(d, [('mod', './count1.txt', c2["count1.txt"], c1["count1.txt"])])
        d = c1.diff(c2)
        self.assertEqual(d, [('mod', './count1.txt', c1["count1.txt"], c2["count1.txt"])])

        self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
        c1.apply(d)
        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())

    def test_diff_add(self):
        """A file present on only one side is 'add' or 'del'; the shared file is 'tok'."""
        c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        c2 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 5348b82a029fd9e971a811ce1f71360b+43 0:10:count1.txt 10:20:count2.txt\n')
        d = c2.diff(c1)
        self.assertEqual(sorted(d), [
            ('del', './count2.txt', c2["count2.txt"]),
            ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
        ])
        d = c1.diff(c2)
        self.assertEqual(sorted(d), [
            ('add', './count2.txt', c2["count2.txt"]),
            ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
        ])

        self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
        c1.apply(d)
        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())

    def test_diff_add_in_subcollection(self):
        """A subdirectory present on only one side is reported as a single entry."""
        c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        c2 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
        d = c2.diff(c1)
        self.assertEqual(sorted(d), [
            ('del', './foo', c2["foo"]),
            ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
        ])
        d = c1.diff(c2)
        self.assertEqual(sorted(d), [
            ('add', './foo', c2["foo"]),
            ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
        ])
        self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
        c1.apply(d)
        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())

    def test_diff_del_add_in_subcollection(self):
        """Differences inside a shared subdirectory are reported per file."""
        c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
        c2 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 5348b82a029fd9e971a811ce1f71360b+43 0:3:count3.txt\n')
        d = c2.diff(c1)
        self.assertEqual(sorted(d), [
            ('add', './foo/count2.txt', c1.find("foo/count2.txt")),
            ('del', './foo/count3.txt', c2.find("foo/count3.txt")),
            ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
        ])
        d = c1.diff(c2)
        self.assertEqual(sorted(d), [
            ('add', './foo/count3.txt', c2.find("foo/count3.txt")),
            ('del', './foo/count2.txt', c1.find("foo/count2.txt")),
            ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
        ])

        self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
        c1.apply(d)
        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())

    def test_diff_mod_in_subcollection(self):
        """'foo' being a directory on one side and a file on the other is a 'mod'."""
        c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
        c2 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:3:foo\n')
        d = c2.diff(c1)
        self.assertEqual(sorted(d), [
            ('mod', './foo', c2["foo"], c1["foo"]),
            ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
        ])
        d = c1.diff(c2)
        self.assertEqual(sorted(d), [
            ('mod', './foo', c1["foo"], c2["foo"]),
            ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
        ])

        self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
        c1.apply(d)
        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())

    def test_conflict_keep_local_change(self):
        """apply() does not delete a file that was rewritten after the diff was taken."""
        c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
        d = c1.diff(c2)
        self.assertEqual(sorted(d), [
            ('add', './count2.txt', c2["count2.txt"]),
            ('del', './count1.txt', c1["count1.txt"]),
        ])
        f = c1.open("count1.txt", "wb")
        f.write(b"zzzzz")

        # c1 changed, so it should not be deleted.
        c1.apply(d)
        self.assertEqual(c1.portable_manifest_text(), ". 95ebc3c7b3b9f1d2c40fec14415d3cb8+5 5348b82a029fd9e971a811ce1f71360b+43 0:5:count1.txt 5:10:count2.txt\n")

    def test_conflict_mod(self):
        """A 'mod' for a locally rewritten file lands in a ~conflict~ file."""
        c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt')
        c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count1.txt')
        d = c1.diff(c2)
        self.assertEqual(d, [('mod', './count1.txt', c1["count1.txt"], c2["count1.txt"])])
        f = c1.open("count1.txt", "wb")
        f.write(b"zzzzz")

        # c1 changed, so c2 mod will go to a conflict file
        c1.apply(d)
        self.assertRegex(
            c1.portable_manifest_text(),
            r"\. 95ebc3c7b3b9f1d2c40fec14415d3cb8\+5 5348b82a029fd9e971a811ce1f71360b\+43 0:5:count1\.txt 5:10:count1\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")

    def test_conflict_add(self):
        """An 'add' for a name that was created locally lands in a ~conflict~ file."""
        c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
        c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count1.txt\n')
        d = c1.diff(c2)
        self.assertEqual(sorted(d), [
            ('add', './count1.txt', c2["count1.txt"]),
            ('del', './count2.txt', c1["count2.txt"]),
        ])
        f = c1.open("count1.txt", "wb")
        f.write(b"zzzzz")

        # c1 added count1.txt, so c2 add will go to a conflict file
        c1.apply(d)
        self.assertRegex(
            c1.portable_manifest_text(),
            r"\. 95ebc3c7b3b9f1d2c40fec14415d3cb8\+5 5348b82a029fd9e971a811ce1f71360b\+43 0:5:count1\.txt 5:10:count1\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")

    def test_conflict_del(self):
        """A 'mod' for a locally removed file lands in a ~conflict~ file."""
        c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt')
        c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count1.txt')
        d = c1.diff(c2)
        self.assertEqual(d, [('mod', './count1.txt', c1["count1.txt"], c2["count1.txt"])])
        c1.remove("count1.txt")

        # c1 deleted, so c2 mod will go to a conflict file
        c1.apply(d)
        self.assertRegex(
            c1.portable_manifest_text(),
            r"\. 5348b82a029fd9e971a811ce1f71360b\+43 0:10:count1\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")

    def test_notify(self):
        """Opening a new file for writing sends an ADD event to subscribers."""
        c1 = Collection()
        events = []
        c1.subscribe(lambda event, collection, name, item: events.append((event, collection, name, item)))
        f = c1.open("foo.txt", "wb")
        self.assertEqual(events[0], (arvados.collection.ADD, c1, "foo.txt", f.arvadosfile))

    def test_open_w(self):
        """Opening an existing file with 'wb' and closing it leaves it with size 0."""
        c1 = Collection(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n")
        self.assertEqual(c1["count1.txt"].size(), 10)
        c1.open("count1.txt", "wb").close()
        self.assertEqual(c1["count1.txt"].size(), 0)
1159
1160
class NewCollectionTestCaseWithServersAndTokens(run_test_server.TestCaseWithServers):
    """Check that repacked blocks get permission tokens in the manifest."""
    MAIN_SERVER = {}
    KEEP_SERVER = {}

    def setUp(self):
        """Remember the real KeepClient.put so the test can wrap it."""
        self.keep_put = getattr(arvados.keep.KeepClient, 'put')

    def test_repacked_block_submission_get_permission_token(self):
        '''
        Make sure that those blocks that are committed after repacking small ones,
        get their permission tokens assigned on the collection manifest.
        '''
        def wrapped_keep_put(*args, **kwargs):
            # Simulate slow put operations
            time.sleep(1)
            return self.keep_put(*args, **kwargs)

        # Raw string: \+ and \d are regex escapes and must not be read as
        # (invalid) string escape sequences.
        re_locator = r"[0-9a-f]{32}\+\d+\+A[a-f0-9]{40}@[a-f0-9]{8}"

        with mock.patch('arvados.keep.KeepClient.put', autospec=True) as mocked_put:
            mocked_put.side_effect = wrapped_keep_put
            c = Collection()
            # Write 70 files ~1MiB each so we force to produce 1 big block by repacking
            # small ones before finishing the upload.
            for i in range(70):
                f = c.open("file_{}.txt".format(i), 'wb')
                f.write(random.choice('abcdefghijklmnopqrstuvwxyz') * (2**20+i))
                f.close(flush=False)
            # We should get 2 blocks with their tokens
            self.assertEqual(len(re.findall(re_locator, c.manifest_text())), 2)
1191
1192
class NewCollectionTestCaseWithServers(run_test_server.TestCaseWithServers):
    """Manifest-text checks on new, unsaved Collection objects."""

    @staticmethod
    def _write_without_flush(collection, name, data):
        # Open `name` for binary writing in `collection`, write `data`, and
        # close the writer with flush=False.
        writer = collection.open(name, "wb")
        writer.write(data)
        writer.close(flush=False)

    def _collection_with_two_small_files(self):
        # Build a Collection holding count.txt (10 bytes) and foo.txt
        # (3 bytes), both closed with flush=False.
        coll = Collection()
        self._write_without_flush(coll, "count.txt", b"0123456789")
        self._write_without_flush(coll, "foo.txt", b"foo")
        return coll

    def test_get_manifest_text_only_committed(self):
        # Same keyword arguments for both _get_manifest_text() calls below.
        manifest_opts = dict(strip=False, normalize=False, only_committed=True)
        coll = Collection()
        with coll.open("count.txt", "wb") as count_file:
            # One file committed
            with coll.open("foo.txt", "wb") as foo_file:
                foo_file.write(b"foo")
                foo_file.flush()  # Force block commit
            count_file.write(b"0123456789")
            # Other file not committed. Block not written to keep yet, so
            # count.txt is expected to show up as a zero-length segment.
            self.assertEqual(
                coll._get_manifest_text(".", **manifest_opts),
                '. acbd18db4cc2f85cedef654fccc4a4d8+3 0:0:count.txt 0:3:foo.txt\n')
            # Commit the second file's block before it gets closed...
            count_file.flush()  # Force block commit
        # ...and now both blocks are expected in the manifest.
        self.assertEqual(
            coll._get_manifest_text(".", **manifest_opts),
            ". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:10:count.txt 10:3:foo.txt\n")

    def test_only_small_blocks_are_packed_together(self):
        # Write a couple of small files,
        coll = self._collection_with_two_small_files()
        # Then, write a big file, it shouldn't be packed with the ones above
        self._write_without_flush(
            coll, "bigfile.txt",
            b"x" * 1024 * 1024 * 33)  # 33 MB > KEEP_BLOCK_SIZE/2
        expected = '. 2d303c138c118af809f39319e5d507e9+34603008 a8430a058b8fbf408e1931b794dbd6fb+13 0:34603008:bigfile.txt 34603008:10:count.txt 34603018:3:foo.txt\n'
        self.assertEqual(coll.manifest_text("."), expected)

    def test_flush_after_small_block_packing(self):
        # Manifest expected both before and after the extra flush below.
        packed_manifest = '. a8430a058b8fbf408e1931b794dbd6fb+13 0:10:count.txt 10:3:foo.txt\n'

        # Write a couple of small files,
        coll = self._collection_with_two_small_files()
        self.assertEqual(coll.manifest_text(), packed_manifest)

        # Reopen one of them and close it with flush=True without writing.
        coll.open("count.txt", "rb+").close(flush=True)
        self.assertEqual(coll.manifest_text(), packed_manifest)

    def test_write_after_small_block_packing2(self):
        # Write a couple of small files,
        coll = self._collection_with_two_small_files()
        self.assertEqual(
            coll.manifest_text(),
            '. a8430a058b8fbf408e1931b794dbd6fb+13 0:10:count.txt 10:3:foo.txt\n')

        # Overwrite the first three bytes of count.txt, again without flush.
        writer = coll.open("count.txt", "rb+")
        writer.write(b"abc")
        writer.close(flush=False)

        self.assertEqual(
            coll.manifest_text(),
            '. 900150983cd24fb0d6963f7d28e17f72+3 a8430a058b8fbf408e1931b794dbd6fb+13 0:3:count.txt 6:7:count.txt 13:3:foo.txt\n')

    def test_small_block_packing_with_overwrite(self):
        coll = Collection()
        # Create each empty file, then write two bytes at offset 0.
        for name, payload in (("b1", b"b1"), ("b2", b"b2")):
            coll.open(name, "wb").close()
            coll[name].writeto(0, payload, 0)

        # Overwrite the first file's content in place.
        coll["b1"].writeto(0, b"1b", 0)

        # Both files are expected to reference the same 4-byte block.
        self.assertEqual(coll.manifest_text(), ". ed4f3f67c70b02b29c50ce1ea26666bd+4 0:2:b1 2:2:b2\n")
        self.assertEqual(coll["b1"].manifest_text(), ". ed4f3f67c70b02b29c50ce1ea26666bd+4 0:2:b1\n")
        self.assertEqual(coll["b2"].manifest_text(), ". ed4f3f67c70b02b29c50ce1ea26666bd+4 2:2:b2\n")
1292
1293
class CollectionCreateUpdateTest(run_test_server.TestCaseWithServers):
    """Create, save, diff/apply and update checks for Collection objects."""

    MAIN_SERVER = {}
    KEEP_SERVER = {}

    def create_count_txt(self):
        # Save a new empty collection, then write count.txt into it without
        # saving again. Returns the collection with that unsaved change.
        empty_pdh = "d41d8cd98f00b204e9800998ecf8427e+0"

        coll = Collection()
        coll.save_new("CollectionCreateUpdateTest", ensure_unique_name=True)
        self.assertEqual(coll.portable_data_hash(), empty_pdh)
        self.assertEqual(coll.api_response()["portable_data_hash"], empty_pdh)

        with coll.open("count.txt", "wb") as writer:
            writer.write(b"0123456789")

        self.assertEqual(
            coll.portable_manifest_text(),
            ". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n")

        return coll

    def _assert_saved_as_intermediate(self, coll):
        # Shared assertions for the save()/save_new() tests: the manifest
        # locator carries a permission token, and the API response reflects
        # the storage classes, properties and trash_at that were passed in.
        self.assertRegex(
            coll.manifest_text(),
            r"^\. 781e5e245d69b566979b86e28d23f2c7\+10\+A[a-f0-9]{40}@[a-f0-9]{8} 0:10:count\.txt$")
        self.assertEqual(coll.api_response()["storage_classes_desired"], ['archive'])
        self.assertEqual(coll.api_response()["properties"], {'type' : 'Intermediate'})
        self.assertEqual(coll.api_response()["trash_at"], '2111-01-01T11:11:11.111111000Z')

    def test_create_and_save(self):
        coll = self.create_count_txt()
        coll.save(
            properties={'type' : 'Intermediate'},
            storage_classes=['archive'],
            trash_at=datetime.datetime(2111, 1, 1, 11, 11, 11, 111111))
        self._assert_saved_as_intermediate(coll)

    def test_create_and_save_new(self):
        coll = self.create_count_txt()
        coll.save_new(
            properties={'type' : 'Intermediate'},
            storage_classes=['archive'],
            trash_at=datetime.datetime(2111, 1, 1, 11, 11, 11, 111111))
        self._assert_saved_as_intermediate(coll)

    def test_create_and_save_after_commiting(self):
        coll = self.create_count_txt()
        # First save with one set of attributes...
        coll.save(
            properties={'type' : 'Intermediate'},
            storage_classes=['hot'],
            trash_at=datetime.datetime(2111, 1, 1, 11, 11, 11, 111111))
        # ...then save again with different ones; the latter must win.
        coll.save(
            properties={'type' : 'Output'},
            storage_classes=['cold'],
            trash_at=datetime.datetime(2222, 2, 2, 22, 22, 22, 222222))

        response = coll.api_response
        self.assertEqual(response()["storage_classes_desired"], ['cold'])
        self.assertEqual(response()["properties"], {'type' : 'Output'})
        self.assertEqual(response()["trash_at"], '2222-02-02T22:22:22.222222000Z')

    def test_create_diff_apply(self):
        original = self.create_count_txt()
        original.save()

        # Second Collection object on the same record, with count.txt
        # rewritten.
        modified = Collection(original.manifest_locator())
        with modified.open("count.txt", "wb") as writer:
            writer.write(b"abcdefg")

        changes = original.diff(modified)
        expected_change = (
            arvados.collection.MOD,
            u'./count.txt',
            original["count.txt"],
            modified["count.txt"],
        )
        self.assertEqual(changes[0], expected_change)

        # Applying the diff must bring both collections to the same content.
        original.apply(changes)
        self.assertEqual(original.portable_data_hash(), modified.portable_data_hash())

    def test_diff_apply_with_token(self):
        baseline = CollectionReader(". 781e5e245d69b566979b86e28d23f2c7+10+A715fd31f8111894f717eb1003c1b0216799dd9ec@54f5dd1a 0:10:count.txt\n")
        # Same block as the baseline, but without a permission token.
        target = Collection(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n")
        other = CollectionReader(". 7ac66c0f148de9519b8bd264312c4d64+7+A715fd31f8111894f717eb1003c1b0216799dd9ec@54f5dd1a 0:7:count.txt\n")

        changes = baseline.diff(other)
        self.assertEqual(
            changes,
            [('mod', u'./count.txt', target["count.txt"], other["count.txt"])])

        target.apply(changes)

        self.assertEqual(
            target.manifest_text(),
            ". 7ac66c0f148de9519b8bd264312c4d64+7+A715fd31f8111894f717eb1003c1b0216799dd9ec@54f5dd1a 0:7:count.txt\n")

    def test_create_and_update(self):
        first = self.create_count_txt()
        first.save()

        # Change and save the same record through a second Collection object.
        second = arvados.collection.Collection(first.manifest_locator())
        with second.open("count.txt", "wb") as writer:
            writer.write(b"abcdefg")
        second.save()

        # The first object only matches the second one after update().
        self.assertNotEqual(first.portable_data_hash(), second.portable_data_hash())
        first.update()
        self.assertEqual(first.portable_data_hash(), second.portable_data_hash())

    def test_create_and_update_with_conflict(self):
        first = self.create_count_txt()
        first.save()

        # Unsaved local change on the first object...
        with first.open("count.txt", "wb") as writer:
            writer.write(b"XYZ")

        # ...and a different, saved change through a second object.
        second = arvados.collection.Collection(first.manifest_locator())
        with second.open("count.txt", "wb") as writer:
            writer.write(b"abcdefg")
        second.save()

        first.update()
        # The expected manifest has both versions, one of them under a
        # "~<timestamp>~conflict~" suffixed name.
        self.assertRegex(
            first.manifest_text(),
            r"\. e65075d550f9b5bf9992fa1d71a131be\+3\S* 7ac66c0f148de9519b8bd264312c4d64\+7\S* 0:3:count\.txt 3:7:count\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")

    def test_pdh_is_native_str(self):
        coll = self.create_count_txt()
        # portable_data_hash() must return the same type as a '' literal.
        self.assertEqual(type(''), type(coll.portable_data_hash()))
1419
1420
# Run the tests in this module when the file is executed directly.
if __name__ == '__main__':
    unittest.main()