Fix 2.4.2 upgrade notes formatting refs #19330
[arvados.git] / sdk / python / tests / test_collections.py
1 # Copyright (C) The Arvados Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: Apache-2.0
4
5 from __future__ import absolute_import
6
7 from builtins import object
8 import arvados
9 import copy
10 import mock
11 import os
12 import random
13 import re
14 import sys
15 import datetime
16 import ciso8601
17 import time
18 import unittest
19
20 from . import run_test_server
21 from arvados._ranges import Range, LocatorAndRange
22 from arvados.collection import Collection, CollectionReader
23 from . import arvados_testutil as tutil
24
class TestResumableWriter(arvados.ResumableCollectionWriter):
    # Small block size for tests: PUT to Keep every 1K.
    KEEP_BLOCK_SIZE = 1024

    def current_state(self):
        """Return the result of dump_state(), called with copy.deepcopy."""
        snapshot = self.dump_state(copy.deepcopy)
        return snapshot
30
31
32 class ArvadosCollectionsTest(run_test_server.TestCaseWithServers,
33                              tutil.ArvadosBaseTestCase):
34     MAIN_SERVER = {}
35
36     @classmethod
37     def setUpClass(cls):
38         super(ArvadosCollectionsTest, cls).setUpClass()
39         # need admin privileges to make collections with unsigned blocks
40         run_test_server.authorize_with('admin')
41         cls.api_client = arvados.api('v1')
42         cls.keep_client = arvados.KeepClient(api_client=cls.api_client,
43                                              local_store=cls.local_store)
44
45     def write_foo_bar_baz(self):
46         cw = arvados.CollectionWriter(self.api_client)
47         self.assertEqual(cw.current_stream_name(), '.',
48                          'current_stream_name() should be "." now')
49         cw.set_current_file_name('foo.txt')
50         cw.write(b'foo')
51         self.assertEqual(cw.current_file_name(), 'foo.txt',
52                          'current_file_name() should be foo.txt now')
53         cw.start_new_file('bar.txt')
54         cw.write(b'bar')
55         cw.start_new_stream('baz')
56         cw.write(b'baz')
57         cw.set_current_file_name('baz.txt')
58         self.assertEqual(cw.manifest_text(),
59                          ". 3858f62230ac3c915f300c664312c63f+6 0:3:foo.txt 3:3:bar.txt\n" +
60                          "./baz 73feffa4b7f6bb68e44cf984c85f6e88+3 0:3:baz.txt\n",
61                          "wrong manifest: got {}".format(cw.manifest_text()))
62         cw.save_new()
63         return cw.portable_data_hash()
64
65     def test_pdh_is_native_str(self):
66         pdh = self.write_foo_bar_baz()
67         self.assertEqual(type(''), type(pdh))
68
69     def test_keep_local_store(self):
70         self.assertEqual(self.keep_client.put(b'foo'), 'acbd18db4cc2f85cedef654fccc4a4d8+3', 'wrong md5 hash from Keep.put')
71         self.assertEqual(self.keep_client.get('acbd18db4cc2f85cedef654fccc4a4d8+3'), b'foo', 'wrong data from Keep.get')
72
73     def test_local_collection_writer(self):
74         self.assertEqual(self.write_foo_bar_baz(),
75                          '23ca013983d6239e98931cc779e68426+114',
76                          'wrong locator hash: ' + self.write_foo_bar_baz())
77
78     def test_local_collection_reader(self):
79         foobarbaz = self.write_foo_bar_baz()
80         cr = arvados.CollectionReader(
81             foobarbaz + '+Xzizzle', self.api_client)
82         got = []
83         for s in cr.all_streams():
84             for f in s.all_files():
85                 got += [[f.size(), f.stream_name(), f.name(), f.read(2**26)]]
86         expected = [[3, '.', 'foo.txt', b'foo'],
87                     [3, '.', 'bar.txt', b'bar'],
88                     [3, './baz', 'baz.txt', b'baz']]
89         self.assertEqual(got,
90                          expected)
91         stream0 = cr.all_streams()[0]
92         self.assertEqual(stream0.readfrom(0, 0),
93                          b'',
94                          'reading zero bytes should have returned empty string')
95         self.assertEqual(stream0.readfrom(0, 2**26),
96                          b'foobar',
97                          'reading entire stream failed')
98         self.assertEqual(stream0.readfrom(2**26, 0),
99                          b'',
100                          'reading zero bytes should have returned empty string')
101         self.assertEqual(3, len(cr))
102         self.assertTrue(cr)
103
104     def _test_subset(self, collection, expected):
105         cr = arvados.CollectionReader(collection, self.api_client)
106         for s in cr.all_streams():
107             for ex in expected:
108                 if ex[0] == s:
109                     f = s.files()[ex[2]]
110                     got = [f.size(), f.stream_name(), f.name(), "".join(f.readall(2**26))]
111                     self.assertEqual(got,
112                                      ex,
113                                      'all_files|as_manifest did not preserve manifest contents: got %s expected %s' % (got, ex))
114
115     def test_collection_manifest_subset(self):
116         foobarbaz = self.write_foo_bar_baz()
117         self._test_subset(foobarbaz,
118                           [[3, '.',     'bar.txt', b'bar'],
119                            [3, '.',     'foo.txt', b'foo'],
120                            [3, './baz', 'baz.txt', b'baz']])
121         self._test_subset((". %s %s 0:3:foo.txt 3:3:bar.txt\n" %
122                            (self.keep_client.put(b"foo"),
123                             self.keep_client.put(b"bar"))),
124                           [[3, '.', 'bar.txt', b'bar'],
125                            [3, '.', 'foo.txt', b'foo']])
126         self._test_subset((". %s %s 0:2:fo.txt 2:4:obar.txt\n" %
127                            (self.keep_client.put(b"foo"),
128                             self.keep_client.put(b"bar"))),
129                           [[2, '.', 'fo.txt', b'fo'],
130                            [4, '.', 'obar.txt', b'obar']])
131         self._test_subset((". %s %s 0:2:fo.txt 2:0:zero.txt 2:2:ob.txt 4:2:ar.txt\n" %
132                            (self.keep_client.put(b"foo"),
133                             self.keep_client.put(b"bar"))),
134                           [[2, '.', 'ar.txt', b'ar'],
135                            [2, '.', 'fo.txt', b'fo'],
136                            [2, '.', 'ob.txt', b'ob'],
137                            [0, '.', 'zero.txt', b'']])
138
139     def test_collection_empty_file(self):
140         cw = arvados.CollectionWriter(self.api_client)
141         cw.start_new_file('zero.txt')
142         cw.write(b'')
143
144         self.assertEqual(cw.manifest_text(), ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:zero.txt\n")
145         self.check_manifest_file_sizes(cw.manifest_text(), [0])
146         cw = arvados.CollectionWriter(self.api_client)
147         cw.start_new_file('zero.txt')
148         cw.write(b'')
149         cw.start_new_file('one.txt')
150         cw.write(b'1')
151         cw.start_new_stream('foo')
152         cw.start_new_file('zero.txt')
153         cw.write(b'')
154         self.check_manifest_file_sizes(cw.manifest_text(), [0,1,0])
155
156     def test_no_implicit_normalize(self):
157         cw = arvados.CollectionWriter(self.api_client)
158         cw.start_new_file('b')
159         cw.write(b'b')
160         cw.start_new_file('a')
161         cw.write(b'')
162         self.check_manifest_file_sizes(cw.manifest_text(), [1,0])
163         self.check_manifest_file_sizes(
164             arvados.CollectionReader(
165                 cw.manifest_text()).manifest_text(normalize=True),
166             [0,1])
167
168     def check_manifest_file_sizes(self, manifest_text, expect_sizes):
169         cr = arvados.CollectionReader(manifest_text, self.api_client)
170         got_sizes = []
171         for f in cr.all_files():
172             got_sizes += [f.size()]
173         self.assertEqual(got_sizes, expect_sizes, "got wrong file sizes %s, expected %s" % (got_sizes, expect_sizes))
174
175     def test_normalized_collection(self):
176         m1 = """. 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt
177 . 085c37f02916da1cad16f93c54d899b7+41 0:41:md5sum.txt
178 . 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md5sum.txt
179 """
180         self.assertEqual(arvados.CollectionReader(m1, self.api_client).manifest_text(normalize=True),
181                          """. 5348b82a029fd9e971a811ce1f71360b+43 085c37f02916da1cad16f93c54d899b7+41 8b22da26f9f433dea0a10e5ec66d73ba+43 0:127:md5sum.txt
182 """)
183
184         m2 = """. 204e43b8a1185621ca55a94839582e6f+67108864 b9677abbac956bd3e86b1deb28dfac03+67108864 fc15aff2a762b13f521baf042140acec+67108864 323d2a3ce20370c4ca1d3462a344f8fd+25885655 0:227212247:var-GS000016015-ASM.tsv.bz2
185 """
186         self.assertEqual(arvados.CollectionReader(m2, self.api_client).manifest_text(normalize=True), m2)
187
188         m3 = """. 5348b82a029fd9e971a811ce1f71360b+43 3:40:md5sum.txt
189 . 085c37f02916da1cad16f93c54d899b7+41 0:41:md5sum.txt
190 . 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md5sum.txt
191 """
192         self.assertEqual(arvados.CollectionReader(m3, self.api_client).manifest_text(normalize=True),
193                          """. 5348b82a029fd9e971a811ce1f71360b+43 085c37f02916da1cad16f93c54d899b7+41 8b22da26f9f433dea0a10e5ec66d73ba+43 3:124:md5sum.txt
194 """)
195
196         m4 = """. 204e43b8a1185621ca55a94839582e6f+67108864 0:3:foo/bar
197 ./zzz 204e43b8a1185621ca55a94839582e6f+67108864 0:999:zzz
198 ./foo 323d2a3ce20370c4ca1d3462a344f8fd+25885655 0:3:bar
199 """
200         self.assertEqual(arvados.CollectionReader(m4, self.api_client).manifest_text(normalize=True),
201                          """./foo 204e43b8a1185621ca55a94839582e6f+67108864 323d2a3ce20370c4ca1d3462a344f8fd+25885655 0:3:bar 67108864:3:bar
202 ./zzz 204e43b8a1185621ca55a94839582e6f+67108864 0:999:zzz
203 """)
204
205         m5 = """. 204e43b8a1185621ca55a94839582e6f+67108864 0:3:foo/bar
206 ./zzz 204e43b8a1185621ca55a94839582e6f+67108864 0:999:zzz
207 ./foo 204e43b8a1185621ca55a94839582e6f+67108864 3:3:bar
208 """
209         self.assertEqual(arvados.CollectionReader(m5, self.api_client).manifest_text(normalize=True),
210                          """./foo 204e43b8a1185621ca55a94839582e6f+67108864 0:6:bar
211 ./zzz 204e43b8a1185621ca55a94839582e6f+67108864 0:999:zzz
212 """)
213
214         with self.data_file('1000G_ref_manifest') as f6:
215             m6 = f6.read()
216             self.assertEqual(arvados.CollectionReader(m6, self.api_client).manifest_text(normalize=True), m6)
217
218         with self.data_file('jlake_manifest') as f7:
219             m7 = f7.read()
220             self.assertEqual(arvados.CollectionReader(m7, self.api_client).manifest_text(normalize=True), m7)
221
222         m8 = """./a\\040b\\040c 59ca0efa9f5633cb0371bbc0355478d8+13 0:13:hello\\040world.txt
223 """
224         self.assertEqual(arvados.CollectionReader(m8, self.api_client).manifest_text(normalize=True), m8)
225
226     def test_locators_and_ranges(self):
227         blocks2 = [Range('a', 0, 10),
228                    Range('b', 10, 10),
229                    Range('c', 20, 10),
230                    Range('d', 30, 10),
231                    Range('e', 40, 10),
232                    Range('f', 50, 10)]
233
234         self.assertEqual(arvados.locators_and_ranges(blocks2,  2,  2), [LocatorAndRange('a', 10, 2, 2)])
235         self.assertEqual(arvados.locators_and_ranges(blocks2, 12, 2), [LocatorAndRange('b', 10, 2, 2)])
236         self.assertEqual(arvados.locators_and_ranges(blocks2, 22, 2), [LocatorAndRange('c', 10, 2, 2)])
237         self.assertEqual(arvados.locators_and_ranges(blocks2, 32, 2), [LocatorAndRange('d', 10, 2, 2)])
238         self.assertEqual(arvados.locators_and_ranges(blocks2, 42, 2), [LocatorAndRange('e', 10, 2, 2)])
239         self.assertEqual(arvados.locators_and_ranges(blocks2, 52, 2), [LocatorAndRange('f', 10, 2, 2)])
240         self.assertEqual(arvados.locators_and_ranges(blocks2, 62, 2), [])
241         self.assertEqual(arvados.locators_and_ranges(blocks2, -2, 2), [])
242
243         self.assertEqual(arvados.locators_and_ranges(blocks2,  0,  2), [LocatorAndRange('a', 10, 0, 2)])
244         self.assertEqual(arvados.locators_and_ranges(blocks2, 10, 2), [LocatorAndRange('b', 10, 0, 2)])
245         self.assertEqual(arvados.locators_and_ranges(blocks2, 20, 2), [LocatorAndRange('c', 10, 0, 2)])
246         self.assertEqual(arvados.locators_and_ranges(blocks2, 30, 2), [LocatorAndRange('d', 10, 0, 2)])
247         self.assertEqual(arvados.locators_and_ranges(blocks2, 40, 2), [LocatorAndRange('e', 10, 0, 2)])
248         self.assertEqual(arvados.locators_and_ranges(blocks2, 50, 2), [LocatorAndRange('f', 10, 0, 2)])
249         self.assertEqual(arvados.locators_and_ranges(blocks2, 60, 2), [])
250         self.assertEqual(arvados.locators_and_ranges(blocks2, -2, 2), [])
251
252         self.assertEqual(arvados.locators_and_ranges(blocks2,  9,  2), [LocatorAndRange('a', 10, 9, 1), LocatorAndRange('b', 10, 0, 1)])
253         self.assertEqual(arvados.locators_and_ranges(blocks2, 19, 2), [LocatorAndRange('b', 10, 9, 1), LocatorAndRange('c', 10, 0, 1)])
254         self.assertEqual(arvados.locators_and_ranges(blocks2, 29, 2), [LocatorAndRange('c', 10, 9, 1), LocatorAndRange('d', 10, 0, 1)])
255         self.assertEqual(arvados.locators_and_ranges(blocks2, 39, 2), [LocatorAndRange('d', 10, 9, 1), LocatorAndRange('e', 10, 0, 1)])
256         self.assertEqual(arvados.locators_and_ranges(blocks2, 49, 2), [LocatorAndRange('e', 10, 9, 1), LocatorAndRange('f', 10, 0, 1)])
257         self.assertEqual(arvados.locators_and_ranges(blocks2, 59, 2), [LocatorAndRange('f', 10, 9, 1)])
258
259
260         blocks3 = [Range('a', 0, 10),
261                   Range('b', 10, 10),
262                   Range('c', 20, 10),
263                   Range('d', 30, 10),
264                   Range('e', 40, 10),
265                   Range('f', 50, 10),
266                    Range('g', 60, 10)]
267
268         self.assertEqual(arvados.locators_and_ranges(blocks3,  2,  2), [LocatorAndRange('a', 10, 2, 2)])
269         self.assertEqual(arvados.locators_and_ranges(blocks3, 12, 2), [LocatorAndRange('b', 10, 2, 2)])
270         self.assertEqual(arvados.locators_and_ranges(blocks3, 22, 2), [LocatorAndRange('c', 10, 2, 2)])
271         self.assertEqual(arvados.locators_and_ranges(blocks3, 32, 2), [LocatorAndRange('d', 10, 2, 2)])
272         self.assertEqual(arvados.locators_and_ranges(blocks3, 42, 2), [LocatorAndRange('e', 10, 2, 2)])
273         self.assertEqual(arvados.locators_and_ranges(blocks3, 52, 2), [LocatorAndRange('f', 10, 2, 2)])
274         self.assertEqual(arvados.locators_and_ranges(blocks3, 62, 2), [LocatorAndRange('g', 10, 2, 2)])
275
276
277         blocks = [Range('a', 0, 10),
278                   Range('b', 10, 15),
279                   Range('c', 25, 5)]
280         self.assertEqual(arvados.locators_and_ranges(blocks, 1, 0), [])
281         self.assertEqual(arvados.locators_and_ranges(blocks, 0, 5), [LocatorAndRange('a', 10, 0, 5)])
282         self.assertEqual(arvados.locators_and_ranges(blocks, 3, 5), [LocatorAndRange('a', 10, 3, 5)])
283         self.assertEqual(arvados.locators_and_ranges(blocks, 0, 10), [LocatorAndRange('a', 10, 0, 10)])
284
285         self.assertEqual(arvados.locators_and_ranges(blocks, 0, 11), [LocatorAndRange('a', 10, 0, 10),
286                                                                       LocatorAndRange('b', 15, 0, 1)])
287         self.assertEqual(arvados.locators_and_ranges(blocks, 1, 11), [LocatorAndRange('a', 10, 1, 9),
288                                                                       LocatorAndRange('b', 15, 0, 2)])
289         self.assertEqual(arvados.locators_and_ranges(blocks, 0, 25), [LocatorAndRange('a', 10, 0, 10),
290                                                                       LocatorAndRange('b', 15, 0, 15)])
291
292         self.assertEqual(arvados.locators_and_ranges(blocks, 0, 30), [LocatorAndRange('a', 10, 0, 10),
293                                                                       LocatorAndRange('b', 15, 0, 15),
294                                                                       LocatorAndRange('c', 5, 0, 5)])
295         self.assertEqual(arvados.locators_and_ranges(blocks, 1, 30), [LocatorAndRange('a', 10, 1, 9),
296                                                                       LocatorAndRange('b', 15, 0, 15),
297                                                                       LocatorAndRange('c', 5, 0, 5)])
298         self.assertEqual(arvados.locators_and_ranges(blocks, 0, 31), [LocatorAndRange('a', 10, 0, 10),
299                                                                       LocatorAndRange('b', 15, 0, 15),
300                                                                       LocatorAndRange('c', 5, 0, 5)])
301
302         self.assertEqual(arvados.locators_and_ranges(blocks, 15, 5), [LocatorAndRange('b', 15, 5, 5)])
303
304         self.assertEqual(arvados.locators_and_ranges(blocks, 8, 17), [LocatorAndRange('a', 10, 8, 2),
305                                                                       LocatorAndRange('b', 15, 0, 15)])
306
307         self.assertEqual(arvados.locators_and_ranges(blocks, 8, 20), [LocatorAndRange('a', 10, 8, 2),
308                                                                       LocatorAndRange('b', 15, 0, 15),
309                                                                       LocatorAndRange('c', 5, 0, 3)])
310
311         self.assertEqual(arvados.locators_and_ranges(blocks, 26, 2), [LocatorAndRange('c', 5, 1, 2)])
312
313         self.assertEqual(arvados.locators_and_ranges(blocks, 9, 15), [LocatorAndRange('a', 10, 9, 1),
314                                                                       LocatorAndRange('b', 15, 0, 14)])
315         self.assertEqual(arvados.locators_and_ranges(blocks, 10, 15), [LocatorAndRange('b', 15, 0, 15)])
316         self.assertEqual(arvados.locators_and_ranges(blocks, 11, 15), [LocatorAndRange('b', 15, 1, 14),
317                                                                        LocatorAndRange('c', 5, 0, 1)])
318
319     class MockKeep(object):
320         def __init__(self, content, num_retries=0):
321             self.content = content
322
323         def get(self, locator, num_retries=0, prefetch=False):
324             return self.content[locator]
325
326     def test_stream_reader(self):
327         keepblocks = {
328             'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa+10': b'abcdefghij',
329             'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb+15': b'klmnopqrstuvwxy',
330             'cccccccccccccccccccccccccccccccc+5': b'z0123',
331         }
332         mk = self.MockKeep(keepblocks)
333
334         sr = arvados.StreamReader([".", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa+10", "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb+15", "cccccccccccccccccccccccccccccccc+5", "0:30:foo"], mk)
335
336         content = b'abcdefghijklmnopqrstuvwxyz0123456789'
337
338         self.assertEqual(sr.readfrom(0, 30), content[0:30])
339         self.assertEqual(sr.readfrom(2, 30), content[2:30])
340
341         self.assertEqual(sr.readfrom(2, 8), content[2:10])
342         self.assertEqual(sr.readfrom(0, 10), content[0:10])
343
344         self.assertEqual(sr.readfrom(0, 5), content[0:5])
345         self.assertEqual(sr.readfrom(5, 5), content[5:10])
346         self.assertEqual(sr.readfrom(10, 5), content[10:15])
347         self.assertEqual(sr.readfrom(15, 5), content[15:20])
348         self.assertEqual(sr.readfrom(20, 5), content[20:25])
349         self.assertEqual(sr.readfrom(25, 5), content[25:30])
350         self.assertEqual(sr.readfrom(30, 5), b'')
351
352     def test_extract_file(self):
353         m1 = """. 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt
354 . 085c37f02916da1cad16f93c54d899b7+41 0:41:md6sum.txt
355 . 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md7sum.txt
356 . 085c37f02916da1cad16f93c54d899b7+41 5348b82a029fd9e971a811ce1f71360b+43 8b22da26f9f433dea0a10e5ec66d73ba+43 47:80:md8sum.txt
357 . 085c37f02916da1cad16f93c54d899b7+41 5348b82a029fd9e971a811ce1f71360b+43 8b22da26f9f433dea0a10e5ec66d73ba+43 40:80:md9sum.txt
358 """
359
360         m2 = arvados.CollectionReader(m1, self.api_client).manifest_text(normalize=True)
361
362         self.assertEqual(m2,
363                          ". 5348b82a029fd9e971a811ce1f71360b+43 085c37f02916da1cad16f93c54d899b7+41 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md5sum.txt 43:41:md6sum.txt 84:43:md7sum.txt 6:37:md8sum.txt 84:43:md8sum.txt 83:1:md9sum.txt 0:43:md9sum.txt 84:36:md9sum.txt\n")
364         files = arvados.CollectionReader(
365             m2, self.api_client).all_streams()[0].files()
366
367         self.assertEqual(files['md5sum.txt'].as_manifest(),
368                          ". 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt\n")
369         self.assertEqual(files['md6sum.txt'].as_manifest(),
370                          ". 085c37f02916da1cad16f93c54d899b7+41 0:41:md6sum.txt\n")
371         self.assertEqual(files['md7sum.txt'].as_manifest(),
372                          ". 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md7sum.txt\n")
373         self.assertEqual(files['md9sum.txt'].as_manifest(),
374                          ". 085c37f02916da1cad16f93c54d899b7+41 5348b82a029fd9e971a811ce1f71360b+43 8b22da26f9f433dea0a10e5ec66d73ba+43 40:80:md9sum.txt\n")
375
376     def test_write_directory_tree(self):
377         cwriter = arvados.CollectionWriter(self.api_client)
378         cwriter.write_directory_tree(self.build_directory_tree(
379                 ['basefile', 'subdir/subfile']))
380         self.assertEqual(cwriter.manifest_text(),
381                          """. c5110c5ac93202d8e0f9e381f22bac0f+8 0:8:basefile
382 ./subdir 1ca4dec89403084bf282ad31e6cf7972+14 0:14:subfile\n""")
383
384     def test_write_named_directory_tree(self):
385         cwriter = arvados.CollectionWriter(self.api_client)
386         cwriter.write_directory_tree(self.build_directory_tree(
387                 ['basefile', 'subdir/subfile']), 'root')
388         self.assertEqual(
389             cwriter.manifest_text(),
390             """./root c5110c5ac93202d8e0f9e381f22bac0f+8 0:8:basefile
391 ./root/subdir 1ca4dec89403084bf282ad31e6cf7972+14 0:14:subfile\n""")
392
393     def test_write_directory_tree_in_one_stream(self):
394         cwriter = arvados.CollectionWriter(self.api_client)
395         cwriter.write_directory_tree(self.build_directory_tree(
396                 ['basefile', 'subdir/subfile']), max_manifest_depth=0)
397         self.assertEqual(cwriter.manifest_text(),
398                          """. 4ace875ffdc6824a04950f06858f4465+22 0:8:basefile 8:14:subdir/subfile\n""")
399
400     def test_write_directory_tree_with_limited_recursion(self):
401         cwriter = arvados.CollectionWriter(self.api_client)
402         cwriter.write_directory_tree(
403             self.build_directory_tree(['f1', 'd1/f2', 'd1/d2/f3']),
404             max_manifest_depth=1)
405         self.assertEqual(cwriter.manifest_text(),
406                          """. bd19836ddb62c11c55ab251ccaca5645+2 0:2:f1
407 ./d1 50170217e5b04312024aa5cd42934494+13 0:8:d2/f3 8:5:f2\n""")
408
409     def test_write_directory_tree_with_zero_recursion(self):
410         cwriter = arvados.CollectionWriter(self.api_client)
411         content = 'd1/d2/f3d1/f2f1'
412         blockhash = tutil.str_keep_locator(content)
413         cwriter.write_directory_tree(
414             self.build_directory_tree(['f1', 'd1/f2', 'd1/d2/f3']),
415             max_manifest_depth=0)
416         self.assertEqual(
417             cwriter.manifest_text(),
418             ". {} 0:8:d1/d2/f3 8:5:d1/f2 13:2:f1\n".format(blockhash))
419
420     def test_write_one_file(self):
421         cwriter = arvados.CollectionWriter(self.api_client)
422         with self.make_test_file() as testfile:
423             cwriter.write_file(testfile.name)
424             self.assertEqual(
425                 cwriter.manifest_text(),
426                 ". 098f6bcd4621d373cade4e832627b4f6+4 0:4:{}\n".format(
427                     os.path.basename(testfile.name)))
428
429     def test_write_named_file(self):
430         cwriter = arvados.CollectionWriter(self.api_client)
431         with self.make_test_file() as testfile:
432             cwriter.write_file(testfile.name, 'foo')
433             self.assertEqual(cwriter.manifest_text(),
434                              ". 098f6bcd4621d373cade4e832627b4f6+4 0:4:foo\n")
435
436     def test_write_multiple_files(self):
437         cwriter = arvados.CollectionWriter(self.api_client)
438         for letter in 'ABC':
439             with self.make_test_file(letter.encode()) as testfile:
440                 cwriter.write_file(testfile.name, letter)
441         self.assertEqual(
442             cwriter.manifest_text(),
443             ". 902fbdd2b1df0c4f70b4a5d23525e932+3 0:1:A 1:1:B 2:1:C\n")
444
445     def test_basic_resume(self):
446         cwriter = TestResumableWriter()
447         with self.make_test_file() as testfile:
448             cwriter.write_file(testfile.name, 'test')
449             resumed = TestResumableWriter.from_state(cwriter.current_state())
450         self.assertEqual(cwriter.manifest_text(), resumed.manifest_text(),
451                           "resumed CollectionWriter had different manifest")
452
453     def test_resume_fails_when_missing_dependency(self):
454         cwriter = TestResumableWriter()
455         with self.make_test_file() as testfile:
456             cwriter.write_file(testfile.name, 'test')
457         self.assertRaises(arvados.errors.StaleWriterStateError,
458                           TestResumableWriter.from_state,
459                           cwriter.current_state())
460
461     def test_resume_fails_when_dependency_mtime_changed(self):
462         cwriter = TestResumableWriter()
463         with self.make_test_file() as testfile:
464             cwriter.write_file(testfile.name, 'test')
465             os.utime(testfile.name, (0, 0))
466             self.assertRaises(arvados.errors.StaleWriterStateError,
467                               TestResumableWriter.from_state,
468                               cwriter.current_state())
469
470     def test_resume_fails_when_dependency_is_nonfile(self):
471         cwriter = TestResumableWriter()
472         cwriter.write_file('/dev/null', 'empty')
473         self.assertRaises(arvados.errors.StaleWriterStateError,
474                           TestResumableWriter.from_state,
475                           cwriter.current_state())
476
477     def test_resume_fails_when_dependency_size_changed(self):
478         cwriter = TestResumableWriter()
479         with self.make_test_file() as testfile:
480             cwriter.write_file(testfile.name, 'test')
481             orig_mtime = os.fstat(testfile.fileno()).st_mtime
482             testfile.write(b'extra')
483             testfile.flush()
484             os.utime(testfile.name, (orig_mtime, orig_mtime))
485             self.assertRaises(arvados.errors.StaleWriterStateError,
486                               TestResumableWriter.from_state,
487                               cwriter.current_state())
488
489     def test_resume_fails_with_expired_locator(self):
490         cwriter = TestResumableWriter()
491         state = cwriter.current_state()
492         # Add an expired locator to the state.
493         state['_current_stream_locators'].append(''.join([
494                     'a' * 32, '+1+A', 'b' * 40, '@', '10000000']))
495         self.assertRaises(arvados.errors.StaleWriterStateError,
496                           TestResumableWriter.from_state, state)
497
498     def test_arbitrary_objects_not_resumable(self):
499         cwriter = TestResumableWriter()
500         with open('/dev/null') as badfile:
501             self.assertRaises(arvados.errors.AssertionError,
502                               cwriter.write_file, badfile)
503
504     def test_arbitrary_writes_not_resumable(self):
505         cwriter = TestResumableWriter()
506         self.assertRaises(arvados.errors.AssertionError,
507                           cwriter.write, "badtext")
508
509
class CollectionTestMixin(tutil.ApiClientMock):
    # The 'collections' fixture set, plus shortcuts into two of its entries.
    API_COLLECTIONS = run_test_server.fixture('collections')

    # Default collection: the 'foo_file' fixture.
    DEFAULT_COLLECTION = API_COLLECTIONS['foo_file']
    DEFAULT_DATA_HASH = DEFAULT_COLLECTION['portable_data_hash']
    DEFAULT_MANIFEST = DEFAULT_COLLECTION['manifest_text']
    DEFAULT_UUID = DEFAULT_COLLECTION['uuid']

    # Alternate collection: the 'bar_file' fixture.
    ALT_COLLECTION = API_COLLECTIONS['bar_file']
    ALT_DATA_HASH = ALT_COLLECTION['portable_data_hash']
    ALT_MANIFEST = ALT_COLLECTION['manifest_text']

    def api_client_mock(self, status=200):
        """Return the parent's API client mock after mocking its Keep services.

        mock_keep_services() is called on the client with the given status,
        service_type='proxy' and count=1.
        """
        mock_client = super(CollectionTestMixin, self).api_client_mock()
        self.mock_keep_services(
            mock_client, status=status, service_type='proxy', count=1)
        return mock_client
524
525
526 @tutil.skip_sleep
527 class CollectionReaderTestCase(unittest.TestCase, CollectionTestMixin):
528     def mock_get_collection(self, api_mock, code, fixturename):
529         body = self.API_COLLECTIONS.get(fixturename)
530         self._mock_api_call(api_mock.collections().get, code, body)
531
532     def api_client_mock(self, status=200):
533         client = super(CollectionReaderTestCase, self).api_client_mock()
534         self.mock_get_collection(client, status, 'foo_file')
535         return client
536
537     def test_init_no_default_retries(self):
538         client = self.api_client_mock(200)
539         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
540         reader.manifest_text()
541         client.collections().get().execute.assert_called_with(num_retries=0)
542
543     def test_uuid_init_success(self):
544         client = self.api_client_mock(200)
545         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client,
546                                           num_retries=3)
547         self.assertEqual(self.DEFAULT_COLLECTION['manifest_text'],
548                          reader.manifest_text())
549         client.collections().get().execute.assert_called_with(num_retries=3)
550
551     def test_uuid_init_failure_raises_api_error(self):
552         client = self.api_client_mock(500)
553         with self.assertRaises(arvados.errors.ApiError):
554             reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
555
556     def test_locator_init(self):
557         client = self.api_client_mock(200)
558         # Ensure Keep will not return anything if asked.
559         with tutil.mock_keep_responses(None, 404):
560             reader = arvados.CollectionReader(self.DEFAULT_DATA_HASH,
561                                               api_client=client)
562             self.assertEqual(self.DEFAULT_MANIFEST, reader.manifest_text())
563
564     def test_init_no_fallback_to_keep(self):
565         # Do not look up a collection UUID or PDH in Keep.
566         for key in [self.DEFAULT_UUID, self.DEFAULT_DATA_HASH]:
567             client = self.api_client_mock(404)
568             with tutil.mock_keep_responses(self.DEFAULT_MANIFEST, 200):
569                 with self.assertRaises(arvados.errors.ApiError):
570                     reader = arvados.CollectionReader(key, api_client=client)
571
572     def test_init_num_retries_propagated(self):
573         # More of an integration test...
574         client = self.api_client_mock(200)
575         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client,
576                                           num_retries=3)
577         with tutil.mock_keep_responses('foo', 500, 500, 200):
578             self.assertEqual(b'foo',
579                              b''.join(f.read(9) for f in reader.all_files()))
580
581     def test_read_nonnormalized_manifest_with_collection_reader(self):
582         # client should be able to use CollectionReader on a manifest without normalizing it
583         client = self.api_client_mock(500)
584         nonnormal = ". acbd18db4cc2f85cedef654fccc4a4d8+3+Aabadbadbee@abeebdee 0:3:foo.txt 1:0:bar.txt 0:3:foo.txt\n"
585         reader = arvados.CollectionReader(
586             nonnormal,
587             api_client=client, num_retries=0)
588         # Ensure stripped_manifest() doesn't mangle our manifest in
589         # any way other than stripping hints.
590         self.assertEqual(
591             re.sub('\+[^\d\s\+]+', '', nonnormal),
592             reader.stripped_manifest())
593         # Ensure stripped_manifest() didn't mutate our reader.
594         self.assertEqual(nonnormal, reader.manifest_text())
595         # Ensure the files appear in the order given in the manifest.
596         self.assertEqual(
597             [[6, '.', 'foo.txt'],
598              [0, '.', 'bar.txt']],
599             [[f.size(), f.stream_name(), f.name()]
600              for f in reader.all_streams()[0].all_files()])
601
602     def test_read_empty_collection(self):
603         client = self.api_client_mock(200)
604         self.mock_get_collection(client, 200, 'empty')
605         reader = arvados.CollectionReader('d41d8cd98f00b204e9800998ecf8427e+0',
606                                           api_client=client)
607         self.assertEqual('', reader.manifest_text())
608         self.assertEqual(0, len(reader))
609         self.assertFalse(reader)
610
611     def test_api_response(self):
612         client = self.api_client_mock()
613         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
614         self.assertEqual(self.DEFAULT_COLLECTION, reader.api_response())
615
616     def check_open_file(self, coll_file, stream_name, file_name, file_size):
617         self.assertFalse(coll_file.closed, "returned file is not open")
618         self.assertEqual(stream_name, coll_file.stream_name())
619         self.assertEqual(file_name, coll_file.name)
620         self.assertEqual(file_size, coll_file.size())
621
622     def test_open_collection_file_one_argument(self):
623         client = self.api_client_mock(200)
624         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
625         cfile = reader.open('./foo', 'rb')
626         self.check_open_file(cfile, '.', 'foo', 3)
627
628     def test_open_deep_file(self):
629         coll_name = 'collection_with_files_in_subdir'
630         client = self.api_client_mock(200)
631         self.mock_get_collection(client, 200, coll_name)
632         reader = arvados.CollectionReader(
633             self.API_COLLECTIONS[coll_name]['uuid'], api_client=client)
634         cfile = reader.open('./subdir2/subdir3/file2_in_subdir3.txt', 'rb')
635         self.check_open_file(cfile, './subdir2/subdir3', 'file2_in_subdir3.txt',
636                              32)
637
638     def test_open_nonexistent_stream(self):
639         client = self.api_client_mock(200)
640         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
641         self.assertRaises(IOError, reader.open, './nonexistent/foo')
642
643     def test_open_nonexistent_file(self):
644         client = self.api_client_mock(200)
645         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
646         self.assertRaises(IOError, reader.open, 'nonexistent')
647
648
@tutil.skip_sleep
class CollectionWriterTestCase(unittest.TestCase, CollectionTestMixin):
    # Exercises arvados.CollectionWriter against mocked API clients and
    # mocked Keep responses.

    def mock_keep(self, body, *codes, **headers):
        # Delegate to tutil.mock_keep_responses, defaulting the
        # x-keep-replicas-stored header to 2 when the caller omits it.
        if 'x-keep-replicas-stored' not in headers:
            headers['x-keep-replicas-stored'] = 2
        return tutil.mock_keep_responses(body, *codes, **headers)

    def foo_writer(self, **kwargs):
        # Return a CollectionWriter that has started a file named 'foo'
        # and written b'foo' to it.  A fresh API client mock is used
        # unless the caller passes api_client.
        fallback_api = self.api_client_mock()
        if 'api_client' not in kwargs:
            kwargs['api_client'] = fallback_api
        foo = arvados.CollectionWriter(**kwargs)
        foo.start_new_file('foo')
        foo.write(b'foo')
        return foo

    def test_write_whole_collection(self):
        # finish() must return the default data hash when Keep answers 200.
        foo = self.foo_writer()
        with self.mock_keep(self.DEFAULT_DATA_HASH, 200, 200):
            finished = foo.finish()
            self.assertEqual(self.DEFAULT_DATA_HASH, finished)

    def test_write_no_default(self):
        # A 500 from Keep must surface as KeepWriteError from finish().
        foo = self.foo_writer()
        with self.mock_keep(None, 500):
            self.assertRaises(arvados.errors.KeepWriteError, foo.finish)

    def test_write_insufficient_replicas_via_proxy(self):
        # Three replicas requested, but the mocked response reports only
        # two stored: rendering the manifest must raise KeepWriteError.
        foo = self.foo_writer(replication=3)
        headers = {'x-keep-replicas-stored': 2}
        with self.mock_keep(None, 200, **headers):
            self.assertRaises(arvados.errors.KeepWriteError,
                              foo.manifest_text)

    def test_write_insufficient_replicas_via_disks(self):
        # Three replicas requested with two mocked disk services, each
        # response reporting one stored replica: must raise KeepWriteError.
        api = mock.MagicMock(name='api_client')
        headers = {'x-keep-replicas-stored': 1}
        with self.mock_keep(None, 200, 200, **headers):
            self.mock_keep_services(
                api, status=200, service_type='disk', count=2)
            foo = self.foo_writer(api_client=api, replication=3)
            self.assertRaises(arvados.errors.KeepWriteError,
                              foo.manifest_text)

    def test_write_three_replicas(self):
        # With six mocked disk services answering 500 three times and
        # then 200 three times, the Keep mock must be called six times.
        api = mock.MagicMock(name='api_client')
        headers = {'x-keep-replicas-stored': 1}
        with self.mock_keep(
                "", 500, 500, 500, 200, 200, 200, **headers) as keep_calls:
            self.mock_keep_services(
                api, status=200, service_type='disk', count=6)
            foo = self.foo_writer(api_client=api, replication=3)
            foo.manifest_text()
            self.assertEqual(6, keep_calls.call_count)

    def test_write_whole_collection_through_retries(self):
        # With num_retries=2, finish() must still return the default data
        # hash when each 200 is preceded by two 500 responses.
        foo = self.foo_writer(num_retries=2)
        with self.mock_keep(self.DEFAULT_DATA_HASH,
                            500, 500, 200, 500, 500, 200):
            finished = foo.finish()
            self.assertEqual(self.DEFAULT_DATA_HASH, finished)

    def test_flush_data_retries(self):
        # flush_data() must get past one 500 response; the manifest must
        # then equal the default manifest.
        foo = self.foo_writer(num_retries=2)
        block_locator = self.DEFAULT_MANIFEST.split()[1]
        with self.mock_keep(block_locator, 500, 200):
            foo.flush_data()
        self.assertEqual(self.DEFAULT_MANIFEST, foo.manifest_text())

    def test_one_open(self):
        # While a file is open the writer reports its stream and file
        # name; after the context exits the file is closed and rejects
        # further writes.
        api = self.api_client_mock()
        writer = arvados.CollectionWriter(api)
        with writer.open('out') as handle:
            self.assertEqual('.', writer.current_stream_name())
            self.assertEqual('out', writer.current_file_name())
            handle.write(b'test data')
            locator = tutil.str_keep_locator('test data')
        self.assertTrue(handle.closed, "writer file not closed after context")
        with self.assertRaises(ValueError):
            handle.write('extra text')
        with self.mock_keep(locator, 200):
            expected = ". {} 0:9:out\n".format(locator)
            self.assertEqual(expected, writer.manifest_text())

    def test_open_writelines(self):
        # writelines() with three two-character strings must produce one
        # six-byte file.
        api = self.api_client_mock()
        writer = arvados.CollectionWriter(api)
        with writer.open('six') as handle:
            handle.writelines(['12', '34', '56'])
            locator = tutil.str_keep_locator('123456')
        with self.mock_keep(locator, 200):
            expected = ". {} 0:6:six\n".format(locator)
            self.assertEqual(expected, writer.manifest_text())

    def test_open_flush(self):
        # A flush() between two writes must put each write in its own
        # block, so the manifest lists both locators.
        api = self.api_client_mock()
        locators = [tutil.str_keep_locator(text)
                    for text in ('flush1', 'flush2')]
        keep_replies = [(locator, 200) for locator in locators]
        with self.mock_keep(*keep_replies):
            writer = arvados.CollectionWriter(api)
            with writer.open('flush_test') as handle:
                handle.write(b'flush1')
                handle.flush()
                handle.write(b'flush2')
            expected = ". {} {} 0:12:flush_test\n".format(*locators)
            self.assertEqual(expected, writer.manifest_text())

    def test_two_opens_same_stream(self):
        # Two files written one after the other into stream '.' must
        # share a single block in the manifest.
        api = self.api_client_mock()
        writer = arvados.CollectionWriter(api)
        for file_name, payload in (('1', b'1st'), ('2', b'2nd')):
            with writer.open('.', file_name) as handle:
                handle.write(payload)
        locator = tutil.str_keep_locator('1st2nd')
        with self.mock_keep(locator, 200):
            expected = ". {} 0:3:1 3:3:2\n".format(locator)
            self.assertEqual(expected, writer.manifest_text())

    def test_two_opens_two_streams(self):
        # Files opened in different streams must appear on separate
        # manifest lines, each with its own block.
        api = self.api_client_mock()
        root_locator = tutil.str_keep_locator('file')
        dir_locator = tutil.str_keep_locator('indir')
        with self.mock_keep((root_locator, 200), (dir_locator, 200)):
            writer = arvados.CollectionWriter(api)
            with writer.open('file') as handle:
                handle.write(b'file')
            with writer.open('./dir', 'indir') as handle:
                handle.write(b'indir')
            expected = ". {} 0:4:file\n./dir {} 0:5:indir\n".format(
                root_locator, dir_locator)
            self.assertEqual(expected, writer.manifest_text())

    def test_dup_open_fails(self):
        # Opening a second file while the first is still open must raise
        # arvados.errors.AssertionError.
        api = self.api_client_mock()
        writer = arvados.CollectionWriter(api)
        still_open = writer.open('one')
        with self.assertRaises(arvados.errors.AssertionError):
            writer.open('two')
781
782
class CollectionMethods(run_test_server.TestCaseWithServers):
    # Checks Collection's keys()/values()/items() accessors and its
    # get_properties() / get_trash_at() getters.

    def test_keys_values_items_support_indexing(self):
        # After writing two files, keys(), values() and items() must each
        # report two entries; values() and items() are also indexed.
        coll = Collection()
        for name, payload in (('foo', b'foo'), ('bar', b'bar')):
            with coll.open(name, 'wb') as handle:
                handle.write(payload)
        self.assertEqual(2, len(coll.keys()))
        if sys.version_info >= (3, 0):
            first_name, second_name = coll.keys()
        else:
            # keys() supports indexing only for python2 callers.
            first_name = coll.keys()[0]
            second_name = coll.keys()[1]
        self.assertEqual(2, len(coll.values()))
        # These are bound only to exercise indexing on values().
        first_value = coll.values()[0]
        second_value = coll.values()[1]
        self.assertEqual(2, len(coll.items()))
        for position, expected_name in enumerate((first_name, second_name)):
            self.assertEqual(expected_name, coll.items()[position][0])

    def test_get_properties(self):
        # Properties are empty on a new collection and reflect what was
        # passed to save_new() afterwards.
        coll = Collection()
        self.assertEqual(coll.get_properties(), {})
        saved_properties = {"foo":"bar"}
        coll.save_new(properties=saved_properties)
        self.assertEqual(coll.get_properties(), {"foo":"bar"})

    def test_get_trash_at(self):
        # trash_at is None on a new collection and matches the datetime
        # passed to save_new() afterwards.
        coll = Collection()
        self.assertEqual(coll.get_trash_at(), None)
        trash_time = datetime.datetime(2111, 1, 1, 11, 11, 11, 111111)
        coll.save_new(trash_at=trash_time)
        expected = ciso8601.parse_datetime('2111-01-01T11:11:11.111111000Z')
        self.assertEqual(coll.get_trash_at(), expected)
816
817
class CollectionOpenModes(run_test_server.TestCaseWithServers):
    # Checks which mode strings Collection.open() accepts and rejects.

    def test_open_binary_modes(self):
        # Every binary write/append mode must accept a bytes write.
        coll = Collection()
        for binary_mode in ('wb', 'wb+', 'ab', 'ab+'):
            with coll.open('foo', binary_mode) as handle:
                handle.write(b'foo')

    def test_open_invalid_modes(self):
        # Malformed mode values must make open() raise.
        coll = Collection()
        for bad_mode in ('+r', 'aa', '++', 'r+b', 'beer', '', None):
            self.assertRaises(Exception, coll.open, 'foo', bad_mode)

    def test_open_text_modes(self):
        # Plain read modes must read back 'foo'; every other text mode
        # writes 'bar', seeks to the start and must read it back.
        coll = Collection()
        with coll.open('foo', 'wb') as handle:
            handle.write('foo')
        for text_mode in ('r', 'rt', 'r+', 'rt+', 'w', 'wt', 'a', 'at'):
            with coll.open('foo', text_mode) as handle:
                writable = text_mode[0] != 'r' or '+' in text_mode
                if writable:
                    handle.write('bar')
                    handle.seek(0, os.SEEK_SET)
                    self.assertEqual('bar', handle.read(3))
                else:
                    self.assertEqual('foo', handle.read(3))
844
845
class TextModes(run_test_server.TestCaseWithServers):
    # Text-mode reads and writes of non-ASCII characters whose encoded
    # bytes straddle Keep block boundaries.  Each test runs with
    # arvados.config.KEEP_BLOCK_SIZE shrunk to 4.

    def setUp(self):
        # Remember the block size currently in effect so tearDown can
        # restore exactly that value rather than assuming a default.
        self._saved_keep_block_size = arvados.config.KEEP_BLOCK_SIZE
        arvados.config.KEEP_BLOCK_SIZE = 4
        if sys.version_info < (3, 0):
            import unicodedata
            self.sailboat = unicodedata.lookup('SAILBOAT')
            self.snowman = unicodedata.lookup('SNOWMAN')
        else:
            self.sailboat = '\N{SAILBOAT}'
            self.snowman = '\N{SNOWMAN}'

    def tearDown(self):
        # Restore the value captured in setUp.  Fall back to 2 ** 26 (the
        # value this fixture previously hard-coded) if none was recorded.
        arvados.config.KEEP_BLOCK_SIZE = getattr(
            self, '_saved_keep_block_size', 2 ** 26)

    def test_read_sailboat_across_block_boundary(self):
        # Write the encoded sailboat once whole and once split across two
        # writes, then read the line back in text mode.
        c = Collection()
        f = c.open('sailboats', 'wb')
        data = self.sailboat.encode('utf-8')
        f.write(data)
        f.write(data[:1])
        f.write(data[1:])
        f.write(b'\n')
        f.close()
        # The manifest must show a 4-byte block followed by a 3-byte block.
        self.assertRegex(c.portable_manifest_text(), r'\+4 .*\+3 ')

        f = c.open('sailboats', 'r')
        string = f.readline()
        self.assertEqual(string, self.sailboat+self.sailboat+'\n')
        f.close()

    def test_write_snowman_across_block_boundary(self):
        # Write two lines of snowmen in text mode, then read them back
        # line by line.
        c = Collection()
        f = c.open('snowmany', 'w')
        data = self.snowman
        f.write(data+data+'\n'+data+'\n')
        f.close()
        # The manifest must show blocks of 4, 4 and 3 bytes.
        self.assertRegex(c.portable_manifest_text(), r'\+4 .*\+4 .*\+3 ')

        f = c.open('snowmany', 'r')
        self.assertEqual(f.readline(), self.snowman+self.snowman+'\n')
        self.assertEqual(f.readline(), self.snowman+'\n')
        f.close()
889
890
891 class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
892
893     def test_replication_desired_kept_on_load(self):
894         m = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n'
895         c1 = Collection(m, replication_desired=1)
896         c1.save_new()
897         loc = c1.manifest_locator()
898         c2 = Collection(loc)
899         self.assertEqual(c1.manifest_text, c2.manifest_text)
900         self.assertEqual(c1.replication_desired, c2.replication_desired)
901
902     def test_replication_desired_not_loaded_if_provided(self):
903         m = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n'
904         c1 = Collection(m, replication_desired=1)
905         c1.save_new()
906         loc = c1.manifest_locator()
907         c2 = Collection(loc, replication_desired=2)
908         self.assertEqual(c1.manifest_text, c2.manifest_text)
909         self.assertNotEqual(c1.replication_desired, c2.replication_desired)
910
911     def test_storage_classes_desired_kept_on_load(self):
912         m = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n'
913         c1 = Collection(m, storage_classes_desired=['archival'])
914         c1.save_new()
915         loc = c1.manifest_locator()
916         c2 = Collection(loc)
917         self.assertEqual(c1.manifest_text, c2.manifest_text)
918         self.assertEqual(c1.storage_classes_desired(), c2.storage_classes_desired())
919
920     def test_storage_classes_change_after_save(self):
921         m = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n'
922         c1 = Collection(m, storage_classes_desired=['archival'])
923         c1.save_new()
924         loc = c1.manifest_locator()
925         c2 = Collection(loc)
926         self.assertEqual(['archival'], c2.storage_classes_desired())
927         c2.save(storage_classes=['highIO'])
928         self.assertEqual(['highIO'], c2.storage_classes_desired())
929         c3 = Collection(loc)
930         self.assertEqual(c1.manifest_text, c3.manifest_text)
931         self.assertEqual(['highIO'], c3.storage_classes_desired())
932
933     def test_storage_classes_desired_not_loaded_if_provided(self):
934         m = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n'
935         c1 = Collection(m, storage_classes_desired=['archival'])
936         c1.save_new()
937         loc = c1.manifest_locator()
938         c2 = Collection(loc, storage_classes_desired=['default'])
939         self.assertEqual(c1.manifest_text, c2.manifest_text)
940         self.assertNotEqual(c1.storage_classes_desired(), c2.storage_classes_desired())
941
942     def test_init_manifest(self):
943         m1 = """. 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt
944 . 085c37f02916da1cad16f93c54d899b7+41 0:41:md5sum.txt
945 . 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md5sum.txt
946 """
947         self.assertEqual(m1, CollectionReader(m1).manifest_text(normalize=False))
948         self.assertEqual(". 5348b82a029fd9e971a811ce1f71360b+43 085c37f02916da1cad16f93c54d899b7+41 8b22da26f9f433dea0a10e5ec66d73ba+43 0:127:md5sum.txt\n", CollectionReader(m1).manifest_text(normalize=True))
949
950     def test_init_manifest_with_collision(self):
951         m1 = """. 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt
952 ./md5sum.txt 085c37f02916da1cad16f93c54d899b7+41 0:41:md5sum.txt
953 """
954         with self.assertRaises(arvados.errors.ArgumentError):
955             self.assertEqual(m1, CollectionReader(m1))
956
957     def test_init_manifest_with_error(self):
958         m1 = """. 0:43:md5sum.txt"""
959         with self.assertRaises(arvados.errors.ArgumentError):
960             self.assertEqual(m1, CollectionReader(m1))
961
962     def test_remove(self):
963         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n')
964         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n", c.portable_manifest_text())
965         self.assertIn("count1.txt", c)
966         c.remove("count1.txt")
967         self.assertNotIn("count1.txt", c)
968         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c.portable_manifest_text())
969         with self.assertRaises(arvados.errors.ArgumentError):
970             c.remove("")
971
972     def test_remove_recursive(self):
973         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:a/b/c/d/efg.txt 0:10:xyz.txt\n')
974         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:xyz.txt\n./a/b/c/d 781e5e245d69b566979b86e28d23f2c7+10 0:10:efg.txt\n", c.portable_manifest_text())
975         self.assertIn("a", c)
976         self.assertEqual(1, len(c["a"].keys()))
977         # cannot remove non-empty directory with default recursive=False
978         with self.assertRaises(OSError):
979             c.remove("a/b")
980         with self.assertRaises(OSError):
981             c.remove("a/b/c/d")
982         c.remove("a/b", recursive=True)
983         self.assertEqual(0, len(c["a"].keys()))
984         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:xyz.txt\n./a d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n", c.portable_manifest_text())
985
986     def test_find(self):
987         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n')
988         self.assertIs(c.find("."), c)
989         self.assertIs(c.find("./count1.txt"), c["count1.txt"])
990         self.assertIs(c.find("count1.txt"), c["count1.txt"])
991         with self.assertRaises(IOError):
992             c.find("/.")
993         with self.assertRaises(arvados.errors.ArgumentError):
994             c.find("")
995         self.assertIs(c.find("./nonexistant.txt"), None)
996         self.assertIs(c.find("./nonexistantsubdir/nonexistant.txt"), None)
997
998     def test_escaped_paths_dont_get_unescaped_on_manifest(self):
999         # Dir & file names are literally '\056' (escaped form: \134056)
1000         manifest = './\\134056\\040Test d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\134056\n'
1001         c = Collection(manifest)
1002         self.assertEqual(c.portable_manifest_text(), manifest)
1003
1004     def test_other_special_chars_on_file_token(self):
1005         cases = [
1006             ('\\000', '\0'),
1007             ('\\011', '\t'),
1008             ('\\012', '\n'),
1009             ('\\072', ':'),
1010             ('\\134400', '\\400'),
1011         ]
1012         for encoded, decoded in cases:
1013             manifest = '. d41d8cd98f00b204e9800998ecf8427e+0 0:0:some%sfile.txt\n' % encoded
1014             c = Collection(manifest)
1015             self.assertEqual(c.portable_manifest_text(), manifest)
1016             self.assertIn('some%sfile.txt' % decoded, c.keys())
1017
1018     def test_escaped_paths_do_get_unescaped_on_listing(self):
1019         # Dir & file names are literally '\056' (escaped form: \134056)
1020         manifest = './\\134056\\040Test d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\134056\n'
1021         c = Collection(manifest)
1022         self.assertIn('\\056 Test', c.keys())
1023         self.assertIn('\\056', c['\\056 Test'].keys())
1024
1025     def test_make_empty_dir_with_escaped_chars(self):
1026         c = Collection()
1027         c.mkdirs('./Empty\\056Dir')
1028         self.assertEqual(c.portable_manifest_text(),
1029                          './Empty\\134056Dir d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n')
1030
1031     def test_make_empty_dir_with_spaces(self):
1032         c = Collection()
1033         c.mkdirs('./foo bar/baz waz')
1034         self.assertEqual(c.portable_manifest_text(),
1035                          './foo\\040bar/baz\\040waz d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n')
1036
1037     def test_remove_in_subdir(self):
1038         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
1039         c.remove("foo/count2.txt")
1040         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n", c.portable_manifest_text())
1041
1042     def test_remove_empty_subdir(self):
1043         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
1044         c.remove("foo/count2.txt")
1045         c.remove("foo")
1046         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.portable_manifest_text())
1047
1048     def test_remove_nonempty_subdir(self):
1049         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
1050         with self.assertRaises(IOError):
1051             c.remove("foo")
1052         c.remove("foo", recursive=True)
1053         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.portable_manifest_text())
1054
1055     def test_copy_to_file_in_dir(self):
1056         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1057         c.copy("count1.txt", "foo/count2.txt")
1058         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c.portable_manifest_text())
1059
1060     def test_copy_file(self):
1061         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1062         c.copy("count1.txt", "count2.txt")
1063         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n", c.portable_manifest_text())
1064
1065     def test_copy_to_existing_dir(self):
1066         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
1067         c.copy("count1.txt", "foo")
1068         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n", c.portable_manifest_text())
1069
1070     def test_copy_to_new_dir(self):
1071         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1072         c.copy("count1.txt", "foo/")
1073         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.portable_manifest_text())
1074
1075     def test_rename_file(self):
1076         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1077         c.rename("count1.txt", "count2.txt")
1078         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c.manifest_text())
1079
1080     def test_move_file_to_dir(self):
1081         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1082         c.mkdirs("foo")
1083         c.rename("count1.txt", "foo/count2.txt")
1084         self.assertEqual("./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c.manifest_text())
1085
1086     def test_move_file_to_other(self):
1087         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1088         c2 = Collection()
1089         c2.rename("count1.txt", "count2.txt", source_collection=c1)
1090         self.assertEqual("", c1.manifest_text())
1091         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c2.manifest_text())
1092
1093     def test_clone(self):
1094         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
1095         cl = c.clone()
1096         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", cl.portable_manifest_text())
1097
1098     def test_diff_del_add(self):
1099         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1100         c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
1101         d = c2.diff(c1)
1102         self.assertEqual(sorted(d), [
1103             ('add', './count1.txt', c1["count1.txt"]),
1104             ('del', './count2.txt', c2["count2.txt"]),
1105         ])
1106         d = c1.diff(c2)
1107         self.assertEqual(sorted(d), [
1108             ('add', './count2.txt', c2["count2.txt"]),
1109             ('del', './count1.txt', c1["count1.txt"]),
1110         ])
1111         self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1112         c1.apply(d)
1113         self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1114
1115     def test_diff_same(self):
1116         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1117         c2 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1118         d = c2.diff(c1)
1119         self.assertEqual(d, [('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"])])
1120         d = c1.diff(c2)
1121         self.assertEqual(d, [('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"])])
1122
1123         self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1124         c1.apply(d)
1125         self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1126
1127     def test_diff_mod(self):
1128         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1129         c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count1.txt\n')
1130         d = c2.diff(c1)
1131         self.assertEqual(d, [('mod', './count1.txt', c2["count1.txt"], c1["count1.txt"])])
1132         d = c1.diff(c2)
1133         self.assertEqual(d, [('mod', './count1.txt', c1["count1.txt"], c2["count1.txt"])])
1134
1135         self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1136         c1.apply(d)
1137         self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1138
1139     def test_diff_add(self):
1140         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1141         c2 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 5348b82a029fd9e971a811ce1f71360b+43 0:10:count1.txt 10:20:count2.txt\n')
1142         d = c2.diff(c1)
1143         self.assertEqual(sorted(d), [
1144             ('del', './count2.txt', c2["count2.txt"]),
1145             ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
1146         ])
1147         d = c1.diff(c2)
1148         self.assertEqual(sorted(d), [
1149             ('add', './count2.txt', c2["count2.txt"]),
1150             ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
1151         ])
1152
1153         self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1154         c1.apply(d)
1155         self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1156
1157     def test_diff_add_in_subcollection(self):
1158         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1159         c2 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
1160         d = c2.diff(c1)
1161         self.assertEqual(sorted(d), [
1162             ('del', './foo', c2["foo"]),
1163             ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
1164         ])
1165         d = c1.diff(c2)
1166         self.assertEqual(sorted(d), [
1167             ('add', './foo', c2["foo"]),
1168             ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
1169         ])
1170         self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1171         c1.apply(d)
1172         self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1173
1174     def test_diff_del_add_in_subcollection(self):
1175         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
1176         c2 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 5348b82a029fd9e971a811ce1f71360b+43 0:3:count3.txt\n')
1177         d = c2.diff(c1)
1178         self.assertEqual(sorted(d), [
1179             ('add', './foo/count2.txt', c1.find("foo/count2.txt")),
1180             ('del', './foo/count3.txt', c2.find("foo/count3.txt")),
1181             ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
1182         ])
1183         d = c1.diff(c2)
1184         self.assertEqual(sorted(d), [
1185             ('add', './foo/count3.txt', c2.find("foo/count3.txt")),
1186             ('del', './foo/count2.txt', c1.find("foo/count2.txt")),
1187             ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
1188         ])
1189
1190         self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1191         c1.apply(d)
1192         self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1193
1194     def test_diff_mod_in_subcollection(self):
1195         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
1196         c2 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:3:foo\n')
1197         d = c2.diff(c1)
1198         self.assertEqual(sorted(d), [
1199             ('mod', './foo', c2["foo"], c1["foo"]),
1200             ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
1201         ])
1202         d = c1.diff(c2)
1203         self.assertEqual(sorted(d), [
1204             ('mod', './foo', c1["foo"], c2["foo"]),
1205             ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
1206         ])
1207
1208         self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1209         c1.apply(d)
1210         self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1211
1212     def test_conflict_keep_local_change(self):
1213         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1214         c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
1215         d = c1.diff(c2)
1216         self.assertEqual(sorted(d), [
1217             ('add', './count2.txt', c2["count2.txt"]),
1218             ('del', './count1.txt', c1["count1.txt"]),
1219         ])
1220         f = c1.open("count1.txt", "wb")
1221         f.write(b"zzzzz")
1222
1223         # c1 changed, so it should not be deleted.
1224         c1.apply(d)
1225         self.assertEqual(c1.portable_manifest_text(), ". 95ebc3c7b3b9f1d2c40fec14415d3cb8+5 5348b82a029fd9e971a811ce1f71360b+43 0:5:count1.txt 5:10:count2.txt\n")
1226
1227     def test_conflict_mod(self):
1228         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt')
1229         c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count1.txt')
1230         d = c1.diff(c2)
1231         self.assertEqual(d, [('mod', './count1.txt', c1["count1.txt"], c2["count1.txt"])])
1232         f = c1.open("count1.txt", "wb")
1233         f.write(b"zzzzz")
1234
1235         # c1 changed, so c2 mod will go to a conflict file
1236         c1.apply(d)
1237         self.assertRegex(
1238             c1.portable_manifest_text(),
1239             r"\. 95ebc3c7b3b9f1d2c40fec14415d3cb8\+5 5348b82a029fd9e971a811ce1f71360b\+43 0:5:count1\.txt 5:10:count1\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")
1240
1241     def test_conflict_add(self):
1242         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
1243         c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count1.txt\n')
1244         d = c1.diff(c2)
1245         self.assertEqual(sorted(d), [
1246             ('add', './count1.txt', c2["count1.txt"]),
1247             ('del', './count2.txt', c1["count2.txt"]),
1248         ])
1249         f = c1.open("count1.txt", "wb")
1250         f.write(b"zzzzz")
1251
1252         # c1 added count1.txt, so c2 add will go to a conflict file
1253         c1.apply(d)
1254         self.assertRegex(
1255             c1.portable_manifest_text(),
1256             r"\. 95ebc3c7b3b9f1d2c40fec14415d3cb8\+5 5348b82a029fd9e971a811ce1f71360b\+43 0:5:count1\.txt 5:10:count1\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")
1257
1258     def test_conflict_del(self):
1259         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt')
1260         c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count1.txt')
1261         d = c1.diff(c2)
1262         self.assertEqual(d, [('mod', './count1.txt', c1["count1.txt"], c2["count1.txt"])])
1263         c1.remove("count1.txt")
1264
1265         # c1 deleted, so c2 mod will go to a conflict file
1266         c1.apply(d)
1267         self.assertRegex(
1268             c1.portable_manifest_text(),
1269             r"\. 5348b82a029fd9e971a811ce1f71360b\+43 0:10:count1\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")
1270
1271     def test_notify(self):
1272         c1 = Collection()
1273         events = []
1274         c1.subscribe(lambda event, collection, name, item: events.append((event, collection, name, item)))
1275         f = c1.open("foo.txt", "wb")
1276         self.assertEqual(events[0], (arvados.collection.ADD, c1, "foo.txt", f.arvadosfile))
1277
1278     def test_open_w(self):
1279         c1 = Collection(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n")
1280         self.assertEqual(c1["count1.txt"].size(), 10)
1281         c1.open("count1.txt", "wb").close()
1282         self.assertEqual(c1["count1.txt"].size(), 0)
1283
1284
class NewCollectionTestCaseWithServersAndTokens(run_test_server.TestCaseWithServers):
    """Collection tests that inspect the signed block locators in manifests."""
    MAIN_SERVER = {}
    KEEP_SERVER = {}
    # Matches a block locator with a "+A<40 hex>@<8 hex>" hint.
    local_locator_re = r"[0-9a-f]{32}\+\d+\+A[a-f0-9]{40}@[a-f0-9]{8}"
    # Matches a block locator with a "+R<5 letters>-<40 hex>@<8 hex>" hint.
    remote_locator_re = r"[0-9a-f]{32}\+\d+\+R[a-z]{5}-[a-f0-9]{40}@[a-f0-9]{8}"

    def setUp(self):
        # Capture the real KeepClient.put before any test patches it, so
        # mocked versions can delegate to it through side_effect.
        self.keep_put = arvados.keep.KeepClient.put

    def _count_locators(self, pattern, collection):
        """Return how many times `pattern` matches in the collection's manifest text."""
        return len(re.findall(pattern, collection.manifest_text()))

    @mock.patch('arvados.keep.KeepClient.put', autospec=True)
    def test_storage_classes_desired(self, put_mock):
        """The collection's storage classes must be passed to KeepClient.put."""
        put_mock.side_effect = self.keep_put
        coll = Collection(storage_classes_desired=['default'])
        with coll.open("file.txt", 'wb') as writer:
            writer.write('content')
        coll.save_new()
        # call_args is an (args, kwargs) pair; only the keywords matter here.
        put_kwargs = put_mock.call_args[1]
        self.assertEqual(['default'], put_kwargs['classes'])

    @mock.patch('arvados.keep.KeepClient.put', autospec=True)
    def test_repacked_block_submission_get_permission_token(self, mocked_put):
        '''
        Make sure that those blocks that are committed after repacking small ones,
        get their permission tokens assigned on the collection manifest.
        '''
        def slow_put(*args, **kwargs):
            # Delay every put by one second to simulate a slow Keep service.
            time.sleep(1)
            return self.keep_put(*args, **kwargs)

        mocked_put.side_effect = slow_put
        coll = Collection()
        # Write 70 files of about 1 MiB each, closing them without flushing,
        # so that small blocks get repacked into a big one before the upload
        # finishes.
        for index in range(70):
            writer = coll.open("file_{}.txt".format(index), 'wb')
            letter = random.choice('abcdefghijklmnopqrstuvwxyz')
            writer.write(letter * (2**20+index))
            writer.close(flush=False)
        # Two blocks are expected, each carrying its token.
        self.assertEqual(self._count_locators(self.local_locator_re, coll), 2)

    @mock.patch('arvados.keep.KeepClient.refresh_signature')
    def test_copy_remote_blocks_on_save_new(self, rs_mock):
        """save_new() must replace remote block locators with local ones."""
        remote_block_loc = "acbd18db4cc2f85cedef654fccc4a4d8+3+Remote-" + "a" * 40 + "@abcdef01"
        local_block_loc = "acbd18db4cc2f85cedef654fccc4a4d8+3+A" + "b" * 40 + "@abcdef01"
        rs_mock.return_value = local_block_loc
        coll = Collection(". " + remote_block_loc + " 0:3:foofile.txt\n")

        # Before saving: one remote locator and no local ones.
        self.assertEqual(self._count_locators(self.remote_locator_re, coll), 1)
        self.assertEqual(self._count_locators(self.local_locator_re, coll), 0)

        coll.save_new()
        rs_mock.assert_called()

        # After saving: the remote locator became a local one.
        self.assertEqual(self._count_locators(self.remote_locator_re, coll), 0)
        self.assertEqual(self._count_locators(self.local_locator_re, coll), 1)

    @mock.patch('arvados.keep.KeepClient.refresh_signature')
    def test_copy_remote_blocks_on_save(self, rs_mock):
        """save() must replace remote block locators copied from another collection."""
        remote_block_loc = "acbd18db4cc2f85cedef654fccc4a4d8+3+Remote-" + "a" * 40 + "@abcdef01"
        local_block_loc = "acbd18db4cc2f85cedef654fccc4a4d8+3+A" + "b" * 40 + "@abcdef01"
        rs_mock.return_value = local_block_loc

        # Collection that references a remote block.
        remote_c = Collection(". " + remote_block_loc + " 0:3:foofile.txt\n")
        self.assertEqual(self._count_locators(self.remote_locator_re, remote_c), 1)

        # Saved collection that holds one locally written file.
        local_c = Collection()
        with local_c.open('barfile.txt', 'wb') as writer:
            writer.write('bar')
        local_c.save_new()
        self.assertEqual(self._count_locators(self.local_locator_re, local_c), 1)
        self.assertEqual(self._count_locators(self.remote_locator_re, local_c), 0)

        # Copying the remote file adds a remote locator to the local manifest.
        local_c.copy('./foofile.txt', './copied/foofile.txt', remote_c)
        self.assertEqual(self._count_locators(self.local_locator_re, local_c), 1)
        self.assertEqual(self._count_locators(self.remote_locator_re, local_c), 1)

        # Saving must leave only local locators in the manifest.
        local_c.save()
        rs_mock.assert_called()
        self.assertEqual(self._count_locators(self.local_locator_re, local_c), 2)
        self.assertEqual(self._count_locators(self.remote_locator_re, local_c), 0)
1374
1375
class NewCollectionTestCaseWithServers(run_test_server.TestCaseWithServers):
    """Collection tests covering version preservation and small-block packing."""
    # NOTE(review): unlike the sibling test classes in this file, this class
    # sets neither MAIN_SERVER nor KEEP_SERVER -- confirm that is intended.

    def test_preserve_version_on_save(self):
        """save_new()/save() must pass preserve_version on to the stored record."""
        coll = Collection()

        def fetch_record():
            # Read the collection record back from the API server.
            return arvados.api().collections().get(
                uuid=coll.manifest_locator()).execute()

        coll.save_new(preserve_version=True)
        record = fetch_record()
        self.assertEqual(record['version'], 1)
        self.assertEqual(record['preserve_version'], True)

        # Each step writes one more file, saves with the given flag and
        # expects the version number to have advanced by one.
        steps = (
            ("foo.txt", b"foo", True, 2),
            ("bar.txt", b"bar", False, 3),
        )
        for filename, content, preserve, expected_version in steps:
            with coll.open(filename, "wb") as writer:
                writer.write(content)
            coll.save(preserve_version=preserve)
            record = fetch_record()
            self.assertEqual(record['version'], expected_version)
            self.assertEqual(record['preserve_version'], preserve)

    def test_get_manifest_text_only_committed(self):
        """With only_committed=True, the manifest must list only committed blocks."""
        coll = Collection()

        def committed_manifest():
            return coll._get_manifest_text(".",
                                           strip=False,
                                           normalize=False,
                                           only_committed=True)

        with coll.open("count.txt", "wb") as count_writer:
            # foo.txt is written and flushed, forcing its block to be committed.
            with coll.open("foo.txt", "wb") as foo_writer:
                foo_writer.write(b"foo")
                foo_writer.flush()
            count_writer.write(b"0123456789")
            # count.txt has not been flushed yet, so it is expected to show
            # up as an empty segment.
            self.assertEqual(
                committed_manifest(),
                '. acbd18db4cc2f85cedef654fccc4a4d8+3 0:0:count.txt 0:3:foo.txt\n')
            # Flush count.txt too, forcing its block to be committed before
            # the file gets closed.
            count_writer.flush()
        self.assertEqual(
            committed_manifest(),
            ". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:10:count.txt 10:3:foo.txt\n")

    def test_only_small_blocks_are_packed_together(self):
        """A big file must not share a block with previously written small files."""
        coll = Collection()
        # Two small files, closed without flushing.
        for filename, content in (("count.txt", b"0123456789"),
                                  ("foo.txt", b"foo")):
            writer = coll.open(filename, "wb")
            writer.write(content)
            writer.close(flush=False)
        # Then a big file: 33 MB > KEEP_BLOCK_SIZE/2, so it shouldn't be
        # packed with the small ones above.
        big_writer = coll.open("bigfile.txt", "wb")
        big_writer.write(b"x" * (33 * 1024 * 1024))
        big_writer.close(flush=False)
        expected = '. 2d303c138c118af809f39319e5d507e9+34603008 a8430a058b8fbf408e1931b794dbd6fb+13 0:34603008:bigfile.txt 34603008:10:count.txt 34603018:3:foo.txt\n'
        self.assertEqual(coll.manifest_text("."), expected)

    def test_flush_after_small_block_packing(self):
        """Flushing an unmodified file after packing must leave the manifest unchanged."""
        packed_manifest = '. a8430a058b8fbf408e1931b794dbd6fb+13 0:10:count.txt 10:3:foo.txt\n'
        coll = Collection()
        # Two small files, closed without flushing.
        for filename, content in (("count.txt", b"0123456789"),
                                  ("foo.txt", b"foo")):
            writer = coll.open(filename, "wb")
            writer.write(content)
            writer.close(flush=False)

        self.assertEqual(coll.manifest_text(), packed_manifest)

        # Reopen one of the files and close it with a flush, writing nothing.
        reopened = coll.open("count.txt", "rb+")
        reopened.close(flush=True)

        self.assertEqual(coll.manifest_text(), packed_manifest)

    def test_write_after_small_block_packing2(self):
        """Overwriting part of a packed file must add a new block for the new data."""
        coll = Collection()
        # Two small files, closed without flushing.
        for filename, content in (("count.txt", b"0123456789"),
                                  ("foo.txt", b"foo")):
            writer = coll.open(filename, "wb")
            writer.write(content)
            writer.close(flush=False)

        self.assertEqual(
            coll.manifest_text(),
            '. a8430a058b8fbf408e1931b794dbd6fb+13 0:10:count.txt 10:3:foo.txt\n')

        # Overwrite the first three bytes of count.txt.
        reopened = coll.open("count.txt", "rb+")
        reopened.write(b"abc")
        reopened.close(flush=False)

        self.assertEqual(
            coll.manifest_text(),
            '. 900150983cd24fb0d6963f7d28e17f72+3 a8430a058b8fbf408e1931b794dbd6fb+13 0:3:count.txt 6:7:count.txt 13:3:foo.txt\n')


    def test_small_block_packing_with_overwrite(self):
        """Rewriting a small file before packing must pack only its latest content."""
        coll = Collection()
        # Create two empty files and give each two bytes of content.
        for filename, content in (("b1", b"b1"), ("b2", b"b2")):
            coll.open(filename, "wb").close()
            coll[filename].writeto(0, content, 0)

        # Replace the content of the first file.
        coll["b1"].writeto(0, b"1b", 0)

        packed_block = "ed4f3f67c70b02b29c50ce1ea26666bd+4"
        self.assertEqual(coll.manifest_text(),
                         ". " + packed_block + " 0:2:b1 2:2:b2\n")
        self.assertEqual(coll["b1"].manifest_text(),
                         ". " + packed_block + " 0:2:b1\n")
        self.assertEqual(coll["b2"].manifest_text(),
                         ". " + packed_block + " 2:2:b2\n")
1494
1495
class CollectionCreateUpdateTest(run_test_server.TestCaseWithServers):
    """Tests for creating, saving, diffing and updating collections."""
    MAIN_SERVER = {}
    KEEP_SERVER = {}

    def create_count_txt(self):
        """Return a collection saved while empty, plus an unsaved count.txt.

        The empty collection is saved to the API server first; count.txt is
        then written locally and not saved.
        """
        empty_pdh = "d41d8cd98f00b204e9800998ecf8427e+0"

        coll = Collection()
        coll.save_new("CollectionCreateUpdateTest", ensure_unique_name=True)
        self.assertEqual(coll.portable_data_hash(), empty_pdh)
        self.assertEqual(coll.api_response()["portable_data_hash"], empty_pdh)

        with coll.open("count.txt", "wb") as writer:
            writer.write(b"0123456789")

        self.assertEqual(
            coll.portable_manifest_text(),
            ". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n")

        return coll

    def _assert_intermediate_archive_saved(self, coll):
        """Check the signed manifest and the metadata echoed in the API response."""
        self.assertRegex(
            coll.manifest_text(),
            r"^\. 781e5e245d69b566979b86e28d23f2c7\+10\+A[a-f0-9]{40}@[a-f0-9]{8} 0:10:count\.txt$")
        expected_fields = (
            ("storage_classes_desired", ['archive']),
            ("properties", {'type' : 'Intermediate'}),
            ("trash_at", '2111-01-01T11:11:11.111111000Z'),
        )
        for field, expected in expected_fields:
            self.assertEqual(coll.api_response()[field], expected)

    def test_create_and_save(self):
        """save() must store properties, storage classes and trash_at."""
        coll = self.create_count_txt()
        trash_time = datetime.datetime(2111, 1, 1, 11, 11, 11, 111111)
        coll.save(properties={'type' : 'Intermediate'},
                  storage_classes=['archive'],
                  trash_at=trash_time)
        self._assert_intermediate_archive_saved(coll)


    def test_create_and_save_new(self):
        """save_new() must store properties, storage classes and trash_at."""
        coll = self.create_count_txt()
        trash_time = datetime.datetime(2111, 1, 1, 11, 11, 11, 111111)
        coll.save_new(properties={'type' : 'Intermediate'},
                      storage_classes=['archive'],
                      trash_at=trash_time)
        self._assert_intermediate_archive_saved(coll)

    def test_create_and_save_after_commiting(self):
        """A second save() must replace the metadata stored by the first one."""
        coll = self.create_count_txt()
        first_save = {
            'properties': {'type' : 'Intermediate'},
            'storage_classes': ['hot'],
            'trash_at': datetime.datetime(2111, 1, 1, 11, 11, 11, 111111),
        }
        second_save = {
            'properties': {'type' : 'Output'},
            'storage_classes': ['cold'],
            'trash_at': datetime.datetime(2222, 2, 2, 22, 22, 22, 222222),
        }
        for save_kwargs in (first_save, second_save):
            coll.save(**save_kwargs)

        # Only the values from the second save are expected to remain.
        self.assertEqual(coll.api_response()["storage_classes_desired"], ['cold'])
        self.assertEqual(coll.api_response()["properties"], {'type' : 'Output'})
        self.assertEqual(coll.api_response()["trash_at"], '2222-02-02T22:22:22.222222000Z')

    def test_create_diff_apply(self):
        """Applying a diff against a modified copy must yield the same data hash."""
        original = self.create_count_txt()
        original.save()

        # Load the saved collection again and rewrite its file locally.
        modified = Collection(original.manifest_locator())
        with modified.open("count.txt", "wb") as writer:
            writer.write(b"abcdefg")

        changes = original.diff(modified)

        expected_first = (arvados.collection.MOD, u'./count.txt',
                          original["count.txt"], modified["count.txt"])
        self.assertEqual(changes[0], expected_first)

        original.apply(changes)
        self.assertEqual(original.portable_data_hash(),
                         modified.portable_data_hash())

    def test_diff_apply_with_token(self):
        """Applying a diff must carry over the block signature from the other side."""
        signature = "+A715fd31f8111894f717eb1003c1b0216799dd9ec@54f5dd1a"
        baseline = CollectionReader(
            ". 781e5e245d69b566979b86e28d23f2c7+10" + signature + " 0:10:count.txt\n")
        unsigned = Collection(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n")
        other = CollectionReader(
            ". 7ac66c0f148de9519b8bd264312c4d64+7" + signature + " 0:7:count.txt\n")

        changes = baseline.diff(other)
        # The expected tuple is built from the unsigned collection's file,
        # not from baseline's.
        expected_changes = [
            ('mod', u'./count.txt', unsigned["count.txt"], other["count.txt"]),
        ]
        self.assertEqual(changes, expected_changes)

        unsigned.apply(changes)

        self.assertEqual(
            unsigned.manifest_text(),
            ". 7ac66c0f148de9519b8bd264312c4d64+7" + signature + " 0:7:count.txt\n")


    def test_create_and_update(self):
        """update() must bring in changes saved through another Collection object."""
        first = self.create_count_txt()
        first.save()

        # A second object for the same collection rewrites the file and saves.
        second = arvados.collection.Collection(first.manifest_locator())
        with second.open("count.txt", "wb") as writer:
            writer.write(b"abcdefg")

        second.save()

        self.assertNotEqual(first.portable_data_hash(),
                            second.portable_data_hash())
        first.update()
        self.assertEqual(first.portable_data_hash(),
                         second.portable_data_hash())


    def test_create_and_update_with_conflict(self):
        """update() must put the saved version in a conflict file when both sides changed."""
        first = self.create_count_txt()
        first.save()

        # Local change that is not saved.
        with first.open("count.txt", "wb") as writer:
            writer.write(b"XYZ")

        # A different change, saved through a second object.
        second = arvados.collection.Collection(first.manifest_locator())
        with second.open("count.txt", "wb") as writer:
            writer.write(b"abcdefg")

        second.save()

        first.update()
        manifest_pattern = r"\. e65075d550f9b5bf9992fa1d71a131be\+3\S* 7ac66c0f148de9519b8bd264312c4d64\+7\S* 0:3:count\.txt 3:7:count\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$"
        self.assertRegex(first.manifest_text(), manifest_pattern)

    def test_pdh_is_native_str(self):
        """portable_data_hash() must return exactly the type of a '' literal."""
        coll = self.create_count_txt()
        native_str_type = type('')
        self.assertEqual(native_str_type, type(coll.portable_data_hash()))
1621
1622
# Run the tests through unittest's command-line entry point when this file
# is executed directly as a script.
if __name__ == '__main__':
    unittest.main()