16053: Specify utf8 encoding when creating databases.
[arvados.git] / sdk / python / tests / test_collections.py
1 # Copyright (C) The Arvados Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: Apache-2.0
4
5 from __future__ import absolute_import
6
7 from builtins import object
8 import arvados
9 import copy
10 import mock
11 import os
12 import pprint
13 import random
14 import re
15 import sys
16 import tempfile
17 import datetime
18 import ciso8601
19 import time
20 import unittest
21
22 from . import run_test_server
23 from arvados._ranges import Range, LocatorAndRange
24 from arvados.collection import Collection, CollectionReader
25 from . import arvados_testutil as tutil
26
class TestResumableWriter(arvados.ResumableCollectionWriter):
    """ResumableCollectionWriter subclass used by the resume tests."""

    # PUT to Keep every 1K.
    KEEP_BLOCK_SIZE = 1024

    def current_state(self):
        """Return dump_state() output, passed through copy.deepcopy."""
        snapshot = self.dump_state(copy.deepcopy)
        return snapshot
32
33
34 class ArvadosCollectionsTest(run_test_server.TestCaseWithServers,
35                              tutil.ArvadosBaseTestCase):
36     MAIN_SERVER = {}
37
38     @classmethod
39     def setUpClass(cls):
40         super(ArvadosCollectionsTest, cls).setUpClass()
41         # need admin privileges to make collections with unsigned blocks
42         run_test_server.authorize_with('admin')
43         cls.api_client = arvados.api('v1')
44         cls.keep_client = arvados.KeepClient(api_client=cls.api_client,
45                                              local_store=cls.local_store)
46
47     def write_foo_bar_baz(self):
48         cw = arvados.CollectionWriter(self.api_client)
49         self.assertEqual(cw.current_stream_name(), '.',
50                          'current_stream_name() should be "." now')
51         cw.set_current_file_name('foo.txt')
52         cw.write(b'foo')
53         self.assertEqual(cw.current_file_name(), 'foo.txt',
54                          'current_file_name() should be foo.txt now')
55         cw.start_new_file('bar.txt')
56         cw.write(b'bar')
57         cw.start_new_stream('baz')
58         cw.write(b'baz')
59         cw.set_current_file_name('baz.txt')
60         self.assertEqual(cw.manifest_text(),
61                          ". 3858f62230ac3c915f300c664312c63f+6 0:3:foo.txt 3:3:bar.txt\n" +
62                          "./baz 73feffa4b7f6bb68e44cf984c85f6e88+3 0:3:baz.txt\n",
63                          "wrong manifest: got {}".format(cw.manifest_text()))
64         cw.save_new()
65         return cw.portable_data_hash()
66
67     def test_pdh_is_native_str(self):
68         pdh = self.write_foo_bar_baz()
69         self.assertEqual(type(''), type(pdh))
70
71     def test_keep_local_store(self):
72         self.assertEqual(self.keep_client.put(b'foo'), 'acbd18db4cc2f85cedef654fccc4a4d8+3', 'wrong md5 hash from Keep.put')
73         self.assertEqual(self.keep_client.get('acbd18db4cc2f85cedef654fccc4a4d8+3'), b'foo', 'wrong data from Keep.get')
74
75     def test_local_collection_writer(self):
76         self.assertEqual(self.write_foo_bar_baz(),
77                          '23ca013983d6239e98931cc779e68426+114',
78                          'wrong locator hash: ' + self.write_foo_bar_baz())
79
80     def test_local_collection_reader(self):
81         foobarbaz = self.write_foo_bar_baz()
82         cr = arvados.CollectionReader(
83             foobarbaz + '+Xzizzle', self.api_client)
84         got = []
85         for s in cr.all_streams():
86             for f in s.all_files():
87                 got += [[f.size(), f.stream_name(), f.name(), f.read(2**26)]]
88         expected = [[3, '.', 'foo.txt', b'foo'],
89                     [3, '.', 'bar.txt', b'bar'],
90                     [3, './baz', 'baz.txt', b'baz']]
91         self.assertEqual(got,
92                          expected)
93         stream0 = cr.all_streams()[0]
94         self.assertEqual(stream0.readfrom(0, 0),
95                          b'',
96                          'reading zero bytes should have returned empty string')
97         self.assertEqual(stream0.readfrom(0, 2**26),
98                          b'foobar',
99                          'reading entire stream failed')
100         self.assertEqual(stream0.readfrom(2**26, 0),
101                          b'',
102                          'reading zero bytes should have returned empty string')
103         self.assertEqual(3, len(cr))
104         self.assertTrue(cr)
105
106     def _test_subset(self, collection, expected):
107         cr = arvados.CollectionReader(collection, self.api_client)
108         for s in cr.all_streams():
109             for ex in expected:
110                 if ex[0] == s:
111                     f = s.files()[ex[2]]
112                     got = [f.size(), f.stream_name(), f.name(), "".join(f.readall(2**26))]
113                     self.assertEqual(got,
114                                      ex,
115                                      'all_files|as_manifest did not preserve manifest contents: got %s expected %s' % (got, ex))
116
117     def test_collection_manifest_subset(self):
118         foobarbaz = self.write_foo_bar_baz()
119         self._test_subset(foobarbaz,
120                           [[3, '.',     'bar.txt', b'bar'],
121                            [3, '.',     'foo.txt', b'foo'],
122                            [3, './baz', 'baz.txt', b'baz']])
123         self._test_subset((". %s %s 0:3:foo.txt 3:3:bar.txt\n" %
124                            (self.keep_client.put(b"foo"),
125                             self.keep_client.put(b"bar"))),
126                           [[3, '.', 'bar.txt', b'bar'],
127                            [3, '.', 'foo.txt', b'foo']])
128         self._test_subset((". %s %s 0:2:fo.txt 2:4:obar.txt\n" %
129                            (self.keep_client.put(b"foo"),
130                             self.keep_client.put(b"bar"))),
131                           [[2, '.', 'fo.txt', b'fo'],
132                            [4, '.', 'obar.txt', b'obar']])
133         self._test_subset((". %s %s 0:2:fo.txt 2:0:zero.txt 2:2:ob.txt 4:2:ar.txt\n" %
134                            (self.keep_client.put(b"foo"),
135                             self.keep_client.put(b"bar"))),
136                           [[2, '.', 'ar.txt', b'ar'],
137                            [2, '.', 'fo.txt', b'fo'],
138                            [2, '.', 'ob.txt', b'ob'],
139                            [0, '.', 'zero.txt', b'']])
140
141     def test_collection_empty_file(self):
142         cw = arvados.CollectionWriter(self.api_client)
143         cw.start_new_file('zero.txt')
144         cw.write(b'')
145
146         self.assertEqual(cw.manifest_text(), ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:zero.txt\n")
147         self.check_manifest_file_sizes(cw.manifest_text(), [0])
148         cw = arvados.CollectionWriter(self.api_client)
149         cw.start_new_file('zero.txt')
150         cw.write(b'')
151         cw.start_new_file('one.txt')
152         cw.write(b'1')
153         cw.start_new_stream('foo')
154         cw.start_new_file('zero.txt')
155         cw.write(b'')
156         self.check_manifest_file_sizes(cw.manifest_text(), [0,1,0])
157
158     def test_no_implicit_normalize(self):
159         cw = arvados.CollectionWriter(self.api_client)
160         cw.start_new_file('b')
161         cw.write(b'b')
162         cw.start_new_file('a')
163         cw.write(b'')
164         self.check_manifest_file_sizes(cw.manifest_text(), [1,0])
165         self.check_manifest_file_sizes(
166             arvados.CollectionReader(
167                 cw.manifest_text()).manifest_text(normalize=True),
168             [0,1])
169
170     def check_manifest_file_sizes(self, manifest_text, expect_sizes):
171         cr = arvados.CollectionReader(manifest_text, self.api_client)
172         got_sizes = []
173         for f in cr.all_files():
174             got_sizes += [f.size()]
175         self.assertEqual(got_sizes, expect_sizes, "got wrong file sizes %s, expected %s" % (got_sizes, expect_sizes))
176
177     def test_normalized_collection(self):
178         m1 = """. 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt
179 . 085c37f02916da1cad16f93c54d899b7+41 0:41:md5sum.txt
180 . 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md5sum.txt
181 """
182         self.assertEqual(arvados.CollectionReader(m1, self.api_client).manifest_text(normalize=True),
183                          """. 5348b82a029fd9e971a811ce1f71360b+43 085c37f02916da1cad16f93c54d899b7+41 8b22da26f9f433dea0a10e5ec66d73ba+43 0:127:md5sum.txt
184 """)
185
186         m2 = """. 204e43b8a1185621ca55a94839582e6f+67108864 b9677abbac956bd3e86b1deb28dfac03+67108864 fc15aff2a762b13f521baf042140acec+67108864 323d2a3ce20370c4ca1d3462a344f8fd+25885655 0:227212247:var-GS000016015-ASM.tsv.bz2
187 """
188         self.assertEqual(arvados.CollectionReader(m2, self.api_client).manifest_text(normalize=True), m2)
189
190         m3 = """. 5348b82a029fd9e971a811ce1f71360b+43 3:40:md5sum.txt
191 . 085c37f02916da1cad16f93c54d899b7+41 0:41:md5sum.txt
192 . 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md5sum.txt
193 """
194         self.assertEqual(arvados.CollectionReader(m3, self.api_client).manifest_text(normalize=True),
195                          """. 5348b82a029fd9e971a811ce1f71360b+43 085c37f02916da1cad16f93c54d899b7+41 8b22da26f9f433dea0a10e5ec66d73ba+43 3:124:md5sum.txt
196 """)
197
198         m4 = """. 204e43b8a1185621ca55a94839582e6f+67108864 0:3:foo/bar
199 ./zzz 204e43b8a1185621ca55a94839582e6f+67108864 0:999:zzz
200 ./foo 323d2a3ce20370c4ca1d3462a344f8fd+25885655 0:3:bar
201 """
202         self.assertEqual(arvados.CollectionReader(m4, self.api_client).manifest_text(normalize=True),
203                          """./foo 204e43b8a1185621ca55a94839582e6f+67108864 323d2a3ce20370c4ca1d3462a344f8fd+25885655 0:3:bar 67108864:3:bar
204 ./zzz 204e43b8a1185621ca55a94839582e6f+67108864 0:999:zzz
205 """)
206
207         m5 = """. 204e43b8a1185621ca55a94839582e6f+67108864 0:3:foo/bar
208 ./zzz 204e43b8a1185621ca55a94839582e6f+67108864 0:999:zzz
209 ./foo 204e43b8a1185621ca55a94839582e6f+67108864 3:3:bar
210 """
211         self.assertEqual(arvados.CollectionReader(m5, self.api_client).manifest_text(normalize=True),
212                          """./foo 204e43b8a1185621ca55a94839582e6f+67108864 0:6:bar
213 ./zzz 204e43b8a1185621ca55a94839582e6f+67108864 0:999:zzz
214 """)
215
216         with self.data_file('1000G_ref_manifest') as f6:
217             m6 = f6.read()
218             self.assertEqual(arvados.CollectionReader(m6, self.api_client).manifest_text(normalize=True), m6)
219
220         with self.data_file('jlake_manifest') as f7:
221             m7 = f7.read()
222             self.assertEqual(arvados.CollectionReader(m7, self.api_client).manifest_text(normalize=True), m7)
223
224         m8 = """./a\\040b\\040c 59ca0efa9f5633cb0371bbc0355478d8+13 0:13:hello\\040world.txt
225 """
226         self.assertEqual(arvados.CollectionReader(m8, self.api_client).manifest_text(normalize=True), m8)
227
    def test_locators_and_ranges(self):
        # Table of arvados.locators_and_ranges(ranges, start, size) calls
        # and the LocatorAndRange lists they are expected to return.

        # Six consecutive ranges 'a'..'f' of 10 each, covering 0..59.
        blocks2 = [Range('a', 0, 10),
                   Range('b', 10, 10),
                   Range('c', 20, 10),
                   Range('d', 30, 10),
                   Range('e', 40, 10),
                   Range('f', 50, 10)]

        # Two-unit requests starting 2 into each range, plus requests that
        # start past the end (62) or before the beginning (-2).
        self.assertEqual(arvados.locators_and_ranges(blocks2,  2,  2), [LocatorAndRange('a', 10, 2, 2)])
        self.assertEqual(arvados.locators_and_ranges(blocks2, 12, 2), [LocatorAndRange('b', 10, 2, 2)])
        self.assertEqual(arvados.locators_and_ranges(blocks2, 22, 2), [LocatorAndRange('c', 10, 2, 2)])
        self.assertEqual(arvados.locators_and_ranges(blocks2, 32, 2), [LocatorAndRange('d', 10, 2, 2)])
        self.assertEqual(arvados.locators_and_ranges(blocks2, 42, 2), [LocatorAndRange('e', 10, 2, 2)])
        self.assertEqual(arvados.locators_and_ranges(blocks2, 52, 2), [LocatorAndRange('f', 10, 2, 2)])
        self.assertEqual(arvados.locators_and_ranges(blocks2, 62, 2), [])
        self.assertEqual(arvados.locators_and_ranges(blocks2, -2, 2), [])

        # Requests starting exactly where each range starts.
        self.assertEqual(arvados.locators_and_ranges(blocks2,  0,  2), [LocatorAndRange('a', 10, 0, 2)])
        self.assertEqual(arvados.locators_and_ranges(blocks2, 10, 2), [LocatorAndRange('b', 10, 0, 2)])
        self.assertEqual(arvados.locators_and_ranges(blocks2, 20, 2), [LocatorAndRange('c', 10, 0, 2)])
        self.assertEqual(arvados.locators_and_ranges(blocks2, 30, 2), [LocatorAndRange('d', 10, 0, 2)])
        self.assertEqual(arvados.locators_and_ranges(blocks2, 40, 2), [LocatorAndRange('e', 10, 0, 2)])
        self.assertEqual(arvados.locators_and_ranges(blocks2, 50, 2), [LocatorAndRange('f', 10, 0, 2)])
        self.assertEqual(arvados.locators_and_ranges(blocks2, 60, 2), [])
        # NOTE(review): identical to the (-2, 2) assertion in the group above.
        self.assertEqual(arvados.locators_and_ranges(blocks2, -2, 2), [])

        # Requests that straddle the boundary between two ranges; the last
        # one (59) runs off the end and yields a single entry.
        self.assertEqual(arvados.locators_and_ranges(blocks2,  9,  2), [LocatorAndRange('a', 10, 9, 1), LocatorAndRange('b', 10, 0, 1)])
        self.assertEqual(arvados.locators_and_ranges(blocks2, 19, 2), [LocatorAndRange('b', 10, 9, 1), LocatorAndRange('c', 10, 0, 1)])
        self.assertEqual(arvados.locators_and_ranges(blocks2, 29, 2), [LocatorAndRange('c', 10, 9, 1), LocatorAndRange('d', 10, 0, 1)])
        self.assertEqual(arvados.locators_and_ranges(blocks2, 39, 2), [LocatorAndRange('d', 10, 9, 1), LocatorAndRange('e', 10, 0, 1)])
        self.assertEqual(arvados.locators_and_ranges(blocks2, 49, 2), [LocatorAndRange('e', 10, 9, 1), LocatorAndRange('f', 10, 0, 1)])
        self.assertEqual(arvados.locators_and_ranges(blocks2, 59, 2), [LocatorAndRange('f', 10, 9, 1)])


        # Same layout with a seventh range 'g' (an odd number of ranges).
        blocks3 = [Range('a', 0, 10),
                  Range('b', 10, 10),
                  Range('c', 20, 10),
                  Range('d', 30, 10),
                  Range('e', 40, 10),
                  Range('f', 50, 10),
                   Range('g', 60, 10)]

        self.assertEqual(arvados.locators_and_ranges(blocks3,  2,  2), [LocatorAndRange('a', 10, 2, 2)])
        self.assertEqual(arvados.locators_and_ranges(blocks3, 12, 2), [LocatorAndRange('b', 10, 2, 2)])
        self.assertEqual(arvados.locators_and_ranges(blocks3, 22, 2), [LocatorAndRange('c', 10, 2, 2)])
        self.assertEqual(arvados.locators_and_ranges(blocks3, 32, 2), [LocatorAndRange('d', 10, 2, 2)])
        self.assertEqual(arvados.locators_and_ranges(blocks3, 42, 2), [LocatorAndRange('e', 10, 2, 2)])
        self.assertEqual(arvados.locators_and_ranges(blocks3, 52, 2), [LocatorAndRange('f', 10, 2, 2)])
        self.assertEqual(arvados.locators_and_ranges(blocks3, 62, 2), [LocatorAndRange('g', 10, 2, 2)])


        # Three ranges of unequal size (10, 15, 5), covering 0..29.
        blocks = [Range('a', 0, 10),
                  Range('b', 10, 15),
                  Range('c', 25, 5)]
        # A zero-size request yields nothing.
        self.assertEqual(arvados.locators_and_ranges(blocks, 1, 0), [])
        self.assertEqual(arvados.locators_and_ranges(blocks, 0, 5), [LocatorAndRange('a', 10, 0, 5)])
        self.assertEqual(arvados.locators_and_ranges(blocks, 3, 5), [LocatorAndRange('a', 10, 3, 5)])
        self.assertEqual(arvados.locators_and_ranges(blocks, 0, 10), [LocatorAndRange('a', 10, 0, 10)])

        self.assertEqual(arvados.locators_and_ranges(blocks, 0, 11), [LocatorAndRange('a', 10, 0, 10),
                                                                      LocatorAndRange('b', 15, 0, 1)])
        self.assertEqual(arvados.locators_and_ranges(blocks, 1, 11), [LocatorAndRange('a', 10, 1, 9),
                                                                      LocatorAndRange('b', 15, 0, 2)])
        self.assertEqual(arvados.locators_and_ranges(blocks, 0, 25), [LocatorAndRange('a', 10, 0, 10),
                                                                      LocatorAndRange('b', 15, 0, 15)])

        # Requests reaching (or exceeding) the end of the last range.
        self.assertEqual(arvados.locators_and_ranges(blocks, 0, 30), [LocatorAndRange('a', 10, 0, 10),
                                                                      LocatorAndRange('b', 15, 0, 15),
                                                                      LocatorAndRange('c', 5, 0, 5)])
        self.assertEqual(arvados.locators_and_ranges(blocks, 1, 30), [LocatorAndRange('a', 10, 1, 9),
                                                                      LocatorAndRange('b', 15, 0, 15),
                                                                      LocatorAndRange('c', 5, 0, 5)])
        self.assertEqual(arvados.locators_and_ranges(blocks, 0, 31), [LocatorAndRange('a', 10, 0, 10),
                                                                      LocatorAndRange('b', 15, 0, 15),
                                                                      LocatorAndRange('c', 5, 0, 5)])

        self.assertEqual(arvados.locators_and_ranges(blocks, 15, 5), [LocatorAndRange('b', 15, 5, 5)])

        self.assertEqual(arvados.locators_and_ranges(blocks, 8, 17), [LocatorAndRange('a', 10, 8, 2),
                                                                      LocatorAndRange('b', 15, 0, 15)])

        self.assertEqual(arvados.locators_and_ranges(blocks, 8, 20), [LocatorAndRange('a', 10, 8, 2),
                                                                      LocatorAndRange('b', 15, 0, 15),
                                                                      LocatorAndRange('c', 5, 0, 3)])

        self.assertEqual(arvados.locators_and_ranges(blocks, 26, 2), [LocatorAndRange('c', 5, 1, 2)])

        # Fifteen-unit requests starting just before, exactly at, and just
        # after the start of range 'b'.
        self.assertEqual(arvados.locators_and_ranges(blocks, 9, 15), [LocatorAndRange('a', 10, 9, 1),
                                                                      LocatorAndRange('b', 15, 0, 14)])
        self.assertEqual(arvados.locators_and_ranges(blocks, 10, 15), [LocatorAndRange('b', 15, 0, 15)])
        self.assertEqual(arvados.locators_and_ranges(blocks, 11, 15), [LocatorAndRange('b', 15, 1, 14),
                                                                       LocatorAndRange('c', 5, 0, 1)])
320
321     class MockKeep(object):
322         def __init__(self, content, num_retries=0):
323             self.content = content
324
325         def get(self, locator, num_retries=0):
326             return self.content[locator]
327
328     def test_stream_reader(self):
329         keepblocks = {
330             'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa+10': b'abcdefghij',
331             'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb+15': b'klmnopqrstuvwxy',
332             'cccccccccccccccccccccccccccccccc+5': b'z0123',
333         }
334         mk = self.MockKeep(keepblocks)
335
336         sr = arvados.StreamReader([".", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa+10", "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb+15", "cccccccccccccccccccccccccccccccc+5", "0:30:foo"], mk)
337
338         content = b'abcdefghijklmnopqrstuvwxyz0123456789'
339
340         self.assertEqual(sr.readfrom(0, 30), content[0:30])
341         self.assertEqual(sr.readfrom(2, 30), content[2:30])
342
343         self.assertEqual(sr.readfrom(2, 8), content[2:10])
344         self.assertEqual(sr.readfrom(0, 10), content[0:10])
345
346         self.assertEqual(sr.readfrom(0, 5), content[0:5])
347         self.assertEqual(sr.readfrom(5, 5), content[5:10])
348         self.assertEqual(sr.readfrom(10, 5), content[10:15])
349         self.assertEqual(sr.readfrom(15, 5), content[15:20])
350         self.assertEqual(sr.readfrom(20, 5), content[20:25])
351         self.assertEqual(sr.readfrom(25, 5), content[25:30])
352         self.assertEqual(sr.readfrom(30, 5), b'')
353
354     def test_extract_file(self):
355         m1 = """. 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt
356 . 085c37f02916da1cad16f93c54d899b7+41 0:41:md6sum.txt
357 . 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md7sum.txt
358 . 085c37f02916da1cad16f93c54d899b7+41 5348b82a029fd9e971a811ce1f71360b+43 8b22da26f9f433dea0a10e5ec66d73ba+43 47:80:md8sum.txt
359 . 085c37f02916da1cad16f93c54d899b7+41 5348b82a029fd9e971a811ce1f71360b+43 8b22da26f9f433dea0a10e5ec66d73ba+43 40:80:md9sum.txt
360 """
361
362         m2 = arvados.CollectionReader(m1, self.api_client).manifest_text(normalize=True)
363
364         self.assertEqual(m2,
365                          ". 5348b82a029fd9e971a811ce1f71360b+43 085c37f02916da1cad16f93c54d899b7+41 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md5sum.txt 43:41:md6sum.txt 84:43:md7sum.txt 6:37:md8sum.txt 84:43:md8sum.txt 83:1:md9sum.txt 0:43:md9sum.txt 84:36:md9sum.txt\n")
366         files = arvados.CollectionReader(
367             m2, self.api_client).all_streams()[0].files()
368
369         self.assertEqual(files['md5sum.txt'].as_manifest(),
370                          ". 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt\n")
371         self.assertEqual(files['md6sum.txt'].as_manifest(),
372                          ". 085c37f02916da1cad16f93c54d899b7+41 0:41:md6sum.txt\n")
373         self.assertEqual(files['md7sum.txt'].as_manifest(),
374                          ". 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md7sum.txt\n")
375         self.assertEqual(files['md9sum.txt'].as_manifest(),
376                          ". 085c37f02916da1cad16f93c54d899b7+41 5348b82a029fd9e971a811ce1f71360b+43 8b22da26f9f433dea0a10e5ec66d73ba+43 40:80:md9sum.txt\n")
377
378     def test_write_directory_tree(self):
379         cwriter = arvados.CollectionWriter(self.api_client)
380         cwriter.write_directory_tree(self.build_directory_tree(
381                 ['basefile', 'subdir/subfile']))
382         self.assertEqual(cwriter.manifest_text(),
383                          """. c5110c5ac93202d8e0f9e381f22bac0f+8 0:8:basefile
384 ./subdir 1ca4dec89403084bf282ad31e6cf7972+14 0:14:subfile\n""")
385
386     def test_write_named_directory_tree(self):
387         cwriter = arvados.CollectionWriter(self.api_client)
388         cwriter.write_directory_tree(self.build_directory_tree(
389                 ['basefile', 'subdir/subfile']), 'root')
390         self.assertEqual(
391             cwriter.manifest_text(),
392             """./root c5110c5ac93202d8e0f9e381f22bac0f+8 0:8:basefile
393 ./root/subdir 1ca4dec89403084bf282ad31e6cf7972+14 0:14:subfile\n""")
394
395     def test_write_directory_tree_in_one_stream(self):
396         cwriter = arvados.CollectionWriter(self.api_client)
397         cwriter.write_directory_tree(self.build_directory_tree(
398                 ['basefile', 'subdir/subfile']), max_manifest_depth=0)
399         self.assertEqual(cwriter.manifest_text(),
400                          """. 4ace875ffdc6824a04950f06858f4465+22 0:8:basefile 8:14:subdir/subfile\n""")
401
402     def test_write_directory_tree_with_limited_recursion(self):
403         cwriter = arvados.CollectionWriter(self.api_client)
404         cwriter.write_directory_tree(
405             self.build_directory_tree(['f1', 'd1/f2', 'd1/d2/f3']),
406             max_manifest_depth=1)
407         self.assertEqual(cwriter.manifest_text(),
408                          """. bd19836ddb62c11c55ab251ccaca5645+2 0:2:f1
409 ./d1 50170217e5b04312024aa5cd42934494+13 0:8:d2/f3 8:5:f2\n""")
410
411     def test_write_directory_tree_with_zero_recursion(self):
412         cwriter = arvados.CollectionWriter(self.api_client)
413         content = 'd1/d2/f3d1/f2f1'
414         blockhash = tutil.str_keep_locator(content)
415         cwriter.write_directory_tree(
416             self.build_directory_tree(['f1', 'd1/f2', 'd1/d2/f3']),
417             max_manifest_depth=0)
418         self.assertEqual(
419             cwriter.manifest_text(),
420             ". {} 0:8:d1/d2/f3 8:5:d1/f2 13:2:f1\n".format(blockhash))
421
422     def test_write_one_file(self):
423         cwriter = arvados.CollectionWriter(self.api_client)
424         with self.make_test_file() as testfile:
425             cwriter.write_file(testfile.name)
426             self.assertEqual(
427                 cwriter.manifest_text(),
428                 ". 098f6bcd4621d373cade4e832627b4f6+4 0:4:{}\n".format(
429                     os.path.basename(testfile.name)))
430
431     def test_write_named_file(self):
432         cwriter = arvados.CollectionWriter(self.api_client)
433         with self.make_test_file() as testfile:
434             cwriter.write_file(testfile.name, 'foo')
435             self.assertEqual(cwriter.manifest_text(),
436                              ". 098f6bcd4621d373cade4e832627b4f6+4 0:4:foo\n")
437
438     def test_write_multiple_files(self):
439         cwriter = arvados.CollectionWriter(self.api_client)
440         for letter in 'ABC':
441             with self.make_test_file(letter.encode()) as testfile:
442                 cwriter.write_file(testfile.name, letter)
443         self.assertEqual(
444             cwriter.manifest_text(),
445             ". 902fbdd2b1df0c4f70b4a5d23525e932+3 0:1:A 1:1:B 2:1:C\n")
446
447     def test_basic_resume(self):
448         cwriter = TestResumableWriter()
449         with self.make_test_file() as testfile:
450             cwriter.write_file(testfile.name, 'test')
451             resumed = TestResumableWriter.from_state(cwriter.current_state())
452         self.assertEqual(cwriter.manifest_text(), resumed.manifest_text(),
453                           "resumed CollectionWriter had different manifest")
454
455     def test_resume_fails_when_missing_dependency(self):
456         cwriter = TestResumableWriter()
457         with self.make_test_file() as testfile:
458             cwriter.write_file(testfile.name, 'test')
459         self.assertRaises(arvados.errors.StaleWriterStateError,
460                           TestResumableWriter.from_state,
461                           cwriter.current_state())
462
463     def test_resume_fails_when_dependency_mtime_changed(self):
464         cwriter = TestResumableWriter()
465         with self.make_test_file() as testfile:
466             cwriter.write_file(testfile.name, 'test')
467             os.utime(testfile.name, (0, 0))
468             self.assertRaises(arvados.errors.StaleWriterStateError,
469                               TestResumableWriter.from_state,
470                               cwriter.current_state())
471
472     def test_resume_fails_when_dependency_is_nonfile(self):
473         cwriter = TestResumableWriter()
474         cwriter.write_file('/dev/null', 'empty')
475         self.assertRaises(arvados.errors.StaleWriterStateError,
476                           TestResumableWriter.from_state,
477                           cwriter.current_state())
478
479     def test_resume_fails_when_dependency_size_changed(self):
480         cwriter = TestResumableWriter()
481         with self.make_test_file() as testfile:
482             cwriter.write_file(testfile.name, 'test')
483             orig_mtime = os.fstat(testfile.fileno()).st_mtime
484             testfile.write(b'extra')
485             testfile.flush()
486             os.utime(testfile.name, (orig_mtime, orig_mtime))
487             self.assertRaises(arvados.errors.StaleWriterStateError,
488                               TestResumableWriter.from_state,
489                               cwriter.current_state())
490
491     def test_resume_fails_with_expired_locator(self):
492         cwriter = TestResumableWriter()
493         state = cwriter.current_state()
494         # Add an expired locator to the state.
495         state['_current_stream_locators'].append(''.join([
496                     'a' * 32, '+1+A', 'b' * 40, '@', '10000000']))
497         self.assertRaises(arvados.errors.StaleWriterStateError,
498                           TestResumableWriter.from_state, state)
499
500     def test_arbitrary_objects_not_resumable(self):
501         cwriter = TestResumableWriter()
502         with open('/dev/null') as badfile:
503             self.assertRaises(arvados.errors.AssertionError,
504                               cwriter.write_file, badfile)
505
506     def test_arbitrary_writes_not_resumable(self):
507         cwriter = TestResumableWriter()
508         self.assertRaises(arvados.errors.AssertionError,
509                           cwriter.write, "badtext")
510
511
class CollectionTestMixin(tutil.ApiClientMock):
    """Collection fixture constants plus an API client mock with Keep services."""

    API_COLLECTIONS = run_test_server.fixture('collections')

    # Default fixture: the 'foo_file' collection.
    DEFAULT_COLLECTION = API_COLLECTIONS['foo_file']
    DEFAULT_UUID = DEFAULT_COLLECTION['uuid']
    DEFAULT_DATA_HASH = DEFAULT_COLLECTION['portable_data_hash']
    DEFAULT_MANIFEST = DEFAULT_COLLECTION['manifest_text']

    # Alternate fixture: the 'bar_file' collection.
    ALT_COLLECTION = API_COLLECTIONS['bar_file']
    ALT_DATA_HASH = ALT_COLLECTION['portable_data_hash']
    ALT_MANIFEST = ALT_COLLECTION['manifest_text']

    def api_client_mock(self, status=200):
        """Return the parent's API client mock with Keep services mocked.

        `status` is forwarded to mock_keep_services(), which is asked for
        a single service of type 'proxy'.
        """
        mocked_client = super(CollectionTestMixin, self).api_client_mock()
        self.mock_keep_services(
            mocked_client, status=status, service_type='proxy', count=1)
        return mocked_client
526
527
528 @tutil.skip_sleep
529 class CollectionReaderTestCase(unittest.TestCase, CollectionTestMixin):
530     def mock_get_collection(self, api_mock, code, fixturename):
531         body = self.API_COLLECTIONS.get(fixturename)
532         self._mock_api_call(api_mock.collections().get, code, body)
533
534     def api_client_mock(self, status=200):
535         client = super(CollectionReaderTestCase, self).api_client_mock()
536         self.mock_get_collection(client, status, 'foo_file')
537         return client
538
539     def test_init_no_default_retries(self):
540         client = self.api_client_mock(200)
541         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
542         reader.manifest_text()
543         client.collections().get().execute.assert_called_with(num_retries=0)
544
545     def test_uuid_init_success(self):
546         client = self.api_client_mock(200)
547         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client,
548                                           num_retries=3)
549         self.assertEqual(self.DEFAULT_COLLECTION['manifest_text'],
550                          reader.manifest_text())
551         client.collections().get().execute.assert_called_with(num_retries=3)
552
553     def test_uuid_init_failure_raises_api_error(self):
554         client = self.api_client_mock(500)
555         with self.assertRaises(arvados.errors.ApiError):
556             reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
557
558     def test_locator_init(self):
559         client = self.api_client_mock(200)
560         # Ensure Keep will not return anything if asked.
561         with tutil.mock_keep_responses(None, 404):
562             reader = arvados.CollectionReader(self.DEFAULT_DATA_HASH,
563                                               api_client=client)
564             self.assertEqual(self.DEFAULT_MANIFEST, reader.manifest_text())
565
566     def test_init_no_fallback_to_keep(self):
567         # Do not look up a collection UUID or PDH in Keep.
568         for key in [self.DEFAULT_UUID, self.DEFAULT_DATA_HASH]:
569             client = self.api_client_mock(404)
570             with tutil.mock_keep_responses(self.DEFAULT_MANIFEST, 200):
571                 with self.assertRaises(arvados.errors.ApiError):
572                     reader = arvados.CollectionReader(key, api_client=client)
573
574     def test_init_num_retries_propagated(self):
575         # More of an integration test...
576         client = self.api_client_mock(200)
577         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client,
578                                           num_retries=3)
579         with tutil.mock_keep_responses('foo', 500, 500, 200):
580             self.assertEqual(b'foo',
581                              b''.join(f.read(9) for f in reader.all_files()))
582
583     def test_read_nonnormalized_manifest_with_collection_reader(self):
584         # client should be able to use CollectionReader on a manifest without normalizing it
585         client = self.api_client_mock(500)
586         nonnormal = ". acbd18db4cc2f85cedef654fccc4a4d8+3+Aabadbadbee@abeebdee 0:3:foo.txt 1:0:bar.txt 0:3:foo.txt\n"
587         reader = arvados.CollectionReader(
588             nonnormal,
589             api_client=client, num_retries=0)
590         # Ensure stripped_manifest() doesn't mangle our manifest in
591         # any way other than stripping hints.
592         self.assertEqual(
593             re.sub('\+[^\d\s\+]+', '', nonnormal),
594             reader.stripped_manifest())
595         # Ensure stripped_manifest() didn't mutate our reader.
596         self.assertEqual(nonnormal, reader.manifest_text())
597         # Ensure the files appear in the order given in the manifest.
598         self.assertEqual(
599             [[6, '.', 'foo.txt'],
600              [0, '.', 'bar.txt']],
601             [[f.size(), f.stream_name(), f.name()]
602              for f in reader.all_streams()[0].all_files()])
603
604     def test_read_empty_collection(self):
605         client = self.api_client_mock(200)
606         self.mock_get_collection(client, 200, 'empty')
607         reader = arvados.CollectionReader('d41d8cd98f00b204e9800998ecf8427e+0',
608                                           api_client=client)
609         self.assertEqual('', reader.manifest_text())
610         self.assertEqual(0, len(reader))
611         self.assertFalse(reader)
612
613     def test_api_response(self):
614         client = self.api_client_mock()
615         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
616         self.assertEqual(self.DEFAULT_COLLECTION, reader.api_response())
617
618     def check_open_file(self, coll_file, stream_name, file_name, file_size):
619         self.assertFalse(coll_file.closed, "returned file is not open")
620         self.assertEqual(stream_name, coll_file.stream_name())
621         self.assertEqual(file_name, coll_file.name)
622         self.assertEqual(file_size, coll_file.size())
623
624     def test_open_collection_file_one_argument(self):
625         client = self.api_client_mock(200)
626         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
627         cfile = reader.open('./foo', 'rb')
628         self.check_open_file(cfile, '.', 'foo', 3)
629
630     def test_open_deep_file(self):
631         coll_name = 'collection_with_files_in_subdir'
632         client = self.api_client_mock(200)
633         self.mock_get_collection(client, 200, coll_name)
634         reader = arvados.CollectionReader(
635             self.API_COLLECTIONS[coll_name]['uuid'], api_client=client)
636         cfile = reader.open('./subdir2/subdir3/file2_in_subdir3.txt', 'rb')
637         self.check_open_file(cfile, './subdir2/subdir3', 'file2_in_subdir3.txt',
638                              32)
639
640     def test_open_nonexistent_stream(self):
641         client = self.api_client_mock(200)
642         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
643         self.assertRaises(IOError, reader.open, './nonexistent/foo')
644
645     def test_open_nonexistent_file(self):
646         client = self.api_client_mock(200)
647         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
648         self.assertRaises(IOError, reader.open, 'nonexistent')
649
650
@tutil.skip_sleep
class CollectionWriterTestCase(unittest.TestCase, CollectionTestMixin):
    # Tests for arvados.CollectionWriter, run against mocked Keep
    # responses and mocked API clients.

    def mock_keep(self, body, *codes, **headers):
        # Wrap tutil.mock_keep_responses(), adding an
        # 'x-keep-replicas-stored' header of 2 unless the caller supplied
        # its own value for that header.
        if 'x-keep-replicas-stored' not in headers:
            headers['x-keep-replicas-stored'] = 2
        return tutil.mock_keep_responses(body, *codes, **headers)

    def foo_writer(self, **kwargs):
        # Return a CollectionWriter (kwargs are passed to its constructor,
        # with a mock api_client filled in when none is given) that has
        # already had b'foo' written to a new file named 'foo'.
        fallback_client = self.api_client_mock()
        kwargs.setdefault('api_client', fallback_client)
        new_writer = arvados.CollectionWriter(**kwargs)
        new_writer.start_new_file('foo')
        new_writer.write(b'foo')
        return new_writer

    def test_write_whole_collection(self):
        # finish() should return the default data hash when both Keep
        # requests answer 200.
        writer = self.foo_writer()
        with self.mock_keep(self.DEFAULT_DATA_HASH, 200, 200):
            finished = writer.finish()
            self.assertEqual(self.DEFAULT_DATA_HASH, finished)

    def test_write_no_default(self):
        # A single 500 from Keep makes finish() raise KeepWriteError.
        writer = self.foo_writer()
        with self.mock_keep(None, 500), \
             self.assertRaises(arvados.errors.KeepWriteError):
            writer.finish()

    def test_write_insufficient_replicas_via_proxy(self):
        # Three replicas are requested but the one mocked response
        # reports only two stored.
        writer = self.foo_writer(replication=3)
        stored_header = {'x-keep-replicas-stored': 2}
        with self.mock_keep(None, 200, **stored_header):
            self.assertRaises(arvados.errors.KeepWriteError,
                              writer.manifest_text)

    def test_write_insufficient_replicas_via_disks(self):
        # Three replicas are requested, but only two disk services are
        # mocked and each response reports one stored replica.
        client = mock.MagicMock(name='api_client')
        stored_header = {'x-keep-replicas-stored': 1}
        with self.mock_keep(None, 200, 200, **stored_header):
            self.mock_keep_services(client, status=200, service_type='disk', count=2)
            writer = self.foo_writer(api_client=client, replication=3)
            self.assertRaises(arvados.errors.KeepWriteError,
                              writer.manifest_text)

    def test_write_three_replicas(self):
        # Six disk services are mocked: three responses of 500 followed
        # by three of 200, each reporting one stored replica.  All six
        # mocked requests are expected to be made.
        client = mock.MagicMock(name='api_client')
        stored_header = {'x-keep-replicas-stored': 1}
        with self.mock_keep(
                "", 500, 500, 500, 200, 200, 200,
                **stored_header) as keepmock:
            self.mock_keep_services(client, status=200, service_type='disk', count=6)
            writer = self.foo_writer(api_client=client, replication=3)
            writer.manifest_text()
            self.assertEqual(6, keepmock.call_count)

    def test_write_whole_collection_through_retries(self):
        # With num_retries=2, finish() should still return the data hash
        # when Keep answers 500, 500, 200 twice in a row.
        writer = self.foo_writer(num_retries=2)
        response_codes = (500, 500, 200, 500, 500, 200)
        with self.mock_keep(self.DEFAULT_DATA_HASH, *response_codes):
            finished = writer.finish()
            self.assertEqual(self.DEFAULT_DATA_HASH, finished)

    def test_flush_data_retries(self):
        # flush_data() runs against a 500 followed by a 200; afterwards
        # the manifest text must equal the default manifest.
        writer = self.foo_writer(num_retries=2)
        manifest_tokens = self.DEFAULT_MANIFEST.split()
        foo_locator = manifest_tokens[1]
        with self.mock_keep(foo_locator, 500, 200):
            writer.flush_data()
        self.assertEqual(self.DEFAULT_MANIFEST, writer.manifest_text())

    def test_one_open(self):
        # A file opened with only a name lands in stream '.', is closed
        # when its context exits, and rejects writes after that.
        writer = arvados.CollectionWriter(self.api_client_mock())
        with writer.open('out') as out_file:
            self.assertEqual('.', writer.current_stream_name())
            self.assertEqual('out', writer.current_file_name())
            out_file.write(b'test data')
            expected_locator = tutil.str_keep_locator('test data')
        self.assertTrue(out_file.closed, "writer file not closed after context")
        with self.assertRaises(ValueError):
            out_file.write('extra text')
        with self.mock_keep(expected_locator, 200):
            expected_manifest = ". {} 0:9:out\n".format(expected_locator)
            self.assertEqual(expected_manifest, writer.manifest_text())

    def test_open_writelines(self):
        # writelines() output is expected to be stored as the
        # concatenation '123456'.
        writer = arvados.CollectionWriter(self.api_client_mock())
        with writer.open('six') as out_file:
            out_file.writelines(['12', '34', '56'])
            expected_locator = tutil.str_keep_locator('123456')
        with self.mock_keep(expected_locator, 200):
            expected_manifest = ". {} 0:6:six\n".format(expected_locator)
            self.assertEqual(expected_manifest, writer.manifest_text())

    def test_open_flush(self):
        # A flush() between two writes should yield two block locators
        # for the one 12-byte file.
        client = self.api_client_mock()
        first_locator = tutil.str_keep_locator('flush1')
        second_locator = tutil.str_keep_locator('flush2')
        with self.mock_keep((first_locator, 200), (second_locator, 200)):
            writer = arvados.CollectionWriter(client)
            with writer.open('flush_test') as out_file:
                out_file.write(b'flush1')
                out_file.flush()
                out_file.write(b'flush2')
            expected_manifest = ". {} {} 0:12:flush_test\n".format(
                first_locator, second_locator)
            self.assertEqual(expected_manifest, writer.manifest_text())

    def test_two_opens_same_stream(self):
        # Two files written one after the other into stream '.' are
        # expected to share one block holding '1st2nd'.
        writer = arvados.CollectionWriter(self.api_client_mock())
        for file_name, payload in (('1', b'1st'), ('2', b'2nd')):
            with writer.open('.', file_name) as out_file:
                out_file.write(payload)
        expected_locator = tutil.str_keep_locator('1st2nd')
        with self.mock_keep(expected_locator, 200):
            expected_manifest = ". {} 0:3:1 3:3:2\n".format(expected_locator)
            self.assertEqual(expected_manifest, writer.manifest_text())

    def test_two_opens_two_streams(self):
        # Files opened in '.' and './dir' are expected on separate
        # manifest lines, each with its own block locator.
        client = self.api_client_mock()
        root_locator = tutil.str_keep_locator('file')
        dir_locator = tutil.str_keep_locator('indir')
        with self.mock_keep((root_locator, 200), (dir_locator, 200)):
            writer = arvados.CollectionWriter(client)
            with writer.open('file') as out_file:
                out_file.write(b'file')
            with writer.open('./dir', 'indir') as out_file:
                out_file.write(b'indir')
            expected_manifest = ". {} 0:4:file\n./dir {} 0:5:indir\n".format(
                root_locator, dir_locator)
            self.assertEqual(expected_manifest, writer.manifest_text())

    def test_dup_open_fails(self):
        # Opening a second file while the first has not been closed must
        # raise arvados.errors.AssertionError.
        writer = arvados.CollectionWriter(self.api_client_mock())
        # NOTE(review): the reference is kept presumably so the first
        # file stays alive and open -- confirm before removing it.
        first_file = writer.open('one')
        with self.assertRaises(arvados.errors.AssertionError):
            writer.open('two')
783
784
class CollectionMethods(run_test_server.TestCaseWithServers):
    # Checks of Collection's keys()/values()/items() accessors and of the
    # metadata getters after save_new().

    def test_keys_values_items_support_indexing(self):
        coll = Collection()
        with coll.open('foo', 'wb') as writer:
            writer.write(b'foo')
        with coll.open('bar', 'wb') as writer:
            writer.write(b'bar')
        self.assertEqual(2, len(coll.keys()))
        if sys.version_info >= (3, 0):
            first_name, second_name = coll.keys()
        else:
            # keys() supports indexing only for python2 callers.
            first_name = coll.keys()[0]
            second_name = coll.keys()[1]
        self.assertEqual(2, len(coll.values()))
        # Indexing values() only has to succeed; the results are unused.
        first_value = coll.values()[0]
        second_value = coll.values()[1]
        self.assertEqual(2, len(coll.items()))
        # items() must be indexable and pair up with the keys() order.
        self.assertEqual(first_name, coll.items()[0][0])
        self.assertEqual(second_name, coll.items()[1][0])

    def test_get_properties(self):
        # Properties are empty on a new collection and reflect what was
        # passed to save_new() afterwards.
        coll = Collection()
        self.assertEqual(coll.get_properties(), {})
        coll.save_new(properties={"foo":"bar"})
        self.assertEqual(coll.get_properties(), {"foo":"bar"})

    def test_get_trash_at(self):
        # trash_at is None on a new collection; after save_new() it must
        # equal the parsed form of the same timestamp.
        coll = Collection()
        self.assertEqual(coll.get_trash_at(), None)
        trash_time = datetime.datetime(2111, 1, 1, 11, 11, 11, 111111)
        coll.save_new(trash_at=trash_time)
        expected = ciso8601.parse_datetime('2111-01-01T11:11:11.111111000Z')
        self.assertEqual(coll.get_trash_at(), expected)
818
819
class CollectionOpenModes(run_test_server.TestCaseWithServers):
    # Checks which mode strings Collection.open() accepts and rejects.

    def test_open_binary_modes(self):
        c = Collection()
        # Each of these binary modes must open the file and accept a
        # bytes payload.
        for mode in ['wb', 'wb+', 'ab', 'ab+']:
            with c.open('foo', mode) as f:
                f.write(b'foo')

    def test_open_invalid_modes(self):
        c = Collection()
        # Every value in this list, including '' and None, must make
        # open() raise.
        for mode in ['+r', 'aa', '++', 'r+b', 'beer', '', None]:
            with self.assertRaises(Exception):
                c.open('foo', mode)

    def test_open_text_modes(self):
        c = Collection()
        # Seed the file through a binary handle.  The payload is bytes to
        # match the 'wb' mode (as in test_open_binary_modes) rather than
        # relying on a str being implicitly encoded.
        with c.open('foo', 'wb') as f:
            f.write(b'foo')
        for mode in ['r', 'rt', 'r+', 'rt+', 'w', 'wt', 'a', 'at']:
            with c.open('foo', mode) as f:
                if mode[0] == 'r' and '+' not in mode:
                    # Read-only text modes: just read the content back.
                    self.assertEqual('foo', f.read(3))
                else:
                    # Writable text modes: write, rewind, and read the
                    # first three characters back as text.
                    f.write('bar')
                    f.seek(0, os.SEEK_SET)
                    self.assertEqual('bar', f.read(3))
846
847
class TextModes(run_test_server.TestCaseWithServers):
    # Text-mode reads and writes of multi-byte UTF-8 characters, with the
    # Keep block size forced down to 4 bytes so that characters straddle
    # block boundaries.

    def setUp(self):
        # Remember the block size in effect so tearDown() restores exactly
        # that value instead of assuming a particular default.
        self._saved_keep_block_size = arvados.config.KEEP_BLOCK_SIZE
        arvados.config.KEEP_BLOCK_SIZE = 4
        if sys.version_info < (3, 0):
            import unicodedata
            self.sailboat = unicodedata.lookup('SAILBOAT')
            self.snowman = unicodedata.lookup('SNOWMAN')
        else:
            self.sailboat = '\N{SAILBOAT}'
            self.snowman = '\N{SNOWMAN}'

    def tearDown(self):
        # Undo the module-level override made in setUp().
        arvados.config.KEEP_BLOCK_SIZE = self._saved_keep_block_size

    def test_read_sailboat_across_block_boundary(self):
        c = Collection()
        data = self.sailboat.encode('utf-8')
        # Write the character once whole and once split across two
        # write() calls; the context manager closes the file even if a
        # write fails.
        with c.open('sailboats', 'wb') as f:
            f.write(data)
            f.write(data[:1])
            f.write(data[1:])
            f.write(b'\n')
        # The data is expected to be stored as a 4-byte block followed by
        # a 3-byte block.
        self.assertRegex(c.portable_manifest_text(), r'\+4 .*\+3 ')

        with c.open('sailboats', 'r') as f:
            string = f.readline()
            self.assertEqual(string, self.sailboat+self.sailboat+'\n')

    def test_write_snowman_across_block_boundary(self):
        c = Collection()
        data = self.snowman
        with c.open('snowmany', 'w') as f:
            f.write(data+data+'\n'+data+'\n')
        # The text is expected to be stored as blocks of 4, 4 and 3 bytes.
        self.assertRegex(c.portable_manifest_text(), r'\+4 .*\+4 .*\+3 ')

        with c.open('snowmany', 'r') as f:
            self.assertEqual(f.readline(), self.snowman+self.snowman+'\n')
            self.assertEqual(f.readline(), self.snowman+'\n')
891
892
class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
    # Tests for arvados.collection.Collection and CollectionReader:
    # manifest parsing and path escaping, remove/find/copy/rename,
    # diff()/apply() including the conflict copies apply() creates,
    # change notifications, and truncation on open for writing.

    def test_replication_desired_kept_on_load(self):
        m = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n'
        c1 = Collection(m, replication_desired=1)
        c1.save_new()
        loc = c1.manifest_locator()
        c2 = Collection(loc)
        # Compare the manifest text itself.  The method has to be called:
        # comparing the bound methods would not look at the manifest, and
        # bound methods of two different instances do not compare equal
        # on Python 3.8+.
        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
        self.assertEqual(c1.replication_desired, c2.replication_desired)

    def test_replication_desired_not_loaded_if_provided(self):
        m = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n'
        c1 = Collection(m, replication_desired=1)
        c1.save_new()
        loc = c1.manifest_locator()
        # An explicit replication_desired must win over the saved value.
        c2 = Collection(loc, replication_desired=2)
        # Call the method so that manifest content is what gets compared.
        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
        self.assertNotEqual(c1.replication_desired, c2.replication_desired)

    def test_init_manifest(self):
        # Three stream lines for the same file: kept verbatim without
        # normalization, merged into one line with normalization.
        m1 = """. 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt
. 085c37f02916da1cad16f93c54d899b7+41 0:41:md5sum.txt
. 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md5sum.txt
"""
        self.assertEqual(m1, CollectionReader(m1).manifest_text(normalize=False))
        self.assertEqual(". 5348b82a029fd9e971a811ce1f71360b+43 085c37f02916da1cad16f93c54d899b7+41 8b22da26f9f433dea0a10e5ec66d73ba+43 0:127:md5sum.txt\n", CollectionReader(m1).manifest_text(normalize=True))

    def test_init_manifest_with_collision(self):
        # 'md5sum.txt' appears both as a file in '.' and as the stream
        # './md5sum.txt'; constructing the reader must raise.
        m1 = """. 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt
./md5sum.txt 085c37f02916da1cad16f93c54d899b7+41 0:41:md5sum.txt
"""
        with self.assertRaises(arvados.errors.ArgumentError):
            CollectionReader(m1)

    def test_init_manifest_with_error(self):
        # A stream line with no block locator must be rejected.
        m1 = """. 0:43:md5sum.txt"""
        with self.assertRaises(arvados.errors.ArgumentError):
            CollectionReader(m1)

    def test_remove(self):
        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n')
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n", c.portable_manifest_text())
        self.assertIn("count1.txt", c)
        c.remove("count1.txt")
        self.assertNotIn("count1.txt", c)
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c.portable_manifest_text())
        # An empty path is an argument error.
        with self.assertRaises(arvados.errors.ArgumentError):
            c.remove("")

    def test_find(self):
        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n')
        # "." is the collection itself; a leading "./" is optional.
        self.assertIs(c.find("."), c)
        self.assertIs(c.find("./count1.txt"), c["count1.txt"])
        self.assertIs(c.find("count1.txt"), c["count1.txt"])
        with self.assertRaises(IOError):
            c.find("/.")
        with self.assertRaises(arvados.errors.ArgumentError):
            c.find("")
        # Missing paths yield None rather than raising.
        self.assertIs(c.find("./nonexistant.txt"), None)
        self.assertIs(c.find("./nonexistantsubdir/nonexistant.txt"), None)

    def test_escaped_paths_dont_get_unescaped_on_manifest(self):
        # Dir & file names are literally '\056' (escaped form: \134056)
        manifest = './\\134056\\040Test d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\134056\n'
        c = Collection(manifest)
        self.assertEqual(c.portable_manifest_text(), manifest)

    def test_other_special_chars_on_file_token(self):
        # (escaped form in the manifest, decoded form in the file name)
        cases = [
            ('\\000', '\0'),
            ('\\011', '\t'),
            ('\\012', '\n'),
            ('\\072', ':'),
            ('\\134400', '\\400'),
        ]
        for encoded, decoded in cases:
            manifest = '. d41d8cd98f00b204e9800998ecf8427e+0 0:0:some%sfile.txt\n' % encoded
            c = Collection(manifest)
            # The manifest round-trips unchanged while keys() exposes the
            # decoded name.
            self.assertEqual(c.portable_manifest_text(), manifest)
            self.assertIn('some%sfile.txt' % decoded, c.keys())

    def test_escaped_paths_do_get_unescaped_on_listing(self):
        # Dir & file names are literally '\056' (escaped form: \134056)
        manifest = './\\134056\\040Test d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\134056\n'
        c = Collection(manifest)
        self.assertIn('\\056 Test', c.keys())
        self.assertIn('\\056', c['\\056 Test'].keys())

    def test_make_empty_dir_with_escaped_chars(self):
        c = Collection()
        c.mkdirs('./Empty\\056Dir')
        # The backslash in the directory name is escaped as \134 and the
        # empty directory is represented by a zero-length '\056' entry.
        self.assertEqual(c.portable_manifest_text(),
                         './Empty\\134056Dir d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n')

    def test_make_empty_dir_with_spaces(self):
        c = Collection()
        c.mkdirs('./foo bar/baz waz')
        # Spaces in path components are escaped as \040.
        self.assertEqual(c.portable_manifest_text(),
                         './foo\\040bar/baz\\040waz d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n')

    def test_remove_in_subdir(self):
        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
        c.remove("foo/count2.txt")
        # The now-empty subdirectory stays in the manifest.
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n", c.portable_manifest_text())

    def test_remove_empty_subdir(self):
        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
        c.remove("foo/count2.txt")
        c.remove("foo")
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.portable_manifest_text())

    def test_remove_nonempty_subdir(self):
        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
        # A non-empty directory can only be removed with recursive=True.
        with self.assertRaises(IOError):
            c.remove("foo")
        c.remove("foo", recursive=True)
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.portable_manifest_text())

    def test_copy_to_file_in_dir(self):
        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        c.copy("count1.txt", "foo/count2.txt")
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c.portable_manifest_text())

    def test_copy_file(self):
        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        c.copy("count1.txt", "count2.txt")
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n", c.portable_manifest_text())

    def test_copy_to_existing_dir(self):
        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
        # Copying onto an existing directory keeps the source file name.
        c.copy("count1.txt", "foo")
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n", c.portable_manifest_text())

    def test_copy_to_new_dir(self):
        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        # A trailing slash on the target creates the directory.
        c.copy("count1.txt", "foo/")
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.portable_manifest_text())

    def test_rename_file(self):
        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        c.rename("count1.txt", "count2.txt")
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c.manifest_text())

    def test_move_file_to_dir(self):
        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        c.mkdirs("foo")
        c.rename("count1.txt", "foo/count2.txt")
        self.assertEqual("./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c.manifest_text())

    def test_move_file_to_other(self):
        c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        c2 = Collection()
        # Renaming with source_collection moves the file out of c1.
        c2.rename("count1.txt", "count2.txt", source_collection=c1)
        self.assertEqual("", c1.manifest_text())
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c2.manifest_text())

    def test_clone(self):
        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
        cl = c.clone()
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", cl.portable_manifest_text())

    def test_diff_del_add(self):
        c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
        d = c2.diff(c1)
        self.assertEqual(sorted(d), [
            ('add', './count1.txt', c1["count1.txt"]),
            ('del', './count2.txt', c2["count2.txt"]),
        ])
        d = c1.diff(c2)
        self.assertEqual(sorted(d), [
            ('add', './count2.txt', c2["count2.txt"]),
            ('del', './count1.txt', c1["count1.txt"]),
        ])
        # Applying c1.diff(c2) to c1 must make it match c2.
        self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
        c1.apply(d)
        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())

    def test_diff_same(self):
        c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        c2 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        # Identical files are reported as a single 'tok' entry in both
        # directions.
        d = c2.diff(c1)
        self.assertEqual(d, [('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"])])
        d = c1.diff(c2)
        self.assertEqual(d, [('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"])])

        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
        c1.apply(d)
        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())

    def test_diff_mod(self):
        c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count1.txt\n')
        # Same name, different block: a 'mod' entry listing the diffing
        # collection's file first and the other collection's file second.
        d = c2.diff(c1)
        self.assertEqual(d, [('mod', './count1.txt', c2["count1.txt"], c1["count1.txt"])])
        d = c1.diff(c2)
        self.assertEqual(d, [('mod', './count1.txt', c1["count1.txt"], c2["count1.txt"])])

        self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
        c1.apply(d)
        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())

    def test_diff_add(self):
        c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        c2 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 5348b82a029fd9e971a811ce1f71360b+43 0:10:count1.txt 10:20:count2.txt\n')
        d = c2.diff(c1)
        self.assertEqual(sorted(d), [
            ('del', './count2.txt', c2["count2.txt"]),
            ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
        ])
        d = c1.diff(c2)
        self.assertEqual(sorted(d), [
            ('add', './count2.txt', c2["count2.txt"]),
            ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
        ])

        self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
        c1.apply(d)
        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())

    def test_diff_add_in_subcollection(self):
        c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        c2 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
        # A subdirectory present on one side only is reported as a whole
        # ('./foo'), not file by file.
        d = c2.diff(c1)
        self.assertEqual(sorted(d), [
            ('del', './foo', c2["foo"]),
            ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
        ])
        d = c1.diff(c2)
        self.assertEqual(sorted(d), [
            ('add', './foo', c2["foo"]),
            ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
        ])
        self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
        c1.apply(d)
        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())

    def test_diff_del_add_in_subcollection(self):
        c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
        c2 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 5348b82a029fd9e971a811ce1f71360b+43 0:3:count3.txt\n')
        # When both sides have './foo', differences are reported per file
        # inside it.
        d = c2.diff(c1)
        self.assertEqual(sorted(d), [
            ('add', './foo/count2.txt', c1.find("foo/count2.txt")),
            ('del', './foo/count3.txt', c2.find("foo/count3.txt")),
            ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
        ])
        d = c1.diff(c2)
        self.assertEqual(sorted(d), [
            ('add', './foo/count3.txt', c2.find("foo/count3.txt")),
            ('del', './foo/count2.txt', c1.find("foo/count2.txt")),
            ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
        ])

        self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
        c1.apply(d)
        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())

    def test_diff_mod_in_subcollection(self):
        c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
        c2 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:3:foo\n')
        # 'foo' is a directory in c1 and a plain file in c2: a 'mod'.
        d = c2.diff(c1)
        self.assertEqual(sorted(d), [
            ('mod', './foo', c2["foo"], c1["foo"]),
            ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
        ])
        d = c1.diff(c2)
        self.assertEqual(sorted(d), [
            ('mod', './foo', c1["foo"], c2["foo"]),
            ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
        ])

        self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
        c1.apply(d)
        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())

    def test_conflict_keep_local_change(self):
        c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
        c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
        d = c1.diff(c2)
        self.assertEqual(sorted(d), [
            ('add', './count2.txt', c2["count2.txt"]),
            ('del', './count1.txt', c1["count1.txt"]),
        ])
        # Modify count1.txt locally after the diff was computed.
        f = c1.open("count1.txt", "wb")
        f.write(b"zzzzz")

        # c1 changed, so it should not be deleted.
        c1.apply(d)
        self.assertEqual(c1.portable_manifest_text(), ". 95ebc3c7b3b9f1d2c40fec14415d3cb8+5 5348b82a029fd9e971a811ce1f71360b+43 0:5:count1.txt 5:10:count2.txt\n")

    def test_conflict_mod(self):
        c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt')
        c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count1.txt')
        d = c1.diff(c2)
        self.assertEqual(d, [('mod', './count1.txt', c1["count1.txt"], c2["count1.txt"])])
        # Modify count1.txt locally after the diff was computed.
        f = c1.open("count1.txt", "wb")
        f.write(b"zzzzz")

        # c1 changed, so c2 mod will go to a conflict file
        c1.apply(d)
        self.assertRegex(
            c1.portable_manifest_text(),
            r"\. 95ebc3c7b3b9f1d2c40fec14415d3cb8\+5 5348b82a029fd9e971a811ce1f71360b\+43 0:5:count1\.txt 5:10:count1\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")

    def test_conflict_add(self):
        c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
        c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count1.txt\n')
        d = c1.diff(c2)
        self.assertEqual(sorted(d), [
            ('add', './count1.txt', c2["count1.txt"]),
            ('del', './count2.txt', c1["count2.txt"]),
        ])
        # Create count1.txt locally after the diff was computed.
        f = c1.open("count1.txt", "wb")
        f.write(b"zzzzz")

        # c1 added count1.txt, so c2 add will go to a conflict file
        c1.apply(d)
        self.assertRegex(
            c1.portable_manifest_text(),
            r"\. 95ebc3c7b3b9f1d2c40fec14415d3cb8\+5 5348b82a029fd9e971a811ce1f71360b\+43 0:5:count1\.txt 5:10:count1\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")

    def test_conflict_del(self):
        c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt')
        c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count1.txt')
        d = c1.diff(c2)
        self.assertEqual(d, [('mod', './count1.txt', c1["count1.txt"], c2["count1.txt"])])
        c1.remove("count1.txt")

        # c1 deleted, so c2 mod will go to a conflict file
        c1.apply(d)
        self.assertRegex(
            c1.portable_manifest_text(),
            r"\. 5348b82a029fd9e971a811ce1f71360b\+43 0:10:count1\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")

    def test_notify(self):
        c1 = Collection()
        events = []
        # Record every notification the collection sends to subscribers.
        c1.subscribe(lambda event, collection, name, item: events.append((event, collection, name, item)))
        f = c1.open("foo.txt", "wb")
        # Creating the file must have produced an ADD event for it.
        self.assertEqual(events[0], (arvados.collection.ADD, c1, "foo.txt", f.arvadosfile))

    def test_open_w(self):
        c1 = Collection(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n")
        self.assertEqual(c1["count1.txt"].size(), 10)
        # Opening an existing file in "wb" mode truncates it.
        c1.open("count1.txt", "wb").close()
        self.assertEqual(c1["count1.txt"].size(), 0)
1240
1241
class NewCollectionTestCaseWithServersAndTokens(run_test_server.TestCaseWithServers):
    """Collection tests that inspect the signature hints of manifest locators.

    The two patterns below recognise a block locator of the form
    <32 hex digits>+<size>, followed by either a "+A" hint (called "local"
    here) or a "+R<5 lowercase letters>-" hint (called "remote" here), then
    40 hex digits, "@" and 8 hex digits.
    """
    # NOTE(review): presumably these empty dicts ask the harness to start an
    # API server and a Keep server with default settings -- confirm against
    # run_test_server.TestCaseWithServers.
    MAIN_SERVER = {}
    KEEP_SERVER = {}
    local_locator_re = r"[0-9a-f]{32}\+\d+\+A[a-f0-9]{40}@[a-f0-9]{8}"
    remote_locator_re = r"[0-9a-f]{32}\+\d+\+R[a-z]{5}-[a-f0-9]{40}@[a-f0-9]{8}"

    def setUp(self):
        # Cooperate with any setUp() defined further up the class hierarchy.
        super(NewCollectionTestCaseWithServersAndTokens, self).setUp()
        # Keep a handle on KeepClient.put as it is before a test method's
        # mock.patch decorator replaces it, so that a mock's side_effect can
        # still delegate to it.
        self.keep_put = arvados.keep.KeepClient.put

    def _count_locators(self, locator_re, collection):
        # Number of matches of locator_re in the collection's manifest text.
        # manifest_text() is called afresh on every use, on purpose: each
        # assertion looks at the manifest as it is at that moment.
        return len(re.findall(locator_re, collection.manifest_text()))

    @mock.patch('arvados.keep.KeepClient.put', autospec=True)
    def test_repacked_block_submission_get_permission_token(self, mocked_put):
        '''
        Make sure that those blocks that are committed after repacking small ones,
        get their permission tokens assigned on the collection manifest.
        '''
        def wrapped_keep_put(*args, **kwargs):
            # Simulate slow put operations
            time.sleep(1)
            return self.keep_put(*args, **kwargs)

        mocked_put.side_effect = wrapped_keep_put
        c = Collection()
        # Write 70 files ~1MiB each so we force to produce 1 big block by repacking
        # small ones before finishing the upload.
        for i in range(70):
            # Not a "with" block: the file must be closed with flush=False.
            f = c.open("file_{}.txt".format(i), 'wb')
            # The file is opened in binary mode, so hand it bytes like every
            # other test in this module does.
            letter = random.choice('abcdefghijklmnopqrstuvwxyz').encode()
            f.write(letter * (2**20+i))
            f.close(flush=False)
        # We should get 2 blocks with their tokens
        self.assertEqual(self._count_locators(self.local_locator_re, c), 2)

    @mock.patch('arvados.keep.KeepClient.refresh_signature')
    def test_copy_remote_blocks_on_save_new(self, rs_mock):
        # save_new() on a collection holding a remote-hinted block must end up
        # with the locally signed locator returned by refresh_signature.
        remote_block_loc = "acbd18db4cc2f85cedef654fccc4a4d8+3+Remote-" + "a" * 40 + "@abcdef01"
        local_block_loc = "acbd18db4cc2f85cedef654fccc4a4d8+3+A" + "b" * 40 + "@abcdef01"
        rs_mock.return_value = local_block_loc
        c = Collection(". " + remote_block_loc + " 0:3:foofile.txt\n")
        self.assertEqual(self._count_locators(self.remote_locator_re, c), 1)
        self.assertEqual(self._count_locators(self.local_locator_re, c), 0)
        c.save_new()
        rs_mock.assert_called()
        self.assertEqual(self._count_locators(self.remote_locator_re, c), 0)
        self.assertEqual(self._count_locators(self.local_locator_re, c), 1)

    @mock.patch('arvados.keep.KeepClient.refresh_signature')
    def test_copy_remote_blocks_on_save(self, rs_mock):
        # Same expectation as the save_new() case, but for save() on an
        # already saved collection that received a copy of a remote file.
        remote_block_loc = "acbd18db4cc2f85cedef654fccc4a4d8+3+Remote-" + "a" * 40 + "@abcdef01"
        local_block_loc = "acbd18db4cc2f85cedef654fccc4a4d8+3+A" + "b" * 40 + "@abcdef01"
        rs_mock.return_value = local_block_loc
        # Remote collection
        remote_c = Collection(". " + remote_block_loc + " 0:3:foofile.txt\n")
        self.assertEqual(self._count_locators(self.remote_locator_re, remote_c), 1)
        # Local collection
        local_c = Collection()
        with local_c.open('barfile.txt', 'wb') as f:
            # Binary mode: write bytes, consistent with the other tests.
            f.write(b'bar')
        local_c.save_new()
        self.assertEqual(self._count_locators(self.local_locator_re, local_c), 1)
        self.assertEqual(self._count_locators(self.remote_locator_re, local_c), 0)
        # Copy remote file to local collection
        local_c.copy('./foofile.txt', './copied/foofile.txt', remote_c)
        self.assertEqual(self._count_locators(self.local_locator_re, local_c), 1)
        self.assertEqual(self._count_locators(self.remote_locator_re, local_c), 1)
        # Save local collection: remote block should be copied
        local_c.save()
        rs_mock.assert_called()
        self.assertEqual(self._count_locators(self.local_locator_re, local_c), 2)
        self.assertEqual(self._count_locators(self.remote_locator_re, local_c), 0)
1321
1322
class NewCollectionTestCaseWithServers(run_test_server.TestCaseWithServers):
    """Manifest checks around block commits and the packing of small blocks."""

    def _write_unflushed(self, collection, filename, data, mode="wb"):
        # Open filename in the given mode, write data, then close the writer
        # with flush=False.
        writer = collection.open(filename, mode)
        writer.write(data)
        writer.close(flush=False)

    def test_get_manifest_text_only_committed(self):
        committed_only = dict(strip=False, normalize=False, only_committed=True)
        coll = Collection()
        with coll.open("count.txt", "wb") as count_file:
            # First file: written and flushed, which forces a block commit.
            with coll.open("foo.txt", "wb") as foo_file:
                foo_file.write(b"foo")
                foo_file.flush()
            count_file.write(b"0123456789")
            # Second file: written but not flushed, so its block has not been
            # sent to Keep yet and count.txt must be listed as empty.
            before_flush = coll._get_manifest_text(".", **committed_only)
            self.assertEqual(
                before_flush,
                '. acbd18db4cc2f85cedef654fccc4a4d8+3 0:0:count.txt 0:3:foo.txt\n')
            # Flushing forces the commit of the second file's block too.
            count_file.flush()
        after_flush = coll._get_manifest_text(".", **committed_only)
        self.assertEqual(
            after_flush,
            ". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:10:count.txt 10:3:foo.txt\n")

    def test_only_small_blocks_are_packed_together(self):
        coll = Collection()
        # Two small files first...
        self._write_unflushed(coll, "count.txt", b"0123456789")
        self._write_unflushed(coll, "foo.txt", b"foo")
        # ...then a big one (33 MB > KEEP_BLOCK_SIZE/2), which must not be
        # packed together with the small files.
        self._write_unflushed(coll, "bigfile.txt", b"x" * 1024 * 1024 * 33)
        expected_manifest = '. 2d303c138c118af809f39319e5d507e9+34603008 a8430a058b8fbf408e1931b794dbd6fb+13 0:34603008:bigfile.txt 34603008:10:count.txt 34603018:3:foo.txt\n'
        self.assertEqual(coll.manifest_text("."), expected_manifest)

    def test_flush_after_small_block_packing(self):
        packed_manifest = '. a8430a058b8fbf408e1931b794dbd6fb+13 0:10:count.txt 10:3:foo.txt\n'
        coll = Collection()
        # Two small files, both closed without flushing.
        self._write_unflushed(coll, "count.txt", b"0123456789")
        self._write_unflushed(coll, "foo.txt", b"foo")

        self.assertEqual(coll.manifest_text(), packed_manifest)

        # Reopen one of the files and close it again, this time flushing.
        reopened = coll.open("count.txt", "rb+")
        reopened.close(flush=True)

        # The manifest must be unchanged by that flush.
        self.assertEqual(coll.manifest_text(), packed_manifest)

    def test_write_after_small_block_packing2(self):
        coll = Collection()
        # Two small files, both closed without flushing.
        self._write_unflushed(coll, "count.txt", b"0123456789")
        self._write_unflushed(coll, "foo.txt", b"foo")

        self.assertEqual(
            coll.manifest_text(),
            '. a8430a058b8fbf408e1931b794dbd6fb+13 0:10:count.txt 10:3:foo.txt\n')

        # Overwrite the first three bytes of count.txt in place.
        self._write_unflushed(coll, "count.txt", b"abc", mode="rb+")

        self.assertEqual(
            coll.manifest_text(),
            '. 900150983cd24fb0d6963f7d28e17f72+3 a8430a058b8fbf408e1931b794dbd6fb+13 0:3:count.txt 6:7:count.txt 13:3:foo.txt\n')

    def test_small_block_packing_with_overwrite(self):
        coll = Collection()
        # Create each file empty, then fill it with its own name as content.
        for name in ("b1", "b2"):
            coll.open(name, "wb").close()
            coll[name].writeto(0, name.encode(), 0)

        # Overwrite the first file after the second one has been written.
        coll["b1"].writeto(0, b"1b", 0)

        block = "ed4f3f67c70b02b29c50ce1ea26666bd+4"
        self.assertEqual(coll.manifest_text(), ". " + block + " 0:2:b1 2:2:b2\n")
        self.assertEqual(coll["b1"].manifest_text(), ". " + block + " 0:2:b1\n")
        self.assertEqual(coll["b2"].manifest_text(), ". " + block + " 2:2:b2\n")
1422
1423
class CollectionCreateUpdateTest(run_test_server.TestCaseWithServers):
    """Tests that save collections to the API server and then update them."""
    MAIN_SERVER = {}
    KEEP_SERVER = {}

    def create_count_txt(self):
        # Save a brand new, empty collection to the API server, then add a
        # file to it locally without saving again.
        empty_pdh = "d41d8cd98f00b204e9800998ecf8427e+0"

        coll = Collection()
        coll.save_new("CollectionCreateUpdateTest", ensure_unique_name=True)
        self.assertEqual(coll.portable_data_hash(), empty_pdh)
        self.assertEqual(coll.api_response()["portable_data_hash"], empty_pdh)

        with coll.open("count.txt", "wb") as writer:
            writer.write(b"0123456789")

        self.assertEqual(
            coll.portable_manifest_text(),
            ". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n")

        return coll

    def _assert_api_response(self, collection, expected_fields):
        # Check, in order, that each (key, value) pair of expected_fields is
        # found in the collection's API response.
        for key, expected in expected_fields:
            self.assertEqual(collection.api_response()[key], expected)

    def test_create_and_save(self):
        coll = self.create_count_txt()
        trash_time = datetime.datetime(2111, 1, 1, 11, 11, 11, 111111)
        coll.save(properties={'type' : 'Intermediate'},
                  storage_classes=['archive'],
                  trash_at=trash_time)

        # The saved manifest must carry a "+A" signature hint on its block.
        signed_manifest_re = r"^\. 781e5e245d69b566979b86e28d23f2c7\+10\+A[a-f0-9]{40}@[a-f0-9]{8} 0:10:count\.txt$"
        self.assertRegex(coll.manifest_text(), signed_manifest_re)
        self._assert_api_response(coll, [
            ("storage_classes_desired", ['archive']),
            ("properties", {'type' : 'Intermediate'}),
            ("trash_at", '2111-01-01T11:11:11.111111000Z'),
        ])

    def test_create_and_save_new(self):
        coll = self.create_count_txt()
        trash_time = datetime.datetime(2111, 1, 1, 11, 11, 11, 111111)
        coll.save_new(properties={'type' : 'Intermediate'},
                      storage_classes=['archive'],
                      trash_at=trash_time)

        # The saved manifest must carry a "+A" signature hint on its block.
        signed_manifest_re = r"^\. 781e5e245d69b566979b86e28d23f2c7\+10\+A[a-f0-9]{40}@[a-f0-9]{8} 0:10:count\.txt$"
        self.assertRegex(coll.manifest_text(), signed_manifest_re)
        self._assert_api_response(coll, [
            ("storage_classes_desired", ['archive']),
            ("properties", {'type' : 'Intermediate'}),
            ("trash_at", '2111-01-01T11:11:11.111111000Z'),
        ])

    def test_create_and_save_after_commiting(self):
        coll = self.create_count_txt()
        # Save twice with different metadata; the second save must win.
        first_trash_time = datetime.datetime(2111, 1, 1, 11, 11, 11, 111111)
        coll.save(properties={'type' : 'Intermediate'},
                  storage_classes=['hot'],
                  trash_at=first_trash_time)
        second_trash_time = datetime.datetime(2222, 2, 2, 22, 22, 22, 222222)
        coll.save(properties={'type' : 'Output'},
                  storage_classes=['cold'],
                  trash_at=second_trash_time)

        self._assert_api_response(coll, [
            ("storage_classes_desired", ['cold']),
            ("properties", {'type' : 'Output'}),
            ("trash_at", '2222-02-02T22:22:22.222222000Z'),
        ])

    def test_create_diff_apply(self):
        original = self.create_count_txt()
        original.save()

        # Load the saved collection a second time and rewrite its file.
        modified = Collection(original.manifest_locator())
        with modified.open("count.txt", "wb") as writer:
            writer.write(b"abcdefg")

        changes = original.diff(modified)

        expected_first_change = (
            arvados.collection.MOD,
            u'./count.txt',
            original["count.txt"],
            modified["count.txt"],
        )
        self.assertEqual(changes[0], expected_first_change)

        # Applying the diff must bring the first copy in line with the second.
        original.apply(changes)
        self.assertEqual(original.portable_data_hash(), modified.portable_data_hash())

    def test_diff_apply_with_token(self):
        baseline = CollectionReader(". 781e5e245d69b566979b86e28d23f2c7+10+A715fd31f8111894f717eb1003c1b0216799dd9ec@54f5dd1a 0:10:count.txt\n")
        target = Collection(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n")
        other = CollectionReader(". 7ac66c0f148de9519b8bd264312c4d64+7+A715fd31f8111894f717eb1003c1b0216799dd9ec@54f5dd1a 0:7:count.txt\n")

        changes = baseline.diff(other)
        # The expected entry is built from the unsigned collection's file,
        # while the diff itself was computed from the signed baseline.
        expected_changes = [
            ('mod', u'./count.txt', target["count.txt"], other["count.txt"]),
        ]
        self.assertEqual(changes, expected_changes)

        target.apply(changes)

        self.assertEqual(
            target.manifest_text(),
            ". 7ac66c0f148de9519b8bd264312c4d64+7+A715fd31f8111894f717eb1003c1b0216799dd9ec@54f5dd1a 0:7:count.txt\n")

    def test_create_and_update(self):
        first = self.create_count_txt()
        first.save()

        # Change and save the collection through a second, independent object.
        second = arvados.collection.Collection(first.manifest_locator())
        with second.open("count.txt", "wb") as writer:
            writer.write(b"abcdefg")

        second.save()

        # The first object only catches up once update() is called.
        self.assertNotEqual(first.portable_data_hash(), second.portable_data_hash())
        first.update()
        self.assertEqual(first.portable_data_hash(), second.portable_data_hash())

    def test_create_and_update_with_conflict(self):
        first = self.create_count_txt()
        first.save()

        # Unsaved local change on the first object...
        with first.open("count.txt", "wb") as writer:
            writer.write(b"XYZ")

        # ...and a different, saved change made through a second object.
        second = arvados.collection.Collection(first.manifest_locator())
        with second.open("count.txt", "wb") as writer:
            writer.write(b"abcdefg")

        second.save()

        # update() must keep the local content and add the saved one under a
        # timestamped "~conflict~" name.
        first.update()
        conflict_manifest_re = r"\. e65075d550f9b5bf9992fa1d71a131be\+3\S* 7ac66c0f148de9519b8bd264312c4d64\+7\S* 0:3:count\.txt 3:7:count\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$"
        self.assertRegex(first.manifest_text(), conflict_manifest_re)

    def test_pdh_is_native_str(self):
        coll = self.create_count_txt()
        # type('') is the interpreter's own str type.
        native_str = type('')
        self.assertEqual(native_str, type(coll.portable_data_hash()))
1549
1550
# Run the tests of this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()