Merge branch '19954-permission-dedup-doc'
[arvados.git] / sdk / python / tests / test_collections.py
1 # Copyright (C) The Arvados Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: Apache-2.0
4
5 from __future__ import absolute_import
6
7 from builtins import object
8 import arvados
9 import copy
10 import mock
11 import os
12 import random
13 import re
14 import sys
15 import datetime
16 import ciso8601
17 import time
18 import unittest
19 import parameterized
20
21 from . import run_test_server
22 from arvados._ranges import Range, LocatorAndRange
23 from arvados.collection import Collection, CollectionReader
24 from . import arvados_testutil as tutil
25 from .arvados_testutil import make_block_cache
26
class TestResumableWriter(arvados.ResumableCollectionWriter):
    """ResumableCollectionWriter variant used by the resume tests below."""

    # PUT to Keep every 1K.
    KEEP_BLOCK_SIZE = 1024

    def current_state(self):
        """Return the writer state dumped with copy.deepcopy as the copier."""
        snapshot = self.dump_state(copy.deepcopy)
        return snapshot
32
33 @parameterized.parameterized_class([{"disk_cache": True}, {"disk_cache": False}])
34 class ArvadosCollectionsTest(run_test_server.TestCaseWithServers,
35                              tutil.ArvadosBaseTestCase):
36     disk_cache = False
37     MAIN_SERVER = {}
38
39     @classmethod
40     def setUpClass(cls):
41         super(ArvadosCollectionsTest, cls).setUpClass()
42         # need admin privileges to make collections with unsigned blocks
43         run_test_server.authorize_with('admin')
44         cls.api_client = arvados.api('v1')
45         cls.keep_client = arvados.KeepClient(api_client=cls.api_client,
46                                              local_store=cls.local_store,
47                                              block_cache=make_block_cache(cls.disk_cache))
48
49     def write_foo_bar_baz(self):
50         cw = arvados.CollectionWriter(self.api_client)
51         self.assertEqual(cw.current_stream_name(), '.',
52                          'current_stream_name() should be "." now')
53         cw.set_current_file_name('foo.txt')
54         cw.write(b'foo')
55         self.assertEqual(cw.current_file_name(), 'foo.txt',
56                          'current_file_name() should be foo.txt now')
57         cw.start_new_file('bar.txt')
58         cw.write(b'bar')
59         cw.start_new_stream('baz')
60         cw.write(b'baz')
61         cw.set_current_file_name('baz.txt')
62         self.assertEqual(cw.manifest_text(),
63                          ". 3858f62230ac3c915f300c664312c63f+6 0:3:foo.txt 3:3:bar.txt\n" +
64                          "./baz 73feffa4b7f6bb68e44cf984c85f6e88+3 0:3:baz.txt\n",
65                          "wrong manifest: got {}".format(cw.manifest_text()))
66         cw.save_new()
67         return cw.portable_data_hash()
68
69     def test_pdh_is_native_str(self):
70         pdh = self.write_foo_bar_baz()
71         self.assertEqual(type(''), type(pdh))
72
73     def test_keep_local_store(self):
74         self.assertEqual(self.keep_client.put(b'foo'), 'acbd18db4cc2f85cedef654fccc4a4d8+3', 'wrong md5 hash from Keep.put')
75         self.assertEqual(self.keep_client.get('acbd18db4cc2f85cedef654fccc4a4d8+3'), b'foo', 'wrong data from Keep.get')
76
77     def test_local_collection_writer(self):
78         self.assertEqual(self.write_foo_bar_baz(),
79                          '23ca013983d6239e98931cc779e68426+114',
80                          'wrong locator hash: ' + self.write_foo_bar_baz())
81
82     def test_local_collection_reader(self):
83         foobarbaz = self.write_foo_bar_baz()
84         cr = arvados.CollectionReader(
85             foobarbaz + '+Xzizzle', self.api_client)
86         got = []
87         for s in cr.all_streams():
88             for f in s.all_files():
89                 got += [[f.size(), f.stream_name(), f.name(), f.read(2**26)]]
90         expected = [[3, '.', 'foo.txt', b'foo'],
91                     [3, '.', 'bar.txt', b'bar'],
92                     [3, './baz', 'baz.txt', b'baz']]
93         self.assertEqual(got,
94                          expected)
95         stream0 = cr.all_streams()[0]
96         self.assertEqual(stream0.readfrom(0, 0),
97                          b'',
98                          'reading zero bytes should have returned empty string')
99         self.assertEqual(stream0.readfrom(0, 2**26),
100                          b'foobar',
101                          'reading entire stream failed')
102         self.assertEqual(stream0.readfrom(2**26, 0),
103                          b'',
104                          'reading zero bytes should have returned empty string')
105         self.assertEqual(3, len(cr))
106         self.assertTrue(cr)
107
108     def _test_subset(self, collection, expected):
109         cr = arvados.CollectionReader(collection, self.api_client)
110         for s in cr.all_streams():
111             for ex in expected:
112                 if ex[0] == s:
113                     f = s.files()[ex[2]]
114                     got = [f.size(), f.stream_name(), f.name(), "".join(f.readall(2**26))]
115                     self.assertEqual(got,
116                                      ex,
117                                      'all_files|as_manifest did not preserve manifest contents: got %s expected %s' % (got, ex))
118
119     def test_collection_manifest_subset(self):
120         foobarbaz = self.write_foo_bar_baz()
121         self._test_subset(foobarbaz,
122                           [[3, '.',     'bar.txt', b'bar'],
123                            [3, '.',     'foo.txt', b'foo'],
124                            [3, './baz', 'baz.txt', b'baz']])
125         self._test_subset((". %s %s 0:3:foo.txt 3:3:bar.txt\n" %
126                            (self.keep_client.put(b"foo"),
127                             self.keep_client.put(b"bar"))),
128                           [[3, '.', 'bar.txt', b'bar'],
129                            [3, '.', 'foo.txt', b'foo']])
130         self._test_subset((". %s %s 0:2:fo.txt 2:4:obar.txt\n" %
131                            (self.keep_client.put(b"foo"),
132                             self.keep_client.put(b"bar"))),
133                           [[2, '.', 'fo.txt', b'fo'],
134                            [4, '.', 'obar.txt', b'obar']])
135         self._test_subset((". %s %s 0:2:fo.txt 2:0:zero.txt 2:2:ob.txt 4:2:ar.txt\n" %
136                            (self.keep_client.put(b"foo"),
137                             self.keep_client.put(b"bar"))),
138                           [[2, '.', 'ar.txt', b'ar'],
139                            [2, '.', 'fo.txt', b'fo'],
140                            [2, '.', 'ob.txt', b'ob'],
141                            [0, '.', 'zero.txt', b'']])
142
143     def test_collection_empty_file(self):
144         cw = arvados.CollectionWriter(self.api_client)
145         cw.start_new_file('zero.txt')
146         cw.write(b'')
147
148         self.assertEqual(cw.manifest_text(), ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:zero.txt\n")
149         self.check_manifest_file_sizes(cw.manifest_text(), [0])
150         cw = arvados.CollectionWriter(self.api_client)
151         cw.start_new_file('zero.txt')
152         cw.write(b'')
153         cw.start_new_file('one.txt')
154         cw.write(b'1')
155         cw.start_new_stream('foo')
156         cw.start_new_file('zero.txt')
157         cw.write(b'')
158         self.check_manifest_file_sizes(cw.manifest_text(), [0,1,0])
159
160     def test_no_implicit_normalize(self):
161         cw = arvados.CollectionWriter(self.api_client)
162         cw.start_new_file('b')
163         cw.write(b'b')
164         cw.start_new_file('a')
165         cw.write(b'')
166         self.check_manifest_file_sizes(cw.manifest_text(), [1,0])
167         self.check_manifest_file_sizes(
168             arvados.CollectionReader(
169                 cw.manifest_text()).manifest_text(normalize=True),
170             [0,1])
171
172     def check_manifest_file_sizes(self, manifest_text, expect_sizes):
173         cr = arvados.CollectionReader(manifest_text, self.api_client)
174         got_sizes = []
175         for f in cr.all_files():
176             got_sizes += [f.size()]
177         self.assertEqual(got_sizes, expect_sizes, "got wrong file sizes %s, expected %s" % (got_sizes, expect_sizes))
178
179     def test_normalized_collection(self):
180         m1 = """. 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt
181 . 085c37f02916da1cad16f93c54d899b7+41 0:41:md5sum.txt
182 . 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md5sum.txt
183 """
184         self.assertEqual(arvados.CollectionReader(m1, self.api_client).manifest_text(normalize=True),
185                          """. 5348b82a029fd9e971a811ce1f71360b+43 085c37f02916da1cad16f93c54d899b7+41 8b22da26f9f433dea0a10e5ec66d73ba+43 0:127:md5sum.txt
186 """)
187
188         m2 = """. 204e43b8a1185621ca55a94839582e6f+67108864 b9677abbac956bd3e86b1deb28dfac03+67108864 fc15aff2a762b13f521baf042140acec+67108864 323d2a3ce20370c4ca1d3462a344f8fd+25885655 0:227212247:var-GS000016015-ASM.tsv.bz2
189 """
190         self.assertEqual(arvados.CollectionReader(m2, self.api_client).manifest_text(normalize=True), m2)
191
192         m3 = """. 5348b82a029fd9e971a811ce1f71360b+43 3:40:md5sum.txt
193 . 085c37f02916da1cad16f93c54d899b7+41 0:41:md5sum.txt
194 . 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md5sum.txt
195 """
196         self.assertEqual(arvados.CollectionReader(m3, self.api_client).manifest_text(normalize=True),
197                          """. 5348b82a029fd9e971a811ce1f71360b+43 085c37f02916da1cad16f93c54d899b7+41 8b22da26f9f433dea0a10e5ec66d73ba+43 3:124:md5sum.txt
198 """)
199
200         m4 = """. 204e43b8a1185621ca55a94839582e6f+67108864 0:3:foo/bar
201 ./zzz 204e43b8a1185621ca55a94839582e6f+67108864 0:999:zzz
202 ./foo 323d2a3ce20370c4ca1d3462a344f8fd+25885655 0:3:bar
203 """
204         self.assertEqual(arvados.CollectionReader(m4, self.api_client).manifest_text(normalize=True),
205                          """./foo 204e43b8a1185621ca55a94839582e6f+67108864 323d2a3ce20370c4ca1d3462a344f8fd+25885655 0:3:bar 67108864:3:bar
206 ./zzz 204e43b8a1185621ca55a94839582e6f+67108864 0:999:zzz
207 """)
208
209         m5 = """. 204e43b8a1185621ca55a94839582e6f+67108864 0:3:foo/bar
210 ./zzz 204e43b8a1185621ca55a94839582e6f+67108864 0:999:zzz
211 ./foo 204e43b8a1185621ca55a94839582e6f+67108864 3:3:bar
212 """
213         self.assertEqual(arvados.CollectionReader(m5, self.api_client).manifest_text(normalize=True),
214                          """./foo 204e43b8a1185621ca55a94839582e6f+67108864 0:6:bar
215 ./zzz 204e43b8a1185621ca55a94839582e6f+67108864 0:999:zzz
216 """)
217
218         with self.data_file('1000G_ref_manifest') as f6:
219             m6 = f6.read()
220             self.assertEqual(arvados.CollectionReader(m6, self.api_client).manifest_text(normalize=True), m6)
221
222         with self.data_file('jlake_manifest') as f7:
223             m7 = f7.read()
224             self.assertEqual(arvados.CollectionReader(m7, self.api_client).manifest_text(normalize=True), m7)
225
226         m8 = """./a\\040b\\040c 59ca0efa9f5633cb0371bbc0355478d8+13 0:13:hello\\040world.txt
227 """
228         self.assertEqual(arvados.CollectionReader(m8, self.api_client).manifest_text(normalize=True), m8)
229
230     def test_locators_and_ranges(self):
231         blocks2 = [Range('a', 0, 10),
232                    Range('b', 10, 10),
233                    Range('c', 20, 10),
234                    Range('d', 30, 10),
235                    Range('e', 40, 10),
236                    Range('f', 50, 10)]
237
238         self.assertEqual(arvados.locators_and_ranges(blocks2,  2,  2), [LocatorAndRange('a', 10, 2, 2)])
239         self.assertEqual(arvados.locators_and_ranges(blocks2, 12, 2), [LocatorAndRange('b', 10, 2, 2)])
240         self.assertEqual(arvados.locators_and_ranges(blocks2, 22, 2), [LocatorAndRange('c', 10, 2, 2)])
241         self.assertEqual(arvados.locators_and_ranges(blocks2, 32, 2), [LocatorAndRange('d', 10, 2, 2)])
242         self.assertEqual(arvados.locators_and_ranges(blocks2, 42, 2), [LocatorAndRange('e', 10, 2, 2)])
243         self.assertEqual(arvados.locators_and_ranges(blocks2, 52, 2), [LocatorAndRange('f', 10, 2, 2)])
244         self.assertEqual(arvados.locators_and_ranges(blocks2, 62, 2), [])
245         self.assertEqual(arvados.locators_and_ranges(blocks2, -2, 2), [])
246
247         self.assertEqual(arvados.locators_and_ranges(blocks2,  0,  2), [LocatorAndRange('a', 10, 0, 2)])
248         self.assertEqual(arvados.locators_and_ranges(blocks2, 10, 2), [LocatorAndRange('b', 10, 0, 2)])
249         self.assertEqual(arvados.locators_and_ranges(blocks2, 20, 2), [LocatorAndRange('c', 10, 0, 2)])
250         self.assertEqual(arvados.locators_and_ranges(blocks2, 30, 2), [LocatorAndRange('d', 10, 0, 2)])
251         self.assertEqual(arvados.locators_and_ranges(blocks2, 40, 2), [LocatorAndRange('e', 10, 0, 2)])
252         self.assertEqual(arvados.locators_and_ranges(blocks2, 50, 2), [LocatorAndRange('f', 10, 0, 2)])
253         self.assertEqual(arvados.locators_and_ranges(blocks2, 60, 2), [])
254         self.assertEqual(arvados.locators_and_ranges(blocks2, -2, 2), [])
255
256         self.assertEqual(arvados.locators_and_ranges(blocks2,  9,  2), [LocatorAndRange('a', 10, 9, 1), LocatorAndRange('b', 10, 0, 1)])
257         self.assertEqual(arvados.locators_and_ranges(blocks2, 19, 2), [LocatorAndRange('b', 10, 9, 1), LocatorAndRange('c', 10, 0, 1)])
258         self.assertEqual(arvados.locators_and_ranges(blocks2, 29, 2), [LocatorAndRange('c', 10, 9, 1), LocatorAndRange('d', 10, 0, 1)])
259         self.assertEqual(arvados.locators_and_ranges(blocks2, 39, 2), [LocatorAndRange('d', 10, 9, 1), LocatorAndRange('e', 10, 0, 1)])
260         self.assertEqual(arvados.locators_and_ranges(blocks2, 49, 2), [LocatorAndRange('e', 10, 9, 1), LocatorAndRange('f', 10, 0, 1)])
261         self.assertEqual(arvados.locators_and_ranges(blocks2, 59, 2), [LocatorAndRange('f', 10, 9, 1)])
262
263
264         blocks3 = [Range('a', 0, 10),
265                   Range('b', 10, 10),
266                   Range('c', 20, 10),
267                   Range('d', 30, 10),
268                   Range('e', 40, 10),
269                   Range('f', 50, 10),
270                    Range('g', 60, 10)]
271
272         self.assertEqual(arvados.locators_and_ranges(blocks3,  2,  2), [LocatorAndRange('a', 10, 2, 2)])
273         self.assertEqual(arvados.locators_and_ranges(blocks3, 12, 2), [LocatorAndRange('b', 10, 2, 2)])
274         self.assertEqual(arvados.locators_and_ranges(blocks3, 22, 2), [LocatorAndRange('c', 10, 2, 2)])
275         self.assertEqual(arvados.locators_and_ranges(blocks3, 32, 2), [LocatorAndRange('d', 10, 2, 2)])
276         self.assertEqual(arvados.locators_and_ranges(blocks3, 42, 2), [LocatorAndRange('e', 10, 2, 2)])
277         self.assertEqual(arvados.locators_and_ranges(blocks3, 52, 2), [LocatorAndRange('f', 10, 2, 2)])
278         self.assertEqual(arvados.locators_and_ranges(blocks3, 62, 2), [LocatorAndRange('g', 10, 2, 2)])
279
280
281         blocks = [Range('a', 0, 10),
282                   Range('b', 10, 15),
283                   Range('c', 25, 5)]
284         self.assertEqual(arvados.locators_and_ranges(blocks, 1, 0), [])
285         self.assertEqual(arvados.locators_and_ranges(blocks, 0, 5), [LocatorAndRange('a', 10, 0, 5)])
286         self.assertEqual(arvados.locators_and_ranges(blocks, 3, 5), [LocatorAndRange('a', 10, 3, 5)])
287         self.assertEqual(arvados.locators_and_ranges(blocks, 0, 10), [LocatorAndRange('a', 10, 0, 10)])
288
289         self.assertEqual(arvados.locators_and_ranges(blocks, 0, 11), [LocatorAndRange('a', 10, 0, 10),
290                                                                       LocatorAndRange('b', 15, 0, 1)])
291         self.assertEqual(arvados.locators_and_ranges(blocks, 1, 11), [LocatorAndRange('a', 10, 1, 9),
292                                                                       LocatorAndRange('b', 15, 0, 2)])
293         self.assertEqual(arvados.locators_and_ranges(blocks, 0, 25), [LocatorAndRange('a', 10, 0, 10),
294                                                                       LocatorAndRange('b', 15, 0, 15)])
295
296         self.assertEqual(arvados.locators_and_ranges(blocks, 0, 30), [LocatorAndRange('a', 10, 0, 10),
297                                                                       LocatorAndRange('b', 15, 0, 15),
298                                                                       LocatorAndRange('c', 5, 0, 5)])
299         self.assertEqual(arvados.locators_and_ranges(blocks, 1, 30), [LocatorAndRange('a', 10, 1, 9),
300                                                                       LocatorAndRange('b', 15, 0, 15),
301                                                                       LocatorAndRange('c', 5, 0, 5)])
302         self.assertEqual(arvados.locators_and_ranges(blocks, 0, 31), [LocatorAndRange('a', 10, 0, 10),
303                                                                       LocatorAndRange('b', 15, 0, 15),
304                                                                       LocatorAndRange('c', 5, 0, 5)])
305
306         self.assertEqual(arvados.locators_and_ranges(blocks, 15, 5), [LocatorAndRange('b', 15, 5, 5)])
307
308         self.assertEqual(arvados.locators_and_ranges(blocks, 8, 17), [LocatorAndRange('a', 10, 8, 2),
309                                                                       LocatorAndRange('b', 15, 0, 15)])
310
311         self.assertEqual(arvados.locators_and_ranges(blocks, 8, 20), [LocatorAndRange('a', 10, 8, 2),
312                                                                       LocatorAndRange('b', 15, 0, 15),
313                                                                       LocatorAndRange('c', 5, 0, 3)])
314
315         self.assertEqual(arvados.locators_and_ranges(blocks, 26, 2), [LocatorAndRange('c', 5, 1, 2)])
316
317         self.assertEqual(arvados.locators_and_ranges(blocks, 9, 15), [LocatorAndRange('a', 10, 9, 1),
318                                                                       LocatorAndRange('b', 15, 0, 14)])
319         self.assertEqual(arvados.locators_and_ranges(blocks, 10, 15), [LocatorAndRange('b', 15, 0, 15)])
320         self.assertEqual(arvados.locators_and_ranges(blocks, 11, 15), [LocatorAndRange('b', 15, 1, 14),
321                                                                        LocatorAndRange('c', 5, 0, 1)])
322
323     class MockKeep(object):
324         def __init__(self, content, num_retries=0):
325             self.content = content
326
327         def get(self, locator, num_retries=0, prefetch=False):
328             return self.content[locator]
329
330     def test_stream_reader(self):
331         keepblocks = {
332             'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa+10': b'abcdefghij',
333             'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb+15': b'klmnopqrstuvwxy',
334             'cccccccccccccccccccccccccccccccc+5': b'z0123',
335         }
336         mk = self.MockKeep(keepblocks)
337
338         sr = arvados.StreamReader([".", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa+10", "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb+15", "cccccccccccccccccccccccccccccccc+5", "0:30:foo"], mk)
339
340         content = b'abcdefghijklmnopqrstuvwxyz0123456789'
341
342         self.assertEqual(sr.readfrom(0, 30), content[0:30])
343         self.assertEqual(sr.readfrom(2, 30), content[2:30])
344
345         self.assertEqual(sr.readfrom(2, 8), content[2:10])
346         self.assertEqual(sr.readfrom(0, 10), content[0:10])
347
348         self.assertEqual(sr.readfrom(0, 5), content[0:5])
349         self.assertEqual(sr.readfrom(5, 5), content[5:10])
350         self.assertEqual(sr.readfrom(10, 5), content[10:15])
351         self.assertEqual(sr.readfrom(15, 5), content[15:20])
352         self.assertEqual(sr.readfrom(20, 5), content[20:25])
353         self.assertEqual(sr.readfrom(25, 5), content[25:30])
354         self.assertEqual(sr.readfrom(30, 5), b'')
355
356     def test_extract_file(self):
357         m1 = """. 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt
358 . 085c37f02916da1cad16f93c54d899b7+41 0:41:md6sum.txt
359 . 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md7sum.txt
360 . 085c37f02916da1cad16f93c54d899b7+41 5348b82a029fd9e971a811ce1f71360b+43 8b22da26f9f433dea0a10e5ec66d73ba+43 47:80:md8sum.txt
361 . 085c37f02916da1cad16f93c54d899b7+41 5348b82a029fd9e971a811ce1f71360b+43 8b22da26f9f433dea0a10e5ec66d73ba+43 40:80:md9sum.txt
362 """
363
364         m2 = arvados.CollectionReader(m1, self.api_client).manifest_text(normalize=True)
365
366         self.assertEqual(m2,
367                          ". 5348b82a029fd9e971a811ce1f71360b+43 085c37f02916da1cad16f93c54d899b7+41 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md5sum.txt 43:41:md6sum.txt 84:43:md7sum.txt 6:37:md8sum.txt 84:43:md8sum.txt 83:1:md9sum.txt 0:43:md9sum.txt 84:36:md9sum.txt\n")
368         files = arvados.CollectionReader(
369             m2, self.api_client).all_streams()[0].files()
370
371         self.assertEqual(files['md5sum.txt'].as_manifest(),
372                          ". 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt\n")
373         self.assertEqual(files['md6sum.txt'].as_manifest(),
374                          ". 085c37f02916da1cad16f93c54d899b7+41 0:41:md6sum.txt\n")
375         self.assertEqual(files['md7sum.txt'].as_manifest(),
376                          ". 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md7sum.txt\n")
377         self.assertEqual(files['md9sum.txt'].as_manifest(),
378                          ". 085c37f02916da1cad16f93c54d899b7+41 5348b82a029fd9e971a811ce1f71360b+43 8b22da26f9f433dea0a10e5ec66d73ba+43 40:80:md9sum.txt\n")
379
380     def test_write_directory_tree(self):
381         cwriter = arvados.CollectionWriter(self.api_client)
382         cwriter.write_directory_tree(self.build_directory_tree(
383                 ['basefile', 'subdir/subfile']))
384         self.assertEqual(cwriter.manifest_text(),
385                          """. c5110c5ac93202d8e0f9e381f22bac0f+8 0:8:basefile
386 ./subdir 1ca4dec89403084bf282ad31e6cf7972+14 0:14:subfile\n""")
387
388     def test_write_named_directory_tree(self):
389         cwriter = arvados.CollectionWriter(self.api_client)
390         cwriter.write_directory_tree(self.build_directory_tree(
391                 ['basefile', 'subdir/subfile']), 'root')
392         self.assertEqual(
393             cwriter.manifest_text(),
394             """./root c5110c5ac93202d8e0f9e381f22bac0f+8 0:8:basefile
395 ./root/subdir 1ca4dec89403084bf282ad31e6cf7972+14 0:14:subfile\n""")
396
397     def test_write_directory_tree_in_one_stream(self):
398         cwriter = arvados.CollectionWriter(self.api_client)
399         cwriter.write_directory_tree(self.build_directory_tree(
400                 ['basefile', 'subdir/subfile']), max_manifest_depth=0)
401         self.assertEqual(cwriter.manifest_text(),
402                          """. 4ace875ffdc6824a04950f06858f4465+22 0:8:basefile 8:14:subdir/subfile\n""")
403
404     def test_write_directory_tree_with_limited_recursion(self):
405         cwriter = arvados.CollectionWriter(self.api_client)
406         cwriter.write_directory_tree(
407             self.build_directory_tree(['f1', 'd1/f2', 'd1/d2/f3']),
408             max_manifest_depth=1)
409         self.assertEqual(cwriter.manifest_text(),
410                          """. bd19836ddb62c11c55ab251ccaca5645+2 0:2:f1
411 ./d1 50170217e5b04312024aa5cd42934494+13 0:8:d2/f3 8:5:f2\n""")
412
413     def test_write_directory_tree_with_zero_recursion(self):
414         cwriter = arvados.CollectionWriter(self.api_client)
415         content = 'd1/d2/f3d1/f2f1'
416         blockhash = tutil.str_keep_locator(content)
417         cwriter.write_directory_tree(
418             self.build_directory_tree(['f1', 'd1/f2', 'd1/d2/f3']),
419             max_manifest_depth=0)
420         self.assertEqual(
421             cwriter.manifest_text(),
422             ". {} 0:8:d1/d2/f3 8:5:d1/f2 13:2:f1\n".format(blockhash))
423
424     def test_write_one_file(self):
425         cwriter = arvados.CollectionWriter(self.api_client)
426         with self.make_test_file() as testfile:
427             cwriter.write_file(testfile.name)
428             self.assertEqual(
429                 cwriter.manifest_text(),
430                 ". 098f6bcd4621d373cade4e832627b4f6+4 0:4:{}\n".format(
431                     os.path.basename(testfile.name)))
432
433     def test_write_named_file(self):
434         cwriter = arvados.CollectionWriter(self.api_client)
435         with self.make_test_file() as testfile:
436             cwriter.write_file(testfile.name, 'foo')
437             self.assertEqual(cwriter.manifest_text(),
438                              ". 098f6bcd4621d373cade4e832627b4f6+4 0:4:foo\n")
439
440     def test_write_multiple_files(self):
441         cwriter = arvados.CollectionWriter(self.api_client)
442         for letter in 'ABC':
443             with self.make_test_file(letter.encode()) as testfile:
444                 cwriter.write_file(testfile.name, letter)
445         self.assertEqual(
446             cwriter.manifest_text(),
447             ". 902fbdd2b1df0c4f70b4a5d23525e932+3 0:1:A 1:1:B 2:1:C\n")
448
449     def test_basic_resume(self):
450         cwriter = TestResumableWriter()
451         with self.make_test_file() as testfile:
452             cwriter.write_file(testfile.name, 'test')
453             resumed = TestResumableWriter.from_state(cwriter.current_state())
454         self.assertEqual(cwriter.manifest_text(), resumed.manifest_text(),
455                           "resumed CollectionWriter had different manifest")
456
457     def test_resume_fails_when_missing_dependency(self):
458         cwriter = TestResumableWriter()
459         with self.make_test_file() as testfile:
460             cwriter.write_file(testfile.name, 'test')
461         self.assertRaises(arvados.errors.StaleWriterStateError,
462                           TestResumableWriter.from_state,
463                           cwriter.current_state())
464
465     def test_resume_fails_when_dependency_mtime_changed(self):
466         cwriter = TestResumableWriter()
467         with self.make_test_file() as testfile:
468             cwriter.write_file(testfile.name, 'test')
469             os.utime(testfile.name, (0, 0))
470             self.assertRaises(arvados.errors.StaleWriterStateError,
471                               TestResumableWriter.from_state,
472                               cwriter.current_state())
473
474     def test_resume_fails_when_dependency_is_nonfile(self):
475         cwriter = TestResumableWriter()
476         cwriter.write_file('/dev/null', 'empty')
477         self.assertRaises(arvados.errors.StaleWriterStateError,
478                           TestResumableWriter.from_state,
479                           cwriter.current_state())
480
481     def test_resume_fails_when_dependency_size_changed(self):
482         cwriter = TestResumableWriter()
483         with self.make_test_file() as testfile:
484             cwriter.write_file(testfile.name, 'test')
485             orig_mtime = os.fstat(testfile.fileno()).st_mtime
486             testfile.write(b'extra')
487             testfile.flush()
488             os.utime(testfile.name, (orig_mtime, orig_mtime))
489             self.assertRaises(arvados.errors.StaleWriterStateError,
490                               TestResumableWriter.from_state,
491                               cwriter.current_state())
492
493     def test_resume_fails_with_expired_locator(self):
494         cwriter = TestResumableWriter()
495         state = cwriter.current_state()
496         # Add an expired locator to the state.
497         state['_current_stream_locators'].append(''.join([
498                     'a' * 32, '+1+A', 'b' * 40, '@', '10000000']))
499         self.assertRaises(arvados.errors.StaleWriterStateError,
500                           TestResumableWriter.from_state, state)
501
502     def test_arbitrary_objects_not_resumable(self):
503         cwriter = TestResumableWriter()
504         with open('/dev/null') as badfile:
505             self.assertRaises(arvados.errors.AssertionError,
506                               cwriter.write_file, badfile)
507
508     def test_arbitrary_writes_not_resumable(self):
509         cwriter = TestResumableWriter()
510         self.assertRaises(arvados.errors.AssertionError,
511                           cwriter.write, "badtext")
512
513
class CollectionTestMixin(tutil.ApiClientMock):
    """Shared collection fixtures and an API client mock for collection tests."""

    # All collection fixtures, keyed by fixture name.
    API_COLLECTIONS = run_test_server.fixture('collections')

    # Default fixture ('foo_file') and the fields the tests use from it.
    DEFAULT_COLLECTION = API_COLLECTIONS['foo_file']
    DEFAULT_DATA_HASH = DEFAULT_COLLECTION['portable_data_hash']
    DEFAULT_MANIFEST = DEFAULT_COLLECTION['manifest_text']
    DEFAULT_UUID = DEFAULT_COLLECTION['uuid']

    # Alternate fixture ('bar_file').
    ALT_COLLECTION = API_COLLECTIONS['bar_file']
    ALT_DATA_HASH = ALT_COLLECTION['portable_data_hash']
    ALT_MANIFEST = ALT_COLLECTION['manifest_text']

    def api_client_mock(self, status=200):
        """Return the parent's API client mock with one mocked Keep proxy.

        `status` is forwarded to mock_keep_services(); the parent's
        api_client_mock() is called without arguments.
        """
        mock_client = super(CollectionTestMixin, self).api_client_mock()
        self.mock_keep_services(
            mock_client, status=status, service_type='proxy', count=1)
        return mock_client
528
529
530 @tutil.skip_sleep
531 class CollectionReaderTestCase(unittest.TestCase, CollectionTestMixin):
532     def mock_get_collection(self, api_mock, code, fixturename):
533         body = self.API_COLLECTIONS.get(fixturename)
534         self._mock_api_call(api_mock.collections().get, code, body)
535
536     def api_client_mock(self, status=200):
537         client = super(CollectionReaderTestCase, self).api_client_mock()
538         self.mock_get_collection(client, status, 'foo_file')
539         return client
540
541     def test_init_no_default_retries(self):
542         client = self.api_client_mock(200)
543         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
544         reader.manifest_text()
545         client.collections().get().execute.assert_called_with(num_retries=0)
546
547     def test_uuid_init_success(self):
548         client = self.api_client_mock(200)
549         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client,
550                                           num_retries=3)
551         self.assertEqual(self.DEFAULT_COLLECTION['manifest_text'],
552                          reader.manifest_text())
553         client.collections().get().execute.assert_called_with(num_retries=3)
554
555     def test_uuid_init_failure_raises_api_error(self):
556         client = self.api_client_mock(500)
557         with self.assertRaises(arvados.errors.ApiError):
558             reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
559
560     def test_locator_init(self):
561         client = self.api_client_mock(200)
562         # Ensure Keep will not return anything if asked.
563         with tutil.mock_keep_responses(None, 404):
564             reader = arvados.CollectionReader(self.DEFAULT_DATA_HASH,
565                                               api_client=client)
566             self.assertEqual(self.DEFAULT_MANIFEST, reader.manifest_text())
567
568     def test_init_no_fallback_to_keep(self):
569         # Do not look up a collection UUID or PDH in Keep.
570         for key in [self.DEFAULT_UUID, self.DEFAULT_DATA_HASH]:
571             client = self.api_client_mock(404)
572             with tutil.mock_keep_responses(self.DEFAULT_MANIFEST, 200):
573                 with self.assertRaises(arvados.errors.ApiError):
574                     reader = arvados.CollectionReader(key, api_client=client)
575
576     def test_init_num_retries_propagated(self):
577         # More of an integration test...
578         client = self.api_client_mock(200)
579         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client,
580                                           num_retries=3)
581         with tutil.mock_keep_responses('foo', 500, 500, 200):
582             self.assertEqual(b'foo',
583                              b''.join(f.read(9) for f in reader.all_files()))
584
585     def test_read_nonnormalized_manifest_with_collection_reader(self):
586         # client should be able to use CollectionReader on a manifest without normalizing it
587         client = self.api_client_mock(500)
588         nonnormal = ". acbd18db4cc2f85cedef654fccc4a4d8+3+Aabadbadbee@abeebdee 0:3:foo.txt 1:0:bar.txt 0:3:foo.txt\n"
589         reader = arvados.CollectionReader(
590             nonnormal,
591             api_client=client, num_retries=0)
592         # Ensure stripped_manifest() doesn't mangle our manifest in
593         # any way other than stripping hints.
594         self.assertEqual(
595             re.sub('\+[^\d\s\+]+', '', nonnormal),
596             reader.stripped_manifest())
597         # Ensure stripped_manifest() didn't mutate our reader.
598         self.assertEqual(nonnormal, reader.manifest_text())
599         # Ensure the files appear in the order given in the manifest.
600         self.assertEqual(
601             [[6, '.', 'foo.txt'],
602              [0, '.', 'bar.txt']],
603             [[f.size(), f.stream_name(), f.name()]
604              for f in reader.all_streams()[0].all_files()])
605
606     def test_read_empty_collection(self):
607         client = self.api_client_mock(200)
608         self.mock_get_collection(client, 200, 'empty')
609         reader = arvados.CollectionReader('d41d8cd98f00b204e9800998ecf8427e+0',
610                                           api_client=client)
611         self.assertEqual('', reader.manifest_text())
612         self.assertEqual(0, len(reader))
613         self.assertFalse(reader)
614
615     def test_api_response(self):
616         client = self.api_client_mock()
617         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
618         self.assertEqual(self.DEFAULT_COLLECTION, reader.api_response())
619
620     def check_open_file(self, coll_file, stream_name, file_name, file_size):
621         self.assertFalse(coll_file.closed, "returned file is not open")
622         self.assertEqual(stream_name, coll_file.stream_name())
623         self.assertEqual(file_name, coll_file.name)
624         self.assertEqual(file_size, coll_file.size())
625
626     def test_open_collection_file_one_argument(self):
627         client = self.api_client_mock(200)
628         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
629         cfile = reader.open('./foo', 'rb')
630         self.check_open_file(cfile, '.', 'foo', 3)
631
632     def test_open_deep_file(self):
633         coll_name = 'collection_with_files_in_subdir'
634         client = self.api_client_mock(200)
635         self.mock_get_collection(client, 200, coll_name)
636         reader = arvados.CollectionReader(
637             self.API_COLLECTIONS[coll_name]['uuid'], api_client=client)
638         cfile = reader.open('./subdir2/subdir3/file2_in_subdir3.txt', 'rb')
639         self.check_open_file(cfile, './subdir2/subdir3', 'file2_in_subdir3.txt',
640                              32)
641
642     def test_open_nonexistent_stream(self):
643         client = self.api_client_mock(200)
644         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
645         self.assertRaises(IOError, reader.open, './nonexistent/foo')
646
647     def test_open_nonexistent_file(self):
648         client = self.api_client_mock(200)
649         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
650         self.assertRaises(IOError, reader.open, 'nonexistent')
651
652
@tutil.skip_sleep
class CollectionWriterTestCase(unittest.TestCase, CollectionTestMixin):
    # Exercises arvados.CollectionWriter against mocked API clients and
    # mocked Keep responses.

    def mock_keep(self, body, *codes, **headers):
        # Same as tutil.mock_keep_responses, except that the
        # x-keep-replicas-stored header is set to 2 when the caller did
        # not supply one.
        if 'x-keep-replicas-stored' not in headers:
            headers['x-keep-replicas-stored'] = 2
        return tutil.mock_keep_responses(body, *codes, **headers)

    def foo_writer(self, **kwargs):
        # Build a CollectionWriter from kwargs (api_client falls back to a
        # fresh api_client_mock()), start a file named 'foo', write b'foo'
        # into it, and return the writer.
        fallback_client = self.api_client_mock()
        kwargs.setdefault('api_client', fallback_client)
        new_writer = arvados.CollectionWriter(**kwargs)
        new_writer.start_new_file('foo')
        new_writer.write(b'foo')
        return new_writer

    def test_write_whole_collection(self):
        # finish() must return the default data hash when the Keep mock
        # is given two 200 statuses.
        foo = self.foo_writer()
        with self.mock_keep(self.DEFAULT_DATA_HASH, 200, 200):
            finished = foo.finish()
            self.assertEqual(self.DEFAULT_DATA_HASH, finished)

    def test_write_no_default(self):
        # finish() must raise KeepWriteError when the Keep mock answers 500.
        foo = self.foo_writer()
        with self.mock_keep(None, 500):
            self.assertRaises(arvados.errors.KeepWriteError, foo.finish)

    def test_write_insufficient_replicas_via_proxy(self):
        # A writer asking for replication=3 must raise KeepWriteError when
        # the mocked response reports only 2 replicas stored.
        foo = self.foo_writer(replication=3)
        stored_two = {'x-keep-replicas-stored': 2}
        with self.mock_keep(None, 200, **stored_two):
            self.assertRaises(
                arvados.errors.KeepWriteError, foo.manifest_text)

    def test_write_insufficient_replicas_via_disks(self):
        # With two mocked 'disk' services each reporting 1 stored replica,
        # a writer asking for replication=3 must raise KeepWriteError.
        api = mock.MagicMock(name='api_client')
        stored_one = {'x-keep-replicas-stored': 1}
        with self.mock_keep(None, 200, 200, **stored_one):
            self.mock_keep_services(
                api, status=200, service_type='disk', count=2)
            foo = self.foo_writer(api_client=api, replication=3)
            self.assertRaises(
                arvados.errors.KeepWriteError, foo.manifest_text)

    def test_write_three_replicas(self):
        # With six mocked 'disk' services and the status sequence
        # 500, 500, 500, 200, 200, 200, the Keep mock must have been
        # called six times once manifest_text() returns.
        api = mock.MagicMock(name='api_client')
        stored_one = {'x-keep-replicas-stored': 1}
        with self.mock_keep(
                "", 500, 500, 500, 200, 200, 200,
                **stored_one) as keepmock:
            self.mock_keep_services(
                api, status=200, service_type='disk', count=6)
            foo = self.foo_writer(api_client=api, replication=3)
            foo.manifest_text()
            self.assertEqual(6, keepmock.call_count)

    def test_write_whole_collection_through_retries(self):
        # A writer with num_retries=2 must still finish() with the default
        # data hash given the status sequence 500, 500, 200, 500, 500, 200.
        foo = self.foo_writer(num_retries=2)
        with self.mock_keep(self.DEFAULT_DATA_HASH,
                            500, 500, 200, 500, 500, 200):
            finished = foo.finish()
            self.assertEqual(self.DEFAULT_DATA_HASH, finished)

    def test_flush_data_retries(self):
        # flush_data() on a writer with num_retries=2 is given a 500 then
        # a 200; afterwards the writer reports the default manifest.
        foo = self.foo_writer(num_retries=2)
        # Second whitespace-separated token of the default manifest.
        foo_locator = self.DEFAULT_MANIFEST.split()[1]
        with self.mock_keep(foo_locator, 500, 200):
            foo.flush_data()
        self.assertEqual(self.DEFAULT_MANIFEST, foo.manifest_text())

    def test_one_open(self):
        # A file obtained from writer.open() sets the writer's current
        # stream/file names, is closed after its context exits, rejects
        # further writes with ValueError, and shows up in the manifest.
        api = self.api_client_mock()
        coll_writer = arvados.CollectionWriter(api)
        with coll_writer.open('out') as handle:
            self.assertEqual('.', coll_writer.current_stream_name())
            self.assertEqual('out', coll_writer.current_file_name())
            handle.write(b'test data')
            locator = tutil.str_keep_locator('test data')
        self.assertTrue(handle.closed, "writer file not closed after context")
        with self.assertRaises(ValueError):
            handle.write('extra text')
        with self.mock_keep(locator, 200):
            expected = ". {} 0:9:out\n".format(locator)
            self.assertEqual(expected, coll_writer.manifest_text())

    def test_open_writelines(self):
        # writelines() with three two-character strings produces a single
        # six-byte file in the manifest.
        api = self.api_client_mock()
        coll_writer = arvados.CollectionWriter(api)
        with coll_writer.open('six') as handle:
            handle.writelines(['12', '34', '56'])
            locator = tutil.str_keep_locator('123456')
        with self.mock_keep(locator, 200):
            expected = ". {} 0:6:six\n".format(locator)
            self.assertEqual(expected, coll_writer.manifest_text())

    def test_open_flush(self):
        # A flush() between two writes results in two block locators for
        # the one twelve-byte file in the manifest.
        api = self.api_client_mock()
        first_loc = tutil.str_keep_locator('flush1')
        second_loc = tutil.str_keep_locator('flush2')
        with self.mock_keep((first_loc, 200), (second_loc, 200)):
            coll_writer = arvados.CollectionWriter(api)
            with coll_writer.open('flush_test') as handle:
                handle.write(b'flush1')
                handle.flush()
                handle.write(b'flush2')
            expected = ". {} {} 0:12:flush_test\n".format(first_loc,
                                                          second_loc)
            self.assertEqual(expected, coll_writer.manifest_text())

    def test_two_opens_same_stream(self):
        # Two files opened one after the other in stream '.' share one
        # block and appear as consecutive file tokens.
        api = self.api_client_mock()
        coll_writer = arvados.CollectionWriter(api)
        with coll_writer.open('.', '1') as handle:
            handle.write(b'1st')
        with coll_writer.open('.', '2') as handle:
            handle.write(b'2nd')
        locator = tutil.str_keep_locator('1st2nd')
        with self.mock_keep(locator, 200):
            expected = ". {} 0:3:1 3:3:2\n".format(locator)
            self.assertEqual(expected, coll_writer.manifest_text())

    def test_two_opens_two_streams(self):
        # Files opened in '.' and in './dir' end up on separate manifest
        # lines, each with its own block locator.
        api = self.api_client_mock()
        root_loc = tutil.str_keep_locator('file')
        dir_loc = tutil.str_keep_locator('indir')
        with self.mock_keep((root_loc, 200), (dir_loc, 200)):
            coll_writer = arvados.CollectionWriter(api)
            with coll_writer.open('file') as handle:
                handle.write(b'file')
            with coll_writer.open('./dir', 'indir') as handle:
                handle.write(b'indir')
            wanted = ". {} 0:4:file\n./dir {} 0:5:indir\n".format(
                root_loc, dir_loc)
            self.assertEqual(wanted, coll_writer.manifest_text())

    def test_dup_open_fails(self):
        # While the file from the first open() is still held open, a
        # second open() must raise arvados.errors.AssertionError.
        api = self.api_client_mock()
        coll_writer = arvados.CollectionWriter(api)
        still_open = coll_writer.open('one')
        with self.assertRaises(arvados.errors.AssertionError):
            coll_writer.open('two')
785
786
class CollectionMethods(run_test_server.TestCaseWithServers):
    # Tests of Collection accessor methods: keys()/values()/items(),
    # get_properties() and get_trash_at().

    def test_keys_values_items_support_indexing(self):
        # After writing two files, keys(), values() and items() each have
        # length 2; values() and items() are indexed directly, and the
        # names from keys() match the first element of each items() entry.
        coll = Collection()
        for name, payload in (('foo', b'foo'), ('bar', b'bar')):
            with coll.open(name, 'wb') as fh:
                fh.write(payload)
        self.assertEqual(2, len(coll.keys()))
        if sys.version_info >= (3, 0):
            first_name, second_name = coll.keys()
        else:
            # keys() supports indexing only for python2 callers.
            first_name = coll.keys()[0]
            second_name = coll.keys()[1]
        self.assertEqual(2, len(coll.values()))
        # Index values() at both positions; the results are not inspected.
        first_file = coll.values()[0]
        second_file = coll.values()[1]
        self.assertEqual(2, len(coll.items()))
        self.assertEqual(first_name, coll.items()[0][0])
        self.assertEqual(second_name, coll.items()[1][0])

    def test_get_properties(self):
        # get_properties() is {} on a new collection and equals the
        # properties passed to save_new() afterwards.
        coll = Collection()
        self.assertEqual(coll.get_properties(), {})
        coll.save_new(properties={"foo":"bar"})
        self.assertEqual(coll.get_properties(), {"foo":"bar"})

    def test_get_trash_at(self):
        # get_trash_at() is None on a new collection; after save_new()
        # with a trash_at datetime it equals the parsed ISO 8601 form of
        # that same instant.
        coll = Collection()
        self.assertEqual(coll.get_trash_at(), None)
        trash_time = datetime.datetime(2111, 1, 1, 11, 11, 11, 111111)
        coll.save_new(trash_at=trash_time)
        expected = ciso8601.parse_datetime('2111-01-01T11:11:11.111111000Z')
        self.assertEqual(coll.get_trash_at(), expected)
820
821
class CollectionOpenModes(run_test_server.TestCaseWithServers):
    # Checks how Collection.open() handles different mode values.

    def test_open_binary_modes(self):
        # Each of these binary modes must allow writing bytes.
        coll = Collection()
        for binary_mode in ('wb', 'wb+', 'ab', 'ab+'):
            with coll.open('foo', binary_mode) as fh:
                fh.write(b'foo')

    def test_open_invalid_modes(self):
        # Each of these mode values must make open() raise.
        coll = Collection()
        for bad_mode in ('+r', 'aa', '++', 'r+b', 'beer', '', None):
            self.assertRaises(Exception, coll.open, 'foo', bad_mode)

    def test_open_text_modes(self):
        # Seed 'foo', then open it in each text mode: plain read modes
        # must read back 'foo'; every other mode writes 'bar', seeks to
        # the start, and must read back 'bar'.
        coll = Collection()
        with coll.open('foo', 'wb') as fh:
            fh.write('foo')
        for text_mode in ('r', 'rt', 'r+', 'rt+', 'w', 'wt', 'a', 'at'):
            read_only = text_mode[0] == 'r' and '+' not in text_mode
            with coll.open('foo', text_mode) as fh:
                if read_only:
                    self.assertEqual('foo', fh.read(3))
                    continue
                fh.write('bar')
                fh.seek(0, os.SEEK_SET)
                self.assertEqual('bar', fh.read(3))
848
849
class TextModes(run_test_server.TestCaseWithServers):
    # Text-mode reads and writes of multi-byte UTF-8 characters, run with
    # KEEP_BLOCK_SIZE lowered to 4 so the encoded characters are split
    # across block boundaries.

    def setUp(self):
        # Remember the block size in effect so tearDown can restore exactly
        # that value rather than assuming what the default is.
        self._saved_keep_block_size = arvados.config.KEEP_BLOCK_SIZE
        arvados.config.KEEP_BLOCK_SIZE = 4
        if sys.version_info < (3, 0):
            import unicodedata
            self.sailboat = unicodedata.lookup('SAILBOAT')
            self.snowman = unicodedata.lookup('SNOWMAN')
        else:
            self.sailboat = '\N{SAILBOAT}'
            self.snowman = '\N{SNOWMAN}'

    def tearDown(self):
        arvados.config.KEEP_BLOCK_SIZE = self._saved_keep_block_size

    def test_read_sailboat_across_block_boundary(self):
        # Write two encoded sailboats (the second one in two pieces) plus
        # a newline in binary mode, then read the line back in text mode.
        c = Collection()
        data = self.sailboat.encode('utf-8')
        # The with-block closes the file even if a write raises.
        with c.open('sailboats', 'wb') as f:
            f.write(data)
            f.write(data[:1])
            f.write(data[1:])
            f.write(b'\n')
        self.assertRegex(c.portable_manifest_text(), r'\+4 .*\+3 ')

        with c.open('sailboats', 'r') as f:
            string = f.readline()
            self.assertEqual(string, self.sailboat+self.sailboat+'\n')

    def test_write_snowman_across_block_boundary(self):
        # Write two lines of snowmen in text mode, then read both lines
        # back in text mode.
        c = Collection()
        data = self.snowman
        with c.open('snowmany', 'w') as f:
            f.write(data+data+'\n'+data+'\n')
        self.assertRegex(c.portable_manifest_text(), r'\+4 .*\+4 .*\+3 ')

        with c.open('snowmany', 'r') as f:
            self.assertEqual(f.readline(), self.snowman+self.snowman+'\n')
            self.assertEqual(f.readline(), self.snowman+'\n')
893
894
895 class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
896
897     def test_replication_desired_kept_on_load(self):
898         m = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n'
899         c1 = Collection(m, replication_desired=1)
900         c1.save_new()
901         loc = c1.manifest_locator()
902         c2 = Collection(loc)
903         self.assertEqual(c1.manifest_text(strip=True), c2.manifest_text(strip=True))
904         self.assertEqual(c1.replication_desired, c2.replication_desired)
905
906     def test_replication_desired_not_loaded_if_provided(self):
907         m = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n'
908         c1 = Collection(m, replication_desired=1)
909         c1.save_new()
910         loc = c1.manifest_locator()
911         c2 = Collection(loc, replication_desired=2)
912         self.assertEqual(c1.manifest_text(strip=True), c2.manifest_text(strip=True))
913         self.assertNotEqual(c1.replication_desired, c2.replication_desired)
914
915     def test_storage_classes_desired_kept_on_load(self):
916         m = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n'
917         c1 = Collection(m, storage_classes_desired=['archival'])
918         c1.save_new()
919         loc = c1.manifest_locator()
920         c2 = Collection(loc)
921         self.assertEqual(c1.manifest_text(strip=True), c2.manifest_text(strip=True))
922         self.assertEqual(c1.storage_classes_desired(), c2.storage_classes_desired())
923
924     def test_storage_classes_change_after_save(self):
925         m = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n'
926         c1 = Collection(m, storage_classes_desired=['archival'])
927         c1.save_new()
928         loc = c1.manifest_locator()
929         c2 = Collection(loc)
930         self.assertEqual(['archival'], c2.storage_classes_desired())
931         c2.save(storage_classes=['highIO'])
932         self.assertEqual(['highIO'], c2.storage_classes_desired())
933         c3 = Collection(loc)
934         self.assertEqual(c1.manifest_text(strip=True), c3.manifest_text(strip=True))
935         self.assertEqual(['highIO'], c3.storage_classes_desired())
936
937     def test_storage_classes_desired_not_loaded_if_provided(self):
938         m = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n'
939         c1 = Collection(m, storage_classes_desired=['archival'])
940         c1.save_new()
941         loc = c1.manifest_locator()
942         c2 = Collection(loc, storage_classes_desired=['default'])
943         self.assertEqual(c1.manifest_text(strip=True), c2.manifest_text(strip=True))
944         self.assertNotEqual(c1.storage_classes_desired(), c2.storage_classes_desired())
945
946     def test_init_manifest(self):
947         m1 = """. 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt
948 . 085c37f02916da1cad16f93c54d899b7+41 0:41:md5sum.txt
949 . 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md5sum.txt
950 """
951         self.assertEqual(m1, CollectionReader(m1).manifest_text(normalize=False))
952         self.assertEqual(". 5348b82a029fd9e971a811ce1f71360b+43 085c37f02916da1cad16f93c54d899b7+41 8b22da26f9f433dea0a10e5ec66d73ba+43 0:127:md5sum.txt\n", CollectionReader(m1).manifest_text(normalize=True))
953
954     def test_init_manifest_with_collision(self):
955         m1 = """. 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt
956 ./md5sum.txt 085c37f02916da1cad16f93c54d899b7+41 0:41:md5sum.txt
957 """
958         with self.assertRaises(arvados.errors.ArgumentError):
959             self.assertEqual(m1, CollectionReader(m1))
960
961     def test_init_manifest_with_error(self):
962         m1 = """. 0:43:md5sum.txt"""
963         with self.assertRaises(arvados.errors.ArgumentError):
964             self.assertEqual(m1, CollectionReader(m1))
965
966     def test_remove(self):
967         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n')
968         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n", c.portable_manifest_text())
969         self.assertIn("count1.txt", c)
970         c.remove("count1.txt")
971         self.assertNotIn("count1.txt", c)
972         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c.portable_manifest_text())
973         with self.assertRaises(arvados.errors.ArgumentError):
974             c.remove("")
975
976     def test_remove_recursive(self):
977         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:a/b/c/d/efg.txt 0:10:xyz.txt\n')
978         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:xyz.txt\n./a/b/c/d 781e5e245d69b566979b86e28d23f2c7+10 0:10:efg.txt\n", c.portable_manifest_text())
979         self.assertIn("a", c)
980         self.assertEqual(1, len(c["a"].keys()))
981         # cannot remove non-empty directory with default recursive=False
982         with self.assertRaises(OSError):
983             c.remove("a/b")
984         with self.assertRaises(OSError):
985             c.remove("a/b/c/d")
986         c.remove("a/b", recursive=True)
987         self.assertEqual(0, len(c["a"].keys()))
988         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:xyz.txt\n./a d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n", c.portable_manifest_text())
989
990     def test_find(self):
991         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n')
992         self.assertIs(c.find("."), c)
993         self.assertIs(c.find("./count1.txt"), c["count1.txt"])
994         self.assertIs(c.find("count1.txt"), c["count1.txt"])
995         with self.assertRaises(IOError):
996             c.find("/.")
997         with self.assertRaises(arvados.errors.ArgumentError):
998             c.find("")
999         self.assertIs(c.find("./nonexistant.txt"), None)
1000         self.assertIs(c.find("./nonexistantsubdir/nonexistant.txt"), None)
1001
1002     def test_escaped_paths_dont_get_unescaped_on_manifest(self):
1003         # Dir & file names are literally '\056' (escaped form: \134056)
1004         manifest = './\\134056\\040Test d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\134056\n'
1005         c = Collection(manifest)
1006         self.assertEqual(c.portable_manifest_text(), manifest)
1007
1008     def test_other_special_chars_on_file_token(self):
1009         cases = [
1010             ('\\000', '\0'),
1011             ('\\011', '\t'),
1012             ('\\012', '\n'),
1013             ('\\072', ':'),
1014             ('\\134400', '\\400'),
1015         ]
1016         for encoded, decoded in cases:
1017             manifest = '. d41d8cd98f00b204e9800998ecf8427e+0 0:0:some%sfile.txt\n' % encoded
1018             c = Collection(manifest)
1019             self.assertEqual(c.portable_manifest_text(), manifest)
1020             self.assertIn('some%sfile.txt' % decoded, c.keys())
1021
1022     def test_escaped_paths_do_get_unescaped_on_listing(self):
1023         # Dir & file names are literally '\056' (escaped form: \134056)
1024         manifest = './\\134056\\040Test d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\134056\n'
1025         c = Collection(manifest)
1026         self.assertIn('\\056 Test', c.keys())
1027         self.assertIn('\\056', c['\\056 Test'].keys())
1028
1029     def test_make_empty_dir_with_escaped_chars(self):
1030         c = Collection()
1031         c.mkdirs('./Empty\\056Dir')
1032         self.assertEqual(c.portable_manifest_text(),
1033                          './Empty\\134056Dir d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n')
1034
1035     def test_make_empty_dir_with_spaces(self):
1036         c = Collection()
1037         c.mkdirs('./foo bar/baz waz')
1038         self.assertEqual(c.portable_manifest_text(),
1039                          './foo\\040bar/baz\\040waz d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n')
1040
1041     def test_remove_in_subdir(self):
1042         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
1043         c.remove("foo/count2.txt")
1044         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n", c.portable_manifest_text())
1045
1046     def test_remove_empty_subdir(self):
1047         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
1048         c.remove("foo/count2.txt")
1049         c.remove("foo")
1050         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.portable_manifest_text())
1051
1052     def test_remove_nonempty_subdir(self):
1053         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
1054         with self.assertRaises(IOError):
1055             c.remove("foo")
1056         c.remove("foo", recursive=True)
1057         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.portable_manifest_text())
1058
1059     def test_copy_to_file_in_dir(self):
1060         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1061         c.copy("count1.txt", "foo/count2.txt")
1062         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c.portable_manifest_text())
1063
1064     def test_copy_file(self):
1065         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1066         c.copy("count1.txt", "count2.txt")
1067         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n", c.portable_manifest_text())
1068
1069     def test_copy_to_existing_dir(self):
1070         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
1071         c.copy("count1.txt", "foo")
1072         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n", c.portable_manifest_text())
1073
1074     def test_copy_to_new_dir(self):
1075         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1076         c.copy("count1.txt", "foo/")
1077         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.portable_manifest_text())
1078
1079     def test_rename_file(self):
1080         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1081         c.rename("count1.txt", "count2.txt")
1082         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c.manifest_text())
1083
1084     def test_move_file_to_dir(self):
1085         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1086         c.mkdirs("foo")
1087         c.rename("count1.txt", "foo/count2.txt")
1088         self.assertEqual("./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c.manifest_text())
1089
1090     def test_move_file_to_other(self):
1091         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1092         c2 = Collection()
1093         c2.rename("count1.txt", "count2.txt", source_collection=c1)
1094         self.assertEqual("", c1.manifest_text())
1095         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c2.manifest_text())
1096
1097     def test_clone(self):
1098         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
1099         cl = c.clone()
1100         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", cl.portable_manifest_text())
1101
1102     def test_diff_del_add(self):
1103         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1104         c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
1105         d = c2.diff(c1)
1106         self.assertEqual(sorted(d), [
1107             ('add', './count1.txt', c1["count1.txt"]),
1108             ('del', './count2.txt', c2["count2.txt"]),
1109         ])
1110         d = c1.diff(c2)
1111         self.assertEqual(sorted(d), [
1112             ('add', './count2.txt', c2["count2.txt"]),
1113             ('del', './count1.txt', c1["count1.txt"]),
1114         ])
1115         self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1116         c1.apply(d)
1117         self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1118
1119     def test_diff_same(self):
1120         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1121         c2 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1122         d = c2.diff(c1)
1123         self.assertEqual(d, [('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"])])
1124         d = c1.diff(c2)
1125         self.assertEqual(d, [('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"])])
1126
1127         self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1128         c1.apply(d)
1129         self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1130
1131     def test_diff_mod(self):
1132         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1133         c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count1.txt\n')
1134         d = c2.diff(c1)
1135         self.assertEqual(d, [('mod', './count1.txt', c2["count1.txt"], c1["count1.txt"])])
1136         d = c1.diff(c2)
1137         self.assertEqual(d, [('mod', './count1.txt', c1["count1.txt"], c2["count1.txt"])])
1138
1139         self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1140         c1.apply(d)
1141         self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1142
1143     def test_diff_add(self):
1144         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1145         c2 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 5348b82a029fd9e971a811ce1f71360b+43 0:10:count1.txt 10:20:count2.txt\n')
1146         d = c2.diff(c1)
1147         self.assertEqual(sorted(d), [
1148             ('del', './count2.txt', c2["count2.txt"]),
1149             ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
1150         ])
1151         d = c1.diff(c2)
1152         self.assertEqual(sorted(d), [
1153             ('add', './count2.txt', c2["count2.txt"]),
1154             ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
1155         ])
1156
1157         self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1158         c1.apply(d)
1159         self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1160
1161     def test_diff_add_in_subcollection(self):
1162         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1163         c2 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
1164         d = c2.diff(c1)
1165         self.assertEqual(sorted(d), [
1166             ('del', './foo', c2["foo"]),
1167             ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
1168         ])
1169         d = c1.diff(c2)
1170         self.assertEqual(sorted(d), [
1171             ('add', './foo', c2["foo"]),
1172             ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
1173         ])
1174         self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1175         c1.apply(d)
1176         self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1177
1178     def test_diff_del_add_in_subcollection(self):
1179         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
1180         c2 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 5348b82a029fd9e971a811ce1f71360b+43 0:3:count3.txt\n')
1181         d = c2.diff(c1)
1182         self.assertEqual(sorted(d), [
1183             ('add', './foo/count2.txt', c1.find("foo/count2.txt")),
1184             ('del', './foo/count3.txt', c2.find("foo/count3.txt")),
1185             ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
1186         ])
1187         d = c1.diff(c2)
1188         self.assertEqual(sorted(d), [
1189             ('add', './foo/count3.txt', c2.find("foo/count3.txt")),
1190             ('del', './foo/count2.txt', c1.find("foo/count2.txt")),
1191             ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
1192         ])
1193
1194         self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1195         c1.apply(d)
1196         self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1197
1198     def test_diff_mod_in_subcollection(self):
1199         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
1200         c2 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:3:foo\n')
1201         d = c2.diff(c1)
1202         self.assertEqual(sorted(d), [
1203             ('mod', './foo', c2["foo"], c1["foo"]),
1204             ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
1205         ])
1206         d = c1.diff(c2)
1207         self.assertEqual(sorted(d), [
1208             ('mod', './foo', c1["foo"], c2["foo"]),
1209             ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
1210         ])
1211
1212         self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1213         c1.apply(d)
1214         self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1215
1216     def test_conflict_keep_local_change(self):
1217         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1218         c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
1219         d = c1.diff(c2)
1220         self.assertEqual(sorted(d), [
1221             ('add', './count2.txt', c2["count2.txt"]),
1222             ('del', './count1.txt', c1["count1.txt"]),
1223         ])
1224         f = c1.open("count1.txt", "wb")
1225         f.write(b"zzzzz")
1226
1227         # c1 changed, so it should not be deleted.
1228         c1.apply(d)
1229         self.assertEqual(c1.portable_manifest_text(), ". 95ebc3c7b3b9f1d2c40fec14415d3cb8+5 5348b82a029fd9e971a811ce1f71360b+43 0:5:count1.txt 5:10:count2.txt\n")
1230
1231     def test_conflict_mod(self):
1232         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt')
1233         c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count1.txt')
1234         d = c1.diff(c2)
1235         self.assertEqual(d, [('mod', './count1.txt', c1["count1.txt"], c2["count1.txt"])])
1236         f = c1.open("count1.txt", "wb")
1237         f.write(b"zzzzz")
1238
1239         # c1 changed, so c2 mod will go to a conflict file
1240         c1.apply(d)
1241         self.assertRegex(
1242             c1.portable_manifest_text(),
1243             r"\. 95ebc3c7b3b9f1d2c40fec14415d3cb8\+5 5348b82a029fd9e971a811ce1f71360b\+43 0:5:count1\.txt 5:10:count1\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")
1244
1245     def test_conflict_add(self):
1246         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
1247         c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count1.txt\n')
1248         d = c1.diff(c2)
1249         self.assertEqual(sorted(d), [
1250             ('add', './count1.txt', c2["count1.txt"]),
1251             ('del', './count2.txt', c1["count2.txt"]),
1252         ])
1253         f = c1.open("count1.txt", "wb")
1254         f.write(b"zzzzz")
1255
1256         # c1 added count1.txt, so c2 add will go to a conflict file
1257         c1.apply(d)
1258         self.assertRegex(
1259             c1.portable_manifest_text(),
1260             r"\. 95ebc3c7b3b9f1d2c40fec14415d3cb8\+5 5348b82a029fd9e971a811ce1f71360b\+43 0:5:count1\.txt 5:10:count1\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")
1261
1262     def test_conflict_del(self):
1263         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt')
1264         c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count1.txt')
1265         d = c1.diff(c2)
1266         self.assertEqual(d, [('mod', './count1.txt', c1["count1.txt"], c2["count1.txt"])])
1267         c1.remove("count1.txt")
1268
1269         # c1 deleted, so c2 mod will go to a conflict file
1270         c1.apply(d)
1271         self.assertRegex(
1272             c1.portable_manifest_text(),
1273             r"\. 5348b82a029fd9e971a811ce1f71360b\+43 0:10:count1\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")
1274
1275     def test_notify(self):
1276         c1 = Collection()
1277         events = []
1278         c1.subscribe(lambda event, collection, name, item: events.append((event, collection, name, item)))
1279         f = c1.open("foo.txt", "wb")
1280         self.assertEqual(events[0], (arvados.collection.ADD, c1, "foo.txt", f.arvadosfile))
1281
1282     def test_open_w(self):
1283         c1 = Collection(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n")
1284         self.assertEqual(c1["count1.txt"].size(), 10)
1285         c1.open("count1.txt", "wb").close()
1286         self.assertEqual(c1["count1.txt"].size(), 0)
1287
1288
class NewCollectionTestCaseWithServersAndTokens(run_test_server.TestCaseWithServers):
    # Collection tests that inspect the signed block locators found in
    # manifests after blocks have been written or copied.
    # NOTE(review): MAIN_SERVER / KEEP_SERVER presumably ask the
    # run_test_server harness to start the API and Keep services -- confirm.
    MAIN_SERVER = {}
    KEEP_SERVER = {}
    # Locator carrying a local "+A<signature>@<expiry>" permission hint.
    local_locator_re = r"[0-9a-f]{32}\+\d+\+A[a-f0-9]{40}@[a-f0-9]{8}"
    # Locator carrying a remote "+R<cluster>-<signature>@<expiry>" hint.
    remote_locator_re = r"[0-9a-f]{32}\+\d+\+R[a-z]{5}-[a-f0-9]{40}@[a-f0-9]{8}"

    def setUp(self):
        # Remember the real KeepClient.put before any test patches it, so
        # mocks can delegate to it through side_effect.
        self.keep_put = arvados.keep.KeepClient.put

    def _count_locators(self, locator_re, collection):
        # Count the block locators matching locator_re in the manifest
        # text of the given collection.
        return len(re.findall(locator_re, collection.manifest_text()))

    @mock.patch('arvados.keep.KeepClient.put', autospec=True)
    def test_storage_classes_desired(self, put_mock):
        # The storage classes given to the Collection must reach
        # KeepClient.put as the 'classes' keyword argument.
        put_mock.side_effect = self.keep_put
        coll = Collection(storage_classes_desired=['default'])
        with coll.open("file.txt", 'wb') as writer:
            writer.write('content')
        coll.save_new()

        # call_args is an (args, kwargs) pair for the most recent call.
        put_kwargs = put_mock.call_args[1]
        self.assertEqual(['default'], put_kwargs['classes'])

    @mock.patch('arvados.keep.KeepClient.put', autospec=True)
    def test_repacked_block_submission_get_permission_token(self, mocked_put):
        '''
        Blocks committed as a result of repacking small blocks must still
        end up with their permission tokens in the collection manifest.
        '''
        def slow_put(*args, **kwargs):
            # Delay every put by a second to simulate a slow Keep service.
            time.sleep(1)
            return self.keep_put(*args, **kwargs)

        mocked_put.side_effect = slow_put
        coll = Collection()
        # 70 files of roughly 1MiB each: enough to force one big block to
        # be produced by repacking small ones before the upload finishes.
        for index in range(70):
            writer = coll.open("file_{}.txt".format(index), 'wb')
            writer.write(random.choice('abcdefghijklmnopqrstuvwxyz') * (2**20+index))
            writer.close(flush=False)

        # Two blocks are expected, both carrying a token.
        self.assertEqual(self._count_locators(self.local_locator_re, coll), 2)

    @mock.patch('arvados.keep.KeepClient.refresh_signature')
    def test_copy_remote_blocks_on_save_new(self, rs_mock):
        # save_new() must swap a remote-signed locator for the local one
        # returned by KeepClient.refresh_signature.
        remote_block_loc = "acbd18db4cc2f85cedef654fccc4a4d8+3+Remote-" + "a" * 40 + "@abcdef01"
        local_block_loc = "acbd18db4cc2f85cedef654fccc4a4d8+3+A" + "b" * 40 + "@abcdef01"
        rs_mock.return_value = local_block_loc

        coll = Collection(". " + remote_block_loc + " 0:3:foofile.txt\n")
        self.assertEqual(self._count_locators(self.remote_locator_re, coll), 1)
        self.assertEqual(self._count_locators(self.local_locator_re, coll), 0)

        coll.save_new()

        rs_mock.assert_called()
        self.assertEqual(self._count_locators(self.remote_locator_re, coll), 0)
        self.assertEqual(self._count_locators(self.local_locator_re, coll), 1)

    @mock.patch('arvados.keep.KeepClient.refresh_signature')
    def test_copy_remote_blocks_on_save(self, rs_mock):
        # Same as the save_new() case, but the remote block arrives by
        # copying a file into an already saved local collection.
        remote_block_loc = "acbd18db4cc2f85cedef654fccc4a4d8+3+Remote-" + "a" * 40 + "@abcdef01"
        local_block_loc = "acbd18db4cc2f85cedef654fccc4a4d8+3+A" + "b" * 40 + "@abcdef01"
        rs_mock.return_value = local_block_loc

        # Collection whose only block has a remote signature.
        remote_c = Collection(". " + remote_block_loc + " 0:3:foofile.txt\n")
        self.assertEqual(self._count_locators(self.remote_locator_re, remote_c), 1)

        # Saved local collection with one locally signed block.
        local_c = Collection()
        with local_c.open('barfile.txt', 'wb') as writer:
            writer.write('bar')
        local_c.save_new()
        self.assertEqual(self._count_locators(self.local_locator_re, local_c), 1)
        self.assertEqual(self._count_locators(self.remote_locator_re, local_c), 0)

        # After the copy the local collection references one block of each kind.
        local_c.copy('./foofile.txt', './copied/foofile.txt', remote_c)
        self.assertEqual(self._count_locators(self.local_locator_re, local_c), 1)
        self.assertEqual(self._count_locators(self.remote_locator_re, local_c), 1)

        # Saving must turn the remote locator into a local one.
        local_c.save()
        rs_mock.assert_called()
        self.assertEqual(self._count_locators(self.local_locator_re, local_c), 2)
        self.assertEqual(self._count_locators(self.remote_locator_re, local_c), 0)
1378
1379
class NewCollectionTestCaseWithServers(run_test_server.TestCaseWithServers):
    # Tests for version preservation on save and for how small blocks are
    # packed together in the manifest.

    def test_preserve_version_on_save(self):
        # Each save must bump the stored version, and preserve_version
        # must reflect the flag passed to that particular save.
        coll = Collection()

        def stored_record():
            # Fetch the collection record through a fresh API client.
            return arvados.api().collections().get(
                uuid=coll.manifest_locator()).execute()

        coll.save_new(preserve_version=True)
        record = stored_record()
        self.assertEqual(record['version'], 1)
        self.assertEqual(record['preserve_version'], True)

        with coll.open("foo.txt", "wb") as writer:
            writer.write(b"foo")
        coll.save(preserve_version=True)
        record = stored_record()
        self.assertEqual(record['version'], 2)
        self.assertEqual(record['preserve_version'], True)

        with coll.open("bar.txt", "wb") as writer:
            writer.write(b"bar")
        coll.save(preserve_version=False)
        record = stored_record()
        self.assertEqual(record['version'], 3)
        self.assertEqual(record['preserve_version'], False)

    def test_get_manifest_text_only_committed(self):
        # With only_committed=True the manifest must list only data whose
        # block has already been committed.
        coll = Collection()

        def committed_manifest():
            return coll._get_manifest_text(".",
                                           strip=False,
                                           normalize=False,
                                           only_committed=True)

        with coll.open("count.txt", "wb") as count_writer:
            # foo.txt is written and flushed, so its block is committed.
            with coll.open("foo.txt", "wb") as foo_writer:
                foo_writer.write(b"foo")
                foo_writer.flush() # Force block commit
            count_writer.write(b"0123456789")
            # count.txt has not been flushed yet, so it shows up empty.
            self.assertEqual(
                committed_manifest(),
                '. acbd18db4cc2f85cedef654fccc4a4d8+3 0:0:count.txt 0:3:foo.txt\n')
            # Commit count.txt too before the file gets closed.
            count_writer.flush() # Force block commit
        self.assertEqual(
            committed_manifest(),
            ". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:10:count.txt 10:3:foo.txt\n")

    def test_only_small_blocks_are_packed_together(self):
        coll = Collection()
        # Two small files, closed without flushing...
        count_writer = coll.open("count.txt", "wb")
        count_writer.write(b"0123456789")
        count_writer.close(flush=False)
        foo_writer = coll.open("foo.txt", "wb")
        foo_writer.write(b"foo")
        foo_writer.close(flush=False)
        # ...followed by a big file, which must keep a block of its own
        # instead of being packed with the small ones.
        big_writer = coll.open("bigfile.txt", "wb")
        big_writer.write(b"x" * 1024 * 1024 * 33) # 33 MB > KEEP_BLOCK_SIZE/2
        big_writer.close(flush=False)

        expected_manifest = '. 2d303c138c118af809f39319e5d507e9+34603008 a8430a058b8fbf408e1931b794dbd6fb+13 0:34603008:bigfile.txt 34603008:10:count.txt 34603018:3:foo.txt\n'
        self.assertEqual(coll.manifest_text("."), expected_manifest)

    def test_flush_after_small_block_packing(self):
        # Reopening a packed file and closing it with flush=True must not
        # change the manifest.
        packed_manifest = '. a8430a058b8fbf408e1931b794dbd6fb+13 0:10:count.txt 10:3:foo.txt\n'
        coll = Collection()
        # Two small files, closed without flushing.
        count_writer = coll.open("count.txt", "wb")
        count_writer.write(b"0123456789")
        count_writer.close(flush=False)
        foo_writer = coll.open("foo.txt", "wb")
        foo_writer.write(b"foo")
        foo_writer.close(flush=False)

        self.assertEqual(coll.manifest_text(), packed_manifest)

        reopened = coll.open("count.txt", "rb+")
        reopened.close(flush=True)

        self.assertEqual(coll.manifest_text(), packed_manifest)

    def test_write_after_small_block_packing2(self):
        # Overwriting the start of a packed file must add a new block for
        # the new bytes while the rest still points into the packed block.
        coll = Collection()
        # Two small files, closed without flushing.
        count_writer = coll.open("count.txt", "wb")
        count_writer.write(b"0123456789")
        count_writer.close(flush=False)
        foo_writer = coll.open("foo.txt", "wb")
        foo_writer.write(b"foo")
        foo_writer.close(flush=False)

        self.assertEqual(
            coll.manifest_text(),
            '. a8430a058b8fbf408e1931b794dbd6fb+13 0:10:count.txt 10:3:foo.txt\n')

        reopened = coll.open("count.txt", "rb+")
        reopened.write(b"abc")
        reopened.close(flush=False)

        self.assertEqual(
            coll.manifest_text(),
            '. 900150983cd24fb0d6963f7d28e17f72+3 a8430a058b8fbf408e1931b794dbd6fb+13 0:3:count.txt 6:7:count.txt 13:3:foo.txt\n')

    def test_small_block_packing_with_overwrite(self):
        # Create two empty files, give each two bytes, then overwrite the
        # first one.  Both must end up sharing a single four-byte block.
        coll = Collection()

        writer = coll.open("b1", "wb")
        writer.close()
        coll["b1"].writeto(0, b"b1", 0)

        writer = coll.open("b2", "wb")
        writer.close()
        coll["b2"].writeto(0, b"b2", 0)

        coll["b1"].writeto(0, b"1b", 0)

        self.assertEqual(coll.manifest_text(), ". ed4f3f67c70b02b29c50ce1ea26666bd+4 0:2:b1 2:2:b2\n")
        self.assertEqual(coll["b1"].manifest_text(), ". ed4f3f67c70b02b29c50ce1ea26666bd+4 0:2:b1\n")
        self.assertEqual(coll["b2"].manifest_text(), ". ed4f3f67c70b02b29c50ce1ea26666bd+4 2:2:b2\n")
1498
1499
class CollectionCreateUpdateTest(run_test_server.TestCaseWithServers):
    # Tests that create, save, diff and update collections.
    # NOTE(review): MAIN_SERVER / KEEP_SERVER presumably ask the
    # run_test_server harness to start the API and Keep services -- confirm.
    MAIN_SERVER = {}
    KEEP_SERVER = {}

    def create_count_txt(self):
        # Save a brand new empty collection, then add count.txt to the
        # in-memory copy only.  Returns the collection with that write
        # still unsaved.
        empty_pdh = "d41d8cd98f00b204e9800998ecf8427e+0"

        coll = Collection()
        coll.save_new("CollectionCreateUpdateTest", ensure_unique_name=True)
        self.assertEqual(coll.portable_data_hash(), empty_pdh)
        self.assertEqual(coll.api_response()["portable_data_hash"], empty_pdh)

        with coll.open("count.txt", "wb") as writer:
            writer.write(b"0123456789")

        self.assertEqual(
            coll.portable_manifest_text(),
            ". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n")

        return coll

    def _assert_saved_as_intermediate(self, coll):
        # Checks shared by the save() and save_new() tests: the manifest
        # holds a signed locator for count.txt and the API response echoes
        # the requested storage classes, properties and trash time.
        self.assertRegex(
            coll.manifest_text(),
            r"^\. 781e5e245d69b566979b86e28d23f2c7\+10\+A[a-f0-9]{40}@[a-f0-9]{8} 0:10:count\.txt$")
        self.assertEqual(coll.api_response()["storage_classes_desired"], ['archive'])
        self.assertEqual(coll.api_response()["properties"], {'type' : 'Intermediate'})
        self.assertEqual(coll.api_response()["trash_at"], '2111-01-01T11:11:11.111111000Z')

    def test_create_and_save(self):
        coll = self.create_count_txt()
        trash_time = datetime.datetime(2111, 1, 1, 11, 11, 11, 111111)
        coll.save(properties={'type' : 'Intermediate'},
                  storage_classes=['archive'],
                  trash_at=trash_time)

        self._assert_saved_as_intermediate(coll)

    def test_create_and_save_new(self):
        coll = self.create_count_txt()
        trash_time = datetime.datetime(2111, 1, 1, 11, 11, 11, 111111)
        coll.save_new(properties={'type' : 'Intermediate'},
                      storage_classes=['archive'],
                      trash_at=trash_time)

        self._assert_saved_as_intermediate(coll)

    def test_create_and_save_after_commiting(self):
        # When saving twice, the API response must reflect the values
        # given to the second save.
        coll = self.create_count_txt()
        first_trash = datetime.datetime(2111, 1, 1, 11, 11, 11, 111111)
        second_trash = datetime.datetime(2222, 2, 2, 22, 22, 22, 222222)

        coll.save(properties={'type' : 'Intermediate'},
                  storage_classes=['hot'],
                  trash_at=first_trash)
        coll.save(properties={'type' : 'Output'},
                  storage_classes=['cold'],
                  trash_at=second_trash)

        self.assertEqual(coll.api_response()["storage_classes_desired"], ['cold'])
        self.assertEqual(coll.api_response()["properties"], {'type' : 'Output'})
        self.assertEqual(coll.api_response()["trash_at"], '2222-02-02T22:22:22.222222000Z')

    def test_create_diff_apply(self):
        # Load a second copy of the saved collection, change it, and bring
        # the first copy in line by applying the diff.
        original = self.create_count_txt()
        original.save()

        edited = Collection(original.manifest_locator())
        with edited.open("count.txt", "wb") as writer:
            writer.write(b"abcdefg")

        changes = original.diff(edited)

        expected_first = (arvados.collection.MOD, u'./count.txt',
                          original["count.txt"], edited["count.txt"])
        self.assertEqual(changes[0], expected_first)

        original.apply(changes)
        self.assertEqual(original.portable_data_hash(), edited.portable_data_hash())

    def test_diff_apply_with_token(self):
        # A diff taken between two signed manifests is applied to an
        # unsigned copy; the result must carry the signed locator.
        baseline = CollectionReader(". 781e5e245d69b566979b86e28d23f2c7+10+A715fd31f8111894f717eb1003c1b0216799dd9ec@54f5dd1a 0:10:count.txt\n")
        unsigned = Collection(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n")
        other = CollectionReader(". 7ac66c0f148de9519b8bd264312c4d64+7+A715fd31f8111894f717eb1003c1b0216799dd9ec@54f5dd1a 0:7:count.txt\n")

        changes = baseline.diff(other)
        self.assertEqual(
            changes,
            [('mod', u'./count.txt', unsigned["count.txt"], other["count.txt"])])

        unsigned.apply(changes)

        self.assertEqual(
            unsigned.manifest_text(),
            ". 7ac66c0f148de9519b8bd264312c4d64+7+A715fd31f8111894f717eb1003c1b0216799dd9ec@54f5dd1a 0:7:count.txt\n")

    def test_create_and_update(self):
        # update() on the stale copy must pick up what the other copy saved.
        stale = self.create_count_txt()
        stale.save()

        fresh = arvados.collection.Collection(stale.manifest_locator())
        with fresh.open("count.txt", "wb") as writer:
            writer.write(b"abcdefg")

        fresh.save()

        self.assertNotEqual(stale.portable_data_hash(), fresh.portable_data_hash())
        stale.update()
        self.assertEqual(stale.portable_data_hash(), fresh.portable_data_hash())

    def test_create_and_update_with_conflict(self):
        # Both copies rewrite count.txt.  After update() the local edit
        # must stay in count.txt and the saved one must appear as a
        # timestamped "~conflict~" file.
        local = self.create_count_txt()
        local.save()

        with local.open("count.txt", "wb") as writer:
            writer.write(b"XYZ")

        remote = arvados.collection.Collection(local.manifest_locator())
        with remote.open("count.txt", "wb") as writer:
            writer.write(b"abcdefg")

        remote.save()

        local.update()
        conflict_manifest_re = r"\. e65075d550f9b5bf9992fa1d71a131be\+3\S* 7ac66c0f148de9519b8bd264312c4d64\+7\S* 0:3:count\.txt 3:7:count\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$"
        self.assertRegex(local.manifest_text(), conflict_manifest_re)

    def test_pdh_is_native_str(self):
        # portable_data_hash() must return exactly the type of a plain ''
        # literal.
        coll = self.create_count_txt()
        pdh = coll.portable_data_hash()
        native_str = type('')
        self.assertEqual(native_str, type(pdh))
1625
1626
# Run every test case in this module when the file is executed directly
# as a script rather than imported by a test runner.
if __name__ == '__main__':
    unittest.main()