Merge branch 'main' from workbench2.git
[arvados.git] / sdk / python / tests / test_collections.py
1 # Copyright (C) The Arvados Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: Apache-2.0
4
5 from __future__ import absolute_import
6
7 from builtins import object
8 import arvados
9 import copy
10 import mock
11 import os
12 import random
13 import re
14 import sys
15 import datetime
16 import ciso8601
17 import time
18 import unittest
19 import parameterized
20
21 from . import run_test_server
22 from arvados._ranges import Range, LocatorAndRange
23 from arvados.collection import Collection, CollectionReader
24 from . import arvados_testutil as tutil
25 from .arvados_testutil import make_block_cache
26
class TestResumableWriter(arvados.ResumableCollectionWriter):
    """Resumable collection writer with a 1024-byte block size for tests."""

    # PUT to Keep every 1K.
    KEEP_BLOCK_SIZE = 1024

    def current_state(self):
        """Return the result of dump_state() called with copy.deepcopy."""
        snapshot = self.dump_state(copy.deepcopy)
        return snapshot
32
33 @parameterized.parameterized_class([{"disk_cache": True}, {"disk_cache": False}])
34 class ArvadosCollectionsTest(run_test_server.TestCaseWithServers,
35                              tutil.ArvadosBaseTestCase):
36     disk_cache = False
37     MAIN_SERVER = {}
38
39     @classmethod
40     def setUpClass(cls):
41         super(ArvadosCollectionsTest, cls).setUpClass()
42         # need admin privileges to make collections with unsigned blocks
43         run_test_server.authorize_with('admin')
44         cls.api_client = arvados.api('v1')
45         cls.keep_client = arvados.KeepClient(api_client=cls.api_client,
46                                              local_store=cls.local_store,
47                                              block_cache=make_block_cache(cls.disk_cache))
48
49     def write_foo_bar_baz(self):
50         cw = arvados.CollectionWriter(self.api_client)
51         self.assertEqual(cw.current_stream_name(), '.',
52                          'current_stream_name() should be "." now')
53         cw.set_current_file_name('foo.txt')
54         cw.write(b'foo')
55         self.assertEqual(cw.current_file_name(), 'foo.txt',
56                          'current_file_name() should be foo.txt now')
57         cw.start_new_file('bar.txt')
58         cw.write(b'bar')
59         cw.start_new_stream('baz')
60         cw.write(b'baz')
61         cw.set_current_file_name('baz.txt')
62         self.assertEqual(cw.manifest_text(),
63                          ". 3858f62230ac3c915f300c664312c63f+6 0:3:foo.txt 3:3:bar.txt\n" +
64                          "./baz 73feffa4b7f6bb68e44cf984c85f6e88+3 0:3:baz.txt\n",
65                          "wrong manifest: got {}".format(cw.manifest_text()))
66         cw.save_new()
67         return cw.portable_data_hash()
68
69     def test_pdh_is_native_str(self):
70         pdh = self.write_foo_bar_baz()
71         self.assertEqual(type(''), type(pdh))
72
73     def test_keep_local_store(self):
74         self.assertEqual(self.keep_client.put(b'foo'), 'acbd18db4cc2f85cedef654fccc4a4d8+3', 'wrong md5 hash from Keep.put')
75         self.assertEqual(self.keep_client.get('acbd18db4cc2f85cedef654fccc4a4d8+3'), b'foo', 'wrong data from Keep.get')
76
77     def test_local_collection_writer(self):
78         self.assertEqual(self.write_foo_bar_baz(),
79                          '23ca013983d6239e98931cc779e68426+114',
80                          'wrong locator hash: ' + self.write_foo_bar_baz())
81
82     def test_local_collection_reader(self):
83         foobarbaz = self.write_foo_bar_baz()
84         cr = arvados.CollectionReader(
85             foobarbaz + '+Xzizzle', self.api_client)
86         got = []
87         for s in cr.all_streams():
88             for f in s.all_files():
89                 got += [[f.size(), f.stream_name(), f.name(), f.read(2**26)]]
90         expected = [[3, '.', 'foo.txt', b'foo'],
91                     [3, '.', 'bar.txt', b'bar'],
92                     [3, './baz', 'baz.txt', b'baz']]
93         self.assertEqual(got,
94                          expected)
95         stream0 = cr.all_streams()[0]
96         self.assertEqual(stream0.readfrom(0, 0),
97                          b'',
98                          'reading zero bytes should have returned empty string')
99         self.assertEqual(stream0.readfrom(0, 2**26),
100                          b'foobar',
101                          'reading entire stream failed')
102         self.assertEqual(stream0.readfrom(2**26, 0),
103                          b'',
104                          'reading zero bytes should have returned empty string')
105         self.assertEqual(3, len(cr))
106         self.assertTrue(cr)
107
108     def _test_subset(self, collection, expected):
109         cr = arvados.CollectionReader(collection, self.api_client)
110         for s in cr.all_streams():
111             for ex in expected:
112                 if ex[0] == s:
113                     f = s.files()[ex[2]]
114                     got = [f.size(), f.stream_name(), f.name(), "".join(f.readall(2**26))]
115                     self.assertEqual(got,
116                                      ex,
117                                      'all_files|as_manifest did not preserve manifest contents: got %s expected %s' % (got, ex))
118
119     def test_collection_manifest_subset(self):
120         foobarbaz = self.write_foo_bar_baz()
121         self._test_subset(foobarbaz,
122                           [[3, '.',     'bar.txt', b'bar'],
123                            [3, '.',     'foo.txt', b'foo'],
124                            [3, './baz', 'baz.txt', b'baz']])
125         self._test_subset((". %s %s 0:3:foo.txt 3:3:bar.txt\n" %
126                            (self.keep_client.put(b"foo"),
127                             self.keep_client.put(b"bar"))),
128                           [[3, '.', 'bar.txt', b'bar'],
129                            [3, '.', 'foo.txt', b'foo']])
130         self._test_subset((". %s %s 0:2:fo.txt 2:4:obar.txt\n" %
131                            (self.keep_client.put(b"foo"),
132                             self.keep_client.put(b"bar"))),
133                           [[2, '.', 'fo.txt', b'fo'],
134                            [4, '.', 'obar.txt', b'obar']])
135         self._test_subset((". %s %s 0:2:fo.txt 2:0:zero.txt 2:2:ob.txt 4:2:ar.txt\n" %
136                            (self.keep_client.put(b"foo"),
137                             self.keep_client.put(b"bar"))),
138                           [[2, '.', 'ar.txt', b'ar'],
139                            [2, '.', 'fo.txt', b'fo'],
140                            [2, '.', 'ob.txt', b'ob'],
141                            [0, '.', 'zero.txt', b'']])
142
143     def test_collection_empty_file(self):
144         cw = arvados.CollectionWriter(self.api_client)
145         cw.start_new_file('zero.txt')
146         cw.write(b'')
147
148         self.assertEqual(cw.manifest_text(), ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:zero.txt\n")
149         self.check_manifest_file_sizes(cw.manifest_text(), [0])
150         cw = arvados.CollectionWriter(self.api_client)
151         cw.start_new_file('zero.txt')
152         cw.write(b'')
153         cw.start_new_file('one.txt')
154         cw.write(b'1')
155         cw.start_new_stream('foo')
156         cw.start_new_file('zero.txt')
157         cw.write(b'')
158         self.check_manifest_file_sizes(cw.manifest_text(), [0,1,0])
159
160     def test_no_implicit_normalize(self):
161         cw = arvados.CollectionWriter(self.api_client)
162         cw.start_new_file('b')
163         cw.write(b'b')
164         cw.start_new_file('a')
165         cw.write(b'')
166         self.check_manifest_file_sizes(cw.manifest_text(), [1,0])
167         self.check_manifest_file_sizes(
168             arvados.CollectionReader(
169                 cw.manifest_text()).manifest_text(normalize=True),
170             [0,1])
171
172     def check_manifest_file_sizes(self, manifest_text, expect_sizes):
173         cr = arvados.CollectionReader(manifest_text, self.api_client)
174         got_sizes = []
175         for f in cr.all_files():
176             got_sizes += [f.size()]
177         self.assertEqual(got_sizes, expect_sizes, "got wrong file sizes %s, expected %s" % (got_sizes, expect_sizes))
178
179     def test_normalized_collection(self):
180         m1 = """. 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt
181 . 085c37f02916da1cad16f93c54d899b7+41 0:41:md5sum.txt
182 . 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md5sum.txt
183 """
184         self.assertEqual(arvados.CollectionReader(m1, self.api_client).manifest_text(normalize=True),
185                          """. 5348b82a029fd9e971a811ce1f71360b+43 085c37f02916da1cad16f93c54d899b7+41 8b22da26f9f433dea0a10e5ec66d73ba+43 0:127:md5sum.txt
186 """)
187
188         m2 = """. 204e43b8a1185621ca55a94839582e6f+67108864 b9677abbac956bd3e86b1deb28dfac03+67108864 fc15aff2a762b13f521baf042140acec+67108864 323d2a3ce20370c4ca1d3462a344f8fd+25885655 0:227212247:var-GS000016015-ASM.tsv.bz2
189 """
190         self.assertEqual(arvados.CollectionReader(m2, self.api_client).manifest_text(normalize=True), m2)
191
192         m3 = """. 5348b82a029fd9e971a811ce1f71360b+43 3:40:md5sum.txt
193 . 085c37f02916da1cad16f93c54d899b7+41 0:41:md5sum.txt
194 . 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md5sum.txt
195 """
196         self.assertEqual(arvados.CollectionReader(m3, self.api_client).manifest_text(normalize=True),
197                          """. 5348b82a029fd9e971a811ce1f71360b+43 085c37f02916da1cad16f93c54d899b7+41 8b22da26f9f433dea0a10e5ec66d73ba+43 3:124:md5sum.txt
198 """)
199
200         m4 = """. 204e43b8a1185621ca55a94839582e6f+67108864 0:3:foo/bar
201 ./zzz 204e43b8a1185621ca55a94839582e6f+67108864 0:999:zzz
202 ./foo 323d2a3ce20370c4ca1d3462a344f8fd+25885655 0:3:bar
203 """
204         self.assertEqual(arvados.CollectionReader(m4, self.api_client).manifest_text(normalize=True),
205                          """./foo 204e43b8a1185621ca55a94839582e6f+67108864 323d2a3ce20370c4ca1d3462a344f8fd+25885655 0:3:bar 67108864:3:bar
206 ./zzz 204e43b8a1185621ca55a94839582e6f+67108864 0:999:zzz
207 """)
208
209         m5 = """. 204e43b8a1185621ca55a94839582e6f+67108864 0:3:foo/bar
210 ./zzz 204e43b8a1185621ca55a94839582e6f+67108864 0:999:zzz
211 ./foo 204e43b8a1185621ca55a94839582e6f+67108864 3:3:bar
212 """
213         self.assertEqual(arvados.CollectionReader(m5, self.api_client).manifest_text(normalize=True),
214                          """./foo 204e43b8a1185621ca55a94839582e6f+67108864 0:6:bar
215 ./zzz 204e43b8a1185621ca55a94839582e6f+67108864 0:999:zzz
216 """)
217
218         with self.data_file('1000G_ref_manifest') as f6:
219             m6 = f6.read()
220             self.assertEqual(arvados.CollectionReader(m6, self.api_client).manifest_text(normalize=True), m6)
221
222         with self.data_file('jlake_manifest') as f7:
223             m7 = f7.read()
224             self.assertEqual(arvados.CollectionReader(m7, self.api_client).manifest_text(normalize=True), m7)
225
226         m8 = """./a\\040b\\040c 59ca0efa9f5633cb0371bbc0355478d8+13 0:13:hello\\040world.txt
227 """
228         self.assertEqual(arvados.CollectionReader(m8, self.api_client).manifest_text(normalize=True), m8)
229
230     def test_locators_and_ranges(self):
231         blocks2 = [Range('a', 0, 10),
232                    Range('b', 10, 10),
233                    Range('c', 20, 10),
234                    Range('d', 30, 10),
235                    Range('e', 40, 10),
236                    Range('f', 50, 10)]
237
238         self.assertEqual(arvados.locators_and_ranges(blocks2,  2,  2), [LocatorAndRange('a', 10, 2, 2)])
239         self.assertEqual(arvados.locators_and_ranges(blocks2, 12, 2), [LocatorAndRange('b', 10, 2, 2)])
240         self.assertEqual(arvados.locators_and_ranges(blocks2, 22, 2), [LocatorAndRange('c', 10, 2, 2)])
241         self.assertEqual(arvados.locators_and_ranges(blocks2, 32, 2), [LocatorAndRange('d', 10, 2, 2)])
242         self.assertEqual(arvados.locators_and_ranges(blocks2, 42, 2), [LocatorAndRange('e', 10, 2, 2)])
243         self.assertEqual(arvados.locators_and_ranges(blocks2, 52, 2), [LocatorAndRange('f', 10, 2, 2)])
244         self.assertEqual(arvados.locators_and_ranges(blocks2, 62, 2), [])
245         self.assertEqual(arvados.locators_and_ranges(blocks2, -2, 2), [])
246
247         self.assertEqual(arvados.locators_and_ranges(blocks2,  0,  2), [LocatorAndRange('a', 10, 0, 2)])
248         self.assertEqual(arvados.locators_and_ranges(blocks2, 10, 2), [LocatorAndRange('b', 10, 0, 2)])
249         self.assertEqual(arvados.locators_and_ranges(blocks2, 20, 2), [LocatorAndRange('c', 10, 0, 2)])
250         self.assertEqual(arvados.locators_and_ranges(blocks2, 30, 2), [LocatorAndRange('d', 10, 0, 2)])
251         self.assertEqual(arvados.locators_and_ranges(blocks2, 40, 2), [LocatorAndRange('e', 10, 0, 2)])
252         self.assertEqual(arvados.locators_and_ranges(blocks2, 50, 2), [LocatorAndRange('f', 10, 0, 2)])
253         self.assertEqual(arvados.locators_and_ranges(blocks2, 60, 2), [])
254         self.assertEqual(arvados.locators_and_ranges(blocks2, -2, 2), [])
255
256         self.assertEqual(arvados.locators_and_ranges(blocks2,  9,  2), [LocatorAndRange('a', 10, 9, 1), LocatorAndRange('b', 10, 0, 1)])
257         self.assertEqual(arvados.locators_and_ranges(blocks2, 19, 2), [LocatorAndRange('b', 10, 9, 1), LocatorAndRange('c', 10, 0, 1)])
258         self.assertEqual(arvados.locators_and_ranges(blocks2, 29, 2), [LocatorAndRange('c', 10, 9, 1), LocatorAndRange('d', 10, 0, 1)])
259         self.assertEqual(arvados.locators_and_ranges(blocks2, 39, 2), [LocatorAndRange('d', 10, 9, 1), LocatorAndRange('e', 10, 0, 1)])
260         self.assertEqual(arvados.locators_and_ranges(blocks2, 49, 2), [LocatorAndRange('e', 10, 9, 1), LocatorAndRange('f', 10, 0, 1)])
261         self.assertEqual(arvados.locators_and_ranges(blocks2, 59, 2), [LocatorAndRange('f', 10, 9, 1)])
262
263
264         blocks3 = [Range('a', 0, 10),
265                   Range('b', 10, 10),
266                   Range('c', 20, 10),
267                   Range('d', 30, 10),
268                   Range('e', 40, 10),
269                   Range('f', 50, 10),
270                    Range('g', 60, 10)]
271
272         self.assertEqual(arvados.locators_and_ranges(blocks3,  2,  2), [LocatorAndRange('a', 10, 2, 2)])
273         self.assertEqual(arvados.locators_and_ranges(blocks3, 12, 2), [LocatorAndRange('b', 10, 2, 2)])
274         self.assertEqual(arvados.locators_and_ranges(blocks3, 22, 2), [LocatorAndRange('c', 10, 2, 2)])
275         self.assertEqual(arvados.locators_and_ranges(blocks3, 32, 2), [LocatorAndRange('d', 10, 2, 2)])
276         self.assertEqual(arvados.locators_and_ranges(blocks3, 42, 2), [LocatorAndRange('e', 10, 2, 2)])
277         self.assertEqual(arvados.locators_and_ranges(blocks3, 52, 2), [LocatorAndRange('f', 10, 2, 2)])
278         self.assertEqual(arvados.locators_and_ranges(blocks3, 62, 2), [LocatorAndRange('g', 10, 2, 2)])
279
280
281         blocks = [Range('a', 0, 10),
282                   Range('b', 10, 15),
283                   Range('c', 25, 5)]
284         self.assertEqual(arvados.locators_and_ranges(blocks, 1, 0), [])
285         self.assertEqual(arvados.locators_and_ranges(blocks, 0, 5), [LocatorAndRange('a', 10, 0, 5)])
286         self.assertEqual(arvados.locators_and_ranges(blocks, 3, 5), [LocatorAndRange('a', 10, 3, 5)])
287         self.assertEqual(arvados.locators_and_ranges(blocks, 0, 10), [LocatorAndRange('a', 10, 0, 10)])
288
289         self.assertEqual(arvados.locators_and_ranges(blocks, 0, 11), [LocatorAndRange('a', 10, 0, 10),
290                                                                       LocatorAndRange('b', 15, 0, 1)])
291         self.assertEqual(arvados.locators_and_ranges(blocks, 1, 11), [LocatorAndRange('a', 10, 1, 9),
292                                                                       LocatorAndRange('b', 15, 0, 2)])
293         self.assertEqual(arvados.locators_and_ranges(blocks, 0, 25), [LocatorAndRange('a', 10, 0, 10),
294                                                                       LocatorAndRange('b', 15, 0, 15)])
295
296         self.assertEqual(arvados.locators_and_ranges(blocks, 0, 30), [LocatorAndRange('a', 10, 0, 10),
297                                                                       LocatorAndRange('b', 15, 0, 15),
298                                                                       LocatorAndRange('c', 5, 0, 5)])
299         self.assertEqual(arvados.locators_and_ranges(blocks, 1, 30), [LocatorAndRange('a', 10, 1, 9),
300                                                                       LocatorAndRange('b', 15, 0, 15),
301                                                                       LocatorAndRange('c', 5, 0, 5)])
302         self.assertEqual(arvados.locators_and_ranges(blocks, 0, 31), [LocatorAndRange('a', 10, 0, 10),
303                                                                       LocatorAndRange('b', 15, 0, 15),
304                                                                       LocatorAndRange('c', 5, 0, 5)])
305
306         self.assertEqual(arvados.locators_and_ranges(blocks, 15, 5), [LocatorAndRange('b', 15, 5, 5)])
307
308         self.assertEqual(arvados.locators_and_ranges(blocks, 8, 17), [LocatorAndRange('a', 10, 8, 2),
309                                                                       LocatorAndRange('b', 15, 0, 15)])
310
311         self.assertEqual(arvados.locators_and_ranges(blocks, 8, 20), [LocatorAndRange('a', 10, 8, 2),
312                                                                       LocatorAndRange('b', 15, 0, 15),
313                                                                       LocatorAndRange('c', 5, 0, 3)])
314
315         self.assertEqual(arvados.locators_and_ranges(blocks, 26, 2), [LocatorAndRange('c', 5, 1, 2)])
316
317         self.assertEqual(arvados.locators_and_ranges(blocks, 9, 15), [LocatorAndRange('a', 10, 9, 1),
318                                                                       LocatorAndRange('b', 15, 0, 14)])
319         self.assertEqual(arvados.locators_and_ranges(blocks, 10, 15), [LocatorAndRange('b', 15, 0, 15)])
320         self.assertEqual(arvados.locators_and_ranges(blocks, 11, 15), [LocatorAndRange('b', 15, 1, 14),
321                                                                        LocatorAndRange('c', 5, 0, 1)])
322
323     class MockKeep(object):
324         def __init__(self, content, num_retries=0):
325             self.content = content
326             self.num_prefetch_threads = 1
327
328         def get(self, locator, num_retries=0, prefetch=False):
329             return self.content[locator]
330
331     def test_stream_reader(self):
332         keepblocks = {
333             'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa+10': b'abcdefghij',
334             'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb+15': b'klmnopqrstuvwxy',
335             'cccccccccccccccccccccccccccccccc+5': b'z0123',
336         }
337         mk = self.MockKeep(keepblocks)
338
339         sr = arvados.StreamReader([".", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa+10", "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb+15", "cccccccccccccccccccccccccccccccc+5", "0:30:foo"], mk)
340
341         content = b'abcdefghijklmnopqrstuvwxyz0123456789'
342
343         self.assertEqual(sr.readfrom(0, 30), content[0:30])
344         self.assertEqual(sr.readfrom(2, 30), content[2:30])
345
346         self.assertEqual(sr.readfrom(2, 8), content[2:10])
347         self.assertEqual(sr.readfrom(0, 10), content[0:10])
348
349         self.assertEqual(sr.readfrom(0, 5), content[0:5])
350         self.assertEqual(sr.readfrom(5, 5), content[5:10])
351         self.assertEqual(sr.readfrom(10, 5), content[10:15])
352         self.assertEqual(sr.readfrom(15, 5), content[15:20])
353         self.assertEqual(sr.readfrom(20, 5), content[20:25])
354         self.assertEqual(sr.readfrom(25, 5), content[25:30])
355         self.assertEqual(sr.readfrom(30, 5), b'')
356
357     def test_extract_file(self):
358         m1 = """. 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt
359 . 085c37f02916da1cad16f93c54d899b7+41 0:41:md6sum.txt
360 . 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md7sum.txt
361 . 085c37f02916da1cad16f93c54d899b7+41 5348b82a029fd9e971a811ce1f71360b+43 8b22da26f9f433dea0a10e5ec66d73ba+43 47:80:md8sum.txt
362 . 085c37f02916da1cad16f93c54d899b7+41 5348b82a029fd9e971a811ce1f71360b+43 8b22da26f9f433dea0a10e5ec66d73ba+43 40:80:md9sum.txt
363 """
364
365         m2 = arvados.CollectionReader(m1, self.api_client).manifest_text(normalize=True)
366
367         self.assertEqual(m2,
368                          ". 5348b82a029fd9e971a811ce1f71360b+43 085c37f02916da1cad16f93c54d899b7+41 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md5sum.txt 43:41:md6sum.txt 84:43:md7sum.txt 6:37:md8sum.txt 84:43:md8sum.txt 83:1:md9sum.txt 0:43:md9sum.txt 84:36:md9sum.txt\n")
369         files = arvados.CollectionReader(
370             m2, self.api_client).all_streams()[0].files()
371
372         self.assertEqual(files['md5sum.txt'].as_manifest(),
373                          ". 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt\n")
374         self.assertEqual(files['md6sum.txt'].as_manifest(),
375                          ". 085c37f02916da1cad16f93c54d899b7+41 0:41:md6sum.txt\n")
376         self.assertEqual(files['md7sum.txt'].as_manifest(),
377                          ". 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md7sum.txt\n")
378         self.assertEqual(files['md9sum.txt'].as_manifest(),
379                          ". 085c37f02916da1cad16f93c54d899b7+41 5348b82a029fd9e971a811ce1f71360b+43 8b22da26f9f433dea0a10e5ec66d73ba+43 40:80:md9sum.txt\n")
380
381     def test_write_directory_tree(self):
382         cwriter = arvados.CollectionWriter(self.api_client)
383         cwriter.write_directory_tree(self.build_directory_tree(
384                 ['basefile', 'subdir/subfile']))
385         self.assertEqual(cwriter.manifest_text(),
386                          """. c5110c5ac93202d8e0f9e381f22bac0f+8 0:8:basefile
387 ./subdir 1ca4dec89403084bf282ad31e6cf7972+14 0:14:subfile\n""")
388
389     def test_write_named_directory_tree(self):
390         cwriter = arvados.CollectionWriter(self.api_client)
391         cwriter.write_directory_tree(self.build_directory_tree(
392                 ['basefile', 'subdir/subfile']), 'root')
393         self.assertEqual(
394             cwriter.manifest_text(),
395             """./root c5110c5ac93202d8e0f9e381f22bac0f+8 0:8:basefile
396 ./root/subdir 1ca4dec89403084bf282ad31e6cf7972+14 0:14:subfile\n""")
397
398     def test_write_directory_tree_in_one_stream(self):
399         cwriter = arvados.CollectionWriter(self.api_client)
400         cwriter.write_directory_tree(self.build_directory_tree(
401                 ['basefile', 'subdir/subfile']), max_manifest_depth=0)
402         self.assertEqual(cwriter.manifest_text(),
403                          """. 4ace875ffdc6824a04950f06858f4465+22 0:8:basefile 8:14:subdir/subfile\n""")
404
405     def test_write_directory_tree_with_limited_recursion(self):
406         cwriter = arvados.CollectionWriter(self.api_client)
407         cwriter.write_directory_tree(
408             self.build_directory_tree(['f1', 'd1/f2', 'd1/d2/f3']),
409             max_manifest_depth=1)
410         self.assertEqual(cwriter.manifest_text(),
411                          """. bd19836ddb62c11c55ab251ccaca5645+2 0:2:f1
412 ./d1 50170217e5b04312024aa5cd42934494+13 0:8:d2/f3 8:5:f2\n""")
413
414     def test_write_directory_tree_with_zero_recursion(self):
415         cwriter = arvados.CollectionWriter(self.api_client)
416         content = 'd1/d2/f3d1/f2f1'
417         blockhash = tutil.str_keep_locator(content)
418         cwriter.write_directory_tree(
419             self.build_directory_tree(['f1', 'd1/f2', 'd1/d2/f3']),
420             max_manifest_depth=0)
421         self.assertEqual(
422             cwriter.manifest_text(),
423             ". {} 0:8:d1/d2/f3 8:5:d1/f2 13:2:f1\n".format(blockhash))
424
425     def test_write_one_file(self):
426         cwriter = arvados.CollectionWriter(self.api_client)
427         with self.make_test_file() as testfile:
428             cwriter.write_file(testfile.name)
429             self.assertEqual(
430                 cwriter.manifest_text(),
431                 ". 098f6bcd4621d373cade4e832627b4f6+4 0:4:{}\n".format(
432                     os.path.basename(testfile.name)))
433
434     def test_write_named_file(self):
435         cwriter = arvados.CollectionWriter(self.api_client)
436         with self.make_test_file() as testfile:
437             cwriter.write_file(testfile.name, 'foo')
438             self.assertEqual(cwriter.manifest_text(),
439                              ". 098f6bcd4621d373cade4e832627b4f6+4 0:4:foo\n")
440
441     def test_write_multiple_files(self):
442         cwriter = arvados.CollectionWriter(self.api_client)
443         for letter in 'ABC':
444             with self.make_test_file(letter.encode()) as testfile:
445                 cwriter.write_file(testfile.name, letter)
446         self.assertEqual(
447             cwriter.manifest_text(),
448             ". 902fbdd2b1df0c4f70b4a5d23525e932+3 0:1:A 1:1:B 2:1:C\n")
449
450     def test_basic_resume(self):
451         cwriter = TestResumableWriter()
452         with self.make_test_file() as testfile:
453             cwriter.write_file(testfile.name, 'test')
454             resumed = TestResumableWriter.from_state(cwriter.current_state())
455         self.assertEqual(cwriter.manifest_text(), resumed.manifest_text(),
456                           "resumed CollectionWriter had different manifest")
457
458     def test_resume_fails_when_missing_dependency(self):
459         cwriter = TestResumableWriter()
460         with self.make_test_file() as testfile:
461             cwriter.write_file(testfile.name, 'test')
462         self.assertRaises(arvados.errors.StaleWriterStateError,
463                           TestResumableWriter.from_state,
464                           cwriter.current_state())
465
466     def test_resume_fails_when_dependency_mtime_changed(self):
467         cwriter = TestResumableWriter()
468         with self.make_test_file() as testfile:
469             cwriter.write_file(testfile.name, 'test')
470             os.utime(testfile.name, (0, 0))
471             self.assertRaises(arvados.errors.StaleWriterStateError,
472                               TestResumableWriter.from_state,
473                               cwriter.current_state())
474
475     def test_resume_fails_when_dependency_is_nonfile(self):
476         cwriter = TestResumableWriter()
477         cwriter.write_file('/dev/null', 'empty')
478         self.assertRaises(arvados.errors.StaleWriterStateError,
479                           TestResumableWriter.from_state,
480                           cwriter.current_state())
481
482     def test_resume_fails_when_dependency_size_changed(self):
483         cwriter = TestResumableWriter()
484         with self.make_test_file() as testfile:
485             cwriter.write_file(testfile.name, 'test')
486             orig_mtime = os.fstat(testfile.fileno()).st_mtime
487             testfile.write(b'extra')
488             testfile.flush()
489             os.utime(testfile.name, (orig_mtime, orig_mtime))
490             self.assertRaises(arvados.errors.StaleWriterStateError,
491                               TestResumableWriter.from_state,
492                               cwriter.current_state())
493
494     def test_resume_fails_with_expired_locator(self):
495         cwriter = TestResumableWriter()
496         state = cwriter.current_state()
497         # Add an expired locator to the state.
498         state['_current_stream_locators'].append(''.join([
499                     'a' * 32, '+1+A', 'b' * 40, '@', '10000000']))
500         self.assertRaises(arvados.errors.StaleWriterStateError,
501                           TestResumableWriter.from_state, state)
502
503     def test_arbitrary_objects_not_resumable(self):
504         cwriter = TestResumableWriter()
505         with open('/dev/null') as badfile:
506             self.assertRaises(arvados.errors.AssertionError,
507                               cwriter.write_file, badfile)
508
509     def test_arbitrary_writes_not_resumable(self):
510         cwriter = TestResumableWriter()
511         self.assertRaises(arvados.errors.AssertionError,
512                           cwriter.write, "badtext")
513
514
class CollectionTestMixin(tutil.ApiClientMock):
    """Collection fixtures and a mock API client shared by collection tests."""

    # All collection fixtures, then the fields of the default (foo_file)
    # and alternate (bar_file) collections.
    API_COLLECTIONS = run_test_server.fixture('collections')
    DEFAULT_COLLECTION = API_COLLECTIONS['foo_file']
    DEFAULT_DATA_HASH = DEFAULT_COLLECTION['portable_data_hash']
    DEFAULT_MANIFEST = DEFAULT_COLLECTION['manifest_text']
    DEFAULT_UUID = DEFAULT_COLLECTION['uuid']
    ALT_COLLECTION = API_COLLECTIONS['bar_file']
    ALT_DATA_HASH = ALT_COLLECTION['portable_data_hash']
    ALT_MANIFEST = ALT_COLLECTION['manifest_text']

    def api_client_mock(self, status=200):
        """Return the parent's mock client with one mocked 'proxy' Keep service.

        `status` is forwarded to mock_keep_services().
        """
        mock_client = super(CollectionTestMixin, self).api_client_mock()
        service_options = {
            'status': status,
            'service_type': 'proxy',
            'count': 1,
        }
        self.mock_keep_services(mock_client, **service_options)
        return mock_client
529
530
531 @tutil.skip_sleep
532 class CollectionReaderTestCase(unittest.TestCase, CollectionTestMixin):
533     def mock_get_collection(self, api_mock, code, fixturename):
534         body = self.API_COLLECTIONS.get(fixturename)
535         self._mock_api_call(api_mock.collections().get, code, body)
536
537     def api_client_mock(self, status=200):
538         client = super(CollectionReaderTestCase, self).api_client_mock()
539         self.mock_get_collection(client, status, 'foo_file')
540         return client
541
542     def test_init_default_retries(self):
543         client = self.api_client_mock(200)
544         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
545         reader.manifest_text()
546         client.collections().get().execute.assert_called_with(num_retries=10)
547
548     def test_uuid_init_success(self):
549         client = self.api_client_mock(200)
550         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client,
551                                           num_retries=3)
552         self.assertEqual(self.DEFAULT_COLLECTION['manifest_text'],
553                          reader.manifest_text())
554         client.collections().get().execute.assert_called_with(num_retries=3)
555
556     def test_uuid_init_failure_raises_api_error(self):
557         client = self.api_client_mock(500)
558         with self.assertRaises(arvados.errors.ApiError):
559             reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
560
561     def test_locator_init(self):
562         client = self.api_client_mock(200)
563         # Ensure Keep will not return anything if asked.
564         with tutil.mock_keep_responses(None, 404):
565             reader = arvados.CollectionReader(self.DEFAULT_DATA_HASH,
566                                               api_client=client)
567             self.assertEqual(self.DEFAULT_MANIFEST, reader.manifest_text())
568
569     def test_init_no_fallback_to_keep(self):
570         # Do not look up a collection UUID or PDH in Keep.
571         for key in [self.DEFAULT_UUID, self.DEFAULT_DATA_HASH]:
572             client = self.api_client_mock(404)
573             with tutil.mock_keep_responses(self.DEFAULT_MANIFEST, 200):
574                 with self.assertRaises(arvados.errors.ApiError):
575                     reader = arvados.CollectionReader(key, api_client=client)
576
577     def test_init_num_retries_propagated(self):
578         # More of an integration test...
579         client = self.api_client_mock(200)
580         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client,
581                                           num_retries=3)
582         with tutil.mock_keep_responses('foo', 500, 500, 200):
583             self.assertEqual(b'foo',
584                              b''.join(f.read(9) for f in reader.all_files()))
585
586     def test_read_nonnormalized_manifest_with_collection_reader(self):
587         # client should be able to use CollectionReader on a manifest without normalizing it
588         client = self.api_client_mock(500)
589         nonnormal = ". acbd18db4cc2f85cedef654fccc4a4d8+3+Aabadbadbee@abeebdee 0:3:foo.txt 1:0:bar.txt 0:3:foo.txt\n"
590         reader = arvados.CollectionReader(
591             nonnormal,
592             api_client=client, num_retries=0)
593         # Ensure stripped_manifest() doesn't mangle our manifest in
594         # any way other than stripping hints.
595         self.assertEqual(
596             re.sub(r'\+[^\d\s\+]+', '', nonnormal),
597             reader.stripped_manifest())
598         # Ensure stripped_manifest() didn't mutate our reader.
599         self.assertEqual(nonnormal, reader.manifest_text())
600         # Ensure the files appear in the order given in the manifest.
601         self.assertEqual(
602             [[6, '.', 'foo.txt'],
603              [0, '.', 'bar.txt']],
604             [[f.size(), f.stream_name(), f.name()]
605              for f in reader.all_streams()[0].all_files()])
606
607     def test_read_empty_collection(self):
608         client = self.api_client_mock(200)
609         self.mock_get_collection(client, 200, 'empty')
610         reader = arvados.CollectionReader('d41d8cd98f00b204e9800998ecf8427e+0',
611                                           api_client=client)
612         self.assertEqual('', reader.manifest_text())
613         self.assertEqual(0, len(reader))
614         self.assertFalse(reader)
615
616     def test_api_response(self):
617         client = self.api_client_mock()
618         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
619         self.assertEqual(self.DEFAULT_COLLECTION, reader.api_response())
620
621     def check_open_file(self, coll_file, stream_name, file_name, file_size):
622         self.assertFalse(coll_file.closed, "returned file is not open")
623         self.assertEqual(stream_name, coll_file.stream_name())
624         self.assertEqual(file_name, coll_file.name)
625         self.assertEqual(file_size, coll_file.size())
626
627     def test_open_collection_file_one_argument(self):
628         client = self.api_client_mock(200)
629         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
630         cfile = reader.open('./foo', 'rb')
631         self.check_open_file(cfile, '.', 'foo', 3)
632
633     def test_open_deep_file(self):
634         coll_name = 'collection_with_files_in_subdir'
635         client = self.api_client_mock(200)
636         self.mock_get_collection(client, 200, coll_name)
637         reader = arvados.CollectionReader(
638             self.API_COLLECTIONS[coll_name]['uuid'], api_client=client)
639         cfile = reader.open('./subdir2/subdir3/file2_in_subdir3.txt', 'rb')
640         self.check_open_file(cfile, './subdir2/subdir3', 'file2_in_subdir3.txt',
641                              32)
642
643     def test_open_nonexistent_stream(self):
644         client = self.api_client_mock(200)
645         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
646         self.assertRaises(IOError, reader.open, './nonexistent/foo')
647
648     def test_open_nonexistent_file(self):
649         client = self.api_client_mock(200)
650         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
651         self.assertRaises(IOError, reader.open, 'nonexistent')
652
653
@tutil.skip_sleep
class CollectionWriterTestCase(unittest.TestCase, CollectionTestMixin):
    # Tests for arvados.CollectionWriter, run against a mocked API client
    # and canned Keep responses.

    def mock_keep(self, body, *codes, **headers):
        # Canned Keep responses that report two stored replicas unless the
        # caller supplies its own x-keep-replicas-stored header.
        if 'x-keep-replicas-stored' not in headers:
            headers['x-keep-replicas-stored'] = 2
        return tutil.mock_keep_responses(body, *codes, **headers)

    def foo_writer(self, **kwargs):
        # Build a CollectionWriter (kwargs go to its constructor, with a
        # mocked api_client by default) that has had b'foo' written to a
        # file named 'foo'.
        fallback_client = self.api_client_mock()
        kwargs.setdefault('api_client', fallback_client)
        foo = arvados.CollectionWriter(**kwargs)
        foo.start_new_file('foo')
        foo.write(b'foo')
        return foo

    def test_write_whole_collection(self):
        # finish() returns the locator served by the mocked Keep.
        foo = self.foo_writer()
        with self.mock_keep(self.DEFAULT_DATA_HASH, 200, 200):
            locator = foo.finish()
            self.assertEqual(self.DEFAULT_DATA_HASH, locator)

    def test_write_no_default(self):
        # A 500 from Keep makes finish() raise KeepWriteError.
        foo = self.foo_writer()
        with self.mock_keep(None, 500), \
             self.assertRaises(arvados.errors.KeepWriteError):
            foo.finish()

    def test_write_insufficient_replicas_via_proxy(self):
        # Three replicas requested, but the single response reports only
        # two stored.
        foo = self.foo_writer(replication=3)
        stored_two = {'x-keep-replicas-stored': 2}
        with self.mock_keep(None, 200, **stored_two), \
             self.assertRaises(arvados.errors.KeepWriteError):
            foo.manifest_text()

    def test_write_insufficient_replicas_via_disks(self):
        # Three replicas requested, but only two disk services exist and
        # each response reports one stored replica.
        api = mock.MagicMock(name='api_client')
        stored_one = {'x-keep-replicas-stored': 1}
        with self.mock_keep(None, 200, 200, **stored_one):
            self.mock_keep_services(
                api, status=200, service_type='disk', count=2)
            foo = self.foo_writer(api_client=api, replication=3)
            with self.assertRaises(arvados.errors.KeepWriteError):
                foo.manifest_text()

    def test_write_three_replicas(self):
        # Three failures followed by three single-replica successes: the
        # write goes through after exactly six Keep requests.
        api = mock.MagicMock(name='api_client')
        stored_one = {'x-keep-replicas-stored': 1}
        with self.mock_keep(
                "", 500, 500, 500, 200, 200, 200,
                **stored_one) as keepmock:
            self.mock_keep_services(
                api, status=200, service_type='disk', count=6)
            foo = self.foo_writer(api_client=api, replication=3)
            foo.manifest_text()
            self.assertEqual(6, keepmock.call_count)

    def test_write_whole_collection_through_retries(self):
        # With num_retries=2, finish() succeeds even though Keep answers
        # 500 twice before each 200.
        foo = self.foo_writer(num_retries=2)
        with self.mock_keep(self.DEFAULT_DATA_HASH,
                            500, 500, 200, 500, 500, 200):
            locator = foo.finish()
            self.assertEqual(self.DEFAULT_DATA_HASH, locator)

    def test_flush_data_retries(self):
        # flush_data() gets past one Keep failure; afterwards the manifest
        # matches the default fixture manifest.
        foo = self.foo_writer(num_retries=2)
        manifest_tokens = self.DEFAULT_MANIFEST.split()
        foo_hash = manifest_tokens[1]
        with self.mock_keep(foo_hash, 500, 200):
            foo.flush_data()
        self.assertEqual(self.DEFAULT_MANIFEST, foo.manifest_text())

    def test_one_open(self):
        # open() as a context manager: sets the current stream and file
        # name, closes the file on exit, and rejects later writes.
        api = self.api_client_mock()
        cwriter = arvados.CollectionWriter(api)
        data_loc = tutil.str_keep_locator('test data')
        with cwriter.open('out') as out_file:
            self.assertEqual('.', cwriter.current_stream_name())
            self.assertEqual('out', cwriter.current_file_name())
            out_file.write(b'test data')
        self.assertTrue(out_file.closed, "writer file not closed after context")
        self.assertRaises(ValueError, out_file.write, 'extra text')
        expected = ". {} 0:9:out\n".format(data_loc)
        with self.mock_keep(data_loc, 200):
            self.assertEqual(expected, cwriter.manifest_text())

    def test_open_writelines(self):
        # writelines() stores the concatenation of its items.
        api = self.api_client_mock()
        cwriter = arvados.CollectionWriter(api)
        data_loc = tutil.str_keep_locator('123456')
        with cwriter.open('six') as out_file:
            out_file.writelines(['12', '34', '56'])
        expected = ". {} 0:6:six\n".format(data_loc)
        with self.mock_keep(data_loc, 200):
            self.assertEqual(expected, cwriter.manifest_text())

    def test_open_flush(self):
        # An explicit flush() between writes puts the two chunks in
        # separate blocks of the same file.
        api = self.api_client_mock()
        first_loc = tutil.str_keep_locator('flush1')
        second_loc = tutil.str_keep_locator('flush2')
        with self.mock_keep((first_loc, 200), (second_loc, 200)):
            cwriter = arvados.CollectionWriter(api)
            with cwriter.open('flush_test') as out_file:
                out_file.write(b'flush1')
                out_file.flush()
                out_file.write(b'flush2')
            expected = ". {} {} 0:12:flush_test\n".format(
                first_loc, second_loc)
            self.assertEqual(expected, cwriter.manifest_text())

    def test_two_opens_same_stream(self):
        # Two files opened one after another in the same stream share a
        # single data block.
        api = self.api_client_mock()
        cwriter = arvados.CollectionWriter(api)
        for file_name, payload in (('1', b'1st'), ('2', b'2nd')):
            with cwriter.open('.', file_name) as out_file:
                out_file.write(payload)
        data_loc = tutil.str_keep_locator('1st2nd')
        expected = ". {} 0:3:1 3:3:2\n".format(data_loc)
        with self.mock_keep(data_loc, 200):
            self.assertEqual(expected, cwriter.manifest_text())

    def test_two_opens_two_streams(self):
        # Files opened in different streams produce one manifest line per
        # stream.
        api = self.api_client_mock()
        root_loc = tutil.str_keep_locator('file')
        dir_loc = tutil.str_keep_locator('indir')
        with self.mock_keep((root_loc, 200), (dir_loc, 200)):
            cwriter = arvados.CollectionWriter(api)
            with cwriter.open('file') as out_file:
                out_file.write(b'file')
            with cwriter.open('./dir', 'indir') as out_file:
                out_file.write(b'indir')
            expected = ". {} 0:4:file\n./dir {} 0:5:indir\n".format(
                root_loc, dir_loc)
            self.assertEqual(expected, cwriter.manifest_text())

    def test_dup_open_fails(self):
        # Opening a second file while the first is still open is an error.
        api = self.api_client_mock()
        cwriter = arvados.CollectionWriter(api)
        # Keep a reference so the first file stays open (and alive) while
        # the second open() is attempted.
        first_file = cwriter.open('one')
        with self.assertRaises(arvados.errors.AssertionError):
            cwriter.open('two')
786
787
class CollectionMethods(run_test_server.TestCaseWithServers):
    # Checks of Collection's mapping-style accessors and of the
    # properties / trash_at getters.

    def test_keys_values_items_support_indexing(self):
        # values() and items() results can be indexed; keys() results can
        # be indexed on Python 2 and unpacked on Python 3.
        c = Collection()
        for file_name, payload in (('foo', b'foo'), ('bar', b'bar')):
            with c.open(file_name, 'wb') as f:
                f.write(payload)
        self.assertEqual(2, len(c.keys()))
        if sys.version_info >= (3, 0):
            fn0, fn1 = c.keys()
        else:
            # keys() supports indexing only for python2 callers.
            fn0 = c.keys()[0]
            fn1 = c.keys()[1]
        self.assertEqual(2, len(c.values()))
        # The results are not needed; the lookups only prove that
        # values() can be indexed.
        c.values()[0]
        c.values()[1]
        self.assertEqual(2, len(c.items()))
        first_item = c.items()[0]
        self.assertEqual(fn0, first_item[0])
        second_item = c.items()[1]
        self.assertEqual(fn1, second_item[0])

    def test_get_properties(self):
        # Properties start out empty and reflect what save_new() stored.
        c = Collection()
        self.assertEqual(c.get_properties(), {})
        props = {"foo":"bar"}
        c.save_new(properties=props)
        self.assertEqual(c.get_properties(), {"foo":"bar"})

    def test_get_trash_at(self):
        # trash_at is None on a new collection and, once saved, equals
        # the parsed timestamp of the same instant.
        c = Collection()
        self.assertEqual(c.get_trash_at(), None)
        when = datetime.datetime(2111, 1, 1, 11, 11, 11, 111111)
        c.save_new(trash_at=when)
        expected = ciso8601.parse_datetime('2111-01-01T11:11:11.111111000Z')
        self.assertEqual(c.get_trash_at(), expected)
821
822
class CollectionOpenModes(run_test_server.TestCaseWithServers):
    # Checks which mode strings Collection.open() accepts and that text
    # modes read and write str data.

    def test_open_binary_modes(self):
        # Every binary write/append mode must accept bytes.
        c = Collection()
        for mode in ['wb', 'wb+', 'ab', 'ab+']:
            with c.open('foo', mode) as f:
                f.write(b'foo')

    def test_open_invalid_modes(self):
        # Malformed or unsupported mode strings (and None) are rejected.
        c = Collection()
        for mode in ['+r', 'aa', '++', 'r+b', 'beer', '', None]:
            with self.assertRaises(Exception):
                c.open('foo', mode)

    def test_open_text_modes(self):
        c = Collection()
        # Seed the file through a binary-mode handle, so write bytes:
        # the fixture must not rely on a binary writer accepting str.
        with c.open('foo', 'wb') as f:
            f.write(b'foo')
        for mode in ['r', 'rt', 'r+', 'rt+', 'w', 'wt', 'a', 'at']:
            with c.open('foo', mode) as f:
                if mode[0] == 'r' and '+' not in mode:
                    # Read-only text modes see the current content as str.
                    self.assertEqual('foo', f.read(3))
                else:
                    # Writable text modes: what was written must be
                    # readable from the start of the file.
                    f.write('bar')
                    f.seek(0, os.SEEK_SET)
                    self.assertEqual('bar', f.read(3))
849
850
class TextModes(run_test_server.TestCaseWithServers):
    # Text-mode reads and writes of multi-byte UTF-8 characters, with the
    # Keep block size shrunk to 4 bytes so that the data is split across
    # several blocks.

    def setUp(self):
        # Remember the configured block size so tearDown can put back
        # exactly what was there instead of assuming a default.
        self._saved_block_size = arvados.config.KEEP_BLOCK_SIZE
        arvados.config.KEEP_BLOCK_SIZE = 4
        # unicodedata.lookup() returns the same character on Python 2 and
        # Python 3, so no version-specific branch is needed.
        import unicodedata
        self.sailboat = unicodedata.lookup('SAILBOAT')
        self.snowman = unicodedata.lookup('SNOWMAN')

    def tearDown(self):
        arvados.config.KEEP_BLOCK_SIZE = self._saved_block_size

    def test_read_sailboat_across_block_boundary(self):
        c = Collection()
        data = self.sailboat.encode('utf-8')
        # Context managers close the file even if an assertion fails.
        with c.open('sailboats', 'wb') as f:
            f.write(data)
            # Write the second character in two pieces.
            f.write(data[:1])
            f.write(data[1:])
            f.write(b'\n')
        # The data must have landed in a 4-byte and a 3-byte block.
        self.assertRegex(c.portable_manifest_text(), r'\+4 .*\+3 ')

        with c.open('sailboats', 'r') as f:
            string = f.readline()
        self.assertEqual(string, self.sailboat+self.sailboat+'\n')

    def test_write_snowman_across_block_boundary(self):
        c = Collection()
        data = self.snowman
        with c.open('snowmany', 'w') as f:
            f.write(data+data+'\n'+data+'\n')
        # The encoded text must span two 4-byte blocks and a 3-byte block.
        self.assertRegex(c.portable_manifest_text(), r'\+4 .*\+4 .*\+3 ')

        with c.open('snowmany', 'r') as f:
            self.assertEqual(f.readline(), self.snowman+self.snowman+'\n')
            self.assertEqual(f.readline(), self.snowman+'\n')
894
895
896 class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
897
898     def test_replication_desired_kept_on_load(self):
899         m = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n'
900         c1 = Collection(m, replication_desired=1)
901         c1.save_new()
902         loc = c1.manifest_locator()
903         c2 = Collection(loc)
904         self.assertEqual(c1.manifest_text(strip=True), c2.manifest_text(strip=True))
905         self.assertEqual(c1.replication_desired, c2.replication_desired)
906
907     def test_replication_desired_not_loaded_if_provided(self):
908         m = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n'
909         c1 = Collection(m, replication_desired=1)
910         c1.save_new()
911         loc = c1.manifest_locator()
912         c2 = Collection(loc, replication_desired=2)
913         self.assertEqual(c1.manifest_text(strip=True), c2.manifest_text(strip=True))
914         self.assertNotEqual(c1.replication_desired, c2.replication_desired)
915
916     def test_storage_classes_desired_kept_on_load(self):
917         m = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n'
918         c1 = Collection(m, storage_classes_desired=['archival'])
919         c1.save_new()
920         loc = c1.manifest_locator()
921         c2 = Collection(loc)
922         self.assertEqual(c1.manifest_text(strip=True), c2.manifest_text(strip=True))
923         self.assertEqual(c1.storage_classes_desired(), c2.storage_classes_desired())
924
925     def test_storage_classes_change_after_save(self):
926         m = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n'
927         c1 = Collection(m, storage_classes_desired=['archival'])
928         c1.save_new()
929         loc = c1.manifest_locator()
930         c2 = Collection(loc)
931         self.assertEqual(['archival'], c2.storage_classes_desired())
932         c2.save(storage_classes=['highIO'])
933         self.assertEqual(['highIO'], c2.storage_classes_desired())
934         c3 = Collection(loc)
935         self.assertEqual(c1.manifest_text(strip=True), c3.manifest_text(strip=True))
936         self.assertEqual(['highIO'], c3.storage_classes_desired())
937
938     def test_storage_classes_desired_not_loaded_if_provided(self):
939         m = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n'
940         c1 = Collection(m, storage_classes_desired=['archival'])
941         c1.save_new()
942         loc = c1.manifest_locator()
943         c2 = Collection(loc, storage_classes_desired=['default'])
944         self.assertEqual(c1.manifest_text(strip=True), c2.manifest_text(strip=True))
945         self.assertNotEqual(c1.storage_classes_desired(), c2.storage_classes_desired())
946
947     def test_init_manifest(self):
948         m1 = """. 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt
949 . 085c37f02916da1cad16f93c54d899b7+41 0:41:md5sum.txt
950 . 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md5sum.txt
951 """
952         self.assertEqual(m1, CollectionReader(m1).manifest_text(normalize=False))
953         self.assertEqual(". 5348b82a029fd9e971a811ce1f71360b+43 085c37f02916da1cad16f93c54d899b7+41 8b22da26f9f433dea0a10e5ec66d73ba+43 0:127:md5sum.txt\n", CollectionReader(m1).manifest_text(normalize=True))
954
955     def test_init_manifest_with_collision(self):
956         m1 = """. 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt
957 ./md5sum.txt 085c37f02916da1cad16f93c54d899b7+41 0:41:md5sum.txt
958 """
959         with self.assertRaises(arvados.errors.ArgumentError):
960             self.assertEqual(m1, CollectionReader(m1))
961
962     def test_init_manifest_with_error(self):
963         m1 = """. 0:43:md5sum.txt"""
964         with self.assertRaises(arvados.errors.ArgumentError):
965             self.assertEqual(m1, CollectionReader(m1))
966
967     def test_remove(self):
968         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n')
969         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n", c.portable_manifest_text())
970         self.assertIn("count1.txt", c)
971         c.remove("count1.txt")
972         self.assertNotIn("count1.txt", c)
973         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c.portable_manifest_text())
974         with self.assertRaises(arvados.errors.ArgumentError):
975             c.remove("")
976
977     def test_remove_recursive(self):
978         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:a/b/c/d/efg.txt 0:10:xyz.txt\n')
979         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:xyz.txt\n./a/b/c/d 781e5e245d69b566979b86e28d23f2c7+10 0:10:efg.txt\n", c.portable_manifest_text())
980         self.assertIn("a", c)
981         self.assertEqual(1, len(c["a"].keys()))
982         # cannot remove non-empty directory with default recursive=False
983         with self.assertRaises(OSError):
984             c.remove("a/b")
985         with self.assertRaises(OSError):
986             c.remove("a/b/c/d")
987         c.remove("a/b", recursive=True)
988         self.assertEqual(0, len(c["a"].keys()))
989         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:xyz.txt\n./a d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n", c.portable_manifest_text())
990
991     def test_find(self):
992         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n')
993         self.assertIs(c.find("."), c)
994         self.assertIs(c.find("./count1.txt"), c["count1.txt"])
995         self.assertIs(c.find("count1.txt"), c["count1.txt"])
996         with self.assertRaises(IOError):
997             c.find("/.")
998         with self.assertRaises(arvados.errors.ArgumentError):
999             c.find("")
1000         self.assertIs(c.find("./nonexistant.txt"), None)
1001         self.assertIs(c.find("./nonexistantsubdir/nonexistant.txt"), None)
1002
1003     def test_escaped_paths_dont_get_unescaped_on_manifest(self):
1004         # Dir & file names are literally '\056' (escaped form: \134056)
1005         manifest = './\\134056\\040Test d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\134056\n'
1006         c = Collection(manifest)
1007         self.assertEqual(c.portable_manifest_text(), manifest)
1008
1009     def test_other_special_chars_on_file_token(self):
1010         cases = [
1011             ('\\000', '\0'),
1012             ('\\011', '\t'),
1013             ('\\012', '\n'),
1014             ('\\072', ':'),
1015             ('\\134400', '\\400'),
1016         ]
1017         for encoded, decoded in cases:
1018             manifest = '. d41d8cd98f00b204e9800998ecf8427e+0 0:0:some%sfile.txt\n' % encoded
1019             c = Collection(manifest)
1020             self.assertEqual(c.portable_manifest_text(), manifest)
1021             self.assertIn('some%sfile.txt' % decoded, c.keys())
1022
1023     def test_escaped_paths_do_get_unescaped_on_listing(self):
1024         # Dir & file names are literally '\056' (escaped form: \134056)
1025         manifest = './\\134056\\040Test d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\134056\n'
1026         c = Collection(manifest)
1027         self.assertIn('\\056 Test', c.keys())
1028         self.assertIn('\\056', c['\\056 Test'].keys())
1029
1030     def test_make_empty_dir_with_escaped_chars(self):
1031         c = Collection()
1032         c.mkdirs('./Empty\\056Dir')
1033         self.assertEqual(c.portable_manifest_text(),
1034                          './Empty\\134056Dir d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n')
1035
1036     def test_make_empty_dir_with_spaces(self):
1037         c = Collection()
1038         c.mkdirs('./foo bar/baz waz')
1039         self.assertEqual(c.portable_manifest_text(),
1040                          './foo\\040bar/baz\\040waz d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n')
1041
1042     def test_remove_in_subdir(self):
1043         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
1044         c.remove("foo/count2.txt")
1045         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n", c.portable_manifest_text())
1046
1047     def test_remove_empty_subdir(self):
1048         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
1049         c.remove("foo/count2.txt")
1050         c.remove("foo")
1051         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.portable_manifest_text())
1052
1053     def test_remove_nonempty_subdir(self):
1054         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
1055         with self.assertRaises(IOError):
1056             c.remove("foo")
1057         c.remove("foo", recursive=True)
1058         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.portable_manifest_text())
1059
1060     def test_copy_to_file_in_dir(self):
1061         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1062         c.copy("count1.txt", "foo/count2.txt")
1063         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c.portable_manifest_text())
1064
1065     def test_copy_file(self):
1066         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1067         c.copy("count1.txt", "count2.txt")
1068         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n", c.portable_manifest_text())
1069
1070     def test_copy_to_existing_dir(self):
1071         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
1072         c.copy("count1.txt", "foo")
1073         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n", c.portable_manifest_text())
1074
1075     def test_copy_to_new_dir(self):
1076         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1077         c.copy("count1.txt", "foo/")
1078         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.portable_manifest_text())
1079
1080     def test_rename_file(self):
1081         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1082         c.rename("count1.txt", "count2.txt")
1083         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c.manifest_text())
1084
1085     def test_move_file_to_dir(self):
1086         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1087         c.mkdirs("foo")
1088         c.rename("count1.txt", "foo/count2.txt")
1089         self.assertEqual("./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c.manifest_text())
1090
1091     def test_move_file_to_other(self):
1092         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1093         c2 = Collection()
1094         c2.rename("count1.txt", "count2.txt", source_collection=c1)
1095         self.assertEqual("", c1.manifest_text())
1096         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c2.manifest_text())
1097
1098     def test_clone(self):
1099         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
1100         cl = c.clone()
1101         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", cl.portable_manifest_text())
1102
1103     def test_diff_del_add(self):
1104         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1105         c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
1106         d = c2.diff(c1)
1107         self.assertEqual(sorted(d), [
1108             ('add', './count1.txt', c1["count1.txt"]),
1109             ('del', './count2.txt', c2["count2.txt"]),
1110         ])
1111         d = c1.diff(c2)
1112         self.assertEqual(sorted(d), [
1113             ('add', './count2.txt', c2["count2.txt"]),
1114             ('del', './count1.txt', c1["count1.txt"]),
1115         ])
1116         self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1117         c1.apply(d)
1118         self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1119
1120     def test_diff_same(self):
1121         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1122         c2 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1123         d = c2.diff(c1)
1124         self.assertEqual(d, [('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"])])
1125         d = c1.diff(c2)
1126         self.assertEqual(d, [('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"])])
1127
1128         self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1129         c1.apply(d)
1130         self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1131
1132     def test_diff_mod(self):
1133         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1134         c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count1.txt\n')
1135         d = c2.diff(c1)
1136         self.assertEqual(d, [('mod', './count1.txt', c2["count1.txt"], c1["count1.txt"])])
1137         d = c1.diff(c2)
1138         self.assertEqual(d, [('mod', './count1.txt', c1["count1.txt"], c2["count1.txt"])])
1139
1140         self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1141         c1.apply(d)
1142         self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1143
1144     def test_diff_add(self):
1145         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1146         c2 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 5348b82a029fd9e971a811ce1f71360b+43 0:10:count1.txt 10:20:count2.txt\n')
1147         d = c2.diff(c1)
1148         self.assertEqual(sorted(d), [
1149             ('del', './count2.txt', c2["count2.txt"]),
1150             ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
1151         ])
1152         d = c1.diff(c2)
1153         self.assertEqual(sorted(d), [
1154             ('add', './count2.txt', c2["count2.txt"]),
1155             ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
1156         ])
1157
1158         self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1159         c1.apply(d)
1160         self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1161
1162     def test_diff_add_in_subcollection(self):
1163         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1164         c2 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
1165         d = c2.diff(c1)
1166         self.assertEqual(sorted(d), [
1167             ('del', './foo', c2["foo"]),
1168             ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
1169         ])
1170         d = c1.diff(c2)
1171         self.assertEqual(sorted(d), [
1172             ('add', './foo', c2["foo"]),
1173             ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
1174         ])
1175         self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1176         c1.apply(d)
1177         self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1178
1179     def test_diff_del_add_in_subcollection(self):
1180         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
1181         c2 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 5348b82a029fd9e971a811ce1f71360b+43 0:3:count3.txt\n')
1182         d = c2.diff(c1)
1183         self.assertEqual(sorted(d), [
1184             ('add', './foo/count2.txt', c1.find("foo/count2.txt")),
1185             ('del', './foo/count3.txt', c2.find("foo/count3.txt")),
1186             ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
1187         ])
1188         d = c1.diff(c2)
1189         self.assertEqual(sorted(d), [
1190             ('add', './foo/count3.txt', c2.find("foo/count3.txt")),
1191             ('del', './foo/count2.txt', c1.find("foo/count2.txt")),
1192             ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
1193         ])
1194
1195         self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1196         c1.apply(d)
1197         self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1198
1199     def test_diff_mod_in_subcollection(self):
1200         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
1201         c2 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:3:foo\n')
1202         d = c2.diff(c1)
1203         self.assertEqual(sorted(d), [
1204             ('mod', './foo', c2["foo"], c1["foo"]),
1205             ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
1206         ])
1207         d = c1.diff(c2)
1208         self.assertEqual(sorted(d), [
1209             ('mod', './foo', c1["foo"], c2["foo"]),
1210             ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
1211         ])
1212
1213         self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1214         c1.apply(d)
1215         self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1216
1217     def test_conflict_keep_local_change(self):
1218         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1219         c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
1220         d = c1.diff(c2)
1221         self.assertEqual(sorted(d), [
1222             ('add', './count2.txt', c2["count2.txt"]),
1223             ('del', './count1.txt', c1["count1.txt"]),
1224         ])
1225         f = c1.open("count1.txt", "wb")
1226         f.write(b"zzzzz")
1227
1228         # c1 changed, so it should not be deleted.
1229         c1.apply(d)
1230         self.assertEqual(c1.portable_manifest_text(), ". 95ebc3c7b3b9f1d2c40fec14415d3cb8+5 5348b82a029fd9e971a811ce1f71360b+43 0:5:count1.txt 5:10:count2.txt\n")
1231
1232     def test_conflict_mod(self):
1233         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt')
1234         c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count1.txt')
1235         d = c1.diff(c2)
1236         self.assertEqual(d, [('mod', './count1.txt', c1["count1.txt"], c2["count1.txt"])])
1237         f = c1.open("count1.txt", "wb")
1238         f.write(b"zzzzz")
1239
1240         # c1 changed, so c2 mod will go to a conflict file
1241         c1.apply(d)
1242         self.assertRegex(
1243             c1.portable_manifest_text(),
1244             r"\. 95ebc3c7b3b9f1d2c40fec14415d3cb8\+5 5348b82a029fd9e971a811ce1f71360b\+43 0:5:count1\.txt 5:10:count1\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")
1245
1246     def test_conflict_add(self):
1247         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
1248         c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count1.txt\n')
1249         d = c1.diff(c2)
1250         self.assertEqual(sorted(d), [
1251             ('add', './count1.txt', c2["count1.txt"]),
1252             ('del', './count2.txt', c1["count2.txt"]),
1253         ])
1254         f = c1.open("count1.txt", "wb")
1255         f.write(b"zzzzz")
1256
1257         # c1 added count1.txt, so c2 add will go to a conflict file
1258         c1.apply(d)
1259         self.assertRegex(
1260             c1.portable_manifest_text(),
1261             r"\. 95ebc3c7b3b9f1d2c40fec14415d3cb8\+5 5348b82a029fd9e971a811ce1f71360b\+43 0:5:count1\.txt 5:10:count1\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")
1262
1263     def test_conflict_del(self):
1264         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt')
1265         c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count1.txt')
1266         d = c1.diff(c2)
1267         self.assertEqual(d, [('mod', './count1.txt', c1["count1.txt"], c2["count1.txt"])])
1268         c1.remove("count1.txt")
1269
1270         # c1 deleted, so c2 mod will go to a conflict file
1271         c1.apply(d)
1272         self.assertRegex(
1273             c1.portable_manifest_text(),
1274             r"\. 5348b82a029fd9e971a811ce1f71360b\+43 0:10:count1\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")
1275
1276     def test_notify(self):
1277         c1 = Collection()
1278         events = []
1279         c1.subscribe(lambda event, collection, name, item: events.append((event, collection, name, item)))
1280         f = c1.open("foo.txt", "wb")
1281         self.assertEqual(events[0], (arvados.collection.ADD, c1, "foo.txt", f.arvadosfile))
1282
1283     def test_open_w(self):
1284         c1 = Collection(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n")
1285         self.assertEqual(c1["count1.txt"].size(), 10)
1286         c1.open("count1.txt", "wb").close()
1287         self.assertEqual(c1["count1.txt"].size(), 0)
1288
1289
class NewCollectionTestCaseWithServersAndTokens(run_test_server.TestCaseWithServers):
    """Collection tests that inspect the permission hints on manifest locators.

    Several tests count how many block locators in a manifest carry a local
    ``+A...`` hint versus a remote ``+R...`` hint, using the two class-level
    regular expressions below.
    """
    # NOTE(review): empty dicts presumably ask TestCaseWithServers to bring up
    # these servers with default settings -- confirm in run_test_server.
    MAIN_SERVER = {}
    KEEP_SERVER = {}
    # 32 hex digits, "+<size>", then a "+A<40 hex>@<8 hex>" hint.
    local_locator_re = r"[0-9a-f]{32}\+\d+\+A[a-f0-9]{40}@[a-f0-9]{8}"
    # 32 hex digits, "+<size>", then a "+R<5 lowercase letters>-<40 hex>@<8 hex>" hint.
    remote_locator_re = r"[0-9a-f]{32}\+\d+\+R[a-z]{5}-[a-f0-9]{40}@[a-f0-9]{8}"

    def setUp(self):
        """Remember the real ``KeepClient.put`` before a test patches it."""
        # Chain to the base class so any fixture work it does is not skipped.
        super(NewCollectionTestCaseWithServersAndTokens, self).setUp()
        # Tests that patch KeepClient.put use this as the mock's side effect,
        # so the real implementation still runs while calls are recorded.
        self.keep_put = arvados.keep.KeepClient.put

    def _count_locators(self, locator_re, collection):
        """Return how many matches of ``locator_re`` occur in ``collection.manifest_text()``."""
        return len(re.findall(locator_re, collection.manifest_text()))

    @mock.patch('arvados.keep.KeepClient.put', autospec=True)
    def test_storage_classes_desired(self, put_mock):
        """``storage_classes_desired`` is forwarded to ``KeepClient.put`` as ``classes``."""
        put_mock.side_effect = self.keep_put
        c = Collection(storage_classes_desired=['default'])
        with c.open("file.txt", 'wb') as f:
            # The file is opened in binary mode, so write bytes like the
            # other tests in this module do.
            f.write(b'content')
        c.save_new()
        # Inspect the keyword arguments of the most recent put() call.
        _, kwargs = put_mock.call_args
        self.assertEqual(['default'], kwargs['classes'])

    @mock.patch('arvados.keep.KeepClient.put', autospec=True)
    def test_repacked_block_submission_get_permission_token(self, mocked_put):
        """
        Make sure that those blocks that are committed after repacking small ones,
        get their permission tokens assigned on the collection manifest.
        """
        def wrapped_keep_put(*args, **kwargs):
            # Simulate slow put operations
            time.sleep(1)
            return self.keep_put(*args, **kwargs)

        mocked_put.side_effect = wrapped_keep_put
        c = Collection()
        # Write 70 files ~1MiB each so we force to produce 1 big block by repacking
        # small ones before finishing the upload.
        for i in range(70):
            f = c.open("file_{}.txt".format(i), 'wb')
            # Encode the randomly chosen letter so bytes are written to the
            # binary-mode file; each file is one byte longer than the last.
            letter = random.choice('abcdefghijklmnopqrstuvwxyz').encode()
            f.write(letter * (2**20 + i))
            f.close(flush=False)
        # We should get 2 blocks with their tokens
        self.assertEqual(self._count_locators(self.local_locator_re, c), 2)

    @mock.patch('arvados.keep.KeepClient.refresh_signature')
    def test_copy_remote_blocks_on_save_new(self, rs_mock):
        """``save_new()`` replaces a remote-hinted locator with a local one.

        ``KeepClient.refresh_signature`` is mocked to return a locally signed
        locator for the block.
        """
        remote_block_loc = "acbd18db4cc2f85cedef654fccc4a4d8+3+Remote-" + "a" * 40 + "@abcdef01"
        local_block_loc = "acbd18db4cc2f85cedef654fccc4a4d8+3+A" + "b" * 40 + "@abcdef01"
        rs_mock.return_value = local_block_loc
        c = Collection(". " + remote_block_loc + " 0:3:foofile.txt\n")
        # Before saving: one remote locator, no local ones.
        self.assertEqual(self._count_locators(self.remote_locator_re, c), 1)
        self.assertEqual(self._count_locators(self.local_locator_re, c), 0)
        c.save_new()
        rs_mock.assert_called()
        # After saving: the remote locator has been swapped for a local one.
        self.assertEqual(self._count_locators(self.remote_locator_re, c), 0)
        self.assertEqual(self._count_locators(self.local_locator_re, c), 1)

    @mock.patch('arvados.keep.KeepClient.refresh_signature')
    def test_copy_remote_blocks_on_save(self, rs_mock):
        """``save()`` replaces a remote-hinted locator copied in from another collection.

        ``KeepClient.refresh_signature`` is mocked to return a locally signed
        locator for the block.
        """
        remote_block_loc = "acbd18db4cc2f85cedef654fccc4a4d8+3+Remote-" + "a" * 40 + "@abcdef01"
        local_block_loc = "acbd18db4cc2f85cedef654fccc4a4d8+3+A" + "b" * 40 + "@abcdef01"
        rs_mock.return_value = local_block_loc
        # Remote collection
        remote_c = Collection(". " + remote_block_loc + " 0:3:foofile.txt\n")
        self.assertEqual(self._count_locators(self.remote_locator_re, remote_c), 1)
        # Local collection
        local_c = Collection()
        with local_c.open('barfile.txt', 'wb') as f:
            # Binary-mode file: write bytes, matching the rest of the module.
            f.write(b'bar')
        local_c.save_new()
        self.assertEqual(self._count_locators(self.local_locator_re, local_c), 1)
        self.assertEqual(self._count_locators(self.remote_locator_re, local_c), 0)
        # Copy remote file to local collection
        local_c.copy('./foofile.txt', './copied/foofile.txt', remote_c)
        self.assertEqual(self._count_locators(self.local_locator_re, local_c), 1)
        self.assertEqual(self._count_locators(self.remote_locator_re, local_c), 1)
        # Save local collection: remote block should be copied
        local_c.save()
        rs_mock.assert_called()
        self.assertEqual(self._count_locators(self.local_locator_re, local_c), 2)
        self.assertEqual(self._count_locators(self.remote_locator_re, local_c), 0)
1379
1380
class NewCollectionTestCaseWithServers(run_test_server.TestCaseWithServers):
    """Collection tests for version preservation and small-block packing."""

    def test_preserve_version_on_save(self):
        """Check ``version`` and ``preserve_version`` on the record after each save.

        Three saves are made; the record is expected to show versions 1, 2
        and 3 with ``preserve_version`` equal to what was passed to that save.
        """
        coll = Collection()

        def fetch_record():
            # Read the collection record back through a fresh API client.
            return arvados.api().collections().get(uuid=coll.manifest_locator()).execute()

        coll.save_new(preserve_version=True)
        record = fetch_record()
        self.assertEqual(record['version'], 1)
        self.assertEqual(record['preserve_version'], True)

        with coll.open("foo.txt", "wb") as foo_file:
            foo_file.write(b"foo")
        coll.save(preserve_version=True)
        record = fetch_record()
        self.assertEqual(record['version'], 2)
        self.assertEqual(record['preserve_version'], True)

        with coll.open("bar.txt", "wb") as bar_file:
            bar_file.write(b"bar")
        coll.save(preserve_version=False)
        record = fetch_record()
        self.assertEqual(record['version'], 3)
        self.assertEqual(record['preserve_version'], False)

    def test_get_manifest_text_only_committed(self):
        """``_get_manifest_text(only_committed=True)`` reports only flushed data.

        While ``count.txt`` has unflushed data it is expected to appear as a
        zero-length file; after flushing it appears with its 10 bytes.
        """
        coll = Collection()

        def committed_manifest():
            # Same arguments for both checks below.
            return coll._get_manifest_text(".",
                                           strip=False,
                                           normalize=False,
                                           only_committed=True)

        with coll.open("count.txt", "wb") as count_file:
            # One file committed
            with coll.open("foo.txt", "wb") as foo_file:
                foo_file.write(b"foo")
                foo_file.flush()  # Force block commit
            count_file.write(b"0123456789")
            # Other file not committed. Block not written to keep yet.
            self.assertEqual(
                committed_manifest(),
                '. acbd18db4cc2f85cedef654fccc4a4d8+3 0:0:count.txt 0:3:foo.txt\n')
            # And now with the file closed...
            count_file.flush()  # Force block commit
        self.assertEqual(
            committed_manifest(),
            ". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:10:count.txt 10:3:foo.txt\n")

    def test_only_small_blocks_are_packed_together(self):
        """Two small files share one block while a 33 MiB file gets its own."""
        coll = Collection()
        # Write a couple of small files,
        count_file = coll.open("count.txt", "wb")
        count_file.write(b"0123456789")
        count_file.close(flush=False)
        foo_file = coll.open("foo.txt", "wb")
        foo_file.write(b"foo")
        foo_file.close(flush=False)
        # Then, write a big file, it shouldn't be packed with the ones above
        big_file = coll.open("bigfile.txt", "wb")
        big_file.write(b"x" * (33 * 1024 * 1024))  # 33 MB > KEEP_BLOCK_SIZE/2
        big_file.close(flush=False)
        expected = '. 2d303c138c118af809f39319e5d507e9+34603008 a8430a058b8fbf408e1931b794dbd6fb+13 0:34603008:bigfile.txt 34603008:10:count.txt 34603018:3:foo.txt\n'
        self.assertEqual(coll.manifest_text("."), expected)

    def test_flush_after_small_block_packing(self):
        """Re-opening and flushing a packed small file leaves the manifest unchanged."""
        packed_manifest = '. a8430a058b8fbf408e1931b794dbd6fb+13 0:10:count.txt 10:3:foo.txt\n'
        coll = Collection()
        # Write a couple of small files,
        count_file = coll.open("count.txt", "wb")
        count_file.write(b"0123456789")
        count_file.close(flush=False)
        foo_file = coll.open("foo.txt", "wb")
        foo_file.write(b"foo")
        foo_file.close(flush=False)

        self.assertEqual(coll.manifest_text(), packed_manifest)

        # Open the first file again and close it with a flush, writing nothing.
        count_file = coll.open("count.txt", "rb+")
        count_file.close(flush=True)

        self.assertEqual(coll.manifest_text(), packed_manifest)

    def test_write_after_small_block_packing2(self):
        """Overwriting the start of a packed small file adds a new block for it.

        After writing 3 bytes at the start of ``count.txt`` the expected
        manifest lists a new 3-byte block followed by the original 13-byte
        packed block.
        """
        coll = Collection()
        # Write a couple of small files,
        count_file = coll.open("count.txt", "wb")
        count_file.write(b"0123456789")
        count_file.close(flush=False)
        foo_file = coll.open("foo.txt", "wb")
        foo_file.write(b"foo")
        foo_file.close(flush=False)

        packed_manifest = '. a8430a058b8fbf408e1931b794dbd6fb+13 0:10:count.txt 10:3:foo.txt\n'
        self.assertEqual(coll.manifest_text(), packed_manifest)

        # Overwrite the first three bytes of the already packed file.
        count_file = coll.open("count.txt", "rb+")
        count_file.write(b"abc")
        count_file.close(flush=False)

        rewritten_manifest = '. 900150983cd24fb0d6963f7d28e17f72+3 a8430a058b8fbf408e1931b794dbd6fb+13 0:3:count.txt 6:7:count.txt 13:3:foo.txt\n'
        self.assertEqual(coll.manifest_text(), rewritten_manifest)

    def test_small_block_packing_with_overwrite(self):
        """Overwriting one small file before packing keeps both files in one 4-byte block."""
        coll = Collection()

        # Create each file empty, then fill it through writeto().
        coll.open("b1", "wb").close()
        coll["b1"].writeto(0, b"b1", 0)

        coll.open("b2", "wb").close()
        coll["b2"].writeto(0, b"b2", 0)

        # Replace the content of the first file.
        coll["b1"].writeto(0, b"1b", 0)

        shared_block = ". ed4f3f67c70b02b29c50ce1ea26666bd+4"
        self.assertEqual(coll.manifest_text(), shared_block + " 0:2:b1 2:2:b2\n")
        self.assertEqual(coll["b1"].manifest_text(), shared_block + " 0:2:b1\n")
        self.assertEqual(coll["b2"].manifest_text(), shared_block + " 2:2:b2\n")
1499
1500
class CollectionCreateUpdateTest(run_test_server.TestCaseWithServers):
    """Tests for creating, saving, diffing and updating collections."""
    # NOTE(review): empty dicts presumably ask TestCaseWithServers to bring up
    # these servers with default settings -- confirm in run_test_server.
    MAIN_SERVER = {}
    KEEP_SERVER = {}

    def create_count_txt(self):
        """Return a saved collection holding an unsaved 10-byte ``count.txt``.

        An empty collection is saved to the API server first; the file is
        written afterwards and deliberately not saved.
        """
        empty_pdh = "d41d8cd98f00b204e9800998ecf8427e+0"

        coll = Collection()
        coll.save_new("CollectionCreateUpdateTest", ensure_unique_name=True)
        self.assertEqual(coll.portable_data_hash(), empty_pdh)
        self.assertEqual(coll.api_response()["portable_data_hash"], empty_pdh)

        with coll.open("count.txt", "wb") as count_file:
            count_file.write(b"0123456789")

        self.assertEqual(
            coll.portable_manifest_text(),
            ". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n")

        return coll

    def test_create_and_save(self):
        """``save()`` stores properties, storage classes and ``trash_at``."""
        coll = self.create_count_txt()
        trash_time = datetime.datetime(2111, 1, 1, 11, 11, 11, 111111)
        coll.save(properties={'type' : 'Intermediate'},
                  storage_classes=['archive'],
                  trash_at=trash_time)

        # The saved manifest is expected to carry a +A hint on the block.
        signed_manifest_re = r"^\. 781e5e245d69b566979b86e28d23f2c7\+10\+A[a-f0-9]{40}@[a-f0-9]{8} 0:10:count\.txt$"
        self.assertRegex(coll.manifest_text(), signed_manifest_re)

        response = coll.api_response()
        self.assertEqual(response["storage_classes_desired"], ['archive'])
        self.assertEqual(response["properties"], {'type' : 'Intermediate'})
        self.assertEqual(response["trash_at"], '2111-01-01T11:11:11.111111000Z')

    def test_create_and_save_new(self):
        """``save_new()`` stores properties, storage classes and ``trash_at``."""
        coll = self.create_count_txt()
        trash_time = datetime.datetime(2111, 1, 1, 11, 11, 11, 111111)
        coll.save_new(properties={'type' : 'Intermediate'},
                      storage_classes=['archive'],
                      trash_at=trash_time)

        # The saved manifest is expected to carry a +A hint on the block.
        signed_manifest_re = r"^\. 781e5e245d69b566979b86e28d23f2c7\+10\+A[a-f0-9]{40}@[a-f0-9]{8} 0:10:count\.txt$"
        self.assertRegex(coll.manifest_text(), signed_manifest_re)

        response = coll.api_response()
        self.assertEqual(response["storage_classes_desired"], ['archive'])
        self.assertEqual(response["properties"], {'type' : 'Intermediate'})
        self.assertEqual(response["trash_at"], '2111-01-01T11:11:11.111111000Z')

    def test_create_and_save_after_commiting(self):
        """A second ``save()`` replaces the values stored by the first."""
        coll = self.create_count_txt()
        first_trash_time = datetime.datetime(2111, 1, 1, 11, 11, 11, 111111)
        second_trash_time = datetime.datetime(2222, 2, 2, 22, 22, 22, 222222)

        coll.save(properties={'type' : 'Intermediate'},
                  storage_classes=['hot'],
                  trash_at=first_trash_time)
        coll.save(properties={'type' : 'Output'},
                  storage_classes=['cold'],
                  trash_at=second_trash_time)

        # Only the values from the second save are expected to remain.
        response = coll.api_response()
        self.assertEqual(response["storage_classes_desired"], ['cold'])
        self.assertEqual(response["properties"], {'type' : 'Output'})
        self.assertEqual(response["trash_at"], '2222-02-02T22:22:22.222222000Z')

    def test_create_diff_apply(self):
        """Applying the diff against a modified copy yields the same portable data hash."""
        original = self.create_count_txt()
        original.save()

        # Load a second copy of the saved collection and rewrite its file.
        modified = Collection(original.manifest_locator())
        with modified.open("count.txt", "wb") as count_file:
            count_file.write(b"abcdefg")

        changes = original.diff(modified)

        expected_first = (arvados.collection.MOD, u'./count.txt',
                          original["count.txt"], modified["count.txt"])
        self.assertEqual(changes[0], expected_first)

        original.apply(changes)
        self.assertEqual(original.portable_data_hash(), modified.portable_data_hash())

    def test_diff_apply_with_token(self):
        """A diff between signed manifests applies to an unsigned collection.

        The resulting manifest is expected to carry the ``+A`` hint from the
        ``other`` manifest.
        """
        baseline = CollectionReader(". 781e5e245d69b566979b86e28d23f2c7+10+A715fd31f8111894f717eb1003c1b0216799dd9ec@54f5dd1a 0:10:count.txt\n")
        unsigned = Collection(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n")
        other = CollectionReader(". 7ac66c0f148de9519b8bd264312c4d64+7+A715fd31f8111894f717eb1003c1b0216799dd9ec@54f5dd1a 0:7:count.txt\n")

        changes = baseline.diff(other)
        # The expected entry is built from the unsigned collection's file.
        expected_changes = [('mod', u'./count.txt', unsigned["count.txt"], other["count.txt"])]
        self.assertEqual(changes, expected_changes)

        unsigned.apply(changes)

        self.assertEqual(
            unsigned.manifest_text(),
            ". 7ac66c0f148de9519b8bd264312c4d64+7+A715fd31f8111894f717eb1003c1b0216799dd9ec@54f5dd1a 0:7:count.txt\n")

    def test_create_and_update(self):
        """``update()`` brings a collection in line with a newer saved version."""
        stale = self.create_count_txt()
        stale.save()

        # Save a change through a second handle on the same collection.
        fresh = arvados.collection.Collection(stale.manifest_locator())
        with fresh.open("count.txt", "wb") as count_file:
            count_file.write(b"abcdefg")

        fresh.save()

        self.assertNotEqual(stale.portable_data_hash(), fresh.portable_data_hash())
        stale.update()
        self.assertEqual(stale.portable_data_hash(), fresh.portable_data_hash())

    def test_create_and_update_with_conflict(self):
        """``update()`` with an unsaved local rewrite produces a conflict file.

        The expected manifest keeps the local 3-byte ``count.txt`` and adds a
        ``count.txt~<8 digits>-<6 digits>~conflict~`` entry with the saved
        7-byte content.
        """
        local = self.create_count_txt()
        local.save()

        # Unsaved local rewrite of the file.
        with local.open("count.txt", "wb") as count_file:
            count_file.write(b"XYZ")

        # A different rewrite saved through a second handle.
        remote = arvados.collection.Collection(local.manifest_locator())
        with remote.open("count.txt", "wb") as count_file:
            count_file.write(b"abcdefg")

        remote.save()

        local.update()
        conflict_pattern = r"\. e65075d550f9b5bf9992fa1d71a131be\+3\S* 7ac66c0f148de9519b8bd264312c4d64\+7\S* 0:3:count\.txt 3:7:count\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$"
        self.assertRegex(local.manifest_text(), conflict_pattern)

    def test_pdh_is_native_str(self):
        """``portable_data_hash()`` returns exactly the type of a ``''`` literal."""
        coll = self.create_count_txt()
        pdh = coll.portable_data_hash()
        native_str_type = type('')
        self.assertEqual(native_str_type, type(pdh))
1626
1627
# Run the tests in this module when the file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()