21721: Remove ruamel.yaml dependency
[arvados.git] / sdk / python / tests / test_collections.py
1 # Copyright (C) The Arvados Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: Apache-2.0
4
5 from __future__ import absolute_import
6
7 from builtins import object
8 import arvados
9 import copy
10 import os
11 import random
12 import re
13 import sys
14 import datetime
15 import ciso8601
16 import time
17 import unittest
18 import parameterized
19
20 from unittest import mock
21
22 from . import run_test_server
23 from arvados._ranges import Range, LocatorAndRange
24 from arvados.collection import Collection, CollectionReader
25 from . import arvados_testutil as tutil
26 from .arvados_testutil import make_block_cache
27
class TestResumableWriter(arvados.ResumableCollectionWriter):
    """ResumableCollectionWriter variant used by the resume tests below."""

    # PUT to Keep every 1K.
    KEEP_BLOCK_SIZE = 1024

    def current_state(self):
        """Return the result of dump_state() called with copy.deepcopy."""
        snapshot = self.dump_state(copy.deepcopy)
        return snapshot
33
34 @parameterized.parameterized_class([{"disk_cache": True}, {"disk_cache": False}])
35 class ArvadosCollectionsTest(run_test_server.TestCaseWithServers,
36                              tutil.ArvadosBaseTestCase):
37     disk_cache = False
38     MAIN_SERVER = {}
39
40     @classmethod
41     def setUpClass(cls):
42         super(ArvadosCollectionsTest, cls).setUpClass()
43         # need admin privileges to make collections with unsigned blocks
44         run_test_server.authorize_with('admin')
45         cls.api_client = arvados.api('v1')
46         cls.keep_client = arvados.KeepClient(api_client=cls.api_client,
47                                              local_store=cls.local_store,
48                                              block_cache=make_block_cache(cls.disk_cache))
49
50     def write_foo_bar_baz(self):
51         cw = arvados.CollectionWriter(self.api_client)
52         self.assertEqual(cw.current_stream_name(), '.',
53                          'current_stream_name() should be "." now')
54         cw.set_current_file_name('foo.txt')
55         cw.write(b'foo')
56         self.assertEqual(cw.current_file_name(), 'foo.txt',
57                          'current_file_name() should be foo.txt now')
58         cw.start_new_file('bar.txt')
59         cw.write(b'bar')
60         cw.start_new_stream('baz')
61         cw.write(b'baz')
62         cw.set_current_file_name('baz.txt')
63         self.assertEqual(cw.manifest_text(),
64                          ". 3858f62230ac3c915f300c664312c63f+6 0:3:foo.txt 3:3:bar.txt\n" +
65                          "./baz 73feffa4b7f6bb68e44cf984c85f6e88+3 0:3:baz.txt\n",
66                          "wrong manifest: got {}".format(cw.manifest_text()))
67         cw.save_new()
68         return cw.portable_data_hash()
69
70     def test_pdh_is_native_str(self):
71         pdh = self.write_foo_bar_baz()
72         self.assertEqual(type(''), type(pdh))
73
74     def test_keep_local_store(self):
75         self.assertEqual(self.keep_client.put(b'foo'), 'acbd18db4cc2f85cedef654fccc4a4d8+3', 'wrong md5 hash from Keep.put')
76         self.assertEqual(self.keep_client.get('acbd18db4cc2f85cedef654fccc4a4d8+3'), b'foo', 'wrong data from Keep.get')
77
78     def test_local_collection_writer(self):
79         self.assertEqual(self.write_foo_bar_baz(),
80                          '23ca013983d6239e98931cc779e68426+114',
81                          'wrong locator hash: ' + self.write_foo_bar_baz())
82
83     def test_local_collection_reader(self):
84         foobarbaz = self.write_foo_bar_baz()
85         cr = arvados.CollectionReader(
86             foobarbaz + '+Xzizzle', self.api_client)
87         got = []
88         for s in cr.all_streams():
89             for f in s.all_files():
90                 got += [[f.size(), f.stream_name(), f.name(), f.read(2**26)]]
91         expected = [[3, '.', 'foo.txt', b'foo'],
92                     [3, '.', 'bar.txt', b'bar'],
93                     [3, './baz', 'baz.txt', b'baz']]
94         self.assertEqual(got,
95                          expected)
96         stream0 = cr.all_streams()[0]
97         self.assertEqual(stream0.readfrom(0, 0),
98                          b'',
99                          'reading zero bytes should have returned empty string')
100         self.assertEqual(stream0.readfrom(0, 2**26),
101                          b'foobar',
102                          'reading entire stream failed')
103         self.assertEqual(stream0.readfrom(2**26, 0),
104                          b'',
105                          'reading zero bytes should have returned empty string')
106         self.assertEqual(3, len(cr))
107         self.assertTrue(cr)
108
109     def _test_subset(self, collection, expected):
110         cr = arvados.CollectionReader(collection, self.api_client)
111         for s in cr.all_streams():
112             for ex in expected:
113                 if ex[0] == s:
114                     f = s.files()[ex[2]]
115                     got = [f.size(), f.stream_name(), f.name(), "".join(f.readall(2**26))]
116                     self.assertEqual(got,
117                                      ex,
118                                      'all_files|as_manifest did not preserve manifest contents: got %s expected %s' % (got, ex))
119
120     def test_collection_manifest_subset(self):
121         foobarbaz = self.write_foo_bar_baz()
122         self._test_subset(foobarbaz,
123                           [[3, '.',     'bar.txt', b'bar'],
124                            [3, '.',     'foo.txt', b'foo'],
125                            [3, './baz', 'baz.txt', b'baz']])
126         self._test_subset((". %s %s 0:3:foo.txt 3:3:bar.txt\n" %
127                            (self.keep_client.put(b"foo"),
128                             self.keep_client.put(b"bar"))),
129                           [[3, '.', 'bar.txt', b'bar'],
130                            [3, '.', 'foo.txt', b'foo']])
131         self._test_subset((". %s %s 0:2:fo.txt 2:4:obar.txt\n" %
132                            (self.keep_client.put(b"foo"),
133                             self.keep_client.put(b"bar"))),
134                           [[2, '.', 'fo.txt', b'fo'],
135                            [4, '.', 'obar.txt', b'obar']])
136         self._test_subset((". %s %s 0:2:fo.txt 2:0:zero.txt 2:2:ob.txt 4:2:ar.txt\n" %
137                            (self.keep_client.put(b"foo"),
138                             self.keep_client.put(b"bar"))),
139                           [[2, '.', 'ar.txt', b'ar'],
140                            [2, '.', 'fo.txt', b'fo'],
141                            [2, '.', 'ob.txt', b'ob'],
142                            [0, '.', 'zero.txt', b'']])
143
144     def test_collection_empty_file(self):
145         cw = arvados.CollectionWriter(self.api_client)
146         cw.start_new_file('zero.txt')
147         cw.write(b'')
148
149         self.assertEqual(cw.manifest_text(), ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:zero.txt\n")
150         self.check_manifest_file_sizes(cw.manifest_text(), [0])
151         cw = arvados.CollectionWriter(self.api_client)
152         cw.start_new_file('zero.txt')
153         cw.write(b'')
154         cw.start_new_file('one.txt')
155         cw.write(b'1')
156         cw.start_new_stream('foo')
157         cw.start_new_file('zero.txt')
158         cw.write(b'')
159         self.check_manifest_file_sizes(cw.manifest_text(), [0,1,0])
160
161     def test_no_implicit_normalize(self):
162         cw = arvados.CollectionWriter(self.api_client)
163         cw.start_new_file('b')
164         cw.write(b'b')
165         cw.start_new_file('a')
166         cw.write(b'')
167         self.check_manifest_file_sizes(cw.manifest_text(), [1,0])
168         self.check_manifest_file_sizes(
169             arvados.CollectionReader(
170                 cw.manifest_text()).manifest_text(normalize=True),
171             [0,1])
172
173     def check_manifest_file_sizes(self, manifest_text, expect_sizes):
174         cr = arvados.CollectionReader(manifest_text, self.api_client)
175         got_sizes = []
176         for f in cr.all_files():
177             got_sizes += [f.size()]
178         self.assertEqual(got_sizes, expect_sizes, "got wrong file sizes %s, expected %s" % (got_sizes, expect_sizes))
179
180     def test_normalized_collection(self):
181         m1 = """. 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt
182 . 085c37f02916da1cad16f93c54d899b7+41 0:41:md5sum.txt
183 . 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md5sum.txt
184 """
185         self.assertEqual(arvados.CollectionReader(m1, self.api_client).manifest_text(normalize=True),
186                          """. 5348b82a029fd9e971a811ce1f71360b+43 085c37f02916da1cad16f93c54d899b7+41 8b22da26f9f433dea0a10e5ec66d73ba+43 0:127:md5sum.txt
187 """)
188
189         m2 = """. 204e43b8a1185621ca55a94839582e6f+67108864 b9677abbac956bd3e86b1deb28dfac03+67108864 fc15aff2a762b13f521baf042140acec+67108864 323d2a3ce20370c4ca1d3462a344f8fd+25885655 0:227212247:var-GS000016015-ASM.tsv.bz2
190 """
191         self.assertEqual(arvados.CollectionReader(m2, self.api_client).manifest_text(normalize=True), m2)
192
193         m3 = """. 5348b82a029fd9e971a811ce1f71360b+43 3:40:md5sum.txt
194 . 085c37f02916da1cad16f93c54d899b7+41 0:41:md5sum.txt
195 . 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md5sum.txt
196 """
197         self.assertEqual(arvados.CollectionReader(m3, self.api_client).manifest_text(normalize=True),
198                          """. 5348b82a029fd9e971a811ce1f71360b+43 085c37f02916da1cad16f93c54d899b7+41 8b22da26f9f433dea0a10e5ec66d73ba+43 3:124:md5sum.txt
199 """)
200
201         m4 = """. 204e43b8a1185621ca55a94839582e6f+67108864 0:3:foo/bar
202 ./zzz 204e43b8a1185621ca55a94839582e6f+67108864 0:999:zzz
203 ./foo 323d2a3ce20370c4ca1d3462a344f8fd+25885655 0:3:bar
204 """
205         self.assertEqual(arvados.CollectionReader(m4, self.api_client).manifest_text(normalize=True),
206                          """./foo 204e43b8a1185621ca55a94839582e6f+67108864 323d2a3ce20370c4ca1d3462a344f8fd+25885655 0:3:bar 67108864:3:bar
207 ./zzz 204e43b8a1185621ca55a94839582e6f+67108864 0:999:zzz
208 """)
209
210         m5 = """. 204e43b8a1185621ca55a94839582e6f+67108864 0:3:foo/bar
211 ./zzz 204e43b8a1185621ca55a94839582e6f+67108864 0:999:zzz
212 ./foo 204e43b8a1185621ca55a94839582e6f+67108864 3:3:bar
213 """
214         self.assertEqual(arvados.CollectionReader(m5, self.api_client).manifest_text(normalize=True),
215                          """./foo 204e43b8a1185621ca55a94839582e6f+67108864 0:6:bar
216 ./zzz 204e43b8a1185621ca55a94839582e6f+67108864 0:999:zzz
217 """)
218
219         with self.data_file('1000G_ref_manifest') as f6:
220             m6 = f6.read()
221             self.assertEqual(arvados.CollectionReader(m6, self.api_client).manifest_text(normalize=True), m6)
222
223         with self.data_file('jlake_manifest') as f7:
224             m7 = f7.read()
225             self.assertEqual(arvados.CollectionReader(m7, self.api_client).manifest_text(normalize=True), m7)
226
227         m8 = """./a\\040b\\040c 59ca0efa9f5633cb0371bbc0355478d8+13 0:13:hello\\040world.txt
228 """
229         self.assertEqual(arvados.CollectionReader(m8, self.api_client).manifest_text(normalize=True), m8)
230
231     def test_locators_and_ranges(self):
232         blocks2 = [Range('a', 0, 10),
233                    Range('b', 10, 10),
234                    Range('c', 20, 10),
235                    Range('d', 30, 10),
236                    Range('e', 40, 10),
237                    Range('f', 50, 10)]
238
239         self.assertEqual(arvados.locators_and_ranges(blocks2,  2,  2), [LocatorAndRange('a', 10, 2, 2)])
240         self.assertEqual(arvados.locators_and_ranges(blocks2, 12, 2), [LocatorAndRange('b', 10, 2, 2)])
241         self.assertEqual(arvados.locators_and_ranges(blocks2, 22, 2), [LocatorAndRange('c', 10, 2, 2)])
242         self.assertEqual(arvados.locators_and_ranges(blocks2, 32, 2), [LocatorAndRange('d', 10, 2, 2)])
243         self.assertEqual(arvados.locators_and_ranges(blocks2, 42, 2), [LocatorAndRange('e', 10, 2, 2)])
244         self.assertEqual(arvados.locators_and_ranges(blocks2, 52, 2), [LocatorAndRange('f', 10, 2, 2)])
245         self.assertEqual(arvados.locators_and_ranges(blocks2, 62, 2), [])
246         self.assertEqual(arvados.locators_and_ranges(blocks2, -2, 2), [])
247
248         self.assertEqual(arvados.locators_and_ranges(blocks2,  0,  2), [LocatorAndRange('a', 10, 0, 2)])
249         self.assertEqual(arvados.locators_and_ranges(blocks2, 10, 2), [LocatorAndRange('b', 10, 0, 2)])
250         self.assertEqual(arvados.locators_and_ranges(blocks2, 20, 2), [LocatorAndRange('c', 10, 0, 2)])
251         self.assertEqual(arvados.locators_and_ranges(blocks2, 30, 2), [LocatorAndRange('d', 10, 0, 2)])
252         self.assertEqual(arvados.locators_and_ranges(blocks2, 40, 2), [LocatorAndRange('e', 10, 0, 2)])
253         self.assertEqual(arvados.locators_and_ranges(blocks2, 50, 2), [LocatorAndRange('f', 10, 0, 2)])
254         self.assertEqual(arvados.locators_and_ranges(blocks2, 60, 2), [])
255         self.assertEqual(arvados.locators_and_ranges(blocks2, -2, 2), [])
256
257         self.assertEqual(arvados.locators_and_ranges(blocks2,  9,  2), [LocatorAndRange('a', 10, 9, 1), LocatorAndRange('b', 10, 0, 1)])
258         self.assertEqual(arvados.locators_and_ranges(blocks2, 19, 2), [LocatorAndRange('b', 10, 9, 1), LocatorAndRange('c', 10, 0, 1)])
259         self.assertEqual(arvados.locators_and_ranges(blocks2, 29, 2), [LocatorAndRange('c', 10, 9, 1), LocatorAndRange('d', 10, 0, 1)])
260         self.assertEqual(arvados.locators_and_ranges(blocks2, 39, 2), [LocatorAndRange('d', 10, 9, 1), LocatorAndRange('e', 10, 0, 1)])
261         self.assertEqual(arvados.locators_and_ranges(blocks2, 49, 2), [LocatorAndRange('e', 10, 9, 1), LocatorAndRange('f', 10, 0, 1)])
262         self.assertEqual(arvados.locators_and_ranges(blocks2, 59, 2), [LocatorAndRange('f', 10, 9, 1)])
263
264
265         blocks3 = [Range('a', 0, 10),
266                   Range('b', 10, 10),
267                   Range('c', 20, 10),
268                   Range('d', 30, 10),
269                   Range('e', 40, 10),
270                   Range('f', 50, 10),
271                    Range('g', 60, 10)]
272
273         self.assertEqual(arvados.locators_and_ranges(blocks3,  2,  2), [LocatorAndRange('a', 10, 2, 2)])
274         self.assertEqual(arvados.locators_and_ranges(blocks3, 12, 2), [LocatorAndRange('b', 10, 2, 2)])
275         self.assertEqual(arvados.locators_and_ranges(blocks3, 22, 2), [LocatorAndRange('c', 10, 2, 2)])
276         self.assertEqual(arvados.locators_and_ranges(blocks3, 32, 2), [LocatorAndRange('d', 10, 2, 2)])
277         self.assertEqual(arvados.locators_and_ranges(blocks3, 42, 2), [LocatorAndRange('e', 10, 2, 2)])
278         self.assertEqual(arvados.locators_and_ranges(blocks3, 52, 2), [LocatorAndRange('f', 10, 2, 2)])
279         self.assertEqual(arvados.locators_and_ranges(blocks3, 62, 2), [LocatorAndRange('g', 10, 2, 2)])
280
281
282         blocks = [Range('a', 0, 10),
283                   Range('b', 10, 15),
284                   Range('c', 25, 5)]
285         self.assertEqual(arvados.locators_and_ranges(blocks, 1, 0), [])
286         self.assertEqual(arvados.locators_and_ranges(blocks, 0, 5), [LocatorAndRange('a', 10, 0, 5)])
287         self.assertEqual(arvados.locators_and_ranges(blocks, 3, 5), [LocatorAndRange('a', 10, 3, 5)])
288         self.assertEqual(arvados.locators_and_ranges(blocks, 0, 10), [LocatorAndRange('a', 10, 0, 10)])
289
290         self.assertEqual(arvados.locators_and_ranges(blocks, 0, 11), [LocatorAndRange('a', 10, 0, 10),
291                                                                       LocatorAndRange('b', 15, 0, 1)])
292         self.assertEqual(arvados.locators_and_ranges(blocks, 1, 11), [LocatorAndRange('a', 10, 1, 9),
293                                                                       LocatorAndRange('b', 15, 0, 2)])
294         self.assertEqual(arvados.locators_and_ranges(blocks, 0, 25), [LocatorAndRange('a', 10, 0, 10),
295                                                                       LocatorAndRange('b', 15, 0, 15)])
296
297         self.assertEqual(arvados.locators_and_ranges(blocks, 0, 30), [LocatorAndRange('a', 10, 0, 10),
298                                                                       LocatorAndRange('b', 15, 0, 15),
299                                                                       LocatorAndRange('c', 5, 0, 5)])
300         self.assertEqual(arvados.locators_and_ranges(blocks, 1, 30), [LocatorAndRange('a', 10, 1, 9),
301                                                                       LocatorAndRange('b', 15, 0, 15),
302                                                                       LocatorAndRange('c', 5, 0, 5)])
303         self.assertEqual(arvados.locators_and_ranges(blocks, 0, 31), [LocatorAndRange('a', 10, 0, 10),
304                                                                       LocatorAndRange('b', 15, 0, 15),
305                                                                       LocatorAndRange('c', 5, 0, 5)])
306
307         self.assertEqual(arvados.locators_and_ranges(blocks, 15, 5), [LocatorAndRange('b', 15, 5, 5)])
308
309         self.assertEqual(arvados.locators_and_ranges(blocks, 8, 17), [LocatorAndRange('a', 10, 8, 2),
310                                                                       LocatorAndRange('b', 15, 0, 15)])
311
312         self.assertEqual(arvados.locators_and_ranges(blocks, 8, 20), [LocatorAndRange('a', 10, 8, 2),
313                                                                       LocatorAndRange('b', 15, 0, 15),
314                                                                       LocatorAndRange('c', 5, 0, 3)])
315
316         self.assertEqual(arvados.locators_and_ranges(blocks, 26, 2), [LocatorAndRange('c', 5, 1, 2)])
317
318         self.assertEqual(arvados.locators_and_ranges(blocks, 9, 15), [LocatorAndRange('a', 10, 9, 1),
319                                                                       LocatorAndRange('b', 15, 0, 14)])
320         self.assertEqual(arvados.locators_and_ranges(blocks, 10, 15), [LocatorAndRange('b', 15, 0, 15)])
321         self.assertEqual(arvados.locators_and_ranges(blocks, 11, 15), [LocatorAndRange('b', 15, 1, 14),
322                                                                        LocatorAndRange('c', 5, 0, 1)])
323
324     class MockKeep(object):
325         def __init__(self, content, num_retries=0):
326             self.content = content
327             self.num_prefetch_threads = 1
328
329         def get(self, locator, num_retries=0, prefetch=False):
330             return self.content[locator]
331
332     def test_stream_reader(self):
333         keepblocks = {
334             'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa+10': b'abcdefghij',
335             'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb+15': b'klmnopqrstuvwxy',
336             'cccccccccccccccccccccccccccccccc+5': b'z0123',
337         }
338         mk = self.MockKeep(keepblocks)
339
340         sr = arvados.StreamReader([".", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa+10", "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb+15", "cccccccccccccccccccccccccccccccc+5", "0:30:foo"], mk)
341
342         content = b'abcdefghijklmnopqrstuvwxyz0123456789'
343
344         self.assertEqual(sr.readfrom(0, 30), content[0:30])
345         self.assertEqual(sr.readfrom(2, 30), content[2:30])
346
347         self.assertEqual(sr.readfrom(2, 8), content[2:10])
348         self.assertEqual(sr.readfrom(0, 10), content[0:10])
349
350         self.assertEqual(sr.readfrom(0, 5), content[0:5])
351         self.assertEqual(sr.readfrom(5, 5), content[5:10])
352         self.assertEqual(sr.readfrom(10, 5), content[10:15])
353         self.assertEqual(sr.readfrom(15, 5), content[15:20])
354         self.assertEqual(sr.readfrom(20, 5), content[20:25])
355         self.assertEqual(sr.readfrom(25, 5), content[25:30])
356         self.assertEqual(sr.readfrom(30, 5), b'')
357
358     def test_extract_file(self):
359         m1 = """. 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt
360 . 085c37f02916da1cad16f93c54d899b7+41 0:41:md6sum.txt
361 . 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md7sum.txt
362 . 085c37f02916da1cad16f93c54d899b7+41 5348b82a029fd9e971a811ce1f71360b+43 8b22da26f9f433dea0a10e5ec66d73ba+43 47:80:md8sum.txt
363 . 085c37f02916da1cad16f93c54d899b7+41 5348b82a029fd9e971a811ce1f71360b+43 8b22da26f9f433dea0a10e5ec66d73ba+43 40:80:md9sum.txt
364 """
365
366         m2 = arvados.CollectionReader(m1, self.api_client).manifest_text(normalize=True)
367
368         self.assertEqual(m2,
369                          ". 5348b82a029fd9e971a811ce1f71360b+43 085c37f02916da1cad16f93c54d899b7+41 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md5sum.txt 43:41:md6sum.txt 84:43:md7sum.txt 6:37:md8sum.txt 84:43:md8sum.txt 83:1:md9sum.txt 0:43:md9sum.txt 84:36:md9sum.txt\n")
370         files = arvados.CollectionReader(
371             m2, self.api_client).all_streams()[0].files()
372
373         self.assertEqual(files['md5sum.txt'].as_manifest(),
374                          ". 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt\n")
375         self.assertEqual(files['md6sum.txt'].as_manifest(),
376                          ". 085c37f02916da1cad16f93c54d899b7+41 0:41:md6sum.txt\n")
377         self.assertEqual(files['md7sum.txt'].as_manifest(),
378                          ". 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md7sum.txt\n")
379         self.assertEqual(files['md9sum.txt'].as_manifest(),
380                          ". 085c37f02916da1cad16f93c54d899b7+41 5348b82a029fd9e971a811ce1f71360b+43 8b22da26f9f433dea0a10e5ec66d73ba+43 40:80:md9sum.txt\n")
381
382     def test_write_directory_tree(self):
383         cwriter = arvados.CollectionWriter(self.api_client)
384         cwriter.write_directory_tree(self.build_directory_tree(
385                 ['basefile', 'subdir/subfile']))
386         self.assertEqual(cwriter.manifest_text(),
387                          """. c5110c5ac93202d8e0f9e381f22bac0f+8 0:8:basefile
388 ./subdir 1ca4dec89403084bf282ad31e6cf7972+14 0:14:subfile\n""")
389
390     def test_write_named_directory_tree(self):
391         cwriter = arvados.CollectionWriter(self.api_client)
392         cwriter.write_directory_tree(self.build_directory_tree(
393                 ['basefile', 'subdir/subfile']), 'root')
394         self.assertEqual(
395             cwriter.manifest_text(),
396             """./root c5110c5ac93202d8e0f9e381f22bac0f+8 0:8:basefile
397 ./root/subdir 1ca4dec89403084bf282ad31e6cf7972+14 0:14:subfile\n""")
398
399     def test_write_directory_tree_in_one_stream(self):
400         cwriter = arvados.CollectionWriter(self.api_client)
401         cwriter.write_directory_tree(self.build_directory_tree(
402                 ['basefile', 'subdir/subfile']), max_manifest_depth=0)
403         self.assertEqual(cwriter.manifest_text(),
404                          """. 4ace875ffdc6824a04950f06858f4465+22 0:8:basefile 8:14:subdir/subfile\n""")
405
406     def test_write_directory_tree_with_limited_recursion(self):
407         cwriter = arvados.CollectionWriter(self.api_client)
408         cwriter.write_directory_tree(
409             self.build_directory_tree(['f1', 'd1/f2', 'd1/d2/f3']),
410             max_manifest_depth=1)
411         self.assertEqual(cwriter.manifest_text(),
412                          """. bd19836ddb62c11c55ab251ccaca5645+2 0:2:f1
413 ./d1 50170217e5b04312024aa5cd42934494+13 0:8:d2/f3 8:5:f2\n""")
414
415     def test_write_directory_tree_with_zero_recursion(self):
416         cwriter = arvados.CollectionWriter(self.api_client)
417         content = 'd1/d2/f3d1/f2f1'
418         blockhash = tutil.str_keep_locator(content)
419         cwriter.write_directory_tree(
420             self.build_directory_tree(['f1', 'd1/f2', 'd1/d2/f3']),
421             max_manifest_depth=0)
422         self.assertEqual(
423             cwriter.manifest_text(),
424             ". {} 0:8:d1/d2/f3 8:5:d1/f2 13:2:f1\n".format(blockhash))
425
426     def test_write_one_file(self):
427         cwriter = arvados.CollectionWriter(self.api_client)
428         with self.make_test_file() as testfile:
429             cwriter.write_file(testfile.name)
430             self.assertEqual(
431                 cwriter.manifest_text(),
432                 ". 098f6bcd4621d373cade4e832627b4f6+4 0:4:{}\n".format(
433                     os.path.basename(testfile.name)))
434
435     def test_write_named_file(self):
436         cwriter = arvados.CollectionWriter(self.api_client)
437         with self.make_test_file() as testfile:
438             cwriter.write_file(testfile.name, 'foo')
439             self.assertEqual(cwriter.manifest_text(),
440                              ". 098f6bcd4621d373cade4e832627b4f6+4 0:4:foo\n")
441
442     def test_write_multiple_files(self):
443         cwriter = arvados.CollectionWriter(self.api_client)
444         for letter in 'ABC':
445             with self.make_test_file(letter.encode()) as testfile:
446                 cwriter.write_file(testfile.name, letter)
447         self.assertEqual(
448             cwriter.manifest_text(),
449             ". 902fbdd2b1df0c4f70b4a5d23525e932+3 0:1:A 1:1:B 2:1:C\n")
450
451     def test_basic_resume(self):
452         cwriter = TestResumableWriter()
453         with self.make_test_file() as testfile:
454             cwriter.write_file(testfile.name, 'test')
455             resumed = TestResumableWriter.from_state(cwriter.current_state())
456         self.assertEqual(cwriter.manifest_text(), resumed.manifest_text(),
457                           "resumed CollectionWriter had different manifest")
458
459     def test_resume_fails_when_missing_dependency(self):
460         cwriter = TestResumableWriter()
461         with self.make_test_file() as testfile:
462             cwriter.write_file(testfile.name, 'test')
463         self.assertRaises(arvados.errors.StaleWriterStateError,
464                           TestResumableWriter.from_state,
465                           cwriter.current_state())
466
467     def test_resume_fails_when_dependency_mtime_changed(self):
468         cwriter = TestResumableWriter()
469         with self.make_test_file() as testfile:
470             cwriter.write_file(testfile.name, 'test')
471             os.utime(testfile.name, (0, 0))
472             self.assertRaises(arvados.errors.StaleWriterStateError,
473                               TestResumableWriter.from_state,
474                               cwriter.current_state())
475
476     def test_resume_fails_when_dependency_is_nonfile(self):
477         cwriter = TestResumableWriter()
478         cwriter.write_file('/dev/null', 'empty')
479         self.assertRaises(arvados.errors.StaleWriterStateError,
480                           TestResumableWriter.from_state,
481                           cwriter.current_state())
482
483     def test_resume_fails_when_dependency_size_changed(self):
484         cwriter = TestResumableWriter()
485         with self.make_test_file() as testfile:
486             cwriter.write_file(testfile.name, 'test')
487             orig_mtime = os.fstat(testfile.fileno()).st_mtime
488             testfile.write(b'extra')
489             testfile.flush()
490             os.utime(testfile.name, (orig_mtime, orig_mtime))
491             self.assertRaises(arvados.errors.StaleWriterStateError,
492                               TestResumableWriter.from_state,
493                               cwriter.current_state())
494
495     def test_resume_fails_with_expired_locator(self):
496         cwriter = TestResumableWriter()
497         state = cwriter.current_state()
498         # Add an expired locator to the state.
499         state['_current_stream_locators'].append(''.join([
500                     'a' * 32, '+1+A', 'b' * 40, '@', '10000000']))
501         self.assertRaises(arvados.errors.StaleWriterStateError,
502                           TestResumableWriter.from_state, state)
503
504     def test_arbitrary_objects_not_resumable(self):
505         cwriter = TestResumableWriter()
506         with open('/dev/null') as badfile:
507             self.assertRaises(arvados.errors.AssertionError,
508                               cwriter.write_file, badfile)
509
510     def test_arbitrary_writes_not_resumable(self):
511         cwriter = TestResumableWriter()
512         self.assertRaises(arvados.errors.AssertionError,
513                           cwriter.write, "badtext")
514
515
class CollectionTestMixin(tutil.ApiClientMock):
    """Shared collection fixtures and a mock API client for collection tests."""

    # All collection fixtures, as loaded by run_test_server.fixture().
    API_COLLECTIONS = run_test_server.fixture('collections')

    # Fields of the 'foo_file' fixture, used as the default collection.
    DEFAULT_COLLECTION = API_COLLECTIONS['foo_file']
    DEFAULT_UUID = DEFAULT_COLLECTION['uuid']
    DEFAULT_DATA_HASH = DEFAULT_COLLECTION['portable_data_hash']
    DEFAULT_MANIFEST = DEFAULT_COLLECTION['manifest_text']

    # Fields of the 'bar_file' fixture, used as an alternate collection.
    ALT_COLLECTION = API_COLLECTIONS['bar_file']
    ALT_DATA_HASH = ALT_COLLECTION['portable_data_hash']
    ALT_MANIFEST = ALT_COLLECTION['manifest_text']

    def api_client_mock(self, status=200):
        """Return the parent's mock API client with Keep services mocked.

        `status` is passed on to mock_keep_services(), together with
        service_type='proxy' and count=1.
        """
        api_mock = super(CollectionTestMixin, self).api_client_mock()
        self.mock_keep_services(
            api_mock, status=status, service_type='proxy', count=1)
        return api_mock
530
531
532 @tutil.skip_sleep
533 class CollectionReaderTestCase(unittest.TestCase, CollectionTestMixin):
534     def mock_get_collection(self, api_mock, code, fixturename):
535         body = self.API_COLLECTIONS.get(fixturename)
536         self._mock_api_call(api_mock.collections().get, code, body)
537
538     def api_client_mock(self, status=200):
539         client = super(CollectionReaderTestCase, self).api_client_mock()
540         self.mock_get_collection(client, status, 'foo_file')
541         return client
542
543     def test_init_default_retries(self):
544         client = self.api_client_mock(200)
545         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
546         reader.manifest_text()
547         client.collections().get().execute.assert_called_with(num_retries=10)
548
549     def test_uuid_init_success(self):
550         client = self.api_client_mock(200)
551         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client,
552                                           num_retries=3)
553         self.assertEqual(self.DEFAULT_COLLECTION['manifest_text'],
554                          reader.manifest_text())
555         client.collections().get().execute.assert_called_with(num_retries=3)
556
557     def test_uuid_init_failure_raises_api_error(self):
558         client = self.api_client_mock(500)
559         with self.assertRaises(arvados.errors.ApiError):
560             reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
561
562     def test_locator_init(self):
563         client = self.api_client_mock(200)
564         # Ensure Keep will not return anything if asked.
565         with tutil.mock_keep_responses(None, 404):
566             reader = arvados.CollectionReader(self.DEFAULT_DATA_HASH,
567                                               api_client=client)
568             self.assertEqual(self.DEFAULT_MANIFEST, reader.manifest_text())
569
570     def test_init_no_fallback_to_keep(self):
571         # Do not look up a collection UUID or PDH in Keep.
572         for key in [self.DEFAULT_UUID, self.DEFAULT_DATA_HASH]:
573             client = self.api_client_mock(404)
574             with tutil.mock_keep_responses(self.DEFAULT_MANIFEST, 200):
575                 with self.assertRaises(arvados.errors.ApiError):
576                     reader = arvados.CollectionReader(key, api_client=client)
577
578     def test_init_num_retries_propagated(self):
579         # More of an integration test...
580         client = self.api_client_mock(200)
581         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client,
582                                           num_retries=3)
583         with tutil.mock_keep_responses('foo', 500, 500, 200):
584             self.assertEqual(b'foo',
585                              b''.join(f.read(9) for f in reader.all_files()))
586
587     def test_read_nonnormalized_manifest_with_collection_reader(self):
588         # client should be able to use CollectionReader on a manifest without normalizing it
589         client = self.api_client_mock(500)
590         nonnormal = ". acbd18db4cc2f85cedef654fccc4a4d8+3+Aabadbadbee@abeebdee 0:3:foo.txt 1:0:bar.txt 0:3:foo.txt\n"
591         reader = arvados.CollectionReader(
592             nonnormal,
593             api_client=client, num_retries=0)
594         # Ensure stripped_manifest() doesn't mangle our manifest in
595         # any way other than stripping hints.
596         self.assertEqual(
597             re.sub(r'\+[^\d\s\+]+', '', nonnormal),
598             reader.stripped_manifest())
599         # Ensure stripped_manifest() didn't mutate our reader.
600         self.assertEqual(nonnormal, reader.manifest_text())
601         # Ensure the files appear in the order given in the manifest.
602         self.assertEqual(
603             [[6, '.', 'foo.txt'],
604              [0, '.', 'bar.txt']],
605             [[f.size(), f.stream_name(), f.name()]
606              for f in reader.all_streams()[0].all_files()])
607
608     def test_read_empty_collection(self):
609         client = self.api_client_mock(200)
610         self.mock_get_collection(client, 200, 'empty')
611         reader = arvados.CollectionReader('d41d8cd98f00b204e9800998ecf8427e+0',
612                                           api_client=client)
613         self.assertEqual('', reader.manifest_text())
614         self.assertEqual(0, len(reader))
615         self.assertFalse(reader)
616
617     def test_api_response(self):
618         client = self.api_client_mock()
619         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
620         self.assertEqual(self.DEFAULT_COLLECTION, reader.api_response())
621
622     def check_open_file(self, coll_file, stream_name, file_name, file_size):
623         self.assertFalse(coll_file.closed, "returned file is not open")
624         self.assertEqual(stream_name, coll_file.stream_name())
625         self.assertEqual(file_name, coll_file.name)
626         self.assertEqual(file_size, coll_file.size())
627
628     def test_open_collection_file_one_argument(self):
629         client = self.api_client_mock(200)
630         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
631         cfile = reader.open('./foo', 'rb')
632         self.check_open_file(cfile, '.', 'foo', 3)
633
634     def test_open_deep_file(self):
635         coll_name = 'collection_with_files_in_subdir'
636         client = self.api_client_mock(200)
637         self.mock_get_collection(client, 200, coll_name)
638         reader = arvados.CollectionReader(
639             self.API_COLLECTIONS[coll_name]['uuid'], api_client=client)
640         cfile = reader.open('./subdir2/subdir3/file2_in_subdir3.txt', 'rb')
641         self.check_open_file(cfile, './subdir2/subdir3', 'file2_in_subdir3.txt',
642                              32)
643
644     def test_open_nonexistent_stream(self):
645         client = self.api_client_mock(200)
646         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
647         self.assertRaises(IOError, reader.open, './nonexistent/foo')
648
649     def test_open_nonexistent_file(self):
650         client = self.api_client_mock(200)
651         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
652         self.assertRaises(IOError, reader.open, 'nonexistent')
653
654
@tutil.skip_sleep
class CollectionWriterTestCase(unittest.TestCase, CollectionTestMixin):
    """Tests for arvados.CollectionWriter run against mocked API and Keep responses."""

    def mock_keep(self, body, *codes, **headers):
        """Wrap tutil.mock_keep_responses, defaulting x-keep-replicas-stored to 2."""
        if 'x-keep-replicas-stored' not in headers:
            headers['x-keep-replicas-stored'] = 2
        return tutil.mock_keep_responses(body, *codes, **headers)

    def foo_writer(self, **kwargs):
        """Return a CollectionWriter that has b'foo' written to a file named 'foo'.

        Keyword arguments go to the CollectionWriter constructor; a mock
        API client is supplied when the caller does not pass api_client.
        """
        fallback_client = self.api_client_mock()
        kwargs.setdefault('api_client', fallback_client)
        collection_writer = arvados.CollectionWriter(**kwargs)
        collection_writer.start_new_file('foo')
        collection_writer.write(b'foo')
        return collection_writer

    def test_write_whole_collection(self):
        """finish() returns the default data hash when Keep answers 200 twice."""
        collection_writer = self.foo_writer()
        with self.mock_keep(self.DEFAULT_DATA_HASH, 200, 200):
            result = collection_writer.finish()
            self.assertEqual(self.DEFAULT_DATA_HASH, result)

    def test_write_no_default(self):
        """finish() raises KeepWriteError when Keep answers 500."""
        collection_writer = self.foo_writer()
        with self.mock_keep(None, 500), \
                self.assertRaises(arvados.errors.KeepWriteError):
            collection_writer.finish()

    def test_write_insufficient_replicas_via_proxy(self):
        """Two stored replicas are not enough when three were requested."""
        collection_writer = self.foo_writer(replication=3)
        stored_header = {'x-keep-replicas-stored': 2}
        with self.mock_keep(None, 200, **stored_header), \
                self.assertRaises(arvados.errors.KeepWriteError):
            collection_writer.manifest_text()

    def test_write_insufficient_replicas_via_disks(self):
        """Two disk services storing one replica each cannot satisfy replication=3."""
        api = mock.MagicMock(name='api_client')
        stored_header = {'x-keep-replicas-stored': 1}
        with self.mock_keep(None, 200, 200, **stored_header):
            self.mock_keep_services(
                api, status=200, service_type='disk', count=2)
            collection_writer = self.foo_writer(api_client=api, replication=3)
            with self.assertRaises(arvados.errors.KeepWriteError):
                collection_writer.manifest_text()

    def test_write_three_replicas(self):
        """With three 500s then three 200s mocked, the Keep mock is called 6 times."""
        api = mock.MagicMock(name='api_client')
        stored_header = {'x-keep-replicas-stored': 1}
        with self.mock_keep(
                "", 500, 500, 500, 200, 200, 200,
                **stored_header) as keep_requests:
            self.mock_keep_services(
                api, status=200, service_type='disk', count=6)
            collection_writer = self.foo_writer(api_client=api, replication=3)
            collection_writer.manifest_text()
            self.assertEqual(6, keep_requests.call_count)

    def test_write_whole_collection_through_retries(self):
        """finish() succeeds with num_retries=2 when 500s precede each 200."""
        collection_writer = self.foo_writer(num_retries=2)
        with self.mock_keep(
                self.DEFAULT_DATA_HASH, 500, 500, 200, 500, 500, 200):
            result = collection_writer.finish()
            self.assertEqual(self.DEFAULT_DATA_HASH, result)

    def test_flush_data_retries(self):
        """flush_data() gets past one mocked 500 when num_retries=2."""
        collection_writer = self.foo_writer(num_retries=2)
        # The second token of the default manifest is the block locator.
        block_locator = self.DEFAULT_MANIFEST.split()[1]
        with self.mock_keep(block_locator, 500, 200):
            collection_writer.flush_data()
        self.assertEqual(self.DEFAULT_MANIFEST,
                         collection_writer.manifest_text())

    def test_one_open(self):
        """A file opened on the writer is closed by its context and lands in the manifest."""
        api = self.api_client_mock()
        collection_writer = arvados.CollectionWriter(api)
        with collection_writer.open('out') as out_file:
            self.assertEqual('.', collection_writer.current_stream_name())
            self.assertEqual('out', collection_writer.current_file_name())
            out_file.write(b'test data')
            data_loc = tutil.str_keep_locator('test data')
        self.assertTrue(out_file.closed, "writer file not closed after context")
        with self.assertRaises(ValueError):
            out_file.write('extra text')
        with self.mock_keep(data_loc, 200):
            expected_manifest = ". {} 0:9:out\n".format(data_loc)
            self.assertEqual(expected_manifest,
                             collection_writer.manifest_text())

    def test_open_writelines(self):
        """writelines() output is stored as a single 6-byte file."""
        api = self.api_client_mock()
        collection_writer = arvados.CollectionWriter(api)
        with collection_writer.open('six') as out_file:
            out_file.writelines(['12', '34', '56'])
            data_loc = tutil.str_keep_locator('123456')
        with self.mock_keep(data_loc, 200):
            expected_manifest = ". {} 0:6:six\n".format(data_loc)
            self.assertEqual(expected_manifest,
                             collection_writer.manifest_text())

    def test_open_flush(self):
        """flush() between two writes yields two block locators for one file."""
        api = self.api_client_mock()
        first_loc = tutil.str_keep_locator('flush1')
        second_loc = tutil.str_keep_locator('flush2')
        with self.mock_keep((first_loc, 200), (second_loc, 200)):
            collection_writer = arvados.CollectionWriter(api)
            with collection_writer.open('flush_test') as out_file:
                out_file.write(b'flush1')
                out_file.flush()
                out_file.write(b'flush2')
            expected_manifest = ". {} {} 0:12:flush_test\n".format(
                first_loc, second_loc)
            self.assertEqual(expected_manifest,
                             collection_writer.manifest_text())

    def test_two_opens_same_stream(self):
        """Two files opened one after another in '.' share a single data block."""
        api = self.api_client_mock()
        collection_writer = arvados.CollectionWriter(api)
        for file_name, payload in (('1', b'1st'), ('2', b'2nd')):
            with collection_writer.open('.', file_name) as out_file:
                out_file.write(payload)
        data_loc = tutil.str_keep_locator('1st2nd')
        with self.mock_keep(data_loc, 200):
            expected_manifest = ". {} 0:3:1 3:3:2\n".format(data_loc)
            self.assertEqual(expected_manifest,
                             collection_writer.manifest_text())

    def test_two_opens_two_streams(self):
        """Files opened in different streams produce one manifest line per stream."""
        api = self.api_client_mock()
        root_loc = tutil.str_keep_locator('file')
        dir_loc = tutil.str_keep_locator('indir')
        with self.mock_keep((root_loc, 200), (dir_loc, 200)):
            collection_writer = arvados.CollectionWriter(api)
            with collection_writer.open('file') as out_file:
                out_file.write(b'file')
            with collection_writer.open('./dir', 'indir') as out_file:
                out_file.write(b'indir')
            expected = ". {} 0:4:file\n./dir {} 0:5:indir\n".format(
                root_loc, dir_loc)
            self.assertEqual(expected, collection_writer.manifest_text())

    def test_dup_open_fails(self):
        """Opening a second file while the first is still open raises AssertionError."""
        api = self.api_client_mock()
        collection_writer = arvados.CollectionWriter(api)
        # Hold a reference so the first file stays open for the second call.
        still_open = collection_writer.open('one')
        with self.assertRaises(arvados.errors.AssertionError):
            collection_writer.open('two')
787
788
class CollectionMethods(run_test_server.TestCaseWithServers):
    """Tests for Collection's keys()/values()/items() and saved-metadata getters."""

    def test_keys_values_items_support_indexing(self):
        """keys() can be unpacked, and values() and items() can be indexed."""
        c = Collection()
        with c.open('foo', 'wb') as f:
            f.write(b'foo')
        with c.open('bar', 'wb') as f:
            f.write(b'bar')
        self.assertEqual(2, len(c.keys()))
        # This module imports unittest.mock and so only runs on Python 3;
        # the former Python 2 branch that indexed keys() was unreachable.
        fn0, fn1 = c.keys()
        self.assertEqual(2, len(c.values()))
        # Indexing values() is the behaviour exercised here; the elements
        # themselves are not inspected further.
        f0 = c.values()[0]
        f1 = c.values()[1]
        self.assertEqual(2, len(c.items()))
        self.assertEqual(fn0, c.items()[0][0])
        self.assertEqual(fn1, c.items()[1][0])

    def test_get_properties(self):
        """get_properties() is empty at first and reflects save_new(properties=...)."""
        c = Collection()
        self.assertEqual(c.get_properties(), {})
        c.save_new(properties={"foo":"bar"})
        self.assertEqual(c.get_properties(), {"foo":"bar"})

    def test_get_trash_at(self):
        """get_trash_at() is None at first and reflects save_new(trash_at=...)."""
        c = Collection()
        self.assertEqual(c.get_trash_at(), None)
        c.save_new(trash_at=datetime.datetime(2111, 1, 1, 11, 11, 11, 111111))
        self.assertEqual(c.get_trash_at(), ciso8601.parse_datetime('2111-01-01T11:11:11.111111000Z'))
822
823
class CollectionOpenModes(run_test_server.TestCaseWithServers):
    """Tests for the mode strings accepted and rejected by Collection.open().

    Each mode is run inside ``subTest`` so a failure report names the
    mode that failed and the remaining modes are still exercised.
    """

    def test_open_binary_modes(self):
        """Each binary write/append mode opens 'foo' and accepts a bytes write."""
        c = Collection()
        for mode in ['wb', 'wb+', 'ab', 'ab+']:
            with self.subTest(mode=mode):
                with c.open('foo', mode) as f:
                    f.write(b'foo')

    def test_open_invalid_modes(self):
        """Each malformed mode (including '' and None) makes open() raise."""
        c = Collection()
        for mode in ['+r', 'aa', '++', 'r+b', 'beer', '', None]:
            with self.subTest(mode=mode):
                with self.assertRaises(Exception):
                    c.open('foo', mode)

    def test_open_text_modes(self):
        """Text modes read back 'foo', or write 'bar' and read it back from offset 0."""
        c = Collection()
        with c.open('foo', 'wb') as f:
            f.write('foo')
        for mode in ['r', 'rt', 'r+', 'rt+', 'w', 'wt', 'a', 'at']:
            with self.subTest(mode=mode):
                with c.open('foo', mode) as f:
                    if mode[0] == 'r' and '+' not in mode:
                        # Plain read modes: the content is still 'foo'.
                        self.assertEqual('foo', f.read(3))
                    else:
                        f.write('bar')
                        f.seek(0, os.SEEK_SET)
                        self.assertEqual('bar', f.read(3))
850
851
class TextModes(run_test_server.TestCaseWithServers):
    """Text-mode reads and writes of multi-byte characters across tiny Keep blocks."""

    def setUp(self):
        """Shrink the Keep block size to 4 bytes, remembering the previous value."""
        self._saved_block_size = arvados.config.KEEP_BLOCK_SIZE
        arvados.config.KEEP_BLOCK_SIZE = 4
        # This module imports unittest.mock and so only runs on Python 3;
        # the former Python 2 unicodedata fallback was unreachable.
        self.sailboat = '\N{SAILBOAT}'
        self.snowman = '\N{SNOWMAN}'

    def tearDown(self):
        """Restore the block size that was in effect before setUp ran."""
        arvados.config.KEEP_BLOCK_SIZE = self._saved_block_size

    def test_read_sailboat_across_block_boundary(self):
        """A character split across two blocks is decoded intact by readline()."""
        c = Collection()
        data = self.sailboat.encode('utf-8')
        with c.open('sailboats', 'wb') as f:
            f.write(data)
            # Write the second sailboat in two pieces.
            f.write(data[:1])
            f.write(data[1:])
            f.write(b'\n')
        # The manifest must show a 4-byte block followed by a 3-byte block.
        self.assertRegex(c.portable_manifest_text(), r'\+4 .*\+3 ')

        with c.open('sailboats', 'r') as f:
            string = f.readline()
        self.assertEqual(string, self.sailboat+self.sailboat+'\n')

    def test_write_snowman_across_block_boundary(self):
        """Text written in one call is split into blocks and read back line by line."""
        c = Collection()
        data = self.snowman
        with c.open('snowmany', 'w') as f:
            f.write(data+data+'\n'+data+'\n')
        # The manifest must show two 4-byte blocks followed by a 3-byte block.
        self.assertRegex(c.portable_manifest_text(), r'\+4 .*\+4 .*\+3 ')

        with c.open('snowmany', 'r') as f:
            self.assertEqual(f.readline(), self.snowman+self.snowman+'\n')
            self.assertEqual(f.readline(), self.snowman+'\n')
895
896
897 class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
898
899     def test_replication_desired_kept_on_load(self):
900         m = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n'
901         c1 = Collection(m, replication_desired=1)
902         c1.save_new()
903         loc = c1.manifest_locator()
904         c2 = Collection(loc)
905         self.assertEqual(c1.manifest_text(strip=True), c2.manifest_text(strip=True))
906         self.assertEqual(c1.replication_desired, c2.replication_desired)
907
908     def test_replication_desired_not_loaded_if_provided(self):
909         m = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n'
910         c1 = Collection(m, replication_desired=1)
911         c1.save_new()
912         loc = c1.manifest_locator()
913         c2 = Collection(loc, replication_desired=2)
914         self.assertEqual(c1.manifest_text(strip=True), c2.manifest_text(strip=True))
915         self.assertNotEqual(c1.replication_desired, c2.replication_desired)
916
917     def test_storage_classes_desired_kept_on_load(self):
918         m = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n'
919         c1 = Collection(m, storage_classes_desired=['archival'])
920         c1.save_new()
921         loc = c1.manifest_locator()
922         c2 = Collection(loc)
923         self.assertEqual(c1.manifest_text(strip=True), c2.manifest_text(strip=True))
924         self.assertEqual(c1.storage_classes_desired(), c2.storage_classes_desired())
925
926     def test_storage_classes_change_after_save(self):
927         m = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n'
928         c1 = Collection(m, storage_classes_desired=['archival'])
929         c1.save_new()
930         loc = c1.manifest_locator()
931         c2 = Collection(loc)
932         self.assertEqual(['archival'], c2.storage_classes_desired())
933         c2.save(storage_classes=['highIO'])
934         self.assertEqual(['highIO'], c2.storage_classes_desired())
935         c3 = Collection(loc)
936         self.assertEqual(c1.manifest_text(strip=True), c3.manifest_text(strip=True))
937         self.assertEqual(['highIO'], c3.storage_classes_desired())
938
939     def test_storage_classes_desired_not_loaded_if_provided(self):
940         m = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n'
941         c1 = Collection(m, storage_classes_desired=['archival'])
942         c1.save_new()
943         loc = c1.manifest_locator()
944         c2 = Collection(loc, storage_classes_desired=['default'])
945         self.assertEqual(c1.manifest_text(strip=True), c2.manifest_text(strip=True))
946         self.assertNotEqual(c1.storage_classes_desired(), c2.storage_classes_desired())
947
948     def test_init_manifest(self):
949         m1 = """. 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt
950 . 085c37f02916da1cad16f93c54d899b7+41 0:41:md5sum.txt
951 . 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md5sum.txt
952 """
953         self.assertEqual(m1, CollectionReader(m1).manifest_text(normalize=False))
954         self.assertEqual(". 5348b82a029fd9e971a811ce1f71360b+43 085c37f02916da1cad16f93c54d899b7+41 8b22da26f9f433dea0a10e5ec66d73ba+43 0:127:md5sum.txt\n", CollectionReader(m1).manifest_text(normalize=True))
955
956     def test_init_manifest_with_collision(self):
957         m1 = """. 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt
958 ./md5sum.txt 085c37f02916da1cad16f93c54d899b7+41 0:41:md5sum.txt
959 """
960         with self.assertRaises(arvados.errors.ArgumentError):
961             self.assertEqual(m1, CollectionReader(m1))
962
963     def test_init_manifest_with_error(self):
964         m1 = """. 0:43:md5sum.txt"""
965         with self.assertRaises(arvados.errors.ArgumentError):
966             self.assertEqual(m1, CollectionReader(m1))
967
968     def test_remove(self):
969         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n')
970         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n", c.portable_manifest_text())
971         self.assertIn("count1.txt", c)
972         c.remove("count1.txt")
973         self.assertNotIn("count1.txt", c)
974         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c.portable_manifest_text())
975         with self.assertRaises(arvados.errors.ArgumentError):
976             c.remove("")
977
978     def test_remove_recursive(self):
979         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:a/b/c/d/efg.txt 0:10:xyz.txt\n')
980         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:xyz.txt\n./a/b/c/d 781e5e245d69b566979b86e28d23f2c7+10 0:10:efg.txt\n", c.portable_manifest_text())
981         self.assertIn("a", c)
982         self.assertEqual(1, len(c["a"].keys()))
983         # cannot remove non-empty directory with default recursive=False
984         with self.assertRaises(OSError):
985             c.remove("a/b")
986         with self.assertRaises(OSError):
987             c.remove("a/b/c/d")
988         c.remove("a/b", recursive=True)
989         self.assertEqual(0, len(c["a"].keys()))
990         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:xyz.txt\n./a d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n", c.portable_manifest_text())
991
992     def test_find(self):
993         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n')
994         self.assertIs(c.find("."), c)
995         self.assertIs(c.find("./count1.txt"), c["count1.txt"])
996         self.assertIs(c.find("count1.txt"), c["count1.txt"])
997         with self.assertRaises(IOError):
998             c.find("/.")
999         with self.assertRaises(arvados.errors.ArgumentError):
1000             c.find("")
1001         self.assertIs(c.find("./nonexistant.txt"), None)
1002         self.assertIs(c.find("./nonexistantsubdir/nonexistant.txt"), None)
1003
1004     def test_escaped_paths_dont_get_unescaped_on_manifest(self):
1005         # Dir & file names are literally '\056' (escaped form: \134056)
1006         manifest = './\\134056\\040Test d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\134056\n'
1007         c = Collection(manifest)
1008         self.assertEqual(c.portable_manifest_text(), manifest)
1009
1010     def test_other_special_chars_on_file_token(self):
1011         cases = [
1012             ('\\000', '\0'),
1013             ('\\011', '\t'),
1014             ('\\012', '\n'),
1015             ('\\072', ':'),
1016             ('\\134400', '\\400'),
1017         ]
1018         for encoded, decoded in cases:
1019             manifest = '. d41d8cd98f00b204e9800998ecf8427e+0 0:0:some%sfile.txt\n' % encoded
1020             c = Collection(manifest)
1021             self.assertEqual(c.portable_manifest_text(), manifest)
1022             self.assertIn('some%sfile.txt' % decoded, c.keys())
1023
1024     def test_escaped_paths_do_get_unescaped_on_listing(self):
1025         # Dir & file names are literally '\056' (escaped form: \134056)
1026         manifest = './\\134056\\040Test d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\134056\n'
1027         c = Collection(manifest)
1028         self.assertIn('\\056 Test', c.keys())
1029         self.assertIn('\\056', c['\\056 Test'].keys())
1030
1031     def test_make_empty_dir_with_escaped_chars(self):
1032         c = Collection()
1033         c.mkdirs('./Empty\\056Dir')
1034         self.assertEqual(c.portable_manifest_text(),
1035                          './Empty\\134056Dir d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n')
1036
1037     def test_make_empty_dir_with_spaces(self):
1038         c = Collection()
1039         c.mkdirs('./foo bar/baz waz')
1040         self.assertEqual(c.portable_manifest_text(),
1041                          './foo\\040bar/baz\\040waz d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n')
1042
1043     def test_remove_in_subdir(self):
1044         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
1045         c.remove("foo/count2.txt")
1046         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n", c.portable_manifest_text())
1047
1048     def test_remove_empty_subdir(self):
1049         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
1050         c.remove("foo/count2.txt")
1051         c.remove("foo")
1052         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.portable_manifest_text())
1053
1054     def test_remove_nonempty_subdir(self):
1055         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
1056         with self.assertRaises(IOError):
1057             c.remove("foo")
1058         c.remove("foo", recursive=True)
1059         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.portable_manifest_text())
1060
1061     def test_copy_to_file_in_dir(self):
1062         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1063         c.copy("count1.txt", "foo/count2.txt")
1064         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c.portable_manifest_text())
1065
1066     def test_copy_file(self):
1067         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1068         c.copy("count1.txt", "count2.txt")
1069         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n", c.portable_manifest_text())
1070
1071     def test_copy_to_existing_dir(self):
1072         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
1073         c.copy("count1.txt", "foo")
1074         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n", c.portable_manifest_text())
1075
1076     def test_copy_to_new_dir(self):
1077         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1078         c.copy("count1.txt", "foo/")
1079         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.portable_manifest_text())
1080
1081     def test_rename_file(self):
1082         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1083         c.rename("count1.txt", "count2.txt")
1084         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c.manifest_text())
1085
1086     def test_move_file_to_dir(self):
1087         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1088         c.mkdirs("foo")
1089         c.rename("count1.txt", "foo/count2.txt")
1090         self.assertEqual("./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c.manifest_text())
1091
1092     def test_move_file_to_other(self):
1093         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1094         c2 = Collection()
1095         c2.rename("count1.txt", "count2.txt", source_collection=c1)
1096         self.assertEqual("", c1.manifest_text())
1097         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c2.manifest_text())
1098
1099     def test_clone(self):
1100         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
1101         cl = c.clone()
1102         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", cl.portable_manifest_text())
1103
1104     def test_diff_del_add(self):
1105         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1106         c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
1107         d = c2.diff(c1)
1108         self.assertEqual(sorted(d), [
1109             ('add', './count1.txt', c1["count1.txt"]),
1110             ('del', './count2.txt', c2["count2.txt"]),
1111         ])
1112         d = c1.diff(c2)
1113         self.assertEqual(sorted(d), [
1114             ('add', './count2.txt', c2["count2.txt"]),
1115             ('del', './count1.txt', c1["count1.txt"]),
1116         ])
1117         self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1118         c1.apply(d)
1119         self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1120
1121     def test_diff_same(self):
1122         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1123         c2 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1124         d = c2.diff(c1)
1125         self.assertEqual(d, [('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"])])
1126         d = c1.diff(c2)
1127         self.assertEqual(d, [('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"])])
1128
1129         self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1130         c1.apply(d)
1131         self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1132
1133     def test_diff_mod(self):
1134         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1135         c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count1.txt\n')
1136         d = c2.diff(c1)
1137         self.assertEqual(d, [('mod', './count1.txt', c2["count1.txt"], c1["count1.txt"])])
1138         d = c1.diff(c2)
1139         self.assertEqual(d, [('mod', './count1.txt', c1["count1.txt"], c2["count1.txt"])])
1140
1141         self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1142         c1.apply(d)
1143         self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1144
1145     def test_diff_add(self):
1146         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1147         c2 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 5348b82a029fd9e971a811ce1f71360b+43 0:10:count1.txt 10:20:count2.txt\n')
1148         d = c2.diff(c1)
1149         self.assertEqual(sorted(d), [
1150             ('del', './count2.txt', c2["count2.txt"]),
1151             ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
1152         ])
1153         d = c1.diff(c2)
1154         self.assertEqual(sorted(d), [
1155             ('add', './count2.txt', c2["count2.txt"]),
1156             ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
1157         ])
1158
1159         self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1160         c1.apply(d)
1161         self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1162
1163     def test_diff_add_in_subcollection(self):
1164         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1165         c2 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
1166         d = c2.diff(c1)
1167         self.assertEqual(sorted(d), [
1168             ('del', './foo', c2["foo"]),
1169             ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
1170         ])
1171         d = c1.diff(c2)
1172         self.assertEqual(sorted(d), [
1173             ('add', './foo', c2["foo"]),
1174             ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
1175         ])
1176         self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1177         c1.apply(d)
1178         self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1179
1180     def test_diff_del_add_in_subcollection(self):
1181         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
1182         c2 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 5348b82a029fd9e971a811ce1f71360b+43 0:3:count3.txt\n')
1183         d = c2.diff(c1)
1184         self.assertEqual(sorted(d), [
1185             ('add', './foo/count2.txt', c1.find("foo/count2.txt")),
1186             ('del', './foo/count3.txt', c2.find("foo/count3.txt")),
1187             ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
1188         ])
1189         d = c1.diff(c2)
1190         self.assertEqual(sorted(d), [
1191             ('add', './foo/count3.txt', c2.find("foo/count3.txt")),
1192             ('del', './foo/count2.txt', c1.find("foo/count2.txt")),
1193             ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
1194         ])
1195
1196         self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1197         c1.apply(d)
1198         self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1199
1200     def test_diff_mod_in_subcollection(self):
1201         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
1202         c2 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:3:foo\n')
1203         d = c2.diff(c1)
1204         self.assertEqual(sorted(d), [
1205             ('mod', './foo', c2["foo"], c1["foo"]),
1206             ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
1207         ])
1208         d = c1.diff(c2)
1209         self.assertEqual(sorted(d), [
1210             ('mod', './foo', c1["foo"], c2["foo"]),
1211             ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
1212         ])
1213
1214         self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1215         c1.apply(d)
1216         self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1217
1218     def test_conflict_keep_local_change(self):
1219         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1220         c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
1221         d = c1.diff(c2)
1222         self.assertEqual(sorted(d), [
1223             ('add', './count2.txt', c2["count2.txt"]),
1224             ('del', './count1.txt', c1["count1.txt"]),
1225         ])
1226         f = c1.open("count1.txt", "wb")
1227         f.write(b"zzzzz")
1228
1229         # c1 changed, so it should not be deleted.
1230         c1.apply(d)
1231         self.assertEqual(c1.portable_manifest_text(), ". 95ebc3c7b3b9f1d2c40fec14415d3cb8+5 5348b82a029fd9e971a811ce1f71360b+43 0:5:count1.txt 5:10:count2.txt\n")
1232
1233     def test_conflict_mod(self):
1234         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt')
1235         c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count1.txt')
1236         d = c1.diff(c2)
1237         self.assertEqual(d, [('mod', './count1.txt', c1["count1.txt"], c2["count1.txt"])])
1238         f = c1.open("count1.txt", "wb")
1239         f.write(b"zzzzz")
1240
1241         # c1 changed, so c2 mod will go to a conflict file
1242         c1.apply(d)
1243         self.assertRegex(
1244             c1.portable_manifest_text(),
1245             r"\. 95ebc3c7b3b9f1d2c40fec14415d3cb8\+5 5348b82a029fd9e971a811ce1f71360b\+43 0:5:count1\.txt 5:10:count1\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")
1246
1247     def test_conflict_add(self):
1248         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
1249         c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count1.txt\n')
1250         d = c1.diff(c2)
1251         self.assertEqual(sorted(d), [
1252             ('add', './count1.txt', c2["count1.txt"]),
1253             ('del', './count2.txt', c1["count2.txt"]),
1254         ])
1255         f = c1.open("count1.txt", "wb")
1256         f.write(b"zzzzz")
1257
1258         # c1 added count1.txt, so c2 add will go to a conflict file
1259         c1.apply(d)
1260         self.assertRegex(
1261             c1.portable_manifest_text(),
1262             r"\. 95ebc3c7b3b9f1d2c40fec14415d3cb8\+5 5348b82a029fd9e971a811ce1f71360b\+43 0:5:count1\.txt 5:10:count1\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")
1263
1264     def test_conflict_del(self):
1265         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt')
1266         c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count1.txt')
1267         d = c1.diff(c2)
1268         self.assertEqual(d, [('mod', './count1.txt', c1["count1.txt"], c2["count1.txt"])])
1269         c1.remove("count1.txt")
1270
1271         # c1 deleted, so c2 mod will go to a conflict file
1272         c1.apply(d)
1273         self.assertRegex(
1274             c1.portable_manifest_text(),
1275             r"\. 5348b82a029fd9e971a811ce1f71360b\+43 0:10:count1\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")
1276
1277     def test_notify(self):
1278         c1 = Collection()
1279         events = []
1280         c1.subscribe(lambda event, collection, name, item: events.append((event, collection, name, item)))
1281         f = c1.open("foo.txt", "wb")
1282         self.assertEqual(events[0], (arvados.collection.ADD, c1, "foo.txt", f.arvadosfile))
1283
1284     def test_open_w(self):
1285         c1 = Collection(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n")
1286         self.assertEqual(c1["count1.txt"].size(), 10)
1287         c1.open("count1.txt", "wb").close()
1288         self.assertEqual(c1["count1.txt"].size(), 0)
1289
1290
class NewCollectionTestCaseWithServersAndTokens(run_test_server.TestCaseWithServers):
    """Collection tests that inspect signed block locators in manifests.

    NOTE(review): MAIN_SERVER / KEEP_SERVER are presumably what asks
    TestCaseWithServers to start the API and Keep test servers -- confirm
    against run_test_server.
    """
    MAIN_SERVER = {}
    KEEP_SERVER = {}
    # Block locator with a "+A<40 hex digits>@<8 hex digits>" hint.
    local_locator_re = r"[0-9a-f]{32}\+\d+\+A[a-f0-9]{40}@[a-f0-9]{8}"
    # Block locator with a "+R<5 lowercase letters>-<40 hex digits>@<8 hex
    # digits>" hint; the "+Remote-..." locators built below match it.
    remote_locator_re = r"[0-9a-f]{32}\+\d+\+R[a-z]{5}-[a-f0-9]{40}@[a-f0-9]{8}"

    def setUp(self):
        super().setUp()
        # Remember the real KeepClient.put before any test patches it, so
        # mocks can delegate to it through side_effect.
        self.keep_put = arvados.keep.KeepClient.put

    def _count_locators(self, locator_re, collection):
        """Return how many times `locator_re` matches in the manifest text
        of `collection` (calls `collection.manifest_text()` once)."""
        return len(re.findall(locator_re, collection.manifest_text()))

    @mock.patch('arvados.keep.KeepClient.put', autospec=True)
    def test_storage_classes_desired(self, put_mock):
        # Delegate to the real put so the block is actually stored, while
        # the mock records the arguments of the call.
        put_mock.side_effect = self.keep_put
        c = Collection(storage_classes_desired=['default'])
        with c.open("file.txt", 'wb') as f:
            # The file is opened in binary mode, so write bytes.
            f.write(b'content')
        c.save_new()
        _, kwargs = put_mock.call_args
        self.assertEqual(['default'], kwargs['classes'])

    @mock.patch('arvados.keep.KeepClient.put', autospec=True)
    def test_repacked_block_submission_get_permission_token(self, mocked_put):
        '''
        Make sure that those blocks that are committed after repacking small ones,
        get their permission tokens assigned on the collection manifest.
        '''
        def wrapped_keep_put(*args, **kwargs):
            # Simulate slow put operations
            time.sleep(1)
            return self.keep_put(*args, **kwargs)

        mocked_put.side_effect = wrapped_keep_put
        c = Collection()
        # Write 70 files ~1MiB each so we force to produce 1 big block by repacking
        # small ones before finishing the upload.
        for i in range(70):
            f = c.open("file_{}.txt".format(i), 'wb')
            # One random letter repeated; encoded because the file is binary.
            f.write(random.choice('abcdefghijklmnopqrstuvwxyz').encode() * (2**20+i))
            f.close(flush=False)
        # We should get 2 blocks with their tokens
        self.assertEqual(self._count_locators(self.local_locator_re, c), 2)

    @mock.patch('arvados.keep.KeepClient.refresh_signature')
    def test_copy_remote_blocks_on_save_new(self, rs_mock):
        remote_block_loc = "acbd18db4cc2f85cedef654fccc4a4d8+3+Remote-" + "a" * 40 + "@abcdef01"
        local_block_loc = "acbd18db4cc2f85cedef654fccc4a4d8+3+A" + "b" * 40 + "@abcdef01"
        # The mocked refresh_signature hands back the local locator.
        rs_mock.return_value = local_block_loc
        c = Collection(". " + remote_block_loc + " 0:3:foofile.txt\n")
        self.assertEqual(self._count_locators(self.remote_locator_re, c), 1)
        self.assertEqual(self._count_locators(self.local_locator_re, c), 0)
        c.save_new()
        rs_mock.assert_called()
        # After saving, the manifest must only carry the local locator.
        self.assertEqual(self._count_locators(self.remote_locator_re, c), 0)
        self.assertEqual(self._count_locators(self.local_locator_re, c), 1)

    @mock.patch('arvados.keep.KeepClient.refresh_signature')
    def test_copy_remote_blocks_on_save(self, rs_mock):
        remote_block_loc = "acbd18db4cc2f85cedef654fccc4a4d8+3+Remote-" + "a" * 40 + "@abcdef01"
        local_block_loc = "acbd18db4cc2f85cedef654fccc4a4d8+3+A" + "b" * 40 + "@abcdef01"
        rs_mock.return_value = local_block_loc
        # Remote collection
        remote_c = Collection(". " + remote_block_loc + " 0:3:foofile.txt\n")
        self.assertEqual(self._count_locators(self.remote_locator_re, remote_c), 1)
        # Local collection
        local_c = Collection()
        with local_c.open('barfile.txt', 'wb') as f:
            # Binary-mode file, so write bytes.
            f.write(b'bar')
        local_c.save_new()
        self.assertEqual(self._count_locators(self.local_locator_re, local_c), 1)
        self.assertEqual(self._count_locators(self.remote_locator_re, local_c), 0)
        # Copy remote file to local collection
        local_c.copy('./foofile.txt', './copied/foofile.txt', remote_c)
        self.assertEqual(self._count_locators(self.local_locator_re, local_c), 1)
        self.assertEqual(self._count_locators(self.remote_locator_re, local_c), 1)
        # Save local collection: remote block should be copied
        local_c.save()
        rs_mock.assert_called()
        self.assertEqual(self._count_locators(self.local_locator_re, local_c), 2)
        self.assertEqual(self._count_locators(self.remote_locator_re, local_c), 0)
1380
1381
class NewCollectionTestCaseWithServers(run_test_server.TestCaseWithServers):
    """Version-preservation and small-block-packing tests for Collection."""

    def test_preserve_version_on_save(self):
        coll = Collection()

        # A brand new collection saved with preserve_version=True is version 1.
        coll.save_new(preserve_version=True)
        record = arvados.api().collections().get(uuid=coll.manifest_locator()).execute()
        self.assertEqual(record['version'], 1)
        self.assertEqual(record['preserve_version'], True)

        # Each following save adds one file and must bump the version by
        # one; preserve_version must mirror the flag given to that save.
        follow_up_saves = [
            ("foo.txt", b"foo", True, 2),
            ("bar.txt", b"bar", False, 3),
        ]
        for filename, payload, preserve, expected_version in follow_up_saves:
            with coll.open(filename, "wb") as writer:
                writer.write(payload)
            coll.save(preserve_version=preserve)
            record = arvados.api().collections().get(uuid=coll.manifest_locator()).execute()
            self.assertEqual(record['version'], expected_version)
            self.assertEqual(record['preserve_version'], preserve)

    def test_get_manifest_text_only_committed(self):
        coll = Collection()
        # Keyword arguments shared by both _get_manifest_text() calls.
        committed_only = dict(strip=False, normalize=False, only_committed=True)

        with coll.open("count.txt", "wb") as count_writer:
            # First file: written and flushed, so its block is committed.
            with coll.open("foo.txt", "wb") as foo_writer:
                foo_writer.write(b"foo")
                foo_writer.flush() # Force block commit
            count_writer.write(b"0123456789")
            # count.txt has not been flushed yet, so it must be listed as
            # an empty file (0:0) in the committed-only manifest.
            partial_manifest = coll._get_manifest_text(".", **committed_only)
            self.assertEqual(
                partial_manifest,
                '. acbd18db4cc2f85cedef654fccc4a4d8+3 0:0:count.txt 0:3:foo.txt\n')
            # Flush count.txt as well before leaving the with block.
            count_writer.flush() # Force block commit

        # Both blocks are committed now and both files have real content.
        full_manifest = coll._get_manifest_text(".", **committed_only)
        self.assertEqual(
            full_manifest,
            ". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:10:count.txt 10:3:foo.txt\n")

    def test_only_small_blocks_are_packed_together(self):
        coll = Collection()

        # Two small files, closed without flushing.
        count_writer = coll.open("count.txt", "wb")
        count_writer.write(b"0123456789")
        count_writer.close(flush=False)

        foo_writer = coll.open("foo.txt", "wb")
        foo_writer.write(b"foo")
        foo_writer.close(flush=False)

        # A big file must end up in a block of its own instead of being
        # packed together with the two small ones.
        big_writer = coll.open("bigfile.txt", "wb")
        big_writer.write(b"x" * 1024 * 1024 * 33) # 33 MB > KEEP_BLOCK_SIZE/2
        big_writer.close(flush=False)

        manifest = coll.manifest_text(".")
        self.assertEqual(
            manifest,
            '. 2d303c138c118af809f39319e5d507e9+34603008 a8430a058b8fbf408e1931b794dbd6fb+13 0:34603008:bigfile.txt 34603008:10:count.txt 34603018:3:foo.txt\n')

    def test_flush_after_small_block_packing(self):
        coll = Collection()

        # Two small files, closed without flushing.
        count_writer = coll.open("count.txt", "wb")
        count_writer.write(b"0123456789")
        count_writer.close(flush=False)

        foo_writer = coll.open("foo.txt", "wb")
        foo_writer.write(b"foo")
        foo_writer.close(flush=False)

        # Both files share a single 13-byte block.
        packed_manifest = coll.manifest_text()
        self.assertEqual(
            packed_manifest,
            '. a8430a058b8fbf408e1931b794dbd6fb+13 0:10:count.txt 10:3:foo.txt\n')

        # Reopen one of them and close it with an explicit flush, without
        # writing anything.
        reopened = coll.open("count.txt", "rb+")
        reopened.close(flush=True)

        # The manifest must be unchanged by that flush.
        manifest_after_flush = coll.manifest_text()
        self.assertEqual(
            manifest_after_flush,
            '. a8430a058b8fbf408e1931b794dbd6fb+13 0:10:count.txt 10:3:foo.txt\n')

    def test_write_after_small_block_packing2(self):
        coll = Collection()

        # Two small files, closed without flushing.
        count_writer = coll.open("count.txt", "wb")
        count_writer.write(b"0123456789")
        count_writer.close(flush=False)

        foo_writer = coll.open("foo.txt", "wb")
        foo_writer.write(b"foo")
        foo_writer.close(flush=False)

        # Both files share a single 13-byte block.
        packed_manifest = coll.manifest_text()
        self.assertEqual(
            packed_manifest,
            '. a8430a058b8fbf408e1931b794dbd6fb+13 0:10:count.txt 10:3:foo.txt\n')

        # Overwrite the first three bytes of count.txt.
        reopened = coll.open("count.txt", "rb+")
        reopened.write(b"abc")
        reopened.close(flush=False)

        # The new bytes get a 3-byte block of their own; the remaining seven
        # bytes of count.txt and all of foo.txt still point into the packed
        # block.
        manifest_after_write = coll.manifest_text()
        self.assertEqual(
            manifest_after_write,
            '. 900150983cd24fb0d6963f7d28e17f72+3 a8430a058b8fbf408e1931b794dbd6fb+13 0:3:count.txt 6:7:count.txt 13:3:foo.txt\n')


    def test_small_block_packing_with_overwrite(self):
        coll = Collection()

        # Create each file empty, then fill it through ArvadosFile.writeto().
        for filename, payload in (("b1", b"b1"), ("b2", b"b2")):
            coll.open(filename, "wb").close()
            coll[filename].writeto(0, payload, 0)

        # Overwrite b1 in place after b2 has been written.
        coll["b1"].writeto(0, b"1b", 0)

        # One packed 4-byte block serves both files, whether the manifest is
        # requested for the collection or for a single file.
        self.assertEqual(coll.manifest_text(), ". ed4f3f67c70b02b29c50ce1ea26666bd+4 0:2:b1 2:2:b2\n")
        self.assertEqual(coll["b1"].manifest_text(), ". ed4f3f67c70b02b29c50ce1ea26666bd+4 0:2:b1\n")
        self.assertEqual(coll["b2"].manifest_text(), ". ed4f3f67c70b02b29c50ce1ea26666bd+4 2:2:b2\n")
1500
1501
class CollectionCreateUpdateTest(run_test_server.TestCaseWithServers):
    """Create, save, diff and update collections, starting from the
    fixture built by create_count_txt()."""
    MAIN_SERVER = {}
    KEEP_SERVER = {}

    def create_count_txt(self):
        """Return a collection that was saved empty and then got an unsaved
        count.txt written into it.

        The empty collection is saved to the API server first; count.txt is
        written afterwards and deliberately not saved.
        """
        coll = Collection()
        coll.save_new("CollectionCreateUpdateTest", ensure_unique_name=True)

        # Local state and the API response must both report the same
        # portable data hash for the empty collection.
        empty_pdh = coll.portable_data_hash()
        self.assertEqual(empty_pdh, "d41d8cd98f00b204e9800998ecf8427e+0")
        recorded_pdh = coll.api_response()["portable_data_hash"]
        self.assertEqual(recorded_pdh, "d41d8cd98f00b204e9800998ecf8427e+0")

        with coll.open("count.txt", "wb") as writer:
            writer.write(b"0123456789")

        self.assertEqual(coll.portable_manifest_text(), ". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n")

        return coll

    def test_create_and_save(self):
        coll = self.create_count_txt()
        trash_time = datetime.datetime(2111, 1, 1, 11, 11, 11, 111111)
        coll.save(properties={'type' : 'Intermediate'},
                  storage_classes=['archive'],
                  trash_at=trash_time)

        # The saved manifest must carry a "+A...@..." hint on the block.
        manifest = coll.manifest_text()
        self.assertRegex(
            manifest,
            r"^\. 781e5e245d69b566979b86e28d23f2c7\+10\+A[a-f0-9]{40}@[a-f0-9]{8} 0:10:count\.txt$")
        # Everything passed to save() must be echoed by the API response.
        self.assertEqual(coll.api_response()["storage_classes_desired"], ['archive'])
        self.assertEqual(coll.api_response()["properties"], {'type' : 'Intermediate'})
        self.assertEqual(coll.api_response()["trash_at"], '2111-01-01T11:11:11.111111000Z')


    def test_create_and_save_new(self):
        coll = self.create_count_txt()
        trash_time = datetime.datetime(2111, 1, 1, 11, 11, 11, 111111)
        coll.save_new(properties={'type' : 'Intermediate'},
                      storage_classes=['archive'],
                      trash_at=trash_time)

        # Same expectations as for save(), this time through save_new().
        manifest = coll.manifest_text()
        self.assertRegex(
            manifest,
            r"^\. 781e5e245d69b566979b86e28d23f2c7\+10\+A[a-f0-9]{40}@[a-f0-9]{8} 0:10:count\.txt$")
        self.assertEqual(coll.api_response()["storage_classes_desired"], ['archive'])
        self.assertEqual(coll.api_response()["properties"], {'type' : 'Intermediate'})
        self.assertEqual(coll.api_response()["trash_at"], '2111-01-01T11:11:11.111111000Z')

    def test_create_and_save_after_commiting(self):
        coll = self.create_count_txt()

        # First save with one set of metadata ...
        first_trash_time = datetime.datetime(2111, 1, 1, 11, 11, 11, 111111)
        coll.save(properties={'type' : 'Intermediate'},
                  storage_classes=['hot'],
                  trash_at=first_trash_time)
        # ... then save again with different values for every field.
        second_trash_time = datetime.datetime(2222, 2, 2, 22, 22, 22, 222222)
        coll.save(properties={'type' : 'Output'},
                  storage_classes=['cold'],
                  trash_at=second_trash_time)

        # Only the values from the second save may remain.
        self.assertEqual(coll.api_response()["storage_classes_desired"], ['cold'])
        self.assertEqual(coll.api_response()["properties"], {'type' : 'Output'})
        self.assertEqual(coll.api_response()["trash_at"], '2222-02-02T22:22:22.222222000Z')

    def test_create_diff_apply(self):
        original = self.create_count_txt()
        original.save()

        # Load a second copy by locator and give count.txt new content.
        edited = Collection(original.manifest_locator())
        with edited.open("count.txt", "wb") as writer:
            writer.write(b"abcdefg")

        changes = original.diff(edited)

        first_change = changes[0]
        self.assertEqual(first_change, (arvados.collection.MOD, u'./count.txt', original["count.txt"], edited["count.txt"]))

        # Applying the diff must bring original in line with edited.
        original.apply(changes)
        self.assertEqual(original.portable_data_hash(), edited.portable_data_hash())

    def test_diff_apply_with_token(self):
        # baseline and other carry "+A...@..." hints on their block
        # locators; the working collection does not.
        baseline = CollectionReader(". 781e5e245d69b566979b86e28d23f2c7+10+A715fd31f8111894f717eb1003c1b0216799dd9ec@54f5dd1a 0:10:count.txt\n")
        working = Collection(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n")
        other = CollectionReader(". 7ac66c0f148de9519b8bd264312c4d64+7+A715fd31f8111894f717eb1003c1b0216799dd9ec@54f5dd1a 0:7:count.txt\n")

        changes = baseline.diff(other)
        expected_changes = [('mod', u'./count.txt', working["count.txt"], other["count.txt"])]
        self.assertEqual(changes, expected_changes)

        working.apply(changes)

        # The working collection must now carry other's locator, hint
        # included.
        self.assertEqual(working.manifest_text(), ". 7ac66c0f148de9519b8bd264312c4d64+7+A715fd31f8111894f717eb1003c1b0216799dd9ec@54f5dd1a 0:7:count.txt\n")


    def test_create_and_update(self):
        original = self.create_count_txt()
        original.save()

        # Change count.txt through a second copy and save that copy.
        edited = arvados.collection.Collection(original.manifest_locator())
        with edited.open("count.txt", "wb") as writer:
            writer.write(b"abcdefg")

        edited.save()

        # original only catches up once update() has been called.
        self.assertNotEqual(original.portable_data_hash(), edited.portable_data_hash())
        original.update()
        self.assertEqual(original.portable_data_hash(), edited.portable_data_hash())


    def test_create_and_update_with_conflict(self):
        original = self.create_count_txt()
        original.save()

        # Unsaved local edit on original ...
        with original.open("count.txt", "wb") as writer:
            writer.write(b"XYZ")

        # ... and a different, saved edit made through a second copy.
        edited = arvados.collection.Collection(original.manifest_locator())
        with edited.open("count.txt", "wb") as writer:
            writer.write(b"abcdefg")

        edited.save()

        # update() must keep the local edit under count.txt and put the
        # saved version into a timestamped "~conflict~" file.
        original.update()
        merged_manifest = original.manifest_text()
        self.assertRegex(
            merged_manifest,
            r"\. e65075d550f9b5bf9992fa1d71a131be\+3\S* 7ac66c0f148de9519b8bd264312c4d64\+7\S* 0:3:count\.txt 3:7:count\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")

    def test_pdh_is_native_str(self):
        coll = self.create_count_txt()
        pdh = coll.portable_data_hash()
        # The portable data hash must have exactly the type of a plain
        # string literal.
        native_str_type = type('')
        self.assertEqual(native_str_type, type(pdh))
1627
1628
# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()