21388: Update deb instructions in Python READMEs
[arvados.git] / sdk / python / tests / test_collections.py
1 # Copyright (C) The Arvados Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: Apache-2.0
4
5 import ciso8601
6 import copy
7 import datetime
8 import os
9 import random
10 import re
11 import shutil
12 import sys
13 import tempfile
14 import time
15 import unittest
16
17 import parameterized
18 from unittest import mock
19
20 import arvados
21 import arvados.keep
22 from arvados.collection import Collection, CollectionReader
23 from arvados._ranges import Range, LocatorAndRange
24
25 from . import arvados_testutil as tutil
26 from . import run_test_server
27
class TestResumableWriter(arvados.ResumableCollectionWriter):
    """ResumableCollectionWriter subclass used by the resume tests in this file."""
    KEEP_BLOCK_SIZE = 1024  # PUT to Keep every 1K.

    def current_state(self):
        """Return the result of dump_state(), passing copy.deepcopy as its argument.

        NOTE(review): presumably this yields an independent snapshot of the
        writer's state; confirm against ResumableCollectionWriter.dump_state.
        """
        return self.dump_state(copy.deepcopy)
33
34
35 @parameterized.parameterized_class([{"disk_cache": True}, {"disk_cache": False}])
36 class ArvadosCollectionsTest(run_test_server.TestCaseWithServers,
37                              tutil.ArvadosBaseTestCase):
38     disk_cache = False
39     MAIN_SERVER = {}
40
41     @classmethod
42     def setUpClass(cls):
43         super(ArvadosCollectionsTest, cls).setUpClass()
44         # need admin privileges to make collections with unsigned blocks
45         run_test_server.authorize_with('admin')
46         if cls.disk_cache:
47             cls._disk_cache_dir = tempfile.mkdtemp(prefix='CollectionsTest-')
48         else:
49             cls._disk_cache_dir = None
50         block_cache = arvados.keep.KeepBlockCache(
51             disk_cache=cls.disk_cache,
52             disk_cache_dir=cls._disk_cache_dir,
53         )
54         cls.api_client = arvados.api('v1')
55         cls.keep_client = arvados.KeepClient(api_client=cls.api_client,
56                                              local_store=cls.local_store,
57                                              block_cache=block_cache)
58
59     @classmethod
60     def tearDownClass(cls):
61         if cls._disk_cache_dir:
62             shutil.rmtree(cls._disk_cache_dir)
63
64     def write_foo_bar_baz(self):
65         cw = arvados.CollectionWriter(self.api_client)
66         self.assertEqual(cw.current_stream_name(), '.',
67                          'current_stream_name() should be "." now')
68         cw.set_current_file_name('foo.txt')
69         cw.write(b'foo')
70         self.assertEqual(cw.current_file_name(), 'foo.txt',
71                          'current_file_name() should be foo.txt now')
72         cw.start_new_file('bar.txt')
73         cw.write(b'bar')
74         cw.start_new_stream('baz')
75         cw.write(b'baz')
76         cw.set_current_file_name('baz.txt')
77         self.assertEqual(cw.manifest_text(),
78                          ". 3858f62230ac3c915f300c664312c63f+6 0:3:foo.txt 3:3:bar.txt\n" +
79                          "./baz 73feffa4b7f6bb68e44cf984c85f6e88+3 0:3:baz.txt\n",
80                          "wrong manifest: got {}".format(cw.manifest_text()))
81         cw.save_new()
82         return cw.portable_data_hash()
83
84     def test_pdh_is_native_str(self):
85         pdh = self.write_foo_bar_baz()
86         self.assertEqual(type(''), type(pdh))
87
88     def test_keep_local_store(self):
89         self.assertEqual(self.keep_client.put(b'foo'), 'acbd18db4cc2f85cedef654fccc4a4d8+3', 'wrong md5 hash from Keep.put')
90         self.assertEqual(self.keep_client.get('acbd18db4cc2f85cedef654fccc4a4d8+3'), b'foo', 'wrong data from Keep.get')
91
92     def test_local_collection_writer(self):
93         self.assertEqual(self.write_foo_bar_baz(),
94                          '23ca013983d6239e98931cc779e68426+114',
95                          'wrong locator hash: ' + self.write_foo_bar_baz())
96
97     def test_local_collection_reader(self):
98         foobarbaz = self.write_foo_bar_baz()
99         cr = arvados.CollectionReader(
100             foobarbaz + '+Xzizzle', self.api_client)
101         got = []
102         for s in cr.all_streams():
103             for f in s.all_files():
104                 got += [[f.size(), f.stream_name(), f.name(), f.read(2**26)]]
105         expected = [[3, '.', 'foo.txt', b'foo'],
106                     [3, '.', 'bar.txt', b'bar'],
107                     [3, './baz', 'baz.txt', b'baz']]
108         self.assertEqual(got,
109                          expected)
110         stream0 = cr.all_streams()[0]
111         self.assertEqual(stream0.readfrom(0, 0),
112                          b'',
113                          'reading zero bytes should have returned empty string')
114         self.assertEqual(stream0.readfrom(0, 2**26),
115                          b'foobar',
116                          'reading entire stream failed')
117         self.assertEqual(stream0.readfrom(2**26, 0),
118                          b'',
119                          'reading zero bytes should have returned empty string')
120         self.assertEqual(3, len(cr))
121         self.assertTrue(cr)
122
123     def _test_subset(self, collection, expected):
124         cr = arvados.CollectionReader(collection, self.api_client)
125         for s in cr.all_streams():
126             for ex in expected:
127                 if ex[0] == s:
128                     f = s.files()[ex[2]]
129                     got = [f.size(), f.stream_name(), f.name(), "".join(f.readall(2**26))]
130                     self.assertEqual(got,
131                                      ex,
132                                      'all_files|as_manifest did not preserve manifest contents: got %s expected %s' % (got, ex))
133
134     def test_collection_manifest_subset(self):
135         foobarbaz = self.write_foo_bar_baz()
136         self._test_subset(foobarbaz,
137                           [[3, '.',     'bar.txt', b'bar'],
138                            [3, '.',     'foo.txt', b'foo'],
139                            [3, './baz', 'baz.txt', b'baz']])
140         self._test_subset((". %s %s 0:3:foo.txt 3:3:bar.txt\n" %
141                            (self.keep_client.put(b"foo"),
142                             self.keep_client.put(b"bar"))),
143                           [[3, '.', 'bar.txt', b'bar'],
144                            [3, '.', 'foo.txt', b'foo']])
145         self._test_subset((". %s %s 0:2:fo.txt 2:4:obar.txt\n" %
146                            (self.keep_client.put(b"foo"),
147                             self.keep_client.put(b"bar"))),
148                           [[2, '.', 'fo.txt', b'fo'],
149                            [4, '.', 'obar.txt', b'obar']])
150         self._test_subset((". %s %s 0:2:fo.txt 2:0:zero.txt 2:2:ob.txt 4:2:ar.txt\n" %
151                            (self.keep_client.put(b"foo"),
152                             self.keep_client.put(b"bar"))),
153                           [[2, '.', 'ar.txt', b'ar'],
154                            [2, '.', 'fo.txt', b'fo'],
155                            [2, '.', 'ob.txt', b'ob'],
156                            [0, '.', 'zero.txt', b'']])
157
158     def test_collection_empty_file(self):
159         cw = arvados.CollectionWriter(self.api_client)
160         cw.start_new_file('zero.txt')
161         cw.write(b'')
162
163         self.assertEqual(cw.manifest_text(), ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:zero.txt\n")
164         self.check_manifest_file_sizes(cw.manifest_text(), [0])
165         cw = arvados.CollectionWriter(self.api_client)
166         cw.start_new_file('zero.txt')
167         cw.write(b'')
168         cw.start_new_file('one.txt')
169         cw.write(b'1')
170         cw.start_new_stream('foo')
171         cw.start_new_file('zero.txt')
172         cw.write(b'')
173         self.check_manifest_file_sizes(cw.manifest_text(), [0,1,0])
174
175     def test_no_implicit_normalize(self):
176         cw = arvados.CollectionWriter(self.api_client)
177         cw.start_new_file('b')
178         cw.write(b'b')
179         cw.start_new_file('a')
180         cw.write(b'')
181         self.check_manifest_file_sizes(cw.manifest_text(), [1,0])
182         self.check_manifest_file_sizes(
183             arvados.CollectionReader(
184                 cw.manifest_text()).manifest_text(normalize=True),
185             [0,1])
186
187     def check_manifest_file_sizes(self, manifest_text, expect_sizes):
188         cr = arvados.CollectionReader(manifest_text, self.api_client)
189         got_sizes = []
190         for f in cr.all_files():
191             got_sizes += [f.size()]
192         self.assertEqual(got_sizes, expect_sizes, "got wrong file sizes %s, expected %s" % (got_sizes, expect_sizes))
193
194     def test_normalized_collection(self):
195         m1 = """. 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt
196 . 085c37f02916da1cad16f93c54d899b7+41 0:41:md5sum.txt
197 . 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md5sum.txt
198 """
199         self.assertEqual(arvados.CollectionReader(m1, self.api_client).manifest_text(normalize=True),
200                          """. 5348b82a029fd9e971a811ce1f71360b+43 085c37f02916da1cad16f93c54d899b7+41 8b22da26f9f433dea0a10e5ec66d73ba+43 0:127:md5sum.txt
201 """)
202
203         m2 = """. 204e43b8a1185621ca55a94839582e6f+67108864 b9677abbac956bd3e86b1deb28dfac03+67108864 fc15aff2a762b13f521baf042140acec+67108864 323d2a3ce20370c4ca1d3462a344f8fd+25885655 0:227212247:var-GS000016015-ASM.tsv.bz2
204 """
205         self.assertEqual(arvados.CollectionReader(m2, self.api_client).manifest_text(normalize=True), m2)
206
207         m3 = """. 5348b82a029fd9e971a811ce1f71360b+43 3:40:md5sum.txt
208 . 085c37f02916da1cad16f93c54d899b7+41 0:41:md5sum.txt
209 . 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md5sum.txt
210 """
211         self.assertEqual(arvados.CollectionReader(m3, self.api_client).manifest_text(normalize=True),
212                          """. 5348b82a029fd9e971a811ce1f71360b+43 085c37f02916da1cad16f93c54d899b7+41 8b22da26f9f433dea0a10e5ec66d73ba+43 3:124:md5sum.txt
213 """)
214
215         m4 = """. 204e43b8a1185621ca55a94839582e6f+67108864 0:3:foo/bar
216 ./zzz 204e43b8a1185621ca55a94839582e6f+67108864 0:999:zzz
217 ./foo 323d2a3ce20370c4ca1d3462a344f8fd+25885655 0:3:bar
218 """
219         self.assertEqual(arvados.CollectionReader(m4, self.api_client).manifest_text(normalize=True),
220                          """./foo 204e43b8a1185621ca55a94839582e6f+67108864 323d2a3ce20370c4ca1d3462a344f8fd+25885655 0:3:bar 67108864:3:bar
221 ./zzz 204e43b8a1185621ca55a94839582e6f+67108864 0:999:zzz
222 """)
223
224         m5 = """. 204e43b8a1185621ca55a94839582e6f+67108864 0:3:foo/bar
225 ./zzz 204e43b8a1185621ca55a94839582e6f+67108864 0:999:zzz
226 ./foo 204e43b8a1185621ca55a94839582e6f+67108864 3:3:bar
227 """
228         self.assertEqual(arvados.CollectionReader(m5, self.api_client).manifest_text(normalize=True),
229                          """./foo 204e43b8a1185621ca55a94839582e6f+67108864 0:6:bar
230 ./zzz 204e43b8a1185621ca55a94839582e6f+67108864 0:999:zzz
231 """)
232
233         with self.data_file('1000G_ref_manifest') as f6:
234             m6 = f6.read()
235             self.assertEqual(arvados.CollectionReader(m6, self.api_client).manifest_text(normalize=True), m6)
236
237         with self.data_file('jlake_manifest') as f7:
238             m7 = f7.read()
239             self.assertEqual(arvados.CollectionReader(m7, self.api_client).manifest_text(normalize=True), m7)
240
241         m8 = """./a\\040b\\040c 59ca0efa9f5633cb0371bbc0355478d8+13 0:13:hello\\040world.txt
242 """
243         self.assertEqual(arvados.CollectionReader(m8, self.api_client).manifest_text(normalize=True), m8)
244
245     def test_locators_and_ranges(self):
246         blocks2 = [Range('a', 0, 10),
247                    Range('b', 10, 10),
248                    Range('c', 20, 10),
249                    Range('d', 30, 10),
250                    Range('e', 40, 10),
251                    Range('f', 50, 10)]
252
253         self.assertEqual(arvados.locators_and_ranges(blocks2,  2,  2), [LocatorAndRange('a', 10, 2, 2)])
254         self.assertEqual(arvados.locators_and_ranges(blocks2, 12, 2), [LocatorAndRange('b', 10, 2, 2)])
255         self.assertEqual(arvados.locators_and_ranges(blocks2, 22, 2), [LocatorAndRange('c', 10, 2, 2)])
256         self.assertEqual(arvados.locators_and_ranges(blocks2, 32, 2), [LocatorAndRange('d', 10, 2, 2)])
257         self.assertEqual(arvados.locators_and_ranges(blocks2, 42, 2), [LocatorAndRange('e', 10, 2, 2)])
258         self.assertEqual(arvados.locators_and_ranges(blocks2, 52, 2), [LocatorAndRange('f', 10, 2, 2)])
259         self.assertEqual(arvados.locators_and_ranges(blocks2, 62, 2), [])
260         self.assertEqual(arvados.locators_and_ranges(blocks2, -2, 2), [])
261
262         self.assertEqual(arvados.locators_and_ranges(blocks2,  0,  2), [LocatorAndRange('a', 10, 0, 2)])
263         self.assertEqual(arvados.locators_and_ranges(blocks2, 10, 2), [LocatorAndRange('b', 10, 0, 2)])
264         self.assertEqual(arvados.locators_and_ranges(blocks2, 20, 2), [LocatorAndRange('c', 10, 0, 2)])
265         self.assertEqual(arvados.locators_and_ranges(blocks2, 30, 2), [LocatorAndRange('d', 10, 0, 2)])
266         self.assertEqual(arvados.locators_and_ranges(blocks2, 40, 2), [LocatorAndRange('e', 10, 0, 2)])
267         self.assertEqual(arvados.locators_and_ranges(blocks2, 50, 2), [LocatorAndRange('f', 10, 0, 2)])
268         self.assertEqual(arvados.locators_and_ranges(blocks2, 60, 2), [])
269         self.assertEqual(arvados.locators_and_ranges(blocks2, -2, 2), [])
270
271         self.assertEqual(arvados.locators_and_ranges(blocks2,  9,  2), [LocatorAndRange('a', 10, 9, 1), LocatorAndRange('b', 10, 0, 1)])
272         self.assertEqual(arvados.locators_and_ranges(blocks2, 19, 2), [LocatorAndRange('b', 10, 9, 1), LocatorAndRange('c', 10, 0, 1)])
273         self.assertEqual(arvados.locators_and_ranges(blocks2, 29, 2), [LocatorAndRange('c', 10, 9, 1), LocatorAndRange('d', 10, 0, 1)])
274         self.assertEqual(arvados.locators_and_ranges(blocks2, 39, 2), [LocatorAndRange('d', 10, 9, 1), LocatorAndRange('e', 10, 0, 1)])
275         self.assertEqual(arvados.locators_and_ranges(blocks2, 49, 2), [LocatorAndRange('e', 10, 9, 1), LocatorAndRange('f', 10, 0, 1)])
276         self.assertEqual(arvados.locators_and_ranges(blocks2, 59, 2), [LocatorAndRange('f', 10, 9, 1)])
277
278
279         blocks3 = [Range('a', 0, 10),
280                   Range('b', 10, 10),
281                   Range('c', 20, 10),
282                   Range('d', 30, 10),
283                   Range('e', 40, 10),
284                   Range('f', 50, 10),
285                    Range('g', 60, 10)]
286
287         self.assertEqual(arvados.locators_and_ranges(blocks3,  2,  2), [LocatorAndRange('a', 10, 2, 2)])
288         self.assertEqual(arvados.locators_and_ranges(blocks3, 12, 2), [LocatorAndRange('b', 10, 2, 2)])
289         self.assertEqual(arvados.locators_and_ranges(blocks3, 22, 2), [LocatorAndRange('c', 10, 2, 2)])
290         self.assertEqual(arvados.locators_and_ranges(blocks3, 32, 2), [LocatorAndRange('d', 10, 2, 2)])
291         self.assertEqual(arvados.locators_and_ranges(blocks3, 42, 2), [LocatorAndRange('e', 10, 2, 2)])
292         self.assertEqual(arvados.locators_and_ranges(blocks3, 52, 2), [LocatorAndRange('f', 10, 2, 2)])
293         self.assertEqual(arvados.locators_and_ranges(blocks3, 62, 2), [LocatorAndRange('g', 10, 2, 2)])
294
295
296         blocks = [Range('a', 0, 10),
297                   Range('b', 10, 15),
298                   Range('c', 25, 5)]
299         self.assertEqual(arvados.locators_and_ranges(blocks, 1, 0), [])
300         self.assertEqual(arvados.locators_and_ranges(blocks, 0, 5), [LocatorAndRange('a', 10, 0, 5)])
301         self.assertEqual(arvados.locators_and_ranges(blocks, 3, 5), [LocatorAndRange('a', 10, 3, 5)])
302         self.assertEqual(arvados.locators_and_ranges(blocks, 0, 10), [LocatorAndRange('a', 10, 0, 10)])
303
304         self.assertEqual(arvados.locators_and_ranges(blocks, 0, 11), [LocatorAndRange('a', 10, 0, 10),
305                                                                       LocatorAndRange('b', 15, 0, 1)])
306         self.assertEqual(arvados.locators_and_ranges(blocks, 1, 11), [LocatorAndRange('a', 10, 1, 9),
307                                                                       LocatorAndRange('b', 15, 0, 2)])
308         self.assertEqual(arvados.locators_and_ranges(blocks, 0, 25), [LocatorAndRange('a', 10, 0, 10),
309                                                                       LocatorAndRange('b', 15, 0, 15)])
310
311         self.assertEqual(arvados.locators_and_ranges(blocks, 0, 30), [LocatorAndRange('a', 10, 0, 10),
312                                                                       LocatorAndRange('b', 15, 0, 15),
313                                                                       LocatorAndRange('c', 5, 0, 5)])
314         self.assertEqual(arvados.locators_and_ranges(blocks, 1, 30), [LocatorAndRange('a', 10, 1, 9),
315                                                                       LocatorAndRange('b', 15, 0, 15),
316                                                                       LocatorAndRange('c', 5, 0, 5)])
317         self.assertEqual(arvados.locators_and_ranges(blocks, 0, 31), [LocatorAndRange('a', 10, 0, 10),
318                                                                       LocatorAndRange('b', 15, 0, 15),
319                                                                       LocatorAndRange('c', 5, 0, 5)])
320
321         self.assertEqual(arvados.locators_and_ranges(blocks, 15, 5), [LocatorAndRange('b', 15, 5, 5)])
322
323         self.assertEqual(arvados.locators_and_ranges(blocks, 8, 17), [LocatorAndRange('a', 10, 8, 2),
324                                                                       LocatorAndRange('b', 15, 0, 15)])
325
326         self.assertEqual(arvados.locators_and_ranges(blocks, 8, 20), [LocatorAndRange('a', 10, 8, 2),
327                                                                       LocatorAndRange('b', 15, 0, 15),
328                                                                       LocatorAndRange('c', 5, 0, 3)])
329
330         self.assertEqual(arvados.locators_and_ranges(blocks, 26, 2), [LocatorAndRange('c', 5, 1, 2)])
331
332         self.assertEqual(arvados.locators_and_ranges(blocks, 9, 15), [LocatorAndRange('a', 10, 9, 1),
333                                                                       LocatorAndRange('b', 15, 0, 14)])
334         self.assertEqual(arvados.locators_and_ranges(blocks, 10, 15), [LocatorAndRange('b', 15, 0, 15)])
335         self.assertEqual(arvados.locators_and_ranges(blocks, 11, 15), [LocatorAndRange('b', 15, 1, 14),
336                                                                        LocatorAndRange('c', 5, 0, 1)])
337
338     class MockKeep(object):
339         def __init__(self, content, num_retries=0):
340             self.content = content
341             self.num_prefetch_threads = 1
342
343         def get(self, locator, num_retries=0, prefetch=False):
344             return self.content[locator]
345
346     def test_stream_reader(self):
347         keepblocks = {
348             'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa+10': b'abcdefghij',
349             'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb+15': b'klmnopqrstuvwxy',
350             'cccccccccccccccccccccccccccccccc+5': b'z0123',
351         }
352         mk = self.MockKeep(keepblocks)
353
354         sr = arvados.StreamReader([".", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa+10", "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb+15", "cccccccccccccccccccccccccccccccc+5", "0:30:foo"], mk)
355
356         content = b'abcdefghijklmnopqrstuvwxyz0123456789'
357
358         self.assertEqual(sr.readfrom(0, 30), content[0:30])
359         self.assertEqual(sr.readfrom(2, 30), content[2:30])
360
361         self.assertEqual(sr.readfrom(2, 8), content[2:10])
362         self.assertEqual(sr.readfrom(0, 10), content[0:10])
363
364         self.assertEqual(sr.readfrom(0, 5), content[0:5])
365         self.assertEqual(sr.readfrom(5, 5), content[5:10])
366         self.assertEqual(sr.readfrom(10, 5), content[10:15])
367         self.assertEqual(sr.readfrom(15, 5), content[15:20])
368         self.assertEqual(sr.readfrom(20, 5), content[20:25])
369         self.assertEqual(sr.readfrom(25, 5), content[25:30])
370         self.assertEqual(sr.readfrom(30, 5), b'')
371
372     def test_extract_file(self):
373         m1 = """. 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt
374 . 085c37f02916da1cad16f93c54d899b7+41 0:41:md6sum.txt
375 . 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md7sum.txt
376 . 085c37f02916da1cad16f93c54d899b7+41 5348b82a029fd9e971a811ce1f71360b+43 8b22da26f9f433dea0a10e5ec66d73ba+43 47:80:md8sum.txt
377 . 085c37f02916da1cad16f93c54d899b7+41 5348b82a029fd9e971a811ce1f71360b+43 8b22da26f9f433dea0a10e5ec66d73ba+43 40:80:md9sum.txt
378 """
379
380         m2 = arvados.CollectionReader(m1, self.api_client).manifest_text(normalize=True)
381
382         self.assertEqual(m2,
383                          ". 5348b82a029fd9e971a811ce1f71360b+43 085c37f02916da1cad16f93c54d899b7+41 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md5sum.txt 43:41:md6sum.txt 84:43:md7sum.txt 6:37:md8sum.txt 84:43:md8sum.txt 83:1:md9sum.txt 0:43:md9sum.txt 84:36:md9sum.txt\n")
384         files = arvados.CollectionReader(
385             m2, self.api_client).all_streams()[0].files()
386
387         self.assertEqual(files['md5sum.txt'].as_manifest(),
388                          ". 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt\n")
389         self.assertEqual(files['md6sum.txt'].as_manifest(),
390                          ". 085c37f02916da1cad16f93c54d899b7+41 0:41:md6sum.txt\n")
391         self.assertEqual(files['md7sum.txt'].as_manifest(),
392                          ". 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md7sum.txt\n")
393         self.assertEqual(files['md9sum.txt'].as_manifest(),
394                          ". 085c37f02916da1cad16f93c54d899b7+41 5348b82a029fd9e971a811ce1f71360b+43 8b22da26f9f433dea0a10e5ec66d73ba+43 40:80:md9sum.txt\n")
395
396     def test_write_directory_tree(self):
397         cwriter = arvados.CollectionWriter(self.api_client)
398         cwriter.write_directory_tree(self.build_directory_tree(
399                 ['basefile', 'subdir/subfile']))
400         self.assertEqual(cwriter.manifest_text(),
401                          """. c5110c5ac93202d8e0f9e381f22bac0f+8 0:8:basefile
402 ./subdir 1ca4dec89403084bf282ad31e6cf7972+14 0:14:subfile\n""")
403
404     def test_write_named_directory_tree(self):
405         cwriter = arvados.CollectionWriter(self.api_client)
406         cwriter.write_directory_tree(self.build_directory_tree(
407                 ['basefile', 'subdir/subfile']), 'root')
408         self.assertEqual(
409             cwriter.manifest_text(),
410             """./root c5110c5ac93202d8e0f9e381f22bac0f+8 0:8:basefile
411 ./root/subdir 1ca4dec89403084bf282ad31e6cf7972+14 0:14:subfile\n""")
412
413     def test_write_directory_tree_in_one_stream(self):
414         cwriter = arvados.CollectionWriter(self.api_client)
415         cwriter.write_directory_tree(self.build_directory_tree(
416                 ['basefile', 'subdir/subfile']), max_manifest_depth=0)
417         self.assertEqual(cwriter.manifest_text(),
418                          """. 4ace875ffdc6824a04950f06858f4465+22 0:8:basefile 8:14:subdir/subfile\n""")
419
420     def test_write_directory_tree_with_limited_recursion(self):
421         cwriter = arvados.CollectionWriter(self.api_client)
422         cwriter.write_directory_tree(
423             self.build_directory_tree(['f1', 'd1/f2', 'd1/d2/f3']),
424             max_manifest_depth=1)
425         self.assertEqual(cwriter.manifest_text(),
426                          """. bd19836ddb62c11c55ab251ccaca5645+2 0:2:f1
427 ./d1 50170217e5b04312024aa5cd42934494+13 0:8:d2/f3 8:5:f2\n""")
428
429     def test_write_directory_tree_with_zero_recursion(self):
430         cwriter = arvados.CollectionWriter(self.api_client)
431         content = 'd1/d2/f3d1/f2f1'
432         blockhash = tutil.str_keep_locator(content)
433         cwriter.write_directory_tree(
434             self.build_directory_tree(['f1', 'd1/f2', 'd1/d2/f3']),
435             max_manifest_depth=0)
436         self.assertEqual(
437             cwriter.manifest_text(),
438             ". {} 0:8:d1/d2/f3 8:5:d1/f2 13:2:f1\n".format(blockhash))
439
440     def test_write_one_file(self):
441         cwriter = arvados.CollectionWriter(self.api_client)
442         with self.make_test_file() as testfile:
443             cwriter.write_file(testfile.name)
444             self.assertEqual(
445                 cwriter.manifest_text(),
446                 ". 098f6bcd4621d373cade4e832627b4f6+4 0:4:{}\n".format(
447                     os.path.basename(testfile.name)))
448
449     def test_write_named_file(self):
450         cwriter = arvados.CollectionWriter(self.api_client)
451         with self.make_test_file() as testfile:
452             cwriter.write_file(testfile.name, 'foo')
453             self.assertEqual(cwriter.manifest_text(),
454                              ". 098f6bcd4621d373cade4e832627b4f6+4 0:4:foo\n")
455
456     def test_write_multiple_files(self):
457         cwriter = arvados.CollectionWriter(self.api_client)
458         for letter in 'ABC':
459             with self.make_test_file(letter.encode()) as testfile:
460                 cwriter.write_file(testfile.name, letter)
461         self.assertEqual(
462             cwriter.manifest_text(),
463             ". 902fbdd2b1df0c4f70b4a5d23525e932+3 0:1:A 1:1:B 2:1:C\n")
464
465     def test_basic_resume(self):
466         cwriter = TestResumableWriter()
467         with self.make_test_file() as testfile:
468             cwriter.write_file(testfile.name, 'test')
469             resumed = TestResumableWriter.from_state(cwriter.current_state())
470         self.assertEqual(cwriter.manifest_text(), resumed.manifest_text(),
471                           "resumed CollectionWriter had different manifest")
472
473     def test_resume_fails_when_missing_dependency(self):
474         cwriter = TestResumableWriter()
475         with self.make_test_file() as testfile:
476             cwriter.write_file(testfile.name, 'test')
477         self.assertRaises(arvados.errors.StaleWriterStateError,
478                           TestResumableWriter.from_state,
479                           cwriter.current_state())
480
481     def test_resume_fails_when_dependency_mtime_changed(self):
482         cwriter = TestResumableWriter()
483         with self.make_test_file() as testfile:
484             cwriter.write_file(testfile.name, 'test')
485             os.utime(testfile.name, (0, 0))
486             self.assertRaises(arvados.errors.StaleWriterStateError,
487                               TestResumableWriter.from_state,
488                               cwriter.current_state())
489
490     def test_resume_fails_when_dependency_is_nonfile(self):
491         cwriter = TestResumableWriter()
492         cwriter.write_file('/dev/null', 'empty')
493         self.assertRaises(arvados.errors.StaleWriterStateError,
494                           TestResumableWriter.from_state,
495                           cwriter.current_state())
496
497     def test_resume_fails_when_dependency_size_changed(self):
498         cwriter = TestResumableWriter()
499         with self.make_test_file() as testfile:
500             cwriter.write_file(testfile.name, 'test')
501             orig_mtime = os.fstat(testfile.fileno()).st_mtime
502             testfile.write(b'extra')
503             testfile.flush()
504             os.utime(testfile.name, (orig_mtime, orig_mtime))
505             self.assertRaises(arvados.errors.StaleWriterStateError,
506                               TestResumableWriter.from_state,
507                               cwriter.current_state())
508
509     def test_resume_fails_with_expired_locator(self):
510         cwriter = TestResumableWriter()
511         state = cwriter.current_state()
512         # Add an expired locator to the state.
513         state['_current_stream_locators'].append(''.join([
514                     'a' * 32, '+1+A', 'b' * 40, '@', '10000000']))
515         self.assertRaises(arvados.errors.StaleWriterStateError,
516                           TestResumableWriter.from_state, state)
517
518     def test_arbitrary_objects_not_resumable(self):
519         cwriter = TestResumableWriter()
520         with open('/dev/null') as badfile:
521             self.assertRaises(arvados.errors.AssertionError,
522                               cwriter.write_file, badfile)
523
524     def test_arbitrary_writes_not_resumable(self):
525         cwriter = TestResumableWriter()
526         self.assertRaises(arvados.errors.AssertionError,
527                           cwriter.write, "badtext")
528
529
class CollectionTestMixin(tutil.ApiClientMock):
    """Collection fixture constants plus an API client mock with one Keep proxy service."""

    API_COLLECTIONS = run_test_server.fixture('collections')

    # Values taken from the 'foo_file' fixture.
    DEFAULT_COLLECTION = API_COLLECTIONS['foo_file']
    DEFAULT_DATA_HASH = DEFAULT_COLLECTION['portable_data_hash']
    DEFAULT_MANIFEST = DEFAULT_COLLECTION['manifest_text']
    DEFAULT_UUID = DEFAULT_COLLECTION['uuid']

    # Values taken from the 'bar_file' fixture.
    ALT_COLLECTION = API_COLLECTIONS['bar_file']
    ALT_DATA_HASH = ALT_COLLECTION['portable_data_hash']
    ALT_MANIFEST = ALT_COLLECTION['manifest_text']

    def api_client_mock(self, status=200):
        """Return the parent class's client mock with Keep services mocked.

        ``status`` is forwarded to mock_keep_services(), which is asked for a
        single service of type 'proxy'.
        """
        mocked_client = super().api_client_mock()
        self.mock_keep_services(
            mocked_client, status=status, service_type='proxy', count=1)
        return mocked_client
544
545
546 @tutil.skip_sleep
547 class CollectionReaderTestCase(unittest.TestCase, CollectionTestMixin):
548     def mock_get_collection(self, api_mock, code, fixturename):
549         body = self.API_COLLECTIONS.get(fixturename)
550         self._mock_api_call(api_mock.collections().get, code, body)
551
552     def api_client_mock(self, status=200):
553         client = super(CollectionReaderTestCase, self).api_client_mock()
554         self.mock_get_collection(client, status, 'foo_file')
555         return client
556
557     def test_init_default_retries(self):
558         client = self.api_client_mock(200)
559         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
560         reader.manifest_text()
561         client.collections().get().execute.assert_called_with(num_retries=10)
562
563     def test_uuid_init_success(self):
564         client = self.api_client_mock(200)
565         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client,
566                                           num_retries=3)
567         self.assertEqual(self.DEFAULT_COLLECTION['manifest_text'],
568                          reader.manifest_text())
569         client.collections().get().execute.assert_called_with(num_retries=3)
570
571     def test_uuid_init_failure_raises_api_error(self):
572         client = self.api_client_mock(500)
573         with self.assertRaises(arvados.errors.ApiError):
574             reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
575
576     def test_locator_init(self):
577         client = self.api_client_mock(200)
578         # Ensure Keep will not return anything if asked.
579         with tutil.mock_keep_responses(None, 404):
580             reader = arvados.CollectionReader(self.DEFAULT_DATA_HASH,
581                                               api_client=client)
582             self.assertEqual(self.DEFAULT_MANIFEST, reader.manifest_text())
583
584     def test_init_no_fallback_to_keep(self):
585         # Do not look up a collection UUID or PDH in Keep.
586         for key in [self.DEFAULT_UUID, self.DEFAULT_DATA_HASH]:
587             client = self.api_client_mock(404)
588             with tutil.mock_keep_responses(self.DEFAULT_MANIFEST, 200):
589                 with self.assertRaises(arvados.errors.ApiError):
590                     reader = arvados.CollectionReader(key, api_client=client)
591
592     def test_init_num_retries_propagated(self):
593         # More of an integration test...
594         client = self.api_client_mock(200)
595         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client,
596                                           num_retries=3)
597         with tutil.mock_keep_responses('foo', 500, 500, 200):
598             self.assertEqual(b'foo',
599                              b''.join(f.read(9) for f in reader.all_files()))
600
601     def test_read_nonnormalized_manifest_with_collection_reader(self):
602         # client should be able to use CollectionReader on a manifest without normalizing it
603         client = self.api_client_mock(500)
604         nonnormal = ". acbd18db4cc2f85cedef654fccc4a4d8+3+Aabadbadbee@abeebdee 0:3:foo.txt 1:0:bar.txt 0:3:foo.txt\n"
605         reader = arvados.CollectionReader(
606             nonnormal,
607             api_client=client, num_retries=0)
608         # Ensure stripped_manifest() doesn't mangle our manifest in
609         # any way other than stripping hints.
610         self.assertEqual(
611             re.sub(r'\+[^\d\s\+]+', '', nonnormal),
612             reader.stripped_manifest())
613         # Ensure stripped_manifest() didn't mutate our reader.
614         self.assertEqual(nonnormal, reader.manifest_text())
615         # Ensure the files appear in the order given in the manifest.
616         self.assertEqual(
617             [[6, '.', 'foo.txt'],
618              [0, '.', 'bar.txt']],
619             [[f.size(), f.stream_name(), f.name()]
620              for f in reader.all_streams()[0].all_files()])
621
622     def test_read_empty_collection(self):
623         client = self.api_client_mock(200)
624         self.mock_get_collection(client, 200, 'empty')
625         reader = arvados.CollectionReader('d41d8cd98f00b204e9800998ecf8427e+0',
626                                           api_client=client)
627         self.assertEqual('', reader.manifest_text())
628         self.assertEqual(0, len(reader))
629         self.assertFalse(reader)
630
631     def test_api_response(self):
632         client = self.api_client_mock()
633         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
634         self.assertEqual(self.DEFAULT_COLLECTION, reader.api_response())
635
636     def check_open_file(self, coll_file, stream_name, file_name, file_size):
637         self.assertFalse(coll_file.closed, "returned file is not open")
638         self.assertEqual(stream_name, coll_file.stream_name())
639         self.assertEqual(file_name, coll_file.name)
640         self.assertEqual(file_size, coll_file.size())
641
642     def test_open_collection_file_one_argument(self):
643         client = self.api_client_mock(200)
644         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
645         cfile = reader.open('./foo', 'rb')
646         self.check_open_file(cfile, '.', 'foo', 3)
647
648     def test_open_deep_file(self):
649         coll_name = 'collection_with_files_in_subdir'
650         client = self.api_client_mock(200)
651         self.mock_get_collection(client, 200, coll_name)
652         reader = arvados.CollectionReader(
653             self.API_COLLECTIONS[coll_name]['uuid'], api_client=client)
654         cfile = reader.open('./subdir2/subdir3/file2_in_subdir3.txt', 'rb')
655         self.check_open_file(cfile, './subdir2/subdir3', 'file2_in_subdir3.txt',
656                              32)
657
658     def test_open_nonexistent_stream(self):
659         client = self.api_client_mock(200)
660         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
661         self.assertRaises(IOError, reader.open, './nonexistent/foo')
662
663     def test_open_nonexistent_file(self):
664         client = self.api_client_mock(200)
665         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
666         self.assertRaises(IOError, reader.open, 'nonexistent')
667
668
@unittest.skip("will be removed in #15397")
class CollectionWriterTestCase(unittest.TestCase, CollectionTestMixin):
    """Tests for the legacy arvados.CollectionWriter (the whole class is skipped)."""

    def mock_keep(self, body, *codes, **headers):
        """Mock Keep responses, defaulting 'x-keep-replicas-stored' to 2."""
        if 'x-keep-replicas-stored' not in headers:
            headers['x-keep-replicas-stored'] = 2
        return tutil.mock_keep_responses(body, *codes, **headers)

    def foo_writer(self, **kwargs):
        """Return a CollectionWriter that has b'foo' written to a file named 'foo'.

        `kwargs` are forwarded to CollectionWriter; a mock API client is
        supplied when the caller does not pass `api_client`.
        """
        fallback_client = self.api_client_mock()
        kwargs.setdefault('api_client', fallback_client)
        new_writer = arvados.CollectionWriter(**kwargs)
        new_writer.start_new_file('foo')
        new_writer.write(b'foo')
        return new_writer

    def test_write_whole_collection(self):
        """finish() returns the default data hash when Keep accepts both writes."""
        foo = self.foo_writer()
        with self.mock_keep(self.DEFAULT_DATA_HASH, 200, 200):
            result = foo.finish()
            self.assertEqual(self.DEFAULT_DATA_HASH, result)

    def test_write_no_default(self):
        """finish() raises KeepWriteError when Keep answers 500."""
        foo = self.foo_writer()
        with self.mock_keep(None, 500):
            self.assertRaises(arvados.errors.KeepWriteError, foo.finish)

    def test_write_insufficient_replicas_via_proxy(self):
        """Two stored replicas are not enough when replication=3 is requested."""
        foo = self.foo_writer(replication=3)
        with self.mock_keep(None, 200, **{'x-keep-replicas-stored': 2}):
            self.assertRaises(arvados.errors.KeepWriteError, foo.manifest_text)

    def test_write_insufficient_replicas_via_disks(self):
        """Two disk services storing one replica each cannot satisfy replication=3."""
        api = mock.MagicMock(name='api_client')
        with self.mock_keep(
                None, 200, 200,
                **{'x-keep-replicas-stored': 1}):
            self.mock_keep_services(
                api, status=200, service_type='disk', count=2)
            foo = self.foo_writer(api_client=api, replication=3)
            self.assertRaises(arvados.errors.KeepWriteError, foo.manifest_text)

    def test_write_three_replicas(self):
        """With three 500s then three 200s, all six mocked responses are used."""
        api = mock.MagicMock(name='api_client')
        with self.mock_keep(
                "", 500, 500, 500, 200, 200, 200,
                **{'x-keep-replicas-stored': 1}) as keep_calls:
            self.mock_keep_services(
                api, status=200, service_type='disk', count=6)
            foo = self.foo_writer(api_client=api, replication=3)
            foo.manifest_text()
            self.assertEqual(6, keep_calls.call_count)

    def test_write_whole_collection_through_retries(self):
        """finish() succeeds with num_retries=2 despite repeated 500 responses."""
        foo = self.foo_writer(num_retries=2)
        with self.mock_keep(self.DEFAULT_DATA_HASH,
                            500, 500, 200, 500, 500, 200):
            result = foo.finish()
            self.assertEqual(self.DEFAULT_DATA_HASH, result)

    def test_flush_data_retries(self):
        """flush_data() gets past one 500 and the manifest matches the default."""
        foo = self.foo_writer(num_retries=2)
        # Second whitespace-separated token of the default manifest.
        block_locator = self.DEFAULT_MANIFEST.split()[1]
        with self.mock_keep(block_locator, 500, 200):
            foo.flush_data()
        self.assertEqual(self.DEFAULT_MANIFEST, foo.manifest_text())

    def test_one_open(self):
        """A file opened via writer.open() is tracked, closed on exit, and listed."""
        api = self.api_client_mock()
        writer = arvados.CollectionWriter(api)
        with writer.open('out') as handle:
            self.assertEqual('.', writer.current_stream_name())
            self.assertEqual('out', writer.current_file_name())
            handle.write(b'test data')
            locator = tutil.str_keep_locator('test data')
        self.assertTrue(handle.closed, "writer file not closed after context")
        with self.assertRaises(ValueError):
            handle.write('extra text')
        with self.mock_keep(locator, 200):
            wanted = ". {} 0:9:out\n".format(locator)
            self.assertEqual(wanted, writer.manifest_text())

    def test_open_writelines(self):
        """writelines() output is concatenated into a single 6-byte file."""
        api = self.api_client_mock()
        writer = arvados.CollectionWriter(api)
        with writer.open('six') as handle:
            handle.writelines(['12', '34', '56'])
            locator = tutil.str_keep_locator('123456')
        with self.mock_keep(locator, 200):
            wanted = ". {} 0:6:six\n".format(locator)
            self.assertEqual(wanted, writer.manifest_text())

    def test_open_flush(self):
        """flush() between writes produces two block locators for one file."""
        api = self.api_client_mock()
        first_loc = tutil.str_keep_locator('flush1')
        second_loc = tutil.str_keep_locator('flush2')
        with self.mock_keep((first_loc, 200), (second_loc, 200)):
            writer = arvados.CollectionWriter(api)
            with writer.open('flush_test') as handle:
                handle.write(b'flush1')
                handle.flush()
                handle.write(b'flush2')
            wanted = ". {} {} 0:12:flush_test\n".format(first_loc, second_loc)
            self.assertEqual(wanted, writer.manifest_text())

    def test_two_opens_same_stream(self):
        """Two files opened in stream '.' share one data block."""
        api = self.api_client_mock()
        writer = arvados.CollectionWriter(api)
        with writer.open('.', '1') as handle:
            handle.write(b'1st')
        with writer.open('.', '2') as handle:
            handle.write(b'2nd')
        locator = tutil.str_keep_locator('1st2nd')
        with self.mock_keep(locator, 200):
            wanted = ". {} 0:3:1 3:3:2\n".format(locator)
            self.assertEqual(wanted, writer.manifest_text())

    def test_two_opens_two_streams(self):
        """Files opened in different streams end up on separate manifest lines."""
        api = self.api_client_mock()
        first_loc = tutil.str_keep_locator('file')
        second_loc = tutil.str_keep_locator('indir')
        with self.mock_keep((first_loc, 200), (second_loc, 200)):
            writer = arvados.CollectionWriter(api)
            with writer.open('file') as handle:
                handle.write(b'file')
            with writer.open('./dir', 'indir') as handle:
                handle.write(b'indir')
            wanted = ". {} 0:4:file\n./dir {} 0:5:indir\n".format(
                first_loc, second_loc)
            self.assertEqual(wanted, writer.manifest_text())

    def test_dup_open_fails(self):
        """Opening a second file while the first is still open raises AssertionError."""
        api = self.api_client_mock()
        writer = arvados.CollectionWriter(api)
        # Keep a reference so the first handle stays alive and open.
        still_open = writer.open('one')
        with self.assertRaises(arvados.errors.AssertionError):
            writer.open('two')
801
802
class CollectionMethods(run_test_server.TestCaseWithServers):
    """Tests for Collection accessor methods, run against the test servers."""

    def test_keys_values_items_support_indexing(self):
        """values() and items() can be indexed; keys() can be unpacked."""
        c = Collection()
        with c.open('foo', 'wb') as f:
            f.write(b'foo')
        with c.open('bar', 'wb') as f:
            f.write(b'bar')
        self.assertEqual(2, len(c.keys()))
        # This module is Python 3 only (it imports unittest.mock), where
        # keys() is unpacked rather than indexed.
        fn0, fn1 = c.keys()
        self.assertEqual(2, len(c.values()))
        # Indexing values() must work and hand back real entries.
        self.assertIsNotNone(c.values()[0])
        self.assertIsNotNone(c.values()[1])
        self.assertEqual(2, len(c.items()))
        self.assertEqual(fn0, c.items()[0][0])
        self.assertEqual(fn1, c.items()[1][0])

    def test_get_properties(self):
        """get_properties() is empty at first and reflects save_new(properties=...)."""
        c = Collection()
        self.assertEqual(c.get_properties(), {})
        c.save_new(properties={"foo":"bar"})
        self.assertEqual(c.get_properties(), {"foo":"bar"})

    def test_get_trash_at(self):
        """get_trash_at() is None at first and reflects save_new(trash_at=...)."""
        c = Collection()
        self.assertIsNone(c.get_trash_at())
        c.save_new(trash_at=datetime.datetime(2111, 1, 1, 11, 11, 11, 111111))
        self.assertEqual(c.get_trash_at(), ciso8601.parse_datetime('2111-01-01T11:11:11.111111000Z'))
836
837
class CollectionOpenModes(run_test_server.TestCaseWithServers):
    """Tests for the mode strings accepted by Collection.open()."""

    def test_open_binary_modes(self):
        """Every binary write/append mode opens the file and accepts bytes."""
        c = Collection()
        for mode in ['wb', 'wb+', 'ab', 'ab+']:
            with c.open('foo', mode) as f:
                f.write(b'foo')

    def test_open_invalid_modes(self):
        """Malformed mode values are rejected with an exception.

        Each mode runs in its own subTest so that one accepted mode is
        reported by name without hiding the results for the others.
        """
        c = Collection()
        for mode in ['+r', 'aa', '++', 'r+b', 'beer', '', None]:
            with self.subTest(mode=mode):
                with self.assertRaises(Exception):
                    c.open('foo', mode)

    def test_open_text_modes(self):
        """Text modes read and write str data.

        The iterations share one file, so they are deliberately not isolated
        from each other: read-only modes expect the content left behind by
        the preceding writes.
        """
        c = Collection()
        # The fixture file is created through a binary handle, so give it bytes.
        with c.open('foo', 'wb') as f:
            f.write(b'foo')
        for mode in ['r', 'rt', 'r+', 'rt+', 'w', 'wt', 'a', 'at']:
            with c.open('foo', mode) as f:
                if mode[0] == 'r' and '+' not in mode:
                    self.assertEqual('foo', f.read(3))
                else:
                    f.write('bar')
                    f.seek(0, os.SEEK_SET)
                    self.assertEqual('bar', f.read(3))
864
865
class TextModes(run_test_server.TestCaseWithServers):
    """Text-mode I/O tests with multi-byte characters split across Keep blocks."""

    def setUp(self):
        """Shrink the Keep block size to 4 bytes and pick the test characters."""
        # Remember whatever value was configured so tearDown can restore
        # exactly that, rather than assuming a particular default.
        self._saved_block_size = arvados.config.KEEP_BLOCK_SIZE
        arvados.config.KEEP_BLOCK_SIZE = 4
        # Both characters take three bytes in UTF-8, so they cannot stay
        # aligned with 4-byte blocks.
        self.sailboat = '\N{SAILBOAT}'
        self.snowman = '\N{SNOWMAN}'

    def tearDown(self):
        """Put back the block size that was in effect before setUp."""
        arvados.config.KEEP_BLOCK_SIZE = self._saved_block_size

    def test_read_sailboat_across_block_boundary(self):
        """A text-mode readline() decodes characters that straddle two blocks."""
        c = Collection()
        data = self.sailboat.encode('utf-8')
        with c.open('sailboats', 'wb') as f:
            f.write(data)
            # Write the second character in two pieces.
            f.write(data[:1])
            f.write(data[1:])
            f.write(b'\n')
        # The data must have been stored as a 4-byte and a 3-byte block.
        self.assertRegex(c.portable_manifest_text(), r'\+4 .*\+3 ')

        with c.open('sailboats', 'r') as f:
            string = f.readline()
            self.assertEqual(string, self.sailboat+self.sailboat+'\n')

    def test_write_snowman_across_block_boundary(self):
        """Text written in one call is split across blocks and reads back intact."""
        c = Collection()
        data = self.snowman
        with c.open('snowmany', 'w') as f:
            f.write(data+data+'\n'+data+'\n')
        self.assertRegex(c.portable_manifest_text(), r'\+4 .*\+4 .*\+3 ')

        with c.open('snowmany', 'r') as f:
            self.assertEqual(f.readline(), self.snowman+self.snowman+'\n')
            self.assertEqual(f.readline(), self.snowman+'\n')
909
910
911 class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
912
913     def test_replication_desired_kept_on_load(self):
914         m = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n'
915         c1 = Collection(m, replication_desired=1)
916         c1.save_new()
917         loc = c1.manifest_locator()
918         c2 = Collection(loc)
919         self.assertEqual(c1.manifest_text(strip=True), c2.manifest_text(strip=True))
920         self.assertEqual(c1.replication_desired, c2.replication_desired)
921
922     def test_replication_desired_not_loaded_if_provided(self):
923         m = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n'
924         c1 = Collection(m, replication_desired=1)
925         c1.save_new()
926         loc = c1.manifest_locator()
927         c2 = Collection(loc, replication_desired=2)
928         self.assertEqual(c1.manifest_text(strip=True), c2.manifest_text(strip=True))
929         self.assertNotEqual(c1.replication_desired, c2.replication_desired)
930
931     def test_storage_classes_desired_kept_on_load(self):
932         m = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n'
933         c1 = Collection(m, storage_classes_desired=['archival'])
934         c1.save_new()
935         loc = c1.manifest_locator()
936         c2 = Collection(loc)
937         self.assertEqual(c1.manifest_text(strip=True), c2.manifest_text(strip=True))
938         self.assertEqual(c1.storage_classes_desired(), c2.storage_classes_desired())
939
940     def test_storage_classes_change_after_save(self):
941         m = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n'
942         c1 = Collection(m, storage_classes_desired=['archival'])
943         c1.save_new()
944         loc = c1.manifest_locator()
945         c2 = Collection(loc)
946         self.assertEqual(['archival'], c2.storage_classes_desired())
947         c2.save(storage_classes=['highIO'])
948         self.assertEqual(['highIO'], c2.storage_classes_desired())
949         c3 = Collection(loc)
950         self.assertEqual(c1.manifest_text(strip=True), c3.manifest_text(strip=True))
951         self.assertEqual(['highIO'], c3.storage_classes_desired())
952
953     def test_storage_classes_desired_not_loaded_if_provided(self):
954         m = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n'
955         c1 = Collection(m, storage_classes_desired=['archival'])
956         c1.save_new()
957         loc = c1.manifest_locator()
958         c2 = Collection(loc, storage_classes_desired=['default'])
959         self.assertEqual(c1.manifest_text(strip=True), c2.manifest_text(strip=True))
960         self.assertNotEqual(c1.storage_classes_desired(), c2.storage_classes_desired())
961
962     def test_init_manifest(self):
963         m1 = """. 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt
964 . 085c37f02916da1cad16f93c54d899b7+41 0:41:md5sum.txt
965 . 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md5sum.txt
966 """
967         self.assertEqual(m1, CollectionReader(m1).manifest_text(normalize=False))
968         self.assertEqual(". 5348b82a029fd9e971a811ce1f71360b+43 085c37f02916da1cad16f93c54d899b7+41 8b22da26f9f433dea0a10e5ec66d73ba+43 0:127:md5sum.txt\n", CollectionReader(m1).manifest_text(normalize=True))
969
970     def test_init_manifest_with_collision(self):
971         m1 = """. 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt
972 ./md5sum.txt 085c37f02916da1cad16f93c54d899b7+41 0:41:md5sum.txt
973 """
974         with self.assertRaises(arvados.errors.ArgumentError):
975             self.assertEqual(m1, CollectionReader(m1))
976
977     def test_init_manifest_with_error(self):
978         m1 = """. 0:43:md5sum.txt"""
979         with self.assertRaises(arvados.errors.ArgumentError):
980             self.assertEqual(m1, CollectionReader(m1))
981
982     def test_remove(self):
983         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n')
984         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n", c.portable_manifest_text())
985         self.assertIn("count1.txt", c)
986         c.remove("count1.txt")
987         self.assertNotIn("count1.txt", c)
988         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c.portable_manifest_text())
989         with self.assertRaises(arvados.errors.ArgumentError):
990             c.remove("")
991
992     def test_remove_recursive(self):
993         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:a/b/c/d/efg.txt 0:10:xyz.txt\n')
994         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:xyz.txt\n./a/b/c/d 781e5e245d69b566979b86e28d23f2c7+10 0:10:efg.txt\n", c.portable_manifest_text())
995         self.assertIn("a", c)
996         self.assertEqual(1, len(c["a"].keys()))
997         # cannot remove non-empty directory with default recursive=False
998         with self.assertRaises(OSError):
999             c.remove("a/b")
1000         with self.assertRaises(OSError):
1001             c.remove("a/b/c/d")
1002         c.remove("a/b", recursive=True)
1003         self.assertEqual(0, len(c["a"].keys()))
1004         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:xyz.txt\n./a d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n", c.portable_manifest_text())
1005
1006     def test_find(self):
1007         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n')
1008         self.assertIs(c.find("."), c)
1009         self.assertIs(c.find("./count1.txt"), c["count1.txt"])
1010         self.assertIs(c.find("count1.txt"), c["count1.txt"])
1011         with self.assertRaises(IOError):
1012             c.find("/.")
1013         with self.assertRaises(arvados.errors.ArgumentError):
1014             c.find("")
1015         self.assertIs(c.find("./nonexistant.txt"), None)
1016         self.assertIs(c.find("./nonexistantsubdir/nonexistant.txt"), None)
1017
1018     def test_escaped_paths_dont_get_unescaped_on_manifest(self):
1019         # Dir & file names are literally '\056' (escaped form: \134056)
1020         manifest = './\\134056\\040Test d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\134056\n'
1021         c = Collection(manifest)
1022         self.assertEqual(c.portable_manifest_text(), manifest)
1023
1024     def test_other_special_chars_on_file_token(self):
1025         cases = [
1026             ('\\000', '\0'),
1027             ('\\011', '\t'),
1028             ('\\012', '\n'),
1029             ('\\072', ':'),
1030             ('\\134400', '\\400'),
1031         ]
1032         for encoded, decoded in cases:
1033             manifest = '. d41d8cd98f00b204e9800998ecf8427e+0 0:0:some%sfile.txt\n' % encoded
1034             c = Collection(manifest)
1035             self.assertEqual(c.portable_manifest_text(), manifest)
1036             self.assertIn('some%sfile.txt' % decoded, c.keys())
1037
1038     def test_escaped_paths_do_get_unescaped_on_listing(self):
1039         # Dir & file names are literally '\056' (escaped form: \134056)
1040         manifest = './\\134056\\040Test d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\134056\n'
1041         c = Collection(manifest)
1042         self.assertIn('\\056 Test', c.keys())
1043         self.assertIn('\\056', c['\\056 Test'].keys())
1044
1045     def test_make_empty_dir_with_escaped_chars(self):
1046         c = Collection()
1047         c.mkdirs('./Empty\\056Dir')
1048         self.assertEqual(c.portable_manifest_text(),
1049                          './Empty\\134056Dir d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n')
1050
1051     def test_make_empty_dir_with_spaces(self):
1052         c = Collection()
1053         c.mkdirs('./foo bar/baz waz')
1054         self.assertEqual(c.portable_manifest_text(),
1055                          './foo\\040bar/baz\\040waz d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n')
1056
1057     def test_remove_in_subdir(self):
1058         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
1059         c.remove("foo/count2.txt")
1060         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n", c.portable_manifest_text())
1061
1062     def test_remove_empty_subdir(self):
1063         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
1064         c.remove("foo/count2.txt")
1065         c.remove("foo")
1066         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.portable_manifest_text())
1067
1068     def test_remove_nonempty_subdir(self):
1069         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
1070         with self.assertRaises(IOError):
1071             c.remove("foo")
1072         c.remove("foo", recursive=True)
1073         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.portable_manifest_text())
1074
1075     def test_copy_to_file_in_dir(self):
1076         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1077         c.copy("count1.txt", "foo/count2.txt")
1078         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c.portable_manifest_text())
1079
1080     def test_copy_file(self):
1081         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1082         c.copy("count1.txt", "count2.txt")
1083         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n", c.portable_manifest_text())
1084
1085     def test_copy_to_existing_dir(self):
1086         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
1087         c.copy("count1.txt", "foo")
1088         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n", c.portable_manifest_text())
1089
1090     def test_copy_to_new_dir(self):
1091         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1092         c.copy("count1.txt", "foo/")
1093         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.portable_manifest_text())
1094
1095     def test_rename_file(self):
1096         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1097         c.rename("count1.txt", "count2.txt")
1098         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c.manifest_text())
1099
1100     def test_move_file_to_dir(self):
1101         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1102         c.mkdirs("foo")
1103         c.rename("count1.txt", "foo/count2.txt")
1104         self.assertEqual("./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c.manifest_text())
1105
1106     def test_move_file_to_other(self):
1107         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1108         c2 = Collection()
1109         c2.rename("count1.txt", "count2.txt", source_collection=c1)
1110         self.assertEqual("", c1.manifest_text())
1111         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c2.manifest_text())
1112
1113     def test_clone(self):
1114         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
1115         cl = c.clone()
1116         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", cl.portable_manifest_text())
1117
1118     def test_diff_del_add(self):
1119         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1120         c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
1121         d = c2.diff(c1)
1122         self.assertEqual(sorted(d), [
1123             ('add', './count1.txt', c1["count1.txt"]),
1124             ('del', './count2.txt', c2["count2.txt"]),
1125         ])
1126         d = c1.diff(c2)
1127         self.assertEqual(sorted(d), [
1128             ('add', './count2.txt', c2["count2.txt"]),
1129             ('del', './count1.txt', c1["count1.txt"]),
1130         ])
1131         self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1132         c1.apply(d)
1133         self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1134
1135     def test_diff_same(self):
1136         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1137         c2 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1138         d = c2.diff(c1)
1139         self.assertEqual(d, [('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"])])
1140         d = c1.diff(c2)
1141         self.assertEqual(d, [('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"])])
1142
1143         self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1144         c1.apply(d)
1145         self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1146
1147     def test_diff_mod(self):
1148         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1149         c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count1.txt\n')
1150         d = c2.diff(c1)
1151         self.assertEqual(d, [('mod', './count1.txt', c2["count1.txt"], c1["count1.txt"])])
1152         d = c1.diff(c2)
1153         self.assertEqual(d, [('mod', './count1.txt', c1["count1.txt"], c2["count1.txt"])])
1154
1155         self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1156         c1.apply(d)
1157         self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1158
1159     def test_diff_add(self):
1160         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1161         c2 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 5348b82a029fd9e971a811ce1f71360b+43 0:10:count1.txt 10:20:count2.txt\n')
1162         d = c2.diff(c1)
1163         self.assertEqual(sorted(d), [
1164             ('del', './count2.txt', c2["count2.txt"]),
1165             ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
1166         ])
1167         d = c1.diff(c2)
1168         self.assertEqual(sorted(d), [
1169             ('add', './count2.txt', c2["count2.txt"]),
1170             ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
1171         ])
1172
1173         self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1174         c1.apply(d)
1175         self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1176
1177     def test_diff_add_in_subcollection(self):
1178         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1179         c2 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
1180         d = c2.diff(c1)
1181         self.assertEqual(sorted(d), [
1182             ('del', './foo', c2["foo"]),
1183             ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
1184         ])
1185         d = c1.diff(c2)
1186         self.assertEqual(sorted(d), [
1187             ('add', './foo', c2["foo"]),
1188             ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
1189         ])
1190         self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1191         c1.apply(d)
1192         self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1193
1194     def test_diff_del_add_in_subcollection(self):
1195         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
1196         c2 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 5348b82a029fd9e971a811ce1f71360b+43 0:3:count3.txt\n')
1197         d = c2.diff(c1)
1198         self.assertEqual(sorted(d), [
1199             ('add', './foo/count2.txt', c1.find("foo/count2.txt")),
1200             ('del', './foo/count3.txt', c2.find("foo/count3.txt")),
1201             ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
1202         ])
1203         d = c1.diff(c2)
1204         self.assertEqual(sorted(d), [
1205             ('add', './foo/count3.txt', c2.find("foo/count3.txt")),
1206             ('del', './foo/count2.txt', c1.find("foo/count2.txt")),
1207             ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
1208         ])
1209
1210         self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1211         c1.apply(d)
1212         self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1213
1214     def test_diff_mod_in_subcollection(self):
1215         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
1216         c2 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:3:foo\n')
1217         d = c2.diff(c1)
1218         self.assertEqual(sorted(d), [
1219             ('mod', './foo', c2["foo"], c1["foo"]),
1220             ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
1221         ])
1222         d = c1.diff(c2)
1223         self.assertEqual(sorted(d), [
1224             ('mod', './foo', c1["foo"], c2["foo"]),
1225             ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
1226         ])
1227
1228         self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1229         c1.apply(d)
1230         self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
1231
1232     def test_conflict_keep_local_change(self):
1233         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
1234         c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
1235         d = c1.diff(c2)
1236         self.assertEqual(sorted(d), [
1237             ('add', './count2.txt', c2["count2.txt"]),
1238             ('del', './count1.txt', c1["count1.txt"]),
1239         ])
1240         f = c1.open("count1.txt", "wb")
1241         f.write(b"zzzzz")
1242
1243         # c1 changed, so it should not be deleted.
1244         c1.apply(d)
1245         self.assertEqual(c1.portable_manifest_text(), ". 95ebc3c7b3b9f1d2c40fec14415d3cb8+5 5348b82a029fd9e971a811ce1f71360b+43 0:5:count1.txt 5:10:count2.txt\n")
1246
1247     def test_conflict_mod(self):
1248         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt')
1249         c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count1.txt')
1250         d = c1.diff(c2)
1251         self.assertEqual(d, [('mod', './count1.txt', c1["count1.txt"], c2["count1.txt"])])
1252         f = c1.open("count1.txt", "wb")
1253         f.write(b"zzzzz")
1254
1255         # c1 changed, so c2 mod will go to a conflict file
1256         c1.apply(d)
1257         self.assertRegex(
1258             c1.portable_manifest_text(),
1259             r"\. 95ebc3c7b3b9f1d2c40fec14415d3cb8\+5 5348b82a029fd9e971a811ce1f71360b\+43 0:5:count1\.txt 5:10:count1\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")
1260
1261     def test_conflict_add(self):
1262         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
1263         c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count1.txt\n')
1264         d = c1.diff(c2)
1265         self.assertEqual(sorted(d), [
1266             ('add', './count1.txt', c2["count1.txt"]),
1267             ('del', './count2.txt', c1["count2.txt"]),
1268         ])
1269         f = c1.open("count1.txt", "wb")
1270         f.write(b"zzzzz")
1271
1272         # c1 added count1.txt, so c2 add will go to a conflict file
1273         c1.apply(d)
1274         self.assertRegex(
1275             c1.portable_manifest_text(),
1276             r"\. 95ebc3c7b3b9f1d2c40fec14415d3cb8\+5 5348b82a029fd9e971a811ce1f71360b\+43 0:5:count1\.txt 5:10:count1\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")
1277
1278     def test_conflict_del(self):
1279         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt')
1280         c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count1.txt')
1281         d = c1.diff(c2)
1282         self.assertEqual(d, [('mod', './count1.txt', c1["count1.txt"], c2["count1.txt"])])
1283         c1.remove("count1.txt")
1284
1285         # c1 deleted, so c2 mod will go to a conflict file
1286         c1.apply(d)
1287         self.assertRegex(
1288             c1.portable_manifest_text(),
1289             r"\. 5348b82a029fd9e971a811ce1f71360b\+43 0:10:count1\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")
1290
1291     def test_notify(self):
1292         c1 = Collection()
1293         events = []
1294         c1.subscribe(lambda event, collection, name, item: events.append((event, collection, name, item)))
1295         f = c1.open("foo.txt", "wb")
1296         self.assertEqual(events[0], (arvados.collection.ADD, c1, "foo.txt", f.arvadosfile))
1297
1298     def test_open_w(self):
1299         c1 = Collection(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n")
1300         self.assertEqual(c1["count1.txt"].size(), 10)
1301         c1.open("count1.txt", "wb").close()
1302         self.assertEqual(c1["count1.txt"].size(), 0)
1303
1304
class NewCollectionTestCaseWithServersAndTokens(run_test_server.TestCaseWithServers):
    # Collection tests whose assertions inspect the "+A..." / "+R..." hints
    # attached to block locators in the manifest text.
    MAIN_SERVER = {}
    KEEP_SERVER = {}
    # Locator shaped like <32 hex>+<digits>+A<40 hex>@<8 hex>.
    local_locator_re = r"[0-9a-f]{32}\+\d+\+A[a-f0-9]{40}@[a-f0-9]{8}"
    # Locator shaped like <32 hex>+<digits>+R<5 lowercase letters>-<40 hex>@<8 hex>.
    remote_locator_re = r"[0-9a-f]{32}\+\d+\+R[a-z]{5}-[a-f0-9]{40}@[a-f0-9]{8}"

    def setUp(self):
        # Remember the real KeepClient.put before any test patches it, so a
        # mock can still delegate to it through side_effect.
        self.keep_put = arvados.keep.KeepClient.put

    def _count_locators(self, locator_re, collection):
        # Number of locators in the collection's current manifest text that
        # match locator_re.
        return len(re.findall(locator_re, collection.manifest_text()))

    @mock.patch('arvados.keep.KeepClient.put', autospec=True)
    def test_storage_classes_desired(self, put_mock):
        put_mock.side_effect = self.keep_put
        c = Collection(storage_classes_desired=['default'])
        # The file is opened in binary mode, so write bytes like the other
        # tests in this module do.
        with c.open("file.txt", 'wb') as f:
            f.write(b'content')
        c.save_new()
        # The last put() call must carry the desired storage classes.
        _, kwargs = put_mock.call_args
        self.assertEqual(['default'], kwargs['classes'])

    @mock.patch('arvados.keep.KeepClient.put', autospec=True)
    def test_repacked_block_submission_get_permission_token(self, mocked_put):
        '''
        Make sure that those blocks that are committed after repacking small ones,
        get their permission tokens assigned on the collection manifest.
        '''
        def wrapped_keep_put(*args, **kwargs):
            # Simulate slow put operations
            time.sleep(1)
            return self.keep_put(*args, **kwargs)

        mocked_put.side_effect = wrapped_keep_put
        c = Collection()
        # Write 70 files ~1MiB each so we force to produce 1 big block by repacking
        # small ones before finishing the upload.
        for i in range(70):
            f = c.open("file_{}.txt".format(i), 'wb')
            # Encode the randomly chosen letter so the payload is bytes,
            # matching the binary mode of the handle.
            letter = random.choice('abcdefghijklmnopqrstuvwxyz').encode()
            f.write(letter * (2**20+i))
            f.close(flush=False)
        # We should get 2 blocks with their tokens
        self.assertEqual(self._count_locators(self.local_locator_re, c), 2)

    @mock.patch('arvados.keep.KeepClient.refresh_signature')
    def test_copy_remote_blocks_on_save_new(self, rs_mock):
        remote_block_loc = "acbd18db4cc2f85cedef654fccc4a4d8+3+Remote-" + "a" * 40 + "@abcdef01"
        local_block_loc = "acbd18db4cc2f85cedef654fccc4a4d8+3+A" + "b" * 40 + "@abcdef01"
        rs_mock.return_value = local_block_loc
        c = Collection(". " + remote_block_loc + " 0:3:foofile.txt\n")
        # Before saving: one remote locator, no local one.
        self.assertEqual(self._count_locators(self.remote_locator_re, c), 1)
        self.assertEqual(self._count_locators(self.local_locator_re, c), 0)
        c.save_new()
        rs_mock.assert_called()
        # After saving: the remote locator was replaced by a local one.
        self.assertEqual(self._count_locators(self.remote_locator_re, c), 0)
        self.assertEqual(self._count_locators(self.local_locator_re, c), 1)

    @mock.patch('arvados.keep.KeepClient.refresh_signature')
    def test_copy_remote_blocks_on_save(self, rs_mock):
        remote_block_loc = "acbd18db4cc2f85cedef654fccc4a4d8+3+Remote-" + "a" * 40 + "@abcdef01"
        local_block_loc = "acbd18db4cc2f85cedef654fccc4a4d8+3+A" + "b" * 40 + "@abcdef01"
        rs_mock.return_value = local_block_loc
        # Remote collection
        remote_c = Collection(". " + remote_block_loc + " 0:3:foofile.txt\n")
        self.assertEqual(self._count_locators(self.remote_locator_re, remote_c), 1)
        # Local collection
        local_c = Collection()
        with local_c.open('barfile.txt', 'wb') as f:
            f.write(b'bar')
        local_c.save_new()
        self.assertEqual(self._count_locators(self.local_locator_re, local_c), 1)
        self.assertEqual(self._count_locators(self.remote_locator_re, local_c), 0)
        # Copy remote file to local collection
        local_c.copy('./foofile.txt', './copied/foofile.txt', remote_c)
        self.assertEqual(self._count_locators(self.local_locator_re, local_c), 1)
        self.assertEqual(self._count_locators(self.remote_locator_re, local_c), 1)
        # Save local collection: remote block should be copied
        local_c.save()
        rs_mock.assert_called()
        self.assertEqual(self._count_locators(self.local_locator_re, local_c), 2)
        self.assertEqual(self._count_locators(self.remote_locator_re, local_c), 0)
1394
1395
class NewCollectionTestCaseWithServers(run_test_server.TestCaseWithServers):
    # Tests covering collection versions on save, committed-only manifest
    # text, and how small blocks get packed together.

    def _fetch_record(self, coll):
        # Read the collection record back through a newly built API client.
        return arvados.api().collections().get(uuid=coll.manifest_locator()).execute()

    def _assert_version(self, coll, version, preserved):
        # Check the 'version' and 'preserve_version' fields of the record
        # currently stored for coll.
        record = self._fetch_record(coll)
        self.assertEqual(record['version'], version)
        self.assertEqual(record['preserve_version'], preserved)

    def _committed_manifest(self, coll):
        # Manifest text of coll restricted to committed blocks only.
        return coll._get_manifest_text(".",
                                       strip=False,
                                       normalize=False,
                                       only_committed=True)

    def _two_small_files(self):
        # New collection holding count.txt and foo.txt, each closed without
        # flushing.
        coll = Collection()
        for name, payload in (("count.txt", b"0123456789"), ("foo.txt", b"foo")):
            writer = coll.open(name, "wb")
            writer.write(payload)
            writer.close(flush=False)
        return coll

    def test_preserve_version_on_save(self):
        coll = Collection()
        coll.save_new(preserve_version=True)
        self._assert_version(coll, 1, True)

        with coll.open("foo.txt", "wb") as writer:
            writer.write(b"foo")
        coll.save(preserve_version=True)
        self._assert_version(coll, 2, True)

        with coll.open("bar.txt", "wb") as writer:
            writer.write(b"bar")
        coll.save(preserve_version=False)
        self._assert_version(coll, 3, False)

    def test_get_manifest_text_only_committed(self):
        coll = Collection()
        with coll.open("count.txt", "wb") as count_writer:
            # foo.txt is written and flushed, so its block gets committed.
            with coll.open("foo.txt", "wb") as foo_writer:
                foo_writer.write(b"foo")
                foo_writer.flush()
            count_writer.write(b"0123456789")
            # count.txt has not been flushed yet, so it shows up with zero
            # length in the committed-only manifest.
            uncommitted_view = self._committed_manifest(coll)
            self.assertEqual(
                uncommitted_view,
                '. acbd18db4cc2f85cedef654fccc4a4d8+3 0:0:count.txt 0:3:foo.txt\n')
            # Flush count.txt so its block is committed as well.
            count_writer.flush()
        committed_view = self._committed_manifest(coll)
        self.assertEqual(
            committed_view,
            ". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:10:count.txt 10:3:foo.txt\n")

    def test_only_small_blocks_are_packed_together(self):
        coll = self._two_small_files()
        # A 33 MiB file must get a block of its own instead of being packed
        # with the two small files written above.
        big_writer = coll.open("bigfile.txt", "wb")
        big_writer.write(b"x" * (33 * 1024 * 1024))
        big_writer.close(flush=False)
        self.assertEqual(
            coll.manifest_text("."),
            '. 2d303c138c118af809f39319e5d507e9+34603008 a8430a058b8fbf408e1931b794dbd6fb+13 0:34603008:bigfile.txt 34603008:10:count.txt 34603018:3:foo.txt\n')

    def test_flush_after_small_block_packing(self):
        packed = '. a8430a058b8fbf408e1931b794dbd6fb+13 0:10:count.txt 10:3:foo.txt\n'
        coll = self._two_small_files()
        self.assertEqual(coll.manifest_text(), packed)

        # Reopening count.txt and closing it with a flush, without writing
        # anything, must leave the packed manifest unchanged.
        reopened = coll.open("count.txt", "rb+")
        reopened.close(flush=True)

        self.assertEqual(coll.manifest_text(), packed)

    def test_write_after_small_block_packing2(self):
        coll = self._two_small_files()
        self.assertEqual(
            coll.manifest_text(),
            '. a8430a058b8fbf408e1931b794dbd6fb+13 0:10:count.txt 10:3:foo.txt\n')

        # Overwrite the first three bytes of count.txt after packing.
        reopened = coll.open("count.txt", "rb+")
        reopened.write(b"abc")
        reopened.close(flush=False)

        self.assertEqual(
            coll.manifest_text(),
            '. 900150983cd24fb0d6963f7d28e17f72+3 a8430a058b8fbf408e1931b794dbd6fb+13 0:3:count.txt 6:7:count.txt 13:3:foo.txt\n')

    def test_small_block_packing_with_overwrite(self):
        coll = Collection()
        # Create each file empty, then write its initial two bytes.
        for name, payload in (("b1", b"b1"), ("b2", b"b2")):
            coll.open(name, "wb").close()
            coll[name].writeto(0, payload, 0)

        # Overwrite b1 after b2 has been written.
        coll["b1"].writeto(0, b"1b", 0)

        self.assertEqual(coll.manifest_text(), ". ed4f3f67c70b02b29c50ce1ea26666bd+4 0:2:b1 2:2:b2\n")
        self.assertEqual(coll["b1"].manifest_text(), ". ed4f3f67c70b02b29c50ce1ea26666bd+4 0:2:b1\n")
        self.assertEqual(coll["b2"].manifest_text(), ". ed4f3f67c70b02b29c50ce1ea26666bd+4 2:2:b2\n")
1514
1515
class CollectionCreateUpdateTest(run_test_server.TestCaseWithServers):
    # Tests that create a collection, modify it, and then save, diff, apply
    # or update it.
    MAIN_SERVER = {}
    KEEP_SERVER = {}

    def create_count_txt(self):
        # Save a new empty collection to the API server, then add count.txt
        # to it locally and return the collection without saving again.
        empty_pdh = "d41d8cd98f00b204e9800998ecf8427e+0"

        coll = Collection()
        coll.save_new("CollectionCreateUpdateTest", ensure_unique_name=True)
        self.assertEqual(coll.portable_data_hash(), empty_pdh)
        self.assertEqual(coll.api_response()["portable_data_hash"], empty_pdh)

        with coll.open("count.txt", "wb") as writer:
            writer.write(b"0123456789")

        self.assertEqual(
            coll.portable_manifest_text(),
            ". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n")
        return coll

    def _assert_saved_as_intermediate(self, coll):
        # Shared checks for the two save tests: the manifest locator carries
        # a "+A...@..." hint and the API response echoes the saved fields.
        self.assertRegex(
            coll.manifest_text(),
            r"^\. 781e5e245d69b566979b86e28d23f2c7\+10\+A[a-f0-9]{40}@[a-f0-9]{8} 0:10:count\.txt$")
        expectations = (
            ("storage_classes_desired", ['archive']),
            ("properties", {'type' : 'Intermediate'}),
            ("trash_at", '2111-01-01T11:11:11.111111000Z'),
        )
        for field, wanted in expectations:
            self.assertEqual(coll.api_response()[field], wanted)

    def test_create_and_save(self):
        coll = self.create_count_txt()
        trash_time = datetime.datetime(2111, 1, 1, 11, 11, 11, 111111)
        coll.save(properties={'type' : 'Intermediate'},
                  storage_classes=['archive'],
                  trash_at=trash_time)
        self._assert_saved_as_intermediate(coll)

    def test_create_and_save_new(self):
        coll = self.create_count_txt()
        trash_time = datetime.datetime(2111, 1, 1, 11, 11, 11, 111111)
        coll.save_new(properties={'type' : 'Intermediate'},
                      storage_classes=['archive'],
                      trash_at=trash_time)
        self._assert_saved_as_intermediate(coll)

    def test_create_and_save_after_commiting(self):
        coll = self.create_count_txt()
        first_trash = datetime.datetime(2111, 1, 1, 11, 11, 11, 111111)
        second_trash = datetime.datetime(2222, 2, 2, 22, 22, 22, 222222)
        coll.save(properties={'type' : 'Intermediate'},
                  storage_classes=['hot'],
                  trash_at=first_trash)
        coll.save(properties={'type' : 'Output'},
                  storage_classes=['cold'],
                  trash_at=second_trash)

        # Only the values from the second save must be reported.
        self.assertEqual(coll.api_response()["storage_classes_desired"], ['cold'])
        self.assertEqual(coll.api_response()["properties"], {'type' : 'Output'})
        self.assertEqual(coll.api_response()["trash_at"], '2222-02-02T22:22:22.222222000Z')

    def test_create_diff_apply(self):
        original = self.create_count_txt()
        original.save()

        # Load the saved collection again and rewrite count.txt in the copy.
        edited = Collection(original.manifest_locator())
        with edited.open("count.txt", "wb") as writer:
            writer.write(b"abcdefg")

        changes = original.diff(edited)
        expected_first = (arvados.collection.MOD, u'./count.txt',
                          original["count.txt"], edited["count.txt"])
        self.assertEqual(changes[0], expected_first)

        original.apply(changes)
        self.assertEqual(original.portable_data_hash(), edited.portable_data_hash())

    def test_diff_apply_with_token(self):
        baseline = CollectionReader(". 781e5e245d69b566979b86e28d23f2c7+10+A715fd31f8111894f717eb1003c1b0216799dd9ec@54f5dd1a 0:10:count.txt\n")
        target = Collection(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n")
        other = CollectionReader(". 7ac66c0f148de9519b8bd264312c4d64+7+A715fd31f8111894f717eb1003c1b0216799dd9ec@54f5dd1a 0:7:count.txt\n")

        changes = baseline.diff(other)
        expected_changes = [
            ('mod', u'./count.txt', target["count.txt"], other["count.txt"]),
        ]
        self.assertEqual(changes, expected_changes)

        target.apply(changes)

        # The applied manifest keeps the "+A...@..." hint from `other`.
        self.assertEqual(
            target.manifest_text(),
            ". 7ac66c0f148de9519b8bd264312c4d64+7+A715fd31f8111894f717eb1003c1b0216799dd9ec@54f5dd1a 0:7:count.txt\n")

    def test_create_and_update(self):
        original = self.create_count_txt()
        original.save()

        # A second handle on the same collection rewrites count.txt and saves.
        edited = arvados.collection.Collection(original.manifest_locator())
        with edited.open("count.txt", "wb") as writer:
            writer.write(b"abcdefg")
        edited.save()

        # The first handle only sees the change after update().
        self.assertNotEqual(original.portable_data_hash(), edited.portable_data_hash())
        original.update()
        self.assertEqual(original.portable_data_hash(), edited.portable_data_hash())

    def test_create_and_update_with_conflict(self):
        original = self.create_count_txt()
        original.save()

        # Unsaved local edit on the first handle...
        with original.open("count.txt", "wb") as writer:
            writer.write(b"XYZ")

        # ...and a different edit saved through a second handle.
        edited = arvados.collection.Collection(original.manifest_locator())
        with edited.open("count.txt", "wb") as writer:
            writer.write(b"abcdefg")
        edited.save()

        # update() must keep the local edit and put the saved one in a
        # "~conflict~" file.
        original.update()
        self.assertRegex(
            original.manifest_text(),
            r"\. e65075d550f9b5bf9992fa1d71a131be\+3\S* 7ac66c0f148de9519b8bd264312c4d64\+7\S* 0:3:count\.txt 3:7:count\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")

    def test_pdh_is_native_str(self):
        coll = self.create_count_txt()
        digest = coll.portable_data_hash()
        self.assertEqual(str, type(digest))
1641
1642
# Run the tests in this module when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()