14259: Collection class copies remote blocks when saving.
[arvados.git] / sdk / python / tests / test_arvfile.py
1 # Copyright (C) The Arvados Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: Apache-2.0
4
5 from __future__ import absolute_import
6 from builtins import hex
7 from builtins import str
8 from builtins import range
9 from builtins import object
10 import bz2
11 import datetime
12 import gzip
13 import io
14 import mock
15 import os
16 import unittest
17 import time
18
19 import arvados
20 from arvados._ranges import Range
21 from arvados.keep import KeepLocator
22 from arvados.collection import Collection, CollectionReader
23 from arvados.arvfile import ArvadosFile, ArvadosFileReader
24
25 from . import arvados_testutil as tutil
26 from .test_stream import StreamFileReaderTestCase, StreamRetryTestMixin
27
28 class ArvadosFileWriterTestCase(unittest.TestCase):
29     class MockKeep(object):
30         def __init__(self, blocks):
31             self.blocks = blocks
32             self.requests = []
33         def get(self, locator, num_retries=0):
34             self.requests.append(locator)
35             return self.blocks.get(locator)
36         def get_from_cache(self, locator):
37             self.requests.append(locator)
38             return self.blocks.get(locator)
39         def put(self, data, num_retries=None, copies=None):
40             pdh = tutil.str_keep_locator(data)
41             self.blocks[pdh] = bytes(data)
42             return pdh
43
44     class MockApi(object):
45         def __init__(self, b, r):
46             self.body = b
47             self.response = r
48             self._schema = ArvadosFileWriterTestCase.MockApi.MockSchema()
49             self._rootDesc = {}
50         class MockSchema(object):
51             def __init__(self):
52                 self.schemas = {'Collection': {'properties': {'replication_desired': {'type':'integer'}}}}
53         class MockCollections(object):
54             def __init__(self, b, r):
55                 self.body = b
56                 self.response = r
57             class Execute(object):
58                 def __init__(self, r):
59                     self.response = r
60                 def execute(self, num_retries=None):
61                     return self.response
62             def create(self, ensure_unique_name=False, body=None):
63                 if body != self.body:
64                     raise Exception("Body %s does not match expectation %s" % (body, self.body))
65                 return ArvadosFileWriterTestCase.MockApi.MockCollections.Execute(self.response)
66             def update(self, uuid=None, body=None):
67                 return ArvadosFileWriterTestCase.MockApi.MockCollections.Execute(self.response)
68         def collections(self):
69             return ArvadosFileWriterTestCase.MockApi.MockCollections(self.body, self.response)
70
71
72     def test_truncate(self):
73         keep = ArvadosFileWriterTestCase.MockKeep({
74             "781e5e245d69b566979b86e28d23f2c7+10": b"0123456789",
75         })
76         api = ArvadosFileWriterTestCase.MockApi({
77             "name": "test_truncate",
78             "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 0:8:count.txt\n",
79             "replication_desired": None,
80         }, {
81             "uuid": "zzzzz-4zz18-mockcollection0",
82             "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 0:8:count.txt\n",
83             "portable_data_hash":"7fcd0eaac3aad4c31a6a0e756475da92+52",
84         })
85         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
86                         api_client=api, keep_client=keep) as c:
87             writer = c.open("count.txt", "rb+")
88             self.assertEqual(writer.size(), 10)
89             self.assertEqual(b"0123456789", writer.read(12))
90
91             writer.truncate(8)
92
93             # Make sure reading off the end doesn't break
94             self.assertEqual(b"", writer.read(12))
95
96             self.assertEqual(writer.size(), 8)
97             writer.seek(0, os.SEEK_SET)
98             self.assertEqual(b"01234567", writer.read(12))
99
100             self.assertIsNone(c.manifest_locator())
101             self.assertTrue(c.modified())
102             c.save_new("test_truncate")
103             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
104             self.assertFalse(c.modified())
105
106
107     def test_truncate2(self):
108         keep = ArvadosFileWriterTestCase.MockKeep({
109             "781e5e245d69b566979b86e28d23f2c7+10": b"0123456789",
110         })
111         api = ArvadosFileWriterTestCase.MockApi({
112             "name": "test_truncate2",
113             "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 7f614da9329cd3aebf59b91aadc30bf0+67108864 0:12:count.txt\n",
114             "replication_desired": None,
115         }, {
116             "uuid": "zzzzz-4zz18-mockcollection0",
117             "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 7f614da9329cd3aebf59b91aadc30bf0+67108864 0:12:count.txt\n",
118             "portable_data_hash": "272da898abdf86ddc71994835e3155f8+95",
119         })
120         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
121                         api_client=api, keep_client=keep) as c:
122             writer = c.open("count.txt", "rb+")
123             self.assertEqual(writer.size(), 10)
124             self.assertEqual(b"0123456789", writer.read(12))
125
126             # extend file size
127             writer.truncate(12)
128
129             self.assertEqual(writer.size(), 12)
130             writer.seek(0, os.SEEK_SET)
131             self.assertEqual(b"0123456789\x00\x00", writer.read(12))
132
133             self.assertIsNone(c.manifest_locator())
134             self.assertTrue(c.modified())
135             c.save_new("test_truncate2")
136             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
137             self.assertFalse(c.modified())
138
139     def test_truncate3(self):
140         keep = ArvadosFileWriterTestCase.MockKeep({
141             "781e5e245d69b566979b86e28d23f2c7+10": b"0123456789",
142             "a925576942e94b2ef57a066101b48876+10": b"abcdefghij",
143         })
144         api = ArvadosFileWriterTestCase.MockApi({
145             "name": "test_truncate",
146             "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 0:8:count.txt\n",
147             "replication_desired": None,
148         }, {
149             "uuid": "zzzzz-4zz18-mockcollection0",
150             "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 0:8:count.txt\n",
151             "portable_data_hash": "7fcd0eaac3aad4c31a6a0e756475da92+52",
152         })
153         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 a925576942e94b2ef57a066101b48876+10 0:20:count.txt\n',
154                         api_client=api, keep_client=keep) as c:
155             writer = c.open("count.txt", "rb+")
156             self.assertEqual(writer.size(), 20)
157             self.assertEqual(b"0123456789ab", writer.read(12))
158             self.assertEqual(12, writer.tell())
159
160             writer.truncate(8)
161
162             # Make sure reading off the end doesn't break
163             self.assertEqual(12, writer.tell())
164             self.assertEqual(b"", writer.read(12))
165
166             self.assertEqual(writer.size(), 8)
167             self.assertEqual(2, writer.seek(-10, os.SEEK_CUR))
168             self.assertEqual(b"234567", writer.read(12))
169
170             self.assertIsNone(c.manifest_locator())
171             self.assertTrue(c.modified())
172             c.save_new("test_truncate")
173             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
174             self.assertFalse(c.modified())
175
176
177
178     def test_write_to_end(self):
179         keep = ArvadosFileWriterTestCase.MockKeep({
180             "781e5e245d69b566979b86e28d23f2c7+10": b"0123456789",
181         })
182         api = ArvadosFileWriterTestCase.MockApi({
183             "name": "test_append",
184             "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:13:count.txt\n",
185             "replication_desired": None,
186         }, {
187             "uuid": "zzzzz-4zz18-mockcollection0",
188             "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:13:count.txt\n",
189             "portable_data_hash": "c5c3af76565c8efb6a806546bcf073f3+88",
190         })
191         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
192                              api_client=api, keep_client=keep) as c:
193             writer = c.open("count.txt", "rb+")
194             self.assertEqual(writer.size(), 10)
195
196             self.assertEqual(5, writer.seek(5, os.SEEK_SET))
197             self.assertEqual(b"56789", writer.read(8))
198
199             writer.seek(10, os.SEEK_SET)
200             writer.write("foo")
201             self.assertEqual(writer.size(), 13)
202
203             writer.seek(5, os.SEEK_SET)
204             self.assertEqual(b"56789foo", writer.read(8))
205
206             self.assertIsNone(c.manifest_locator())
207             self.assertTrue(c.modified())
208             self.assertIsNone(keep.get("acbd18db4cc2f85cedef654fccc4a4d8+3"))
209
210             c.save_new("test_append")
211             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
212             self.assertFalse(c.modified())
213             self.assertEqual(b"foo", keep.get("acbd18db4cc2f85cedef654fccc4a4d8+3"))
214
215
216     def test_append(self):
217         keep = ArvadosFileWriterTestCase.MockKeep({
218             "781e5e245d69b566979b86e28d23f2c7+10": b"0123456789",
219         })
220         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n', keep_client=keep)
221         writer = c.open("count.txt", "ab+")
222         self.assertEqual(writer.read(20), b"0123456789")
223
224         writer.seek(0, os.SEEK_SET)
225         writer.write("hello")
226         self.assertEqual(writer.read(), b"")
227         writer.seek(-5, os.SEEK_CUR)
228         self.assertEqual(writer.read(3), b"hel")
229         self.assertEqual(writer.read(), b"lo")
230         writer.seek(0, os.SEEK_SET)
231         self.assertEqual(writer.read(), b"0123456789hello")
232
233         writer.seek(0)
234         writer.write("world")
235         self.assertEqual(writer.read(), b"")
236         writer.seek(0)
237         self.assertEqual(writer.read(), b"0123456789helloworld")
238
239         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 fc5e038d38a57032085441e7fe7010b0+10 0:20:count.txt\n", c.portable_manifest_text())
240
241     def test_write_at_beginning(self):
242         keep = ArvadosFileWriterTestCase.MockKeep({
243             "781e5e245d69b566979b86e28d23f2c7+10": b"0123456789",
244         })
245         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
246                              keep_client=keep) as c:
247             writer = c.open("count.txt", "rb+")
248             self.assertEqual(b"0123456789", writer.readfrom(0, 13))
249             writer.seek(0, os.SEEK_SET)
250             writer.write("foo")
251             self.assertEqual(writer.size(), 10)
252             self.assertEqual(b"foo3456789", writer.readfrom(0, 13))
253             self.assertEqual(". acbd18db4cc2f85cedef654fccc4a4d8+3 781e5e245d69b566979b86e28d23f2c7+10 0:3:count.txt 6:7:count.txt\n", c.portable_manifest_text())
254
255     def test_write_empty(self):
256         keep = ArvadosFileWriterTestCase.MockKeep({})
257         with Collection(keep_client=keep) as c:
258             writer = c.open("count.txt", "wb")
259             self.assertEqual(writer.size(), 0)
260             self.assertEqual(". d41d8cd98f00b204e9800998ecf8427e+0 0:0:count.txt\n", c.portable_manifest_text())
261
262     def test_save_manifest_text(self):
263         keep = ArvadosFileWriterTestCase.MockKeep({})
264         with Collection(keep_client=keep) as c:
265             writer = c.open("count.txt", "wb")
266             writer.write(b"0123456789")
267             self.assertEqual('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n', c.portable_manifest_text())
268             self.assertNotIn('781e5e245d69b566979b86e28d23f2c7+10', keep.blocks)
269
270             self.assertEqual('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n', c.save_new(create_collection_record=False))
271             self.assertIn('781e5e245d69b566979b86e28d23f2c7+10', keep.blocks)
272
273     def test_get_manifest_text_commits(self):
274          keep = ArvadosFileWriterTestCase.MockKeep({})
275          with Collection(keep_client=keep) as c:
276              writer = c.open("count.txt", "wb")
277              writer.write("0123456789")
278              self.assertEqual('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n', c.portable_manifest_text())
279              self.assertNotIn('781e5e245d69b566979b86e28d23f2c7+10', keep.blocks)
280              self.assertEqual('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n', c.manifest_text())
281              self.assertIn('781e5e245d69b566979b86e28d23f2c7+10', keep.blocks)
282
283
284     def test_write_in_middle(self):
285         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": b"0123456789"})
286         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
287                              keep_client=keep) as c:
288             writer = c.open("count.txt", "rb+")
289             self.assertEqual(b"0123456789", writer.readfrom(0, 13))
290             writer.seek(3, os.SEEK_SET)
291             writer.write("foo")
292             self.assertEqual(writer.size(), 10)
293             self.assertEqual(b"012foo6789", writer.readfrom(0, 13))
294             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:count.txt 10:3:count.txt 6:4:count.txt\n", c.portable_manifest_text())
295
296     def test_write_at_end(self):
297         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": b"0123456789"})
298         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
299                              keep_client=keep) as c:
300             writer = c.open("count.txt", "rb+")
301             self.assertEqual(b"0123456789", writer.readfrom(0, 13))
302             writer.seek(7, os.SEEK_SET)
303             writer.write("foo")
304             self.assertEqual(writer.size(), 10)
305             self.assertEqual(b"0123456foo", writer.readfrom(0, 13))
306             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:7:count.txt 10:3:count.txt\n", c.portable_manifest_text())
307
308     def test_write_across_segment_boundary(self):
309         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": b"0123456789"})
310         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt 0:10:count.txt\n',
311                              keep_client=keep) as c:
312             writer = c.open("count.txt", "rb+")
313             self.assertEqual(b"012345678901234", writer.readfrom(0, 15))
314             writer.seek(7, os.SEEK_SET)
315             writer.write("foobar")
316             self.assertEqual(writer.size(), 20)
317             self.assertEqual(b"0123456foobar34", writer.readfrom(0, 15))
318             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 3858f62230ac3c915f300c664312c63f+6 0:7:count.txt 10:6:count.txt 3:7:count.txt\n", c.portable_manifest_text())
319
320     def test_write_across_several_segments(self):
321         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": b"0123456789"})
322         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:4:count.txt 0:4:count.txt 0:4:count.txt',
323                              keep_client=keep) as c:
324             writer = c.open("count.txt", "rb+")
325             self.assertEqual(b"012301230123", writer.readfrom(0, 15))
326             writer.seek(2, os.SEEK_SET)
327             writer.write("abcdefg")
328             self.assertEqual(writer.size(), 12)
329             self.assertEqual(b"01abcdefg123", writer.readfrom(0, 15))
330             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 7ac66c0f148de9519b8bd264312c4d64+7 0:2:count.txt 10:7:count.txt 1:3:count.txt\n", c.portable_manifest_text())
331
332     def test_write_large(self):
333         keep = ArvadosFileWriterTestCase.MockKeep({})
334         api = ArvadosFileWriterTestCase.MockApi({"name":"test_write_large",
335                                                  "manifest_text": ". a5de24f4417cfba9d5825eadc2f4ca49+67108000 598cc1a4ccaef8ab6e4724d87e675d78+32892000 0:100000000:count.txt\n",
336                                                  "replication_desired":None},
337                                                 {"uuid":"zzzzz-4zz18-mockcollection0",
338                                                  "manifest_text": ". a5de24f4417cfba9d5825eadc2f4ca49+67108000 598cc1a4ccaef8ab6e4724d87e675d78+32892000 0:100000000:count.txt\n",
339                                                  "portable_data_hash":"9132ca8e3f671c76103a38f5bc24328c+108"})
340         with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
341                              api_client=api, keep_client=keep) as c:
342             writer = c.open("count.txt", "rb+")
343             text = "0123456789" * 100
344             for b in range(0, 100000):
345                 writer.write(text)
346             self.assertEqual(writer.size(), 100000000)
347
348             self.assertIsNone(c.manifest_locator())
349             self.assertTrue(c.modified())
350             c.save_new("test_write_large")
351             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
352             self.assertFalse(c.modified())
353
354
355     def test_large_write(self):
356         keep = ArvadosFileWriterTestCase.MockKeep({})
357         api = ArvadosFileWriterTestCase.MockApi({}, {})
358         with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
359                              api_client=api, keep_client=keep) as c:
360             writer = c.open("count.txt", "rb+")
361             self.assertEqual(writer.size(), 0)
362
363             text = "0123456789"
364             writer.write(text)
365             text = "0123456789" * 9999999
366             writer.write(text)
367             self.assertEqual(writer.size(), 100000000)
368
369             self.assertEqual(c.manifest_text(), ". 781e5e245d69b566979b86e28d23f2c7+10 48dd23ea1645fd47d789804d71b5bb8e+67108864 77c57dc6ac5a10bb2205caaa73187994+32891126 0:100000000:count.txt\n")
370
371     def test_sparse_write(self):
372         keep = ArvadosFileWriterTestCase.MockKeep({})
373         api = ArvadosFileWriterTestCase.MockApi({}, {})
374         with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
375                              api_client=api, keep_client=keep) as c:
376             writer = c.open("count.txt", "rb+")
377             self.assertEqual(writer.size(), 0)
378
379             text = b"0123456789"
380             writer.seek(2)
381             writer.write(text)
382             self.assertEqual(writer.size(), 12)
383             writer.seek(0, os.SEEK_SET)
384             self.assertEqual(writer.read(), b"\x00\x00"+text)
385
386             self.assertEqual(c.manifest_text(), ". 7f614da9329cd3aebf59b91aadc30bf0+67108864 781e5e245d69b566979b86e28d23f2c7+10 0:2:count.txt 67108864:10:count.txt\n")
387
388
389     def test_sparse_write2(self):
390         keep = ArvadosFileWriterTestCase.MockKeep({})
391         api = ArvadosFileWriterTestCase.MockApi({}, {})
392         with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
393                              api_client=api, keep_client=keep) as c:
394             writer = c.open("count.txt", "rb+")
395             self.assertEqual(writer.size(), 0)
396
397             text = "0123456789"
398             writer.seek((arvados.config.KEEP_BLOCK_SIZE*2) + 2)
399             writer.write(text)
400             self.assertEqual(writer.size(), (arvados.config.KEEP_BLOCK_SIZE*2) + 12)
401             writer.seek(0, os.SEEK_SET)
402
403             self.assertEqual(c.manifest_text(), ". 7f614da9329cd3aebf59b91aadc30bf0+67108864 781e5e245d69b566979b86e28d23f2c7+10 0:67108864:count.txt 0:67108864:count.txt 0:2:count.txt 67108864:10:count.txt\n")
404
405
406     def test_sparse_write3(self):
407         keep = ArvadosFileWriterTestCase.MockKeep({})
408         api = ArvadosFileWriterTestCase.MockApi({}, {})
409         for r in [[0, 1, 2, 3, 4], [4, 3, 2, 1, 0], [3, 2, 0, 4, 1]]:
410             with Collection() as c:
411                 writer = c.open("count.txt", "rb+")
412                 self.assertEqual(writer.size(), 0)
413
414                 for i in r:
415                     w = ("%s" % i) * 10
416                     writer.seek(i*10)
417                     writer.write(w.encode())
418                 writer.seek(0)
419                 self.assertEqual(writer.read(), b"00000000001111111111222222222233333333334444444444")
420
421     def test_sparse_write4(self):
422         keep = ArvadosFileWriterTestCase.MockKeep({})
423         api = ArvadosFileWriterTestCase.MockApi({}, {})
424         for r in [[0, 1, 2, 4], [4, 2, 1, 0], [2, 0, 4, 1]]:
425             with Collection() as c:
426                 writer = c.open("count.txt", "rb+")
427                 self.assertEqual(writer.size(), 0)
428
429                 for i in r:
430                     w = ("%s" % i) * 10
431                     writer.seek(i*10)
432                     writer.write(w.encode())
433                 writer.seek(0)
434                 self.assertEqual(writer.read(), b"000000000011111111112222222222\x00\x00\x00\x00\x00\x00\x00\x00\x00\x004444444444")
435
436
437     def test_rewrite_on_empty_file(self):
438         keep = ArvadosFileWriterTestCase.MockKeep({})
439         with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
440                              keep_client=keep) as c:
441             writer = c.open("count.txt", "rb+")
442             for b in range(0, 10):
443                 writer.seek(0, os.SEEK_SET)
444                 writer.write("0123456789")
445
446             self.assertEqual(writer.size(), 10)
447             self.assertEqual(b"0123456789", writer.readfrom(0, 20))
448             self.assertEqual(". 7a08b07e84641703e5f2c836aa59a170+100 90:10:count.txt\n", c.portable_manifest_text())
449             writer.flush()
450             self.assertEqual(writer.size(), 10)
451             self.assertEqual(b"0123456789", writer.readfrom(0, 20))
452             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n", c.portable_manifest_text())
453
454     def test_rewrite_append_existing_file(self):
455         keep = ArvadosFileWriterTestCase.MockKeep({
456             "781e5e245d69b566979b86e28d23f2c7+10": b"0123456789",
457         })
458         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt',
459                              keep_client=keep) as c:
460             writer = c.open("count.txt", "rb+")
461             for b in range(0, 10):
462                 writer.seek(10, os.SEEK_SET)
463                 writer.write("abcdefghij")
464
465             self.assertEqual(writer.size(), 20)
466             self.assertEqual(b"0123456789abcdefghij", writer.readfrom(0, 20))
467             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 ae5f43bab79cf0be33f025fa97ae7398+100 0:10:count.txt 100:10:count.txt\n", c.portable_manifest_text())
468
469             writer.arvadosfile.flush()
470             self.assertEqual(writer.size(), 20)
471             self.assertEqual(b"0123456789abcdefghij", writer.readfrom(0, 20))
472             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 a925576942e94b2ef57a066101b48876+10 0:20:count.txt\n", c.portable_manifest_text())
473
474     def test_rewrite_over_existing_file(self):
475         keep = ArvadosFileWriterTestCase.MockKeep({
476             "781e5e245d69b566979b86e28d23f2c7+10": b"0123456789",
477         })
478         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt',
479                              keep_client=keep) as c:
480             writer = c.open("count.txt", "rb+")
481             for b in range(0, 10):
482                 writer.seek(5, os.SEEK_SET)
483                 writer.write("abcdefghij")
484
485             self.assertEqual(writer.size(), 15)
486             self.assertEqual(b"01234abcdefghij", writer.readfrom(0, 20))
487             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 ae5f43bab79cf0be33f025fa97ae7398+100 0:5:count.txt 100:10:count.txt\n", c.portable_manifest_text())
488
489             writer.arvadosfile.flush()
490
491             self.assertEqual(writer.size(), 15)
492             self.assertEqual(b"01234abcdefghij", writer.readfrom(0, 20))
493             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 a925576942e94b2ef57a066101b48876+10 0:5:count.txt 10:10:count.txt\n", c.portable_manifest_text())
494
495     def test_write_large_rewrite(self):
496         keep = ArvadosFileWriterTestCase.MockKeep({})
497         api = ArvadosFileWriterTestCase.MockApi({"name":"test_write_large",
498                                                  "manifest_text": ". 3dc0d4bc21f48060bedcb2c91af4f906+32892003 a5de24f4417cfba9d5825eadc2f4ca49+67108000 0:3:count.txt 32892006:67107997:count.txt 3:32892000:count.txt\n",
499                                                  "replication_desired":None},
500                                                 {"uuid":"zzzzz-4zz18-mockcollection0",
501                                                  "manifest_text": ". 3dc0d4bc21f48060bedcb2c91af4f906+32892003 a5de24f4417cfba9d5825eadc2f4ca49+67108000 0:3:count.txt 32892006:67107997:count.txt 3:32892000:count.txt\n",
502                                                  "portable_data_hash":"217665c6b713e1b78dfba7ebd42344db+156"})
503         with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
504                              api_client=api, keep_client=keep) as c:
505             writer = c.open("count.txt", "rb+")
506             text = b''.join([b"0123456789" for a in range(0, 100)])
507             for b in range(0, 100000):
508                 writer.write(text)
509             writer.seek(0, os.SEEK_SET)
510             writer.write("foo")
511             self.assertEqual(writer.size(), 100000000)
512
513             self.assertIsNone(c.manifest_locator())
514             self.assertTrue(c.modified())
515             c.save_new("test_write_large")
516             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
517             self.assertFalse(c.modified())
518
519     def test_create(self):
520         keep = ArvadosFileWriterTestCase.MockKeep({})
521         api = ArvadosFileWriterTestCase.MockApi({
522             "name":"test_create",
523             "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n",
524             "replication_desired":None,
525         }, {
526             "uuid":"zzzzz-4zz18-mockcollection0",
527             "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n",
528             "portable_data_hash":"7a461a8c58601798f690f8b368ac4423+51",
529         })
530         with Collection(api_client=api, keep_client=keep) as c:
531             writer = c.open("count.txt", "wb+")
532             self.assertEqual(writer.size(), 0)
533             writer.write("01234567")
534             self.assertEqual(writer.size(), 8)
535
536             self.assertIsNone(c.manifest_locator())
537             self.assertTrue(c.modified())
538             self.assertIsNone(keep.get("2e9ec317e197819358fbc43afca7d837+8"))
539             c.save_new("test_create")
540             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
541             self.assertFalse(c.modified())
542             self.assertEqual(b"01234567", keep.get("2e9ec317e197819358fbc43afca7d837+8"))
543
544
545     def test_create_subdir(self):
546         keep = ArvadosFileWriterTestCase.MockKeep({})
547         api = ArvadosFileWriterTestCase.MockApi({"name":"test_create",
548                                                  "manifest_text":"./foo/bar 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n",
549                                                  "replication_desired":None},
550                                                 {"uuid":"zzzzz-4zz18-mockcollection0",
551                                                  "manifest_text":"./foo/bar 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n",
552                                                  "portable_data_hash":"1b02aaa62528d28a5be41651cbb9d7c7+59"})
553         with Collection(api_client=api, keep_client=keep) as c:
554             self.assertIsNone(c.api_response())
555             writer = c.open("foo/bar/count.txt", "wb+")
556             writer.write("01234567")
557             self.assertFalse(c.committed())
558             c.save_new("test_create")
559             self.assertTrue(c.committed())
560             self.assertEqual(c.api_response(), api.response)
561
562     def test_overwrite(self):
563         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
564         api = ArvadosFileWriterTestCase.MockApi({"name":"test_overwrite",
565                                                  "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n",
566                                                  "replication_desired":None},
567                                                 {"uuid":"zzzzz-4zz18-mockcollection0",
568                                                  "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n",
569                                                  "portable_data_hash":"7a461a8c58601798f690f8b368ac4423+51"})
570         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
571                              api_client=api, keep_client=keep) as c:
572             writer = c.open("count.txt", "wb+")
573             self.assertEqual(writer.size(), 0)
574             writer.write("01234567")
575             self.assertEqual(writer.size(), 8)
576
577             self.assertIsNone(c.manifest_locator())
578             self.assertTrue(c.modified())
579             c.save_new("test_overwrite")
580             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
581             self.assertFalse(c.modified())
582
583     def test_file_not_found(self):
584         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n') as c:
585             with self.assertRaises(IOError):
586                 writer = c.open("nocount.txt", "rb")
587
588     def test_cannot_open_directory(self):
589         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n') as c:
590             with self.assertRaises(IOError):
591                 writer = c.open(".", "rb")
592
593     def test_create_multiple(self):
594         keep = ArvadosFileWriterTestCase.MockKeep({})
595         api = ArvadosFileWriterTestCase.MockApi({"name":"test_create_multiple",
596                                                  "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 e8dc4081b13434b45189a720b77b6818+8 0:8:count1.txt 8:8:count2.txt\n",
597                                                  "replication_desired":None},
598                                                 {"uuid":"zzzzz-4zz18-mockcollection0",
599                                                  "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 e8dc4081b13434b45189a720b77b6818+8 0:8:count1.txt 8:8:count2.txt\n",
600                                                  "portable_data_hash":"71e7bb6c00d31fc2b4364199fd97be08+102"})
601         with Collection(api_client=api, keep_client=keep) as c:
602             w1 = c.open("count1.txt", "wb")
603             w2 = c.open("count2.txt", "wb")
604             w1.write("01234567")
605             w2.write("abcdefgh")
606             self.assertEqual(w1.size(), 8)
607             self.assertEqual(w2.size(), 8)
608
609             self.assertIsNone(c.manifest_locator())
610             self.assertTrue(c.modified())
611             self.assertIsNone(keep.get("2e9ec317e197819358fbc43afca7d837+8"))
612             c.save_new("test_create_multiple")
613             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
614             self.assertFalse(c.modified())
615             self.assertEqual(b"01234567", keep.get("2e9ec317e197819358fbc43afca7d837+8"))
616
617
class ArvadosFileReaderTestCase(StreamFileReaderTestCase):
    """Tests for ArvadosFileReader, extending StreamFileReaderTestCase.

    make_count_reader() builds a reader on top of in-memory stand-ins for
    the parent collection and its block manager.
    """

    class MockParent(object):
        """Stand-in for the collection that owns an ArvadosFile."""

        class MockBlockMgr(object):
            """Block manager that serves block data from a plain dict."""

            def __init__(self, blocks, nocache):
                self.nocache = nocache
                self.blocks = blocks

            def block_prefetch(self, loc):
                """Accept a prefetch request and do nothing."""

            def get_block_contents(self, loc, num_retries=0, cache_only=False):
                """Return the data stored under loc.

                When the manager was built with nocache set, a cache_only
                request returns None instead.  num_retries is ignored.
                """
                cache_miss = self.nocache and cache_only
                return None if cache_miss else self.blocks[loc]

        def __init__(self, blocks, nocache):
            self.lock = arvados.arvfile.NoopLock()
            self.nocache = nocache
            self.blocks = blocks

        def root_collection(self):
            """Return this object; the mock acts as its own root."""
            return self

        def _my_block_manager(self):
            """Build a new MockBlockMgr over this parent's blocks."""
            manager_class = ArvadosFileReaderTestCase.MockParent.MockBlockMgr
            return manager_class(self.blocks, self.nocache)


    def make_count_reader(self, nocache=False):
        """Return a binary reader for a mock file named "count.txt".

        The stream holds three 5-byte blocks laid end to end.  Each of the
        three segments covers 3 bytes; the tests in this class read the
        file back as b'123456789'.
        """
        blocks = {}
        stream = []
        offset = 0
        for chunk in (b'01234', b'34567', b'67890'):
            locator = tutil.str_keep_locator(chunk)
            size = len(chunk)
            blocks[locator] = chunk
            stream.append(Range(locator, offset, size))
            offset += size
        segments = [Range(first, second, 3)
                    for first, second in ((1, 0), (6, 3), (11, 6))]
        parent = ArvadosFileReaderTestCase.MockParent(blocks, nocache)
        arvfile = ArvadosFile(parent, "count.txt", stream=stream, segments=segments)
        return ArvadosFileReader(arvfile, mode="rb")

    def test_read_block_crossing_behavior(self):
        # read() needs to return all the data requested if possible, even if it
        # crosses uncached blocks: https://arvados.org/issues/5856
        reader = self.make_count_reader(nocache=True)
        self.assertEqual(b'12345678', reader.read(8))

    def test_successive_reads(self):
        # Override StreamFileReaderTestCase.test_successive_reads
        reader = self.make_count_reader(nocache=True)
        # Four reads of up to 4 bytes each: two full, one short, one empty.
        for expected in (b'1234', b'5678', b'9', b''):
            self.assertEqual(expected, reader.read(4))

    def test_tell_after_block_read(self):
        # Override StreamFileReaderTestCase.test_tell_after_block_read
        reader = self.make_count_reader(nocache=True)
        self.assertEqual(b'12345678', reader.read(8))
        self.assertEqual(8, reader.tell())

    def test_prefetch(self):
        # Reading the first 4 bytes only needs the first block, yet both
        # block locators must show up in the mock Keep client's request log.
        first_block = "2e9ec317e197819358fbc43afca7d837+8"
        second_block = "e8dc4081b13434b45189a720b77b6818+8"
        keep = ArvadosFileWriterTestCase.MockKeep({
            first_block: b"01234567",
            second_block: b"abcdefgh",
        })
        manifest = ". 2e9ec317e197819358fbc43afca7d837+8 e8dc4081b13434b45189a720b77b6818+8 0:16:count.txt\n"
        with Collection(manifest, keep_client=keep) as c:
            reader = c.open("count.txt", "rb")
            self.assertEqual(b"0123", reader.read(4))
        # Checked only after the collection context has exited.
        for locator in (first_block, second_block):
            self.assertIn(locator, keep.requests)

    def test__eq__from_manifest(self):
        # Two files loaded from identical manifests compare equal.
        manifest = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt'
        with Collection(manifest) as c1, Collection(manifest) as c2:
            left, right = c1["count1.txt"], c2["count1.txt"]
            # Exercise both operators explicitly.
            self.assertTrue(left == right)
            self.assertFalse(left != right)

    def test__eq__from_writes(self):
        # A file written locally compares equal to the file described by the
        # manifest when the written bytes match the manifest's block.
        manifest = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt'
        with Collection(manifest) as c1, Collection() as c2:
            writer = c2.open("count1.txt", "wb")
            writer.write("0123456789")

            left, right = c1["count1.txt"], c2["count1.txt"]
            self.assertTrue(left == right)
            self.assertFalse(left != right)

    def test__ne__(self):
        # Same length, different bytes: the two files must compare unequal.
        manifest = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt'
        with Collection(manifest) as c1, Collection() as c2:
            writer = c2.open("count1.txt", "wb")
            writer.write("1234567890")

            left, right = c1["count1.txt"], c2["count1.txt"]
            self.assertTrue(left != right)
            self.assertFalse(left == right)
711
712
class ArvadosFileReadTestCase(unittest.TestCase, StreamRetryTestMixin):
    """Supply reader_for()/read_for_test() backed by ArvadosFileReader.read().

    manifest_for() and keep_client() are not defined here; presumably they
    come from StreamRetryTestMixin.
    """

    def reader_for(self, coll_name, **kwargs):
        """Return an ArvadosFileReader over the blocks of a named manifest.

        Extra keyword arguments go to ArvadosFileReader; 'mode' defaults
        to 'rb' when the caller does not supply it.
        """
        segments = []
        stream = []
        offset = 0
        for token in self.manifest_for(coll_name).split():
            try:
                locator = KeepLocator(token)
                segments.append(Range(offset, offset, locator.size))
                stream.append(Range(token, offset, locator.size))
                offset += locator.size
            except ValueError:
                # Tokens that raise ValueError here are skipped.
                continue

        block_manager = arvados.arvfile._BlockManager(self.keep_client())
        block_manager.prefetch_enabled = False
        collection = Collection(keep_client=self.keep_client(),
                                block_manager=block_manager)
        arvfile = ArvadosFile(collection, "test", stream=stream, segments=segments)
        kwargs.setdefault('mode', 'rb')
        return ArvadosFileReader(arvfile, **kwargs)

    def read_for_test(self, reader, byte_count, **kwargs):
        """Read byte_count bytes using reader.read()."""
        data = reader.read(byte_count, **kwargs)
        return data
738
739
class ArvadosFileReadFromTestCase(ArvadosFileReadTestCase):
    """Variant of ArvadosFileReadTestCase that reads with readfrom()."""

    def read_for_test(self, reader, byte_count, **kwargs):
        """Read byte_count bytes starting at offset 0 via reader.readfrom()."""
        start = 0
        data = reader.readfrom(start, byte_count, **kwargs)
        return data
743
744
class ArvadosFileReadAllTestCase(ArvadosFileReadTestCase):
    """Variant of ArvadosFileReadTestCase that reads with readall()."""

    def read_for_test(self, reader, byte_count, **kwargs):
        """Join every chunk produced by reader.readall(); byte_count is unused."""
        chunks = reader.readall(**kwargs)
        return b''.join(chunks)
748
749
class ArvadosFileReadAllDecompressedTestCase(ArvadosFileReadTestCase):
    """Variant of ArvadosFileReadTestCase that reads with readall_decompressed()."""

    def read_for_test(self, reader, byte_count, **kwargs):
        """Join every chunk from reader.readall_decompressed(); byte_count is unused."""
        chunks = reader.readall_decompressed(**kwargs)
        return b''.join(chunks)
753
754
class ArvadosFileReadlinesTestCase(ArvadosFileReadTestCase):
    """Variant of ArvadosFileReadTestCase that reads with readlines()."""

    def read_for_test(self, reader, byte_count, **kwargs):
        """Join the lines from reader.readlines() and encode them; byte_count is unused."""
        lines = reader.readlines(**kwargs)
        text = ''.join(lines)
        return text.encode()
758
759
class ArvadosFileTestCase(unittest.TestCase):
    """Tests for the permission_expired() check on files in a collection."""

    def datetime_to_hex(self, dt):
        """Return dt as whole seconds from time.mktime(), in hex without '0x'."""
        seconds = int(time.mktime(dt.timetuple()))
        return hex(seconds)[2:]

    def test_permission_expired(self):
        # The hex timestamp is substituted after the '@' in the block
        # locator's +A hint.
        base_manifest = ". 781e5e245d69b566979b86e28d23f2c7+10+A715fd31f8111894f717eb1003c1b0216799dd9ec@{} 0:10:count.txt\n"
        week = datetime.timedelta(days=7)
        month = datetime.timedelta(days=30)
        now = datetime.datetime.now()
        a_week_ago = now - week
        a_month_ago = now - month
        a_week_from_now = now + week

        # Timestamp in the future: not expired as of the default time.
        future_manifest = base_manifest.format(self.datetime_to_hex(a_week_from_now))
        with Collection(future_manifest) as c:
            self.assertFalse(c.find('count.txt').permission_expired())

        # Timestamp a week in the past: expired as of the default time and
        # as of next week, but not as of a month ago.
        past_manifest = base_manifest.format(self.datetime_to_hex(a_week_ago))
        with Collection(past_manifest) as c:
            count_file = c.find('count.txt')
            self.assertTrue(count_file.permission_expired())
            self.assertTrue(count_file.permission_expired(a_week_from_now))
            self.assertFalse(count_file.permission_expired(a_month_ago))
777
778
779 class BlockManagerTest(unittest.TestCase):
780     def test_bufferblock_append(self):
781         keep = ArvadosFileWriterTestCase.MockKeep({})
782         with arvados.arvfile._BlockManager(keep) as blockmanager:
783             bufferblock = blockmanager.alloc_bufferblock()
784             bufferblock.append("foo")
785
786             self.assertEqual(bufferblock.size(), 3)
787             self.assertEqual(bufferblock.buffer_view[0:3], b"foo")
788             self.assertEqual(bufferblock.locator(), "acbd18db4cc2f85cedef654fccc4a4d8+3")
789
790             bufferblock.append("bar")
791
792             self.assertEqual(bufferblock.size(), 6)
793             self.assertEqual(bufferblock.buffer_view[0:6], b"foobar")
794             self.assertEqual(bufferblock.locator(), "3858f62230ac3c915f300c664312c63f+6")
795
796             bufferblock.set_state(arvados.arvfile._BufferBlock.PENDING)
797             with self.assertRaises(arvados.errors.AssertionError):
798                 bufferblock.append("bar")
799
800     def test_bufferblock_dup(self):
801         keep = ArvadosFileWriterTestCase.MockKeep({})
802         with arvados.arvfile._BlockManager(keep) as blockmanager:
803             bufferblock = blockmanager.alloc_bufferblock()
804             bufferblock.append("foo")
805
806             self.assertEqual(bufferblock.size(), 3)
807             self.assertEqual(bufferblock.buffer_view[0:3], b"foo")
808             self.assertEqual(bufferblock.locator(), "acbd18db4cc2f85cedef654fccc4a4d8+3")
809             bufferblock.set_state(arvados.arvfile._BufferBlock.PENDING)
810
811             bufferblock2 = blockmanager.dup_block(bufferblock, None)
812             self.assertNotEqual(bufferblock.blockid, bufferblock2.blockid)
813
814             bufferblock2.append("bar")
815
816             self.assertEqual(bufferblock2.size(), 6)
817             self.assertEqual(bufferblock2.buffer_view[0:6], b"foobar")
818             self.assertEqual(bufferblock2.locator(), "3858f62230ac3c915f300c664312c63f+6")
819
820             self.assertEqual(bufferblock.size(), 3)
821             self.assertEqual(bufferblock.buffer_view[0:3], b"foo")
822             self.assertEqual(bufferblock.locator(), "acbd18db4cc2f85cedef654fccc4a4d8+3")
823
824     def test_bufferblock_get(self):
825         keep = ArvadosFileWriterTestCase.MockKeep({
826             "781e5e245d69b566979b86e28d23f2c7+10": b"0123456789",
827         })
828         with arvados.arvfile._BlockManager(keep) as blockmanager:
829             bufferblock = blockmanager.alloc_bufferblock()
830             bufferblock.append("foo")
831
832             self.assertEqual(blockmanager.get_block_contents("781e5e245d69b566979b86e28d23f2c7+10", 1), b"0123456789")
833             self.assertEqual(blockmanager.get_block_contents(bufferblock.blockid, 1), b"foo")
834
835     def test_bufferblock_commit(self):
836         mockkeep = mock.MagicMock()
837         with arvados.arvfile._BlockManager(mockkeep) as blockmanager:
838             bufferblock = blockmanager.alloc_bufferblock()
839             bufferblock.owner = mock.MagicMock(spec=arvados.arvfile.ArvadosFile)
840             def flush(sync=None):
841                 blockmanager.commit_bufferblock(bufferblock, sync)
842             bufferblock.owner.flush.side_effect = flush
843             bufferblock.append("foo")
844             blockmanager.commit_all()
845             self.assertTrue(bufferblock.owner.flush.called)
846             self.assertTrue(mockkeep.put.called)
847             self.assertEqual(bufferblock.state(), arvados.arvfile._BufferBlock.COMMITTED)
848             self.assertIsNone(bufferblock.buffer_view)
849
850     def test_bufferblock_commit_pending(self):
851         # Test for bug #7225
852         mockkeep = mock.MagicMock()
853         mockkeep.put.side_effect = lambda x: time.sleep(1)
854         with arvados.arvfile._BlockManager(mockkeep) as blockmanager:
855             bufferblock = blockmanager.alloc_bufferblock()
856             bufferblock.append("foo")
857
858             blockmanager.commit_bufferblock(bufferblock, False)
859             self.assertEqual(bufferblock.state(), arvados.arvfile._BufferBlock.PENDING)
860
861             blockmanager.commit_bufferblock(bufferblock, True)
862             self.assertEqual(bufferblock.state(), arvados.arvfile._BufferBlock.COMMITTED)
863
864
865     def test_bufferblock_commit_with_error(self):
866         mockkeep = mock.MagicMock()
867         mockkeep.put.side_effect = arvados.errors.KeepWriteError("fail")
868         with arvados.arvfile._BlockManager(mockkeep) as blockmanager:
869             bufferblock = blockmanager.alloc_bufferblock()
870             bufferblock.owner = mock.MagicMock(spec=arvados.arvfile.ArvadosFile)
871             def flush(sync=None):
872                 blockmanager.commit_bufferblock(bufferblock, sync)
873             bufferblock.owner.flush.side_effect = flush
874             bufferblock.append("foo")
875             with self.assertRaises(arvados.errors.KeepWriteError) as err:
876                 blockmanager.commit_all()
877             self.assertTrue(bufferblock.owner.flush.called)
878             self.assertEqual(str(err.exception), "Error writing some blocks: block acbd18db4cc2f85cedef654fccc4a4d8+3 raised KeepWriteError (fail)")
879             self.assertEqual(bufferblock.state(), arvados.arvfile._BufferBlock.ERROR)