Merge branch '15361-acr-create-workflow' refs #15361
[arvados.git] / sdk / python / tests / test_arvfile.py
1 # Copyright (C) The Arvados Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: Apache-2.0
4
5 from __future__ import absolute_import
6 from builtins import hex
7 from builtins import str
8 from builtins import range
9 from builtins import object
10 import bz2
11 import datetime
12 import gzip
13 import io
14 import mock
15 import os
16 import unittest
17 import time
18
19 import arvados
20 from arvados._ranges import Range
21 from arvados.keep import KeepLocator
22 from arvados.collection import Collection, CollectionReader
23 from arvados.arvfile import ArvadosFile, ArvadosFileReader
24
25 from . import arvados_testutil as tutil
26 from .test_stream import StreamFileReaderTestCase, StreamRetryTestMixin
27
28 class ArvadosFileWriterTestCase(unittest.TestCase):
29     class MockKeep(object):
30         def __init__(self, blocks):
31             self.blocks = blocks
32             self.requests = []
33         def get(self, locator, num_retries=0):
34             self.requests.append(locator)
35             return self.blocks.get(locator)
36         def get_from_cache(self, locator):
37             self.requests.append(locator)
38             return self.blocks.get(locator)
39         def put(self, data, num_retries=None, copies=None):
40             pdh = tutil.str_keep_locator(data)
41             self.blocks[pdh] = bytes(data)
42             return pdh
43
44     class MockApi(object):
45         def __init__(self, b, r):
46             self.body = b
47             self.response = r
48             self._schema = ArvadosFileWriterTestCase.MockApi.MockSchema()
49             self._rootDesc = {}
50         class MockSchema(object):
51             def __init__(self):
52                 self.schemas = {'Collection': {'properties': {'replication_desired': {'type':'integer'}}}}
53         class MockCollections(object):
54             def __init__(self, b, r):
55                 self.body = b
56                 self.response = r
57             class Execute(object):
58                 def __init__(self, r):
59                     self.response = r
60                 def execute(self, num_retries=None):
61                     return self.response
62             def create(self, ensure_unique_name=False, body=None):
63                 if body != self.body:
64                     raise Exception("Body %s does not match expectation %s" % (body, self.body))
65                 return ArvadosFileWriterTestCase.MockApi.MockCollections.Execute(self.response)
66             def update(self, uuid=None, body=None):
67                 return ArvadosFileWriterTestCase.MockApi.MockCollections.Execute(self.response)
68         def collections(self):
69             return ArvadosFileWriterTestCase.MockApi.MockCollections(self.body, self.response)
70
71
72     def test_truncate(self):
73         keep = ArvadosFileWriterTestCase.MockKeep({
74             "781e5e245d69b566979b86e28d23f2c7+10": b"0123456789",
75         })
76         api = ArvadosFileWriterTestCase.MockApi({
77             "name": "test_truncate",
78             "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 0:8:count.txt\n",
79             "replication_desired": None,
80         }, {
81             "uuid": "zzzzz-4zz18-mockcollection0",
82             "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 0:8:count.txt\n",
83             "portable_data_hash":"7fcd0eaac3aad4c31a6a0e756475da92+52",
84         })
85         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
86                         api_client=api, keep_client=keep) as c:
87             writer = c.open("count.txt", "rb+")
88             self.assertEqual(writer.size(), 10)
89             self.assertEqual(b"0123456789", writer.read(12))
90
91             writer.truncate(8)
92
93             # Make sure reading off the end doesn't break
94             self.assertEqual(b"", writer.read(12))
95
96             self.assertEqual(writer.size(), 8)
97             writer.seek(0, os.SEEK_SET)
98             self.assertEqual(b"01234567", writer.read(12))
99
100             self.assertIsNone(c.manifest_locator())
101             self.assertTrue(c.modified())
102             c.save_new("test_truncate")
103             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
104             self.assertFalse(c.modified())
105
106
107     def test_truncate2(self):
108         keep = ArvadosFileWriterTestCase.MockKeep({
109             "781e5e245d69b566979b86e28d23f2c7+10": b"0123456789",
110         })
111         api = ArvadosFileWriterTestCase.MockApi({
112             "name": "test_truncate2",
113             "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 7f614da9329cd3aebf59b91aadc30bf0+67108864 0:12:count.txt\n",
114             "replication_desired": None,
115         }, {
116             "uuid": "zzzzz-4zz18-mockcollection0",
117             "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 7f614da9329cd3aebf59b91aadc30bf0+67108864 0:12:count.txt\n",
118             "portable_data_hash": "272da898abdf86ddc71994835e3155f8+95",
119         })
120         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
121                         api_client=api, keep_client=keep) as c:
122             writer = c.open("count.txt", "rb+")
123             self.assertEqual(writer.size(), 10)
124             self.assertEqual(b"0123456789", writer.read(12))
125
126             # extend file size
127             writer.truncate(12)
128
129             self.assertEqual(writer.size(), 12)
130             writer.seek(0, os.SEEK_SET)
131             self.assertEqual(b"0123456789\x00\x00", writer.read(12))
132
133             self.assertIsNone(c.manifest_locator())
134             self.assertTrue(c.modified())
135             c.save_new("test_truncate2")
136             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
137             self.assertFalse(c.modified())
138
139     def test_truncate3(self):
140         keep = ArvadosFileWriterTestCase.MockKeep({
141             "781e5e245d69b566979b86e28d23f2c7+10": b"0123456789",
142             "a925576942e94b2ef57a066101b48876+10": b"abcdefghij",
143         })
144         api = ArvadosFileWriterTestCase.MockApi({
145             "name": "test_truncate",
146             "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 0:8:count.txt\n",
147             "replication_desired": None,
148         }, {
149             "uuid": "zzzzz-4zz18-mockcollection0",
150             "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 0:8:count.txt\n",
151             "portable_data_hash": "7fcd0eaac3aad4c31a6a0e756475da92+52",
152         })
153         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 a925576942e94b2ef57a066101b48876+10 0:20:count.txt\n',
154                         api_client=api, keep_client=keep) as c:
155             writer = c.open("count.txt", "rb+")
156             self.assertEqual(writer.size(), 20)
157             self.assertEqual(b"0123456789ab", writer.read(12))
158             self.assertEqual(12, writer.tell())
159
160             writer.truncate(8)
161
162             # Make sure reading off the end doesn't break
163             self.assertEqual(12, writer.tell())
164             self.assertEqual(b"", writer.read(12))
165
166             self.assertEqual(writer.size(), 8)
167             self.assertEqual(2, writer.seek(-10, os.SEEK_CUR))
168             self.assertEqual(b"234567", writer.read(12))
169
170             self.assertIsNone(c.manifest_locator())
171             self.assertTrue(c.modified())
172             c.save_new("test_truncate")
173             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
174             self.assertFalse(c.modified())
175
176
177
178     def test_write_to_end(self):
179         keep = ArvadosFileWriterTestCase.MockKeep({
180             "781e5e245d69b566979b86e28d23f2c7+10": b"0123456789",
181         })
182         api = ArvadosFileWriterTestCase.MockApi({
183             "name": "test_append",
184             "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:13:count.txt\n",
185             "replication_desired": None,
186         }, {
187             "uuid": "zzzzz-4zz18-mockcollection0",
188             "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:13:count.txt\n",
189             "portable_data_hash": "c5c3af76565c8efb6a806546bcf073f3+88",
190         })
191         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
192                              api_client=api, keep_client=keep) as c:
193             writer = c.open("count.txt", "rb+")
194             self.assertEqual(writer.size(), 10)
195
196             self.assertEqual(5, writer.seek(5, os.SEEK_SET))
197             self.assertEqual(b"56789", writer.read(8))
198
199             writer.seek(10, os.SEEK_SET)
200             writer.write("foo")
201             self.assertEqual(writer.size(), 13)
202
203             writer.seek(5, os.SEEK_SET)
204             self.assertEqual(b"56789foo", writer.read(8))
205
206             self.assertIsNone(c.manifest_locator())
207             self.assertTrue(c.modified())
208             self.assertIsNone(keep.get("acbd18db4cc2f85cedef654fccc4a4d8+3"))
209
210             c.save_new("test_append")
211             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
212             self.assertFalse(c.modified())
213             self.assertEqual(b"foo", keep.get("acbd18db4cc2f85cedef654fccc4a4d8+3"))
214
215
216     def test_append(self):
217         keep = ArvadosFileWriterTestCase.MockKeep({
218             "781e5e245d69b566979b86e28d23f2c7+10": b"0123456789",
219         })
220         for (mode, convert) in (
221                 ('a+', lambda data: data.decode(encoding='utf-8')),
222                 ('at+', lambda data: data.decode(encoding='utf-8')),
223                 ('ab+', lambda data: data)):
224             c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n', keep_client=keep)
225             writer = c.open("count.txt", mode)
226             self.assertEqual(writer.read(20), convert(b"0123456789"))
227
228             writer.seek(0, os.SEEK_SET)
229             writer.write(convert(b"hello"))
230             self.assertEqual(writer.read(), convert(b""))
231             if 'b' in mode:
232                 writer.seek(-5, os.SEEK_CUR)
233                 self.assertEqual(writer.read(3), convert(b"hel"))
234                 self.assertEqual(writer.read(), convert(b"lo"))
235             else:
236                 with self.assertRaises(IOError):
237                     writer.seek(-5, os.SEEK_CUR)
238                 with self.assertRaises(IOError):
239                     writer.seek(-3, os.SEEK_END)
240             writer.seek(0, os.SEEK_SET)
241             writer.read(7)
242             self.assertEqual(7, writer.tell())
243             self.assertEqual(7, writer.seek(7, os.SEEK_SET))
244
245             writer.seek(0, os.SEEK_SET)
246             self.assertEqual(writer.read(), convert(b"0123456789hello"))
247
248             writer.seek(0)
249             writer.write(convert(b"world"))
250             self.assertEqual(writer.read(), convert(b""))
251             writer.seek(0)
252             self.assertEqual(writer.read(), convert(b"0123456789helloworld"))
253
254             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 fc5e038d38a57032085441e7fe7010b0+10 0:20:count.txt\n", c.portable_manifest_text())
255
256     def test_write_at_beginning(self):
257         keep = ArvadosFileWriterTestCase.MockKeep({
258             "781e5e245d69b566979b86e28d23f2c7+10": b"0123456789",
259         })
260         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
261                              keep_client=keep) as c:
262             writer = c.open("count.txt", "rb+")
263             self.assertEqual(b"0123456789", writer.readfrom(0, 13))
264             writer.seek(0, os.SEEK_SET)
265             writer.write("foo")
266             self.assertEqual(writer.size(), 10)
267             self.assertEqual(b"foo3456789", writer.readfrom(0, 13))
268             self.assertEqual(". acbd18db4cc2f85cedef654fccc4a4d8+3 781e5e245d69b566979b86e28d23f2c7+10 0:3:count.txt 6:7:count.txt\n", c.portable_manifest_text())
269
270     def test_write_empty(self):
271         keep = ArvadosFileWriterTestCase.MockKeep({})
272         with Collection(keep_client=keep) as c:
273             writer = c.open("count.txt", "wb")
274             self.assertEqual(writer.size(), 0)
275             self.assertEqual(". d41d8cd98f00b204e9800998ecf8427e+0 0:0:count.txt\n", c.portable_manifest_text())
276
277     def test_save_manifest_text(self):
278         keep = ArvadosFileWriterTestCase.MockKeep({})
279         with Collection(keep_client=keep) as c:
280             writer = c.open("count.txt", "wb")
281             writer.write(b"0123456789")
282             self.assertEqual('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n', c.portable_manifest_text())
283             self.assertNotIn('781e5e245d69b566979b86e28d23f2c7+10', keep.blocks)
284
285             self.assertEqual('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n', c.save_new(create_collection_record=False))
286             self.assertIn('781e5e245d69b566979b86e28d23f2c7+10', keep.blocks)
287
288     def test_get_manifest_text_commits(self):
289          keep = ArvadosFileWriterTestCase.MockKeep({})
290          with Collection(keep_client=keep) as c:
291              writer = c.open("count.txt", "wb")
292              writer.write("0123456789")
293              self.assertEqual('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n', c.portable_manifest_text())
294              self.assertNotIn('781e5e245d69b566979b86e28d23f2c7+10', keep.blocks)
295              self.assertEqual('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n', c.manifest_text())
296              self.assertIn('781e5e245d69b566979b86e28d23f2c7+10', keep.blocks)
297
298
299     def test_write_in_middle(self):
300         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": b"0123456789"})
301         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
302                              keep_client=keep) as c:
303             writer = c.open("count.txt", "rb+")
304             self.assertEqual(b"0123456789", writer.readfrom(0, 13))
305             writer.seek(3, os.SEEK_SET)
306             writer.write("foo")
307             self.assertEqual(writer.size(), 10)
308             self.assertEqual(b"012foo6789", writer.readfrom(0, 13))
309             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:count.txt 10:3:count.txt 6:4:count.txt\n", c.portable_manifest_text())
310
311     def test_write_at_end(self):
312         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": b"0123456789"})
313         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
314                              keep_client=keep) as c:
315             writer = c.open("count.txt", "rb+")
316             self.assertEqual(b"0123456789", writer.readfrom(0, 13))
317             writer.seek(7, os.SEEK_SET)
318             writer.write("foo")
319             self.assertEqual(writer.size(), 10)
320             self.assertEqual(b"0123456foo", writer.readfrom(0, 13))
321             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:7:count.txt 10:3:count.txt\n", c.portable_manifest_text())
322
323     def test_write_across_segment_boundary(self):
324         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": b"0123456789"})
325         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt 0:10:count.txt\n',
326                              keep_client=keep) as c:
327             writer = c.open("count.txt", "rb+")
328             self.assertEqual(b"012345678901234", writer.readfrom(0, 15))
329             writer.seek(7, os.SEEK_SET)
330             writer.write("foobar")
331             self.assertEqual(writer.size(), 20)
332             self.assertEqual(b"0123456foobar34", writer.readfrom(0, 15))
333             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 3858f62230ac3c915f300c664312c63f+6 0:7:count.txt 10:6:count.txt 3:7:count.txt\n", c.portable_manifest_text())
334
335     def test_write_across_several_segments(self):
336         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": b"0123456789"})
337         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:4:count.txt 0:4:count.txt 0:4:count.txt',
338                              keep_client=keep) as c:
339             writer = c.open("count.txt", "rb+")
340             self.assertEqual(b"012301230123", writer.readfrom(0, 15))
341             writer.seek(2, os.SEEK_SET)
342             writer.write("abcdefg")
343             self.assertEqual(writer.size(), 12)
344             self.assertEqual(b"01abcdefg123", writer.readfrom(0, 15))
345             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 7ac66c0f148de9519b8bd264312c4d64+7 0:2:count.txt 10:7:count.txt 1:3:count.txt\n", c.portable_manifest_text())
346
347     def test_write_large(self):
348         keep = ArvadosFileWriterTestCase.MockKeep({})
349         api = ArvadosFileWriterTestCase.MockApi({"name":"test_write_large",
350                                                  "manifest_text": ". a5de24f4417cfba9d5825eadc2f4ca49+67108000 598cc1a4ccaef8ab6e4724d87e675d78+32892000 0:100000000:count.txt\n",
351                                                  "replication_desired":None},
352                                                 {"uuid":"zzzzz-4zz18-mockcollection0",
353                                                  "manifest_text": ". a5de24f4417cfba9d5825eadc2f4ca49+67108000 598cc1a4ccaef8ab6e4724d87e675d78+32892000 0:100000000:count.txt\n",
354                                                  "portable_data_hash":"9132ca8e3f671c76103a38f5bc24328c+108"})
355         with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
356                              api_client=api, keep_client=keep) as c:
357             writer = c.open("count.txt", "rb+")
358             text = "0123456789" * 100
359             for b in range(0, 100000):
360                 writer.write(text)
361             self.assertEqual(writer.size(), 100000000)
362
363             self.assertIsNone(c.manifest_locator())
364             self.assertTrue(c.modified())
365             c.save_new("test_write_large")
366             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
367             self.assertFalse(c.modified())
368
369
370     def test_large_write(self):
371         keep = ArvadosFileWriterTestCase.MockKeep({})
372         api = ArvadosFileWriterTestCase.MockApi({}, {})
373         with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
374                              api_client=api, keep_client=keep) as c:
375             writer = c.open("count.txt", "rb+")
376             self.assertEqual(writer.size(), 0)
377
378             text = "0123456789"
379             writer.write(text)
380             text = "0123456789" * 9999999
381             writer.write(text)
382             self.assertEqual(writer.size(), 100000000)
383
384             self.assertEqual(c.manifest_text(), ". 781e5e245d69b566979b86e28d23f2c7+10 48dd23ea1645fd47d789804d71b5bb8e+67108864 77c57dc6ac5a10bb2205caaa73187994+32891126 0:100000000:count.txt\n")
385
386     def test_sparse_write(self):
387         keep = ArvadosFileWriterTestCase.MockKeep({})
388         api = ArvadosFileWriterTestCase.MockApi({}, {})
389         with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
390                              api_client=api, keep_client=keep) as c:
391             writer = c.open("count.txt", "rb+")
392             self.assertEqual(writer.size(), 0)
393
394             text = b"0123456789"
395             writer.seek(2)
396             writer.write(text)
397             self.assertEqual(writer.size(), 12)
398             writer.seek(0, os.SEEK_SET)
399             self.assertEqual(writer.read(), b"\x00\x00"+text)
400
401             self.assertEqual(c.manifest_text(), ". 7f614da9329cd3aebf59b91aadc30bf0+67108864 781e5e245d69b566979b86e28d23f2c7+10 0:2:count.txt 67108864:10:count.txt\n")
402
403
404     def test_sparse_write2(self):
405         keep = ArvadosFileWriterTestCase.MockKeep({})
406         api = ArvadosFileWriterTestCase.MockApi({}, {})
407         with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
408                              api_client=api, keep_client=keep) as c:
409             writer = c.open("count.txt", "rb+")
410             self.assertEqual(writer.size(), 0)
411
412             text = "0123456789"
413             writer.seek((arvados.config.KEEP_BLOCK_SIZE*2) + 2)
414             writer.write(text)
415             self.assertEqual(writer.size(), (arvados.config.KEEP_BLOCK_SIZE*2) + 12)
416             writer.seek(0, os.SEEK_SET)
417
418             self.assertEqual(c.manifest_text(), ". 7f614da9329cd3aebf59b91aadc30bf0+67108864 781e5e245d69b566979b86e28d23f2c7+10 0:67108864:count.txt 0:67108864:count.txt 0:2:count.txt 67108864:10:count.txt\n")
419
420
421     def test_sparse_write3(self):
422         keep = ArvadosFileWriterTestCase.MockKeep({})
423         api = ArvadosFileWriterTestCase.MockApi({}, {})
424         for r in [[0, 1, 2, 3, 4], [4, 3, 2, 1, 0], [3, 2, 0, 4, 1]]:
425             with Collection() as c:
426                 writer = c.open("count.txt", "rb+")
427                 self.assertEqual(writer.size(), 0)
428
429                 for i in r:
430                     w = ("%s" % i) * 10
431                     writer.seek(i*10)
432                     writer.write(w.encode())
433                 writer.seek(0)
434                 self.assertEqual(writer.read(), b"00000000001111111111222222222233333333334444444444")
435
436     def test_sparse_write4(self):
437         keep = ArvadosFileWriterTestCase.MockKeep({})
438         api = ArvadosFileWriterTestCase.MockApi({}, {})
439         for r in [[0, 1, 2, 4], [4, 2, 1, 0], [2, 0, 4, 1]]:
440             with Collection() as c:
441                 writer = c.open("count.txt", "rb+")
442                 self.assertEqual(writer.size(), 0)
443
444                 for i in r:
445                     w = ("%s" % i) * 10
446                     writer.seek(i*10)
447                     writer.write(w.encode())
448                 writer.seek(0)
449                 self.assertEqual(writer.read(), b"000000000011111111112222222222\x00\x00\x00\x00\x00\x00\x00\x00\x00\x004444444444")
450
451
452     def test_rewrite_on_empty_file(self):
453         keep = ArvadosFileWriterTestCase.MockKeep({})
454         with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
455                              keep_client=keep) as c:
456             writer = c.open("count.txt", "rb+")
457             for b in range(0, 10):
458                 writer.seek(0, os.SEEK_SET)
459                 writer.write("0123456789")
460
461             self.assertEqual(writer.size(), 10)
462             self.assertEqual(b"0123456789", writer.readfrom(0, 20))
463             self.assertEqual(". 7a08b07e84641703e5f2c836aa59a170+100 90:10:count.txt\n", c.portable_manifest_text())
464             writer.flush()
465             self.assertEqual(writer.size(), 10)
466             self.assertEqual(b"0123456789", writer.readfrom(0, 20))
467             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n", c.portable_manifest_text())
468
469     def test_rewrite_append_existing_file(self):
470         keep = ArvadosFileWriterTestCase.MockKeep({
471             "781e5e245d69b566979b86e28d23f2c7+10": b"0123456789",
472         })
473         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt',
474                              keep_client=keep) as c:
475             writer = c.open("count.txt", "rb+")
476             for b in range(0, 10):
477                 writer.seek(10, os.SEEK_SET)
478                 writer.write("abcdefghij")
479
480             self.assertEqual(writer.size(), 20)
481             self.assertEqual(b"0123456789abcdefghij", writer.readfrom(0, 20))
482             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 ae5f43bab79cf0be33f025fa97ae7398+100 0:10:count.txt 100:10:count.txt\n", c.portable_manifest_text())
483
484             writer.arvadosfile.flush()
485             self.assertEqual(writer.size(), 20)
486             self.assertEqual(b"0123456789abcdefghij", writer.readfrom(0, 20))
487             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 a925576942e94b2ef57a066101b48876+10 0:20:count.txt\n", c.portable_manifest_text())
488
489     def test_rewrite_over_existing_file(self):
490         keep = ArvadosFileWriterTestCase.MockKeep({
491             "781e5e245d69b566979b86e28d23f2c7+10": b"0123456789",
492         })
493         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt',
494                              keep_client=keep) as c:
495             writer = c.open("count.txt", "rb+")
496             for b in range(0, 10):
497                 writer.seek(5, os.SEEK_SET)
498                 writer.write("abcdefghij")
499
500             self.assertEqual(writer.size(), 15)
501             self.assertEqual(b"01234abcdefghij", writer.readfrom(0, 20))
502             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 ae5f43bab79cf0be33f025fa97ae7398+100 0:5:count.txt 100:10:count.txt\n", c.portable_manifest_text())
503
504             writer.arvadosfile.flush()
505
506             self.assertEqual(writer.size(), 15)
507             self.assertEqual(b"01234abcdefghij", writer.readfrom(0, 20))
508             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 a925576942e94b2ef57a066101b48876+10 0:5:count.txt 10:10:count.txt\n", c.portable_manifest_text())
509
510     def test_write_large_rewrite(self):
511         keep = ArvadosFileWriterTestCase.MockKeep({})
512         api = ArvadosFileWriterTestCase.MockApi({"name":"test_write_large",
513                                                  "manifest_text": ". 3dc0d4bc21f48060bedcb2c91af4f906+32892003 a5de24f4417cfba9d5825eadc2f4ca49+67108000 0:3:count.txt 32892006:67107997:count.txt 3:32892000:count.txt\n",
514                                                  "replication_desired":None},
515                                                 {"uuid":"zzzzz-4zz18-mockcollection0",
516                                                  "manifest_text": ". 3dc0d4bc21f48060bedcb2c91af4f906+32892003 a5de24f4417cfba9d5825eadc2f4ca49+67108000 0:3:count.txt 32892006:67107997:count.txt 3:32892000:count.txt\n",
517                                                  "portable_data_hash":"217665c6b713e1b78dfba7ebd42344db+156"})
518         with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
519                              api_client=api, keep_client=keep) as c:
520             writer = c.open("count.txt", "rb+")
521             text = b''.join([b"0123456789" for a in range(0, 100)])
522             for b in range(0, 100000):
523                 writer.write(text)
524             writer.seek(0, os.SEEK_SET)
525             writer.write("foo")
526             self.assertEqual(writer.size(), 100000000)
527
528             self.assertIsNone(c.manifest_locator())
529             self.assertTrue(c.modified())
530             c.save_new("test_write_large")
531             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
532             self.assertFalse(c.modified())
533
534     def test_create(self):
535         keep = ArvadosFileWriterTestCase.MockKeep({})
536         api = ArvadosFileWriterTestCase.MockApi({
537             "name":"test_create",
538             "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n",
539             "replication_desired":None,
540         }, {
541             "uuid":"zzzzz-4zz18-mockcollection0",
542             "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n",
543             "portable_data_hash":"7a461a8c58601798f690f8b368ac4423+51",
544         })
545         with Collection(api_client=api, keep_client=keep) as c:
546             writer = c.open("count.txt", "wb+")
547             self.assertEqual(writer.size(), 0)
548             writer.write("01234567")
549             self.assertEqual(writer.size(), 8)
550
551             self.assertIsNone(c.manifest_locator())
552             self.assertTrue(c.modified())
553             self.assertIsNone(keep.get("2e9ec317e197819358fbc43afca7d837+8"))
554             c.save_new("test_create")
555             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
556             self.assertFalse(c.modified())
557             self.assertEqual(b"01234567", keep.get("2e9ec317e197819358fbc43afca7d837+8"))
558
559
560     def test_create_subdir(self):
561         keep = ArvadosFileWriterTestCase.MockKeep({})
562         api = ArvadosFileWriterTestCase.MockApi({"name":"test_create",
563                                                  "manifest_text":"./foo/bar 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n",
564                                                  "replication_desired":None},
565                                                 {"uuid":"zzzzz-4zz18-mockcollection0",
566                                                  "manifest_text":"./foo/bar 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n",
567                                                  "portable_data_hash":"1b02aaa62528d28a5be41651cbb9d7c7+59"})
568         with Collection(api_client=api, keep_client=keep) as c:
569             self.assertIsNone(c.api_response())
570             writer = c.open("foo/bar/count.txt", "wb+")
571             writer.write("01234567")
572             self.assertFalse(c.committed())
573             c.save_new("test_create")
574             self.assertTrue(c.committed())
575             self.assertEqual(c.api_response(), api.response)
576
577     def test_overwrite(self):
578         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
579         api = ArvadosFileWriterTestCase.MockApi({"name":"test_overwrite",
580                                                  "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n",
581                                                  "replication_desired":None},
582                                                 {"uuid":"zzzzz-4zz18-mockcollection0",
583                                                  "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n",
584                                                  "portable_data_hash":"7a461a8c58601798f690f8b368ac4423+51"})
585         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
586                              api_client=api, keep_client=keep) as c:
587             writer = c.open("count.txt", "wb+")
588             self.assertEqual(writer.size(), 0)
589             writer.write("01234567")
590             self.assertEqual(writer.size(), 8)
591
592             self.assertIsNone(c.manifest_locator())
593             self.assertTrue(c.modified())
594             c.save_new("test_overwrite")
595             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
596             self.assertFalse(c.modified())
597
598     def test_file_not_found(self):
599         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n') as c:
600             with self.assertRaises(IOError):
601                 writer = c.open("nocount.txt", "rb")
602
603     def test_cannot_open_directory(self):
604         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n') as c:
605             with self.assertRaises(IOError):
606                 writer = c.open(".", "rb")
607
608     def test_create_multiple(self):
609         keep = ArvadosFileWriterTestCase.MockKeep({})
610         api = ArvadosFileWriterTestCase.MockApi({"name":"test_create_multiple",
611                                                  "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 e8dc4081b13434b45189a720b77b6818+8 0:8:count1.txt 8:8:count2.txt\n",
612                                                  "replication_desired":None},
613                                                 {"uuid":"zzzzz-4zz18-mockcollection0",
614                                                  "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 e8dc4081b13434b45189a720b77b6818+8 0:8:count1.txt 8:8:count2.txt\n",
615                                                  "portable_data_hash":"71e7bb6c00d31fc2b4364199fd97be08+102"})
616         with Collection(api_client=api, keep_client=keep) as c:
617             w1 = c.open("count1.txt", "wb")
618             w2 = c.open("count2.txt", "wb")
619             w1.write("01234567")
620             w2.write("abcdefgh")
621             self.assertEqual(w1.size(), 8)
622             self.assertEqual(w2.size(), 8)
623
624             self.assertIsNone(c.manifest_locator())
625             self.assertTrue(c.modified())
626             self.assertIsNone(keep.get("2e9ec317e197819358fbc43afca7d837+8"))
627             c.save_new("test_create_multiple")
628             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
629             self.assertFalse(c.modified())
630             self.assertEqual(b"01234567", keep.get("2e9ec317e197819358fbc43afca7d837+8"))
631
632
class ArvadosFileReaderTestCase(StreamFileReaderTestCase):
    """Reader tests for ArvadosFileReader built on StreamFileReaderTestCase.

    ``make_count_reader`` supplies a reader over an in-memory file; a few
    inherited tests are overridden and collection-level tests are added.
    """

    class MockParent(object):
        """Minimal stand-in for the collection that owns an ArvadosFile."""

        class MockBlockMgr(object):
            """Block manager serving block contents from a plain dict."""

            def __init__(self, blocks, nocache):
                self.blocks = blocks
                self.nocache = nocache

            def block_prefetch(self, loc):
                # Prefetching is a no-op for the in-memory store.
                pass

            def get_block_contents(self, loc, num_retries=0, cache_only=False):
                # With nocache set, a cache-only lookup reports a miss (None);
                # every other lookup returns the stored bytes.
                cache_miss = self.nocache and cache_only
                return None if cache_miss else self.blocks[loc]

        def __init__(self, blocks, nocache):
            self.blocks = blocks
            self.nocache = nocache
            self.lock = arvados.arvfile.NoopLock()

        def root_collection(self):
            return self

        def _my_block_manager(self):
            # A fresh manager is built on every call, sharing the same dict.
            manager_cls = ArvadosFileReaderTestCase.MockParent.MockBlockMgr
            return manager_cls(self.blocks, self.nocache)


    def make_count_reader(self, nocache=False):
        """Return a binary reader over a 9-byte file spread across 3 blocks.

        Three 5-byte blocks are laid out back to back in the stream; the
        file's three 3-byte segments start at stream offsets 1, 6 and 11.
        The tests in this class expect the content to read b'123456789'.
        """
        blocks = {}
        stream = []
        offset = 0
        for data in (b'01234', b'34567', b'67890'):
            locator = tutil.str_keep_locator(data)
            blocks[locator] = data
            stream.append(Range(locator, offset, len(data)))
            offset += len(data)
        segments = [Range(1, 0, 3), Range(6, 3, 3), Range(11, 6, 3)]
        parent = ArvadosFileReaderTestCase.MockParent(blocks, nocache)
        arvfile = ArvadosFile(parent, "count.txt", stream=stream, segments=segments)
        return ArvadosFileReader(arvfile, mode="rb")

    def test_read_block_crossing_behavior(self):
        # read() needs to return all the data requested if possible, even if it
        # crosses uncached blocks: https://arvados.org/issues/5856
        reader = self.make_count_reader(nocache=True)
        self.assertEqual(b'12345678', reader.read(8))

    def test_successive_reads(self):
        # Override StreamFileReaderTestCase.test_successive_reads
        reader = self.make_count_reader(nocache=True)
        for expected in (b'1234', b'5678', b'9', b''):
            self.assertEqual(expected, reader.read(4))

    def test_tell_after_block_read(self):
        # Override StreamFileReaderTestCase.test_tell_after_block_read
        reader = self.make_count_reader(nocache=True)
        self.assertEqual(b'12345678', reader.read(8))
        self.assertEqual(8, reader.tell())

    def test_prefetch(self):
        # Only 4 bytes of the first block are read, yet both block locators
        # are expected to show up in the requests recorded by the mock Keep.
        first_block = "2e9ec317e197819358fbc43afca7d837+8"
        second_block = "e8dc4081b13434b45189a720b77b6818+8"
        keep = ArvadosFileWriterTestCase.MockKeep({
            first_block: b"01234567",
            second_block: b"abcdefgh",
        })
        manifest = ". 2e9ec317e197819358fbc43afca7d837+8 e8dc4081b13434b45189a720b77b6818+8 0:16:count.txt\n"
        with Collection(manifest, keep_client=keep) as coll:
            reader = coll.open("count.txt", "rb")
            self.assertEqual(b"0123", reader.read(4))
        # Checked only after the collection context has exited.
        self.assertIn(first_block, keep.requests)
        self.assertIn(second_block, keep.requests)

    def test__eq__from_manifest(self):
        # Two files loaded from identical manifests compare equal; both the
        # == and != operators are exercised explicitly.
        manifest = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt'
        with Collection(manifest) as c1, Collection(manifest) as c2:
            self.assertTrue(c1["count1.txt"] == c2["count1.txt"])
            self.assertFalse(c1["count1.txt"] != c2["count1.txt"])

    def test__eq__from_writes(self):
        # A file written with the same bytes compares equal to one that was
        # loaded from a manifest.
        manifest = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt'
        with Collection(manifest) as c1, Collection() as c2:
            writer = c2.open("count1.txt", "wb")
            writer.write("0123456789")

            self.assertTrue(c1["count1.txt"] == c2["count1.txt"])
            self.assertFalse(c1["count1.txt"] != c2["count1.txt"])

    def test__ne__(self):
        # Same length, different bytes: the two files must compare unequal.
        manifest = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt'
        with Collection(manifest) as c1, Collection() as c2:
            writer = c2.open("count1.txt", "wb")
            writer.write("1234567890")

            self.assertTrue(c1["count1.txt"] != c2["count1.txt"])
            self.assertFalse(c1["count1.txt"] == c2["count1.txt"])
726
727
class ArvadosFileReadTestCase(unittest.TestCase, StreamRetryTestMixin):
    """Supplies the reader_for/read_for_test hooks using ArvadosFileReader.read().

    ``manifest_for`` and ``keep_client`` are not defined here; presumably
    they come from StreamRetryTestMixin -- verify against test_stream.
    """

    def reader_for(self, coll_name, **kwargs):
        """Build an ArvadosFileReader over every block locator in a manifest.

        Each whitespace-separated manifest token that parses as a
        KeepLocator adds one stream range and one file segment of that
        block's size.  Tokens for which a ValueError is raised are ignored.
        ``mode`` defaults to ``'rb'``; other kwargs go to the reader.
        """
        stream = []
        segments = []
        offset = 0
        for token in self.manifest_for(coll_name).split():
            try:
                locator = KeepLocator(token)
                segments.append(Range(offset, offset, locator.size))
                stream.append(Range(token, offset, locator.size))
                offset += locator.size
            except ValueError:
                continue

        # Prefetch is switched off on the block manager used by this reader.
        manager = arvados.arvfile._BlockManager(self.keep_client())
        manager.prefetch_enabled = False
        parent = Collection(keep_client=self.keep_client(), block_manager=manager)
        kwargs.setdefault('mode', 'rb')
        arvfile = ArvadosFile(parent, "test", stream=stream, segments=segments)
        return ArvadosFileReader(arvfile, **kwargs)

    def read_for_test(self, reader, byte_count, **kwargs):
        """Read ``byte_count`` bytes from ``reader`` via ``read()``."""
        data = reader.read(byte_count, **kwargs)
        return data
753
754
class ArvadosFileReadFromTestCase(ArvadosFileReadTestCase):
    """ArvadosFileReadTestCase variant whose read hook uses ``readfrom()``."""

    def read_for_test(self, reader, byte_count, **kwargs):
        # Always reads from the beginning of the file.
        start = 0
        return reader.readfrom(start, byte_count, **kwargs)
758
759
class ArvadosFileReadAllTestCase(ArvadosFileReadTestCase):
    """ArvadosFileReadTestCase variant whose read hook uses ``readall()``."""

    def read_for_test(self, reader, byte_count, **kwargs):
        # byte_count is not used: every chunk yielded by readall() is joined.
        chunks = reader.readall(**kwargs)
        return b''.join(chunks)
763
764
class ArvadosFileReadAllDecompressedTestCase(ArvadosFileReadTestCase):
    """ArvadosFileReadTestCase variant reading via ``readall_decompressed()``."""

    def read_for_test(self, reader, byte_count, **kwargs):
        # byte_count is not used: every chunk that is yielded gets joined.
        chunks = reader.readall_decompressed(**kwargs)
        return b''.join(chunks)
768
769
class ArvadosFileReadlinesTestCase(ArvadosFileReadTestCase):
    """ArvadosFileReadTestCase variant whose read hook uses ``readlines()``."""

    def read_for_test(self, reader, byte_count, **kwargs):
        # byte_count is not used.  The lines are joined as text and then
        # encoded, so the hook hands back an encoded string like its siblings.
        lines = reader.readlines(**kwargs)
        text = ''.join(lines)
        return text.encode()
773
774
class ArvadosFileTestCase(unittest.TestCase):
    """Tests for ``permission_expired()`` on files found in a collection."""

    def datetime_to_hex(self, dt):
        """Return ``dt`` as whole epoch seconds in hex, without the ``0x``.

        The conversion goes through ``time.mktime``, which interprets the
        time tuple as local time.
        """
        epoch_seconds = int(time.mktime(dt.timetuple()))
        return hex(epoch_seconds)[2:]

    def test_permission_expired(self):
        # The "{}" after "@" in the locator's +A hint is filled with a hex
        # timestamp; the assertions below treat it as the expiry time.
        base_manifest = ". 781e5e245d69b566979b86e28d23f2c7+10+A715fd31f8111894f717eb1003c1b0216799dd9ec@{} 0:10:count.txt\n"
        now = datetime.datetime.now()
        one_week = datetime.timedelta(days=7)
        a_week_ago = now - one_week
        a_month_ago = now - datetime.timedelta(days=30)
        a_week_from_now = now + one_week

        # Timestamp in the future: not expired.
        future_manifest = base_manifest.format(self.datetime_to_hex(a_week_from_now))
        with Collection(future_manifest) as c:
            self.assertFalse(c.find('count.txt').permission_expired())

        # Timestamp in the past: expired now and relative to a later time,
        # but not relative to a time before the timestamp.
        past_manifest = base_manifest.format(self.datetime_to_hex(a_week_ago))
        with Collection(past_manifest) as c:
            count_file = c.find('count.txt')
            self.assertTrue(count_file.permission_expired())
            self.assertTrue(count_file.permission_expired(a_week_from_now))
            self.assertFalse(count_file.permission_expired(a_month_ago))
792
793
794 class BlockManagerTest(unittest.TestCase):
795     def test_bufferblock_append(self):
796         keep = ArvadosFileWriterTestCase.MockKeep({})
797         with arvados.arvfile._BlockManager(keep) as blockmanager:
798             bufferblock = blockmanager.alloc_bufferblock()
799             bufferblock.append("foo")
800
801             self.assertEqual(bufferblock.size(), 3)
802             self.assertEqual(bufferblock.buffer_view[0:3], b"foo")
803             self.assertEqual(bufferblock.locator(), "acbd18db4cc2f85cedef654fccc4a4d8+3")
804
805             bufferblock.append("bar")
806
807             self.assertEqual(bufferblock.size(), 6)
808             self.assertEqual(bufferblock.buffer_view[0:6], b"foobar")
809             self.assertEqual(bufferblock.locator(), "3858f62230ac3c915f300c664312c63f+6")
810
811             bufferblock.set_state(arvados.arvfile._BufferBlock.PENDING)
812             with self.assertRaises(arvados.errors.AssertionError):
813                 bufferblock.append("bar")
814
815     def test_bufferblock_dup(self):
816         keep = ArvadosFileWriterTestCase.MockKeep({})
817         with arvados.arvfile._BlockManager(keep) as blockmanager:
818             bufferblock = blockmanager.alloc_bufferblock()
819             bufferblock.append("foo")
820
821             self.assertEqual(bufferblock.size(), 3)
822             self.assertEqual(bufferblock.buffer_view[0:3], b"foo")
823             self.assertEqual(bufferblock.locator(), "acbd18db4cc2f85cedef654fccc4a4d8+3")
824             bufferblock.set_state(arvados.arvfile._BufferBlock.PENDING)
825
826             bufferblock2 = blockmanager.dup_block(bufferblock, None)
827             self.assertNotEqual(bufferblock.blockid, bufferblock2.blockid)
828
829             bufferblock2.append("bar")
830
831             self.assertEqual(bufferblock2.size(), 6)
832             self.assertEqual(bufferblock2.buffer_view[0:6], b"foobar")
833             self.assertEqual(bufferblock2.locator(), "3858f62230ac3c915f300c664312c63f+6")
834
835             self.assertEqual(bufferblock.size(), 3)
836             self.assertEqual(bufferblock.buffer_view[0:3], b"foo")
837             self.assertEqual(bufferblock.locator(), "acbd18db4cc2f85cedef654fccc4a4d8+3")
838
839     def test_bufferblock_get(self):
840         keep = ArvadosFileWriterTestCase.MockKeep({
841             "781e5e245d69b566979b86e28d23f2c7+10": b"0123456789",
842         })
843         with arvados.arvfile._BlockManager(keep) as blockmanager:
844             bufferblock = blockmanager.alloc_bufferblock()
845             bufferblock.append("foo")
846
847             self.assertEqual(blockmanager.get_block_contents("781e5e245d69b566979b86e28d23f2c7+10", 1), b"0123456789")
848             self.assertEqual(blockmanager.get_block_contents(bufferblock.blockid, 1), b"foo")
849
850     def test_bufferblock_commit(self):
851         mockkeep = mock.MagicMock()
852         with arvados.arvfile._BlockManager(mockkeep) as blockmanager:
853             bufferblock = blockmanager.alloc_bufferblock()
854             bufferblock.owner = mock.MagicMock(spec=arvados.arvfile.ArvadosFile)
855             def flush(sync=None):
856                 blockmanager.commit_bufferblock(bufferblock, sync)
857             bufferblock.owner.flush.side_effect = flush
858             bufferblock.append("foo")
859             blockmanager.commit_all()
860             self.assertTrue(bufferblock.owner.flush.called)
861             self.assertTrue(mockkeep.put.called)
862             self.assertEqual(bufferblock.state(), arvados.arvfile._BufferBlock.COMMITTED)
863             self.assertIsNone(bufferblock.buffer_view)
864
865     def test_bufferblock_commit_pending(self):
866         # Test for bug #7225
867         mockkeep = mock.MagicMock()
868         mockkeep.put.side_effect = lambda x: time.sleep(1)
869         with arvados.arvfile._BlockManager(mockkeep) as blockmanager:
870             bufferblock = blockmanager.alloc_bufferblock()
871             bufferblock.append("foo")
872
873             blockmanager.commit_bufferblock(bufferblock, False)
874             self.assertEqual(bufferblock.state(), arvados.arvfile._BufferBlock.PENDING)
875
876             blockmanager.commit_bufferblock(bufferblock, True)
877             self.assertEqual(bufferblock.state(), arvados.arvfile._BufferBlock.COMMITTED)
878
879
880     def test_bufferblock_commit_with_error(self):
881         mockkeep = mock.MagicMock()
882         mockkeep.put.side_effect = arvados.errors.KeepWriteError("fail")
883         with arvados.arvfile._BlockManager(mockkeep) as blockmanager:
884             bufferblock = blockmanager.alloc_bufferblock()
885             bufferblock.owner = mock.MagicMock(spec=arvados.arvfile.ArvadosFile)
886             def flush(sync=None):
887                 blockmanager.commit_bufferblock(bufferblock, sync)
888             bufferblock.owner.flush.side_effect = flush
889             bufferblock.append("foo")
890             with self.assertRaises(arvados.errors.KeepWriteError) as err:
891                 blockmanager.commit_all()
892             self.assertTrue(bufferblock.owner.flush.called)
893             self.assertEqual(str(err.exception), "Error writing some blocks: block acbd18db4cc2f85cedef654fccc4a4d8+3 raised KeepWriteError (fail)")
894             self.assertEqual(bufferblock.state(), arvados.arvfile._BufferBlock.ERROR)