19280: explicitly include/exclude primary
[arvados.git] / sdk / python / tests / test_arvfile.py
1 # Copyright (C) The Arvados Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: Apache-2.0
4
5 from __future__ import absolute_import
6 from builtins import hex
7 from builtins import str
8 from builtins import range
9 from builtins import object
10 import datetime
11 import mock
12 import os
13 import unittest
14 import time
15
16 import arvados
17 from arvados._ranges import Range
18 from arvados.keep import KeepLocator
19 from arvados.collection import Collection
20 from arvados.arvfile import ArvadosFile, ArvadosFileReader
21
22 from . import arvados_testutil as tutil
23 from .test_stream import StreamFileReaderTestCase, StreamRetryTestMixin
24
25 class ArvadosFileWriterTestCase(unittest.TestCase):
26     class MockKeep(object):
27         def __init__(self, blocks):
28             self.blocks = blocks
29             self.requests = []
30         def get(self, locator, num_retries=0, prefetch=False):
31             self.requests.append(locator)
32             return self.blocks.get(locator)
33         def get_from_cache(self, locator):
34             self.requests.append(locator)
35             return self.blocks.get(locator)
36         def put(self, data, num_retries=None, copies=None, classes=[]):
37             pdh = tutil.str_keep_locator(data)
38             self.blocks[pdh] = bytes(data)
39             return pdh
40
41     class MockApi(object):
42         def __init__(self, b, r):
43             self.body = b
44             self.response = r
45             self._schema = ArvadosFileWriterTestCase.MockApi.MockSchema()
46             self._rootDesc = {}
47         class MockSchema(object):
48             def __init__(self):
49                 self.schemas = {'Collection': {'properties': {'replication_desired': {'type':'integer'}}}}
50         class MockCollections(object):
51             def __init__(self, b, r):
52                 self.body = b
53                 self.response = r
54             class Execute(object):
55                 def __init__(self, r):
56                     self.response = r
57                 def execute(self, num_retries=None):
58                     return self.response
59             def create(self, ensure_unique_name=False, body=None):
60                 if body != self.body:
61                     raise Exception("Body %s does not match expectation %s" % (body, self.body))
62                 return ArvadosFileWriterTestCase.MockApi.MockCollections.Execute(self.response)
63             def update(self, uuid=None, body=None):
64                 return ArvadosFileWriterTestCase.MockApi.MockCollections.Execute(self.response)
65         def collections(self):
66             return ArvadosFileWriterTestCase.MockApi.MockCollections(self.body, self.response)
67
68
69     def test_truncate(self):
70         keep = ArvadosFileWriterTestCase.MockKeep({
71             "781e5e245d69b566979b86e28d23f2c7+10": b"0123456789",
72         })
73         api = ArvadosFileWriterTestCase.MockApi({
74             "name": "test_truncate",
75             "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 0:8:count.txt\n",
76             "replication_desired": None,
77         }, {
78             "uuid": "zzzzz-4zz18-mockcollection0",
79             "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 0:8:count.txt\n",
80             "portable_data_hash":"7fcd0eaac3aad4c31a6a0e756475da92+52",
81         })
82         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
83                         api_client=api, keep_client=keep) as c:
84             writer = c.open("count.txt", "rb+")
85             self.assertEqual(writer.size(), 10)
86             self.assertEqual(b"0123456789", writer.read(12))
87
88             writer.truncate(8)
89
90             # Make sure reading off the end doesn't break
91             self.assertEqual(b"", writer.read(12))
92
93             self.assertEqual(writer.size(), 8)
94             writer.seek(0, os.SEEK_SET)
95             self.assertEqual(b"01234567", writer.read(12))
96
97             self.assertIsNone(c.manifest_locator())
98             self.assertTrue(c.modified())
99             c.save_new("test_truncate")
100             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
101             self.assertFalse(c.modified())
102
103
104     def test_truncate2(self):
105         keep = ArvadosFileWriterTestCase.MockKeep({
106             "781e5e245d69b566979b86e28d23f2c7+10": b"0123456789",
107         })
108         api = ArvadosFileWriterTestCase.MockApi({
109             "name": "test_truncate2",
110             "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 7f614da9329cd3aebf59b91aadc30bf0+67108864 0:12:count.txt\n",
111             "replication_desired": None,
112         }, {
113             "uuid": "zzzzz-4zz18-mockcollection0",
114             "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 7f614da9329cd3aebf59b91aadc30bf0+67108864 0:12:count.txt\n",
115             "portable_data_hash": "272da898abdf86ddc71994835e3155f8+95",
116         })
117         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
118                         api_client=api, keep_client=keep) as c:
119             writer = c.open("count.txt", "rb+")
120             self.assertEqual(writer.size(), 10)
121             self.assertEqual(b"0123456789", writer.read(12))
122
123             # extend file size
124             writer.truncate(12)
125
126             self.assertEqual(writer.size(), 12)
127             writer.seek(0, os.SEEK_SET)
128             self.assertEqual(b"0123456789\x00\x00", writer.read(12))
129
130             self.assertIsNone(c.manifest_locator())
131             self.assertTrue(c.modified())
132             c.save_new("test_truncate2")
133             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
134             self.assertFalse(c.modified())
135
136     def test_truncate3(self):
137         keep = ArvadosFileWriterTestCase.MockKeep({
138             "781e5e245d69b566979b86e28d23f2c7+10": b"0123456789",
139             "a925576942e94b2ef57a066101b48876+10": b"abcdefghij",
140         })
141         api = ArvadosFileWriterTestCase.MockApi({
142             "name": "test_truncate",
143             "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 0:8:count.txt\n",
144             "replication_desired": None,
145         }, {
146             "uuid": "zzzzz-4zz18-mockcollection0",
147             "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 0:8:count.txt\n",
148             "portable_data_hash": "7fcd0eaac3aad4c31a6a0e756475da92+52",
149         })
150         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 a925576942e94b2ef57a066101b48876+10 0:20:count.txt\n',
151                         api_client=api, keep_client=keep) as c:
152             writer = c.open("count.txt", "rb+")
153             self.assertEqual(writer.size(), 20)
154             self.assertEqual(b"0123456789ab", writer.read(12))
155             self.assertEqual(12, writer.tell())
156
157             writer.truncate(8)
158
159             # Make sure reading off the end doesn't break
160             self.assertEqual(12, writer.tell())
161             self.assertEqual(b"", writer.read(12))
162
163             self.assertEqual(writer.size(), 8)
164             self.assertEqual(2, writer.seek(-10, os.SEEK_CUR))
165             self.assertEqual(b"234567", writer.read(12))
166
167             self.assertIsNone(c.manifest_locator())
168             self.assertTrue(c.modified())
169             c.save_new("test_truncate")
170             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
171             self.assertFalse(c.modified())
172
173     def test_write_to_end(self):
174         keep = ArvadosFileWriterTestCase.MockKeep({
175             "781e5e245d69b566979b86e28d23f2c7+10": b"0123456789",
176         })
177         api = ArvadosFileWriterTestCase.MockApi({
178             "name": "test_append",
179             "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:13:count.txt\n",
180             "replication_desired": None,
181         }, {
182             "uuid": "zzzzz-4zz18-mockcollection0",
183             "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:13:count.txt\n",
184             "portable_data_hash": "c5c3af76565c8efb6a806546bcf073f3+88",
185         })
186         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
187                              api_client=api, keep_client=keep) as c:
188             writer = c.open("count.txt", "rb+")
189             self.assertEqual(writer.size(), 10)
190
191             self.assertEqual(5, writer.seek(5, os.SEEK_SET))
192             self.assertEqual(b"56789", writer.read(8))
193
194             writer.seek(10, os.SEEK_SET)
195             writer.write("foo")
196             self.assertEqual(writer.size(), 13)
197
198             writer.seek(5, os.SEEK_SET)
199             self.assertEqual(b"56789foo", writer.read(8))
200
201             self.assertIsNone(c.manifest_locator())
202             self.assertTrue(c.modified())
203             self.assertIsNone(keep.get("acbd18db4cc2f85cedef654fccc4a4d8+3"))
204
205             c.save_new("test_append")
206             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
207             self.assertFalse(c.modified())
208             self.assertEqual(b"foo", keep.get("acbd18db4cc2f85cedef654fccc4a4d8+3"))
209
210
211     def test_append(self):
212         keep = ArvadosFileWriterTestCase.MockKeep({
213             "781e5e245d69b566979b86e28d23f2c7+10": b"0123456789",
214         })
215         for (mode, convert) in (
216                 ('a+', lambda data: data.decode(encoding='utf-8')),
217                 ('at+', lambda data: data.decode(encoding='utf-8')),
218                 ('ab+', lambda data: data)):
219             c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n', keep_client=keep)
220             writer = c.open("count.txt", mode)
221             self.assertEqual(writer.read(20), convert(b"0123456789"))
222
223             writer.seek(0, os.SEEK_SET)
224             writer.write(convert(b"hello"))
225             self.assertEqual(writer.read(), convert(b""))
226             if 'b' in mode:
227                 writer.seek(-5, os.SEEK_CUR)
228                 self.assertEqual(writer.read(3), convert(b"hel"))
229                 self.assertEqual(writer.read(), convert(b"lo"))
230             else:
231                 with self.assertRaises(IOError):
232                     writer.seek(-5, os.SEEK_CUR)
233                 with self.assertRaises(IOError):
234                     writer.seek(-3, os.SEEK_END)
235             writer.seek(0, os.SEEK_SET)
236             writer.read(7)
237             self.assertEqual(7, writer.tell())
238             self.assertEqual(7, writer.seek(7, os.SEEK_SET))
239
240             writer.seek(0, os.SEEK_SET)
241             self.assertEqual(writer.read(), convert(b"0123456789hello"))
242
243             writer.seek(0)
244             writer.write(convert(b"world"))
245             self.assertEqual(writer.read(), convert(b""))
246             writer.seek(0)
247             self.assertEqual(writer.read(), convert(b"0123456789helloworld"))
248
249             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 fc5e038d38a57032085441e7fe7010b0+10 0:20:count.txt\n", c.portable_manifest_text())
250
251     def test_write_at_beginning(self):
252         keep = ArvadosFileWriterTestCase.MockKeep({
253             "781e5e245d69b566979b86e28d23f2c7+10": b"0123456789",
254         })
255         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
256                              keep_client=keep) as c:
257             writer = c.open("count.txt", "rb+")
258             self.assertEqual(b"0123456789", writer.readfrom(0, 13))
259             writer.seek(0, os.SEEK_SET)
260             writer.write("foo")
261             self.assertEqual(writer.size(), 10)
262             self.assertEqual(b"foo3456789", writer.readfrom(0, 13))
263             self.assertEqual(". acbd18db4cc2f85cedef654fccc4a4d8+3 781e5e245d69b566979b86e28d23f2c7+10 0:3:count.txt 6:7:count.txt\n", c.portable_manifest_text())
264
265     def test_write_empty(self):
266         keep = ArvadosFileWriterTestCase.MockKeep({})
267         with Collection(keep_client=keep) as c:
268             writer = c.open("count.txt", "wb")
269             self.assertEqual(writer.size(), 0)
270             self.assertEqual(". d41d8cd98f00b204e9800998ecf8427e+0 0:0:count.txt\n", c.portable_manifest_text())
271
272     def test_save_manifest_text(self):
273         keep = ArvadosFileWriterTestCase.MockKeep({})
274         with Collection(keep_client=keep) as c:
275             writer = c.open("count.txt", "wb")
276             writer.write(b"0123456789")
277             self.assertEqual('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n', c.portable_manifest_text())
278             self.assertNotIn('781e5e245d69b566979b86e28d23f2c7+10', keep.blocks)
279
280             self.assertEqual('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n', c.save_new(create_collection_record=False))
281             self.assertIn('781e5e245d69b566979b86e28d23f2c7+10', keep.blocks)
282
283     def test_get_manifest_text_commits(self):
284          keep = ArvadosFileWriterTestCase.MockKeep({})
285          with Collection(keep_client=keep) as c:
286              writer = c.open("count.txt", "wb")
287              writer.write("0123456789")
288              self.assertEqual('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n', c.portable_manifest_text())
289              self.assertNotIn('781e5e245d69b566979b86e28d23f2c7+10', keep.blocks)
290              self.assertEqual('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n', c.manifest_text())
291              self.assertIn('781e5e245d69b566979b86e28d23f2c7+10', keep.blocks)
292
293
294     def test_write_in_middle(self):
295         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": b"0123456789"})
296         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
297                              keep_client=keep) as c:
298             writer = c.open("count.txt", "rb+")
299             self.assertEqual(b"0123456789", writer.readfrom(0, 13))
300             writer.seek(3, os.SEEK_SET)
301             writer.write("foo")
302             self.assertEqual(writer.size(), 10)
303             self.assertEqual(b"012foo6789", writer.readfrom(0, 13))
304             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:count.txt 10:3:count.txt 6:4:count.txt\n", c.portable_manifest_text())
305
306     def test_write_at_end(self):
307         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": b"0123456789"})
308         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
309                              keep_client=keep) as c:
310             writer = c.open("count.txt", "rb+")
311             self.assertEqual(b"0123456789", writer.readfrom(0, 13))
312             writer.seek(7, os.SEEK_SET)
313             writer.write("foo")
314             self.assertEqual(writer.size(), 10)
315             self.assertEqual(b"0123456foo", writer.readfrom(0, 13))
316             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:7:count.txt 10:3:count.txt\n", c.portable_manifest_text())
317
318     def test_write_across_segment_boundary(self):
319         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": b"0123456789"})
320         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt 0:10:count.txt\n',
321                              keep_client=keep) as c:
322             writer = c.open("count.txt", "rb+")
323             self.assertEqual(b"012345678901234", writer.readfrom(0, 15))
324             writer.seek(7, os.SEEK_SET)
325             writer.write("foobar")
326             self.assertEqual(writer.size(), 20)
327             self.assertEqual(b"0123456foobar34", writer.readfrom(0, 15))
328             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 3858f62230ac3c915f300c664312c63f+6 0:7:count.txt 10:6:count.txt 3:7:count.txt\n", c.portable_manifest_text())
329
330     def test_write_across_several_segments(self):
331         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": b"0123456789"})
332         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:4:count.txt 0:4:count.txt 0:4:count.txt',
333                              keep_client=keep) as c:
334             writer = c.open("count.txt", "rb+")
335             self.assertEqual(b"012301230123", writer.readfrom(0, 15))
336             writer.seek(2, os.SEEK_SET)
337             writer.write("abcdefg")
338             self.assertEqual(writer.size(), 12)
339             self.assertEqual(b"01abcdefg123", writer.readfrom(0, 15))
340             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 7ac66c0f148de9519b8bd264312c4d64+7 0:2:count.txt 10:7:count.txt 1:3:count.txt\n", c.portable_manifest_text())
341
342     def test_write_large(self):
343         keep = ArvadosFileWriterTestCase.MockKeep({})
344         api = ArvadosFileWriterTestCase.MockApi({"name":"test_write_large",
345                                                  "manifest_text": ". a5de24f4417cfba9d5825eadc2f4ca49+67108000 598cc1a4ccaef8ab6e4724d87e675d78+32892000 0:100000000:count.txt\n",
346                                                  "replication_desired":None},
347                                                 {"uuid":"zzzzz-4zz18-mockcollection0",
348                                                  "manifest_text": ". a5de24f4417cfba9d5825eadc2f4ca49+67108000 598cc1a4ccaef8ab6e4724d87e675d78+32892000 0:100000000:count.txt\n",
349                                                  "portable_data_hash":"9132ca8e3f671c76103a38f5bc24328c+108"})
350         with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
351                              api_client=api, keep_client=keep) as c:
352             writer = c.open("count.txt", "rb+")
353             text = "0123456789" * 100
354             for b in range(0, 100000):
355                 writer.write(text)
356             self.assertEqual(writer.size(), 100000000)
357
358             self.assertIsNone(c.manifest_locator())
359             self.assertTrue(c.modified())
360             c.save_new("test_write_large")
361             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
362             self.assertFalse(c.modified())
363
364     def test_large_write(self):
365         keep = ArvadosFileWriterTestCase.MockKeep({})
366         api = ArvadosFileWriterTestCase.MockApi({}, {})
367         with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
368                              api_client=api, keep_client=keep) as c:
369             writer = c.open("count.txt", "rb+")
370             self.assertEqual(writer.size(), 0)
371
372             text = "0123456789"
373             writer.write(text)
374             text = "0123456789" * 9999999
375             writer.write(text)
376             self.assertEqual(writer.size(), 100000000)
377
378             self.assertEqual(c.manifest_text(), ". 781e5e245d69b566979b86e28d23f2c7+10 48dd23ea1645fd47d789804d71b5bb8e+67108864 77c57dc6ac5a10bb2205caaa73187994+32891126 0:100000000:count.txt\n")
379
380     def test_sparse_write(self):
381         keep = ArvadosFileWriterTestCase.MockKeep({})
382         api = ArvadosFileWriterTestCase.MockApi({}, {})
383         with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
384                              api_client=api, keep_client=keep) as c:
385             writer = c.open("count.txt", "rb+")
386             self.assertEqual(writer.size(), 0)
387
388             text = b"0123456789"
389             writer.seek(2)
390             writer.write(text)
391             self.assertEqual(writer.size(), 12)
392             writer.seek(0, os.SEEK_SET)
393             self.assertEqual(writer.read(), b"\x00\x00"+text)
394
395             self.assertEqual(c.manifest_text(), ". 7f614da9329cd3aebf59b91aadc30bf0+67108864 781e5e245d69b566979b86e28d23f2c7+10 0:2:count.txt 67108864:10:count.txt\n")
396
397     def test_sparse_write2(self):
398         keep = ArvadosFileWriterTestCase.MockKeep({})
399         api = ArvadosFileWriterTestCase.MockApi({}, {})
400         with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
401                              api_client=api, keep_client=keep) as c:
402             writer = c.open("count.txt", "rb+")
403             self.assertEqual(writer.size(), 0)
404
405             text = "0123456789"
406             writer.seek((arvados.config.KEEP_BLOCK_SIZE*2) + 2)
407             writer.write(text)
408             self.assertEqual(writer.size(), (arvados.config.KEEP_BLOCK_SIZE*2) + 12)
409             writer.seek(0, os.SEEK_SET)
410
411             self.assertEqual(c.manifest_text(), ". 7f614da9329cd3aebf59b91aadc30bf0+67108864 781e5e245d69b566979b86e28d23f2c7+10 0:67108864:count.txt 0:67108864:count.txt 0:2:count.txt 67108864:10:count.txt\n")
412
413     def test_sparse_write3(self):
414         keep = ArvadosFileWriterTestCase.MockKeep({})
415         api = ArvadosFileWriterTestCase.MockApi({}, {})
416         for r in [[0, 1, 2, 3, 4], [4, 3, 2, 1, 0], [3, 2, 0, 4, 1]]:
417             with Collection() as c:
418                 writer = c.open("count.txt", "rb+")
419                 self.assertEqual(writer.size(), 0)
420
421                 for i in r:
422                     w = ("%s" % i) * 10
423                     writer.seek(i*10)
424                     writer.write(w.encode())
425                 writer.seek(0)
426                 self.assertEqual(writer.read(), b"00000000001111111111222222222233333333334444444444")
427
428     def test_sparse_write4(self):
429         keep = ArvadosFileWriterTestCase.MockKeep({})
430         api = ArvadosFileWriterTestCase.MockApi({}, {})
431         for r in [[0, 1, 2, 4], [4, 2, 1, 0], [2, 0, 4, 1]]:
432             with Collection() as c:
433                 writer = c.open("count.txt", "rb+")
434                 self.assertEqual(writer.size(), 0)
435
436                 for i in r:
437                     w = ("%s" % i) * 10
438                     writer.seek(i*10)
439                     writer.write(w.encode())
440                 writer.seek(0)
441                 self.assertEqual(writer.read(), b"000000000011111111112222222222\x00\x00\x00\x00\x00\x00\x00\x00\x00\x004444444444")
442
443     def test_rewrite_on_empty_file(self):
444         keep = ArvadosFileWriterTestCase.MockKeep({})
445         with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
446                              keep_client=keep) as c:
447             writer = c.open("count.txt", "rb+")
448             for b in range(0, 10):
449                 writer.seek(0, os.SEEK_SET)
450                 writer.write("0123456789")
451
452             self.assertEqual(writer.size(), 10)
453             self.assertEqual(b"0123456789", writer.readfrom(0, 20))
454             self.assertEqual(". 7a08b07e84641703e5f2c836aa59a170+100 90:10:count.txt\n", c.portable_manifest_text())
455             writer.flush()
456             self.assertEqual(writer.size(), 10)
457             self.assertEqual(b"0123456789", writer.readfrom(0, 20))
458             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n", c.portable_manifest_text())
459
460     def test_rewrite_append_existing_file(self):
461         keep = ArvadosFileWriterTestCase.MockKeep({
462             "781e5e245d69b566979b86e28d23f2c7+10": b"0123456789",
463         })
464         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt',
465                              keep_client=keep) as c:
466             writer = c.open("count.txt", "rb+")
467             for b in range(0, 10):
468                 writer.seek(10, os.SEEK_SET)
469                 writer.write("abcdefghij")
470
471             self.assertEqual(writer.size(), 20)
472             self.assertEqual(b"0123456789abcdefghij", writer.readfrom(0, 20))
473             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 ae5f43bab79cf0be33f025fa97ae7398+100 0:10:count.txt 100:10:count.txt\n", c.portable_manifest_text())
474
475             writer.arvadosfile.flush()
476             self.assertEqual(writer.size(), 20)
477             self.assertEqual(b"0123456789abcdefghij", writer.readfrom(0, 20))
478             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 a925576942e94b2ef57a066101b48876+10 0:20:count.txt\n", c.portable_manifest_text())
479
480     def test_rewrite_over_existing_file(self):
481         keep = ArvadosFileWriterTestCase.MockKeep({
482             "781e5e245d69b566979b86e28d23f2c7+10": b"0123456789",
483         })
484         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt',
485                              keep_client=keep) as c:
486             writer = c.open("count.txt", "rb+")
487             for b in range(0, 10):
488                 writer.seek(5, os.SEEK_SET)
489                 writer.write("abcdefghij")
490
491             self.assertEqual(writer.size(), 15)
492             self.assertEqual(b"01234abcdefghij", writer.readfrom(0, 20))
493             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 ae5f43bab79cf0be33f025fa97ae7398+100 0:5:count.txt 100:10:count.txt\n", c.portable_manifest_text())
494
495             writer.arvadosfile.flush()
496
497             self.assertEqual(writer.size(), 15)
498             self.assertEqual(b"01234abcdefghij", writer.readfrom(0, 20))
499             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 a925576942e94b2ef57a066101b48876+10 0:5:count.txt 10:10:count.txt\n", c.portable_manifest_text())
500
501     def test_write_large_rewrite(self):
502         keep = ArvadosFileWriterTestCase.MockKeep({})
503         api = ArvadosFileWriterTestCase.MockApi({"name":"test_write_large",
504                                                  "manifest_text": ". 3dc0d4bc21f48060bedcb2c91af4f906+32892003 a5de24f4417cfba9d5825eadc2f4ca49+67108000 0:3:count.txt 32892006:67107997:count.txt 3:32892000:count.txt\n",
505                                                  "replication_desired":None},
506                                                 {"uuid":"zzzzz-4zz18-mockcollection0",
507                                                  "manifest_text": ". 3dc0d4bc21f48060bedcb2c91af4f906+32892003 a5de24f4417cfba9d5825eadc2f4ca49+67108000 0:3:count.txt 32892006:67107997:count.txt 3:32892000:count.txt\n",
508                                                  "portable_data_hash":"217665c6b713e1b78dfba7ebd42344db+156"})
509         with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
510                              api_client=api, keep_client=keep) as c:
511             writer = c.open("count.txt", "rb+")
512             text = b''.join([b"0123456789" for a in range(0, 100)])
513             for b in range(0, 100000):
514                 writer.write(text)
515             writer.seek(0, os.SEEK_SET)
516             writer.write("foo")
517             self.assertEqual(writer.size(), 100000000)
518
519             self.assertIsNone(c.manifest_locator())
520             self.assertTrue(c.modified())
521             c.save_new("test_write_large")
522             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
523             self.assertFalse(c.modified())
524
525     def test_create(self):
526         keep = ArvadosFileWriterTestCase.MockKeep({})
527         api = ArvadosFileWriterTestCase.MockApi({
528             "name":"test_create",
529             "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n",
530             "replication_desired":None,
531         }, {
532             "uuid":"zzzzz-4zz18-mockcollection0",
533             "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n",
534             "portable_data_hash":"7a461a8c58601798f690f8b368ac4423+51",
535         })
536         with Collection(api_client=api, keep_client=keep) as c:
537             writer = c.open("count.txt", "wb+")
538             self.assertEqual(writer.size(), 0)
539             writer.write("01234567")
540             self.assertEqual(writer.size(), 8)
541
542             self.assertIsNone(c.manifest_locator())
543             self.assertTrue(c.modified())
544             self.assertIsNone(keep.get("2e9ec317e197819358fbc43afca7d837+8"))
545             c.save_new("test_create")
546             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
547             self.assertFalse(c.modified())
548             self.assertEqual(b"01234567", keep.get("2e9ec317e197819358fbc43afca7d837+8"))
549
550
551     def test_create_subdir(self):
552         keep = ArvadosFileWriterTestCase.MockKeep({})
553         api = ArvadosFileWriterTestCase.MockApi({"name":"test_create",
554                                                  "manifest_text":"./foo/bar 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n",
555                                                  "replication_desired":None},
556                                                 {"uuid":"zzzzz-4zz18-mockcollection0",
557                                                  "manifest_text":"./foo/bar 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n",
558                                                  "portable_data_hash":"1b02aaa62528d28a5be41651cbb9d7c7+59"})
559         with Collection(api_client=api, keep_client=keep) as c:
560             self.assertIsNone(c.api_response())
561             writer = c.open("foo/bar/count.txt", "wb+")
562             writer.write("01234567")
563             self.assertFalse(c.committed())
564             c.save_new("test_create")
565             self.assertTrue(c.committed())
566             self.assertEqual(c.api_response(), api.response)
567
568     def test_overwrite(self):
569         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
570         api = ArvadosFileWriterTestCase.MockApi({"name":"test_overwrite",
571                                                  "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n",
572                                                  "replication_desired":None},
573                                                 {"uuid":"zzzzz-4zz18-mockcollection0",
574                                                  "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n",
575                                                  "portable_data_hash":"7a461a8c58601798f690f8b368ac4423+51"})
576         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
577                              api_client=api, keep_client=keep) as c:
578             writer = c.open("count.txt", "wb+")
579             self.assertEqual(writer.size(), 0)
580             writer.write("01234567")
581             self.assertEqual(writer.size(), 8)
582
583             self.assertIsNone(c.manifest_locator())
584             self.assertTrue(c.modified())
585             c.save_new("test_overwrite")
586             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
587             self.assertFalse(c.modified())
588
589     def test_file_not_found(self):
590         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n') as c:
591             with self.assertRaises(IOError):
592                 writer = c.open("nocount.txt", "rb")
593
594     def test_cannot_open_directory(self):
595         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n') as c:
596             with self.assertRaises(IOError):
597                 writer = c.open(".", "rb")
598
599     def test_create_multiple(self):
600         keep = ArvadosFileWriterTestCase.MockKeep({})
601         api = ArvadosFileWriterTestCase.MockApi({"name":"test_create_multiple",
602                                                  "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 e8dc4081b13434b45189a720b77b6818+8 0:8:count1.txt 8:8:count2.txt\n",
603                                                  "replication_desired":None},
604                                                 {"uuid":"zzzzz-4zz18-mockcollection0",
605                                                  "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 e8dc4081b13434b45189a720b77b6818+8 0:8:count1.txt 8:8:count2.txt\n",
606                                                  "portable_data_hash":"71e7bb6c00d31fc2b4364199fd97be08+102"})
607         with Collection(api_client=api, keep_client=keep) as c:
608             w1 = c.open("count1.txt", "wb")
609             w2 = c.open("count2.txt", "wb")
610             w1.write("01234567")
611             w2.write("abcdefgh")
612             self.assertEqual(w1.size(), 8)
613             self.assertEqual(w2.size(), 8)
614
615             self.assertIsNone(c.manifest_locator())
616             self.assertTrue(c.modified())
617             self.assertIsNone(keep.get("2e9ec317e197819358fbc43afca7d837+8"))
618             c.save_new("test_create_multiple")
619             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
620             self.assertFalse(c.modified())
621             self.assertEqual(b"01234567", keep.get("2e9ec317e197819358fbc43afca7d837+8"))
622
623
class ArvadosFileReaderTestCase(StreamFileReaderTestCase):
    # Extends StreamFileReaderTestCase with readers built by
    # make_count_reader(), which returns an ArvadosFileReader backed by
    # in-memory mocks.

    class MockParent(object):
        # Stand-in for the object that owns an ArvadosFile: it supplies a
        # lock, acts as its own root collection, and hands out block managers.

        class MockBlockMgr(object):
            # Block manager stub that serves block contents from a dict.

            def __init__(self, blocks, nocache):
                self.blocks = blocks
                self.nocache = nocache
                self.num_get_threads = 1

            def block_prefetch(self, loc):
                # Prefetch requests are accepted and ignored.
                pass

            def get_block_contents(self, loc, num_retries=0, cache_only=False):
                # With nocache set, a cache-only lookup yields None; any
                # other lookup returns the stored block.
                simulated_miss = self.nocache and cache_only
                return None if simulated_miss else self.blocks[loc]

        def __init__(self, blocks, nocache):
            self.blocks = blocks
            self.nocache = nocache
            self.lock = arvados.arvfile.NoopLock()

        def root_collection(self):
            return self

        def _my_block_manager(self):
            # A fresh stub is built on every call, sharing the same blocks.
            manager_cls = ArvadosFileReaderTestCase.MockParent.MockBlockMgr
            return manager_cls(self.blocks, self.nocache)


    def make_count_reader(self, nocache=False):
        # Build a reader over three 5-byte blocks; the segments select a
        # 3-byte slice from each, so the file reads b'123456789' (as the
        # read tests in this class assert).
        blocks = {}
        stream = []
        offset = 0
        for chunk in [b'01234', b'34567', b'67890']:
            locator = tutil.str_keep_locator(chunk)
            blocks[locator] = chunk
            stream.append(Range(locator, offset, len(chunk)))
            offset += len(chunk)
        parent = ArvadosFileReaderTestCase.MockParent(blocks, nocache)
        segments = [Range(1, 0, 3), Range(6, 3, 3), Range(11, 6, 3)]
        arvfile = ArvadosFile(parent, "count.txt", stream=stream, segments=segments)
        return ArvadosFileReader(arvfile, mode="rb")

    def test_read_block_crossing_behavior(self):
        # read() needs to return all the data requested if possible, even if it
        # crosses uncached blocks: https://arvados.org/issues/5856
        reader = self.make_count_reader(nocache=True)
        self.assertEqual(b'12345678', reader.read(8))

    def test_successive_reads(self):
        # Override StreamFileReaderTestCase.test_successive_reads
        reader = self.make_count_reader(nocache=True)
        for expected in (b'1234', b'5678', b'9', b''):
            self.assertEqual(expected, reader.read(4))

    def test_tell_after_block_read(self):
        # Override StreamFileReaderTestCase.test_tell_after_block_read
        reader = self.make_count_reader(nocache=True)
        self.assertEqual(b'12345678', reader.read(8))
        self.assertEqual(8, reader.tell())

    def test_prefetch(self):
        # Reading 4 bytes from the first block should leave both block
        # locators in the mock Keep's request log.
        first_block = "2e9ec317e197819358fbc43afca7d837+8"
        second_block = "e8dc4081b13434b45189a720b77b6818+8"
        keep_mock = ArvadosFileWriterTestCase.MockKeep({
            first_block: b"01234567",
            second_block: b"abcdefgh",
        })
        manifest = ". 2e9ec317e197819358fbc43afca7d837+8 e8dc4081b13434b45189a720b77b6818+8 0:16:count.txt\n"
        with Collection(manifest, keep_client=keep_mock) as coll:
            reader = coll.open("count.txt", "rb")
            self.assertEqual(b"0123", reader.read(4))
        # Checked only after the collection context has exited.
        self.assertIn(first_block, keep_mock.requests)
        self.assertIn(second_block, keep_mock.requests)

    def test__eq__from_manifest(self):
        # Files loaded from identical manifests compare equal.
        manifest = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt'
        with Collection(manifest) as c1, Collection(manifest) as c2:
            self.assertTrue(c1["count1.txt"] == c2["count1.txt"])
            self.assertFalse(c1["count1.txt"] != c2["count1.txt"])

    def test__eq__from_writes(self):
        # A freshly written file equals a manifest-loaded file with the same
        # content.
        manifest = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt'
        with Collection(manifest) as c1, Collection() as c2:
            out = c2.open("count1.txt", "wb")
            out.write("0123456789")

            self.assertTrue(c1["count1.txt"] == c2["count1.txt"])
            self.assertFalse(c1["count1.txt"] != c2["count1.txt"])

    def test__ne__(self):
        # Same length, different bytes: the files must compare unequal.
        manifest = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt'
        with Collection(manifest) as c1, Collection() as c2:
            out = c2.open("count1.txt", "wb")
            out.write("1234567890")

            self.assertTrue(c1["count1.txt"] != c2["count1.txt"])
            self.assertFalse(c1["count1.txt"] == c2["count1.txt"])
718
719
class ArvadosFileReadTestCase(unittest.TestCase, StreamRetryTestMixin):
    # Supplies reader_for() and read_for_test(), presumably the hooks that
    # StreamRetryTestMixin's tests call (the mixin is defined elsewhere).

    def reader_for(self, coll_name, **kwargs):
        # Build an ArvadosFileReader over every block locator found in the
        # named manifest, with prefetching disabled.
        stream = []
        segments = []
        offset = 0
        for token in self.manifest_for(coll_name).split():
            try:
                locator = KeepLocator(token)
                segments.append(Range(offset, offset, locator.size))
                stream.append(Range(token, offset, locator.size))
                offset += locator.size
            except ValueError:
                # Tokens that do not produce a locator are skipped.
                continue

        manager = arvados.arvfile._BlockManager(self.keep_client())
        manager.prefetch_enabled = False
        coll = Collection(keep_client=self.keep_client(), block_manager=manager)
        arvfile = ArvadosFile(coll, "test", stream=stream, segments=segments)
        # Open for binary reading unless the caller chose a mode.
        if 'mode' not in kwargs:
            kwargs['mode'] = 'rb'
        return ArvadosFileReader(arvfile, **kwargs)

    def read_for_test(self, reader, byte_count, **kwargs):
        # Read through the plain read() entry point.
        data = reader.read(byte_count, **kwargs)
        return data
745
746
class ArvadosFileReadFromTestCase(ArvadosFileReadTestCase):
    # Same as the parent class, but reads through readfrom() from offset 0.

    def read_for_test(self, reader, byte_count, **kwargs):
        start_offset = 0
        return reader.readfrom(start_offset, byte_count, **kwargs)
750
751
class ArvadosFileReadAllTestCase(ArvadosFileReadTestCase):
    # Same as the parent class, but reads through readall(); byte_count is
    # not used because everything readall() yields is joined.

    def read_for_test(self, reader, byte_count, **kwargs):
        pieces = reader.readall(**kwargs)
        return b''.join(pieces)
755
756
class ArvadosFileReadAllDecompressedTestCase(ArvadosFileReadTestCase):
    # Same as the parent class, but reads through readall_decompressed();
    # byte_count is not used because all yielded pieces are joined.

    def read_for_test(self, reader, byte_count, **kwargs):
        pieces = reader.readall_decompressed(**kwargs)
        return b''.join(pieces)
760
761
class ArvadosFileReadlinesTestCase(ArvadosFileReadTestCase):
    # Same as the parent class, but reads through readlines(); the joined
    # lines are encoded so the result can be compared as bytes.

    def read_for_test(self, reader, byte_count, **kwargs):
        lines = reader.readlines(**kwargs)
        text = ''.join(lines)
        return text.encode()
765
766
class ArvadosFileTestCase(unittest.TestCase):
    # Tests for permission-signature expiry on files loaded from a manifest.

    def datetime_to_hex(self, dt):
        # Convert a datetime to whole epoch seconds (via local-time mktime)
        # and return them as hex digits without the "0x" prefix.
        epoch_seconds = int(time.mktime(dt.timetuple()))
        return hex(epoch_seconds)[2:]

    def test_permission_expired(self):
        # The "{}" placeholder after "@" receives the hex timestamp.
        manifest_template = ". 781e5e245d69b566979b86e28d23f2c7+10+A715fd31f8111894f717eb1003c1b0216799dd9ec@{} 0:10:count.txt\n"
        now = datetime.datetime.now()
        one_week = datetime.timedelta(days=7)
        a_week_ago = now - one_week
        a_month_ago = now - datetime.timedelta(days=30)
        a_week_from_now = now + one_week

        # Timestamp in the future: not expired.
        future_manifest = manifest_template.format(self.datetime_to_hex(a_week_from_now))
        with Collection(future_manifest) as c:
            self.assertFalse(c.find('count.txt').permission_expired())

        # Timestamp a week in the past: expired by default and relative to a
        # later reference time, but not relative to an earlier one.
        past_manifest = manifest_template.format(self.datetime_to_hex(a_week_ago))
        with Collection(past_manifest) as c:
            target = c.find('count.txt')
            self.assertTrue(target.permission_expired())
            self.assertTrue(target.permission_expired(a_week_from_now))
            self.assertFalse(target.permission_expired(a_month_ago))
784
785
786 class BlockManagerTest(unittest.TestCase):
787     def test_bufferblock_append(self):
788         keep = ArvadosFileWriterTestCase.MockKeep({})
789         with arvados.arvfile._BlockManager(keep) as blockmanager:
790             bufferblock = blockmanager.alloc_bufferblock()
791             bufferblock.append("foo")
792
793             self.assertEqual(bufferblock.size(), 3)
794             self.assertEqual(bufferblock.buffer_view[0:3], b"foo")
795             self.assertEqual(bufferblock.locator(), "acbd18db4cc2f85cedef654fccc4a4d8+3")
796
797             bufferblock.append("bar")
798
799             self.assertEqual(bufferblock.size(), 6)
800             self.assertEqual(bufferblock.buffer_view[0:6], b"foobar")
801             self.assertEqual(bufferblock.locator(), "3858f62230ac3c915f300c664312c63f+6")
802
803             bufferblock.set_state(arvados.arvfile._BufferBlock.PENDING)
804             with self.assertRaises(arvados.errors.AssertionError):
805                 bufferblock.append("bar")
806
807     def test_bufferblock_dup(self):
808         keep = ArvadosFileWriterTestCase.MockKeep({})
809         with arvados.arvfile._BlockManager(keep) as blockmanager:
810             bufferblock = blockmanager.alloc_bufferblock()
811             bufferblock.append("foo")
812
813             self.assertEqual(bufferblock.size(), 3)
814             self.assertEqual(bufferblock.buffer_view[0:3], b"foo")
815             self.assertEqual(bufferblock.locator(), "acbd18db4cc2f85cedef654fccc4a4d8+3")
816             bufferblock.set_state(arvados.arvfile._BufferBlock.PENDING)
817
818             bufferblock2 = blockmanager.dup_block(bufferblock, None)
819             self.assertNotEqual(bufferblock.blockid, bufferblock2.blockid)
820
821             bufferblock2.append("bar")
822
823             self.assertEqual(bufferblock2.size(), 6)
824             self.assertEqual(bufferblock2.buffer_view[0:6], b"foobar")
825             self.assertEqual(bufferblock2.locator(), "3858f62230ac3c915f300c664312c63f+6")
826
827             self.assertEqual(bufferblock.size(), 3)
828             self.assertEqual(bufferblock.buffer_view[0:3], b"foo")
829             self.assertEqual(bufferblock.locator(), "acbd18db4cc2f85cedef654fccc4a4d8+3")
830
831     def test_bufferblock_get(self):
832         keep = ArvadosFileWriterTestCase.MockKeep({
833             "781e5e245d69b566979b86e28d23f2c7+10": b"0123456789",
834         })
835         with arvados.arvfile._BlockManager(keep) as blockmanager:
836             bufferblock = blockmanager.alloc_bufferblock()
837             bufferblock.append("foo")
838
839             self.assertEqual(blockmanager.get_block_contents("781e5e245d69b566979b86e28d23f2c7+10", 1), b"0123456789")
840             self.assertEqual(blockmanager.get_block_contents(bufferblock.blockid, 1), b"foo")
841
842     def test_bufferblock_commit(self):
843         mockkeep = mock.MagicMock()
844         with arvados.arvfile._BlockManager(mockkeep) as blockmanager:
845             bufferblock = blockmanager.alloc_bufferblock()
846             bufferblock.owner = mock.MagicMock(spec=arvados.arvfile.ArvadosFile)
847             def flush(sync=None):
848                 blockmanager.commit_bufferblock(bufferblock, sync)
849             bufferblock.owner.flush.side_effect = flush
850             bufferblock.append("foo")
851             blockmanager.commit_all()
852             self.assertTrue(bufferblock.owner.flush.called)
853             self.assertTrue(mockkeep.put.called)
854             self.assertEqual(bufferblock.state(), arvados.arvfile._BufferBlock.COMMITTED)
855             self.assertIsNone(bufferblock.buffer_view)
856
857     def test_bufferblock_commit_pending(self):
858         # Test for bug #7225
859         mockkeep = mock.MagicMock()
860         mockkeep.put.side_effect = lambda *args, **kwargs: time.sleep(1)
861         with arvados.arvfile._BlockManager(mockkeep) as blockmanager:
862             bufferblock = blockmanager.alloc_bufferblock()
863             bufferblock.append("foo")
864
865             blockmanager.commit_bufferblock(bufferblock, False)
866             self.assertEqual(bufferblock.state(), arvados.arvfile._BufferBlock.PENDING)
867
868             blockmanager.commit_bufferblock(bufferblock, True)
869             self.assertEqual(bufferblock.state(), arvados.arvfile._BufferBlock.COMMITTED)
870
871     def test_bufferblock_commit_with_error(self):
872         mockkeep = mock.MagicMock()
873         mockkeep.put.side_effect = arvados.errors.KeepWriteError("fail")
874         with arvados.arvfile._BlockManager(mockkeep) as blockmanager:
875             bufferblock = blockmanager.alloc_bufferblock()
876             bufferblock.owner = mock.MagicMock(spec=arvados.arvfile.ArvadosFile)
877             def flush(sync=None):
878                 blockmanager.commit_bufferblock(bufferblock, sync)
879             bufferblock.owner.flush.side_effect = flush
880             bufferblock.append("foo")
881             with self.assertRaises(arvados.errors.KeepWriteError) as err:
882                 blockmanager.commit_all()
883             self.assertTrue(bufferblock.owner.flush.called)
884             self.assertEqual(str(err.exception), "Error writing some blocks: block acbd18db4cc2f85cedef654fccc4a4d8+3 raised KeepWriteError (fail)")
885             self.assertEqual(bufferblock.state(), arvados.arvfile._BufferBlock.ERROR)