8784: Fix test for latest firefox.
[arvados.git] / sdk / python / tests / test_arvfile.py
1 from __future__ import absolute_import
2 from builtins import hex
3 from builtins import str
4 from builtins import range
5 from builtins import object
6 import bz2
7 import datetime
8 import gzip
9 import io
10 import mock
11 import os
12 import unittest
13 import time
14
15 import arvados
16 from arvados._ranges import Range
17 from arvados.keep import KeepLocator
18 from arvados.collection import Collection, CollectionReader
19 from arvados.arvfile import ArvadosFile, ArvadosFileReader
20
21 from . import arvados_testutil as tutil
22 from .test_stream import StreamFileReaderTestCase, StreamRetryTestMixin
23
24 class ArvadosFileWriterTestCase(unittest.TestCase):
25     class MockKeep(object):
26         def __init__(self, blocks):
27             self.blocks = blocks
28             self.requests = []
29         def get(self, locator, num_retries=0):
30             self.requests.append(locator)
31             return self.blocks.get(locator)
32         def get_from_cache(self, locator):
33             self.requests.append(locator)
34             return self.blocks.get(locator)
35         def put(self, data, num_retries=None, copies=None):
36             pdh = tutil.str_keep_locator(data)
37             self.blocks[pdh] = bytes(data)
38             return pdh
39
40     class MockApi(object):
41         def __init__(self, b, r):
42             self.body = b
43             self.response = r
44             self._schema = ArvadosFileWriterTestCase.MockApi.MockSchema()
45             self._rootDesc = {}
46         class MockSchema(object):
47             def __init__(self):
48                 self.schemas = {'Collection': {'properties': {'replication_desired': {'type':'integer'}}}}
49         class MockCollections(object):
50             def __init__(self, b, r):
51                 self.body = b
52                 self.response = r
53             class Execute(object):
54                 def __init__(self, r):
55                     self.response = r
56                 def execute(self, num_retries=None):
57                     return self.response
58             def create(self, ensure_unique_name=False, body=None):
59                 if body != self.body:
60                     raise Exception("Body %s does not match expectation %s" % (body, self.body))
61                 return ArvadosFileWriterTestCase.MockApi.MockCollections.Execute(self.response)
62             def update(self, uuid=None, body=None):
63                 return ArvadosFileWriterTestCase.MockApi.MockCollections.Execute(self.response)
64         def collections(self):
65             return ArvadosFileWriterTestCase.MockApi.MockCollections(self.body, self.response)
66
67
68     def test_truncate(self):
69         keep = ArvadosFileWriterTestCase.MockKeep({
70             "781e5e245d69b566979b86e28d23f2c7+10": b"0123456789",
71         })
72         api = ArvadosFileWriterTestCase.MockApi({
73             "name": "test_truncate",
74             "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 0:8:count.txt\n",
75             "replication_desired": None,
76         }, {
77             "uuid": "zzzzz-4zz18-mockcollection0",
78             "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 0:8:count.txt\n",
79             "portable_data_hash":"7fcd0eaac3aad4c31a6a0e756475da92+52",
80         })
81         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
82                         api_client=api, keep_client=keep) as c:
83             writer = c.open("count.txt", "rb+")
84             self.assertEqual(writer.size(), 10)
85             self.assertEqual(b"0123456789", writer.read(12))
86
87             writer.truncate(8)
88
89             # Make sure reading off the end doesn't break
90             self.assertEqual(b"", writer.read(12))
91
92             self.assertEqual(writer.size(), 8)
93             writer.seek(0, os.SEEK_SET)
94             self.assertEqual(b"01234567", writer.read(12))
95
96             self.assertIsNone(c.manifest_locator())
97             self.assertTrue(c.modified())
98             c.save_new("test_truncate")
99             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
100             self.assertFalse(c.modified())
101
102
103     def test_truncate2(self):
104         keep = ArvadosFileWriterTestCase.MockKeep({
105             "781e5e245d69b566979b86e28d23f2c7+10": b"0123456789",
106         })
107         api = ArvadosFileWriterTestCase.MockApi({
108             "name": "test_truncate2",
109             "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 7f614da9329cd3aebf59b91aadc30bf0+67108864 0:12:count.txt\n",
110             "replication_desired": None,
111         }, {
112             "uuid": "zzzzz-4zz18-mockcollection0",
113             "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 7f614da9329cd3aebf59b91aadc30bf0+67108864 0:12:count.txt\n",
114             "portable_data_hash": "272da898abdf86ddc71994835e3155f8+95",
115         })
116         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
117                         api_client=api, keep_client=keep) as c:
118             writer = c.open("count.txt", "rb+")
119             self.assertEqual(writer.size(), 10)
120             self.assertEqual(b"0123456789", writer.read(12))
121
122             # extend file size
123             writer.truncate(12)
124
125             self.assertEqual(writer.size(), 12)
126             writer.seek(0, os.SEEK_SET)
127             self.assertEqual(b"0123456789\x00\x00", writer.read(12))
128
129             self.assertIsNone(c.manifest_locator())
130             self.assertTrue(c.modified())
131             c.save_new("test_truncate2")
132             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
133             self.assertFalse(c.modified())
134
135     def test_truncate3(self):
136         keep = ArvadosFileWriterTestCase.MockKeep({
137             "781e5e245d69b566979b86e28d23f2c7+10": b"0123456789",
138             "a925576942e94b2ef57a066101b48876+10": b"abcdefghij",
139         })
140         api = ArvadosFileWriterTestCase.MockApi({
141             "name": "test_truncate",
142             "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 0:8:count.txt\n",
143             "replication_desired": None,
144         }, {
145             "uuid": "zzzzz-4zz18-mockcollection0",
146             "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 0:8:count.txt\n",
147             "portable_data_hash": "7fcd0eaac3aad4c31a6a0e756475da92+52",
148         })
149         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 a925576942e94b2ef57a066101b48876+10 0:20:count.txt\n',
150                         api_client=api, keep_client=keep) as c:
151             writer = c.open("count.txt", "rb+")
152             self.assertEqual(writer.size(), 20)
153             self.assertEqual(b"0123456789ab", writer.read(12))
154             self.assertEqual(12, writer.tell())
155
156             writer.truncate(8)
157
158             # Make sure reading off the end doesn't break
159             self.assertEqual(12, writer.tell())
160             self.assertEqual(b"", writer.read(12))
161
162             self.assertEqual(writer.size(), 8)
163             self.assertEqual(2, writer.seek(-10, os.SEEK_CUR))
164             self.assertEqual(b"234567", writer.read(12))
165
166             self.assertIsNone(c.manifest_locator())
167             self.assertTrue(c.modified())
168             c.save_new("test_truncate")
169             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
170             self.assertFalse(c.modified())
171
172
173
174     def test_write_to_end(self):
175         keep = ArvadosFileWriterTestCase.MockKeep({
176             "781e5e245d69b566979b86e28d23f2c7+10": b"0123456789",
177         })
178         api = ArvadosFileWriterTestCase.MockApi({
179             "name": "test_append",
180             "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:13:count.txt\n",
181             "replication_desired": None,
182         }, {
183             "uuid": "zzzzz-4zz18-mockcollection0",
184             "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:13:count.txt\n",
185             "portable_data_hash": "c5c3af76565c8efb6a806546bcf073f3+88",
186         })
187         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
188                              api_client=api, keep_client=keep) as c:
189             writer = c.open("count.txt", "rb+")
190             self.assertEqual(writer.size(), 10)
191
192             self.assertEqual(5, writer.seek(5, os.SEEK_SET))
193             self.assertEqual(b"56789", writer.read(8))
194
195             writer.seek(10, os.SEEK_SET)
196             writer.write("foo")
197             self.assertEqual(writer.size(), 13)
198
199             writer.seek(5, os.SEEK_SET)
200             self.assertEqual(b"56789foo", writer.read(8))
201
202             self.assertIsNone(c.manifest_locator())
203             self.assertTrue(c.modified())
204             self.assertIsNone(keep.get("acbd18db4cc2f85cedef654fccc4a4d8+3"))
205
206             c.save_new("test_append")
207             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
208             self.assertFalse(c.modified())
209             self.assertEqual(b"foo", keep.get("acbd18db4cc2f85cedef654fccc4a4d8+3"))
210
211
212     def test_append(self):
213         keep = ArvadosFileWriterTestCase.MockKeep({
214             "781e5e245d69b566979b86e28d23f2c7+10": b"0123456789",
215         })
216         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n', keep_client=keep)
217         writer = c.open("count.txt", "ab+")
218         self.assertEqual(writer.read(20), b"0123456789")
219
220         writer.seek(0, os.SEEK_SET)
221         writer.write("hello")
222         self.assertEqual(writer.read(), b"")
223         writer.seek(-5, os.SEEK_CUR)
224         self.assertEqual(writer.read(3), b"hel")
225         self.assertEqual(writer.read(), b"lo")
226         writer.seek(0, os.SEEK_SET)
227         self.assertEqual(writer.read(), b"0123456789hello")
228
229         writer.seek(0)
230         writer.write("world")
231         self.assertEqual(writer.read(), b"")
232         writer.seek(0)
233         self.assertEqual(writer.read(), b"0123456789helloworld")
234
235         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 fc5e038d38a57032085441e7fe7010b0+10 0:20:count.txt\n", c.portable_manifest_text())
236
237     def test_write_at_beginning(self):
238         keep = ArvadosFileWriterTestCase.MockKeep({
239             "781e5e245d69b566979b86e28d23f2c7+10": b"0123456789",
240         })
241         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
242                              keep_client=keep) as c:
243             writer = c.open("count.txt", "rb+")
244             self.assertEqual(b"0123456789", writer.readfrom(0, 13))
245             writer.seek(0, os.SEEK_SET)
246             writer.write("foo")
247             self.assertEqual(writer.size(), 10)
248             self.assertEqual(b"foo3456789", writer.readfrom(0, 13))
249             self.assertEqual(". acbd18db4cc2f85cedef654fccc4a4d8+3 781e5e245d69b566979b86e28d23f2c7+10 0:3:count.txt 6:7:count.txt\n", c.portable_manifest_text())
250
251     def test_write_empty(self):
252         keep = ArvadosFileWriterTestCase.MockKeep({})
253         with Collection(keep_client=keep) as c:
254             writer = c.open("count.txt", "wb")
255             self.assertEqual(writer.size(), 0)
256             self.assertEqual(". d41d8cd98f00b204e9800998ecf8427e+0 0:0:count.txt\n", c.portable_manifest_text())
257
258     def test_save_manifest_text(self):
259         keep = ArvadosFileWriterTestCase.MockKeep({})
260         with Collection(keep_client=keep) as c:
261             writer = c.open("count.txt", "wb")
262             writer.write(b"0123456789")
263             self.assertEqual('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n', c.portable_manifest_text())
264             self.assertNotIn('781e5e245d69b566979b86e28d23f2c7+10', keep.blocks)
265
266             self.assertEqual('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n', c.save_new(create_collection_record=False))
267             self.assertIn('781e5e245d69b566979b86e28d23f2c7+10', keep.blocks)
268
269     def test_get_manifest_text_commits(self):
270          keep = ArvadosFileWriterTestCase.MockKeep({})
271          with Collection(keep_client=keep) as c:
272              writer = c.open("count.txt", "wb")
273              writer.write("0123456789")
274              self.assertEqual('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n', c.portable_manifest_text())
275              self.assertNotIn('781e5e245d69b566979b86e28d23f2c7+10', keep.blocks)
276              self.assertEqual('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n', c.manifest_text())
277              self.assertIn('781e5e245d69b566979b86e28d23f2c7+10', keep.blocks)
278
279
280     def test_write_in_middle(self):
281         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": b"0123456789"})
282         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
283                              keep_client=keep) as c:
284             writer = c.open("count.txt", "rb+")
285             self.assertEqual(b"0123456789", writer.readfrom(0, 13))
286             writer.seek(3, os.SEEK_SET)
287             writer.write("foo")
288             self.assertEqual(writer.size(), 10)
289             self.assertEqual(b"012foo6789", writer.readfrom(0, 13))
290             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:count.txt 10:3:count.txt 6:4:count.txt\n", c.portable_manifest_text())
291
292     def test_write_at_end(self):
293         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": b"0123456789"})
294         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
295                              keep_client=keep) as c:
296             writer = c.open("count.txt", "rb+")
297             self.assertEqual(b"0123456789", writer.readfrom(0, 13))
298             writer.seek(7, os.SEEK_SET)
299             writer.write("foo")
300             self.assertEqual(writer.size(), 10)
301             self.assertEqual(b"0123456foo", writer.readfrom(0, 13))
302             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:7:count.txt 10:3:count.txt\n", c.portable_manifest_text())
303
304     def test_write_across_segment_boundary(self):
305         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": b"0123456789"})
306         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt 0:10:count.txt\n',
307                              keep_client=keep) as c:
308             writer = c.open("count.txt", "rb+")
309             self.assertEqual(b"012345678901234", writer.readfrom(0, 15))
310             writer.seek(7, os.SEEK_SET)
311             writer.write("foobar")
312             self.assertEqual(writer.size(), 20)
313             self.assertEqual(b"0123456foobar34", writer.readfrom(0, 15))
314             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 3858f62230ac3c915f300c664312c63f+6 0:7:count.txt 10:6:count.txt 3:7:count.txt\n", c.portable_manifest_text())
315
316     def test_write_across_several_segments(self):
317         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": b"0123456789"})
318         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:4:count.txt 0:4:count.txt 0:4:count.txt',
319                              keep_client=keep) as c:
320             writer = c.open("count.txt", "rb+")
321             self.assertEqual(b"012301230123", writer.readfrom(0, 15))
322             writer.seek(2, os.SEEK_SET)
323             writer.write("abcdefg")
324             self.assertEqual(writer.size(), 12)
325             self.assertEqual(b"01abcdefg123", writer.readfrom(0, 15))
326             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 7ac66c0f148de9519b8bd264312c4d64+7 0:2:count.txt 10:7:count.txt 1:3:count.txt\n", c.portable_manifest_text())
327
328     def test_write_large(self):
329         keep = ArvadosFileWriterTestCase.MockKeep({})
330         api = ArvadosFileWriterTestCase.MockApi({"name":"test_write_large",
331                                                  "manifest_text": ". a5de24f4417cfba9d5825eadc2f4ca49+67108000 598cc1a4ccaef8ab6e4724d87e675d78+32892000 0:100000000:count.txt\n",
332                                                  "replication_desired":None},
333                                                 {"uuid":"zzzzz-4zz18-mockcollection0",
334                                                  "manifest_text": ". a5de24f4417cfba9d5825eadc2f4ca49+67108000 598cc1a4ccaef8ab6e4724d87e675d78+32892000 0:100000000:count.txt\n",
335                                                  "portable_data_hash":"9132ca8e3f671c76103a38f5bc24328c+108"})
336         with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
337                              api_client=api, keep_client=keep) as c:
338             writer = c.open("count.txt", "rb+")
339             text = "0123456789" * 100
340             for b in range(0, 100000):
341                 writer.write(text)
342             self.assertEqual(writer.size(), 100000000)
343
344             self.assertIsNone(c.manifest_locator())
345             self.assertTrue(c.modified())
346             c.save_new("test_write_large")
347             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
348             self.assertFalse(c.modified())
349
350
351     def test_large_write(self):
352         keep = ArvadosFileWriterTestCase.MockKeep({})
353         api = ArvadosFileWriterTestCase.MockApi({}, {})
354         with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
355                              api_client=api, keep_client=keep) as c:
356             writer = c.open("count.txt", "rb+")
357             self.assertEqual(writer.size(), 0)
358
359             text = "0123456789"
360             writer.write(text)
361             text = "0123456789" * 9999999
362             writer.write(text)
363             self.assertEqual(writer.size(), 100000000)
364
365             self.assertEqual(c.manifest_text(), ". 781e5e245d69b566979b86e28d23f2c7+10 48dd23ea1645fd47d789804d71b5bb8e+67108864 77c57dc6ac5a10bb2205caaa73187994+32891126 0:100000000:count.txt\n")
366
367     def test_sparse_write(self):
368         keep = ArvadosFileWriterTestCase.MockKeep({})
369         api = ArvadosFileWriterTestCase.MockApi({}, {})
370         with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
371                              api_client=api, keep_client=keep) as c:
372             writer = c.open("count.txt", "rb+")
373             self.assertEqual(writer.size(), 0)
374
375             text = b"0123456789"
376             writer.seek(2)
377             writer.write(text)
378             self.assertEqual(writer.size(), 12)
379             writer.seek(0, os.SEEK_SET)
380             self.assertEqual(writer.read(), b"\x00\x00"+text)
381
382             self.assertEqual(c.manifest_text(), ". 7f614da9329cd3aebf59b91aadc30bf0+67108864 781e5e245d69b566979b86e28d23f2c7+10 0:2:count.txt 67108864:10:count.txt\n")
383
384
385     def test_sparse_write2(self):
386         keep = ArvadosFileWriterTestCase.MockKeep({})
387         api = ArvadosFileWriterTestCase.MockApi({}, {})
388         with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
389                              api_client=api, keep_client=keep) as c:
390             writer = c.open("count.txt", "rb+")
391             self.assertEqual(writer.size(), 0)
392
393             text = "0123456789"
394             writer.seek((arvados.config.KEEP_BLOCK_SIZE*2) + 2)
395             writer.write(text)
396             self.assertEqual(writer.size(), (arvados.config.KEEP_BLOCK_SIZE*2) + 12)
397             writer.seek(0, os.SEEK_SET)
398
399             self.assertEqual(c.manifest_text(), ". 7f614da9329cd3aebf59b91aadc30bf0+67108864 781e5e245d69b566979b86e28d23f2c7+10 0:67108864:count.txt 0:67108864:count.txt 0:2:count.txt 67108864:10:count.txt\n")
400
401
402     def test_sparse_write3(self):
403         keep = ArvadosFileWriterTestCase.MockKeep({})
404         api = ArvadosFileWriterTestCase.MockApi({}, {})
405         for r in [[0, 1, 2, 3, 4], [4, 3, 2, 1, 0], [3, 2, 0, 4, 1]]:
406             with Collection() as c:
407                 writer = c.open("count.txt", "rb+")
408                 self.assertEqual(writer.size(), 0)
409
410                 for i in r:
411                     w = ("%s" % i) * 10
412                     writer.seek(i*10)
413                     writer.write(w.encode())
414                 writer.seek(0)
415                 self.assertEqual(writer.read(), b"00000000001111111111222222222233333333334444444444")
416
417     def test_sparse_write4(self):
418         keep = ArvadosFileWriterTestCase.MockKeep({})
419         api = ArvadosFileWriterTestCase.MockApi({}, {})
420         for r in [[0, 1, 2, 4], [4, 2, 1, 0], [2, 0, 4, 1]]:
421             with Collection() as c:
422                 writer = c.open("count.txt", "rb+")
423                 self.assertEqual(writer.size(), 0)
424
425                 for i in r:
426                     w = ("%s" % i) * 10
427                     writer.seek(i*10)
428                     writer.write(w.encode())
429                 writer.seek(0)
430                 self.assertEqual(writer.read(), b"000000000011111111112222222222\x00\x00\x00\x00\x00\x00\x00\x00\x00\x004444444444")
431
432
433     def test_rewrite_on_empty_file(self):
434         keep = ArvadosFileWriterTestCase.MockKeep({})
435         with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
436                              keep_client=keep) as c:
437             writer = c.open("count.txt", "rb+")
438             for b in range(0, 10):
439                 writer.seek(0, os.SEEK_SET)
440                 writer.write("0123456789")
441
442             self.assertEqual(writer.size(), 10)
443             self.assertEqual(b"0123456789", writer.readfrom(0, 20))
444             self.assertEqual(". 7a08b07e84641703e5f2c836aa59a170+100 90:10:count.txt\n", c.portable_manifest_text())
445             writer.flush()
446             self.assertEqual(writer.size(), 10)
447             self.assertEqual(b"0123456789", writer.readfrom(0, 20))
448             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n", c.portable_manifest_text())
449
450     def test_rewrite_append_existing_file(self):
451         keep = ArvadosFileWriterTestCase.MockKeep({
452             "781e5e245d69b566979b86e28d23f2c7+10": b"0123456789",
453         })
454         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt',
455                              keep_client=keep) as c:
456             writer = c.open("count.txt", "rb+")
457             for b in range(0, 10):
458                 writer.seek(10, os.SEEK_SET)
459                 writer.write("abcdefghij")
460
461             self.assertEqual(writer.size(), 20)
462             self.assertEqual(b"0123456789abcdefghij", writer.readfrom(0, 20))
463             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 ae5f43bab79cf0be33f025fa97ae7398+100 0:10:count.txt 100:10:count.txt\n", c.portable_manifest_text())
464
465             writer.arvadosfile.flush()
466             self.assertEqual(writer.size(), 20)
467             self.assertEqual(b"0123456789abcdefghij", writer.readfrom(0, 20))
468             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 a925576942e94b2ef57a066101b48876+10 0:20:count.txt\n", c.portable_manifest_text())
469
470     def test_rewrite_over_existing_file(self):
471         keep = ArvadosFileWriterTestCase.MockKeep({
472             "781e5e245d69b566979b86e28d23f2c7+10": b"0123456789",
473         })
474         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt',
475                              keep_client=keep) as c:
476             writer = c.open("count.txt", "rb+")
477             for b in range(0, 10):
478                 writer.seek(5, os.SEEK_SET)
479                 writer.write("abcdefghij")
480
481             self.assertEqual(writer.size(), 15)
482             self.assertEqual(b"01234abcdefghij", writer.readfrom(0, 20))
483             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 ae5f43bab79cf0be33f025fa97ae7398+100 0:5:count.txt 100:10:count.txt\n", c.portable_manifest_text())
484
485             writer.arvadosfile.flush()
486
487             self.assertEqual(writer.size(), 15)
488             self.assertEqual(b"01234abcdefghij", writer.readfrom(0, 20))
489             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 a925576942e94b2ef57a066101b48876+10 0:5:count.txt 10:10:count.txt\n", c.portable_manifest_text())
490
491     def test_write_large_rewrite(self):
492         keep = ArvadosFileWriterTestCase.MockKeep({})
493         api = ArvadosFileWriterTestCase.MockApi({"name":"test_write_large",
494                                                  "manifest_text": ". 3dc0d4bc21f48060bedcb2c91af4f906+32892003 a5de24f4417cfba9d5825eadc2f4ca49+67108000 0:3:count.txt 32892006:67107997:count.txt 3:32892000:count.txt\n",
495                                                  "replication_desired":None},
496                                                 {"uuid":"zzzzz-4zz18-mockcollection0",
497                                                  "manifest_text": ". 3dc0d4bc21f48060bedcb2c91af4f906+32892003 a5de24f4417cfba9d5825eadc2f4ca49+67108000 0:3:count.txt 32892006:67107997:count.txt 3:32892000:count.txt\n",
498                                                  "portable_data_hash":"217665c6b713e1b78dfba7ebd42344db+156"})
499         with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
500                              api_client=api, keep_client=keep) as c:
501             writer = c.open("count.txt", "rb+")
502             text = b''.join([b"0123456789" for a in range(0, 100)])
503             for b in range(0, 100000):
504                 writer.write(text)
505             writer.seek(0, os.SEEK_SET)
506             writer.write("foo")
507             self.assertEqual(writer.size(), 100000000)
508
509             self.assertIsNone(c.manifest_locator())
510             self.assertTrue(c.modified())
511             c.save_new("test_write_large")
512             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
513             self.assertFalse(c.modified())
514
515     def test_create(self):
516         keep = ArvadosFileWriterTestCase.MockKeep({})
517         api = ArvadosFileWriterTestCase.MockApi({
518             "name":"test_create",
519             "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n",
520             "replication_desired":None,
521         }, {
522             "uuid":"zzzzz-4zz18-mockcollection0",
523             "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n",
524             "portable_data_hash":"7a461a8c58601798f690f8b368ac4423+51",
525         })
526         with Collection(api_client=api, keep_client=keep) as c:
527             writer = c.open("count.txt", "wb+")
528             self.assertEqual(writer.size(), 0)
529             writer.write("01234567")
530             self.assertEqual(writer.size(), 8)
531
532             self.assertIsNone(c.manifest_locator())
533             self.assertTrue(c.modified())
534             self.assertIsNone(keep.get("2e9ec317e197819358fbc43afca7d837+8"))
535             c.save_new("test_create")
536             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
537             self.assertFalse(c.modified())
538             self.assertEqual(b"01234567", keep.get("2e9ec317e197819358fbc43afca7d837+8"))
539
540
541     def test_create_subdir(self):
542         keep = ArvadosFileWriterTestCase.MockKeep({})
543         api = ArvadosFileWriterTestCase.MockApi({"name":"test_create",
544                                                  "manifest_text":"./foo/bar 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n",
545                                                  "replication_desired":None},
546                                                 {"uuid":"zzzzz-4zz18-mockcollection0",
547                                                  "manifest_text":"./foo/bar 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n",
548                                                  "portable_data_hash":"1b02aaa62528d28a5be41651cbb9d7c7+59"})
549         with Collection(api_client=api, keep_client=keep) as c:
550             self.assertIsNone(c.api_response())
551             writer = c.open("foo/bar/count.txt", "wb+")
552             writer.write("01234567")
553             self.assertFalse(c.committed())
554             c.save_new("test_create")
555             self.assertTrue(c.committed())
556             self.assertEqual(c.api_response(), api.response)
557
558     def test_overwrite(self):
559         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
560         api = ArvadosFileWriterTestCase.MockApi({"name":"test_overwrite",
561                                                  "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n",
562                                                  "replication_desired":None},
563                                                 {"uuid":"zzzzz-4zz18-mockcollection0",
564                                                  "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n",
565                                                  "portable_data_hash":"7a461a8c58601798f690f8b368ac4423+51"})
566         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
567                              api_client=api, keep_client=keep) as c:
568             writer = c.open("count.txt", "wb+")
569             self.assertEqual(writer.size(), 0)
570             writer.write("01234567")
571             self.assertEqual(writer.size(), 8)
572
573             self.assertIsNone(c.manifest_locator())
574             self.assertTrue(c.modified())
575             c.save_new("test_overwrite")
576             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
577             self.assertFalse(c.modified())
578
579     def test_file_not_found(self):
580         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n') as c:
581             with self.assertRaises(IOError):
582                 writer = c.open("nocount.txt", "rb")
583
584     def test_cannot_open_directory(self):
585         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n') as c:
586             with self.assertRaises(IOError):
587                 writer = c.open(".", "rb")
588
589     def test_create_multiple(self):
590         keep = ArvadosFileWriterTestCase.MockKeep({})
591         api = ArvadosFileWriterTestCase.MockApi({"name":"test_create_multiple",
592                                                  "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 e8dc4081b13434b45189a720b77b6818+8 0:8:count1.txt 8:8:count2.txt\n",
593                                                  "replication_desired":None},
594                                                 {"uuid":"zzzzz-4zz18-mockcollection0",
595                                                  "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 e8dc4081b13434b45189a720b77b6818+8 0:8:count1.txt 8:8:count2.txt\n",
596                                                  "portable_data_hash":"71e7bb6c00d31fc2b4364199fd97be08+102"})
597         with Collection(api_client=api, keep_client=keep) as c:
598             w1 = c.open("count1.txt", "wb")
599             w2 = c.open("count2.txt", "wb")
600             w1.write("01234567")
601             w2.write("abcdefgh")
602             self.assertEqual(w1.size(), 8)
603             self.assertEqual(w2.size(), 8)
604
605             self.assertIsNone(c.manifest_locator())
606             self.assertTrue(c.modified())
607             self.assertIsNone(keep.get("2e9ec317e197819358fbc43afca7d837+8"))
608             c.save_new("test_create_multiple")
609             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
610             self.assertFalse(c.modified())
611             self.assertEqual(b"01234567", keep.get("2e9ec317e197819358fbc43afca7d837+8"))
612
613
class ArvadosFileReaderTestCase(StreamFileReaderTestCase):
    """Reader tests that run ArvadosFileReader against in-memory block stubs."""

    class MockParent(object):
        """Minimal stand-in for the parent object handed to ArvadosFile."""

        class MockBlockMgr(object):
            """Block manager stub that serves block contents from a dict."""

            def __init__(self, blocks, nocache):
                self.blocks, self.nocache = blocks, nocache

            def block_prefetch(self, loc):
                # Prefetching does nothing in this stub.
                return None

            def get_block_contents(self, loc, num_retries=0, cache_only=False):
                # With nocache set, a cache-only lookup reports a miss.
                simulated_miss = self.nocache and cache_only
                if simulated_miss:
                    return None
                return self.blocks[loc]

        def __init__(self, blocks, nocache):
            self.blocks, self.nocache = blocks, nocache
            self.lock = arvados.arvfile.NoopLock()

        def root_collection(self):
            # This mock acts as its own root collection.
            return self

        def _my_block_manager(self):
            manager_cls = ArvadosFileReaderTestCase.MockParent.MockBlockMgr
            return manager_cls(self.blocks, self.nocache)


    def make_count_reader(self, nocache=False):
        """Return an "rb" reader for count.txt backed by three 5-byte blocks.

        The tests below read b'123456789' from the resulting file in total.
        When ``nocache`` is true, cache-only block lookups return None.
        """
        blocks = {}
        stream = []
        offset = 0
        for chunk in (b'01234', b'34567', b'67890'):
            locator = tutil.str_keep_locator(chunk)
            blocks[locator] = chunk
            stream.append(Range(locator, offset, len(chunk)))
            offset += len(chunk)
        parent = ArvadosFileReaderTestCase.MockParent(blocks, nocache)
        segments = [Range(1, 0, 3), Range(6, 3, 3), Range(11, 6, 3)]
        af = ArvadosFile(parent, "count.txt", stream=stream, segments=segments)
        return ArvadosFileReader(af, mode="rb")

    def test_read_block_crossing_behavior(self):
        # read() needs to return all the data requested if possible, even if it
        # crosses uncached blocks: https://arvados.org/issues/5856
        reader = self.make_count_reader(nocache=True)
        self.assertEqual(b'12345678', reader.read(8))

    def test_successive_reads(self):
        # Override StreamFileReaderTestCase.test_successive_reads
        reader = self.make_count_reader(nocache=True)
        for expected in (b'1234', b'5678', b'9', b''):
            self.assertEqual(expected, reader.read(4))

    def test_tell_after_block_read(self):
        # Override StreamFileReaderTestCase.test_tell_after_block_read
        reader = self.make_count_reader(nocache=True)
        self.assertEqual(b'12345678', reader.read(8))
        self.assertEqual(8, reader.tell())

    def test_prefetch(self):
        """Reading from the first block also results in a request for the second."""
        first_block = "2e9ec317e197819358fbc43afca7d837+8"
        second_block = "e8dc4081b13434b45189a720b77b6818+8"
        keep = ArvadosFileWriterTestCase.MockKeep({
            first_block: b"01234567",
            second_block: b"abcdefgh",
        })
        manifest = ". 2e9ec317e197819358fbc43afca7d837+8 e8dc4081b13434b45189a720b77b6818+8 0:16:count.txt\n"
        with Collection(manifest, keep_client=keep) as c:
            r = c.open("count.txt", "rb")
            self.assertEqual(b"0123", r.read(4))
        # MockKeep records every locator passed to get()/get_from_cache().
        for locator in (first_block, second_block):
            self.assertIn(locator, keep.requests)

    def test__eq__from_manifest(self):
        """Files loaded from identical manifests compare equal."""
        manifest = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt'
        with Collection(manifest) as c1:
            with Collection(manifest) as c2:
                self.assertTrue(c1["count1.txt"] == c2["count1.txt"])
                self.assertFalse(c1["count1.txt"] != c2["count1.txt"])

    def test__eq__from_writes(self):
        """A file written with the same content equals the manifest-loaded file."""
        manifest = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt'
        with Collection(manifest) as c1:
            with Collection() as c2:
                writer = c2.open("count1.txt", "wb")
                writer.write("0123456789")

                self.assertTrue(c1["count1.txt"] == c2["count1.txt"])
                self.assertFalse(c1["count1.txt"] != c2["count1.txt"])

    def test__ne__(self):
        """A file written with different content is unequal to the manifest-loaded file."""
        manifest = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt'
        with Collection(manifest) as c1:
            with Collection() as c2:
                writer = c2.open("count1.txt", "wb")
                writer.write("1234567890")

                self.assertTrue(c1["count1.txt"] != c2["count1.txt"])
                self.assertFalse(c1["count1.txt"] == c2["count1.txt"])
707
708
class ArvadosFileReadTestCase(unittest.TestCase, StreamRetryTestMixin):
    """Exercise ArvadosFileReader.read() through the StreamRetryTestMixin hooks."""

    def reader_for(self, coll_name, **kwargs):
        """Return an ArvadosFileReader covering every block locator in a manifest.

        ``manifest_for`` and ``keep_client`` are not defined in this class;
        presumably StreamRetryTestMixin supplies them. ``kwargs`` is forwarded
        to ArvadosFileReader, with ``mode`` defaulting to 'rb'.
        """
        stream, segments = [], []
        offset = 0
        for token in self.manifest_for(coll_name).split():
            try:
                locator = KeepLocator(token)
                segments.append(Range(offset, offset, locator.size))
                stream.append(Range(token, offset, locator.size))
                offset += locator.size
            except ValueError:
                # Tokens that raise ValueError here are skipped (presumably the
                # manifest tokens that are not block locators).
                continue

        blockmanager = arvados.arvfile._BlockManager(self.keep_client())
        blockmanager.prefetch_enabled = False
        col = Collection(keep_client=self.keep_client(), block_manager=blockmanager)
        af = ArvadosFile(col, "test", stream=stream, segments=segments)
        if 'mode' not in kwargs:
            kwargs['mode'] = 'rb'
        return ArvadosFileReader(af, **kwargs)

    def read_for_test(self, reader, byte_count, **kwargs):
        """Read ``byte_count`` bytes with a plain read() call."""
        data = reader.read(byte_count, **kwargs)
        return data
734
735
class ArvadosFileReadFromTestCase(ArvadosFileReadTestCase):
    """Repeat the inherited tests, reading with readfrom() from offset 0."""

    def read_for_test(self, reader, byte_count, **kwargs):
        start = 0
        return reader.readfrom(start, byte_count, **kwargs)
739
740
class ArvadosFileReadAllTestCase(ArvadosFileReadTestCase):
    """Repeat the inherited tests, joining the pieces produced by readall()."""

    def read_for_test(self, reader, byte_count, **kwargs):
        # byte_count is not used here; readall() is called without it.
        pieces = reader.readall(**kwargs)
        return b''.join(pieces)
744
745
class ArvadosFileReadAllDecompressedTestCase(ArvadosFileReadTestCase):
    """Repeat the inherited tests, joining the pieces from readall_decompressed()."""

    def read_for_test(self, reader, byte_count, **kwargs):
        # byte_count is not used here; readall_decompressed() is called without it.
        pieces = reader.readall_decompressed(**kwargs)
        return b''.join(pieces)
749
750
class ArvadosFileReadlinesTestCase(ArvadosFileReadTestCase):
    """Repeat the inherited tests, joining readlines() output and encoding it."""

    def read_for_test(self, reader, byte_count, **kwargs):
        # byte_count is not used here; the joined text is encoded to bytes
        # with str.encode()'s default encoding.
        text = ''.join(reader.readlines(**kwargs))
        return text.encode()
754
755
class ArvadosFileTestCase(unittest.TestCase):
    """Tests for ArvadosFile permission-signature expiry checks."""

    def datetime_to_hex(self, dt):
        """Return ``dt`` as whole seconds from time.mktime(), in hex without '0x'."""
        epoch_seconds = int(time.mktime(dt.timetuple()))
        return hex(epoch_seconds)[2:]

    def test_permission_expired(self):
        """permission_expired() follows the timestamp after '@' in the locator."""
        base_manifest = ". 781e5e245d69b566979b86e28d23f2c7+10+A715fd31f8111894f717eb1003c1b0216799dd9ec@{} 0:10:count.txt\n"

        def manifest_stamped(when):
            # Fill the '@{}' placeholder with the hex timestamp for ``when``.
            return base_manifest.format(self.datetime_to_hex(when))

        now = datetime.datetime.now()
        one_week = datetime.timedelta(days=7)
        a_week_ago = now - one_week
        a_month_ago = now - datetime.timedelta(days=30)
        a_week_from_now = now + one_week

        with Collection(manifest_stamped(a_week_from_now)) as c:
            self.assertFalse(c.find('count.txt').permission_expired())

        with Collection(manifest_stamped(a_week_ago)) as c:
            stale = c.find('count.txt')
            self.assertTrue(stale.permission_expired())
            self.assertTrue(stale.permission_expired(a_week_from_now))
            # Checked as of a month ago, a week-old stamp is not yet expired.
            self.assertFalse(stale.permission_expired(a_month_ago))
773
774
class BlockManagerTest(unittest.TestCase):
    """Tests for _BlockManager buffer blocks: append, dup, get and commit."""

    def _check_block(self, block, size, content, locator):
        # Assert a buffer block's size, its first ``size`` bytes and its locator.
        self.assertEqual(block.size(), size)
        self.assertEqual(block.buffer_view[0:size], content)
        self.assertEqual(block.locator(), locator)

    def test_bufferblock_append(self):
        """Appends grow the block; appending to a PENDING block raises."""
        keep = ArvadosFileWriterTestCase.MockKeep({})
        with arvados.arvfile._BlockManager(keep) as blockmanager:
            block = blockmanager.alloc_bufferblock()

            block.append("foo")
            self._check_block(block, 3, b"foo", "acbd18db4cc2f85cedef654fccc4a4d8+3")

            block.append("bar")
            self._check_block(block, 6, b"foobar", "3858f62230ac3c915f300c664312c63f+6")

            block.set_state(arvados.arvfile._BufferBlock.PENDING)
            self.assertRaises(arvados.errors.AssertionError, block.append, "bar")

    def test_bufferblock_dup(self):
        """A duplicated block can be appended to without changing the original."""
        foo_locator = "acbd18db4cc2f85cedef654fccc4a4d8+3"
        keep = ArvadosFileWriterTestCase.MockKeep({})
        with arvados.arvfile._BlockManager(keep) as blockmanager:
            original = blockmanager.alloc_bufferblock()
            original.append("foo")
            self._check_block(original, 3, b"foo", foo_locator)
            original.set_state(arvados.arvfile._BufferBlock.PENDING)

            duplicate = blockmanager.dup_block(original, None)
            self.assertNotEqual(original.blockid, duplicate.blockid)

            duplicate.append("bar")
            self._check_block(duplicate, 6, b"foobar", "3858f62230ac3c915f300c664312c63f+6")

            # The source block still holds only the original data.
            self._check_block(original, 3, b"foo", foo_locator)

    def test_bufferblock_get(self):
        """get_block_contents() serves both Keep blocks and buffer blocks."""
        stored_locator = "781e5e245d69b566979b86e28d23f2c7+10"
        keep = ArvadosFileWriterTestCase.MockKeep({stored_locator: b"0123456789"})
        with arvados.arvfile._BlockManager(keep) as blockmanager:
            block = blockmanager.alloc_bufferblock()
            block.append("foo")

            from_keep = blockmanager.get_block_contents(stored_locator, 1)
            self.assertEqual(from_keep, b"0123456789")
            from_buffer = blockmanager.get_block_contents(block.blockid, 1)
            self.assertEqual(from_buffer, b"foo")

    def test_bufferblock_commit(self):
        """commit_all() flushes the owner, writes to Keep and releases the buffer."""
        mockkeep = mock.MagicMock()
        with arvados.arvfile._BlockManager(mockkeep) as blockmanager:
            block = blockmanager.alloc_bufferblock()
            owner = mock.MagicMock(spec=arvados.arvfile.ArvadosFile)
            block.owner = owner

            def flush(sync=None):
                # Stand in for the owner's flush by committing the block directly.
                blockmanager.commit_bufferblock(block, sync)
            owner.flush.side_effect = flush

            block.append("foo")
            blockmanager.commit_all()

            self.assertTrue(block.owner.flush.called)
            self.assertTrue(mockkeep.put.called)
            self.assertEqual(block.state(), arvados.arvfile._BufferBlock.COMMITTED)
            self.assertIsNone(block.buffer_view)

    def test_bufferblock_commit_pending(self):
        # Test for bug #7225
        def slow_put(x):
            # Each put takes one second.
            return time.sleep(1)

        mockkeep = mock.MagicMock()
        mockkeep.put.side_effect = slow_put
        with arvados.arvfile._BlockManager(mockkeep) as blockmanager:
            block = blockmanager.alloc_bufferblock()
            block.append("foo")

            # Second argument False: the block is expected to be PENDING on return.
            blockmanager.commit_bufferblock(block, False)
            self.assertEqual(block.state(), arvados.arvfile._BufferBlock.PENDING)

            # Second argument True: the block is expected to be COMMITTED on return.
            blockmanager.commit_bufferblock(block, True)
            self.assertEqual(block.state(), arvados.arvfile._BufferBlock.COMMITTED)


    def test_bufferblock_commit_with_error(self):
        """A failing Keep put surfaces from commit_all() and marks the block ERROR."""
        expected_message = "Error writing some blocks: block acbd18db4cc2f85cedef654fccc4a4d8+3 raised KeepWriteError (fail)"
        mockkeep = mock.MagicMock()
        mockkeep.put.side_effect = arvados.errors.KeepWriteError("fail")
        with arvados.arvfile._BlockManager(mockkeep) as blockmanager:
            block = blockmanager.alloc_bufferblock()
            owner = mock.MagicMock(spec=arvados.arvfile.ArvadosFile)
            block.owner = owner

            def flush(sync=None):
                # Stand in for the owner's flush by committing the block directly.
                blockmanager.commit_bufferblock(block, sync)
            owner.flush.side_effect = flush

            block.append("foo")
            with self.assertRaises(arvados.errors.KeepWriteError) as err:
                blockmanager.commit_all()

            self.assertTrue(block.owner.flush.called)
            self.assertEqual(str(err.exception), expected_message)
            self.assertEqual(block.state(), arvados.arvfile._BufferBlock.ERROR)