6c4976ee0de1633ca114423624f3ca6c1b9a3546
[arvados.git] / sdk / python / tests / test_arvfile.py
1 #!/usr/bin/env python
2
3 from __future__ import absolute_import
4 import bz2
5 import datetime
6 import gzip
7 import io
8 import mock
9 import os
10 import unittest
11 import time
12
13 import arvados
14 from arvados._ranges import Range
15 from arvados.keep import KeepLocator
16 from arvados.collection import Collection, CollectionReader
17 from arvados.arvfile import ArvadosFile, ArvadosFileReader
18
19 from . import arvados_testutil as tutil
20 from .test_stream import StreamFileReaderTestCase, StreamRetryTestMixin
21
22 class ArvadosFileWriterTestCase(unittest.TestCase):
23     class MockKeep(object):
24         def __init__(self, blocks):
25             self.blocks = blocks
26             self.requests = []
27         def get(self, locator, num_retries=0):
28             self.requests.append(locator)
29             return self.blocks.get(locator)
30         def get_from_cache(self, locator):
31             self.requests.append(locator)
32             return self.blocks.get(locator)
33         def put(self, data, num_retries=None, copies=None):
34             pdh = tutil.str_keep_locator(data)
35             self.blocks[pdh] = str(data)
36             return pdh
37
38     class MockApi(object):
39         def __init__(self, b, r):
40             self.body = b
41             self.response = r
42             self._schema = ArvadosFileWriterTestCase.MockApi.MockSchema()
43             self._rootDesc = {}
44         class MockSchema(object):
45             def __init__(self):
46                 self.schemas = {'Collection': {'properties': {'replication_desired': {'type':'integer'}}}}
47         class MockCollections(object):
48             def __init__(self, b, r):
49                 self.body = b
50                 self.response = r
51             class Execute(object):
52                 def __init__(self, r):
53                     self.response = r
54                 def execute(self, num_retries=None):
55                     return self.response
56             def create(self, ensure_unique_name=False, body=None):
57                 if body != self.body:
58                     raise Exception("Body %s does not match expectation %s" % (body, self.body))
59                 return ArvadosFileWriterTestCase.MockApi.MockCollections.Execute(self.response)
60             def update(self, uuid=None, body=None):
61                 return ArvadosFileWriterTestCase.MockApi.MockCollections.Execute(self.response)
62         def collections(self):
63             return ArvadosFileWriterTestCase.MockApi.MockCollections(self.body, self.response)
64
65
66     def test_truncate(self):
67         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
68         api = ArvadosFileWriterTestCase.MockApi({"name":"test_truncate",
69                                                  "manifest_text":". 781e5e245d69b566979b86e28d23f2c7+10 0:8:count.txt\n",
70                                                  "replication_desired":None},
71                                                 {"uuid":"zzzzz-4zz18-mockcollection0",
72                                                  "manifest_text":". 781e5e245d69b566979b86e28d23f2c7+10 0:8:count.txt\n",
73                                                  "portable_data_hash":"7fcd0eaac3aad4c31a6a0e756475da92+52"})
74         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
75                              api_client=api, keep_client=keep) as c:
76             writer = c.open("count.txt", "r+")
77             self.assertEqual(writer.size(), 10)
78             self.assertEqual("0123456789", writer.read(12))
79
80             writer.truncate(8)
81
82             # Make sure reading off the end doesn't break
83             self.assertEqual("", writer.read(12))
84
85             self.assertEqual(writer.size(), 8)
86             writer.seek(0, os.SEEK_SET)
87             self.assertEqual("01234567", writer.read(12))
88
89             self.assertIsNone(c.manifest_locator())
90             self.assertTrue(c.modified())
91             c.save_new("test_truncate")
92             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
93             self.assertFalse(c.modified())
94
95     def test_write_to_end(self):
96         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
97         api = ArvadosFileWriterTestCase.MockApi({"name":"test_append",
98                                                  "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:13:count.txt\n",
99                                                  "replication_desired":None},
100                                                 {"uuid":"zzzzz-4zz18-mockcollection0",
101                                                  "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:13:count.txt\n",
102                                                  "portable_data_hash":"c5c3af76565c8efb6a806546bcf073f3+88"})
103         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
104                              api_client=api, keep_client=keep) as c:
105             writer = c.open("count.txt", "r+")
106             self.assertEqual(writer.size(), 10)
107
108             writer.seek(5, os.SEEK_SET)
109             self.assertEqual("56789", writer.read(8))
110
111             writer.seek(10, os.SEEK_SET)
112             writer.write("foo")
113             self.assertEqual(writer.size(), 13)
114
115             writer.seek(5, os.SEEK_SET)
116             self.assertEqual("56789foo", writer.read(8))
117
118             self.assertIsNone(c.manifest_locator())
119             self.assertTrue(c.modified())
120             self.assertIsNone(keep.get("acbd18db4cc2f85cedef654fccc4a4d8+3"))
121
122             c.save_new("test_append")
123             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
124             self.assertFalse(c.modified())
125             self.assertEqual("foo", keep.get("acbd18db4cc2f85cedef654fccc4a4d8+3"))
126
127
128     def test_append(self):
129         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
130         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n', keep_client=keep)
131         writer = c.open("count.txt", "a+")
132         self.assertEqual(writer.read(20), "0123456789")
133         writer.seek(0, os.SEEK_SET)
134
135         writer.write("hello")
136         self.assertEqual(writer.read(20), "0123456789hello")
137         writer.seek(0, os.SEEK_SET)
138
139         writer.write("world")
140         self.assertEqual(writer.read(20), "0123456789helloworld")
141
142         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 fc5e038d38a57032085441e7fe7010b0+10 0:20:count.txt\n", c.portable_manifest_text())
143
144     def test_write_at_beginning(self):
145         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
146         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
147                              keep_client=keep) as c:
148             writer = c.open("count.txt", "r+")
149             self.assertEqual("0123456789", writer.readfrom(0, 13))
150             writer.seek(0, os.SEEK_SET)
151             writer.write("foo")
152             self.assertEqual(writer.size(), 10)
153             self.assertEqual("foo3456789", writer.readfrom(0, 13))
154             self.assertEqual(". acbd18db4cc2f85cedef654fccc4a4d8+3 781e5e245d69b566979b86e28d23f2c7+10 0:3:count.txt 6:7:count.txt\n", c.portable_manifest_text())
155
156     def test_write_empty(self):
157         keep = ArvadosFileWriterTestCase.MockKeep({})
158         with Collection(keep_client=keep) as c:
159             writer = c.open("count.txt", "w")
160             self.assertEqual(writer.size(), 0)
161             self.assertEqual(". d41d8cd98f00b204e9800998ecf8427e+0 0:0:count.txt\n", c.portable_manifest_text())
162
163     def test_save_manifest_text(self):
164         keep = ArvadosFileWriterTestCase.MockKeep({})
165         with Collection(keep_client=keep) as c:
166             writer = c.open("count.txt", "w")
167             writer.write("0123456789")
168             self.assertEqual('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n', c.portable_manifest_text())
169             self.assertNotIn('781e5e245d69b566979b86e28d23f2c7+10', keep.blocks)
170
171             self.assertEqual('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n', c.save_new(create_collection_record=False))
172             self.assertIn('781e5e245d69b566979b86e28d23f2c7+10', keep.blocks)
173
174     def test_get_manifest_text_commits(self):
175          keep = ArvadosFileWriterTestCase.MockKeep({})
176          with Collection(keep_client=keep) as c:
177              writer = c.open("count.txt", "w")
178              writer.write("0123456789")
179              self.assertEqual('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n', c.portable_manifest_text())
180              self.assertNotIn('781e5e245d69b566979b86e28d23f2c7+10', keep.blocks)
181              self.assertEqual('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n', c.manifest_text())
182              self.assertIn('781e5e245d69b566979b86e28d23f2c7+10', keep.blocks)
183
184
185     def test_write_in_middle(self):
186         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
187         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
188                              keep_client=keep) as c:
189             writer = c.open("count.txt", "r+")
190             self.assertEqual("0123456789", writer.readfrom(0, 13))
191             writer.seek(3, os.SEEK_SET)
192             writer.write("foo")
193             self.assertEqual(writer.size(), 10)
194             self.assertEqual("012foo6789", writer.readfrom(0, 13))
195             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:count.txt 10:3:count.txt 6:4:count.txt\n", c.portable_manifest_text())
196
197     def test_write_at_end(self):
198         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
199         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
200                              keep_client=keep) as c:
201             writer = c.open("count.txt", "r+")
202             self.assertEqual("0123456789", writer.readfrom(0, 13))
203             writer.seek(7, os.SEEK_SET)
204             writer.write("foo")
205             self.assertEqual(writer.size(), 10)
206             self.assertEqual("0123456foo", writer.readfrom(0, 13))
207             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:7:count.txt 10:3:count.txt\n", c.portable_manifest_text())
208
209     def test_write_across_segment_boundary(self):
210         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
211         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt 0:10:count.txt\n',
212                              keep_client=keep) as c:
213             writer = c.open("count.txt", "r+")
214             self.assertEqual("012345678901234", writer.readfrom(0, 15))
215             writer.seek(7, os.SEEK_SET)
216             writer.write("foobar")
217             self.assertEqual(writer.size(), 20)
218             self.assertEqual("0123456foobar34", writer.readfrom(0, 15))
219             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 3858f62230ac3c915f300c664312c63f+6 0:7:count.txt 10:6:count.txt 3:7:count.txt\n", c.portable_manifest_text())
220
221     def test_write_across_several_segments(self):
222         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
223         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:4:count.txt 0:4:count.txt 0:4:count.txt',
224                              keep_client=keep) as c:
225             writer = c.open("count.txt", "r+")
226             self.assertEqual("012301230123", writer.readfrom(0, 15))
227             writer.seek(2, os.SEEK_SET)
228             writer.write("abcdefg")
229             self.assertEqual(writer.size(), 12)
230             self.assertEqual("01abcdefg123", writer.readfrom(0, 15))
231             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 7ac66c0f148de9519b8bd264312c4d64+7 0:2:count.txt 10:7:count.txt 1:3:count.txt\n", c.portable_manifest_text())
232
233     def test_write_large(self):
234         keep = ArvadosFileWriterTestCase.MockKeep({})
235         api = ArvadosFileWriterTestCase.MockApi({"name":"test_write_large",
236                                                  "manifest_text": ". a5de24f4417cfba9d5825eadc2f4ca49+67108000 598cc1a4ccaef8ab6e4724d87e675d78+32892000 0:100000000:count.txt\n",
237                                                  "replication_desired":None},
238                                                 {"uuid":"zzzzz-4zz18-mockcollection0",
239                                                  "manifest_text": ". a5de24f4417cfba9d5825eadc2f4ca49+67108000 598cc1a4ccaef8ab6e4724d87e675d78+32892000 0:100000000:count.txt\n",
240                                                  "portable_data_hash":"9132ca8e3f671c76103a38f5bc24328c+108"})
241         with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
242                              api_client=api, keep_client=keep) as c:
243             writer = c.open("count.txt", "r+")
244             text = "0123456789" * 100
245             for b in xrange(0, 100000):
246                 writer.write(text)
247             self.assertEqual(writer.size(), 100000000)
248
249             self.assertIsNone(c.manifest_locator())
250             self.assertTrue(c.modified())
251             c.save_new("test_write_large")
252             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
253             self.assertFalse(c.modified())
254
255
256     def test_large_write(self):
257         keep = ArvadosFileWriterTestCase.MockKeep({})
258         api = ArvadosFileWriterTestCase.MockApi({}, {})
259         with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
260                              api_client=api, keep_client=keep) as c:
261             writer = c.open("count.txt", "r+")
262             self.assertEqual(writer.size(), 0)
263
264             text = "0123456789"
265             writer.write(text)
266             text = "0123456789" * 9999999
267             writer.write(text)
268             self.assertEqual(writer.size(), 100000000)
269
270             self.assertEqual(c.manifest_text(), ". 781e5e245d69b566979b86e28d23f2c7+10 48dd23ea1645fd47d789804d71b5bb8e+67108864 77c57dc6ac5a10bb2205caaa73187994+32891126 0:100000000:count.txt\n")
271
272     def test_rewrite_on_empty_file(self):
273         keep = ArvadosFileWriterTestCase.MockKeep({})
274         with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
275                              keep_client=keep) as c:
276             writer = c.open("count.txt", "r+")
277             for b in xrange(0, 10):
278                 writer.seek(0, os.SEEK_SET)
279                 writer.write("0123456789")
280
281             self.assertEqual(writer.size(), 10)
282             self.assertEqual("0123456789", writer.readfrom(0, 20))
283             self.assertEqual(". 7a08b07e84641703e5f2c836aa59a170+100 90:10:count.txt\n", c.portable_manifest_text())
284             writer.flush()
285             self.assertEqual(writer.size(), 10)
286             self.assertEqual("0123456789", writer.readfrom(0, 20))
287             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n", c.portable_manifest_text())
288
289     def test_rewrite_append_existing_file(self):
290         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
291         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt',
292                              keep_client=keep) as c:
293             writer = c.open("count.txt", "r+")
294             for b in xrange(0, 10):
295                 writer.seek(10, os.SEEK_SET)
296                 writer.write("abcdefghij")
297
298             self.assertEqual(writer.size(), 20)
299             self.assertEqual("0123456789abcdefghij", writer.readfrom(0, 20))
300             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 ae5f43bab79cf0be33f025fa97ae7398+100 0:10:count.txt 100:10:count.txt\n", c.portable_manifest_text())
301
302             writer.arvadosfile.flush()
303             self.assertEqual(writer.size(), 20)
304             self.assertEqual("0123456789abcdefghij", writer.readfrom(0, 20))
305             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 a925576942e94b2ef57a066101b48876+10 0:20:count.txt\n", c.portable_manifest_text())
306
307     def test_rewrite_over_existing_file(self):
308         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
309         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt',
310                              keep_client=keep) as c:
311             writer = c.open("count.txt", "r+")
312             for b in xrange(0, 10):
313                 writer.seek(5, os.SEEK_SET)
314                 writer.write("abcdefghij")
315
316             self.assertEqual(writer.size(), 15)
317             self.assertEqual("01234abcdefghij", writer.readfrom(0, 20))
318             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 ae5f43bab79cf0be33f025fa97ae7398+100 0:5:count.txt 100:10:count.txt\n", c.portable_manifest_text())
319
320             writer.arvadosfile.flush()
321
322             self.assertEqual(writer.size(), 15)
323             self.assertEqual("01234abcdefghij", writer.readfrom(0, 20))
324             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 a925576942e94b2ef57a066101b48876+10 0:5:count.txt 10:10:count.txt\n", c.portable_manifest_text())
325
326     def test_write_large_rewrite(self):
327         keep = ArvadosFileWriterTestCase.MockKeep({})
328         api = ArvadosFileWriterTestCase.MockApi({"name":"test_write_large",
329                                                  "manifest_text": ". 37400a68af9abdd76ca5bf13e819e42a+32892003 a5de24f4417cfba9d5825eadc2f4ca49+67108000 32892000:3:count.txt 32892006:67107997:count.txt 0:32892000:count.txt\n",
330                                                  "replication_desired":None},
331                                                 {"uuid":"zzzzz-4zz18-mockcollection0",
332                                                  "manifest_text": ". 37400a68af9abdd76ca5bf13e819e42a+32892003 a5de24f4417cfba9d5825eadc2f4ca49+67108000 32892000:3:count.txt 32892006:67107997:count.txt 0:32892000:count.txt\n",
333                                                  "portable_data_hash":"217665c6b713e1b78dfba7ebd42344db+156"})
334         with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
335                              api_client=api, keep_client=keep) as c:
336             writer = c.open("count.txt", "r+")
337             text = ''.join(["0123456789" for a in xrange(0, 100)])
338             for b in xrange(0, 100000):
339                 writer.write(text)
340             writer.seek(0, os.SEEK_SET)
341             writer.write("foo")
342             self.assertEqual(writer.size(), 100000000)
343
344             self.assertIsNone(c.manifest_locator())
345             self.assertTrue(c.modified())
346             c.save_new("test_write_large")
347             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
348             self.assertFalse(c.modified())
349
350     def test_create(self):
351         keep = ArvadosFileWriterTestCase.MockKeep({})
352         api = ArvadosFileWriterTestCase.MockApi({"name":"test_create",
353                                                  "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n",
354                                                  "replication_desired":None},
355                                                 {"uuid":"zzzzz-4zz18-mockcollection0",
356                                                  "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n",
357                                                  "portable_data_hash":"7a461a8c58601798f690f8b368ac4423+51"})
358         with Collection(api_client=api, keep_client=keep) as c:
359             writer = c.open("count.txt", "w+")
360             self.assertEqual(writer.size(), 0)
361             writer.write("01234567")
362             self.assertEqual(writer.size(), 8)
363
364             self.assertIsNone(c.manifest_locator())
365             self.assertTrue(c.modified())
366             self.assertIsNone(keep.get("2e9ec317e197819358fbc43afca7d837+8"))
367             c.save_new("test_create")
368             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
369             self.assertFalse(c.modified())
370             self.assertEqual("01234567", keep.get("2e9ec317e197819358fbc43afca7d837+8"))
371
372
373     def test_create_subdir(self):
374         keep = ArvadosFileWriterTestCase.MockKeep({})
375         api = ArvadosFileWriterTestCase.MockApi({"name":"test_create",
376                                                  "manifest_text":"./foo/bar 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n",
377                                                  "replication_desired":None},
378                                                 {"uuid":"zzzzz-4zz18-mockcollection0",
379                                                  "manifest_text":"./foo/bar 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n",
380                                                  "portable_data_hash":"1b02aaa62528d28a5be41651cbb9d7c7+59"})
381         with Collection(api_client=api, keep_client=keep) as c:
382             self.assertIsNone(c.api_response())
383             writer = c.open("foo/bar/count.txt", "w+")
384             writer.write("01234567")
385             self.assertFalse(c.committed())
386             c.save_new("test_create")
387             self.assertTrue(c.committed())
388             self.assertEqual(c.api_response(), api.response)
389
390     def test_overwrite(self):
391         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
392         api = ArvadosFileWriterTestCase.MockApi({"name":"test_overwrite",
393                                                  "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n",
394                                                  "replication_desired":None},
395                                                 {"uuid":"zzzzz-4zz18-mockcollection0",
396                                                  "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n",
397                                                  "portable_data_hash":"7a461a8c58601798f690f8b368ac4423+51"})
398         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
399                              api_client=api, keep_client=keep) as c:
400             writer = c.open("count.txt", "w+")
401             self.assertEqual(writer.size(), 0)
402             writer.write("01234567")
403             self.assertEqual(writer.size(), 8)
404
405             self.assertIsNone(c.manifest_locator())
406             self.assertTrue(c.modified())
407             c.save_new("test_overwrite")
408             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
409             self.assertFalse(c.modified())
410
411     def test_file_not_found(self):
412         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n') as c:
413             with self.assertRaises(IOError):
414                 writer = c.open("nocount.txt", "r")
415
416     def test_cannot_open_directory(self):
417         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n') as c:
418             with self.assertRaises(IOError):
419                 writer = c.open(".", "r")
420
421     def test_create_multiple(self):
422         keep = ArvadosFileWriterTestCase.MockKeep({})
423         api = ArvadosFileWriterTestCase.MockApi({"name":"test_create_multiple",
424                                                  "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 e8dc4081b13434b45189a720b77b6818+8 0:8:count1.txt 8:8:count2.txt\n",
425                                                  "replication_desired":None},
426                                                 {"uuid":"zzzzz-4zz18-mockcollection0",
427                                                  "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 e8dc4081b13434b45189a720b77b6818+8 0:8:count1.txt 8:8:count2.txt\n",
428                                                  "portable_data_hash":"71e7bb6c00d31fc2b4364199fd97be08+102"})
429         with Collection(api_client=api, keep_client=keep) as c:
430             w1 = c.open("count1.txt", "w")
431             w2 = c.open("count2.txt", "w")
432             w1.write("01234567")
433             w2.write("abcdefgh")
434             self.assertEqual(w1.size(), 8)
435             self.assertEqual(w2.size(), 8)
436
437             self.assertIsNone(c.manifest_locator())
438             self.assertTrue(c.modified())
439             self.assertIsNone(keep.get("2e9ec317e197819358fbc43afca7d837+8"))
440             c.save_new("test_create_multiple")
441             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
442             self.assertFalse(c.modified())
443             self.assertEqual("01234567", keep.get("2e9ec317e197819358fbc43afca7d837+8"))
444
445
class ArvadosFileReaderTestCase(StreamFileReaderTestCase):
    # Builds on StreamFileReaderTestCase; the readers under test here are
    # ArvadosFileReader objects produced by make_count_reader().

    class MockParent(object):
        # Fake parent collection handed to ArvadosFile in make_count_reader().

        class MockBlockMgr(object):
            # Serves block contents from a dict.  With `nocache` set, a
            # cache-only lookup reports a miss (None).

            def __init__(self, blocks, nocache):
                self.nocache = nocache
                self.blocks = blocks

            def block_prefetch(self, loc):
                # Prefetching does nothing in the mock.
                pass

            def get_block_contents(self, loc, num_retries=0, cache_only=False):
                cache_miss = self.nocache and cache_only
                if not cache_miss:
                    return self.blocks[loc]
                return None

        def __init__(self, blocks, nocache):
            self.blocks = blocks
            self.nocache = nocache
            self.lock = arvados.arvfile.NoopLock()

        def root_collection(self):
            # The mock acts as its own root collection.
            return self

        def _my_block_manager(self):
            # A fresh manager over the same blocks on every call.
            manager_cls = ArvadosFileReaderTestCase.MockParent.MockBlockMgr
            return manager_cls(self.blocks, self.nocache)


    def make_count_reader(self, nocache=False):
        # The stream is three 5-byte blocks laid end to end; the file is
        # three 3-byte segments located at stream offsets 1, 6 and 11.
        blocks = {}
        stream = []
        offset = 0
        for data in ['01234', '34567', '67890']:
            locator = tutil.str_keep_locator(data)
            blocks[locator] = data
            stream.append(Range(locator, offset, len(data)))
            offset += len(data)
        parent = ArvadosFileReaderTestCase.MockParent(blocks, nocache)
        file_segments = [Range(1, 0, 3), Range(6, 3, 3), Range(11, 6, 3)]
        af = ArvadosFile(parent, "count.txt", stream=stream, segments=file_segments)
        return ArvadosFileReader(af)

    def test_read_block_crossing_behavior(self):
        # read() needs to return all the data requested if possible, even if it
        # crosses uncached blocks: https://arvados.org/issues/5856
        reader = self.make_count_reader(nocache=True)
        self.assertEqual('12345678', reader.read(8))

    def test_successive_reads(self):
        # Override StreamFileReaderTestCase.test_successive_reads
        reader = self.make_count_reader(nocache=True)
        for expected_chunk in ('1234', '5678', '9', ''):
            self.assertEqual(expected_chunk, reader.read(4))

    def test_tell_after_block_read(self):
        # Override StreamFileReaderTestCase.test_tell_after_block_read
        reader = self.make_count_reader(nocache=True)
        self.assertEqual('12345678', reader.read(8))
        self.assertEqual(8, reader.tell())

    def test_prefetch(self):
        # Reading four bytes from the first block should leave requests
        # for both blocks recorded on the mock Keep.
        first_block = "2e9ec317e197819358fbc43afca7d837+8"
        second_block = "e8dc4081b13434b45189a720b77b6818+8"
        keep = ArvadosFileWriterTestCase.MockKeep({first_block: "01234567", second_block: "abcdefgh"})
        manifest = ". 2e9ec317e197819358fbc43afca7d837+8 e8dc4081b13434b45189a720b77b6818+8 0:16:count.txt\n"
        with Collection(manifest, keep_client=keep) as coll:
            reader = coll.open("count.txt", "r")
            self.assertEqual("0123", reader.read(4))
        for locator in (first_block, second_block):
            self.assertIn(locator, keep.requests)

    def test__eq__from_manifest(self):
        # Files loaded from identical manifests compare equal.
        manifest = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt'
        with Collection(manifest) as left:
            with Collection(manifest) as right:
                self.assertTrue(left["count1.txt"] == right["count1.txt"])
                self.assertFalse(left["count1.txt"] != right["count1.txt"])

    def test__eq__from_writes(self):
        # A file built by writing the same bytes compares equal to one
        # loaded from a manifest.
        manifest = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt'
        with Collection(manifest) as left:
            with Collection() as right:
                out = right.open("count1.txt", "w")
                out.write("0123456789")

                self.assertTrue(left["count1.txt"] == right["count1.txt"])
                self.assertFalse(left["count1.txt"] != right["count1.txt"])

    def test__ne__(self):
        # Same length, different bytes: the files must compare unequal.
        manifest = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt'
        with Collection(manifest) as left:
            with Collection() as right:
                out = right.open("count1.txt", "w")
                out.write("1234567890")

                self.assertTrue(left["count1.txt"] != right["count1.txt"])
                self.assertFalse(left["count1.txt"] == right["count1.txt"])
536
537
class ArvadosFileReadTestCase(unittest.TestCase, StreamRetryTestMixin):
    # Supplies reader_for() and read_for_test(), both built on
    # ArvadosFileReader.  NOTE(review): presumably these are the hooks
    # StreamRetryTestMixin expects -- confirm in test_stream.

    def reader_for(self, coll_name, **kwargs):
        # Build an ArvadosFileReader over every block locator found in the
        # named manifest, with one file segment per block.
        stream = []
        segments = []
        offset = 0
        for token in self.manifest_for(coll_name).split():
            try:
                locator = KeepLocator(token)
                segments.append(Range(offset, offset, locator.size))
                stream.append(Range(token, offset, locator.size))
                offset += locator.size
            except ValueError:
                # Token was rejected with ValueError; leave it out.
                continue

        blockmanager = arvados.arvfile._BlockManager(self.keep_client())
        blockmanager.prefetch_enabled = False
        col = Collection(keep_client=self.keep_client(), block_manager=blockmanager)
        af = ArvadosFile(col, "test", stream=stream, segments=segments)
        return ArvadosFileReader(af, **kwargs)

    def read_for_test(self, reader, byte_count, **kwargs):
        # Plain read() of up to byte_count bytes.
        data = reader.read(byte_count, **kwargs)
        return data
562
563
class ArvadosFileReadFromTestCase(ArvadosFileReadTestCase):
    # Same as the parent test case, but reads through readfrom().

    def read_for_test(self, reader, byte_count, **kwargs):
        # Always read from offset 0.
        data = reader.readfrom(0, byte_count, **kwargs)
        return data
567
568
class ArvadosFileReadAllTestCase(ArvadosFileReadTestCase):
    # Same as the parent test case, but reads through readall().

    def read_for_test(self, reader, byte_count, **kwargs):
        # byte_count is ignored; all pieces from readall() are joined.
        pieces = reader.readall(**kwargs)
        return ''.join(pieces)
572
573
class ArvadosFileReadAllDecompressedTestCase(ArvadosFileReadTestCase):
    """Repeat the inherited tests, reading through readall_decompressed()."""

    def read_for_test(self, reader, byte_count, **kwargs):
        # byte_count is not used here; every chunk produced by
        # readall_decompressed() is concatenated into a single string.
        chunks = reader.readall_decompressed(**kwargs)
        return ''.join(chunks)
577
578
class ArvadosFileReadlinesTestCase(ArvadosFileReadTestCase):
    """Repeat the inherited tests, reading through readlines()."""

    def read_for_test(self, reader, byte_count, **kwargs):
        # byte_count is not used here; the lines returned by readlines()
        # are joined back into a single string.
        lines = reader.readlines(**kwargs)
        return ''.join(lines)
582
583
class ArvadosFileTestCase(unittest.TestCase):
    """Tests for ArvadosFile.permission_expired()."""

    def datetime_to_hex(self, dt):
        """Return dt as a hex string of its local-time Unix timestamp.

        The leading "0x" produced by hex() is stripped.
        """
        epoch_seconds = int(time.mktime(dt.timetuple()))
        return hex(epoch_seconds)[2:]

    def test_permission_expired(self):
        # The "{}" placeholder after "@" in the locator is filled with a
        # hex timestamp so each collection carries a chosen expiry time.
        signed_manifest = ". 781e5e245d69b566979b86e28d23f2c7+10+A715fd31f8111894f717eb1003c1b0216799dd9ec@{} 0:10:count.txt\n"
        now = datetime.datetime.now()
        one_week = datetime.timedelta(days=7)
        last_week = now - one_week
        last_month = now - datetime.timedelta(days=30)
        next_week = now + one_week

        def manifest_expiring(when):
            # Manifest text whose timestamp field encodes `when`.
            return signed_manifest.format(self.datetime_to_hex(when))

        # Expiry in the future: not expired as of now.
        with Collection(manifest_expiring(next_week)) as c:
            self.assertFalse(c.find('count.txt').permission_expired())

        # Expiry a week ago: expired as of now and as of next week, but not
        # when evaluated as of a month ago.
        with Collection(manifest_expiring(last_week)) as c:
            arvfile = c.find('count.txt')
            self.assertTrue(arvfile.permission_expired())
            self.assertTrue(arvfile.permission_expired(next_week))
            self.assertFalse(arvfile.permission_expired(last_month))
601
602
603 class BlockManagerTest(unittest.TestCase):
604     def test_bufferblock_append(self):
605         keep = ArvadosFileWriterTestCase.MockKeep({})
606         with arvados.arvfile._BlockManager(keep) as blockmanager:
607             bufferblock = blockmanager.alloc_bufferblock()
608             bufferblock.append("foo")
609
610             self.assertEqual(bufferblock.size(), 3)
611             self.assertEqual(bufferblock.buffer_view[0:3], "foo")
612             self.assertEqual(bufferblock.locator(), "acbd18db4cc2f85cedef654fccc4a4d8+3")
613
614             bufferblock.append("bar")
615
616             self.assertEqual(bufferblock.size(), 6)
617             self.assertEqual(bufferblock.buffer_view[0:6], "foobar")
618             self.assertEqual(bufferblock.locator(), "3858f62230ac3c915f300c664312c63f+6")
619
620             bufferblock.set_state(arvados.arvfile._BufferBlock.PENDING)
621             with self.assertRaises(arvados.errors.AssertionError):
622                 bufferblock.append("bar")
623
624     def test_bufferblock_dup(self):
625         keep = ArvadosFileWriterTestCase.MockKeep({})
626         with arvados.arvfile._BlockManager(keep) as blockmanager:
627             bufferblock = blockmanager.alloc_bufferblock()
628             bufferblock.append("foo")
629
630             self.assertEqual(bufferblock.size(), 3)
631             self.assertEqual(bufferblock.buffer_view[0:3], "foo")
632             self.assertEqual(bufferblock.locator(), "acbd18db4cc2f85cedef654fccc4a4d8+3")
633             bufferblock.set_state(arvados.arvfile._BufferBlock.PENDING)
634
635             bufferblock2 = blockmanager.dup_block(bufferblock, None)
636             self.assertNotEqual(bufferblock.blockid, bufferblock2.blockid)
637
638             bufferblock2.append("bar")
639
640             self.assertEqual(bufferblock2.size(), 6)
641             self.assertEqual(bufferblock2.buffer_view[0:6], "foobar")
642             self.assertEqual(bufferblock2.locator(), "3858f62230ac3c915f300c664312c63f+6")
643
644             self.assertEqual(bufferblock.size(), 3)
645             self.assertEqual(bufferblock.buffer_view[0:3], "foo")
646             self.assertEqual(bufferblock.locator(), "acbd18db4cc2f85cedef654fccc4a4d8+3")
647
648     def test_bufferblock_get(self):
649         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
650         with arvados.arvfile._BlockManager(keep) as blockmanager:
651             bufferblock = blockmanager.alloc_bufferblock()
652             bufferblock.append("foo")
653
654             self.assertEqual(blockmanager.get_block_contents("781e5e245d69b566979b86e28d23f2c7+10", 1), "0123456789")
655             self.assertEqual(blockmanager.get_block_contents(bufferblock.blockid, 1), "foo")
656
657     def test_bufferblock_commit(self):
658         mockkeep = mock.MagicMock()
659         with arvados.arvfile._BlockManager(mockkeep) as blockmanager:
660             bufferblock = blockmanager.alloc_bufferblock()
661             bufferblock.owner = mock.MagicMock()
662             def flush(sync=None):
663                 blockmanager.commit_bufferblock(bufferblock, sync)
664             bufferblock.owner.flush.side_effect = flush
665             bufferblock.append("foo")
666             blockmanager.commit_all()
667             self.assertTrue(bufferblock.owner.flush.called)
668             self.assertTrue(mockkeep.put.called)
669             self.assertEqual(bufferblock.state(), arvados.arvfile._BufferBlock.COMMITTED)
670             self.assertIsNone(bufferblock.buffer_view)
671
672     def test_bufferblock_commit_pending(self):
673         # Test for bug #7225
674         mockkeep = mock.MagicMock()
675         mockkeep.put.side_effect = lambda x: time.sleep(1)
676         with arvados.arvfile._BlockManager(mockkeep) as blockmanager:
677             bufferblock = blockmanager.alloc_bufferblock()
678             bufferblock.append("foo")
679
680             blockmanager.commit_bufferblock(bufferblock, False)
681             self.assertEqual(bufferblock.state(), arvados.arvfile._BufferBlock.PENDING)
682
683             blockmanager.commit_bufferblock(bufferblock, True)
684             self.assertEqual(bufferblock.state(), arvados.arvfile._BufferBlock.COMMITTED)
685
686
687     def test_bufferblock_commit_with_error(self):
688         mockkeep = mock.MagicMock()
689         mockkeep.put.side_effect = arvados.errors.KeepWriteError("fail")
690         with arvados.arvfile._BlockManager(mockkeep) as blockmanager:
691             bufferblock = blockmanager.alloc_bufferblock()
692             bufferblock.owner = mock.MagicMock()
693             def flush(sync=None):
694                 blockmanager.commit_bufferblock(bufferblock, sync)
695             bufferblock.owner.flush.side_effect = flush
696             bufferblock.append("foo")
697             with self.assertRaises(arvados.errors.KeepWriteError) as err:
698                 blockmanager.commit_all()
699             self.assertTrue(bufferblock.owner.flush.called)
700             self.assertEqual(str(err.exception), "Error writing some blocks: block acbd18db4cc2f85cedef654fccc4a4d8+3 raised KeepWriteError (fail)")
701             self.assertEqual(bufferblock.state(), arvados.arvfile._BufferBlock.ERROR)