10671: Merge branch 'master' into 10671-pipeline-instance-finish-time
[arvados.git] / sdk / python / tests / test_arvfile.py
1 #!/usr/bin/env python
2
3 import bz2
4 import datetime
5 import gzip
6 import io
7 import mock
8 import os
9 import unittest
10 import time
11
12 import arvados
13 from arvados._ranges import Range
14 from arvados.keep import KeepLocator
15 from arvados.collection import Collection, CollectionReader
16 from arvados.arvfile import ArvadosFile, ArvadosFileReader
17
18 import arvados_testutil as tutil
19 from test_stream import StreamFileReaderTestCase, StreamRetryTestMixin
20
21 class ArvadosFileWriterTestCase(unittest.TestCase):
22     class MockKeep(object):
23         def __init__(self, blocks):
24             self.blocks = blocks
25             self.requests = []
26         def get(self, locator, num_retries=0):
27             self.requests.append(locator)
28             return self.blocks.get(locator)
29         def get_from_cache(self, locator):
30             self.requests.append(locator)
31             return self.blocks.get(locator)
32         def put(self, data, num_retries=None, copies=None):
33             pdh = tutil.str_keep_locator(data)
34             self.blocks[pdh] = str(data)
35             return pdh
36
37     class MockApi(object):
38         def __init__(self, b, r):
39             self.body = b
40             self.response = r
41             self._schema = ArvadosFileWriterTestCase.MockApi.MockSchema()
42             self._rootDesc = {}
43         class MockSchema(object):
44             def __init__(self):
45                 self.schemas = {'Collection': {'properties': {'replication_desired': {'type':'integer'}}}}
46         class MockCollections(object):
47             def __init__(self, b, r):
48                 self.body = b
49                 self.response = r
50             class Execute(object):
51                 def __init__(self, r):
52                     self.response = r
53                 def execute(self, num_retries=None):
54                     return self.response
55             def create(self, ensure_unique_name=False, body=None):
56                 if body != self.body:
57                     raise Exception("Body %s does not match expectation %s" % (body, self.body))
58                 return ArvadosFileWriterTestCase.MockApi.MockCollections.Execute(self.response)
59             def update(self, uuid=None, body=None):
60                 return ArvadosFileWriterTestCase.MockApi.MockCollections.Execute(self.response)
61         def collections(self):
62             return ArvadosFileWriterTestCase.MockApi.MockCollections(self.body, self.response)
63
64
65     def test_truncate(self):
66         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
67         api = ArvadosFileWriterTestCase.MockApi({"name":"test_truncate",
68                                                  "manifest_text":". 781e5e245d69b566979b86e28d23f2c7+10 0:8:count.txt\n",
69                                                  "replication_desired":None},
70                                                 {"uuid":"zzzzz-4zz18-mockcollection0",
71                                                  "manifest_text":". 781e5e245d69b566979b86e28d23f2c7+10 0:8:count.txt\n"})
72         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
73                              api_client=api, keep_client=keep) as c:
74             writer = c.open("count.txt", "r+")
75             self.assertEqual(writer.size(), 10)
76             self.assertEqual("0123456789", writer.read(12))
77
78             writer.truncate(8)
79
80             # Make sure reading off the end doesn't break
81             self.assertEqual("", writer.read(12))
82
83             self.assertEqual(writer.size(), 8)
84             writer.seek(0, os.SEEK_SET)
85             self.assertEqual("01234567", writer.read(12))
86
87             self.assertIsNone(c.manifest_locator())
88             self.assertTrue(c.modified())
89             c.save_new("test_truncate")
90             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
91             self.assertFalse(c.modified())
92
93     def test_write_to_end(self):
94         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
95         api = ArvadosFileWriterTestCase.MockApi({"name":"test_append",
96                                                  "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:13:count.txt\n",
97                                                  "replication_desired":None},
98                                                 {"uuid":"zzzzz-4zz18-mockcollection0",
99                                                  "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:13:count.txt\n"})
100         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
101                              api_client=api, keep_client=keep) as c:
102             writer = c.open("count.txt", "r+")
103             self.assertEqual(writer.size(), 10)
104
105             writer.seek(5, os.SEEK_SET)
106             self.assertEqual("56789", writer.read(8))
107
108             writer.seek(10, os.SEEK_SET)
109             writer.write("foo")
110             self.assertEqual(writer.size(), 13)
111
112             writer.seek(5, os.SEEK_SET)
113             self.assertEqual("56789foo", writer.read(8))
114
115             self.assertIsNone(c.manifest_locator())
116             self.assertTrue(c.modified())
117             self.assertIsNone(keep.get("acbd18db4cc2f85cedef654fccc4a4d8+3"))
118
119             c.save_new("test_append")
120             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
121             self.assertFalse(c.modified())
122             self.assertEqual("foo", keep.get("acbd18db4cc2f85cedef654fccc4a4d8+3"))
123
124
125     def test_append(self):
126         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
127         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n', keep_client=keep)
128         writer = c.open("count.txt", "a+")
129         self.assertEqual(writer.read(20), "0123456789")
130         writer.seek(0, os.SEEK_SET)
131
132         writer.write("hello")
133         self.assertEqual(writer.read(20), "0123456789hello")
134         writer.seek(0, os.SEEK_SET)
135
136         writer.write("world")
137         self.assertEqual(writer.read(20), "0123456789helloworld")
138
139         self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 fc5e038d38a57032085441e7fe7010b0+10 0:20:count.txt\n", c.portable_manifest_text())
140
141     def test_write_at_beginning(self):
142         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
143         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
144                              keep_client=keep) as c:
145             writer = c.open("count.txt", "r+")
146             self.assertEqual("0123456789", writer.readfrom(0, 13))
147             writer.seek(0, os.SEEK_SET)
148             writer.write("foo")
149             self.assertEqual(writer.size(), 10)
150             self.assertEqual("foo3456789", writer.readfrom(0, 13))
151             self.assertEqual(". acbd18db4cc2f85cedef654fccc4a4d8+3 781e5e245d69b566979b86e28d23f2c7+10 0:3:count.txt 6:7:count.txt\n", c.portable_manifest_text())
152
153     def test_write_empty(self):
154         keep = ArvadosFileWriterTestCase.MockKeep({})
155         with Collection(keep_client=keep) as c:
156             writer = c.open("count.txt", "w")
157             self.assertEqual(writer.size(), 0)
158             self.assertEqual(". d41d8cd98f00b204e9800998ecf8427e+0 0:0:count.txt\n", c.portable_manifest_text())
159
160     def test_save_manifest_text(self):
161         keep = ArvadosFileWriterTestCase.MockKeep({})
162         with Collection(keep_client=keep) as c:
163             writer = c.open("count.txt", "w")
164             writer.write("0123456789")
165             self.assertEqual('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n', c.portable_manifest_text())
166             self.assertNotIn('781e5e245d69b566979b86e28d23f2c7+10', keep.blocks)
167
168             self.assertEqual('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n', c.save_new(create_collection_record=False))
169             self.assertIn('781e5e245d69b566979b86e28d23f2c7+10', keep.blocks)
170
171     def test_get_manifest_text_commits(self):
172          keep = ArvadosFileWriterTestCase.MockKeep({})
173          with Collection(keep_client=keep) as c:
174              writer = c.open("count.txt", "w")
175              writer.write("0123456789")
176              self.assertEqual('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n', c.portable_manifest_text())
177              self.assertNotIn('781e5e245d69b566979b86e28d23f2c7+10', keep.blocks)
178              self.assertEqual('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n', c.manifest_text())
179              self.assertIn('781e5e245d69b566979b86e28d23f2c7+10', keep.blocks)
180
181
182     def test_write_in_middle(self):
183         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
184         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
185                              keep_client=keep) as c:
186             writer = c.open("count.txt", "r+")
187             self.assertEqual("0123456789", writer.readfrom(0, 13))
188             writer.seek(3, os.SEEK_SET)
189             writer.write("foo")
190             self.assertEqual(writer.size(), 10)
191             self.assertEqual("012foo6789", writer.readfrom(0, 13))
192             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:count.txt 10:3:count.txt 6:4:count.txt\n", c.portable_manifest_text())
193
194     def test_write_at_end(self):
195         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
196         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
197                              keep_client=keep) as c:
198             writer = c.open("count.txt", "r+")
199             self.assertEqual("0123456789", writer.readfrom(0, 13))
200             writer.seek(7, os.SEEK_SET)
201             writer.write("foo")
202             self.assertEqual(writer.size(), 10)
203             self.assertEqual("0123456foo", writer.readfrom(0, 13))
204             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:7:count.txt 10:3:count.txt\n", c.portable_manifest_text())
205
206     def test_write_across_segment_boundary(self):
207         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
208         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt 0:10:count.txt\n',
209                              keep_client=keep) as c:
210             writer = c.open("count.txt", "r+")
211             self.assertEqual("012345678901234", writer.readfrom(0, 15))
212             writer.seek(7, os.SEEK_SET)
213             writer.write("foobar")
214             self.assertEqual(writer.size(), 20)
215             self.assertEqual("0123456foobar34", writer.readfrom(0, 15))
216             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 3858f62230ac3c915f300c664312c63f+6 0:7:count.txt 10:6:count.txt 3:7:count.txt\n", c.portable_manifest_text())
217
218     def test_write_across_several_segments(self):
219         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
220         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:4:count.txt 0:4:count.txt 0:4:count.txt',
221                              keep_client=keep) as c:
222             writer = c.open("count.txt", "r+")
223             self.assertEqual("012301230123", writer.readfrom(0, 15))
224             writer.seek(2, os.SEEK_SET)
225             writer.write("abcdefg")
226             self.assertEqual(writer.size(), 12)
227             self.assertEqual("01abcdefg123", writer.readfrom(0, 15))
228             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 7ac66c0f148de9519b8bd264312c4d64+7 0:2:count.txt 10:7:count.txt 1:3:count.txt\n", c.portable_manifest_text())
229
230     def test_write_large(self):
231         keep = ArvadosFileWriterTestCase.MockKeep({})
232         api = ArvadosFileWriterTestCase.MockApi({"name":"test_write_large",
233                                                  "manifest_text": ". a5de24f4417cfba9d5825eadc2f4ca49+67108000 598cc1a4ccaef8ab6e4724d87e675d78+32892000 0:100000000:count.txt\n",
234                                                  "replication_desired":None},
235                                                 {"uuid":"zzzzz-4zz18-mockcollection0",
236                                                  "manifest_text": ". a5de24f4417cfba9d5825eadc2f4ca49+67108000 598cc1a4ccaef8ab6e4724d87e675d78+32892000 0:100000000:count.txt\n"})
237         with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
238                              api_client=api, keep_client=keep) as c:
239             writer = c.open("count.txt", "r+")
240             text = "0123456789" * 100
241             for b in xrange(0, 100000):
242                 writer.write(text)
243             self.assertEqual(writer.size(), 100000000)
244
245             self.assertIsNone(c.manifest_locator())
246             self.assertTrue(c.modified())
247             c.save_new("test_write_large")
248             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
249             self.assertFalse(c.modified())
250
251
252     def test_large_write(self):
253         keep = ArvadosFileWriterTestCase.MockKeep({})
254         api = ArvadosFileWriterTestCase.MockApi({}, {})
255         with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
256                              api_client=api, keep_client=keep) as c:
257             writer = c.open("count.txt", "r+")
258             self.assertEqual(writer.size(), 0)
259
260             text = "0123456789"
261             writer.write(text)
262             text = "0123456789" * 9999999
263             writer.write(text)
264             self.assertEqual(writer.size(), 100000000)
265
266             self.assertEqual(c.manifest_text(), ". 781e5e245d69b566979b86e28d23f2c7+10 48dd23ea1645fd47d789804d71b5bb8e+67108864 77c57dc6ac5a10bb2205caaa73187994+32891126 0:100000000:count.txt\n")
267
268     def test_rewrite_on_empty_file(self):
269         keep = ArvadosFileWriterTestCase.MockKeep({})
270         with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
271                              keep_client=keep) as c:
272             writer = c.open("count.txt", "r+")
273             for b in xrange(0, 10):
274                 writer.seek(0, os.SEEK_SET)
275                 writer.write("0123456789")
276
277             self.assertEqual(writer.size(), 10)
278             self.assertEqual("0123456789", writer.readfrom(0, 20))
279             self.assertEqual(". 7a08b07e84641703e5f2c836aa59a170+100 90:10:count.txt\n", c.portable_manifest_text())
280             writer.flush()
281             self.assertEqual(writer.size(), 10)
282             self.assertEqual("0123456789", writer.readfrom(0, 20))
283             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n", c.portable_manifest_text())
284
285     def test_rewrite_append_existing_file(self):
286         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
287         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt',
288                              keep_client=keep) as c:
289             writer = c.open("count.txt", "r+")
290             for b in xrange(0, 10):
291                 writer.seek(10, os.SEEK_SET)
292                 writer.write("abcdefghij")
293
294             self.assertEqual(writer.size(), 20)
295             self.assertEqual("0123456789abcdefghij", writer.readfrom(0, 20))
296             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 ae5f43bab79cf0be33f025fa97ae7398+100 0:10:count.txt 100:10:count.txt\n", c.portable_manifest_text())
297
298             writer.arvadosfile.flush()
299             self.assertEqual(writer.size(), 20)
300             self.assertEqual("0123456789abcdefghij", writer.readfrom(0, 20))
301             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 a925576942e94b2ef57a066101b48876+10 0:20:count.txt\n", c.portable_manifest_text())
302
303     def test_rewrite_over_existing_file(self):
304         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
305         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt',
306                              keep_client=keep) as c:
307             writer = c.open("count.txt", "r+")
308             for b in xrange(0, 10):
309                 writer.seek(5, os.SEEK_SET)
310                 writer.write("abcdefghij")
311
312             self.assertEqual(writer.size(), 15)
313             self.assertEqual("01234abcdefghij", writer.readfrom(0, 20))
314             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 ae5f43bab79cf0be33f025fa97ae7398+100 0:5:count.txt 100:10:count.txt\n", c.portable_manifest_text())
315
316             writer.arvadosfile.flush()
317
318             self.assertEqual(writer.size(), 15)
319             self.assertEqual("01234abcdefghij", writer.readfrom(0, 20))
320             self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 a925576942e94b2ef57a066101b48876+10 0:5:count.txt 10:10:count.txt\n", c.portable_manifest_text())
321
322     def test_write_large_rewrite(self):
323         keep = ArvadosFileWriterTestCase.MockKeep({})
324         api = ArvadosFileWriterTestCase.MockApi({"name":"test_write_large",
325                                                  "manifest_text": ". 37400a68af9abdd76ca5bf13e819e42a+32892003 a5de24f4417cfba9d5825eadc2f4ca49+67108000 32892000:3:count.txt 32892006:67107997:count.txt 0:32892000:count.txt\n",
326                                                  "replication_desired":None},
327                                                 {"uuid":"zzzzz-4zz18-mockcollection0",
328                                                  "manifest_text": ". 37400a68af9abdd76ca5bf13e819e42a+32892003 a5de24f4417cfba9d5825eadc2f4ca49+67108000 32892000:3:count.txt 32892006:67107997:count.txt 0:32892000:count.txt\n"})
329         with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
330                              api_client=api, keep_client=keep) as c:
331             writer = c.open("count.txt", "r+")
332             text = ''.join(["0123456789" for a in xrange(0, 100)])
333             for b in xrange(0, 100000):
334                 writer.write(text)
335             writer.seek(0, os.SEEK_SET)
336             writer.write("foo")
337             self.assertEqual(writer.size(), 100000000)
338
339             self.assertIsNone(c.manifest_locator())
340             self.assertTrue(c.modified())
341             c.save_new("test_write_large")
342             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
343             self.assertFalse(c.modified())
344
345     def test_create(self):
346         keep = ArvadosFileWriterTestCase.MockKeep({})
347         api = ArvadosFileWriterTestCase.MockApi({"name":"test_create",
348                                                  "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n",
349                                                  "replication_desired":None},
350                                                 {"uuid":"zzzzz-4zz18-mockcollection0",
351                                                  "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n"})
352         with Collection(api_client=api, keep_client=keep) as c:
353             writer = c.open("count.txt", "w+")
354             self.assertEqual(writer.size(), 0)
355             writer.write("01234567")
356             self.assertEqual(writer.size(), 8)
357
358             self.assertIsNone(c.manifest_locator())
359             self.assertTrue(c.modified())
360             self.assertIsNone(keep.get("2e9ec317e197819358fbc43afca7d837+8"))
361             c.save_new("test_create")
362             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
363             self.assertFalse(c.modified())
364             self.assertEqual("01234567", keep.get("2e9ec317e197819358fbc43afca7d837+8"))
365
366
367     def test_create_subdir(self):
368         keep = ArvadosFileWriterTestCase.MockKeep({})
369         api = ArvadosFileWriterTestCase.MockApi({"name":"test_create",
370                                                  "manifest_text":"./foo/bar 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n",
371                                                  "replication_desired":None},
372                                                 {"uuid":"zzzzz-4zz18-mockcollection0",
373                                                  "manifest_text":"./foo/bar 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n"})
374         with Collection(api_client=api, keep_client=keep) as c:
375             self.assertIsNone(c.api_response())
376             writer = c.open("foo/bar/count.txt", "w+")
377             writer.write("01234567")
378             self.assertFalse(c.committed())
379             c.save_new("test_create")
380             self.assertTrue(c.committed())
381             self.assertEqual(c.api_response(), api.response)
382
383     def test_overwrite(self):
384         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
385         api = ArvadosFileWriterTestCase.MockApi({"name":"test_overwrite",
386                                                  "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n",
387                                                  "replication_desired":None},
388                                                 {"uuid":"zzzzz-4zz18-mockcollection0",
389                                                  "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n"})
390         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
391                              api_client=api, keep_client=keep) as c:
392             writer = c.open("count.txt", "w+")
393             self.assertEqual(writer.size(), 0)
394             writer.write("01234567")
395             self.assertEqual(writer.size(), 8)
396
397             self.assertIsNone(c.manifest_locator())
398             self.assertTrue(c.modified())
399             c.save_new("test_overwrite")
400             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
401             self.assertFalse(c.modified())
402
403     def test_file_not_found(self):
404         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n') as c:
405             with self.assertRaises(IOError):
406                 writer = c.open("nocount.txt", "r")
407
408     def test_cannot_open_directory(self):
409         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n') as c:
410             with self.assertRaises(IOError):
411                 writer = c.open(".", "r")
412
413     def test_create_multiple(self):
414         keep = ArvadosFileWriterTestCase.MockKeep({})
415         api = ArvadosFileWriterTestCase.MockApi({"name":"test_create_multiple",
416                                                  "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 e8dc4081b13434b45189a720b77b6818+8 0:8:count1.txt 8:8:count2.txt\n",
417                                                  "replication_desired":None},
418                                                 {"uuid":"zzzzz-4zz18-mockcollection0",
419                                                  "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 e8dc4081b13434b45189a720b77b6818+8 0:8:count1.txt 8:8:count2.txt\n"})
420         with Collection(api_client=api, keep_client=keep) as c:
421             w1 = c.open("count1.txt", "w")
422             w2 = c.open("count2.txt", "w")
423             w1.write("01234567")
424             w2.write("abcdefgh")
425             self.assertEqual(w1.size(), 8)
426             self.assertEqual(w2.size(), 8)
427
428             self.assertIsNone(c.manifest_locator())
429             self.assertTrue(c.modified())
430             self.assertIsNone(keep.get("2e9ec317e197819358fbc43afca7d837+8"))
431             c.save_new("test_create_multiple")
432             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
433             self.assertFalse(c.modified())
434             self.assertEqual("01234567", keep.get("2e9ec317e197819358fbc43afca7d837+8"))
435
436
437 class ArvadosFileReaderTestCase(StreamFileReaderTestCase):
438     class MockParent(object):
439         class MockBlockMgr(object):
440             def __init__(self, blocks, nocache):
441                 self.blocks = blocks
442                 self.nocache = nocache
443
444             def block_prefetch(self, loc):
445                 pass
446
447             def get_block_contents(self, loc, num_retries=0, cache_only=False):
448                 if self.nocache and cache_only:
449                     return None
450                 return self.blocks[loc]
451
452         def __init__(self, blocks, nocache):
453             self.blocks = blocks
454             self.nocache = nocache
455             self.lock = arvados.arvfile.NoopLock()
456
457         def root_collection(self):
458             return self
459
460         def _my_block_manager(self):
461             return ArvadosFileReaderTestCase.MockParent.MockBlockMgr(self.blocks, self.nocache)
462
463
464     def make_count_reader(self, nocache=False):
465         stream = []
466         n = 0
467         blocks = {}
468         for d in ['01234', '34567', '67890']:
469             loc = tutil.str_keep_locator(d)
470             blocks[loc] = d
471             stream.append(Range(loc, n, len(d)))
472             n += len(d)
473         af = ArvadosFile(ArvadosFileReaderTestCase.MockParent(blocks, nocache), "count.txt", stream=stream, segments=[Range(1, 0, 3), Range(6, 3, 3), Range(11, 6, 3)])
474         return ArvadosFileReader(af)
475
476     def test_read_block_crossing_behavior(self):
477         # read() needs to return all the data requested if possible, even if it
478         # crosses uncached blocks: https://arvados.org/issues/5856
479         sfile = self.make_count_reader(nocache=True)
480         self.assertEqual('12345678', sfile.read(8))
481
482     def test_successive_reads(self):
483         # Override StreamFileReaderTestCase.test_successive_reads
484         sfile = self.make_count_reader(nocache=True)
485         self.assertEqual('1234', sfile.read(4))
486         self.assertEqual('5678', sfile.read(4))
487         self.assertEqual('9', sfile.read(4))
488         self.assertEqual('', sfile.read(4))
489
490     def test_tell_after_block_read(self):
491         # Override StreamFileReaderTestCase.test_tell_after_block_read
492         sfile = self.make_count_reader(nocache=True)
493         self.assertEqual('12345678', sfile.read(8))
494         self.assertEqual(8, sfile.tell())
495
496     def test_prefetch(self):
497         keep = ArvadosFileWriterTestCase.MockKeep({"2e9ec317e197819358fbc43afca7d837+8": "01234567", "e8dc4081b13434b45189a720b77b6818+8": "abcdefgh"})
498         with Collection(". 2e9ec317e197819358fbc43afca7d837+8 e8dc4081b13434b45189a720b77b6818+8 0:16:count.txt\n", keep_client=keep) as c:
499             r = c.open("count.txt", "r")
500             self.assertEqual("0123", r.read(4))
501         self.assertIn("2e9ec317e197819358fbc43afca7d837+8", keep.requests)
502         self.assertIn("e8dc4081b13434b45189a720b77b6818+8", keep.requests)
503
504     def test__eq__from_manifest(self):
505         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt') as c1:
506             with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt') as c2:
507                 self.assertTrue(c1["count1.txt"] == c2["count1.txt"])
508                 self.assertFalse(c1["count1.txt"] != c2["count1.txt"])
509
510     def test__eq__from_writes(self):
511         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt') as c1:
512             with Collection() as c2:
513                 f = c2.open("count1.txt", "w")
514                 f.write("0123456789")
515
516                 self.assertTrue(c1["count1.txt"] == c2["count1.txt"])
517                 self.assertFalse(c1["count1.txt"] != c2["count1.txt"])
518
519     def test__ne__(self):
520         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt') as c1:
521             with Collection() as c2:
522                 f = c2.open("count1.txt", "w")
523                 f.write("1234567890")
524
525                 self.assertTrue(c1["count1.txt"] != c2["count1.txt"])
526                 self.assertFalse(c1["count1.txt"] == c2["count1.txt"])
527
528
class ArvadosFileReadTestCase(unittest.TestCase, StreamRetryTestMixin):
    # Supplies reader_for() and read_for_test() on top of StreamRetryTestMixin
    # (presumably the hooks its inherited tests call; the mixin is defined in
    # test_stream).

    def reader_for(self, coll_name, **kwargs):
        """Build an ArvadosFileReader spanning every block of a test manifest.

        The manifest comes from self.manifest_for(coll_name). Each
        whitespace-separated token that parses as a Keep locator adds one
        block to the stream and one segment of the same size to the file.
        kwargs are passed on to ArvadosFileReader.
        """
        segments = []
        stream = []
        offset = 0
        for token in self.manifest_for(coll_name).split():
            try:
                locator = KeepLocator(token)
                segments.append(Range(offset, offset, locator.size))
                stream.append(Range(token, offset, locator.size))
                offset += locator.size
            except ValueError:
                # Skip tokens for which this raised ValueError.
                continue

        # Prefetching is switched off on the block manager for these tests.
        block_mgr = arvados.arvfile._BlockManager(self.keep_client())
        block_mgr.prefetch_enabled = False
        collection = Collection(keep_client=self.keep_client(), block_manager=block_mgr)
        arvfile = ArvadosFile(collection, "test",
                              stream=stream,
                              segments=segments)
        return ArvadosFileReader(arvfile, **kwargs)

    def read_for_test(self, reader, byte_count, **kwargs):
        """Read byte_count bytes with reader.read()."""
        data = reader.read(byte_count, **kwargs)
        return data
553
554
class ArvadosFileReadFromTestCase(ArvadosFileReadTestCase):
    # Same as the parent class, but reads go through readfrom() at offset 0.
    def read_for_test(self, reader, byte_count, **kwargs):
        data = reader.readfrom(0, byte_count, **kwargs)
        return data
558
559
class ArvadosFileReadAllTestCase(ArvadosFileReadTestCase):
    # Same as the parent class, but reads go through readall(); byte_count
    # is ignored and the pieces are joined into one string.
    def read_for_test(self, reader, byte_count, **kwargs):
        pieces = reader.readall(**kwargs)
        return ''.join(pieces)
563
564
class ArvadosFileReadAllDecompressedTestCase(ArvadosFileReadTestCase):
    # Same as the parent class, but reads go through readall_decompressed();
    # byte_count is ignored and the pieces are joined into one string.
    def read_for_test(self, reader, byte_count, **kwargs):
        pieces = reader.readall_decompressed(**kwargs)
        return ''.join(pieces)
568
569
class ArvadosFileReadlinesTestCase(ArvadosFileReadTestCase):
    # Same as the parent class, but reads go through readlines(); byte_count
    # is ignored and the returned lines are joined into one string.
    def read_for_test(self, reader, byte_count, **kwargs):
        lines = reader.readlines(**kwargs)
        return ''.join(lines)
573
574
class ArvadosFileTestCase(unittest.TestCase):
    """Check permission_expired() on a file looked up in a Collection."""

    def datetime_to_hex(self, dt):
        """Return dt as a hexadecimal Unix timestamp string with no prefix.

        dt is interpreted as local time (time.mktime) and truncated to whole
        seconds.  format(..., 'x') is used instead of slicing hex() output so
        the result stays clean for negative values and for Python 2 longs,
        where hex()[2:] would leave a stray 'x' or a trailing 'L'.
        """
        return format(int(time.mktime(dt.timetuple())), 'x')

    def test_permission_expired(self):
        # The '{}' placeholder after '@' in the block locator is filled with a
        # hex timestamp produced by datetime_to_hex().
        base_manifest = ". 781e5e245d69b566979b86e28d23f2c7+10+A715fd31f8111894f717eb1003c1b0216799dd9ec@{} 0:10:count.txt\n"
        now = datetime.datetime.now()
        a_week_ago = now - datetime.timedelta(days=7)
        a_month_ago = now - datetime.timedelta(days=30)
        a_week_from_now = now + datetime.timedelta(days=7)

        # A timestamp in the future: not expired as of now.
        future_manifest = base_manifest.format(self.datetime_to_hex(a_week_from_now))
        with Collection(future_manifest) as c:
            self.assertFalse(c.find('count.txt').permission_expired())

        # A timestamp a week in the past: expired as of now and as of any
        # later date, but not as of a date that precedes the timestamp.
        past_manifest = base_manifest.format(self.datetime_to_hex(a_week_ago))
        with Collection(past_manifest) as c:
            f = c.find('count.txt')
            self.assertTrue(f.permission_expired())
            self.assertTrue(f.permission_expired(a_week_from_now))
            self.assertFalse(f.permission_expired(a_month_ago))
592
593
class BlockManagerTest(unittest.TestCase):
    """Exercise _BlockManager buffer blocks: append, dup, fetch and commit."""

    def test_bufferblock_append(self):
        # Each append grows the block and changes its locator; once the block
        # has been put in the PENDING state a further append must raise.
        foo_locator = "acbd18db4cc2f85cedef654fccc4a4d8+3"
        foobar_locator = "3858f62230ac3c915f300c664312c63f+6"
        keep_client = ArvadosFileWriterTestCase.MockKeep({})
        with arvados.arvfile._BlockManager(keep_client) as manager:
            block = manager.alloc_bufferblock()

            block.append("foo")
            self.assertEqual(block.size(), 3)
            self.assertEqual(block.buffer_view[0:3], "foo")
            self.assertEqual(block.locator(), foo_locator)

            block.append("bar")
            self.assertEqual(block.size(), 6)
            self.assertEqual(block.buffer_view[0:6], "foobar")
            self.assertEqual(block.locator(), foobar_locator)

            block.set_state(arvados.arvfile._BufferBlock.PENDING)
            with self.assertRaises(arvados.errors.AssertionError):
                block.append("bar")

    def test_bufferblock_dup(self):
        # dup_block() must hand back a separate block: appending to the copy
        # leaves the source block's size, contents and locator unchanged.
        keep_client = ArvadosFileWriterTestCase.MockKeep({})
        with arvados.arvfile._BlockManager(keep_client) as manager:
            source = manager.alloc_bufferblock()
            source.append("foo")

            def check_source_still_foo():
                self.assertEqual(source.size(), 3)
                self.assertEqual(source.buffer_view[0:3], "foo")
                self.assertEqual(source.locator(), "acbd18db4cc2f85cedef654fccc4a4d8+3")

            check_source_still_foo()
            source.set_state(arvados.arvfile._BufferBlock.PENDING)

            duplicate = manager.dup_block(source, None)
            self.assertNotEqual(source.blockid, duplicate.blockid)

            duplicate.append("bar")

            self.assertEqual(duplicate.size(), 6)
            self.assertEqual(duplicate.buffer_view[0:6], "foobar")
            self.assertEqual(duplicate.locator(), "3858f62230ac3c915f300c664312c63f+6")

            check_source_still_foo()

    def test_bufferblock_get(self):
        # get_block_contents() must serve both a block held by the keep
        # client (looked up by locator) and an uncommitted buffer block
        # (looked up by its block id).
        stored_locator = "781e5e245d69b566979b86e28d23f2c7+10"
        keep_client = ArvadosFileWriterTestCase.MockKeep({stored_locator: "0123456789"})
        with arvados.arvfile._BlockManager(keep_client) as manager:
            block = manager.alloc_bufferblock()
            block.append("foo")

            from_keep = manager.get_block_contents(stored_locator, 1)
            self.assertEqual(from_keep, "0123456789")
            from_buffer = manager.get_block_contents(block.blockid, 1)
            self.assertEqual(from_buffer, "foo")

    def test_bufferblock_commit(self):
        # commit_all() should call the owner's flush(), which in turn commits
        # the block: put() is called on the keep client, the block ends up
        # COMMITTED and its buffer_view is reset to None.
        keep_mock = mock.MagicMock()
        with arvados.arvfile._BlockManager(keep_mock) as manager:
            block = manager.alloc_bufferblock()
            block.owner = mock.MagicMock()

            def commit_on_flush(sync=None):
                manager.commit_bufferblock(block, sync)

            block.owner.flush.side_effect = commit_on_flush
            block.append("foo")
            manager.commit_all()

            self.assertTrue(block.owner.flush.called)
            self.assertTrue(keep_mock.put.called)
            committed = arvados.arvfile._BufferBlock.COMMITTED
            self.assertEqual(block.state(), committed)
            self.assertIsNone(block.buffer_view)

    def test_bufferblock_commit_pending(self):
        # Test for bug #7225
        # put() is made slow so the block is still PENDING right after the
        # first commit (second argument False); committing the same block
        # again with True must leave it COMMITTED.
        def slow_put(_data):
            time.sleep(1)

        keep_mock = mock.MagicMock()
        keep_mock.put.side_effect = slow_put
        with arvados.arvfile._BlockManager(keep_mock) as manager:
            block = manager.alloc_bufferblock()
            block.append("foo")

            manager.commit_bufferblock(block, False)
            self.assertEqual(block.state(), arvados.arvfile._BufferBlock.PENDING)

            manager.commit_bufferblock(block, True)
            self.assertEqual(block.state(), arvados.arvfile._BufferBlock.COMMITTED)

    def test_bufferblock_commit_with_error(self):
        # When put() raises KeepWriteError, commit_all() must raise a
        # KeepWriteError naming the failed block, and the block must be left
        # in the ERROR state.
        expected_message = "Error writing some blocks: block acbd18db4cc2f85cedef654fccc4a4d8+3 raised KeepWriteError (fail)"
        keep_mock = mock.MagicMock()
        keep_mock.put.side_effect = arvados.errors.KeepWriteError("fail")
        with arvados.arvfile._BlockManager(keep_mock) as manager:
            block = manager.alloc_bufferblock()
            block.owner = mock.MagicMock()

            def commit_on_flush(sync=None):
                manager.commit_bufferblock(block, sync)

            block.owner.flush.side_effect = commit_on_flush
            block.append("foo")
            with self.assertRaises(arvados.errors.KeepWriteError) as err:
                manager.commit_all()

            self.assertTrue(block.owner.flush.called)
            self.assertEqual(str(err.exception), expected_message)
            self.assertEqual(block.state(), arvados.arvfile._BufferBlock.ERROR)