Merge branch '21933-deps-upgrade'
[arvados.git] / sdk / cwl / tests / test_pathmapper.py
1 # Copyright (C) The Arvados Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: Apache-2.0
4
5 import functools
6 import sys
7 import unittest
8 import json
9 import logging
10 import os
11
12 from unittest import mock
13
14 import arvados
15 import arvados.keep
16 import arvados.collection
17 import arvados_cwl
18 import arvados_cwl.executor
19
20 from cwltool.pathmapper import MapperEnt
21 from .mock_discovery import get_rootDesc
22
23 from arvados_cwl.pathmapper import ArvPathMapper
24
def upload_mock(files, api, dry_run=False, num_retries=0, project=None, fnPattern="$(file %s/%s)", name=None, collection=None, packed=None):
    """Fake uploader used as the side effect of the patched ``uploadfiles``.

    Nothing is uploaded.  Every entry in ``files`` is rewritten in place as if
    it had been stored in a collection with a fixed portable data hash:

    * ``keepref`` becomes ``"<pdh>/<basename of the entry's fn>"``
    * ``fn`` becomes ``fnPattern % (pdh, <basename of the entry's fn>)``

    All other parameters are accepted only so the signature lines up with the
    call being replaced; they are ignored.  Returns ``None``.
    """
    fake_pdh = "99999999999999999999999999999991+99"
    for entry in files:
        # Take the basename once, before ``fn`` is overwritten below.
        leaf = os.path.basename(entry.fn)
        entry.keepref = f"{fake_pdh}/{leaf}"
        entry.fn = fnPattern % (fake_pdh, leaf)
30
class TestPathmap(unittest.TestCase):
    """Tests for ``ArvPathMapper`` driven through a mocked Arvados API client."""

    def setUp(self):
        """Build a ``MagicMock`` API client carrying the mock discovery document."""
        self.api = mock.MagicMock()
        self.api._rootDesc = get_rootDesc()

    def tearDown(self):
        """Strip ``RuntimeStatusLoggingHandler`` instances from the root logger."""
        root_logger = logging.getLogger('')

        # Remove existing RuntimeStatusLoggingHandlers if they exist
        # NOTE(review): presumably these are installed when a test constructs
        # an ArvCwlExecutor -- confirm against arvados_cwl.executor.
        root_logger.handlers = [
            h for h in root_logger.handlers
            if not isinstance(h, arvados_cwl.executor.RuntimeStatusLoggingHandler)
        ]

    def _mapper_with_pathmap(self, arvrunner, locations):
        """Return a mapper built from no inputs with ``_pathmap`` pre-seeded.

        The mapper is created with an empty reference list, and every entry of
        ``locations`` is then stored in ``_pathmap`` with the placeholder
        value ``True``.
        """
        mapper = ArvPathMapper(arvrunner, [], "", "%s", "%s/%s")
        for location in locations:
            mapper._pathmap[location] = True
        return mapper

    def test_keepref(self):
        """Test direct keep references."""

        arvrunner = arvados_cwl.executor.ArvCwlExecutor(self.api)

        p = ArvPathMapper(arvrunner, [{
            "class": "File",
            "location": "keep:99999999999999999999999999999991+99/hw.py"
        }], "", "/test/%s", "/test/%s/%s")

        expected = {
            'keep:99999999999999999999999999999991+99/hw.py': MapperEnt(
                resolved='keep:99999999999999999999999999999991+99/hw.py',
                target='/test/99999999999999999999999999999991+99/hw.py',
                type='File',
                staged=True,
            ),
        }
        self.assertEqual(expected, p._pathmap)

    @mock.patch("arvados.commands.run.uploadfiles")
    @mock.patch("arvados.commands.run.statfile")
    def test_upload(self, statfile, upl):
        """Test pathmapper uploading files."""

        arvrunner = arvados_cwl.executor.ArvCwlExecutor(self.api)

        # statfile hands back an UploadFile here, and the patched uploadfiles
        # (upload_mock) then rewrites it to point at the fake collection.
        def statfile_mock(prefix, fn, fnPattern="$(file %s/%s)", dirPattern="$(dir %s/%s/)", raiseOSError=False):
            return arvados.commands.run.UploadFile("", "tests/hw.py")

        upl.side_effect = upload_mock
        statfile.side_effect = statfile_mock

        p = ArvPathMapper(arvrunner, [{
            "class": "File",
            "location": "file:tests/hw.py"
        }], "", "/test/%s", "/test/%s/%s")

        expected = {
            'file:tests/hw.py': MapperEnt(
                resolved='keep:99999999999999999999999999999991+99/hw.py',
                target='/test/99999999999999999999999999999991+99/hw.py',
                type='File',
                staged=True,
            ),
        }
        self.assertEqual(expected, p._pathmap)

    @mock.patch("arvados.commands.run.uploadfiles")
    @mock.patch("arvados.commands.run.statfile")
    def test_statfile(self, statfile, upl):
        """Test pathmapper handling ArvFile references."""
        arvrunner = arvados_cwl.executor.ArvCwlExecutor(self.api)

        # An ArvFile object returned from arvados.commands.run.statfile means
        # the file is located on a keep mount, so a direct reference can be
        # constructed without an upload.
        def statfile_mock(prefix, fn, fnPattern="$(file %s/%s)", dirPattern="$(dir %s/%s/)", raiseOSError=False):
            return arvados.commands.run.ArvFile("", fnPattern % ("99999999999999999999999999999991+99", "hw.py"))

        upl.side_effect = upload_mock
        statfile.side_effect = statfile_mock

        p = ArvPathMapper(arvrunner, [{
            "class": "File",
            "location": "file:tests/hw.py"
        }], "", "/test/%s", "/test/%s/%s")

        expected = {
            'file:tests/hw.py': MapperEnt(
                resolved='keep:99999999999999999999999999999991+99/hw.py',
                target='/test/99999999999999999999999999999991+99/hw.py',
                type='File',
                staged=True,
            ),
        }
        self.assertEqual(expected, p._pathmap)

    @mock.patch("os.stat")
    def test_missing_file(self, stat):
        """Test pathmapper handling missing references."""
        arvrunner = arvados_cwl.executor.ArvCwlExecutor(self.api)

        # Make every os.stat call fail with errno 2, as for a missing file.
        stat.side_effect = OSError(2, "No such file or directory")

        # The constructor itself is expected to raise, so its result is not
        # bound to a name.
        with self.assertRaises(OSError):
            ArvPathMapper(arvrunner, [{
                "class": "File",
                "location": "file:tests/hw.py"
            }], "", "/test/%s", "/test/%s/%s")

    def test_needs_new_collection(self):
        """Test needs_new_collection() for files with and without secondary files."""
        arvrunner = arvados_cwl.executor.ArvCwlExecutor(self.api)

        # Plain file.  Don't need a new collection.
        a = {
            "class": "File",
            "location": "keep:99999999999999999999999999999991+99/hw.py",
            "basename": "hw.py"
        }
        p = self._mapper_with_pathmap(arvrunner, [
            "keep:99999999999999999999999999999991+99/hw.py",
        ])
        self.assertFalse(p.needs_new_collection(a))

        # A file that isn't in the pathmap (for some reason).  Need a new collection.
        p = self._mapper_with_pathmap(arvrunner, [])
        self.assertTrue(p.needs_new_collection(a))

        # A file with a secondary file in the same collection.  Don't need
        # a new collection.
        a = {
            "class": "File",
            "location": "keep:99999999999999999999999999999991+99/hw.py",
            "basename": "hw.py",
            "secondaryFiles": [{
                "class": "File",
                "location": "keep:99999999999999999999999999999991+99/hw.pyc",
                "basename": "hw.pyc"
            }]
        }
        p = self._mapper_with_pathmap(arvrunner, [
            "keep:99999999999999999999999999999991+99/hw.py",
            "keep:99999999999999999999999999999991+99/hw.pyc",
        ])
        self.assertFalse(p.needs_new_collection(a))

        # Secondary file is in a different collection from the
        # primary.  Need a new collection.
        a = {
            "class": "File",
            "location": "keep:99999999999999999999999999999991+99/hw.py",
            "basename": "hw.py",
            "secondaryFiles": [{
                "class": "File",
                "location": "keep:99999999999999999999999999999992+99/hw.pyc",
                "basename": "hw.pyc"
            }]
        }
        p = self._mapper_with_pathmap(arvrunner, [
            "keep:99999999999999999999999999999991+99/hw.py",
            "keep:99999999999999999999999999999992+99/hw.pyc",
        ])
        self.assertTrue(p.needs_new_collection(a))

        # Secondary file should be staged to a different name than
        # path in location.  Need a new collection.
        a = {
            "class": "File",
            "location": "keep:99999999999999999999999999999991+99/hw.py",
            "basename": "hw.py",
            "secondaryFiles": [{
                "class": "File",
                "location": "keep:99999999999999999999999999999991+99/hw.pyc",
                "basename": "hw.other"
            }]
        }
        p = self._mapper_with_pathmap(arvrunner, [
            "keep:99999999999999999999999999999991+99/hw.py",
            "keep:99999999999999999999999999999991+99/hw.pyc",
        ])
        self.assertTrue(p.needs_new_collection(a))

        # Secondary file is a directory.  Do not need a new collection.
        a = {
            "class": "File",
            "location": "keep:99999999999999999999999999999991+99/hw.py",
            "basename": "hw.py",
            "secondaryFiles": [{
                "class": "Directory",
                "location": "keep:99999999999999999999999999999991+99/hw",
                "basename": "hw",
                "listing": [{
                    "class": "File",
                    "location": "keep:99999999999999999999999999999991+99/hw/h2",
                    "basename": "h2"
                }]
            }]
        }
        p = self._mapper_with_pathmap(arvrunner, [
            "keep:99999999999999999999999999999991+99/hw.py",
            "keep:99999999999999999999999999999991+99/hw",
            "keep:99999999999999999999999999999991+99/hw/h2",
        ])
        self.assertFalse(p.needs_new_collection(a))

        # Secondary file is a renamed directory.  Need a new collection.
        a = {
            "class": "File",
            "location": "keep:99999999999999999999999999999991+99/hw.py",
            "basename": "hw.py",
            "secondaryFiles": [{
                "class": "Directory",
                "location": "keep:99999999999999999999999999999991+99/hw",
                "basename": "wh",
                "listing": [{
                    "class": "File",
                    "location": "keep:99999999999999999999999999999991+99/hw/h2",
                    "basename": "h2"
                }]
            }]
        }
        p = self._mapper_with_pathmap(arvrunner, [
            "keep:99999999999999999999999999999991+99/hw.py",
            "keep:99999999999999999999999999999991+99/hw",
            "keep:99999999999999999999999999999991+99/hw/h2",
        ])
        self.assertTrue(p.needs_new_collection(a))

        # Secondary file is a file literal.  Need a new collection.
        a = {
            "class": "File",
            "location": "keep:99999999999999999999999999999991+99/hw.py",
            "basename": "hw.py",
            "secondaryFiles": [{
                "class": "File",
                "location": "_:123",
                "basename": "hw.pyc",
                "contents": "123"
            }]
        }
        p = self._mapper_with_pathmap(arvrunner, [
            "keep:99999999999999999999999999999991+99/hw.py",
            "_:123",
        ])
        self.assertTrue(p.needs_new_collection(a))

    def test_is_in_collection(self):
        """Test that a local file under the fake keep mount maps to a keep: reference."""
        arvrunner = arvados_cwl.executor.ArvCwlExecutor(self.api)
        # Lift the diff size limit so a mismatch shows the full pathmap.
        self.maxDiff = 1000000

        # The fixture is addressed relative to the current working directory.
        # NOTE(review): this presumably requires running from the directory
        # that contains tests/ -- confirm with how the suite is launched.
        cwd = os.getcwd()
        location = "file://"+cwd+"/tests/fake-keep-mount/fake_collection_dir/subdir/banana.txt"

        p = ArvPathMapper(arvrunner, [{
            "class": "File",
            "location": location
        }], "", "/test/%s", "/test/%s/%s")

        expected = {
            location: MapperEnt(
                resolved='keep:99999999999999999999999999999991+99/subdir/banana.txt',
                target='/test/99999999999999999999999999999991+99/subdir/banana.txt',
                type='File',
                staged=True,
            ),
        }
        self.assertEqual(expected, p._pathmap)