18870: Need to declare NODES as array
[arvados.git] / sdk / cwl / tests / test_pathmapper.py
1 # Copyright (C) The Arvados Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: Apache-2.0
4
5 import functools
6 import mock
7 import sys
8 import unittest
9 import json
10 import logging
11 import os
12
13 import arvados
14 import arvados.keep
15 import arvados.collection
16 import arvados_cwl
17 import arvados_cwl.executor
18
19 from cwltool.pathmapper import MapperEnt
20 from .mock_discovery import get_rootDesc
21
22 from arvados_cwl.pathmapper import ArvPathMapper
23
def upload_mock(files, api, dry_run=False, num_retries=0, project=None, fnPattern="$(file %s/%s)", name=None, collection=None, packed=None):
    """Fake upload used as the side effect of the patched uploadfiles.

    Nothing is uploaded.  Each object in ``files`` is rewritten in place so
    that it looks as if its file had been stored in one fixed collection:

    * ``keepref`` becomes ``"<pdh>/<basename of fn>"``
    * ``fn`` becomes ``fnPattern % (pdh, <basename of fn>)``

    Every other parameter is accepted only so the call signature lines up
    with how the patched function is invoked; none of them is read here.
    Returns None.
    """
    fake_pdh = "99999999999999999999999999999991+99"
    for entry in files:
        # Take the basename once, before ``fn`` is overwritten below.
        leaf = os.path.basename(entry.fn)
        entry.keepref = "%s/%s" % (fake_pdh, leaf)
        entry.fn = fnPattern % (fake_pdh, leaf)
29
class TestPathmap(unittest.TestCase):
    """Tests for ArvPathMapper, run against a mocked Arvados API client."""

    def setUp(self):
        """Create a MagicMock API client carrying the mock discovery document."""
        self.api = mock.MagicMock()
        self.api._rootDesc = get_rootDesc()

    def tearDown(self):
        """Drop RuntimeStatusLoggingHandler instances from the root logger."""
        root_logger = logging.getLogger('')

        # Keep every handler except RuntimeStatusLoggingHandlers, so they do
        # not pile up from one test to the next.
        # NOTE(review): presumably these are installed when ArvCwlExecutor is
        # constructed -- confirm against arvados_cwl.executor.
        root_logger.handlers = [
            h for h in root_logger.handlers
            if not isinstance(h, arvados_cwl.executor.RuntimeStatusLoggingHandler)
        ]

    def _mapper_with(self, arvrunner, *mapped):
        """Return an ArvPathMapper built from no inputs, with seeded entries.

        The mapper is constructed with an empty file list and the "%s" /
        "%s/%s" patterns; then every location in ``mapped`` is given a
        placeholder ``True`` entry in its ``_pathmap``.
        """
        p = ArvPathMapper(arvrunner, [], "", "%s", "%s/%s")
        for location in mapped:
            p._pathmap[location] = True
        return p

    def test_keepref(self):
        """Test direct keep references."""

        arvrunner = arvados_cwl.executor.ArvCwlExecutor(self.api)

        p = ArvPathMapper(arvrunner, [{
            "class": "File",
            "location": "keep:99999999999999999999999999999991+99/hw.py"
        }], "", "/test/%s", "/test/%s/%s")

        self.assertEqual({'keep:99999999999999999999999999999991+99/hw.py': MapperEnt(resolved='keep:99999999999999999999999999999991+99/hw.py', target='/test/99999999999999999999999999999991+99/hw.py', type='File', staged=True)},
                         p._pathmap)

    @mock.patch("arvados.commands.run.uploadfiles")
    @mock.patch("arvados.commands.run.statfile")
    def test_upload(self, statfile, upl):
        """Test pathmapper uploading files."""

        arvrunner = arvados_cwl.executor.ArvCwlExecutor(self.api)

        # Returning an UploadFile sends the mapper down the upload path, which
        # is served by upload_mock below.
        def statfile_mock(prefix, fn, fnPattern="$(file %s/%s)", dirPattern="$(dir %s/%s/)", raiseOSError=False):
            st = arvados.commands.run.UploadFile("", "tests/hw.py")
            return st

        upl.side_effect = upload_mock
        statfile.side_effect = statfile_mock

        p = ArvPathMapper(arvrunner, [{
            "class": "File",
            "location": "file:tests/hw.py"
        }], "", "/test/%s", "/test/%s/%s")

        self.assertEqual({'file:tests/hw.py': MapperEnt(resolved='keep:99999999999999999999999999999991+99/hw.py', target='/test/99999999999999999999999999999991+99/hw.py', type='File', staged=True)},
                         p._pathmap)

    @mock.patch("arvados.commands.run.uploadfiles")
    @mock.patch("arvados.commands.run.statfile")
    def test_statfile(self, statfile, upl):
        """Test pathmapper handling ArvFile references."""
        arvrunner = arvados_cwl.executor.ArvCwlExecutor(self.api)

        # An ArvFile object returned from arvados.commands.run.statfile means the file is located on a
        # keep mount, so we can construct a direct reference directly without upload.
        def statfile_mock(prefix, fn, fnPattern="$(file %s/%s)", dirPattern="$(dir %s/%s/)", raiseOSError=False):
            st = arvados.commands.run.ArvFile("", fnPattern % ("99999999999999999999999999999991+99", "hw.py"))
            return st

        upl.side_effect = upload_mock
        statfile.side_effect = statfile_mock

        p = ArvPathMapper(arvrunner, [{
            "class": "File",
            "location": "file:tests/hw.py"
        }], "", "/test/%s", "/test/%s/%s")

        self.assertEqual({'file:tests/hw.py': MapperEnt(resolved='keep:99999999999999999999999999999991+99/hw.py', target='/test/99999999999999999999999999999991+99/hw.py', type='File', staged=True)},
                         p._pathmap)

    @mock.patch("os.stat")
    def test_missing_file(self, stat):
        """Test pathmapper handling missing references."""
        arvrunner = arvados_cwl.executor.ArvCwlExecutor(self.api)

        # Make every os.stat call fail with ENOENT.
        stat.side_effect = OSError(2, "No such file or directory")

        # Constructing the mapper is expected to raise, so the result is not
        # bound to a name.
        with self.assertRaises(OSError):
            ArvPathMapper(arvrunner, [{
                "class": "File",
                "location": "file:tests/hw.py"
            }], "", "/test/%s", "/test/%s/%s")

    def test_needs_new_collection(self):
        """Test needs_new_collection() on files with and without secondaryFiles."""
        arvrunner = arvados_cwl.executor.ArvCwlExecutor(self.api)

        # Plain file.  Don't need a new collection.
        a = {
            "class": "File",
            "location": "keep:99999999999999999999999999999991+99/hw.py",
            "basename": "hw.py"
        }
        p = self._mapper_with(
            arvrunner,
            "keep:99999999999999999999999999999991+99/hw.py")
        self.assertFalse(p.needs_new_collection(a))

        # A file that isn't in the pathmap (for some reason).  Need a new collection.
        p = self._mapper_with(arvrunner)
        self.assertTrue(p.needs_new_collection(a))

        # A file with a secondary file in the same collection.  Don't need
        # a new collection.
        a = {
            "class": "File",
            "location": "keep:99999999999999999999999999999991+99/hw.py",
            "basename": "hw.py",
            "secondaryFiles": [{
                "class": "File",
                "location": "keep:99999999999999999999999999999991+99/hw.pyc",
                "basename": "hw.pyc"
            }]
        }
        p = self._mapper_with(
            arvrunner,
            "keep:99999999999999999999999999999991+99/hw.py",
            "keep:99999999999999999999999999999991+99/hw.pyc")
        self.assertFalse(p.needs_new_collection(a))

        # Secondary file is in a different collection from the primary.
        # Need a new collection.
        a = {
            "class": "File",
            "location": "keep:99999999999999999999999999999991+99/hw.py",
            "basename": "hw.py",
            "secondaryFiles": [{
                "class": "File",
                "location": "keep:99999999999999999999999999999992+99/hw.pyc",
                "basename": "hw.pyc"
            }]
        }
        p = self._mapper_with(
            arvrunner,
            "keep:99999999999999999999999999999991+99/hw.py",
            "keep:99999999999999999999999999999992+99/hw.pyc")
        self.assertTrue(p.needs_new_collection(a))

        # Secondary file should be staged to a different name than
        # path in location.  Need a new collection.
        a = {
            "class": "File",
            "location": "keep:99999999999999999999999999999991+99/hw.py",
            "basename": "hw.py",
            "secondaryFiles": [{
                "class": "File",
                "location": "keep:99999999999999999999999999999991+99/hw.pyc",
                "basename": "hw.other"
            }]
        }
        p = self._mapper_with(
            arvrunner,
            "keep:99999999999999999999999999999991+99/hw.py",
            "keep:99999999999999999999999999999991+99/hw.pyc")
        self.assertTrue(p.needs_new_collection(a))

        # Secondary file is a directory.  Do not need a new collection.
        a = {
            "class": "File",
            "location": "keep:99999999999999999999999999999991+99/hw.py",
            "basename": "hw.py",
            "secondaryFiles": [{
                "class": "Directory",
                "location": "keep:99999999999999999999999999999991+99/hw",
                "basename": "hw",
                "listing": [{
                    "class": "File",
                    "location": "keep:99999999999999999999999999999991+99/hw/h2",
                    "basename": "h2"
                }]
            }]
        }
        p = self._mapper_with(
            arvrunner,
            "keep:99999999999999999999999999999991+99/hw.py",
            "keep:99999999999999999999999999999991+99/hw",
            "keep:99999999999999999999999999999991+99/hw/h2")
        self.assertFalse(p.needs_new_collection(a))

        # Secondary file is a renamed directory.  Need a new collection.
        a = {
            "class": "File",
            "location": "keep:99999999999999999999999999999991+99/hw.py",
            "basename": "hw.py",
            "secondaryFiles": [{
                "class": "Directory",
                "location": "keep:99999999999999999999999999999991+99/hw",
                "basename": "wh",
                "listing": [{
                    "class": "File",
                    "location": "keep:99999999999999999999999999999991+99/hw/h2",
                    "basename": "h2"
                }]
            }]
        }
        p = self._mapper_with(
            arvrunner,
            "keep:99999999999999999999999999999991+99/hw.py",
            "keep:99999999999999999999999999999991+99/hw",
            "keep:99999999999999999999999999999991+99/hw/h2")
        self.assertTrue(p.needs_new_collection(a))

        # Secondary file is a file literal.  Need a new collection.
        a = {
            "class": "File",
            "location": "keep:99999999999999999999999999999991+99/hw.py",
            "basename": "hw.py",
            "secondaryFiles": [{
                "class": "File",
                "location": "_:123",
                "basename": "hw.pyc",
                "contents": "123"
            }]
        }
        p = self._mapper_with(
            arvrunner,
            "keep:99999999999999999999999999999991+99/hw.py",
            "_:123")
        self.assertTrue(p.needs_new_collection(a))

    def test_is_in_collection(self):
        """Test mapping a file: location under tests/fake-keep-mount to a keep: reference."""
        arvrunner = arvados_cwl.executor.ArvCwlExecutor(self.api)
        # Raise the diff size limit so a failing assertEqual prints the whole diff.
        self.maxDiff = 1000000

        cwd = os.getcwd()
        p = ArvPathMapper(arvrunner, [{
            "class": "File",
            "location": "file://"+cwd+"/tests/fake-keep-mount/fake_collection_dir/subdir/banana.txt"
        }], "", "/test/%s", "/test/%s/%s")

        self.assertEqual({"file://"+cwd+"/tests/fake-keep-mount/fake_collection_dir/subdir/banana.txt": MapperEnt(resolved='keep:99999999999999999999999999999991+99/subdir/banana.txt', target='/test/99999999999999999999999999999991+99/subdir/banana.txt', type='File', staged=True)},
                         p._pathmap)