18994: Add test case, correctly re-stage files when basename is changed.
[arvados.git] / sdk / cwl / tests / test_pathmapper.py
1 # Copyright (C) The Arvados Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: Apache-2.0
4
5 import functools
6 import mock
7 import sys
8 import unittest
9 import json
10 import logging
11 import os
12
13 import arvados
14 import arvados.keep
15 import arvados.collection
16 import arvados_cwl
17 import arvados_cwl.executor
18
19 from cwltool.pathmapper import MapperEnt
20 from .mock_discovery import get_rootDesc
21
22 from arvados_cwl.pathmapper import ArvPathMapper
23
def upload_mock(files, api, dry_run=False, num_retries=0, project=None, fnPattern="$(file %s/%s)", name=None, collection=None, packed=None):
    """Stand-in for arvados.commands.run.uploadfiles that uploads nothing.

    Each entry of ``files`` is rewritten in place as though it had been
    stored under one fixed collection identifier:

    * ``keepref`` is set to ``"<pdh>/<basename of fn>"``
    * ``fn`` is set to ``fnPattern % (pdh, <basename of fn>)``

    Every other parameter is accepted only for signature compatibility and
    is ignored.  Returns None.
    """
    fake_pdh = "99999999999999999999999999999991+99"
    for entry in files:
        # Take the basename once, before ``fn`` is overwritten below.
        leaf = os.path.basename(entry.fn)
        entry.keepref = "%s/%s" % (fake_pdh, leaf)
        entry.fn = fnPattern % (fake_pdh, leaf)
29
class TestPathmap(unittest.TestCase):
    """Tests for ArvPathMapper: location mapping and needs_new_collection()."""

    def setUp(self):
        """Build a MagicMock API client whose _rootDesc comes from get_rootDesc()."""
        self.api = mock.MagicMock()
        self.api._rootDesc = get_rootDesc()

    def _empty_mapper(self, arvrunner, known_locations=()):
        """Return an ArvPathMapper built from no inputs, with locations pre-registered.

        Each item of ``known_locations`` is stored in the mapper's ``_pathmap``
        with the placeholder value True, which is all the
        needs_new_collection() cases below rely on.
        """
        mapper = ArvPathMapper(arvrunner, [], "", "%s", "%s/%s")
        for location in known_locations:
            mapper._pathmap[location] = True
        return mapper

    def test_keepref(self):
        """Test direct keep references."""
        arvrunner = arvados_cwl.executor.ArvCwlExecutor(self.api)

        p = ArvPathMapper(arvrunner, [{
            "class": "File",
            "location": "keep:99999999999999999999999999999991+99/hw.py"
        }], "", "/test/%s", "/test/%s/%s")

        expected = {
            'keep:99999999999999999999999999999991+99/hw.py': MapperEnt(
                resolved='keep:99999999999999999999999999999991+99/hw.py',
                target='/test/99999999999999999999999999999991+99/hw.py',
                type='File',
                staged=True),
        }
        self.assertEqual(expected, p._pathmap)

    @mock.patch("arvados.commands.run.uploadfiles")
    @mock.patch("arvados.commands.run.statfile")
    def test_upload(self, statfile, upl):
        """Test pathmapper uploading files."""
        arvrunner = arvados_cwl.executor.ArvCwlExecutor(self.api)

        # Returning an UploadFile from statfile makes the mapper go through
        # the (mocked) upload path.
        def statfile_mock(prefix, fn, fnPattern="$(file %s/%s)", dirPattern="$(dir %s/%s/)", raiseOSError=False):
            return arvados.commands.run.UploadFile("", "tests/hw.py")

        upl.side_effect = upload_mock
        statfile.side_effect = statfile_mock

        p = ArvPathMapper(arvrunner, [{
            "class": "File",
            "location": "file:tests/hw.py"
        }], "", "/test/%s", "/test/%s/%s")

        expected = {
            'file:tests/hw.py': MapperEnt(
                resolved='keep:99999999999999999999999999999991+99/hw.py',
                target='/test/99999999999999999999999999999991+99/hw.py',
                type='File',
                staged=True),
        }
        self.assertEqual(expected, p._pathmap)

    @mock.patch("arvados.commands.run.uploadfiles")
    @mock.patch("arvados.commands.run.statfile")
    def test_statfile(self, statfile, upl):
        """Test pathmapper handling ArvFile references."""
        arvrunner = arvados_cwl.executor.ArvCwlExecutor(self.api)

        # An ArvFile object returned from arvados.commands.run.statfile means
        # the file is located on a keep mount, so we can construct a direct
        # reference without upload.
        def statfile_mock(prefix, fn, fnPattern="$(file %s/%s)", dirPattern="$(dir %s/%s/)", raiseOSError=False):
            return arvados.commands.run.ArvFile("", fnPattern % ("99999999999999999999999999999991+99", "hw.py"))

        upl.side_effect = upload_mock
        statfile.side_effect = statfile_mock

        p = ArvPathMapper(arvrunner, [{
            "class": "File",
            "location": "file:tests/hw.py"
        }], "", "/test/%s", "/test/%s/%s")

        expected = {
            'file:tests/hw.py': MapperEnt(
                resolved='keep:99999999999999999999999999999991+99/hw.py',
                target='/test/99999999999999999999999999999991+99/hw.py',
                type='File',
                staged=True),
        }
        self.assertEqual(expected, p._pathmap)

    @mock.patch("os.stat")
    def test_missing_file(self, stat):
        """Test pathmapper handling missing references."""
        arvrunner = arvados_cwl.executor.ArvCwlExecutor(self.api)

        # Every os.stat call fails as if the path did not exist.
        stat.side_effect = OSError(2, "No such file or directory")

        # Constructing the mapper must let that OSError propagate; the
        # instance itself is never needed, so it is not bound to a name.
        with self.assertRaises(OSError):
            ArvPathMapper(arvrunner, [{
                "class": "File",
                "location": "file:tests/hw.py"
            }], "", "/test/%s", "/test/%s/%s")

    def test_needs_new_collection(self):
        """Test needs_new_collection() for plain, secondary, renamed, directory and literal inputs."""
        arvrunner = arvados_cwl.executor.ArvCwlExecutor(self.api)

        # Plain file.  Don't need a new collection.
        a = {
            "class": "File",
            "location": "keep:99999999999999999999999999999991+99/hw.py",
            "basename": "hw.py"
        }
        p = self._empty_mapper(arvrunner, [
            "keep:99999999999999999999999999999991+99/hw.py",
        ])
        self.assertFalse(p.needs_new_collection(a))

        # A file that isn't in the pathmap (for some reason).  Need a new
        # collection.
        p = self._empty_mapper(arvrunner)
        self.assertTrue(p.needs_new_collection(a))

        # A file with a secondary file in the same collection.  Don't need
        # a new collection.
        a = {
            "class": "File",
            "location": "keep:99999999999999999999999999999991+99/hw.py",
            "basename": "hw.py",
            "secondaryFiles": [{
                "class": "File",
                "location": "keep:99999999999999999999999999999991+99/hw.pyc",
                "basename": "hw.pyc"
            }]
        }
        p = self._empty_mapper(arvrunner, [
            "keep:99999999999999999999999999999991+99/hw.py",
            "keep:99999999999999999999999999999991+99/hw.pyc",
        ])
        self.assertFalse(p.needs_new_collection(a))

        # Secondary file is in a different collection from the primary.
        # Need a new collection.
        a = {
            "class": "File",
            "location": "keep:99999999999999999999999999999991+99/hw.py",
            "basename": "hw.py",
            "secondaryFiles": [{
                "class": "File",
                "location": "keep:99999999999999999999999999999992+99/hw.pyc",
                "basename": "hw.pyc"
            }]
        }
        p = self._empty_mapper(arvrunner, [
            "keep:99999999999999999999999999999991+99/hw.py",
            "keep:99999999999999999999999999999992+99/hw.pyc",
        ])
        self.assertTrue(p.needs_new_collection(a))

        # Secondary file should be staged to a different name than
        # path in location.  Need a new collection.
        a = {
            "class": "File",
            "location": "keep:99999999999999999999999999999991+99/hw.py",
            "basename": "hw.py",
            "secondaryFiles": [{
                "class": "File",
                "location": "keep:99999999999999999999999999999991+99/hw.pyc",
                "basename": "hw.other"
            }]
        }
        p = self._empty_mapper(arvrunner, [
            "keep:99999999999999999999999999999991+99/hw.py",
            "keep:99999999999999999999999999999991+99/hw.pyc",
        ])
        self.assertTrue(p.needs_new_collection(a))

        # Secondary file is a directory.  Do not need a new collection.
        a = {
            "class": "File",
            "location": "keep:99999999999999999999999999999991+99/hw.py",
            "basename": "hw.py",
            "secondaryFiles": [{
                "class": "Directory",
                "location": "keep:99999999999999999999999999999991+99/hw",
                "basename": "hw",
                "listing": [{
                    "class": "File",
                    "location": "keep:99999999999999999999999999999991+99/hw/h2",
                    "basename": "h2"
                }]
            }]
        }
        p = self._empty_mapper(arvrunner, [
            "keep:99999999999999999999999999999991+99/hw.py",
            "keep:99999999999999999999999999999991+99/hw",
            "keep:99999999999999999999999999999991+99/hw/h2",
        ])
        self.assertFalse(p.needs_new_collection(a))

        # Secondary file is a renamed directory.  Need a new collection.
        a = {
            "class": "File",
            "location": "keep:99999999999999999999999999999991+99/hw.py",
            "basename": "hw.py",
            "secondaryFiles": [{
                "class": "Directory",
                "location": "keep:99999999999999999999999999999991+99/hw",
                "basename": "wh",
                "listing": [{
                    "class": "File",
                    "location": "keep:99999999999999999999999999999991+99/hw/h2",
                    "basename": "h2"
                }]
            }]
        }
        p = self._empty_mapper(arvrunner, [
            "keep:99999999999999999999999999999991+99/hw.py",
            "keep:99999999999999999999999999999991+99/hw",
            "keep:99999999999999999999999999999991+99/hw/h2",
        ])
        self.assertTrue(p.needs_new_collection(a))

        # Secondary file is a file literal.  Need a new collection.
        a = {
            "class": "File",
            "location": "keep:99999999999999999999999999999991+99/hw.py",
            "basename": "hw.py",
            "secondaryFiles": [{
                "class": "File",
                "location": "_:123",
                "basename": "hw.pyc",
                "contents": "123"
            }]
        }
        p = self._empty_mapper(arvrunner, [
            "keep:99999999999999999999999999999991+99/hw.py",
            "_:123",
        ])
        self.assertTrue(p.needs_new_collection(a))