14327: Don't create new collection if source has expected secondaryFiles
[arvados.git] / sdk / cwl / tests / test_pathmapper.py
1 # Copyright (C) The Arvados Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: Apache-2.0
4
5 import functools
6 import mock
7 import sys
8 import unittest
9 import json
10 import logging
11 import os
12
13 import arvados
14 import arvados.keep
15 import arvados.collection
16 import arvados_cwl
17 import arvados_cwl.executor
18
19 from cwltool.pathmapper import MapperEnt
20 from .mock_discovery import get_rootDesc
21
22 from arvados_cwl.pathmapper import ArvPathMapper
23
def upload_mock(files, api, dry_run=False, num_retries=0, project=None, fnPattern="$(file %s/%s)", name=None, collection=None, packed=None):
    """Fake uploader installed as the side_effect of the patched uploadfiles.

    Nothing is uploaded.  Every entry in ``files`` is rewritten in place as if
    it had been stored in a fixed, made-up collection:

    * ``keepref`` becomes ``"<pdh>/<basename of fn>"``
    * ``fn`` becomes ``fnPattern % (<pdh>, <basename of fn>)``

    All other parameters are accepted only so the signature lines up with the
    keyword arguments the caller may pass; they are not read.  Returns None.
    """
    fake_pdh = "99999999999999999999999999999991+99"
    for entry in files:
        # Take the basename once, before ``fn`` is overwritten below.
        leaf = os.path.basename(entry.fn)
        entry.keepref = "%s/%s" % (fake_pdh, leaf)
        entry.fn = fnPattern % (fake_pdh, leaf)
29
class TestPathmap(unittest.TestCase):
    """Tests for ArvPathMapper: path mapping and needs_new_collection()."""

    def setUp(self):
        """Create a MagicMock API object carrying the get_rootDesc() result."""
        self.api = mock.MagicMock()
        self.api._rootDesc = get_rootDesc()

    def _seeded_mapper(self, arvrunner, *locations):
        """Return an ArvPathMapper built from no inputs with ``locations`` pre-registered.

        The mapper is constructed with an empty reference list and the
        patterns ``"%s"`` / ``"%s/%s"``.  Each location is then written
        straight into ``_pathmap`` with the placeholder value True, which is
        how the needs_new_collection() cases mark a location as already known
        to the mapper.
        """
        mapper = ArvPathMapper(arvrunner, [], "", "%s", "%s/%s")
        for location in locations:
            mapper._pathmap[location] = True
        return mapper

    def test_keepref(self):
        """Test direct keep references."""

        arvrunner = arvados_cwl.executor.ArvCwlExecutor(self.api)

        p = ArvPathMapper(arvrunner, [{
            "class": "File",
            "location": "keep:99999999999999999999999999999991+99/hw.py"
        }], "", "/test/%s", "/test/%s/%s")

        expected = {
            'keep:99999999999999999999999999999991+99/hw.py': MapperEnt(
                resolved='keep:99999999999999999999999999999991+99/hw.py',
                target='/test/99999999999999999999999999999991+99/hw.py',
                type='File',
                staged=True),
        }
        self.assertEqual(expected, p._pathmap)

    @mock.patch("arvados.commands.run.uploadfiles")
    @mock.patch("arvados.commands.run.statfile")
    def test_upload(self, statfile, upl):
        """Test pathmapper uploading files."""

        arvrunner = arvados_cwl.executor.ArvCwlExecutor(self.api)

        # Returning an UploadFile from statfile routes the file through the
        # (mocked) uploadfiles call.
        def statfile_mock(prefix, fn, fnPattern="$(file %s/%s)", dirPattern="$(dir %s/%s/)", raiseOSError=False):
            return arvados.commands.run.UploadFile("", "tests/hw.py")

        upl.side_effect = upload_mock
        statfile.side_effect = statfile_mock

        p = ArvPathMapper(arvrunner, [{
            "class": "File",
            "location": "file:tests/hw.py"
        }], "", "/test/%s", "/test/%s/%s")

        expected = {
            'file:tests/hw.py': MapperEnt(
                resolved='keep:99999999999999999999999999999991+99/hw.py',
                target='/test/99999999999999999999999999999991+99/hw.py',
                type='File',
                staged=True),
        }
        self.assertEqual(expected, p._pathmap)

    @mock.patch("arvados.commands.run.uploadfiles")
    @mock.patch("arvados.commands.run.statfile")
    def test_statfile(self, statfile, upl):
        """Test pathmapper handling ArvFile references."""
        arvrunner = arvados_cwl.executor.ArvCwlExecutor(self.api)

        # An ArvFile object returned from arvados.commands.run.statfile means
        # the file is located on a keep mount, so a direct reference can be
        # constructed without an upload.
        def statfile_mock(prefix, fn, fnPattern="$(file %s/%s)", dirPattern="$(dir %s/%s/)", raiseOSError=False):
            return arvados.commands.run.ArvFile(
                "", fnPattern % ("99999999999999999999999999999991+99", "hw.py"))

        upl.side_effect = upload_mock
        statfile.side_effect = statfile_mock

        p = ArvPathMapper(arvrunner, [{
            "class": "File",
            "location": "file:tests/hw.py"
        }], "", "/test/%s", "/test/%s/%s")

        expected = {
            'file:tests/hw.py': MapperEnt(
                resolved='keep:99999999999999999999999999999991+99/hw.py',
                target='/test/99999999999999999999999999999991+99/hw.py',
                type='File',
                staged=True),
        }
        self.assertEqual(expected, p._pathmap)

    @mock.patch("os.stat")
    def test_missing_file(self, stat):
        """Test pathmapper handling missing references."""
        arvrunner = arvados_cwl.executor.ArvCwlExecutor(self.api)

        stat.side_effect = OSError(2, "No such file or directory")

        # Only the exception matters, so the mapper is not bound to a name.
        with self.assertRaises(OSError):
            ArvPathMapper(arvrunner, [{
                "class": "File",
                "location": "file:tests/hw.py"
            }], "", "/test/%s", "/test/%s/%s")

    def test_needs_new_collection(self):
        """Test needs_new_collection() on a File with various secondaryFiles.

        Every case asks about the same primary file (hw.py in collection
        ...91+99) and varies only the secondaryFiles attached to it and the
        locations pre-registered in the mapper.
        """
        arvrunner = arvados_cwl.executor.ArvCwlExecutor(self.api)

        hw_py = "keep:99999999999999999999999999999991+99/hw.py"
        hw_pyc = "keep:99999999999999999999999999999991+99/hw.pyc"
        foreign_pyc = "keep:99999999999999999999999999999992+99/hw.pyc"
        hw_dir = "keep:99999999999999999999999999999991+99/hw"
        hw_dir_h2 = "keep:99999999999999999999999999999991+99/hw/h2"

        def primary(secondary_files=None):
            """Build the hw.py File object, optionally with secondaryFiles."""
            obj = {
                "class": "File",
                "location": hw_py,
                "basename": "hw.py"
            }
            if secondary_files is not None:
                obj["secondaryFiles"] = secondary_files
            return obj

        # No secondaryFiles, location registered: no new collection expected.
        a = primary()
        p = self._seeded_mapper(arvrunner, hw_py)
        self.assertFalse(p.needs_new_collection(a))

        # Same object, but the mapper has nothing registered: new collection
        # expected.
        p = self._seeded_mapper(arvrunner)
        self.assertTrue(p.needs_new_collection(a))

        # Secondary file in the same collection, basename matching its
        # location: no new collection expected.
        a = primary([{
            "class": "File",
            "location": hw_pyc,
            "basename": "hw.pyc"
        }])
        p = self._seeded_mapper(arvrunner, hw_py, hw_pyc)
        self.assertFalse(p.needs_new_collection(a))

        # Secondary file located in a different collection (...92+99): new
        # collection expected.
        a = primary([{
            "class": "File",
            "location": foreign_pyc,
            "basename": "hw.pyc"
        }])
        p = self._seeded_mapper(arvrunner, hw_py, foreign_pyc)
        self.assertTrue(p.needs_new_collection(a))

        # Secondary file in the same collection, but its basename differs
        # from the name in its location: new collection expected.
        a = primary([{
            "class": "File",
            "location": hw_pyc,
            "basename": "hw.other"
        }])
        p = self._seeded_mapper(arvrunner, hw_py, hw_pyc)
        self.assertTrue(p.needs_new_collection(a))

        # Secondary Directory (with a listing) in the same collection,
        # basenames matching locations: no new collection expected.
        a = primary([{
            "class": "Directory",
            "location": hw_dir,
            "basename": "hw",
            "listing": [{
                "class": "File",
                "location": hw_dir_h2,
                "basename": "h2"
            }]
        }])
        p = self._seeded_mapper(arvrunner, hw_py, hw_dir, hw_dir_h2)
        self.assertFalse(p.needs_new_collection(a))

        # Same Directory, but its basename ("wh") differs from the name in
        # its location ("hw"): new collection expected.
        a = primary([{
            "class": "Directory",
            "location": hw_dir,
            "basename": "wh",
            "listing": [{
                "class": "File",
                "location": hw_dir_h2,
                "basename": "h2"
            }]
        }])
        p = self._seeded_mapper(arvrunner, hw_py, hw_dir, hw_dir_h2)
        self.assertTrue(p.needs_new_collection(a))

        # Secondary file given as a literal ("_:" location plus contents)
        # rather than a keep reference: new collection expected.
        a = primary([{
            "class": "File",
            "location": "_:123",
            "basename": "hw.pyc",
            "contents": "123"
        }])
        p = self._seeded_mapper(arvrunner, hw_py, "_:123")
        self.assertTrue(p.needs_new_collection(a))