Merge branch '7167-keep-rsync' of git.curoverse.com:arvados into 7167-keep-rsync
[arvados.git] / services / api / test / unit / collection_test.rb
1 require 'test_helper'
2
# Unit tests for the Collection model. Covers manifest_text validation
# (encoding and format), the derived file_names value, full text search,
# portable_data_hash validation, the replication_* attributes, properties,
# name reuse after expiry, and docker image lookup.
class CollectionTest < ActiveSupport::TestCase
  # Create a collection whose manifest lists one zero-length file named
  # "#{name}.txt".
  #
  # name - base file name interpolated into the manifest.
  # enc  - optional Encoding; when given, the manifest string is re-tagged
  #        with it via force_encoding (the bytes themselves are not changed).
  #
  # Returns whatever Collection.create returns.
  def create_collection name, enc=nil
    txt = ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:#{name}.txt\n"
    txt.force_encoding(enc) if enc
    Collection.create(manifest_text: txt)
  end

  test 'accept ASCII manifest_text' do
    act_as_system_user do
      c = create_collection 'foo', Encoding::US_ASCII
      assert c.valid?
    end
  end

  test 'accept UTF-8 manifest_text' do
    act_as_system_user do
      # "\xc3\x98" is a well-formed two-byte UTF-8 sequence.
      c = create_collection "f\xc3\x98\xc3\x98", Encoding::UTF_8
      assert c.valid?
    end
  end

  test 'refuse manifest_text with invalid UTF-8 byte sequence' do
    act_as_system_user do
      # "\xc8" followed by "o" is not a valid UTF-8 sequence.
      c = create_collection "f\xc8o", Encoding::UTF_8
      refute c.valid?
      assert_equal [:manifest_text], c.errors.messages.keys
      assert_match(/UTF-8/, c.errors.messages[:manifest_text].first)
    end
  end

  test 'refuse manifest_text with non-UTF-8 encoding' do
    act_as_system_user do
      c = create_collection "f\xc8o", Encoding::ASCII_8BIT
      refute c.valid?
      assert_equal [:manifest_text], c.errors.messages.keys
      assert_match(/UTF-8/, c.errors.messages[:manifest_text].first)
    end
  end

  # Malformed manifests: missing locator, missing file token position/size,
  # missing stream name, and missing trailing newline.
  [
    ". 0:0:foo.txt",
    ". d41d8cd98f00b204e9800998ecf8427e foo.txt",
    "d41d8cd98f00b204e9800998ecf8427e 0:0:foo.txt",
    ". d41d8cd98f00b204e9800998ecf8427e 0:0:foo.txt",
  ].each do |manifest_text|
    test "create collection with invalid manifest text #{manifest_text} and expect error" do
      act_as_system_user do
        c = Collection.create(manifest_text: manifest_text)
        refute c.valid?
      end
    end
  end

  # Acceptable manifests: nil, empty, and a well-formed single-file stream.
  [
    nil,
    "",
    ". d41d8cd98f00b204e9800998ecf8427e 0:0:foo.txt\n",
  ].each do |manifest_text|
    test "create collection with valid manifest text #{manifest_text.inspect} and expect success" do
      act_as_system_user do
        c = Collection.create(manifest_text: manifest_text)
        assert c.valid?
      end
    end
  end

  # Same malformed manifests as in the create tests, applied to an existing
  # (valid) collection.
  [
    ". 0:0:foo.txt",
    ". d41d8cd98f00b204e9800998ecf8427e foo.txt",
    "d41d8cd98f00b204e9800998ecf8427e 0:0:foo.txt",
    ". d41d8cd98f00b204e9800998ecf8427e 0:0:foo.txt",
  ].each do |manifest_text|
    test "update collection with invalid manifest text #{manifest_text} and expect error" do
      act_as_system_user do
        c = create_collection 'foo', Encoding::US_ASCII
        assert c.valid?

        c.update_attribute 'manifest_text', manifest_text
        refute c.valid?
      end
    end
  end

  # Same acceptable manifests as in the create tests, applied to an existing
  # collection.
  [
    nil,
    "",
    ". d41d8cd98f00b204e9800998ecf8427e 0:0:foo.txt\n",
  ].each do |manifest_text|
    test "update collection with valid manifest text #{manifest_text.inspect} and expect success" do
      act_as_system_user do
        c = create_collection 'foo', Encoding::US_ASCII
        assert c.valid?

        c.update_attribute 'manifest_text', manifest_text
        assert c.valid?
      end
    end
  end

  test 'create and update collection and verify file_names' do
    act_as_system_user do
      c = create_collection 'foo', Encoding::US_ASCII
      assert c.valid?
      created_file_names = c.file_names
      assert created_file_names
      # Dots are escaped so the literal file name is what gets matched.
      assert_match(/foo\.txt/, c.file_names)

      c.update_attribute 'manifest_text', ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo2.txt\n"
      assert_not_equal created_file_names, c.file_names
      assert_match(/foo2\.txt/, c.file_names)
    end
  end

  # manifest_size is the minimum manifest length to generate; allow_truncate
  # says whether the tail of the manifest may be absent from file_names.
  [
    [2**8, false],
    [2**18, true],
  ].each do |manifest_size, allow_truncate|
    # The name is built from adjacent literals so it contains no raw newline.
    test "create collection with manifest size #{manifest_size} with allow_truncate=#{allow_truncate}, " \
         "and not expect exceptions even on very large manifest texts" do
      # file_names has a max size, hence there will be no errors even on large manifests
      act_as_system_user do
        manifest_text = ''
        index = 0
        # Append in place (<<) rather than +=, which would copy the whole
        # accumulated string on every iteration.
        while manifest_text.length < manifest_size
          manifest_text << "./blurfl d41d8cd98f00b204e9800998ecf8427e+0 0:0:veryverylongfilename000000000000#{index}.txt\n"
          index += 1
        end
        manifest_text << "./laststreamname d41d8cd98f00b204e9800998ecf8427e+0 0:0:veryverylastfilename.txt\n"
        c = Collection.create(manifest_text: manifest_text)

        assert c.valid?
        assert c.file_names
        assert_match(/veryverylongfilename0000000000001\.txt/, c.file_names)
        assert_match(/veryverylongfilename0000000000002\.txt/, c.file_names)
        unless allow_truncate
          # Only a small manifest is required to have its last stream and
          # file present in file_names.
          assert_match(/veryverylastfilename/, c.file_names)
          assert_match(/laststreamname/, c.file_names)
        end
      end
    end
  end

  test "full text search for collections" do
    # file_names column does not get populated when fixtures are loaded, hence setup test data
    act_as_system_user do
      Collection.create(manifest_text: ". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo\n")
      Collection.create(manifest_text: ". 37b51d194a7513e45b56f6524f2d51f2+3 0:3:bar\n")
      Collection.create(manifest_text: ". 85877ca2d7e05498dd3d109baf2df106+95+A3a4e26a366ee7e4ed3e476ccf05354761be2e4ae@545a9920 0:95:file_in_subdir1\n./subdir2/subdir3 2bbc341c702df4d8f42ec31f16c10120+64+A315d7e7bad2ce937e711fc454fae2d1194d14d64@545a9920 0:32:file1.txt 32:32:file2.txt\n./subdir2/subdir3/subdir4 2bbc341c702df4d8f42ec31f16c10120+64+A315d7e7bad2ce937e711fc454fae2d1194d14d64@545a9920 0:32:file3.txt 32:32:file4.txt\n")
    end

    # Each row is [search string, whether any collection should match].
    # NOTE(review): the 'Gnu ...' rows presumably rely on fixture data that is
    # not created in this test -- confirm against the collection fixtures.
    [
      ['foo', true],
      ['foo bar', false],                     # no collection matching both
      ['foo&bar', false],                     # no collection matching both
      ['foo|bar', true],                      # works only with no spaces between the words
      ['Gnu public', true],                   # both prefixes found, though not consecutively
      ['Gnu&public', true],                   # both prefixes found, though not consecutively
      ['file4', true],                        # prefix match
      ['file4.txt', true],                    # whole string match
      ['filex', false],                       # no such prefix
      ['subdir', true],                       # prefix matches
      ['subdir2', true],
      ['subdir2/', true],
      ['subdir2/subdir3', true],
      ['subdir2/subdir3/subdir4', true],
      ['subdir2 file4', true],                # look for both prefixes
      ['subdir4', false],                     # not a prefix match
    ].each do |search_filter, expect_results|
      # Turn every whitespace-separated word into a prefix term (word:*) and
      # AND the terms together.
      search_filters = search_filter.split.map { |word| "#{word}:*" }.join('&')
      results = Collection.where("#{Collection.full_text_tsvector} @@ to_tsquery(?)",
                                 search_filters)
      if expect_results
        refute_empty results, "expected matches for search #{search_filter.inspect}"
      else
        assert_empty results, "expected no matches for search #{search_filter.inspect}"
      end
    end
  end

  test 'portable data hash with missing size hints' do
    # Each pair is [manifest as supplied, manifest the portable data hash is
    # expected to be computed from].
    [[". d41d8cd98f00b204e9800998ecf8427e+0+Bar 0:0:x\n",
      ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:x\n"],
     [". d41d8cd98f00b204e9800998ecf8427e+Foo 0:0:x\n",
      ". d41d8cd98f00b204e9800998ecf8427e 0:0:x\n"],
     [". d41d8cd98f00b204e9800998ecf8427e 0:0:x\n",
      ". d41d8cd98f00b204e9800998ecf8427e 0:0:x\n"],
    ].each do |unportable, portable|
      c = Collection.new(manifest_text: unportable)
      assert c.valid?
      assert_equal(Digest::MD5.hexdigest(portable)+"+#{portable.length}",
                   c.portable_data_hash)
    end
  end

  # Reference manifest and its MD5, shared by the generated
  # portable_data_hash validity tests below.
  pdhmanifest = ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:x\n"
  pdhmd5 = Digest::MD5.hexdigest pdhmanifest
  # Each row is [expected validity, portable_data_hash supplied by the caller].
  [[true, nil],
   [true, pdhmd5],
   [true, pdhmd5+'+12345'],
   [true, pdhmd5+'+'+pdhmanifest.length.to_s],
   [true, pdhmd5+'+12345+Foo'],
   [true, pdhmd5+'+Foo'],
   [false, Digest::MD5.hexdigest(pdhmanifest.strip)],
   [false, Digest::MD5.hexdigest(pdhmanifest.strip)+'+'+pdhmanifest.length.to_s],
   [false, pdhmd5[0..30]],
   [false, pdhmd5[0..30]+'z'],
   [false, pdhmd5[0..24]+'000000000'],
   [false, pdhmd5[0..24]+'000000000+0']].each do |isvalid, pdh|
    test "portable_data_hash #{pdh.inspect} valid? == #{isvalid}" do
      c = Collection.new manifest_text: pdhmanifest, portable_data_hash: pdh
      # The validation errors are used as the failure message.
      assert_equal isvalid, c.valid?, c.errors.full_messages.to_s
    end
  end

  [0, 2, 4, nil].each do |ask|
    test "set replication_desired to #{ask.inspect}" do
      # NOTE(review): this changes global Rails configuration and does not
      # restore it here; presumably test_helper resets it between tests --
      # confirm.
      Rails.configuration.default_collection_replication = 2
      act_as_user users(:active) do
        c = collections(:replication_undesired_unconfirmed)
        c.update_attributes replication_desired: ask
        assert_equal ask, c.replication_desired
      end
    end
  end

  test "replication_confirmed* can be set by admin user" do
    c = collections(:replication_desired_2_unconfirmed)
    act_as_user users(:admin) do
      assert c.update_attributes(replication_confirmed: 2,
                                 replication_confirmed_at: Time.now)
    end
  end

  test "replication_confirmed* cannot be set by non-admin user" do
    act_as_user users(:active) do
      c = collections(:replication_desired_2_unconfirmed)
      # Cannot set just one at a time.
      assert_raise ArvadosModel::PermissionDeniedError do
        c.update_attributes replication_confirmed: 1
      end
      assert_raise ArvadosModel::PermissionDeniedError do
        c.update_attributes replication_confirmed_at: Time.now
      end
      # Cannot set both at once, either.
      assert_raise ArvadosModel::PermissionDeniedError do
        c.update_attributes(replication_confirmed: 1,
                            replication_confirmed_at: Time.now)
      end
    end
  end

  test "replication_confirmed* can be cleared (but only together) by non-admin user" do
    act_as_user users(:active) do
      c = collections(:replication_desired_2_confirmed_2)
      # Cannot clear just one at a time.
      assert_raise ArvadosModel::PermissionDeniedError do
        c.update_attributes replication_confirmed: nil
      end
      # Reload to discard the attribute change left in memory by the
      # rejected update.
      c.reload
      assert_raise ArvadosModel::PermissionDeniedError do
        c.update_attributes replication_confirmed_at: nil
      end
      # Can clear both at once.
      c.reload
      assert c.update_attributes(replication_confirmed: nil,
                                 replication_confirmed_at: nil)
    end
  end

  test "clear replication_confirmed* when introducing a new block in manifest" do
    c = collections(:replication_desired_2_confirmed_2)
    act_as_user users(:active) do
      assert c.update_attributes(manifest_text: collections(:user_agreement).signed_manifest_text)
      assert_nil c.replication_confirmed
      assert_nil c.replication_confirmed_at
    end
  end

  test "don't clear replication_confirmed* when just renaming a file" do
    c = collections(:replication_desired_2_confirmed_2)
    act_as_user users(:active) do
      # Only a file token changes; the manifest text is otherwise untouched.
      new_manifest = c.signed_manifest_text.sub(':bar', ':foo')
      assert c.update_attributes(manifest_text: new_manifest)
      assert_equal 2, c.replication_confirmed
      assert_not_nil c.replication_confirmed_at
    end
  end

  test "don't clear replication_confirmed* when just deleting a data block" do
    c = collections(:replication_desired_2_confirmed_2)
    act_as_user users(:active) do
      new_manifest = c.signed_manifest_text
      # Drop the ':bar' file token and the locator beginning with 'acbd'.
      new_manifest.sub!(/ \S+:bar/, '')
      new_manifest.sub!(/ acbd\S+/, '')

      # Confirm that we did just remove a block from the manifest (if
      # not, this test would pass without testing the relevant case):
      assert_operator new_manifest.length+40, :<, c.signed_manifest_text.length

      assert c.update_attributes(manifest_text: new_manifest)
      assert_equal 2, c.replication_confirmed
      assert_not_nil c.replication_confirmed_at
    end
  end

  test "create collection with properties" do
    act_as_system_user do
      c = Collection.create(manifest_text: ". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo\n",
                            properties: {'property_1' => 'value_1'})
      assert c.valid?
      assert_equal 'value_1', c.properties['property_1']
    end
  end

  test 'create, delete, recreate collection with same name and owner' do
    act_as_user users(:active) do
      # create collection with name
      c = Collection.create(manifest_text: '',
                            name: "test collection name")
      assert c.valid?
      uuid = c.uuid

      # mark collection as expired
      c.update_attribute 'expires_at', Time.now.strftime("%Y-%m-%d")
      c = Collection.where(uuid: uuid)
      assert_empty c, 'Should not be able to find expired collection'

      # recreate collection with the same name
      c = Collection.create(manifest_text: '',
                            name: "test collection name")
      assert c.valid?
    end
  end

  test "find_all_for_docker_image resolves names that look like hashes" do
    # 'a' * 64 is a 64-character string made only of hex digits.
    coll_list = Collection.
      find_all_for_docker_image('a' * 64, nil, [users(:active)])
    coll_uuids = coll_list.map(&:uuid)
    assert_includes(coll_uuids, collections(:docker_image).uuid)
  end
end