Merge branch 'crunch-job_finds_newer_docker_hashes' of https://github.com/tmooney...
[arvados.git] / services / api / test / unit / collection_test.rb
1 require 'test_helper'
2
# Unit tests for the Collection model. Covers manifest_text encoding and
# format validation, the derived file_names value, full text search,
# portable_data_hash validation, the replication_* attributes, blob
# signature expiry, and expires_at handling.
class CollectionTest < ActiveSupport::TestCase
  include DbCurrentTime

  # Create (and attempt to save) a collection whose manifest contains a
  # single empty file called "<name>.txt".
  #
  # name - String used as the file's base name inside the manifest.
  # enc  - optional Encoding; when given, the manifest string is re-tagged
  #        with it via force_encoding (the bytes are not transcoded).
  #
  # Returns the Collection returned by Collection.create, which may be
  # invalid; callers check valid? themselves.
  def create_collection name, enc=nil
    txt = ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:#{name}.txt\n"
    txt.force_encoding(enc) if enc
    Collection.create(manifest_text: txt)
  end

  # Extract the expiry time of the first "+A<40 hex digits>@<expiry>"
  # signature hint found in manifest_text.
  #
  # The expiry is parsed as hexadecimal: the signed fixture manifests used
  # elsewhere in this file carry hints such as "@545a9920", which is not a
  # decimal number. Parsing only the leading decimal digits (as this file
  # used to do) yields a tiny value that makes every "<=" comparison
  # against a Unix time pass vacuously.
  #
  # Fails the running test if no signature hint is present.
  #
  # Returns the expiry as an Integer.
  def signature_expiry manifest_text
    hint = /\+A[0-9a-f]{40}@([0-9a-f]+)/.match(manifest_text)
    assert_not_nil hint, "no signature hint found in #{manifest_text.inspect}"
    hint[1].to_i(16)
  end

  test 'accept ASCII manifest_text' do
    act_as_system_user do
      c = create_collection 'foo', Encoding::US_ASCII
      assert c.valid?
    end
  end

  test 'accept UTF-8 manifest_text' do
    act_as_system_user do
      c = create_collection "f\xc3\x98\xc3\x98", Encoding::UTF_8
      assert c.valid?
    end
  end

  test 'refuse manifest_text with invalid UTF-8 byte sequence' do
    act_as_system_user do
      c = create_collection "f\xc8o", Encoding::UTF_8
      refute c.valid?
      assert_equal [:manifest_text], c.errors.messages.keys
      assert_match(/UTF-8/, c.errors.messages[:manifest_text].first)
    end
  end

  test 'refuse manifest_text with non-UTF-8 encoding' do
    act_as_system_user do
      c = create_collection "f\xc8o", Encoding::ASCII_8BIT
      refute c.valid?
      assert_equal [:manifest_text], c.errors.messages.keys
      assert_match(/UTF-8/, c.errors.messages[:manifest_text].first)
    end
  end

  # Manifest texts the model is expected to reject. Each one is missing
  # something: the block locator, the file position/size, the stream name,
  # or the trailing newline.
  invalid_manifest_texts = [
    ". 0:0:foo.txt",
    ". d41d8cd98f00b204e9800998ecf8427e foo.txt",
    "d41d8cd98f00b204e9800998ecf8427e 0:0:foo.txt",
    ". d41d8cd98f00b204e9800998ecf8427e 0:0:foo.txt",
  ]

  # Manifest texts the model is expected to accept, including "no manifest".
  valid_manifest_texts = [
    nil,
    "",
    ". d41d8cd98f00b204e9800998ecf8427e 0:0:foo.txt\n",
  ]

  invalid_manifest_texts.each do |manifest_text|
    test "create collection with invalid manifest text #{manifest_text} and expect error" do
      act_as_system_user do
        c = Collection.create(manifest_text: manifest_text)
        refute c.valid?
      end
    end
  end

  valid_manifest_texts.each do |manifest_text|
    test "create collection with valid manifest text #{manifest_text.inspect} and expect success" do
      act_as_system_user do
        c = Collection.create(manifest_text: manifest_text)
        assert c.valid?
      end
    end
  end

  invalid_manifest_texts.each do |manifest_text|
    test "update collection with invalid manifest text #{manifest_text} and expect error" do
      act_as_system_user do
        c = create_collection 'foo', Encoding::US_ASCII
        assert c.valid?

        # update_attribute saves without running validations (per the
        # Rails API), so validity is checked explicitly afterwards.
        c.update_attribute 'manifest_text', manifest_text
        refute c.valid?
      end
    end
  end

  valid_manifest_texts.each do |manifest_text|
    test "update collection with valid manifest text #{manifest_text.inspect} and expect success" do
      act_as_system_user do
        c = create_collection 'foo', Encoding::US_ASCII
        assert c.valid?

        c.update_attribute 'manifest_text', manifest_text
        assert c.valid?
      end
    end
  end

  test 'create and update collection and verify file_names' do
    act_as_system_user do
      c = create_collection 'foo', Encoding::US_ASCII
      assert c.valid?
      created_file_names = c.file_names
      assert created_file_names
      assert_match(/foo\.txt/, c.file_names)

      c.update_attribute 'manifest_text', ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo2.txt\n"
      assert_not_equal created_file_names, c.file_names
      assert_match(/foo2\.txt/, c.file_names)
    end
  end

  [
    [2**8, false],
    [2**18, true],
  ].each do |manifest_size, allow_truncate|
    # The name is split across two literals only for line length; the
    # whitespace between the halves is a single space.
    test "create collection with manifest size #{manifest_size} with allow_truncate=#{allow_truncate}, " \
         "and not expect exceptions even on very large manifest texts" do
      # file_names has a max size, hence there will be no errors even on large manifests
      act_as_system_user do
        # Append in place: rebuilding the string with += on every
        # iteration copies the whole manifest each time.
        manifest_text = ''
        index = 0
        while manifest_text.length < manifest_size
          manifest_text << "./blurfl d41d8cd98f00b204e9800998ecf8427e+0 0:0:veryverylongfilename000000000000#{index}.txt\n"
          index += 1
        end
        manifest_text << "./laststreamname d41d8cd98f00b204e9800998ecf8427e+0 0:0:veryverylastfilename.txt\n"
        c = Collection.create(manifest_text: manifest_text)

        assert c.valid?
        assert c.file_names
        assert_match(/veryverylongfilename0000000000001\.txt/, c.file_names)
        assert_match(/veryverylongfilename0000000000002\.txt/, c.file_names)
        # The tail of the manifest is only checked when the manifest is
        # small enough that file_names is not expected to be truncated.
        unless allow_truncate
          assert_match(/veryverylastfilename/, c.file_names)
          assert_match(/laststreamname/, c.file_names)
        end
      end
    end
  end

  test "full text search for collections" do
    # file_names column does not get populated when fixtures are loaded, hence setup test data
    act_as_system_user do
      Collection.create(manifest_text: ". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo\n")
      Collection.create(manifest_text: ". 37b51d194a7513e45b56f6524f2d51f2+3 0:3:bar\n")
      Collection.create(manifest_text: ". 85877ca2d7e05498dd3d109baf2df106+95+A3a4e26a366ee7e4ed3e476ccf05354761be2e4ae@545a9920 0:95:file_in_subdir1\n./subdir2/subdir3 2bbc341c702df4d8f42ec31f16c10120+64+A315d7e7bad2ce937e711fc454fae2d1194d14d64@545a9920 0:32:file1.txt 32:32:file2.txt\n./subdir2/subdir3/subdir4 2bbc341c702df4d8f42ec31f16c10120+64+A315d7e7bad2ce937e711fc454fae2d1194d14d64@545a9920 0:32:file3.txt 32:32:file4.txt\n")
    end

    [
      ['foo', true],
      ['foo bar', false],                     # no collection matching both
      ['foo&bar', false],                     # no collection matching both
      ['foo|bar', true],                      # works only with no spaces between the words
      ['Gnu public', true],                   # both prefixes found, though not consecutively
      ['Gnu&public', true],                   # both prefixes found, though not consecutively
      ['file4', true],                        # prefix match
      ['file4.txt', true],                    # whole string match
      ['filex', false],                       # no such prefix
      ['subdir', true],                       # prefix matches
      ['subdir2', true],
      ['subdir2/', true],
      ['subdir2/subdir3', true],
      ['subdir2/subdir3/subdir4', true],
      ['subdir2 file4', true],                # look for both prefixes
      ['subdir4', false],                     # not a prefix match
    ].each do |search_filter, expect_results|
      # Turn each whitespace-separated word into a prefix term ("word:*")
      # and require all of them ("&").
      tsquery = search_filter.split.map { |word| "#{word}:*" }.join('&')
      results = Collection.where("#{Collection.full_text_tsvector} @@ to_tsquery(?)",
                                 tsquery)
      if expect_results
        refute_empty results, "expected results for search #{search_filter.inspect}"
      else
        assert_empty results, "expected no results for search #{search_filter.inspect}"
      end
    end
  end

  test 'portable data hash with missing size hints' do
    # Each pair is [manifest as given, manifest whose MD5 and length are
    # expected to make up the portable_data_hash].
    [[". d41d8cd98f00b204e9800998ecf8427e+0+Bar 0:0:x\n",
      ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:x\n"],
     [". d41d8cd98f00b204e9800998ecf8427e+Foo 0:0:x\n",
      ". d41d8cd98f00b204e9800998ecf8427e 0:0:x\n"],
     [". d41d8cd98f00b204e9800998ecf8427e 0:0:x\n",
      ". d41d8cd98f00b204e9800998ecf8427e 0:0:x\n"],
    ].each do |unportable, portable|
      c = Collection.new(manifest_text: unportable)
      assert c.valid?
      assert_equal(Digest::MD5.hexdigest(portable)+"+#{portable.length}",
                   c.portable_data_hash)
    end
  end

  pdhmanifest = ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:x\n"
  pdhmd5 = Digest::MD5.hexdigest pdhmanifest
  # Each pair is [expected validity, portable_data_hash supplied by the caller].
  [[true, nil],
   [true, pdhmd5],
   [true, pdhmd5+'+12345'],
   [true, pdhmd5+'+'+pdhmanifest.length.to_s],
   [true, pdhmd5+'+12345+Foo'],
   [true, pdhmd5+'+Foo'],
   [false, Digest::MD5.hexdigest(pdhmanifest.strip)],
   [false, Digest::MD5.hexdigest(pdhmanifest.strip)+'+'+pdhmanifest.length.to_s],
   [false, pdhmd5[0..30]],
   [false, pdhmd5[0..30]+'z'],
   [false, pdhmd5[0..24]+'000000000'],
   [false, pdhmd5[0..24]+'000000000+0']].each do |isvalid, pdh|
    test "portable_data_hash #{pdh.inspect} valid? == #{isvalid}" do
      c = Collection.new manifest_text: pdhmanifest, portable_data_hash: pdh
      assert_equal isvalid, c.valid?, c.errors.full_messages.to_s
    end
  end

  [0, 2, 4, nil].each do |ask|
    test "set replication_desired to #{ask.inspect}" do
      Rails.configuration.default_collection_replication = 2
      act_as_user users(:active) do
        c = collections(:replication_undesired_unconfirmed)
        c.update_attributes replication_desired: ask
        # Minitest deprecates assert_equal with a nil expectation, so the
        # nil case is checked with assert_nil.
        if ask.nil?
          assert_nil c.replication_desired
        else
          assert_equal ask, c.replication_desired
        end
      end
    end
  end

  test "replication_confirmed* can be set by admin user" do
    c = collections(:replication_desired_2_unconfirmed)
    act_as_user users(:admin) do
      assert c.update_attributes(replication_confirmed: 2,
                                 replication_confirmed_at: Time.now)
    end
  end

  test "replication_confirmed* cannot be set by non-admin user" do
    act_as_user users(:active) do
      c = collections(:replication_desired_2_unconfirmed)
      # Cannot set just one at a time.
      assert_raise ArvadosModel::PermissionDeniedError do
        c.update_attributes replication_confirmed: 1
      end
      assert_raise ArvadosModel::PermissionDeniedError do
        c.update_attributes replication_confirmed_at: Time.now
      end
      # Cannot set both at once, either.
      assert_raise ArvadosModel::PermissionDeniedError do
        c.update_attributes(replication_confirmed: 1,
                            replication_confirmed_at: Time.now)
      end
    end
  end

  test "replication_confirmed* can be cleared (but only together) by non-admin user" do
    act_as_user users(:active) do
      c = collections(:replication_desired_2_confirmed_2)
      # Cannot clear just one at a time.
      assert_raise ArvadosModel::PermissionDeniedError do
        c.update_attributes replication_confirmed: nil
      end
      # Reload to discard the rejected in-memory change before the next attempt.
      c.reload
      assert_raise ArvadosModel::PermissionDeniedError do
        c.update_attributes replication_confirmed_at: nil
      end
      # Can clear both at once.
      c.reload
      assert c.update_attributes(replication_confirmed: nil,
                                 replication_confirmed_at: nil)
    end
  end

  test "clear replication_confirmed* when introducing a new block in manifest" do
    c = collections(:replication_desired_2_confirmed_2)
    act_as_user users(:active) do
      assert c.update_attributes(manifest_text: collections(:user_agreement).signed_manifest_text)
      assert_nil c.replication_confirmed
      assert_nil c.replication_confirmed_at
    end
  end

  test "don't clear replication_confirmed* when just renaming a file" do
    c = collections(:replication_desired_2_confirmed_2)
    act_as_user users(:active) do
      new_manifest = c.signed_manifest_text.sub(':bar', ':foo')
      assert c.update_attributes(manifest_text: new_manifest)
      assert_equal 2, c.replication_confirmed
      assert_not_nil c.replication_confirmed_at
    end
  end

  test "don't clear replication_confirmed* when just deleting a data block" do
    c = collections(:replication_desired_2_confirmed_2)
    act_as_user users(:active) do
      new_manifest = c.signed_manifest_text
      new_manifest.sub!(/ \S+:bar/, '')
      new_manifest.sub!(/ acbd\S+/, '')

      # Confirm that we did just remove a block from the manifest (if
      # not, this test would pass without testing the relevant case):
      assert_operator new_manifest.length+40, :<, c.signed_manifest_text.length

      assert c.update_attributes(manifest_text: new_manifest)
      assert_equal 2, c.replication_confirmed
      assert_not_nil c.replication_confirmed_at
    end
  end

  test 'signature expiry does not exceed expires_at' do
    act_as_user users(:active) do
      t0 = db_current_time
      c = Collection.create!(manifest_text: ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:x\n", name: 'foo')
      c.update_attributes! expires_at: (t0 + 1.hours)
      c.reload
      sig_exp = signature_expiry(c.signed_manifest_text)
      assert_operator sig_exp, :<=, (t0 + 1.hours).to_i
    end
  end

  test 'far-future expiry date cannot be used to circumvent configured permission ttl' do
    act_as_user users(:active) do
      c = Collection.create!(manifest_text: ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:x\n",
                             name: 'foo',
                             expires_at: db_current_time + 1.years)
      sig_exp = signature_expiry(c.signed_manifest_text)
      # Computed after signing, so it is an upper bound for any signature
      # that honours blob_signature_ttl.
      expect_max_sig_exp = db_current_time.to_i + Rails.configuration.blob_signature_ttl
      assert_operator c.expires_at.to_i, :>, expect_max_sig_exp
      assert_operator sig_exp, :<=, expect_max_sig_exp
    end
  end

  test "create collection with properties" do
    act_as_system_user do
      c = Collection.create(manifest_text: ". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo\n",
                            properties: {'property_1' => 'value_1'})
      assert c.valid?
      assert_equal 'value_1', c.properties['property_1']
    end
  end

  test 'create, delete, recreate collection with same name and owner' do
    act_as_user users(:active) do
      # create collection with name
      c = Collection.create(manifest_text: '',
                            name: "test collection name")
      assert c.valid?
      uuid = c.uuid

      # mark collection as expired
      c.update_attribute 'expires_at', Time.new.strftime("%Y-%m-%d")
      c = Collection.where(uuid: uuid)
      assert_empty c, 'Should not be able to find expired collection'

      # recreate collection with the same name
      c = Collection.create(manifest_text: '',
                            name: "test collection name")
      assert c.valid?
    end
  end

  test "find_all_for_docker_image resolves names that look like hashes" do
    coll_list = Collection.
      find_all_for_docker_image('a' * 64, nil, [users(:active)])
    coll_uuids = coll_list.map(&:uuid)
    assert_includes(coll_uuids, collections(:docker_image).uuid)
  end

  test 'expires_at cannot be set too far in the past' do
    act_as_user users(:active) do
      t0 = db_current_time
      c = Collection.create!(manifest_text: '', name: 'foo')
      c.update_attributes! expires_at: (t0 - 2.weeks)
      c.reload
      assert_operator c.expires_at, :>, t0
    end
  end
end