Merge branch '11100-cr-output-ttl'
[arvados.git] / services / api / test / unit / collection_test.rb
1 require 'test_helper'
2 require 'sweep_trashed_collections'
3
4 class CollectionTest < ActiveSupport::TestCase
5   include DbCurrentTime
6
7   def create_collection name, enc=nil
8     txt = ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:#{name}.txt\n"
9     txt.force_encoding(enc) if enc
10     return Collection.create(manifest_text: txt)
11   end
12
13   test 'accept ASCII manifest_text' do
14     act_as_system_user do
15       c = create_collection 'foo', Encoding::US_ASCII
16       assert c.valid?
17     end
18   end
19
20   test 'accept UTF-8 manifest_text' do
21     act_as_system_user do
22       c = create_collection "f\xc3\x98\xc3\x98", Encoding::UTF_8
23       assert c.valid?
24     end
25   end
26
27   test 'refuse manifest_text with invalid UTF-8 byte sequence' do
28     act_as_system_user do
29       c = create_collection "f\xc8o", Encoding::UTF_8
30       assert !c.valid?
31       assert_equal [:manifest_text], c.errors.messages.keys
32       assert_match(/UTF-8/, c.errors.messages[:manifest_text].first)
33     end
34   end
35
36   test 'refuse manifest_text with non-UTF-8 encoding' do
37     act_as_system_user do
38       c = create_collection "f\xc8o", Encoding::ASCII_8BIT
39       assert !c.valid?
40       assert_equal [:manifest_text], c.errors.messages.keys
41       assert_match(/UTF-8/, c.errors.messages[:manifest_text].first)
42     end
43   end
44
45   [
46     ". 0:0:foo.txt",
47     ". d41d8cd98f00b204e9800998ecf8427e foo.txt",
48     "d41d8cd98f00b204e9800998ecf8427e 0:0:foo.txt",
49     ". d41d8cd98f00b204e9800998ecf8427e 0:0:foo.txt",
50   ].each do |manifest_text|
51     test "create collection with invalid manifest text #{manifest_text} and expect error" do
52       act_as_system_user do
53         c = Collection.create(manifest_text: manifest_text)
54         assert !c.valid?
55       end
56     end
57   end
58
59   [
60     nil,
61     "",
62     ". d41d8cd98f00b204e9800998ecf8427e 0:0:foo.txt\n",
63   ].each do |manifest_text|
64     test "create collection with valid manifest text #{manifest_text.inspect} and expect success" do
65       act_as_system_user do
66         c = Collection.create(manifest_text: manifest_text)
67         assert c.valid?
68       end
69     end
70   end
71
72   [
73     ". 0:0:foo.txt",
74     ". d41d8cd98f00b204e9800998ecf8427e foo.txt",
75     "d41d8cd98f00b204e9800998ecf8427e 0:0:foo.txt",
76     ". d41d8cd98f00b204e9800998ecf8427e 0:0:foo.txt",
77   ].each do |manifest_text|
78     test "update collection with invalid manifest text #{manifest_text} and expect error" do
79       act_as_system_user do
80         c = create_collection 'foo', Encoding::US_ASCII
81         assert c.valid?
82
83         c.update_attribute 'manifest_text', manifest_text
84         assert !c.valid?
85       end
86     end
87   end
88
89   [
90     nil,
91     "",
92     ". d41d8cd98f00b204e9800998ecf8427e 0:0:foo.txt\n",
93   ].each do |manifest_text|
94     test "update collection with valid manifest text #{manifest_text.inspect} and expect success" do
95       act_as_system_user do
96         c = create_collection 'foo', Encoding::US_ASCII
97         assert c.valid?
98
99         c.update_attribute 'manifest_text', manifest_text
100         assert c.valid?
101       end
102     end
103   end
104
105   test 'create and update collection and verify file_names' do
106     act_as_system_user do
107       c = create_collection 'foo', Encoding::US_ASCII
108       assert c.valid?
109       created_file_names = c.file_names
110       assert created_file_names
111       assert_match(/foo.txt/, c.file_names)
112
113       c.update_attribute 'manifest_text', ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo2.txt\n"
114       assert_not_equal created_file_names, c.file_names
115       assert_match(/foo2.txt/, c.file_names)
116     end
117   end
118
119   [
120     [2**8, false],
121     [2**18, true],
122   ].each do |manifest_size, allow_truncate|
123     test "create collection with manifest size #{manifest_size} with allow_truncate=#{allow_truncate},
124           and not expect exceptions even on very large manifest texts" do
125       # file_names has a max size, hence there will be no errors even on large manifests
126       act_as_system_user do
127         manifest_text = ''
128         index = 0
129         while manifest_text.length < manifest_size
130           manifest_text += "./blurfl d41d8cd98f00b204e9800998ecf8427e+0 0:0:veryverylongfilename000000000000#{index}.txt\n"
131           index += 1
132         end
133         manifest_text += "./laststreamname d41d8cd98f00b204e9800998ecf8427e+0 0:0:veryverylastfilename.txt\n"
134         c = Collection.create(manifest_text: manifest_text)
135
136         assert c.valid?
137         assert c.file_names
138         assert_match(/veryverylongfilename0000000000001.txt/, c.file_names)
139         assert_match(/veryverylongfilename0000000000002.txt/, c.file_names)
140         if not allow_truncate
141           assert_match(/veryverylastfilename/, c.file_names)
142           assert_match(/laststreamname/, c.file_names)
143         end
144       end
145     end
146   end
147
148   test "full text search for collections" do
149     # file_names column does not get populated when fixtures are loaded, hence setup test data
150     act_as_system_user do
151       Collection.create(manifest_text: ". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo\n")
152       Collection.create(manifest_text: ". 37b51d194a7513e45b56f6524f2d51f2+3 0:3:bar\n")
153       Collection.create(manifest_text: ". 85877ca2d7e05498dd3d109baf2df106+95+A3a4e26a366ee7e4ed3e476ccf05354761be2e4ae@545a9920 0:95:file_in_subdir1\n./subdir2/subdir3 2bbc341c702df4d8f42ec31f16c10120+64+A315d7e7bad2ce937e711fc454fae2d1194d14d64@545a9920 0:32:file1.txt 32:32:file2.txt\n./subdir2/subdir3/subdir4 2bbc341c702df4d8f42ec31f16c10120+64+A315d7e7bad2ce937e711fc454fae2d1194d14d64@545a9920 0:32:file3.txt 32:32:file4.txt\n")
154     end
155
156     [
157       ['foo', true],
158       ['foo bar', false],                     # no collection matching both
159       ['foo&bar', false],                     # no collection matching both
160       ['foo|bar', true],                      # works only no spaces between the words
161       ['Gnu public', true],                   # both prefixes found, though not consecutively
162       ['Gnu&public', true],                   # both prefixes found, though not consecutively
163       ['file4', true],                        # prefix match
164       ['file4.txt', true],                    # whole string match
165       ['filex', false],                       # no such prefix
166       ['subdir', true],                       # prefix matches
167       ['subdir2', true],
168       ['subdir2/', true],
169       ['subdir2/subdir3', true],
170       ['subdir2/subdir3/subdir4', true],
171       ['subdir2 file4', true],                # look for both prefixes
172       ['subdir4', false],                     # not a prefix match
173     ].each do |search_filter, expect_results|
174       search_filters = search_filter.split.each {|s| s.concat(':*')}.join('&')
175       results = Collection.where("#{Collection.full_text_tsvector} @@ to_tsquery(?)",
176                                  "#{search_filters}")
177       if expect_results
178         refute_empty results
179       else
180         assert_empty results
181       end
182     end
183   end
184
185   test 'portable data hash with missing size hints' do
186     [[". d41d8cd98f00b204e9800998ecf8427e+0+Bar 0:0:x\n",
187       ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:x\n"],
188      [". d41d8cd98f00b204e9800998ecf8427e+Foo 0:0:x\n",
189       ". d41d8cd98f00b204e9800998ecf8427e 0:0:x\n"],
190      [". d41d8cd98f00b204e9800998ecf8427e 0:0:x\n",
191       ". d41d8cd98f00b204e9800998ecf8427e 0:0:x\n"],
192     ].each do |unportable, portable|
193       c = Collection.new(manifest_text: unportable)
194       assert c.valid?
195       assert_equal(Digest::MD5.hexdigest(portable)+"+#{portable.length}",
196                    c.portable_data_hash)
197     end
198   end
199
200   pdhmanifest = ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:x\n"
201   pdhmd5 = Digest::MD5.hexdigest pdhmanifest
202   [[true, nil],
203    [true, pdhmd5],
204    [true, pdhmd5+'+12345'],
205    [true, pdhmd5+'+'+pdhmanifest.length.to_s],
206    [true, pdhmd5+'+12345+Foo'],
207    [true, pdhmd5+'+Foo'],
208    [false, Digest::MD5.hexdigest(pdhmanifest.strip)],
209    [false, Digest::MD5.hexdigest(pdhmanifest.strip)+'+'+pdhmanifest.length.to_s],
210    [false, pdhmd5[0..30]],
211    [false, pdhmd5[0..30]+'z'],
212    [false, pdhmd5[0..24]+'000000000'],
213    [false, pdhmd5[0..24]+'000000000+0']].each do |isvalid, pdh|
214     test "portable_data_hash #{pdh.inspect} valid? == #{isvalid}" do
215       c = Collection.new manifest_text: pdhmanifest, portable_data_hash: pdh
216       assert_equal isvalid, c.valid?, c.errors.full_messages.to_s
217     end
218   end
219
220   [0, 2, 4, nil].each do |ask|
221     test "set replication_desired to #{ask.inspect}" do
222       Rails.configuration.default_collection_replication = 2
223       act_as_user users(:active) do
224         c = collections(:replication_undesired_unconfirmed)
225         c.update_attributes replication_desired: ask
226         assert_equal ask, c.replication_desired
227       end
228     end
229   end
230
231   test "replication_confirmed* can be set by admin user" do
232     c = collections(:replication_desired_2_unconfirmed)
233     act_as_user users(:admin) do
234       assert c.update_attributes(replication_confirmed: 2,
235                                  replication_confirmed_at: Time.now)
236     end
237   end
238
239   test "replication_confirmed* cannot be set by non-admin user" do
240     act_as_user users(:active) do
241       c = collections(:replication_desired_2_unconfirmed)
242       # Cannot set just one at a time.
243       assert_raise ArvadosModel::PermissionDeniedError do
244         c.update_attributes replication_confirmed: 1
245       end
246       assert_raise ArvadosModel::PermissionDeniedError do
247         c.update_attributes replication_confirmed_at: Time.now
248       end
249       # Cannot set both at once, either.
250       assert_raise ArvadosModel::PermissionDeniedError do
251         c.update_attributes(replication_confirmed: 1,
252                             replication_confirmed_at: Time.now)
253       end
254     end
255   end
256
257   test "replication_confirmed* can be cleared (but only together) by non-admin user" do
258     act_as_user users(:active) do
259       c = collections(:replication_desired_2_confirmed_2)
260       # Cannot clear just one at a time.
261       assert_raise ArvadosModel::PermissionDeniedError do
262         c.update_attributes replication_confirmed: nil
263       end
264       c.reload
265       assert_raise ArvadosModel::PermissionDeniedError do
266         c.update_attributes replication_confirmed_at: nil
267       end
268       # Can clear both at once.
269       c.reload
270       assert c.update_attributes(replication_confirmed: nil,
271                                  replication_confirmed_at: nil)
272     end
273   end
274
275   test "clear replication_confirmed* when introducing a new block in manifest" do
276     c = collections(:replication_desired_2_confirmed_2)
277     act_as_user users(:active) do
278       assert c.update_attributes(manifest_text: collections(:user_agreement).signed_manifest_text)
279       assert_nil c.replication_confirmed
280       assert_nil c.replication_confirmed_at
281     end
282   end
283
284   test "don't clear replication_confirmed* when just renaming a file" do
285     c = collections(:replication_desired_2_confirmed_2)
286     act_as_user users(:active) do
287       new_manifest = c.signed_manifest_text.sub(':bar', ':foo')
288       assert c.update_attributes(manifest_text: new_manifest)
289       assert_equal 2, c.replication_confirmed
290       assert_not_nil c.replication_confirmed_at
291     end
292   end
293
294   test "don't clear replication_confirmed* when just deleting a data block" do
295     c = collections(:replication_desired_2_confirmed_2)
296     act_as_user users(:active) do
297       new_manifest = c.signed_manifest_text
298       new_manifest.sub!(/ \S+:bar/, '')
299       new_manifest.sub!(/ acbd\S+/, '')
300
301       # Confirm that we did just remove a block from the manifest (if
302       # not, this test would pass without testing the relevant case):
303       assert_operator new_manifest.length+40, :<, c.signed_manifest_text.length
304
305       assert c.update_attributes(manifest_text: new_manifest)
306       assert_equal 2, c.replication_confirmed
307       assert_not_nil c.replication_confirmed_at
308     end
309   end
310
311   test 'signature expiry does not exceed trash_at' do
312     act_as_user users(:active) do
313       t0 = db_current_time
314       c = Collection.create!(manifest_text: ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:x\n", name: 'foo')
315       c.update_attributes! trash_at: (t0 + 1.hours)
316       c.reload
317       sig_exp = /\+A[0-9a-f]{40}\@([0-9]+)/.match(c.signed_manifest_text)[1].to_i
318       assert_operator sig_exp.to_i, :<=, (t0 + 1.hours).to_i
319     end
320   end
321
322   test 'far-future expiry date cannot be used to circumvent configured permission ttl' do
323     act_as_user users(:active) do
324       c = Collection.create!(manifest_text: ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:x\n",
325                              name: 'foo',
326                              trash_at: db_current_time + 1.years)
327       sig_exp = /\+A[0-9a-f]{40}\@([0-9]+)/.match(c.signed_manifest_text)[1].to_i
328       expect_max_sig_exp = db_current_time.to_i + Rails.configuration.blob_signature_ttl
329       assert_operator c.trash_at.to_i, :>, expect_max_sig_exp
330       assert_operator sig_exp.to_i, :<=, expect_max_sig_exp
331     end
332   end
333
334   test "create collection with properties" do
335     act_as_system_user do
336       c = Collection.create(manifest_text: ". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo\n",
337                             properties: {'property_1' => 'value_1'})
338       assert c.valid?
339       assert_equal 'value_1', c.properties['property_1']
340     end
341   end
342
343   test 'create, delete, recreate collection with same name and owner' do
344     act_as_user users(:active) do
345       # create collection with name
346       c = Collection.create(manifest_text: '',
347                             name: "test collection name")
348       assert c.valid?
349       uuid = c.uuid
350
351       # mark collection as expired
352       c.update_attributes!(trash_at: Time.new.strftime("%Y-%m-%d"))
353       c = Collection.where(uuid: uuid)
354       assert_empty c, 'Should not be able to find expired collection'
355
356       # recreate collection with the same name
357       c = Collection.create(manifest_text: '',
358                             name: "test collection name")
359       assert c.valid?
360     end
361   end
362
363   test 'trash_at cannot be set too far in the past' do
364     act_as_user users(:active) do
365       t0 = db_current_time
366       c = Collection.create!(manifest_text: '', name: 'foo')
367       c.update_attributes! trash_at: (t0 - 2.weeks)
368       c.reload
369       assert_operator c.trash_at, :>, t0
370     end
371   end
372
373   now = Time.now
374   [['trash-to-delete interval negative',
375     :collection_owned_by_active,
376     {trash_at: now+2.weeks, delete_at: now},
377     {state: :invalid}],
378    ['now-to-delete interval short',
379     :collection_owned_by_active,
380     {trash_at: now+3.days, delete_at: now+7.days},
381     {state: :trash_future}],
382    ['now-to-delete interval short, trash=delete',
383     :collection_owned_by_active,
384     {trash_at: now+3.days, delete_at: now+3.days},
385     {state: :trash_future}],
386    ['trash-to-delete interval ok',
387     :collection_owned_by_active,
388     {trash_at: now, delete_at: now+15.days},
389     {state: :trash_now}],
390    ['trash-to-delete interval short, but far enough in future',
391     :collection_owned_by_active,
392     {trash_at: now+13.days, delete_at: now+15.days},
393     {state: :trash_future}],
394    ['trash by setting is_trashed bool',
395     :collection_owned_by_active,
396     {is_trashed: true},
397     {state: :trash_now}],
398    ['trash in future by setting just trash_at',
399     :collection_owned_by_active,
400     {trash_at: now+1.week},
401     {state: :trash_future}],
402    ['trash in future by setting trash_at and delete_at',
403     :collection_owned_by_active,
404     {trash_at: now+1.week, delete_at: now+4.weeks},
405     {state: :trash_future}],
406    ['untrash by clearing is_trashed bool',
407     :expired_collection,
408     {is_trashed: false},
409     {state: :not_trash}],
410   ].each do |test_name, fixture_name, updates, expect|
411     test test_name do
412       act_as_user users(:active) do
413         min_exp = (db_current_time +
414                    Rails.configuration.blob_signature_ttl.seconds)
415         if fixture_name == :expired_collection
416           # Fixture-finder shorthand doesn't find trashed collections
417           # because they're not in the default scope.
418           c = Collection.unscoped.find_by_uuid('zzzzz-4zz18-mto52zx1s7sn3ih')
419         else
420           c = collections(fixture_name)
421         end
422         updates_ok = c.update_attributes(updates)
423         expect_valid = expect[:state] != :invalid
424         assert_equal expect_valid, updates_ok, c.errors.full_messages.to_s
425         case expect[:state]
426         when :invalid
427           refute c.valid?
428         when :trash_now
429           assert c.is_trashed
430           assert_not_nil c.trash_at
431           assert_operator c.trash_at, :<=, db_current_time
432           assert_not_nil c.delete_at
433           assert_operator c.delete_at, :>=, min_exp
434         when :trash_future
435           refute c.is_trashed
436           assert_not_nil c.trash_at
437           assert_operator c.trash_at, :>, db_current_time
438           assert_not_nil c.delete_at
439           assert_operator c.delete_at, :>=, c.trash_at
440           # Currently this minimum interval is needed to prevent early
441           # garbage collection:
442           assert_operator c.delete_at, :>=, min_exp
443         when :not_trash
444           refute c.is_trashed
445           assert_nil c.trash_at
446           assert_nil c.delete_at
447         else
448           raise "bad expect[:state]==#{expect[:state].inspect} in test case"
449         end
450       end
451     end
452   end
453
454   test 'default trash interval > blob signature ttl' do
455     Rails.configuration.default_trash_lifetime = 86400 * 21 # 3 weeks
456     start = db_current_time
457     act_as_user users(:active) do
458       c = Collection.create!(manifest_text: '', name: 'foo')
459       c.update_attributes!(trash_at: start + 86400.seconds)
460       assert_operator c.delete_at, :>=, start + (86400*22).seconds
461       assert_operator c.delete_at, :<, start + (86400*22 + 30).seconds
462       c.destroy
463
464       c = Collection.create!(manifest_text: '', name: 'foo')
465       c.update_attributes!(is_trashed: true)
466       assert_operator c.delete_at, :>=, start + (86400*21).seconds
467     end
468   end
469
470   test "find_all_for_docker_image resolves names that look like hashes" do
471     coll_list = Collection.
472       find_all_for_docker_image('a' * 64, nil, [users(:active)])
473     coll_uuids = coll_list.map(&:uuid)
474     assert_includes(coll_uuids, collections(:docker_image).uuid)
475   end
476
477   test "move to trash in SweepTrashedCollections" do
478     c = collections(:trashed_on_next_sweep)
479     refute_empty Collection.where('uuid=? and is_trashed=false', c.uuid)
480     assert_raises(ActiveRecord::RecordNotUnique) do
481       act_as_user users(:active) do
482         Collection.create!(owner_uuid: c.owner_uuid,
483                            name: c.name)
484       end
485     end
486     SweepTrashedCollections.sweep_now
487     c = Collection.unscoped.where('uuid=? and is_trashed=true', c.uuid).first
488     assert c
489     act_as_user users(:active) do
490       assert Collection.create!(owner_uuid: c.owner_uuid,
491                                 name: c.name)
492     end
493   end
494
495   test "delete in SweepTrashedCollections" do
496     uuid = 'zzzzz-4zz18-3u1p5umicfpqszp' # deleted_on_next_sweep
497     assert_not_empty Collection.unscoped.where(uuid: uuid)
498     SweepTrashedCollections.sweep_now
499     assert_empty Collection.unscoped.where(uuid: uuid)
500   end
501
502   test "delete referring links in SweepTrashedCollections" do
503     uuid = collections(:trashed_on_next_sweep).uuid
504     act_as_system_user do
505       Link.create!(head_uuid: uuid,
506                    tail_uuid: system_user_uuid,
507                    link_class: 'whatever',
508                    name: 'something')
509     end
510     past = db_current_time
511     Collection.unscoped.where(uuid: uuid).
512       update_all(is_trashed: true, trash_at: past, delete_at: past)
513     assert_not_empty Collection.unscoped.where(uuid: uuid)
514     SweepTrashedCollections.sweep_now
515     assert_empty Collection.unscoped.where(uuid: uuid)
516   end
517 end