13111: Merge branch 'master' into 12308-go-fuse
[arvados.git] / services / api / test / unit / collection_test.rb
1 # Copyright (C) The Arvados Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: AGPL-3.0
4
5 require 'test_helper'
6 require 'sweep_trashed_collections'
7
8 class CollectionTest < ActiveSupport::TestCase
9   include DbCurrentTime
10
11   def create_collection name, enc=nil
12     txt = ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:#{name}.txt\n"
13     txt.force_encoding(enc) if enc
14     return Collection.create(manifest_text: txt)
15   end
16
17   test 'accept ASCII manifest_text' do
18     act_as_system_user do
19       c = create_collection 'foo', Encoding::US_ASCII
20       assert c.valid?
21     end
22   end
23
24   test 'accept UTF-8 manifest_text' do
25     act_as_system_user do
26       c = create_collection "f\xc3\x98\xc3\x98", Encoding::UTF_8
27       assert c.valid?
28     end
29   end
30
31   test 'refuse manifest_text with invalid UTF-8 byte sequence' do
32     act_as_system_user do
33       c = create_collection "f\xc8o", Encoding::UTF_8
34       assert !c.valid?
35       assert_equal [:manifest_text], c.errors.messages.keys
36       assert_match(/UTF-8/, c.errors.messages[:manifest_text].first)
37     end
38   end
39
40   test 'refuse manifest_text with non-UTF-8 encoding' do
41     act_as_system_user do
42       c = create_collection "f\xc8o", Encoding::ASCII_8BIT
43       assert !c.valid?
44       assert_equal [:manifest_text], c.errors.messages.keys
45       assert_match(/UTF-8/, c.errors.messages[:manifest_text].first)
46     end
47   end
48
49   [
50     ". 0:0:foo.txt",
51     ". d41d8cd98f00b204e9800998ecf8427e foo.txt",
52     "d41d8cd98f00b204e9800998ecf8427e 0:0:foo.txt",
53     ". d41d8cd98f00b204e9800998ecf8427e 0:0:foo.txt",
54   ].each do |manifest_text|
55     test "create collection with invalid manifest text #{manifest_text} and expect error" do
56       act_as_system_user do
57         c = Collection.create(manifest_text: manifest_text)
58         assert !c.valid?
59       end
60     end
61   end
62
63   [
64     nil,
65     "",
66     ". d41d8cd98f00b204e9800998ecf8427e 0:0:foo.txt\n",
67   ].each do |manifest_text|
68     test "create collection with valid manifest text #{manifest_text.inspect} and expect success" do
69       act_as_system_user do
70         c = Collection.create(manifest_text: manifest_text)
71         assert c.valid?
72       end
73     end
74   end
75
76   [
77     ". 0:0:foo.txt",
78     ". d41d8cd98f00b204e9800998ecf8427e foo.txt",
79     "d41d8cd98f00b204e9800998ecf8427e 0:0:foo.txt",
80     ". d41d8cd98f00b204e9800998ecf8427e 0:0:foo.txt",
81   ].each do |manifest_text|
82     test "update collection with invalid manifest text #{manifest_text} and expect error" do
83       act_as_system_user do
84         c = create_collection 'foo', Encoding::US_ASCII
85         assert c.valid?
86
87         c.update_attribute 'manifest_text', manifest_text
88         assert !c.valid?
89       end
90     end
91   end
92
93   [
94     nil,
95     "",
96     ". d41d8cd98f00b204e9800998ecf8427e 0:0:foo.txt\n",
97   ].each do |manifest_text|
98     test "update collection with valid manifest text #{manifest_text.inspect} and expect success" do
99       act_as_system_user do
100         c = create_collection 'foo', Encoding::US_ASCII
101         assert c.valid?
102
103         c.update_attribute 'manifest_text', manifest_text
104         assert c.valid?
105       end
106     end
107   end
108
109   test 'create and update collection and verify file_names' do
110     act_as_system_user do
111       c = create_collection 'foo', Encoding::US_ASCII
112       assert c.valid?
113       created_file_names = c.file_names
114       assert created_file_names
115       assert_match(/foo.txt/, c.file_names)
116
117       c.update_attribute 'manifest_text', ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo2.txt\n"
118       assert_not_equal created_file_names, c.file_names
119       assert_match(/foo2.txt/, c.file_names)
120     end
121   end
122
123   [
124     [2**8, false],
125     [2**18, true],
126   ].each do |manifest_size, allow_truncate|
127     test "create collection with manifest size #{manifest_size} with allow_truncate=#{allow_truncate},
128           and not expect exceptions even on very large manifest texts" do
129       # file_names has a max size, hence there will be no errors even on large manifests
130       act_as_system_user do
131         manifest_text = ''
132         index = 0
133         while manifest_text.length < manifest_size
134           manifest_text += "./blurfl d41d8cd98f00b204e9800998ecf8427e+0 0:0:veryverylongfilename000000000000#{index}.txt\n"
135           index += 1
136         end
137         manifest_text += "./laststreamname d41d8cd98f00b204e9800998ecf8427e+0 0:0:veryverylastfilename.txt\n"
138         c = Collection.create(manifest_text: manifest_text)
139
140         assert c.valid?
141         assert c.file_names
142         assert_match(/veryverylongfilename0000000000001.txt/, c.file_names)
143         assert_match(/veryverylongfilename0000000000002.txt/, c.file_names)
144         if not allow_truncate
145           assert_match(/veryverylastfilename/, c.file_names)
146           assert_match(/laststreamname/, c.file_names)
147         end
148       end
149     end
150   end
151
152   test "full text search for collections" do
153     # file_names column does not get populated when fixtures are loaded, hence setup test data
154     act_as_system_user do
155       Collection.create(manifest_text: ". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo\n")
156       Collection.create(manifest_text: ". 37b51d194a7513e45b56f6524f2d51f2+3 0:3:bar\n")
157       Collection.create(manifest_text: ". 85877ca2d7e05498dd3d109baf2df106+95+A3a4e26a366ee7e4ed3e476ccf05354761be2e4ae@545a9920 0:95:file_in_subdir1\n./subdir2/subdir3 2bbc341c702df4d8f42ec31f16c10120+64+A315d7e7bad2ce937e711fc454fae2d1194d14d64@545a9920 0:32:file1.txt 32:32:file2.txt\n./subdir2/subdir3/subdir4 2bbc341c702df4d8f42ec31f16c10120+64+A315d7e7bad2ce937e711fc454fae2d1194d14d64@545a9920 0:32:file3.txt 32:32:file4.txt\n")
158     end
159
160     [
161       ['foo', true],
162       ['foo bar', false],                     # no collection matching both
163       ['foo&bar', false],                     # no collection matching both
164       ['foo|bar', true],                      # works only no spaces between the words
165       ['Gnu public', true],                   # both prefixes found, though not consecutively
166       ['Gnu&public', true],                   # both prefixes found, though not consecutively
167       ['file4', true],                        # prefix match
168       ['file4.txt', true],                    # whole string match
169       ['filex', false],                       # no such prefix
170       ['subdir', true],                       # prefix matches
171       ['subdir2', true],
172       ['subdir2/', true],
173       ['subdir2/subdir3', true],
174       ['subdir2/subdir3/subdir4', true],
175       ['subdir2 file4', true],                # look for both prefixes
176       ['subdir4', false],                     # not a prefix match
177     ].each do |search_filter, expect_results|
178       search_filters = search_filter.split.each {|s| s.concat(':*')}.join('&')
179       results = Collection.where("#{Collection.full_text_tsvector} @@ to_tsquery(?)",
180                                  "#{search_filters}")
181       if expect_results
182         refute_empty results
183       else
184         assert_empty results
185       end
186     end
187   end
188
189   test 'portable data hash with missing size hints' do
190     [[". d41d8cd98f00b204e9800998ecf8427e+0+Bar 0:0:x\n",
191       ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:x\n"],
192      [". d41d8cd98f00b204e9800998ecf8427e+Foo 0:0:x\n",
193       ". d41d8cd98f00b204e9800998ecf8427e 0:0:x\n"],
194      [". d41d8cd98f00b204e9800998ecf8427e 0:0:x\n",
195       ". d41d8cd98f00b204e9800998ecf8427e 0:0:x\n"],
196     ].each do |unportable, portable|
197       c = Collection.new(manifest_text: unportable)
198       assert c.valid?
199       assert_equal(Digest::MD5.hexdigest(portable)+"+#{portable.length}",
200                    c.portable_data_hash)
201     end
202   end
203
204   pdhmanifest = ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:x\n"
205   pdhmd5 = Digest::MD5.hexdigest pdhmanifest
206   [[true, nil],
207    [true, pdhmd5],
208    [true, pdhmd5+'+12345'],
209    [true, pdhmd5+'+'+pdhmanifest.length.to_s],
210    [true, pdhmd5+'+12345+Foo'],
211    [true, pdhmd5+'+Foo'],
212    [false, Digest::MD5.hexdigest(pdhmanifest.strip)],
213    [false, Digest::MD5.hexdigest(pdhmanifest.strip)+'+'+pdhmanifest.length.to_s],
214    [false, pdhmd5[0..30]],
215    [false, pdhmd5[0..30]+'z'],
216    [false, pdhmd5[0..24]+'000000000'],
217    [false, pdhmd5[0..24]+'000000000+0']].each do |isvalid, pdh|
218     test "portable_data_hash #{pdh.inspect} valid? == #{isvalid}" do
219       c = Collection.new manifest_text: pdhmanifest, portable_data_hash: pdh
220       assert_equal isvalid, c.valid?, c.errors.full_messages.to_s
221     end
222   end
223
224   test "storage_classes_desired cannot be empty" do
225     act_as_user users(:active) do
226       c = collections(:collection_owned_by_active)
227       c.update_attributes storage_classes_desired: ["hot"]
228       assert_equal ["hot"], c.storage_classes_desired
229       assert_raise ArvadosModel::InvalidStateTransitionError do
230         c.update_attributes storage_classes_desired: []
231       end
232     end
233   end
234
235   test "storage classes lists should only contain non-empty strings" do
236     c = collections(:storage_classes_desired_default_unconfirmed)
237     act_as_user users(:admin) do
238       assert c.update_attributes(storage_classes_desired: ["default", "a_string"],
239                                  storage_classes_confirmed: ["another_string"])
240       [
241         ["storage_classes_desired", ["default", 42]],
242         ["storage_classes_confirmed", [{the_answer: 42}]],
243         ["storage_classes_desired", ["default", ""]],
244         ["storage_classes_confirmed", [""]],
245       ].each do |attr, val|
246         assert_raise ArvadosModel::InvalidStateTransitionError do
247           assert c.update_attributes({attr => val})
248         end
249       end
250     end
251   end
252
253   test "storage_classes_confirmed* can be set by admin user" do
254     c = collections(:storage_classes_desired_default_unconfirmed)
255     act_as_user users(:admin) do
256       assert c.update_attributes(storage_classes_confirmed: ["default"],
257                                  storage_classes_confirmed_at: Time.now)
258     end
259   end
260
261   test "storage_classes_confirmed* cannot be set by non-admin user" do
262     act_as_user users(:active) do
263       c = collections(:storage_classes_desired_default_unconfirmed)
264       # Cannot set just one at a time.
265       assert_raise ArvadosModel::PermissionDeniedError do
266         c.update_attributes storage_classes_confirmed: ["default"]
267       end
268       c.reload
269       assert_raise ArvadosModel::PermissionDeniedError do
270         c.update_attributes storage_classes_confirmed_at: Time.now
271       end
272       # Cannot set bot at once, either.
273       c.reload
274       assert_raise ArvadosModel::PermissionDeniedError do
275         assert c.update_attributes(storage_classes_confirmed: ["default"],
276                                    storage_classes_confirmed_at: Time.now)
277       end
278     end
279   end
280
281   test "storage_classes_confirmed* can be cleared (but only together) by non-admin user" do
282     act_as_user users(:active) do
283       c = collections(:storage_classes_desired_default_confirmed_default)
284       # Cannot clear just one at a time.
285       assert_raise ArvadosModel::PermissionDeniedError do
286         c.update_attributes storage_classes_confirmed: []
287       end
288       c.reload
289       assert_raise ArvadosModel::PermissionDeniedError do
290         c.update_attributes storage_classes_confirmed_at: nil
291       end
292       # Can clear both at once.
293       c.reload
294       assert c.update_attributes(storage_classes_confirmed: [],
295                                  storage_classes_confirmed_at: nil)
296     end
297   end
298
299   [0, 2, 4, nil].each do |ask|
300     test "set replication_desired to #{ask.inspect}" do
301       Rails.configuration.default_collection_replication = 2
302       act_as_user users(:active) do
303         c = collections(:replication_undesired_unconfirmed)
304         c.update_attributes replication_desired: ask
305         assert_equal ask, c.replication_desired
306       end
307     end
308   end
309
310   test "replication_confirmed* can be set by admin user" do
311     c = collections(:replication_desired_2_unconfirmed)
312     act_as_user users(:admin) do
313       assert c.update_attributes(replication_confirmed: 2,
314                                  replication_confirmed_at: Time.now)
315     end
316   end
317
318   test "replication_confirmed* cannot be set by non-admin user" do
319     act_as_user users(:active) do
320       c = collections(:replication_desired_2_unconfirmed)
321       # Cannot set just one at a time.
322       assert_raise ArvadosModel::PermissionDeniedError do
323         c.update_attributes replication_confirmed: 1
324       end
325       assert_raise ArvadosModel::PermissionDeniedError do
326         c.update_attributes replication_confirmed_at: Time.now
327       end
328       # Cannot set both at once, either.
329       assert_raise ArvadosModel::PermissionDeniedError do
330         c.update_attributes(replication_confirmed: 1,
331                             replication_confirmed_at: Time.now)
332       end
333     end
334   end
335
336   test "replication_confirmed* can be cleared (but only together) by non-admin user" do
337     act_as_user users(:active) do
338       c = collections(:replication_desired_2_confirmed_2)
339       # Cannot clear just one at a time.
340       assert_raise ArvadosModel::PermissionDeniedError do
341         c.update_attributes replication_confirmed: nil
342       end
343       c.reload
344       assert_raise ArvadosModel::PermissionDeniedError do
345         c.update_attributes replication_confirmed_at: nil
346       end
347       # Can clear both at once.
348       c.reload
349       assert c.update_attributes(replication_confirmed: nil,
350                                  replication_confirmed_at: nil)
351     end
352   end
353
354   test "clear replication_confirmed* when introducing a new block in manifest" do
355     c = collections(:replication_desired_2_confirmed_2)
356     act_as_user users(:active) do
357       assert c.update_attributes(manifest_text: collections(:user_agreement).signed_manifest_text)
358       assert_nil c.replication_confirmed
359       assert_nil c.replication_confirmed_at
360     end
361   end
362
363   test "don't clear replication_confirmed* when just renaming a file" do
364     c = collections(:replication_desired_2_confirmed_2)
365     act_as_user users(:active) do
366       new_manifest = c.signed_manifest_text.sub(':bar', ':foo')
367       assert c.update_attributes(manifest_text: new_manifest)
368       assert_equal 2, c.replication_confirmed
369       assert_not_nil c.replication_confirmed_at
370     end
371   end
372
373   test "don't clear replication_confirmed* when just deleting a data block" do
374     c = collections(:replication_desired_2_confirmed_2)
375     act_as_user users(:active) do
376       new_manifest = c.signed_manifest_text
377       new_manifest.sub!(/ \S+:bar/, '')
378       new_manifest.sub!(/ acbd\S+/, '')
379
380       # Confirm that we did just remove a block from the manifest (if
381       # not, this test would pass without testing the relevant case):
382       assert_operator new_manifest.length+40, :<, c.signed_manifest_text.length
383
384       assert c.update_attributes(manifest_text: new_manifest)
385       assert_equal 2, c.replication_confirmed
386       assert_not_nil c.replication_confirmed_at
387     end
388   end
389
390   test 'signature expiry does not exceed trash_at' do
391     act_as_user users(:active) do
392       t0 = db_current_time
393       c = Collection.create!(manifest_text: ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:x\n", name: 'foo')
394       c.update_attributes! trash_at: (t0 + 1.hours)
395       c.reload
396       sig_exp = /\+A[0-9a-f]{40}\@([0-9]+)/.match(c.signed_manifest_text)[1].to_i
397       assert_operator sig_exp.to_i, :<=, (t0 + 1.hours).to_i
398     end
399   end
400
401   test 'far-future expiry date cannot be used to circumvent configured permission ttl' do
402     act_as_user users(:active) do
403       c = Collection.create!(manifest_text: ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:x\n",
404                              name: 'foo',
405                              trash_at: db_current_time + 1.years)
406       sig_exp = /\+A[0-9a-f]{40}\@([0-9]+)/.match(c.signed_manifest_text)[1].to_i
407       expect_max_sig_exp = db_current_time.to_i + Rails.configuration.blob_signature_ttl
408       assert_operator c.trash_at.to_i, :>, expect_max_sig_exp
409       assert_operator sig_exp.to_i, :<=, expect_max_sig_exp
410     end
411   end
412
413   test "create collection with properties" do
414     act_as_system_user do
415       c = Collection.create(manifest_text: ". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo\n",
416                             properties: {'property_1' => 'value_1'})
417       assert c.valid?
418       assert_equal 'value_1', c.properties['property_1']
419     end
420   end
421
422   test 'create, delete, recreate collection with same name and owner' do
423     act_as_user users(:active) do
424       # create collection with name
425       c = Collection.create(manifest_text: '',
426                             name: "test collection name")
427       assert c.valid?
428       uuid = c.uuid
429
430       c = Collection.readable_by(current_user).where(uuid: uuid)
431       assert_not_empty c, 'Should be able to find live collection'
432
433       # mark collection as expired
434       c.first.update_attributes!(trash_at: Time.new.strftime("%Y-%m-%d"))
435       c = Collection.readable_by(current_user).where(uuid: uuid)
436       assert_empty c, 'Should not be able to find expired collection'
437
438       # recreate collection with the same name
439       c = Collection.create(manifest_text: '',
440                             name: "test collection name")
441       assert c.valid?
442     end
443   end
444
445   test 'trash_at cannot be set too far in the past' do
446     act_as_user users(:active) do
447       t0 = db_current_time
448       c = Collection.create!(manifest_text: '', name: 'foo')
449       c.update_attributes! trash_at: (t0 - 2.weeks)
450       c.reload
451       assert_operator c.trash_at, :>, t0
452     end
453   end
454
455   now = Time.now
456   [['trash-to-delete interval negative',
457     :collection_owned_by_active,
458     {trash_at: now+2.weeks, delete_at: now},
459     {state: :invalid}],
460    ['now-to-delete interval short',
461     :collection_owned_by_active,
462     {trash_at: now+3.days, delete_at: now+7.days},
463     {state: :trash_future}],
464    ['now-to-delete interval short, trash=delete',
465     :collection_owned_by_active,
466     {trash_at: now+3.days, delete_at: now+3.days},
467     {state: :trash_future}],
468    ['trash-to-delete interval ok',
469     :collection_owned_by_active,
470     {trash_at: now, delete_at: now+15.days},
471     {state: :trash_now}],
472    ['trash-to-delete interval short, but far enough in future',
473     :collection_owned_by_active,
474     {trash_at: now+13.days, delete_at: now+15.days},
475     {state: :trash_future}],
476    ['trash by setting is_trashed bool',
477     :collection_owned_by_active,
478     {is_trashed: true},
479     {state: :trash_now}],
480    ['trash in future by setting just trash_at',
481     :collection_owned_by_active,
482     {trash_at: now+1.week},
483     {state: :trash_future}],
484    ['trash in future by setting trash_at and delete_at',
485     :collection_owned_by_active,
486     {trash_at: now+1.week, delete_at: now+4.weeks},
487     {state: :trash_future}],
488    ['untrash by clearing is_trashed bool',
489     :expired_collection,
490     {is_trashed: false},
491     {state: :not_trash}],
492   ].each do |test_name, fixture_name, updates, expect|
493     test test_name do
494       act_as_user users(:active) do
495         min_exp = (db_current_time +
496                    Rails.configuration.blob_signature_ttl.seconds)
497         if fixture_name == :expired_collection
498           # Fixture-finder shorthand doesn't find trashed collections
499           # because they're not in the default scope.
500           c = Collection.find_by_uuid('zzzzz-4zz18-mto52zx1s7sn3ih')
501         else
502           c = collections(fixture_name)
503         end
504         updates_ok = c.update_attributes(updates)
505         expect_valid = expect[:state] != :invalid
506         assert_equal expect_valid, updates_ok, c.errors.full_messages.to_s
507         case expect[:state]
508         when :invalid
509           refute c.valid?
510         when :trash_now
511           assert c.is_trashed
512           assert_not_nil c.trash_at
513           assert_operator c.trash_at, :<=, db_current_time
514           assert_not_nil c.delete_at
515           assert_operator c.delete_at, :>=, min_exp
516         when :trash_future
517           refute c.is_trashed
518           assert_not_nil c.trash_at
519           assert_operator c.trash_at, :>, db_current_time
520           assert_not_nil c.delete_at
521           assert_operator c.delete_at, :>=, c.trash_at
522           # Currently this minimum interval is needed to prevent early
523           # garbage collection:
524           assert_operator c.delete_at, :>=, min_exp
525         when :not_trash
526           refute c.is_trashed
527           assert_nil c.trash_at
528           assert_nil c.delete_at
529         else
530           raise "bad expect[:state]==#{expect[:state].inspect} in test case"
531         end
532       end
533     end
534   end
535
536   test 'default trash interval > blob signature ttl' do
537     Rails.configuration.default_trash_lifetime = 86400 * 21 # 3 weeks
538     start = db_current_time
539     act_as_user users(:active) do
540       c = Collection.create!(manifest_text: '', name: 'foo')
541       c.update_attributes!(trash_at: start + 86400.seconds)
542       assert_operator c.delete_at, :>=, start + (86400*22).seconds
543       assert_operator c.delete_at, :<, start + (86400*22 + 30).seconds
544       c.destroy
545
546       c = Collection.create!(manifest_text: '', name: 'foo')
547       c.update_attributes!(is_trashed: true)
548       assert_operator c.delete_at, :>=, start + (86400*21).seconds
549     end
550   end
551
552   test "find_all_for_docker_image resolves names that look like hashes" do
553     coll_list = Collection.
554       find_all_for_docker_image('a' * 64, nil, [users(:active)])
555     coll_uuids = coll_list.map(&:uuid)
556     assert_includes(coll_uuids, collections(:docker_image).uuid)
557   end
558
559   test "move to trash in SweepTrashedCollections" do
560     c = collections(:trashed_on_next_sweep)
561     refute_empty Collection.where('uuid=? and is_trashed=false', c.uuid)
562     assert_raises(ActiveRecord::RecordNotUnique) do
563       act_as_user users(:active) do
564         Collection.create!(owner_uuid: c.owner_uuid,
565                            name: c.name)
566       end
567     end
568     SweepTrashedCollections.sweep_now
569     c = Collection.where('uuid=? and is_trashed=true', c.uuid).first
570     assert c
571     act_as_user users(:active) do
572       assert Collection.create!(owner_uuid: c.owner_uuid,
573                                 name: c.name)
574     end
575   end
576
577   test "delete in SweepTrashedCollections" do
578     uuid = 'zzzzz-4zz18-3u1p5umicfpqszp' # deleted_on_next_sweep
579     assert_not_empty Collection.where(uuid: uuid)
580     SweepTrashedCollections.sweep_now
581     assert_empty Collection.where(uuid: uuid)
582   end
583
584   test "delete referring links in SweepTrashedCollections" do
585     uuid = collections(:trashed_on_next_sweep).uuid
586     act_as_system_user do
587       Link.create!(head_uuid: uuid,
588                    tail_uuid: system_user_uuid,
589                    link_class: 'whatever',
590                    name: 'something')
591     end
592     past = db_current_time
593     Collection.where(uuid: uuid).
594       update_all(is_trashed: true, trash_at: past, delete_at: past)
595     assert_not_empty Collection.where(uuid: uuid)
596     SweepTrashedCollections.sweep_now
597     assert_empty Collection.where(uuid: uuid)
598   end
599 end