1 # Copyright (C) The Arvados Authors. All rights reserved.
3 # SPDX-License-Identifier: AGPL-3.0
6 require 'sweep_trashed_objects'
8 class CollectionTest < ActiveSupport::TestCase
# Test helper. Builds a one-line manifest text whose only file token is
# "0:0:<name>.txt", optionally re-tags that string with encoding `enc`
# (force_encoding is only called when `enc` is given), and returns whatever
# Collection.create returns for it. `name` doubles as the collection name.
11 def create_collection name, enc=nil
12 txt = ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:#{name}.txt\n"
13 txt.force_encoding(enc) if enc
14 return Collection.create(manifest_text: txt, name: name)
17 test 'accept ASCII manifest_text' do
19 c = create_collection 'foo', Encoding::US_ASCII
24 test 'accept UTF-8 manifest_text' do
26 c = create_collection "f\xc3\x98\xc3\x98", Encoding::UTF_8
# The name "f\xc8o" puts a lone 0xC8 byte before an ASCII "o", which is not
# a valid UTF-8 sequence even though the string is tagged UTF-8. The record
# is expected to carry errors on manifest_text only, with a message that
# mentions UTF-8.
31 test 'refuse manifest_text with invalid UTF-8 byte sequence' do
33 c = create_collection "f\xc8o", Encoding::UTF_8
35 assert_equal [:manifest_text], c.errors.messages.keys
36 assert_match(/UTF-8/, c.errors.messages[:manifest_text].first)
# Same bytes as the invalid-UTF-8 case, but the manifest string is tagged
# ASCII-8BIT (binary). The record is again expected to carry errors on
# manifest_text only, with a message that mentions UTF-8.
40 test 'refuse manifest_text with non-UTF-8 encoding' do
42 c = create_collection "f\xc8o", Encoding::ASCII_8BIT
44 assert_equal [:manifest_text], c.errors.messages.keys
45 assert_match(/UTF-8/, c.errors.messages[:manifest_text].first)
51 ". d41d8cd98f00b204e9800998ecf8427e foo.txt",
52 "d41d8cd98f00b204e9800998ecf8427e 0:0:foo.txt",
53 ". d41d8cd98f00b204e9800998ecf8427e 0:0:foo.txt",
54 ].each do |manifest_text|
55 test "create collection with invalid manifest text #{manifest_text} and expect error" do
57 c = Collection.create(manifest_text: manifest_text)
66 ". d41d8cd98f00b204e9800998ecf8427e 0:0:foo.txt\n",
67 ].each do |manifest_text|
68 test "create collection with valid manifest text #{manifest_text.inspect} and expect success" do
70 c = Collection.create(manifest_text: manifest_text)
78 ". d41d8cd98f00b204e9800998ecf8427e foo.txt",
79 "d41d8cd98f00b204e9800998ecf8427e 0:0:foo.txt",
80 ". d41d8cd98f00b204e9800998ecf8427e 0:0:foo.txt",
81 ].each do |manifest_text|
82 test "update collection with invalid manifest text #{manifest_text} and expect error" do
84 c = create_collection 'foo', Encoding::US_ASCII
87 c.update_attribute 'manifest_text', manifest_text
96 ". d41d8cd98f00b204e9800998ecf8427e 0:0:foo.txt\n",
97 ].each do |manifest_text|
98 test "update collection with valid manifest text #{manifest_text.inspect} and expect success" do
100 c = create_collection 'foo', Encoding::US_ASCII
103 c.update_attribute 'manifest_text', manifest_text
110 [false, 'name', 'bar'],
111 [false, 'description', 'The quick brown fox jumps over the lazy dog'],
112 [false, 'properties', {'new_version' => true}],
113 [false, 'manifest_text', ". d41d8cd98f00b204e9800998ecf8427e 0:0:foo.txt\n"],
114 [true, 'name', 'bar'],
115 [true, 'description', 'The quick brown fox jumps over the lazy dog'],
116 [true, 'properties', {'new_version' => true}],
117 [true, 'manifest_text', ". d41d8cd98f00b204e9800998ecf8427e 0:0:foo.txt\n"],
118 ].each do |versioning, attr, val|
119 test "update collection #{attr} with versioning #{versioning ? '' : 'not '}enabled" do
120 Rails.configuration.collection_versioning = versioning
121 act_as_system_user do
122 # Create initial collection
123 c = create_collection 'foo', Encoding::US_ASCII
125 assert_equal 'foo', c.name
127 # Check current version attributes
128 assert_equal 1, c.version
129 assert_equal c.uuid, c.current_version_uuid
131 # Update attribute and check if version number should be incremented
132 old_value = c.attributes[attr]
133 c.update_attributes!({attr => val})
134 assert_equal versioning, c.version == 2
135 assert_equal val, c.attributes[attr]
138 # Search for the snapshot & previous value
139 assert_equal 2, Collection.where(current_version_uuid: c.uuid).count
140 s = Collection.where(current_version_uuid: c.uuid, version: 1).first
142 assert_equal old_value, s.attributes[attr]
144 # If versioning is disabled, only the current version should exist
145 assert_equal 1, Collection.where(current_version_uuid: c.uuid).count
146 assert_equal c, Collection.where(current_version_uuid: c.uuid).first
152 test 'with versioning enabled, simultaneous updates increment version correctly' do
153 Rails.configuration.collection_versioning = true
154 act_as_system_user do
155 # Create initial collection
156 col = create_collection 'foo', Encoding::US_ASCII
158 assert_equal 1, col.version
160 # Simulate simultaneous updates
161 c1 = Collection.where(uuid: col.uuid).first
162 assert_equal 1, c1.version
164 c2 = Collection.where(uuid: col.uuid).first
165 c2.description = 'foo collection'
167 assert_equal 1, c2.version
168 # with_lock forces a reload, so this shouldn't produce a unique violation error
170 assert_equal 3, c2.version
171 assert_equal 'foo collection', c2.description
# Checks that file_names is set when a collection is created and that it
# changes after manifest_text is replaced with one naming a different file.
175 test 'create and update collection and verify file_names' do
176 act_as_system_user do
177 c = create_collection 'foo', Encoding::US_ASCII
# Keep the value seen right after creation so the post-update value can be
# compared against it.
179 created_file_names = c.file_names
180 assert created_file_names
# NOTE(review): the "." in these patterns is unescaped, so it matches any
# character, not just a literal dot.
181 assert_match(/foo.txt/, c.file_names)
183 c.update_attribute 'manifest_text', ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo2.txt\n"
184 assert_not_equal created_file_names, c.file_names
185 assert_match(/foo2.txt/, c.file_names)
192 ].each do |manifest_size, allow_truncate|
193 test "create collection with manifest size #{manifest_size} with allow_truncate=#{allow_truncate},
194 and not expect exceptions even on very large manifest texts" do
195 # file_names has a max size, hence there will be no errors even on large manifests
196 act_as_system_user do
199 while manifest_text.length < manifest_size
200 manifest_text += "./blurfl d41d8cd98f00b204e9800998ecf8427e+0 0:0:veryverylongfilename000000000000#{index}.txt\n"
203 manifest_text += "./laststreamname d41d8cd98f00b204e9800998ecf8427e+0 0:0:veryverylastfilename.txt\n"
204 c = Collection.create(manifest_text: manifest_text)
208 assert_match(/veryverylongfilename0000000000001.txt/, c.file_names)
209 assert_match(/veryverylongfilename0000000000002.txt/, c.file_names)
210 if not allow_truncate
211 assert_match(/veryverylastfilename/, c.file_names)
212 assert_match(/laststreamname/, c.file_names)
218 test "full text search for collections" do
219 # file_names column does not get populated when fixtures are loaded, hence setup test data
220 act_as_system_user do
221 Collection.create(manifest_text: ". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo\n")
222 Collection.create(manifest_text: ". 37b51d194a7513e45b56f6524f2d51f2+3 0:3:bar\n")
223 Collection.create(manifest_text: ". 85877ca2d7e05498dd3d109baf2df106+95+A3a4e26a366ee7e4ed3e476ccf05354761be2e4ae@545a9920 0:95:file_in_subdir1\n./subdir2/subdir3 2bbc341c702df4d8f42ec31f16c10120+64+A315d7e7bad2ce937e711fc454fae2d1194d14d64@545a9920 0:32:file1.txt 32:32:file2.txt\n./subdir2/subdir3/subdir4 2bbc341c702df4d8f42ec31f16c10120+64+A315d7e7bad2ce937e711fc454fae2d1194d14d64@545a9920 0:32:file3.txt 32:32:file4.txt\n")
228 ['foo bar', false], # no collection matching both
229 ['foo&bar', false], # no collection matching both
230 ['foo|bar', true], # works only with no spaces between the words
231 ['Gnu public', true], # both prefixes found, though not consecutively
232 ['Gnu&public', true], # both prefixes found, though not consecutively
233 ['file4', true], # prefix match
234 ['file4.txt', true], # whole string match
235 ['filex', false], # no such prefix
236 ['subdir', true], # prefix matches
239 ['subdir2/subdir3', true],
240 ['subdir2/subdir3/subdir4', true],
241 ['subdir2 file4', true], # look for both prefixes
242 ['subdir4', false], # not a prefix match
243 ].each do |search_filter, expect_results|
244 search_filters = search_filter.split.each {|s| s.concat(':*')}.join('&')
245 results = Collection.where("#{Collection.full_text_tsvector} @@ to_tsquery(?)",
# Each pair is [unportable, portable]: the second manifest equals the first
# with the trailing "+Bar" / "+Foo" locator hints removed (the third pair is
# already identical). Two of the portable forms have no "+<size>" on the
# locator at all, which is the "missing size hints" case in the test name.
255 test 'portable data hash with missing size hints' do
256 [[". d41d8cd98f00b204e9800998ecf8427e+0+Bar 0:0:x\n",
257 ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:x\n"],
258 [". d41d8cd98f00b204e9800998ecf8427e+Foo 0:0:x\n",
259 ". d41d8cd98f00b204e9800998ecf8427e 0:0:x\n"],
260 [". d41d8cd98f00b204e9800998ecf8427e 0:0:x\n",
261 ". d41d8cd98f00b204e9800998ecf8427e 0:0:x\n"],
262 ].each do |unportable, portable|
263 c = Collection.new(manifest_text: unportable)
# Expected hash is "<md5 of portable text>+<length of portable text>".
# String#length counts characters; these manifests are pure ASCII, so that
# equals the byte count here.
265 assert_equal(Digest::MD5.hexdigest(portable)+"+#{portable.length}",
266 c.portable_data_hash)
270 pdhmanifest = ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:x\n"
271 pdhmd5 = Digest::MD5.hexdigest pdhmanifest
274 [true, pdhmd5+'+12345'],
275 [true, pdhmd5+'+'+pdhmanifest.length.to_s],
276 [true, pdhmd5+'+12345+Foo'],
277 [true, pdhmd5+'+Foo'],
278 [false, Digest::MD5.hexdigest(pdhmanifest.strip)],
279 [false, Digest::MD5.hexdigest(pdhmanifest.strip)+'+'+pdhmanifest.length.to_s],
280 [false, pdhmd5[0..30]],
281 [false, pdhmd5[0..30]+'z'],
282 [false, pdhmd5[0..24]+'000000000'],
283 [false, pdhmd5[0..24]+'000000000+0']].each do |isvalid, pdh|
284 test "portable_data_hash #{pdh.inspect} valid? == #{isvalid}" do
285 c = Collection.new manifest_text: pdhmanifest, portable_data_hash: pdh
286 assert_equal isvalid, c.valid?, c.errors.full_messages.to_s
# As the active user: setting storage_classes_desired to ["hot"] is expected
# to stick, while setting it to an empty list is expected to raise
# ArvadosModel::InvalidStateTransitionError.
290 test "storage_classes_desired cannot be empty" do
291 act_as_user users(:active) do
292 c = collections(:collection_owned_by_active)
# The return value of this update is not checked; the following assert_equal
# verifies the in-memory attribute instead.
293 c.update_attributes storage_classes_desired: ["hot"]
294 assert_equal ["hot"], c.storage_classes_desired
295 assert_raise ArvadosModel::InvalidStateTransitionError do
296 c.update_attributes storage_classes_desired: []
301 test "storage classes lists should only contain non-empty strings" do
302 c = collections(:storage_classes_desired_default_unconfirmed)
303 act_as_user users(:admin) do
304 assert c.update_attributes(storage_classes_desired: ["default", "a_string"],
305 storage_classes_confirmed: ["another_string"])
307 ["storage_classes_desired", ["default", 42]],
308 ["storage_classes_confirmed", [{the_answer: 42}]],
309 ["storage_classes_desired", ["default", ""]],
310 ["storage_classes_confirmed", [""]],
311 ].each do |attr, val|
312 assert_raise ArvadosModel::InvalidStateTransitionError do
313 assert c.update_attributes({attr => val})
319 test "storage_classes_confirmed* can be set by admin user" do
320 c = collections(:storage_classes_desired_default_unconfirmed)
321 act_as_user users(:admin) do
322 assert c.update_attributes(storage_classes_confirmed: ["default"],
323 storage_classes_confirmed_at: Time.now)
327 test "storage_classes_confirmed* cannot be set by non-admin user" do
328 act_as_user users(:active) do
329 c = collections(:storage_classes_desired_default_unconfirmed)
330 # Cannot set just one at a time.
331 assert_raise ArvadosModel::PermissionDeniedError do
332 c.update_attributes storage_classes_confirmed: ["default"]
335 assert_raise ArvadosModel::PermissionDeniedError do
336 c.update_attributes storage_classes_confirmed_at: Time.now
338 # Cannot set both at once, either.
340 assert_raise ArvadosModel::PermissionDeniedError do
341 assert c.update_attributes(storage_classes_confirmed: ["default"],
342 storage_classes_confirmed_at: Time.now)
347 test "storage_classes_confirmed* can be cleared (but only together) by non-admin user" do
348 act_as_user users(:active) do
349 c = collections(:storage_classes_desired_default_confirmed_default)
350 # Cannot clear just one at a time.
351 assert_raise ArvadosModel::PermissionDeniedError do
352 c.update_attributes storage_classes_confirmed: []
355 assert_raise ArvadosModel::PermissionDeniedError do
356 c.update_attributes storage_classes_confirmed_at: nil
358 # Can clear both at once.
360 assert c.update_attributes(storage_classes_confirmed: [],
361 storage_classes_confirmed_at: nil)
# Generates one test per requested replication level (0, 2, 4 and nil).
# Each test pins default_collection_replication to 2, updates the fixture as
# the active user, and checks that replication_desired reads back exactly as
# requested (including nil).
365 [0, 2, 4, nil].each do |ask|
366 test "set replication_desired to #{ask.inspect}" do
367 Rails.configuration.default_collection_replication = 2
368 act_as_user users(:active) do
369 c = collections(:replication_undesired_unconfirmed)
# The boolean result of update_attributes is not asserted here; only the
# attribute value is checked afterwards.
370 c.update_attributes replication_desired: ask
371 assert_equal ask, c.replication_desired
376 test "replication_confirmed* can be set by admin user" do
377 c = collections(:replication_desired_2_unconfirmed)
378 act_as_user users(:admin) do
379 assert c.update_attributes(replication_confirmed: 2,
380 replication_confirmed_at: Time.now)
384 test "replication_confirmed* cannot be set by non-admin user" do
385 act_as_user users(:active) do
386 c = collections(:replication_desired_2_unconfirmed)
387 # Cannot set just one at a time.
388 assert_raise ArvadosModel::PermissionDeniedError do
389 c.update_attributes replication_confirmed: 1
391 assert_raise ArvadosModel::PermissionDeniedError do
392 c.update_attributes replication_confirmed_at: Time.now
394 # Cannot set both at once, either.
395 assert_raise ArvadosModel::PermissionDeniedError do
396 c.update_attributes(replication_confirmed: 1,
397 replication_confirmed_at: Time.now)
402 test "replication_confirmed* can be cleared (but only together) by non-admin user" do
403 act_as_user users(:active) do
404 c = collections(:replication_desired_2_confirmed_2)
405 # Cannot clear just one at a time.
406 assert_raise ArvadosModel::PermissionDeniedError do
407 c.update_attributes replication_confirmed: nil
410 assert_raise ArvadosModel::PermissionDeniedError do
411 c.update_attributes replication_confirmed_at: nil
413 # Can clear both at once.
415 assert c.update_attributes(replication_confirmed: nil,
416 replication_confirmed_at: nil)
# Replaces the fixture's manifest with the signed manifest of a different
# fixture (:user_agreement) as the active user, then expects both
# replication_confirmed and replication_confirmed_at to be nil.
420 test "clear replication_confirmed* when introducing a new block in manifest" do
421 c = collections(:replication_desired_2_confirmed_2)
422 act_as_user users(:active) do
423 assert c.update_attributes(manifest_text: collections(:user_agreement).signed_manifest_text)
424 assert_nil c.replication_confirmed
425 assert_nil c.replication_confirmed_at
# Rewrites only the first ":bar" in the signed manifest to ":foo" (String#sub
# replaces a single occurrence) and saves it as the active user. The
# confirmation fields are expected to survive: replication_confirmed stays 2
# and replication_confirmed_at stays non-nil.
429 test "don't clear replication_confirmed* when just renaming a file" do
430 c = collections(:replication_desired_2_confirmed_2)
431 act_as_user users(:active) do
432 new_manifest = c.signed_manifest_text.sub(':bar', ':foo')
433 assert c.update_attributes(manifest_text: new_manifest)
434 assert_equal 2, c.replication_confirmed
435 assert_not_nil c.replication_confirmed_at
# Removes one file token (the first " <...>:bar") and one locator (the first
# token starting with " acbd") from the signed manifest, saves the result as
# the active user, and expects the confirmation fields to be left intact.
439 test "don't clear replication_confirmed* when just deleting a data block" do
440 c = collections(:replication_desired_2_confirmed_2)
441 act_as_user users(:active) do
442 new_manifest = c.signed_manifest_text
# sub! edits new_manifest in place, one match each.
443 new_manifest.sub!(/ \S+:bar/, '')
444 new_manifest.sub!(/ acbd\S+/, '')
446 # Confirm that we did just remove a block from the manifest (if
447 # not, this test would pass without testing the relevant case):
# The comparison calls c.signed_manifest_text again, so it relies on that
# call not returning the string mutated above. The 40-character margin is
# presumably sized to exceed a bare file token -- TODO confirm.
448 assert_operator new_manifest.length+40, :<, c.signed_manifest_text.length
450 assert c.update_attributes(manifest_text: new_manifest)
451 assert_equal 2, c.replication_confirmed
452 assert_not_nil c.replication_confirmed_at
# Sets trash_at to t0 + 1 hour and checks that the expiry timestamp embedded
# in the signed manifest is no later than that.
# NOTE(review): the assignment of t0 is not visible in this excerpt --
# presumably a "current time" captured before the collection is created;
# confirm against the full file.
456 test 'signature expiry does not exceed trash_at' do
457 act_as_user users(:active) do
459 c = Collection.create!(manifest_text: ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:x\n", name: 'foo')
460 c.update_attributes! trash_at: (t0 + 1.hours)
# Capture group 1 is the run of digits after "@" in the first
# "+A<40 hex chars>@<digits>" hint found in the signed manifest.
462 sig_exp = /\+A[0-9a-f]{40}\@([0-9]+)/.match(c.signed_manifest_text)[1].to_i
# sig_exp is already an Integer, so the extra .to_i below is a no-op.
463 assert_operator sig_exp.to_i, :<=, (t0 + 1.hours).to_i
# Creates a collection whose trash_at is one year out and checks two things:
# trash_at really is beyond (now + blob_signature_ttl), and the expiry
# embedded in the signed manifest is still capped at (now + blob_signature_ttl).
467 test 'far-future expiry date cannot be used to circumvent configured permission ttl' do
468 act_as_user users(:active) do
469 c = Collection.create!(manifest_text: ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:x\n",
471 trash_at: db_current_time + 1.years)
# Capture group 1 is the run of digits after "@" in the first
# "+A<40 hex chars>@<digits>" hint found in the signed manifest.
472 sig_exp = /\+A[0-9a-f]{40}\@([0-9]+)/.match(c.signed_manifest_text)[1].to_i
# The bound is computed after the manifest was signed, so it can only be
# equal to or later than the bound in effect at signing time.
473 expect_max_sig_exp = db_current_time.to_i + Rails.configuration.blob_signature_ttl
# Guard: make sure the scenario really has trash_at beyond the TTL window.
474 assert_operator c.trash_at.to_i, :>, expect_max_sig_exp
475 assert_operator sig_exp.to_i, :<=, expect_max_sig_exp
479 test "create collection with properties" do
480 act_as_system_user do
481 c = Collection.create(manifest_text: ". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo\n",
482 properties: {'property_1' => 'value_1'})
484 assert_equal 'value_1', c.properties['property_1']
488 test 'create, delete, recreate collection with same name and owner' do
489 act_as_user users(:active) do
490 # create collection with name
491 c = Collection.create(manifest_text: '',
492 name: "test collection name")
496 c = Collection.readable_by(current_user).where(uuid: uuid)
497 assert_not_empty c, 'Should be able to find live collection'
499 # mark collection as expired
500 c.first.update_attributes!(trash_at: Time.new.strftime("%Y-%m-%d"))
501 c = Collection.readable_by(current_user).where(uuid: uuid)
502 assert_empty c, 'Should not be able to find expired collection'
504 # recreate collection with the same name
505 c = Collection.create(manifest_text: '',
506 name: "test collection name")
511 test 'trash_at cannot be set too far in the past' do
512 act_as_user users(:active) do
514 c = Collection.create!(manifest_text: '', name: 'foo')
515 c.update_attributes! trash_at: (t0 - 2.weeks)
517 assert_operator c.trash_at, :>, t0
522 [['trash-to-delete interval negative',
523 :collection_owned_by_active,
524 {trash_at: now+2.weeks, delete_at: now},
526 ['now-to-delete interval short',
527 :collection_owned_by_active,
528 {trash_at: now+3.days, delete_at: now+7.days},
529 {state: :trash_future}],
530 ['now-to-delete interval short, trash=delete',
531 :collection_owned_by_active,
532 {trash_at: now+3.days, delete_at: now+3.days},
533 {state: :trash_future}],
534 ['trash-to-delete interval ok',
535 :collection_owned_by_active,
536 {trash_at: now, delete_at: now+15.days},
537 {state: :trash_now}],
538 ['trash-to-delete interval short, but far enough in future',
539 :collection_owned_by_active,
540 {trash_at: now+13.days, delete_at: now+15.days},
541 {state: :trash_future}],
542 ['trash by setting is_trashed bool',
543 :collection_owned_by_active,
545 {state: :trash_now}],
546 ['trash in future by setting just trash_at',
547 :collection_owned_by_active,
548 {trash_at: now+1.week},
549 {state: :trash_future}],
550 ['trash in future by setting trash_at and delete_at',
551 :collection_owned_by_active,
552 {trash_at: now+1.week, delete_at: now+4.weeks},
553 {state: :trash_future}],
554 ['untrash by clearing is_trashed bool',
557 {state: :not_trash}],
558 ].each do |test_name, fixture_name, updates, expect|
560 act_as_user users(:active) do
561 min_exp = (db_current_time +
562 Rails.configuration.blob_signature_ttl.seconds)
563 if fixture_name == :expired_collection
564 # Fixture-finder shorthand doesn't find trashed collections
565 # because they're not in the default scope.
566 c = Collection.find_by_uuid('zzzzz-4zz18-mto52zx1s7sn3ih')
568 c = collections(fixture_name)
570 updates_ok = c.update_attributes(updates)
571 expect_valid = expect[:state] != :invalid
572 assert_equal expect_valid, updates_ok, c.errors.full_messages.to_s
578 assert_not_nil c.trash_at
579 assert_operator c.trash_at, :<=, db_current_time
580 assert_not_nil c.delete_at
581 assert_operator c.delete_at, :>=, min_exp
584 assert_not_nil c.trash_at
585 assert_operator c.trash_at, :>, db_current_time
586 assert_not_nil c.delete_at
587 assert_operator c.delete_at, :>=, c.trash_at
588 # Currently this minimum interval is needed to prevent early
589 # garbage collection:
590 assert_operator c.delete_at, :>=, min_exp
593 assert_nil c.trash_at
594 assert_nil c.delete_at
596 raise "bad expect[:state]==#{expect[:state].inspect} in test case"
# With default_trash_lifetime set to 21 days, checks the delete_at that
# results from (a) setting trash_at one day ahead and (b) setting
# is_trashed directly.
602 test 'default trash interval > blob signature ttl' do
603 Rails.configuration.default_trash_lifetime = 86400 * 21 # 3 weeks
604 start = db_current_time
605 act_as_user users(:active) do
606 c = Collection.create!(manifest_text: '', name: 'foo')
# trash_at = start + 1 day, so delete_at is expected at start + 22 days
# (1 day + the 21-day lifetime), with 30 seconds of slack on the upper bound.
607 c.update_attributes!(trash_at: start + 86400.seconds)
608 assert_operator c.delete_at, :>=, start + (86400*22).seconds
609 assert_operator c.delete_at, :<, start + (86400*22 + 30).seconds
# Second collection: trashing via the boolean is expected to give a
# delete_at at least the full 21-day lifetime after start.
612 c = Collection.create!(manifest_text: '', name: 'foo')
613 c.update_attributes!(is_trashed: true)
614 assert_operator c.delete_at, :>=, start + (86400*21).seconds
618 test "find_all_for_docker_image resolves names that look like hashes" do
619 coll_list = Collection.
620 find_all_for_docker_image('a' * 64, nil, [users(:active)])
621 coll_uuids = coll_list.map(&:uuid)
622 assert_includes(coll_uuids, collections(:docker_image).uuid)
625 test "move collections to trash in SweepTrashedObjects" do
626 c = collections(:trashed_on_next_sweep)
627 refute_empty Collection.where('uuid=? and is_trashed=false', c.uuid)
628 assert_raises(ActiveRecord::RecordNotUnique) do
629 act_as_user users(:active) do
630 Collection.create!(owner_uuid: c.owner_uuid,
634 SweepTrashedObjects.sweep_now
635 c = Collection.where('uuid=? and is_trashed=true', c.uuid).first
637 act_as_user users(:active) do
638 assert Collection.create!(owner_uuid: c.owner_uuid,
# Looks the collection up by a literal UUID (labelled deleted_on_next_sweep
# in the trailing comment), confirms it exists, runs the sweep synchronously,
# and confirms the row is gone.
643 test "delete collections in SweepTrashedObjects" do
644 uuid = 'zzzzz-4zz18-3u1p5umicfpqszp' # deleted_on_next_sweep
645 assert_not_empty Collection.where(uuid: uuid)
646 SweepTrashedObjects.sweep_now
647 assert_empty Collection.where(uuid: uuid)
# Creates a link (as the system user) whose head_uuid is the fixture
# collection, marks that collection as trashed and due for deletion, runs the
# sweep, and checks the collection row is gone.
# NOTE(review): the test name is about referring links, but no assertion on
# Link rows is visible in this excerpt -- confirm one exists in the full file.
650 test "delete referring links in SweepTrashedObjects" do
651 uuid = collections(:trashed_on_next_sweep).uuid
652 act_as_system_user do
653 Link.create!(head_uuid: uuid,
654 tail_uuid: system_user_uuid,
655 link_class: 'whatever',
658 past = db_current_time
# update_all writes the three columns straight to the matching rows, setting
# both trash_at and delete_at to the time captured just above.
659 Collection.where(uuid: uuid).
660 update_all(is_trashed: true, trash_at: past, delete_at: past)
661 assert_not_empty Collection.where(uuid: uuid)
662 SweepTrashedObjects.sweep_now
663 assert_empty Collection.where(uuid: uuid)