Merge branch '17948-test-collection-tool' into main. Closes #17948
[arvados.git] / services / api / test / unit / salvage_collection_test.rb
1 # Copyright (C) The Arvados Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: AGPL-3.0
4
5 require 'test_helper'
6 require 'salvage_collection'
7 require 'shellwords'
8
# Well-formed manifest_text: two streams, one block locator and one file each.
TEST_MANIFEST =
  ". 341dabea2bd78ad0d6fc3f5b926b450e+85626+Ad391622a17f61e4a254eda85d1ca751c4f368da9@55e076ce 0:85626:brca2-hg19.fa\n" \
  ". d7321a918923627c972d8f8080c07d29+82570+A22e0a1d9b9bc85c848379d98bedc64238b0b1532@55e076ce 0:82570:brca1-hg19.fa\n"

# The same two streams with the "+A...@..." hint removed from each locator.
TEST_MANIFEST_STRIPPED =
  ". 341dabea2bd78ad0d6fc3f5b926b450e+85626 0:85626:brca2-hg19.fa\n" \
  ". d7321a918923627c972d8f8080c07d29+82570 0:82570:brca1-hg19.fa\n"
12
# Malformed manifest_text used to exercise salvaging. Its flaws:
#   The first stream has no stream name; a locator sits in its place
#   Invalid locators:
#     faafaafaabd78ad0d6fc3f5b926b450e+foo
#     bar-baabaabaabd78ad0d6fc3f5b926b450e
#     bad12345dae58ad0d6fc3f5b926b450e+
#       (the two above are joined by "_" into a single token)
#     341dabea2bd78ad0d6fc3f5b926b450e+abc
#     341dabea2bd78ad0d6fc3f5b926abcdf
# Expectation: All these locators are preserved in salvaged_data
BAD_MANIFEST =
  "faafaafaabd78ad0d6fc3f5b926b450e+foo bar-baabaabaabd78ad0d6fc3f5b926b450e_bad12345dae58ad0d6fc3f5b926b450e+ 341dabea2bd78ad0d6fc3f5b926b450e+abc 341dabea2bd78ad0d6fc3f5b926abcdf 0:85626:brca2-hg19.fa\n" \
  ". abcdabea2bd78ad0d6fc3f5b926b450e+1000 0:1000:brca-hg19.fa\n" \
  ". d7321a918923627c972d8f8080c07d29+2000+A22e0a1d9b9bc85c848379d98bedc64238b0b1532@55e076ce 0:2000:brca1-hg19.fa\n"
23
# Unit tests for the SalvageCollection mixin (salvage_collection and
# salvage_collection_locator_data).
#
# The backtick override below stands in for Kernel#` so that backtick
# commands evaluated with the test instance as self never launch a real
# arv-put.
class SalvageCollectionTest < ActiveSupport::TestCase
  include SalvageCollection

  setup do
    # Remember the pre-test values so teardown can restore them instead of
    # leaving the variables clobbered for whatever runs next.
    @saved_arv_env = {
      'ARVADOS_API_HOST' => ENV['ARVADOS_API_HOST'],
      'ARVADOS_API_TOKEN' => ENV['ARVADOS_API_TOKEN'],
    }
    set_user_from_auth :admin
    # arv-put needs ARV env variables
    ENV['ARVADOS_API_HOST'] = 'unused_by_test'
    ENV['ARVADOS_API_TOKEN'] = 'unused_by_test'
    @backtick_mock_failure = false
  end

  teardown do
    # Assigning nil to an ENV key deletes it, so a variable that was unset
    # before the test ends up unset again.
    @saved_arv_env.each { |name, value| ENV[name] = value }
  end

  # Mock of Kernel#`. Asserts that the command is an arv-put invocation,
  # then either reports failure (when @backtick_mock_failure is set) or
  # returns a one-stream manifest describing the file named by the
  # command's last argument.
  def `(cmd) # mock Kernel `
    argv = cmd.shellsplit
    assert_equal 'arv-put', argv.first
    # Run a real (trivial) process either way so $? reports the outcome
    return super('false') if @backtick_mock_failure

    super('true')
    # File.read closes the file itself; File.open(...).read left it open
    mock_arv_put_manifest File.read(argv.last)
  end

  test "salvage test collection with valid manifest text" do
    # create a collection to test salvaging
    src_collection = Collection.new name: "test collection", manifest_text: TEST_MANIFEST
    src_collection.save!

    # salvage this collection
    salvage_collection src_collection.uuid, 'test salvage collection - see #6277, #6859'

    new_collection = assert_salvaged_into_new_collection src_collection

    # verify the new collection's manifest format
    expected_manifest = mock_arv_put_manifest(TEST_MANIFEST_STRIPPED) +
      ". 341dabea2bd78ad0d6fc3f5b926b450e+85626 d7321a918923627c972d8f8080c07d29+82570 0:168196:salvaged_data\n"
    assert_equal expected_manifest, new_collection.manifest_text
  end

  test "salvage collection with no uuid required argument" do
    assert_raises RuntimeError do
      salvage_collection nil
    end
  end

  test "salvage collection with bogus uuid" do
    e = assert_raises RuntimeError do
      salvage_collection 'bogus-uuid'
    end
    assert_equal "No collection found for bogus-uuid.", e.message
  end

  test "salvage collection with no env ARVADOS_API_HOST" do
    e = assert_raises RuntimeError do
      ENV['ARVADOS_API_HOST'] = ''
      ENV['ARVADOS_API_TOKEN'] = ''
      salvage_collection collections('user_agreement').uuid
    end
    assert_equal "ARVADOS environment variables missing. Please set your admin user credentials as ARVADOS environment variables.", e.message
  end

  test "salvage collection with error during arv-put" do
    # try to salvage collection while mimicking error during arv-put
    @backtick_mock_failure = true
    e = assert_raises RuntimeError do
      salvage_collection collections('user_agreement').uuid
    end
    assert_match(/Error during arv-put: pid \d+ exit \d+ \(cmd was \"arv-put .*\"\)/, e.message)
  end

  # This test uses BAD_MANIFEST, which has the following flaws:
  #   Missing stream name with a locator in its place
  #   Invalid locators:
  #     faafaafaabd78ad0d6fc3f5b926b450e+foo
  #     bar-baabaabaabd78ad0d6fc3f5b926b450e
  #     bad12345dae58ad0d6fc3f5b926b450e+
  #     341dabea2bd78ad0d6fc3f5b926b450e+abc
  #     341dabea2bd78ad0d6fc3f5b926abcdf
  # Expectation: All these locators are preserved in salvaged_data
  test "invalid locators preserved during salvaging" do
    locator_data = salvage_collection_locator_data BAD_MANIFEST
    assert_equal \
    ["faafaafaabd78ad0d6fc3f5b926b450e",
     "baabaabaabd78ad0d6fc3f5b926b450e",
     "bad12345dae58ad0d6fc3f5b926b450e",
     "341dabea2bd78ad0d6fc3f5b926b450e",
     "341dabea2bd78ad0d6fc3f5b926abcdf",
     "abcdabea2bd78ad0d6fc3f5b926b450e+1000",
     "d7321a918923627c972d8f8080c07d29+2000",
    ], locator_data[0]
    assert_equal 1000+2000, locator_data[1]
  end

  test "salvage a collection with invalid manifest text" do
    # create a collection to test salvaging; validation is skipped because
    # the manifest is deliberately malformed
    src_collection = Collection.new name: "test collection", manifest_text: BAD_MANIFEST, owner_uuid: 'zzzzz-tpzed-000000000000000'
    src_collection.save!(validate: false)

    # salvage this collection
    salvage_collection src_collection.uuid, 'test salvage collection - see #6277, #6859'

    new_collection = assert_salvaged_into_new_collection src_collection

    # verify the new collection's manifest includes the bad locators
    expected_manifest = mock_arv_put_manifest(BAD_MANIFEST) +
      ". faafaafaabd78ad0d6fc3f5b926b450e baabaabaabd78ad0d6fc3f5b926b450e bad12345dae58ad0d6fc3f5b926b450e 341dabea2bd78ad0d6fc3f5b926b450e 341dabea2bd78ad0d6fc3f5b926abcdf abcdabea2bd78ad0d6fc3f5b926b450e+1000 d7321a918923627c972d8f8080c07d29+2000 0:3000:salvaged_data\n"
    assert_equal expected_manifest, new_collection.manifest_text
  end

  private

  # Manifest text the backtick mock returns for a file with the given
  # contents: one stream holding a single block (MD5 of the contents plus
  # their length) and a single file named invalid_manifest_text.txt.
  def mock_arv_put_manifest(contents)
    size = contents.length
    ". #{Digest::MD5.hexdigest(contents)}+#{size} 0:#{size}:invalid_manifest_text.txt\n"
  end

  # Checks shared by the end-to-end salvage tests. Verifies that the stored
  # copy of src_collection now has an empty manifest and a name pointing at
  # the salvaged data, and that the collection named there refers back to
  # src_collection. Returns that new collection for further assertions.
  def assert_salvaged_into_new_collection(src_collection)
    # verify the updated src_collection data
    updated_src_collection = Collection.find_by_uuid src_collection.uuid
    updated_name = updated_src_collection.name
    assert_includes updated_name, src_collection.name

    match = updated_name.match(/^test collection.*salvaged data at (.*)\)$/)
    assert_not_nil match
    assert_not_nil match[1]
    assert_empty updated_src_collection.manifest_text

    # match[1] is the uuid of the new collection created from src_collection's salvaged data
    # use this to get the new collection and verify
    new_collection = Collection.find_by_uuid match[1]
    assert_not_nil new_collection
    match = new_collection.name.match(/^salvaged from (.*),.*/)
    assert_not_nil match
    assert_equal src_collection.uuid, match[1]
    new_collection
  end
end