Merge branch '7167-keep-rsync' of git.curoverse.com:arvados into 7167-keep-rsync
[arvados.git] / services / api / test / unit / salvage_collection_test.rb
1 require 'test_helper'
2 require 'salvage_collection'
3 require 'shellwords'
4
# A well-formed manifest_text: two streams, each with one locator that carries
# a +A...@... hint after its size.
TEST_MANIFEST =
  ". 341dabea2bd78ad0d6fc3f5b926b450e+85626+Ad391622a17f61e4a254eda85d1ca751c4f368da9@55e076ce 0:85626:brca2-hg19.fa\n" \
  ". d7321a918923627c972d8f8080c07d29+82570+A22e0a1d9b9bc85c848379d98bedc64238b0b1532@55e076ce 0:82570:brca1-hg19.fa\n"

# The same manifest with the +A...@... hints removed from both locators.
TEST_MANIFEST_STRIPPED =
  ". 341dabea2bd78ad0d6fc3f5b926b450e+85626 0:85626:brca2-hg19.fa\n" \
  ". d7321a918923627c972d8f8080c07d29+82570 0:82570:brca1-hg19.fa\n"
8
# This invalid manifest_text has the following flaws:
#   Missing stream name, with a locator in its place
#   Invalid locators:
#     faafaafaabd78ad0d6fc3f5b926b450e+foo
#     bar-baabaabaabd78ad0d6fc3f5b926b450e
#     bad12345dae58ad0d6fc3f5b926b450e+
#     341dabea2bd78ad0d6fc3f5b926b450e+abc
#     341dabea2bd78ad0d6fc3f5b926abcdf
#   (the second and third of these are joined by "_" into a single token)
# Expectation: All these locators are preserved in salvaged_data
BAD_MANIFEST =
  "faafaafaabd78ad0d6fc3f5b926b450e+foo bar-baabaabaabd78ad0d6fc3f5b926b450e_bad12345dae58ad0d6fc3f5b926b450e+ 341dabea2bd78ad0d6fc3f5b926b450e+abc 341dabea2bd78ad0d6fc3f5b926abcdf 0:85626:brca2-hg19.fa\n" \
  ". abcdabea2bd78ad0d6fc3f5b926b450e+1000 0:1000:brca-hg19.fa\n" \
  ". d7321a918923627c972d8f8080c07d29+2000+A22e0a1d9b9bc85c848379d98bedc64238b0b1532@55e076ce 0:2000:brca1-hg19.fa\n"
19
# Tests for the SalvageCollection mixin, covering both a valid manifest and a
# deliberately malformed one. The backtick method is overridden below, so
# arv-put is never actually executed.
class SalvageCollectionTest < ActiveSupport::TestCase
  include SalvageCollection

  # Environment variables each test overrides; teardown puts the originals back.
  ARVADOS_ENV_KEYS = %w[ARVADOS_API_HOST ARVADOS_API_TOKEN].freeze

  setup do
    # Capture the pre-test values first so teardown can restore them even if
    # the rest of setup raises.
    @saved_env = {}
    ARVADOS_ENV_KEYS.each { |key| @saved_env[key] = ENV[key] }

    set_user_from_auth :admin
    # arv-put needs ARV env variables
    ARVADOS_ENV_KEYS.each { |key| ENV[key] = 'unused_by_test' }
    @backtick_mock_failure = false
  end

  teardown do
    # Assigning nil to an ENV key deletes it, so variables that were unset
    # before the test end up unset again.
    (@saved_env || {}).each { |key, value| ENV[key] = value }
  end

  # Stand-in for Kernel#` so that no real arv-put runs.
  #
  # Asserts that the command's first word is arv-put, then runs `true` or
  # `false` through the real backtick so $? carries a success or failure
  # status. On success, returns a one-stream manifest describing the contents
  # of the file named by the command's last word.
  def `(cmd) # mock Kernel `
    argv = cmd.shellsplit
    assert_equal 'arv-put', argv.first

    # run a process so $? indicates failure
    return super('false') if @backtick_mock_failure

    # run a process so $? indicates success
    super('true')
    # File.read closes the file itself; no handle is left open.
    invalid_manifest_text_stream(File.read(argv.last))
  end

  test "salvage test collection with valid manifest text" do
    # create a collection to test salvaging
    src_collection = Collection.new name: "test collection", manifest_text: TEST_MANIFEST
    src_collection.save!

    # salvage this collection
    salvage_collection src_collection.uuid, 'test salvage collection - see #6277, #6859'

    # verify the updated source and fetch the collection holding its salvaged data
    new_collection = assert_source_salvaged src_collection

    # verify the new collection's manifest format
    expected_manifest = invalid_manifest_text_stream(TEST_MANIFEST_STRIPPED) +
      ". 341dabea2bd78ad0d6fc3f5b926b450e+85626 d7321a918923627c972d8f8080c07d29+82570 0:168196:salvaged_data\n"
    assert_equal expected_manifest, new_collection.manifest_text
  end

  test "salvage collection with no uuid required argument" do
    assert_raises RuntimeError do
      salvage_collection nil
    end
  end

  test "salvage collection with bogus uuid" do
    e = assert_raises RuntimeError do
      salvage_collection 'bogus-uuid'
    end
    assert_equal "No collection found for bogus-uuid.", e.message
  end

  test "salvage collection with no env ARVADOS_API_HOST" do
    e = assert_raises RuntimeError do
      ENV['ARVADOS_API_HOST'] = ''
      ENV['ARVADOS_API_TOKEN'] = ''
      salvage_collection collections('user_agreement').uuid
    end
    assert_equal "ARVADOS environment variables missing. Please set your admin user credentials as ARVADOS environment variables.", e.message
  end

  test "salvage collection with error during arv-put" do
    # try to salvage collection while mimicking error during arv-put
    @backtick_mock_failure = true
    e = assert_raises RuntimeError do
      salvage_collection collections('user_agreement').uuid
    end
    assert_match(/Error during arv-put: pid \d+ exit \d+ \(cmd was \"arv-put .*\"\)/, e.message)
  end

  # This test uses BAD_MANIFEST, which has the following flaws:
  #   Missing stream name, with a locator in its place
  #   Invalid locators:
  #     faafaafaabd78ad0d6fc3f5b926b450e+foo
  #     bar-baabaabaabd78ad0d6fc3f5b926b450e
  #     bad12345dae58ad0d6fc3f5b926b450e+
  #     341dabea2bd78ad0d6fc3f5b926b450e+abc
  #     341dabea2bd78ad0d6fc3f5b926abcdf
  # Expectation: All these locators are preserved in salvaged_data
  test "invalid locators preserved during salvaging" do
    locator_data = salvage_collection_locator_data BAD_MANIFEST
    expected_locators = [
      "faafaafaabd78ad0d6fc3f5b926b450e",
      "baabaabaabd78ad0d6fc3f5b926b450e",
      "bad12345dae58ad0d6fc3f5b926b450e",
      "341dabea2bd78ad0d6fc3f5b926b450e",
      "341dabea2bd78ad0d6fc3f5b926abcdf",
      "abcdabea2bd78ad0d6fc3f5b926b450e+1000",
      "d7321a918923627c972d8f8080c07d29+2000",
    ]
    assert_equal expected_locators, locator_data[0]
    # only the two locators with numeric size hints contribute to the total
    assert_equal 1000 + 2000, locator_data[1]
  end

  test "salvage a collection with invalid manifest text" do
    # create a collection to test salvaging; validation is skipped on save
    # because the manifest is deliberately malformed
    src_collection = Collection.new name: "test collection", manifest_text: BAD_MANIFEST, owner_uuid: 'zzzzz-tpzed-000000000000000'
    src_collection.save!(validate: false)

    # salvage this collection
    salvage_collection src_collection.uuid, 'test salvage collection - see #6277, #6859'

    # verify the updated source and fetch the collection holding its salvaged data
    new_collection = assert_source_salvaged src_collection

    # verify the new collection's manifest includes the bad locators
    expected_manifest = invalid_manifest_text_stream(BAD_MANIFEST) +
      ". faafaafaabd78ad0d6fc3f5b926b450e baabaabaabd78ad0d6fc3f5b926b450e bad12345dae58ad0d6fc3f5b926b450e 341dabea2bd78ad0d6fc3f5b926b450e 341dabea2bd78ad0d6fc3f5b926abcdf abcdabea2bd78ad0d6fc3f5b926b450e+1000 d7321a918923627c972d8f8080c07d29+2000 0:3000:salvaged_data\n"
    assert_equal expected_manifest, new_collection.manifest_text
  end

  private

  # Builds the manifest stream that the mocked arv-put reports for +text+:
  # the text's MD5 and length, under the file name invalid_manifest_text.txt.
  def invalid_manifest_text_stream(text)
    ". #{Digest::MD5.hexdigest(text)}+#{text.length} 0:#{text.length}:invalid_manifest_text.txt\n"
  end

  # Checks that +src_collection+ was renamed to point at its salvaged copy and
  # had its manifest emptied, then checks that the copy's name refers back to
  # the source uuid.
  #
  # Returns the newly created collection holding the salvaged data.
  def assert_source_salvaged(src_collection)
    updated_src_collection = Collection.find_by_uuid src_collection.uuid
    updated_name = updated_src_collection.name
    # src_collection is the in-memory object, so it still has the original name
    assert_includes updated_name, src_collection.name

    match = updated_name.match(/^test collection.*salvaged data at (.*)\)$/)
    assert_not_nil match
    assert_not_nil match[1]
    assert_empty updated_src_collection.manifest_text

    # match[1] is the uuid of the new collection created from src_collection's salvaged data
    new_collection = Collection.find_by_uuid match[1]
    match = new_collection.name.match(/^salvaged from (.*),.*/)
    assert_not_nil match
    assert_equal src_collection.uuid, match[1]
    new_collection
  end
end