6859: improve locator parsing to consider any terminating [[:xdigit:]]{32}; improve...
[arvados.git] / services / api / test / unit / salvage_collection_test.rb
1 require 'test_helper'
2 require 'salvage_collection'
3
4 # Valid manifest_text: two streams whose block locators carry "+A...@..." hints; TEST_MANIFEST_STRIPPED is the same text with those hints removed (presumably permission signatures -- confirm)
5 TEST_MANIFEST = ". 341dabea2bd78ad0d6fc3f5b926b450e+85626+Ad391622a17f61e4a254eda85d1ca751c4f368da9@55e076ce 0:85626:brca2-hg19.fa\n. d7321a918923627c972d8f8080c07d29+82570+A22e0a1d9b9bc85c848379d98bedc64238b0b1532@55e076ce 0:82570:brca1-hg19.fa\n"
6 TEST_MANIFEST_STRIPPED = ". 341dabea2bd78ad0d6fc3f5b926b450e+85626 0:85626:brca2-hg19.fa\n. d7321a918923627c972d8f8080c07d29+82570 0:82570:brca1-hg19.fa\n"
7
8 # This invalid manifest_text has the following flaws:
9 #   Missing stream name on the first line, with a locator-like token in its place
10 #   Invalid locators:
11 #     faaaafaafaafaabd78ad0d6fc3f5b926b450e+foo
12 #     bar-baabaabaabd78ad0d6fc3f5b926b450e
13 #     bad12345dae58ad0d6fc3f5b926b450e+
14 #     341dabea2bd78ad0d6fc3f5b926b450e+abc
15 #     341dabea2bd78ad0d6fc3f5b926abcdf
16 # Expectation: the trailing 32 hex digits of each of these tokens are preserved in salvaged_data
17 BAD_MANIFEST = "faaaafaafaafaabd78ad0d6fc3f5b926b450e+foo bar-baabaabaabd78ad0d6fc3f5b926b450e bad12345dae58ad0d6fc3f5b926b450e+ 341dabea2bd78ad0d6fc3f5b926b450e+abc 341dabea2bd78ad0d6fc3f5b926abcdf 0:85626:brca2-hg19.fa\n. abcdabea2bd78ad0d6fc3f5b926b450e+1000 0:1000:brca-hg19.fa\n. d7321a918923627c972d8f8080c07d29+2000+A22e0a1d9b9bc85c848379d98bedc64238b0b1532@55e076ce 0:2000:brca1-hg19.fa\n"
18
19 # Mock arv_put
# Mock arv_put: reopens SalvageCollection so the tests never shell out to the
# real arv-put command.
module SalvageCollection
  # Stand-in for the arv-put invocation.
  #
  # cmd - command-line String; its last whitespace-separated token is used as
  #       the path of the file that would have been uploaded.
  #
  # Returns a one-line manifest text String of the form
  # ". <md5 of contents>+<length> 0:<length>:invalid_manifest_text.txt\n".
  #
  # Raises RuntimeError ("Error during arv-put") when the file contents
  # include 'GNU_General_Public_License'; the tests use this to simulate an
  # arv-put failure (presumably the 'user_agreement' fixture contains that
  # text -- confirm against the fixtures).
  def self.salvage_collection_arv_put(cmd)
    # File.read opens, reads and closes in one call; the previous
    # File.open(...).read left the handle open until garbage collection.
    file_contents = File.read(cmd.split[-1])

    raise("Error during arv-put") if file_contents.include?('GNU_General_Public_License')

    size = file_contents.length.to_s
    ". #{Digest::MD5.hexdigest(file_contents)}+#{size} 0:#{size}:invalid_manifest_text.txt\n"
  end
end
33
# Unit tests for SalvageCollection.salvage_collection and
# SalvageCollection.salvage_collection_locator_data, run against the mocked
# salvage_collection_arv_put defined earlier in this file.
class SalvageCollectionTest < ActiveSupport::TestCase
  include SalvageCollection

  # Environment variables that must be non-empty for salvage_collection to
  # proceed (see the "no env ARVADOS_API_HOST" test below).
  ARV_ENV_KEYS = %w[ARVADOS_API_HOST ARVADOS_API_TOKEN].freeze

  setup do
    set_user_from_auth :admin
    # Remember whatever the process had so teardown can restore it instead of
    # leaving the variables clobbered for the rest of the test run.
    @saved_arv_env = ARV_ENV_KEYS.map { |key| [key, ENV[key]] }
    # arv-put needs ARV env variables
    ARV_ENV_KEYS.each { |key| ENV[key] = 'unused_by_test' }
  end

  teardown do
    # Assigning nil to an ENV key deletes it, so variables that were unset
    # before the test end up unset again. Array() guards against a setup
    # that failed before the values were saved.
    Array(@saved_arv_env).each { |key, value| ENV[key] = value }
  end

  test "salvage test collection with valid manifest text" do
    # create a collection to test salvaging
    src_collection = Collection.new name: "test collection", manifest_text: TEST_MANIFEST
    src_collection.save!

    # salvage this collection
    SalvageCollection.salvage_collection src_collection.uuid, 'test salvage collection - see #6277, #6859'

    new_collection = assert_salvaged src_collection

    # verify the new collection's manifest format: first the uploaded copy of
    # the original manifest text, then a stream listing the salvaged locators
    stripped_size = TEST_MANIFEST_STRIPPED.length.to_s
    expected_manifest =
      ". #{Digest::MD5.hexdigest(TEST_MANIFEST_STRIPPED)}+#{stripped_size}" \
      " 0:#{stripped_size}:invalid_manifest_text.txt\n" \
      ". 341dabea2bd78ad0d6fc3f5b926b450e+85626 d7321a918923627c972d8f8080c07d29+82570 0:168196:salvaged_data\n"
    assert_equal expected_manifest, new_collection.manifest_text
  end

  test "salvage collection with no uuid required argument" do
    assert_raises RuntimeError do
      SalvageCollection.salvage_collection nil
    end
  end

  test "salvage collection with bogus uuid" do
    e = assert_raises RuntimeError do
      SalvageCollection.salvage_collection 'bogus-uuid'
    end
    assert_equal "No collection found for bogus-uuid.", e.message
  end

  test "salvage collection with no env ARVADOS_API_HOST" do
    e = assert_raises RuntimeError do
      ENV['ARVADOS_API_HOST'] = ''
      ENV['ARVADOS_API_TOKEN'] = ''
      SalvageCollection.salvage_collection collections('user_agreement').uuid
    end
    assert_equal "ARVADOS environment variables missing. Please set your admin user credentials as ARVADOS environment variables.", e.message
  end

  test "salvage collection with error during arv-put" do
    # try to salvage collection while mimicking error during arv-put
    e = assert_raises RuntimeError do
      SalvageCollection.salvage_collection collections('user_agreement').uuid
    end
    assert_equal "Error during arv-put", e.message
  end

  # This test uses BAD_MANIFEST, whose first line has a locator-like token
  # where the stream name should be and which contains these malformed
  # locator tokens:
  #     faaaafaafaafaabd78ad0d6fc3f5b926b450e+foo
  #     bar-baabaabaabd78ad0d6fc3f5b926b450e
  #     bad12345dae58ad0d6fc3f5b926b450e+
  #     341dabea2bd78ad0d6fc3f5b926b450e+abc
  #     341dabea2bd78ad0d6fc3f5b926abcdf
  # Expectation: the raw tokens do not appear verbatim in the result, but the
  # trailing 32 hex digits of each one do.
  test "invalid locators preserved during salvaging" do
    locator_data = SalvageCollection.salvage_collection_locator_data BAD_MANIFEST
    locators = locator_data[0]

    assert_equal 7, locators.size
    refute_includes locators, "faaaafaafaafaabd78ad0d6fc3f5b926b450e+foo"
    assert_includes locators, "faafaafaabd78ad0d6fc3f5b926b450e"
    refute_includes locators, "bar-baabaabaabd78ad0d6fc3f5b926b450e"
    assert_includes locators, "baabaabaabd78ad0d6fc3f5b926b450e"
    refute_includes locators, "bad12345dae58ad0d6fc3f5b926b450e+"
    assert_includes locators, "bad12345dae58ad0d6fc3f5b926b450e"
    refute_includes locators, "341dabea2bd78ad0d6fc3f5b926b450e+abc"
    assert_includes locators, "341dabea2bd78ad0d6fc3f5b926b450e"
    assert_includes locators, "341dabea2bd78ad0d6fc3f5b926abcdf"
    assert_includes locators, "abcdabea2bd78ad0d6fc3f5b926b450e+1000"
    assert_includes locators, "d7321a918923627c972d8f8080c07d29+2000"
    # size: only the two locators that carry a numeric size hint contribute
    assert_operator locator_data[1], :eql?, 1000 + 2000
  end

  test "salvage a collection with invalid manifest text" do
    # create a collection to test salvaging; validation is skipped so the
    # malformed manifest can be stored at all
    src_collection = Collection.new name: "test collection", manifest_text: BAD_MANIFEST, owner_uuid: 'zzzzz-tpzed-000000000000000'
    src_collection.save!(validate: false)

    # salvage this collection
    SalvageCollection.salvage_collection src_collection.uuid, 'test salvage collection - see #6277, #6859'

    new_collection = assert_salvaged src_collection

    # verify the new collection's manifest includes the bad locators
    bad_size = BAD_MANIFEST.length.to_s
    expected_manifest =
      ". #{Digest::MD5.hexdigest(BAD_MANIFEST)}+#{bad_size}" \
      " 0:#{bad_size}:invalid_manifest_text.txt\n" \
      ". faafaafaabd78ad0d6fc3f5b926b450e baabaabaabd78ad0d6fc3f5b926b450e bad12345dae58ad0d6fc3f5b926b450e 341dabea2bd78ad0d6fc3f5b926b450e 341dabea2bd78ad0d6fc3f5b926abcdf abcdabea2bd78ad0d6fc3f5b926b450e+1000 d7321a918923627c972d8f8080c07d29+2000 0:3000:salvaged_data\n"
    assert_equal expected_manifest, new_collection.manifest_text
  end

  private

  # Checks the state a successful salvage leaves behind for src_collection:
  # its stored name still contains the original name and points at the
  # salvaged-data collection, its manifest_text is empty, and the collection
  # it points at is named after src_collection's uuid.
  #
  # Returns the collection holding the salvaged data so the caller can verify
  # its manifest.
  def assert_salvaged(src_collection)
    # verify the updated src_collection data
    updated_src_collection = Collection.find_by_uuid src_collection.uuid
    updated_name = updated_src_collection.name
    assert_includes updated_name, src_collection.name

    match = updated_name.match(/^test collection.*salvaged data at (.*)\)$/)
    assert_not_nil match
    assert_not_nil match[1]
    assert_empty updated_src_collection.manifest_text

    # match[1] is the uuid of the new collection created from src_collection's
    # salvaged data; use it to load that collection and verify its name
    new_collection = Collection.find_by_uuid match[1]
    match = new_collection.name.match(/^salvaged from (.*),.*/)
    assert_not_nil match
    assert_equal src_collection.uuid, match[1]

    new_collection
  end
end