From 647c3bac741b56ce16c7e22ab2d462725a34198d Mon Sep 17 00:00:00 2001 From: radhika Date: Wed, 19 Aug 2015 19:18:19 -0400 Subject: [PATCH] 6859: Always raise an exception on errors during salvaging. Catch any such exceptions in the script and exit. Update test accordingly. --- services/api/lib/salvage_collection.rb | 124 +++++++----------- services/api/script/salvage_collection.rb | 7 +- .../api/test/unit/salvage_collection_test.rb | 65 ++++++--- 3 files changed, 102 insertions(+), 94 deletions(-) diff --git a/services/api/lib/salvage_collection.rb b/services/api/lib/salvage_collection.rb index 8854cb7761..9b182cfda3 100755 --- a/services/api/lib/salvage_collection.rb +++ b/services/api/lib/salvage_collection.rb @@ -15,102 +15,80 @@ module SalvageCollection require 'tempfile' require 'shellwords' - def self.salvage_collection_arv_put temp_file - %x(arv-put --as-stream --use-filename invalid_manifest_text.txt #{Shellwords::shellescape(temp_file.path)}) + def self.salvage_collection_arv_put cmd + new_manifest = %x(#{cmd}) + if $?.success? + new_manifest + else + raise "Error during arv-put." + end + end + + def self.salvage_collection_locator_data manifest + # Get all the locators from the original manifest + locators = [] + size = 0 + manifest.each_line do |line| + line.split(' ').each do |word| + if match = Keep::Locator::LOCATOR_REGEXP.match(word) + word = match[1]+match[2] # get rid of any hints + locators << word + size += match[3].to_i + end + end + end + locators << 'd41d8cd98f00b204e9800998ecf8427e+0' if !locators.any? + return [locators, size] end def self.salvage_collection uuid, reason='salvaged - see #6277, #6859' act_as_system_user do if !ENV['ARVADOS_API_TOKEN'].present? or !ENV['ARVADOS_API_HOST'].present? - $stderr.puts "Please set your admin user credentials as ARVADOS environment variables." - # exit with a code outside the range of special exit codes; http://tldp.org/LDP/abs/html/exitcodes.html - exit 200 + raise "ARVADOS environment variables missing. Please set your admin user credentials as ARVADOS environment variables." end if !uuid.present? - $stderr.puts "Required uuid argument is missing." - return false + raise "Collection UUID is required." end src_collection = Collection.find_by_uuid uuid if !src_collection - $stderr.puts "No collection found for #{uuid}. Returning." - return false + raise "No collection found for #{uuid}." end - begin - src_manifest = src_collection.manifest_text || '' - - # Get all the locators from the original manifest - locators = [] - src_manifest.each_line do |line| - line.split(' ').each do |word| - if match = Keep::Locator::LOCATOR_REGEXP.match(word) - word = word.split('+')[0..1].join('+') # get rid of any hints - locators << word if !word.start_with?('00000000000000000000000000000000') - end - end - end - locators << 'd41d8cd98f00b204e9800998ecf8427e+0' if !locators.any? - - # create new collection using 'arv-put' with original manifest_text as the data - temp_file = Tempfile.new('temp') - temp_file.write(src_manifest) - temp_file.close + src_manifest = src_collection.manifest_text || '' - new_manifest = salvage_collection_arv_put temp_file + # create new collection using 'arv-put' with original manifest_text as the data + temp_file = Tempfile.new('temp') + temp_file.write(src_manifest) + temp_file.close - temp_file.unlink + new_manifest = salvage_collection_arv_put "arv-put --as-stream --use-filename invalid_manifest_text.txt #{Shellwords::shellescape(temp_file.path)}" - if !new_manifest.present? - $stderr.puts "arv-put --as-stream failed for #{uuid}" - return false - end + temp_file.unlink - words = [] - new_manifest.split(' ').each do |word| - if match = Keep::Locator::LOCATOR_REGEXP.match(word) - word = word.split('+')[0..1].join('+') # get rid of any hints - words << word - else - words << word - end - end + # Get the locator data in the format [[locators], size] from the original manifest + locator_data = salvage_collection_locator_data src_manifest - new_manifest = words.join(' ') + "\n" - new_collection = Collection.new + new_manifest += (". #{locator_data[0].join(' ')} 0:#{locator_data[1]}:salvaged_data\n") - total_size = 0 - locators.each do |locator| - total_size += locator.split('+')[1].to_i - end - new_manifest += (". #{locators.join(' ')} 0:#{total_size}:salvaged_data\n") - - new_collection.name = "salvaged from #{src_collection.uuid}, #{src_collection.portable_data_hash}" - new_collection.manifest_text = new_manifest - new_collection.portable_data_hash = Digest::MD5.hexdigest(new_collection.manifest_text) + new_collection = Collection.new + new_collection.name = "salvaged from #{src_collection.uuid}, #{src_collection.portable_data_hash}" + new_collection.manifest_text = new_manifest + new_collection.portable_data_hash = Digest::MD5.hexdigest(new_collection.manifest_text) - created = new_collection.save! - raise "New collection creation failed." if !created + created = new_collection.save! + raise "New collection creation failed." if !created - $stderr.puts "Salvaged manifest and data for #{uuid} are in #{new_collection.uuid}." - puts "Created new collection #{new_collection.uuid}" - rescue => error - $stderr.puts "Error creating collection for #{uuid}: #{error}" - return false - end + $stderr.puts "Salvaged manifest and data for #{uuid} are in #{new_collection.uuid}." + puts "Created new collection #{new_collection.uuid}" - begin - # update src_collection collection name, pdh, and manifest_text - src_collection.name = (src_collection.name || '') + ' (' + (reason || '') + '; salvaged data at ' + new_collection.uuid + ')' - src_collection.manifest_text = '' - src_collection.portable_data_hash = 'd41d8cd98f00b204e9800998ecf8427e+0' - src_collection.save! - $stderr.puts "Collection #{uuid} emptied and renamed to #{src_collection.name.inspect}." - rescue => error - $stderr.puts "Error salvaging collection #{new_collection.uuid}: #{error}" - return false - end + # update src_collection collection name, pdh, and manifest_text + src_collection.name = (src_collection.name || '') + ' (' + (reason || '') + '; salvaged data at ' + new_collection.uuid + ')' + src_collection.manifest_text = '' + src_collection.portable_data_hash = 'd41d8cd98f00b204e9800998ecf8427e+0' + src_collection.save! + $stderr.puts "Collection #{uuid} emptied and renamed to #{src_collection.name.inspect}." end end end diff --git a/services/api/script/salvage_collection.rb b/services/api/script/salvage_collection.rb index b70807bd38..3212d8862d 100755 --- a/services/api/script/salvage_collection.rb +++ b/services/api/script/salvage_collection.rb @@ -23,4 +23,9 @@ opts = Trollop::options do end # Salvage the collection with the given uuid -SalvageCollection.salvage_collection opts.uuid, opts.reason +begin + SalvageCollection.salvage_collection opts.uuid, opts.reason +rescue => e + $stderr.puts "Error during arv-put" + exit 1 +end diff --git a/services/api/test/unit/salvage_collection_test.rb b/services/api/test/unit/salvage_collection_test.rb index 83f3c5bd70..953ce63f88 100644 --- a/services/api/test/unit/salvage_collection_test.rb +++ b/services/api/test/unit/salvage_collection_test.rb @@ -3,19 +3,13 @@ require 'salvage_collection' TEST_MANIFEST = ". 341dabea2bd78ad0d6fc3f5b926b450e+85626+Ad391622a17f61e4a254eda85d1ca751c4f368da9@55e076ce 0:85626:brca2-hg19.fa\n. d7321a918923627c972d8f8080c07d29+82570+A22e0a1d9b9bc85c848379d98bedc64238b0b1532@55e076ce 0:82570:brca1-hg19.fa\n" -module Kernel - def exit code - raise "Exit code #{code}" if code == 200 - end -end - module SalvageCollection - def self.salvage_collection_arv_put(temp_file) - file_contents = file = File.new(temp_file.path, "r").gets + def self.salvage_collection_arv_put(cmd) + file_contents = File.new(cmd.split[-1], "r").gets # simulate arv-put error when it is 'user_agreement' if file_contents.include? 'GNU_General_Public_License' - return '' + raise("Error during arv-put") else ". " + Digest::MD5.hexdigest(TEST_MANIFEST) + @@ -24,7 +18,7 @@ module SalvageCollection end end -class SalvageCollectionTest < ActiveSupport::TestCase +class SalvageCollectionMockTest < ActiveSupport::TestCase include SalvageCollection setup do @@ -70,31 +64,62 @@ class SalvageCollectionTest < ActiveSupport::TestCase end test "salvage collection with no uuid required argument" do - status = SalvageCollection.salvage_collection nil - assert_equal false, status + raised = false + begin + SalvageCollection.salvage_collection nil + rescue => e + assert_equal "Collection UUID is required.", e.message + raised = true + end + assert_equal true, raised end test "salvage collection with bogus uuid" do - status = SalvageCollection.salvage_collection 'bogus-uuid' - assert_equal false, status + raised = false + begin + SalvageCollection.salvage_collection 'bogus-uuid' + rescue => e + assert_equal "No collection found for bogus-uuid.", e.message + raised = true + end + assert_equal true, raised end test "salvage collection with no env ARVADOS_API_HOST" do - exited = false + raised = false begin ENV['ARVADOS_API_HOST'] = '' ENV['ARVADOS_API_TOKEN'] = '' SalvageCollection.salvage_collection collections('user_agreement').uuid rescue => e - assert_equal "Exit code 200", e.message - exited = true + assert_equal "ARVADOS environment variables missing. Please set your admin user credentials as ARVADOS environment variables.", e.message + raised = true end - assert_equal true, exited + assert_equal true, raised end test "salvage collection with error during arv-put" do # try to salvage collection while mimicking error during arv-put - status = SalvageCollection.salvage_collection collections('user_agreement').uuid - assert_equal false, status + raised = false + begin + SalvageCollection.salvage_collection collections('user_agreement').uuid + rescue => e + assert_equal "Error during arv-put", e.message + raised = true + end + assert_equal true, raised + end + + test "invalid locators dropped during salvaging" do + manifest = ". 341dabea2bd78ad0d6fc3f5b926b450e+abc 0:85626:brca2-hg19.fa\n. 341dabea2bd78ad0d6fc3f5b926b450e+1000 0:1000:brca-hg19.fa\n . d7321a918923627c972d8f8080c07d29+2000+A22e0a1d9b9bc85c848379d98bedc64238b0b1532@55e076ce 0:2000:brca1-hg19.fa\n" + + # salvage this collection + locator_data = SalvageCollection.salvage_collection_locator_data manifest + + assert_equal true, locator_data[0].size.eql?(2) + assert_equal false, locator_data[0].include?("341dabea2bd78ad0d6fc3f5b926b450e+abc") + assert_equal true, locator_data[0].include?("341dabea2bd78ad0d6fc3f5b926b450e+1000") + assert_equal true, locator_data[0].include?("d7321a918923627c972d8f8080c07d29+2000") + assert_equal true, locator_data[1].eql?(1000 + 2000) # size end end -- 2.30.2