1 module SalvageCollection
2 # Takes two input parameters: a collection uuid and a reason
3 # Get "src_collection" with the given uuid
4 # Create a new collection with:
5 # src_collection.manifest_text as "invalid_manifest_text.txt"
6 # Locators from src_collection.manifest_text as "salvaged_data"
7 # Update src_collection:
8 # Set src_collection.manifest_text to: ""
9 # Append to src_collection.name: " (reason; salvaged data at new_collection.uuid)"
10 # Set portable_data_hash to "d41d8cd98f00b204e9800998ecf8427e+0"
12 require File.dirname(__FILE__) + '/../config/environment'
13 require 'arvados/keep'
14 include ApplicationHelper
18 def self.salvage_collection uuid, reason='salvaged - see #6277, #6859'
20 if !ENV['ARVADOS_API_TOKEN'].present? or !ENV['ARVADOS_API_HOST'].present?
21 $stderr.puts "Please set your admin user credentials as ARVADOS environment variables."
26 $stderr.puts "Required uuid argument is missing."
30 src_collection = Collection.find_by_uuid uuid
32 $stderr.puts "No collection found for #{uuid}. Returning."
37 src_manifest = src_collection.manifest_text || ''
39 # Get all the locators from the original manifest
41 src_manifest.each_line do |line|
42 line.split(' ').each do |word|
43 if match = Keep::Locator::LOCATOR_REGEXP.match(word)
44 word = word.split('+')[0..1].join('+') # get rid of any hints
45 locators << word if !word.start_with?('00000000000000000000000000000000')
49 locators << 'd41d8cd98f00b204e9800998ecf8427e+0' if !locators.any?
51 # create new collection using 'arv-put' with original manifest_text as the data
52 temp_file = Tempfile.new('temp')
53 temp_file.write(src_manifest)
55 new_manifest = %x(arv-put --as-stream --use-filename invalid_manifest_text.txt #{Shellwords::shellescape(temp_file.path)})
59 if !new_manifest.present?
60 $stderr.puts "arv-put --as-stream failed for #{uuid}"
65 new_manifest.split(' ').each do |word|
66 if match = Keep::Locator::LOCATOR_REGEXP.match(word)
67 word = word.split('+')[0..1].join('+') # get rid of any hints
74 new_manifest = words.join(' ') + "\n"
75 new_collection = Collection.new
78 locators.each do |locator|
79 total_size += locator.split('+')[1].to_i
81 new_manifest += (". #{locators.join(' ')} 0:#{total_size}:salvaged_data\n")
83 new_collection.name = "salvaged from #{src_collection.uuid}, #{src_collection.portable_data_hash}"
84 new_collection.manifest_text = new_manifest
85 new_collection.portable_data_hash = Digest::MD5.hexdigest(new_collection.manifest_text)
87 created = new_collection.save!
88 raise "New collection creation failed." if !created
90 $stderr.puts "Salvaged manifest and data for #{uuid} are in #{new_collection.uuid}."
91 puts "Created new collection #{new_collection.uuid}"
93 $stderr.puts "Error creating collection for #{uuid}: #{error}"
98 # update src_collection's name, portable_data_hash, and manifest_text
99 src_collection.name = (src_collection.name || '') + ' (' + (reason || '') + '; salvaged data at ' + new_collection.uuid + ')'
100 src_collection.manifest_text = ''
101 src_collection.portable_data_hash = 'd41d8cd98f00b204e9800998ecf8427e+0'
103 $stderr.puts "Collection #{uuid} emptied and renamed to #{src_collection.name.inspect}."
105 $stderr.puts "Error salvaging collection #{new_collection.uuid}: #{error}"