3 # Take two input parameters: a collection uuid and reason
4 # Get "src" collection with the given uuid
5 # Create a new collection "dest" with:
6 # src.manifest_text as "invalid_manifest_text.txt"
7 # Locators from src.manifest_text as "salvaged_data"
8 # Update src collection:
9 # Set src.manifest_text to: ""
10 # Append to src.name: " (reason; salvaged data at dest.uuid)"
11 # Set portable_data_hash to "d41d8cd98f00b204e9800998ecf8427e+0"
15 opts = Trollop::options do
17 banner "Usage: salvage_collection.rb " +
21 uuid of the collection to be salvaged.
29 Trollop::die "required uuid argument is missing"
34 require File.dirname(__FILE__) + '/../config/environment'
35 require 'arvados/keep'
36 include ApplicationHelper
# Salvages the collection identified by uuid: the source manifest text is
# uploaded through arv-put as a file in a new collection, a "salvaged_data"
# entry listing block locators is appended to that new collection's manifest,
# and the source collection's name, manifest_text and portable_data_hash are
# then overwritten.
#
# uuid   - value passed to Collection.find_by_uuid to locate the source.
# reason - text inserted into the source collection's new name; nil is treated
#          as an empty string.
#
# NOTE(review): several lines of this method are not visible here (guards,
# begin/rescue, local initialisation, any save calls). The comments below
# describe only the statements that are shown.
39 def salvage_collection uuid, reason
41 src = Collection.find_by_uuid uuid
# Message for the lookup-failure case; presumably guarded by a nil check on
# src that is not visible here.
43 puts "No collection found for #{uuid}"
# Fall back to an empty string when the source has no manifest text.
48 src_manifest_text = src.manifest_text || ''
50 # Get all the locators from the original manifest
# Each line is split on spaces and every word is tested against the Keep
# locator pattern.
52 src_manifest_text.each_line do |line|
53 line.split(' ').each do |word|
54 if match = Keep::Locator::LOCATOR_REGEXP.match(word)
# Keep only the first two '+'-separated fields of the locator.
55 word = word.split('+')[0..1].join('+') # get rid of any hints
# When no locators were collected, use this single placeholder locator
# (the same string the source's portable_data_hash is set to further down).
60 locators << 'd41d8cd98f00b204e9800998ecf8427e+0' if !locators.any?
62 # create new collection using 'arv-put' with original manifest_text as the data
63 temp_file = Tempfile.new('temp')
# NOTE(review): no flush/close is visible between this write and the arv-put
# call below — confirm the data is on disk before arv-put reads the file.
64 temp_file.write(src.manifest_text)
# Shells out to arv-put and captures its standard output in `created`.
66 created = %x(arv-put --use-filename invalid_manifest_text.txt #{temp_file.path})
# The arv-put output must contain something matching the uuid pattern,
# otherwise the salvage is aborted with an exception.
72 match = created.match HasUuid::UUID_REGEX
73 raise "uuid not found" if !match
75 # update this new collection manifest to reference all locators from the original manifest
# NOTE(review): the lookup uses the raw `created` string rather than `match`;
# whether `created` is trimmed on a line not shown cannot be told from here.
76 new_collection = Collection.find_by_uuid created
78 new_manifest = new_collection['manifest_text']
# Remove every "+A..." segment (up to the next '+') from the manifest text.
79 new_manifest = new_manifest.gsub(/\+A[^+]*/, '')
# Sum the second '+'-separated field of each locator; the result is used as the
# length of the salvaged_data entry below.
81 locators.each do |locator|
82 total_size += locator.split('+')[1].to_i
# Append one manifest line listing all locators under the name salvaged_data.
84 new_manifest += (". #{locators.join(' ')} 0:#{total_size}:salvaged_data\n")
86 new_collection.name = "salvaged from #{src.uuid}, #{src.portable_data_hash}"
87 new_collection.manifest_text = new_manifest
# NOTE(review): this stores a bare MD5 hex digest, while the value assigned to
# src.portable_data_hash below has a "+0" suffix — confirm the expected format.
88 new_collection.portable_data_hash = Digest::MD5.hexdigest(new_manifest)
92 puts "Created collection for salvaged #{uuid} with uuid: #{created} #{match}"
# Failure message for the new-collection step; `error` is presumably bound by a
# rescue clause that is not visible here.
94 puts "Error creating collection for #{uuid}: #{error}"
99 # update src collection name, pdh, and manifest_text
# nil name and nil reason are both treated as empty strings.
100 src.name = (src.name || '') + ' (' + (reason || '') + '; salvaged data at ' + created + ')'
101 src.manifest_text = ''
102 src.portable_data_hash = 'd41d8cd98f00b204e9800998ecf8427e+0'
104 puts "Salvaged collection #{uuid}"
# Failure message for the source-update step; `error` is presumably bound by a
# rescue clause that is not visible here.
106 puts "Error salvaging collection #{uuid}: #{error}"
111 # Salvage the collection with the given uuid
112 salvage_collection uuid, reason