From: Tom Clegg Date: Mon, 20 Jan 2014 07:55:55 +0000 (-0800) Subject: Remove +K@{name} hints from collection uuid, link head/tail, job X-Git-Tag: 1.1.0~2722^2~62^2 X-Git-Url: https://git.arvados.org/arvados.git/commitdiff_plain/07a7c1ca8ea3fb014a7eb4922ef762af7da5a0b5 Remove +K@{name} hints from collection uuid, link head/tail, job output/log, and job_task output. refs #1881 --- diff --git a/services/api/db/migrate/20140117231056_normalize_collection_uuid.rb b/services/api/db/migrate/20140117231056_normalize_collection_uuid.rb new file mode 100644 index 0000000000..bec7ec7c4a --- /dev/null +++ b/services/api/db/migrate/20140117231056_normalize_collection_uuid.rb @@ -0,0 +1,91 @@ +class NormalizeCollectionUuid < ActiveRecord::Migration + def count_orphans + %w(head tail).each do |ht| + results = ActiveRecord::Base.connection.execute(<<-EOS) +SELECT COUNT(links.*) + FROM links + LEFT JOIN collections c + ON links.#{ht}_uuid = c.uuid + WHERE (#{ht}_kind='arvados#collection' or #{ht}_uuid ~ '^[0-9a-f]{32,}') + AND #{ht}_uuid IS NOT NULL + AND #{ht}_uuid NOT IN (SELECT uuid FROM collections) +EOS + puts "#{results.first['count'].to_i} links with #{ht}_uuid pointing nowhere." + end + end + + def up + # Normalize uuids in the collections table to + # {hash}+{size}. Existing uuids might be {hash}, + # {hash}+{size}+K@{instance-name}, {hash}+K@{instance-name}, etc. + + count_orphans + puts "Normalizing collection UUIDs." + + update_sql <<-EOS +UPDATE collections + SET uuid = regexp_replace(uuid,'\\+.*','') || '+' || length(manifest_text) + WHERE uuid !~ '^[0-9a-f]{32,}\\+[0-9]+' + AND (regexp_replace(uuid,'\\+.*','') || '+' || length(manifest_text)) + NOT IN (SELECT uuid FROM collections) +EOS + + count_orphans + puts "Updating links by stripping +K@.* from *_uuid attributes." + + update_sql <<-EOS +UPDATE links + SET head_uuid = regexp_replace(head_uuid,'\\+K@.*','') + WHERE head_uuid like '%+K@%' +EOS + update_sql <<-EOS +UPDATE links + SET tail_uuid = regexp_replace(tail_uuid,'\\+K@.*','') + WHERE tail_uuid like '%+K@%' +EOS + + count_orphans + puts "Updating links by searching bare collection hashes using regexp." + + # Next, update {hash} (and any other non-normalized forms) to + # {hash}+{size}. This can only work where the corresponding + # collection is found in the collections table (otherwise we can't + # know the size). + %w(head tail).each do |ht| + update_sql <<-EOS +UPDATE links + SET #{ht}_uuid = c.uuid + FROM collections c + WHERE #{ht}_uuid IS NOT NULL + AND (#{ht}_kind='arvados#collection' or #{ht}_uuid ~ '^[0-9a-f]{32,}') + AND #{ht}_uuid NOT IN (SELECT uuid FROM collections) + AND regexp_replace(#{ht}_uuid,'\\+.*','') = regexp_replace(c.uuid,'\\+.*','') + AND c.uuid ~ '^[0-9a-f]{32,}\\+[0-9]+' +EOS + end + + count_orphans + puts "Stripping \"+K@.*\" from jobs.output, jobs.log, job_tasks.output." + + update_sql <<-EOS +UPDATE jobs + SET output = regexp_replace(output,'\\+K@.*','') + WHERE output ~ '^[0-9a-f]{32,}\\+[0-9]+\\+K@\\w+$' +EOS + update_sql <<-EOS +UPDATE jobs + SET log = regexp_replace(log,'\\+K@.*','') + WHERE log ~ '^[0-9a-f]{32,}\\+[0-9]+\\+K@\\w+$' +EOS + update_sql <<-EOS +UPDATE job_tasks + SET output = regexp_replace(output,'\\+K@.*','') + WHERE output ~ '^[0-9a-f]{32,}\\+[0-9]+\\+K@\\w+$' +EOS + + puts "Done." + end + + def down + end +end