1 # Copyright (C) The Arvados Authors. All rights reserved.
3 # SPDX-License-Identifier: AGPL-3.0
5 class NormalizeCollectionUuid < ActiveRecord::Migration[4.2]
7 %w(head tail).each do |ht|
8 results = ActiveRecord::Base.connection.execute(<<-EOS)
11 LEFT JOIN collections c
12 ON links.#{ht}_uuid = c.uuid
13 WHERE (#{ht}_kind='arvados#collection' or #{ht}_uuid ~ '^[0-9a-f]{32,}')
14 AND #{ht}_uuid IS NOT NULL
15 AND #{ht}_uuid NOT IN (SELECT uuid FROM collections)
17 puts "#{results.first['count'].to_i} links with #{ht}_uuid pointing nowhere."
22 # Normalize uuids in the collections table to
23 # {hash}+{size}. Existing uuids might be {hash},
24 # {hash}+{size}+K@{instance-name}, {hash}+K@{instance-name}, etc.
27 puts "Normalizing collection UUIDs."
31 SET uuid = regexp_replace(uuid,'\\+.*','') || '+' || length(manifest_text)
32 WHERE uuid !~ '^[0-9a-f]{32,}\\+[0-9]+$'
33 AND (regexp_replace(uuid,'\\+.*','') || '+' || length(manifest_text))
34 NOT IN (SELECT uuid FROM collections)
38 puts "Updating links by stripping +K@.* from *_uuid attributes."
42 SET head_uuid = regexp_replace(head_uuid,'\\+K@.*','')
43 WHERE head_uuid like '%+K@%'
47 SET tail_uuid = regexp_replace(tail_uuid,'\\+K@.*','')
48 WHERE tail_uuid like '%+K@%'
52 puts "Updating links by searching bare collection hashes using regexp."
54 # Next, update {hash} (and any other non-normalized forms) to
55 # {hash}+{size}. This can only work where the corresponding
56 # collection is found in the collections table (otherwise we can't know its size).
58 %w(head tail).each do |ht|
61 SET #{ht}_uuid = c.uuid
63 WHERE #{ht}_uuid IS NOT NULL
64 AND (#{ht}_kind='arvados#collection' or #{ht}_uuid ~ '^[0-9a-f]{32,}')
65 AND #{ht}_uuid NOT IN (SELECT uuid FROM collections)
66 AND regexp_replace(#{ht}_uuid,'\\+.*','') = regexp_replace(c.uuid,'\\+.*','')
67 AND c.uuid ~ '^[0-9a-f]{32,}\\+[0-9]+$'
72 puts "Stripping \"+K@.*\" from jobs.output, jobs.log, job_tasks.output."
76 SET output = regexp_replace(output,'\\+K@.*','')
77 WHERE output ~ '^[0-9a-f]{32,}\\+[0-9]+\\+K@\\w+$'
81 SET log = regexp_replace(log,'\\+K@.*','')
82 WHERE log ~ '^[0-9a-f]{32,}\\+[0-9]+\\+K@\\w+$'
86 SET output = regexp_replace(output,'\\+K@.*','')
87 WHERE output ~ '^[0-9a-f]{32,}\\+[0-9]+\\+K@\\w+$'