18004: Fixes a couple of race condition bugs related to caching remote users.
[arvados.git] / services / api / app / controllers / arvados / v1 / collections_controller.rb
1 # Copyright (C) The Arvados Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: AGPL-3.0
4
5 require "arvados/keep"
6 require "trashable"
7
class Arvados::V1::CollectionsController < ApplicationController
  include DbCurrentTime
  include TrashableController

  # Parameter declarations for the index action: whatever the parent
  # declares (or an empty hash if the super call raises), extended
  # with the include_trash and include_old_versions flags.
  def self._index_requires_parameters
    (super rescue {}).
      merge({
        include_trash: {
          type: 'boolean', required: false, default: false, description: "Include collections whose is_trashed attribute is true.",
        },
        include_old_versions: {
          type: 'boolean', required: false, default: false, description: "Include past collection versions.",
        },
      })
  end

  # Parameter declarations for the show action. Same shape as the
  # index declarations, except that include_old_versions defaults to
  # true here.
  def self._show_requires_parameters
    (super rescue {}).
      merge({
        include_trash: {
          type: 'boolean', required: false, default: false, description: "Show collection even if its is_trashed attribute is true.",
        },
        include_old_versions: {
          type: 'boolean', required: false, default: true, description: "Include past collection versions.",
        },
      })
  end

  # When the supplied uuid parses as a Keep locator, store it as the
  # portable_data_hash and drop the uuid attribute. The version and
  # current_version_uuid attributes are always discarded before
  # delegating to the parent implementation.
  def create
    if resource_attrs[:uuid] && (loc = Keep::Locator.parse(resource_attrs[:uuid]))
      resource_attrs[:portable_data_hash] = loc.to_s
      resource_attrs.delete :uuid
    end
    resource_attrs.delete :version
    resource_attrs.delete :current_version_uuid
    super
  end

  def update
    # preserve_version should be disabled unless explicitly asked otherwise.
    if !resource_attrs[:preserve_version]
      resource_attrs[:preserve_version] = false
    end
    super
  end

  # Seed @objects with the collections readable by @read_users before
  # the parent applies its own processing. Trashed collections are
  # included when requested via params, or when the current action is
  # destroy, trash or untrash.
  def find_objects_for_index
    opts = {
      include_trash: params[:include_trash] || ['destroy', 'trash', 'untrash'].include?(action_name),
      include_old_versions: params[:include_old_versions] || false,
    }
    @objects = Collection.readable_by(*@read_users, opts)
    super
  end

  # When params[:id] parses as a Keep locator, look the collection up
  # by portable data hash and set @object to a plain Hash (uuid,
  # portable_data_hash, manifest_text). Otherwise defer to the parent
  # lookup.
  def find_object_by_uuid
    if loc = Keep::Locator.parse(params[:id])
      loc.strip_hints!

      opts = {
        include_trash: params[:include_trash],
        include_old_versions: params[:include_old_versions],
      }

      # It matters which Collection object we pick because we use it to get signed_manifest_text,
      # the value of which is affected by the value of trash_at.
      #
      # From postgres doc: "By default, null values sort as if larger than any non-null
      # value; that is, NULLS FIRST is the default for DESC order, and
      # NULLS LAST otherwise."
      #
      # "trash_at desc" sorts null first, then latest to earliest, so
      # it will select the Collection object with the longest
      # available lifetime.

      if c = Collection.readable_by(*@read_users, opts).where({ portable_data_hash: loc.to_s }).order("trash_at desc").limit(1).first
        @object = {
          uuid: c.portable_data_hash,
          portable_data_hash: c.portable_data_hash,
          manifest_text: c.signed_manifest_text,
        }
      end
    else
      super
    end
  end

  # Render @object. A Collection record goes through the parent show
  # (without unsigned_manifest_text unless a selection is already
  # set); anything else (the Hash built by find_object_by_uuid) is
  # sent as JSON directly.
  def show
    if @object.is_a? Collection
      # Omit unsigned_manifest_text
      @select ||= model_class.selectable_attributes - ["unsigned_manifest_text"]
      super
    else
      send_json @object
    end
  end

  # Recursively walk sp (an ArvadosModel, Hash, Array or String) and
  # yield every collection reference found in a String leaf:
  #   yield portable_data_hash, nil   -- the string contains a PDH
  #   yield nil, collection_uuid      -- otherwise, it contains a collection uuid
  # Only the first match within each string is reported.
  #
  # ignore_columns lists column names to skip; it applies only to the
  # columns of the model passed at this level and is not forwarded to
  # nested values. visited is accepted for symmetry with search_edges
  # but is only passed along, never read here.
  def find_collections(visited, sp, ignore_columns=[], &b)
    case sp
    when ArvadosModel
      sp.class.columns.each do |col|
        next if ignore_columns.include?(col.name)
        find_collections(visited, sp[col.name.to_sym], &b)
      end
    when Hash
      sp.each_value do |v|
        find_collections(visited, v, &b)
      end
    when Array
      sp.each do |v|
        find_collections(visited, v, &b)
      end
    when String
      if m = /[a-f0-9]{32}\+\d+/.match(sp)
        yield m[0], nil
      elsif m = Collection.uuid_regex.match(sp)
        yield nil, m[0]
      end
    end
  end

  # Recursively explore the provenance graph around uuid, recording
  # every object reached in the visited Hash (keyed by uuid, portable
  # data hash, or link uuid).
  #
  # uuid      -- an Arvados uuid or a Keep locator / portable data hash
  # direction -- :search_up follows inputs (what produced this),
  #              :search_down follows outputs (what consumed this)
  #
  # All lookups are restricted to objects readable by @read_users.
  # Entries already present in visited are not explored again.
  def search_edges(visited, uuid, direction)
    return if uuid.nil? || uuid.empty? || visited[uuid]

    if loc = Keep::Locator.parse(uuid)
      loc.strip_hints!
      return if visited[loc.to_s]
    end

    if loc
      # uuid is a portable_data_hash
      collections = Collection.readable_by(*@read_users).where(portable_data_hash: loc.to_s)
      # Materialize at most two rows as an Array: enough to tell
      # "exactly one" from "several".
      matches = collections.limit(2).to_a
      if matches.size == 1
        visited[loc.to_s] = matches[0]
      elsif matches.size > 1
        # Several collections share this content; summarize them under
        # the name of one that has a non-empty name, if any.
        named = collections.limit(1).where("name <> ''").first
        if named
          visited[loc.to_s] = {
            portable_data_hash: matches[0].portable_data_hash,
            name: "#{named.name} + #{collections.count-1} more"
          }
        else
          visited[loc.to_s] = {
            portable_data_hash: matches[0].portable_data_hash,
            name: loc.to_s
          }
        end
      end

      if direction == :search_up
        # Search upstream for jobs where this locator is the output of some job
        if !Rails.configuration.API.DisabledAPIs["jobs.list"]
          Job.readable_by(*@read_users).where(output: loc.to_s).each do |job|
            search_edges(visited, job.uuid, :search_up)
          end

          Job.readable_by(*@read_users).where(log: loc.to_s).each do |job|
            search_edges(visited, job.uuid, :search_up)
          end
        end

        Container.readable_by(*@read_users).where(output: loc.to_s).each do |container|
          search_edges(visited, container.uuid, :search_up)
        end

        Container.readable_by(*@read_users).where(log: loc.to_s).each do |container|
          search_edges(visited, container.uuid, :search_up)
        end
      elsif direction == :search_down
        if loc.to_s == "d41d8cd98f00b204e9800998ecf8427e+0"
          # Special case, don't follow the empty collection.
          # (This also skips the provenance link search below.)
          return
        end

        # Search downstream for jobs where this locator is in script_parameters
        if !Rails.configuration.API.DisabledAPIs["jobs.list"]
          Job.readable_by(*@read_users).where(["jobs.script_parameters like ?", "%#{loc.to_s}%"]).each do |job|
            search_edges(visited, job.uuid, :search_down)
          end

          Job.readable_by(*@read_users).where(["jobs.docker_image_locator = ?", "#{loc.to_s}"]).each do |job|
            search_edges(visited, job.uuid, :search_down)
          end
        end

        Container.readable_by(*@read_users).where([Container.full_text_trgm + " like ?", "%#{loc.to_s}%"]).each do |container|
          # A container that merely produced this locator (as output
          # or log) is not a downstream consumer of it.
          if container.output != loc.to_s && container.log != loc.to_s
            search_edges(visited, container.uuid, :search_down)
          end
        end
      end
    else
      # uuid is a regular Arvados UUID
      rsc = ArvadosModel::resource_class_for_uuid uuid
      if rsc == Job
        Job.readable_by(*@read_users).where(uuid: uuid).each do |job|
          visited[uuid] = job.as_api_response
          if direction == :search_up
            # Follow upstream collections referenced in the script parameters
            find_collections(visited, job) do |hash, col_uuid|
              search_edges(visited, hash, :search_up) if hash
              search_edges(visited, col_uuid, :search_up) if col_uuid
            end
          elsif direction == :search_down
            # Follow downstream job output
            search_edges(visited, job.output, direction)
          end
        end
      elsif rsc == Container
        c = Container.readable_by(*@read_users).where(uuid: uuid).limit(1).first
        if c
          visited[uuid] = c.as_api_response
          if direction == :search_up
            # Follow upstream collections referenced in the container,
            # skipping the columns that hold its own products.
            find_collections(visited, c, ["log", "output"]) do |hash, col_uuid|
              search_edges(visited, hash, :search_up) if hash
              search_edges(visited, col_uuid, :search_up) if col_uuid
            end
          elsif direction == :search_down
            # Follow downstream container output
            search_edges(visited, c.output, :search_down)
          end
        end
      elsif rsc == ContainerRequest
        c = ContainerRequest.readable_by(*@read_users).where(uuid: uuid).limit(1).first
        if c
          visited[uuid] = c.as_api_response
          if direction == :search_up
            # Follow upstream collections, skipping the columns that
            # hold the request's own products.
            find_collections(visited, c, ["log_uuid", "output_uuid"]) do |hash, col_uuid|
              search_edges(visited, hash, :search_up) if hash
              search_edges(visited, col_uuid, :search_up) if col_uuid
            end
          elsif direction == :search_down
            # Follow downstream container request output
            search_edges(visited, c.output_uuid, :search_down)
          end
        end
      elsif rsc == Collection
        c = Collection.readable_by(*@read_users).where(uuid: uuid).limit(1).first
        if c
          if direction == :search_up
            visited[c.uuid] = c.as_api_response

            if !Rails.configuration.API.DisabledAPIs["jobs.list"]
              Job.readable_by(*@read_users).where(output: c.portable_data_hash).each do |job|
                search_edges(visited, job.uuid, :search_up)
              end

              Job.readable_by(*@read_users).where(log: c.portable_data_hash).each do |job|
                search_edges(visited, job.uuid, :search_up)
              end
            end

            ContainerRequest.readable_by(*@read_users).where(output_uuid: uuid).each do |cr|
              search_edges(visited, cr.uuid, :search_up)
            end

            ContainerRequest.readable_by(*@read_users).where(log_uuid: uuid).each do |cr|
              search_edges(visited, cr.uuid, :search_up)
            end
          elsif direction == :search_down
            # Downstream search continues from the collection's content
            # address rather than its uuid.
            search_edges(visited, c.portable_data_hash, :search_down)
          end
        end
      elsif rsc != nil
        # Any other known resource type: record it, but don't traverse further.
        rsc.where(uuid: uuid).each do |r|
          visited[uuid] = r.as_api_response
        end
      end
    end

    if direction == :search_up
      # Search for provenance links pointing to the current uuid
      Link.readable_by(*@read_users).
        where(head_uuid: uuid, link_class: "provenance").
        each do |link|
        visited[link.uuid] = link.as_api_response
        search_edges(visited, link.tail_uuid, direction)
      end
    elsif direction == :search_down
      # Search for provenance links emanating from the current uuid.
      # Uses @read_users, like every other lookup in this method.
      Link.readable_by(*@read_users).
        where(tail_uuid: uuid, link_class: "provenance").
        each do |link|
        visited[link.uuid] = link.as_api_response
        search_edges(visited, link.head_uuid, direction)
      end
    end
  end

  # Send, as JSON, everything found by searching upstream from @object.
  def provenance
    send_json provenance_graph(:search_up)
  end

  # Send, as JSON, everything found by searching downstream from @object.
  def used_by
    send_json provenance_graph(:search_down)
  end

  protected

  # Run search_edges from @object in the given direction and return
  # the resulting visited Hash. The search starts at @object[:uuid]
  # when it is set, otherwise at @object[:portable_data_hash].
  def provenance_graph(direction)
    visited = {}
    start = @object[:uuid] ? @object[:uuid] : @object[:portable_data_hash]
    search_edges(visited, start, direction)
    visited
  end

  def load_limit_offset_order_params(*args)
    super
    if action_name == 'index'
      # Omit manifest_text and unsigned_manifest_text from index results unless expressly selected.
      @select ||= model_class.selectable_attributes - ["manifest_text", "unsigned_manifest_text"]
    end
  end
end