1 # Copyright (C) The Arvados Authors. All rights reserved.
3 # SPDX-License-Identifier: AGPL-3.0
5 module ProvenanceHelper
8 def initialize(pdata, opts)
# Reduces a Keep locator string to its hint-free form.
#
# The argument is parsed with Keep::Locator.parse; strip_hints is then called
# on the parsed locator and the result is converted with to_s. Each step is
# chained through +andand+, so a nil at any stage makes the whole expression
# nil. Callers in this file use the result as a truthiness test for
# "is this a collection locator?".
#
# uuid - the value to parse as a Keep locator.
#
# Returns the stringified, hint-stripped locator, or nil when parsing (or
# hint stripping) yields nil.
def self.collection_uuid(uuid)
  Keep::Locator.parse(uuid).andand.strip_hints.andand.to_s
end
21 p = { :host => @opts[:request].host,
22 :port => @opts[:request].port,
23 :protocol => @opts[:request].protocol }
25 Rails.application.routes.url_helpers.url_for (p)
# Builds the DOT attribute fragment that colours a graph node.
#
# n - index into the fixed five-entry palette below, or nil (treated as 0).
#
# Returns a String of comma-separated DOT attributes (style, color, fillcolor,
# fontcolor) with no leading or trailing comma, ready to be spliced into a
# node's attribute list.
def determine_fillcolor(n)
  palette = %w(666666 669966 666699 666666 996666)
  # Indexing past the end of the palette gives nil, so fall back to grey.
  fillcolor = palette[n || 0] || '666666'
  %(style="filled",color="#ffffff",fillcolor="##{fillcolor}",fontcolor="#ffffff")
end
33 def describe_node(uuid, describe_opts={})
34 bgcolor = determine_fillcolor (describe_opts[:pip] || @opts[:pips].andand[uuid])
36 rsc = ArvadosBase::resource_class_for_uuid uuid
38 if GenerateGraph::collection_uuid(uuid) || rsc == Collection
39 if Collection.is_empty_blob_locator? uuid.to_s
41 return "\"#{uuid}\" [label=\"(empty collection)\"];\n"
44 if describe_opts[:col_uuid]
45 href = url_for ({:controller => Collection.to_s.tableize,
47 :id => describe_opts[:col_uuid].to_s })
49 href = url_for ({:controller => Collection.to_s.tableize,
54 return "\"#{uuid}\" [label=\"#{encode_quotes(describe_opts[:label] || (@pdata[uuid] and @pdata[uuid][:name]) || uuid)}\",shape=box,href=\"#{href}\",#{bgcolor}];\n"
57 if describe_opts[:href]
58 href = ",href=\"#{url_for ({:controller => describe_opts[:href][:controller],
60 :id => describe_opts[:href][:id] })}\""
62 return "\"#{uuid}\" [label=\"#{encode_quotes(describe_opts[:label] || uuid)}\",#{bgcolor},shape=#{describe_opts[:shape] || 'box'}#{href}];\n"
67 d = Digest::MD5.hexdigest(job[:script_parameters].to_json)
68 if @opts[:combine_jobs] == :script_only
69 uuid = "#{job[:script]}_#{d}"
70 elsif @opts[:combine_jobs] == :script_and_version
71 uuid = "#{job[:script]}_#{job[:script_version]}_#{d}"
73 uuid = "#{job[:uuid]}"
76 @jobs[uuid] = [] unless @jobs[uuid]
77 @jobs[uuid] << job unless @jobs[uuid].include? job
82 def edge(tail, head, extra)
83 if @opts[:direction] == :bottom_up
84 gr = "\"#{encode_quotes head}\" -> \"#{encode_quotes tail}\""
86 gr = "\"#{encode_quotes tail}\" -> \"#{encode_quotes head}\""
92 gr += "#{k}=\"#{encode_quotes v}\","
100 def script_param_edges(uuid, sp)
104 if @opts[:all_script_parameters]
105 if v.is_a? Array or v.is_a? Hash
106 encv = JSON.pretty_generate(v).gsub("\n", "\\l") + "\\l"
110 gr += "\"#{encode_quotes encv}\" [shape=box];\n"
111 gr += edge(encv, uuid, {:label => k})
117 def job_edges job, edge_opts={}
121 ProvenanceHelper::find_collections job[:script_parameters] do |collection_hash, collection_uuid, key|
123 gr += describe_node(collection_uuid)
124 gr += edge(collection_uuid, uuid, {:label => key})
126 gr += describe_node(collection_hash)
127 gr += edge(collection_hash, uuid, {:label => key})
131 if job[:docker_image_locator] and !@opts[:no_docker]
132 gr += describe_node(job[:docker_image_locator], {label: (job[:runtime_constraints].andand[:docker_image] || job[:docker_image_locator])})
133 gr += edge(job[:docker_image_locator], uuid, {label: "docker_image"})
136 if @opts[:script_version_nodes]
137 gr += describe_node(job[:script_version], {:label => "git:#{job[:script_version]}"})
138 gr += edge(job[:script_version], uuid, {:label => "script_version"})
141 if job[:output] and !edge_opts[:no_output]
142 gr += describe_node(job[:output])
143 gr += edge(uuid, job[:output], {label: "output" })
146 if job[:log] and !edge_opts[:no_log]
147 gr += describe_node(job[:log])
148 gr += edge(uuid, job[:log], {label: "log"})
154 def cr_edges cr, edge_opts={}
157 gr += describe_node(cr[:uuid], {href: {controller: 'container_requests',
162 children = @opts[:cr_children_of].andand[cr[:uuid]]
164 children.each do |child|
165 gr += edge(child[:uuid], cr[:uuid], {label: 'child'})
168 # Output collection node
169 if cr[:output_uuid] and @opts[:output_collections][cr[:output_uuid]]
170 c = @opts[:output_collections][cr[:output_uuid]]
171 gr += describe_node(c[:portable_data_hash],
176 gr += edge(cr[:uuid],
177 c[:portable_data_hash],
180 # Input collection nodes
181 output_pdhs = @opts[:output_collections].values.collect{|oc|
182 oc[:portable_data_hash]}
183 ProvenanceHelper::cr_input_pdhs(cr).each do |pdh|
184 if not output_pdhs.include?(pdh)
185 # Search for collections on the same project first
186 cols = @opts[:input_collections][pdh].andand.select{|ic|
187 ic[:owner_uuid] == cr[:owner_uuid]}
188 if not cols or cols.empty?
189 # Search for any collection with this PDH
190 cols = @opts[:input_collections][pdh]
193 names = cols.collect{|x| x[:name]}.uniq
195 names = ['(collection not found)']
197 input_name = names.first
199 input_name += " + #{names.length - 1} more"
201 gr += describe_node(pdh, {label: input_name})
203 gr += edge(pdh, cr[:uuid], {label: 'input'})
209 def container_edges cont, edge_opts={}
213 gr += describe_node(cont[:uuid], {href: {controller: 'containers',
217 ProvenanceHelper::find_collections cont[:mounts] do |collection_hash, collection_uuid, key|
218 if collection_uuid and @pdata[collection_uuid]
219 gr += describe_node(collection_uuid)
220 gr += edge(collection_uuid, uuid, {:label => key})
221 elsif collection_hash and @pdata[collection_hash]
222 gr += describe_node(collection_hash)
223 gr += edge(collection_hash, uuid, {:label => key})
227 if cont[:container_image] and !@opts[:no_docker] and @pdata[cont[:container_image]]
228 gr += describe_node(cont[:container_image], {label: cont[:container_image]})
229 gr += edge(cont[:container_image], uuid, {label: "docker_image"})
232 if cont[:output] and !edge_opts[:no_output] and @pdata[cont[:output]]
233 gr += describe_node(cont[:output])
234 gr += edge(uuid, cont[:output], {label: "output" })
237 if cont[:log] and !edge_opts[:no_log] and @pdata[cont[:log]]
238 gr += describe_node(cont[:log])
239 gr += edge(uuid, cont[:log], {label: "log"})
245 def generate_provenance_edges(uuid)
247 m = GenerateGraph::collection_uuid(uuid)
250 if uuid.nil? or uuid.empty? or @visited[uuid]
257 @visited[uuid] = true
260 if uuid.start_with? "component_"
261 # Pipeline component inputs
262 job = @pdata[@pdata[uuid][:job].andand[:uuid]]
265 gr += describe_node(job_uuid(job), {label: uuid[38..-1], pip: @opts[:pips].andand[job[:uuid]], shape: "oval",
266 href: {controller: 'jobs', id: job[:uuid]}})
267 gr += job_edges job, {no_output: true, no_log: true}
270 # Pipeline component output
271 outuuid = @pdata[uuid][:output_uuid]
273 outcollection = @pdata[outuuid]
275 gr += edge(job_uuid(job), outcollection[:portable_data_hash], {label: "output"})
276 gr += describe_node(outcollection[:portable_data_hash], {label: outcollection[:name]})
278 elsif job and job[:output]
279 gr += describe_node(job[:output])
280 gr += edge(job_uuid(job), job[:output], {label: "output" })
283 rsc = ArvadosBase::resource_class_for_uuid uuid
287 gr += job_edges job if job
288 elsif rsc == ContainerRequest
290 gr += cr_edges cr if cr
291 elsif rsc == Container
293 gr += container_edges cr if cr
297 @pdata.each do |k, link|
298 if link[:head_uuid] == uuid.to_s and link[:link_class] == "provenance"
299 href = url_for ({:controller => Link.to_s.tableize,
301 :id => link[:uuid] })
303 gr += describe_node(link[:tail_uuid])
304 gr += edge(link[:head_uuid], link[:tail_uuid], {:label => link[:name], :href => href})
305 gr += generate_provenance_edges(link[:tail_uuid])
315 href = url_for ({:controller => Job.to_s.tableize,
318 gr += "\"#{k}\" [href=\"#{href}?"
322 gr += ";" unless gr.end_with? "?"
323 gr += "uuid%5b%5d=#{u[:uuid]}"
324 n |= @opts[:pips][u[:uuid]] if @opts[:pips] and @opts[:pips][u[:uuid]]
329 label = "#{v[0][:script]}"
331 if label == "run-command" and v[0][:script_parameters][:command].is_a? Array
332 label = v[0][:script_parameters][:command].join(' ')
335 if not @opts[:combine_jobs]
336 label += "\\n#{v[0][:finished_at]}"
339 gr += encode_quotes label
341 gr += "\",#{determine_fillcolor n}];\n"
# Escapes a value so it can sit inside a double-quoted DOT string.
#
# The value is converted with to_s; every double quote becomes \" and every
# newline character becomes the two-character sequence \n. Backslashes are
# intentionally left untouched: callers in this file pass pre-built DOT escape
# sequences (for example "\l" line breaks) through this method and rely on
# them surviving unchanged.
#
# value - any object; nil becomes the empty string via to_s.
#
# Returns the escaped String.
def encode_quotes(value)
  # Single pass: the hash maps each matched character to its replacement.
  value.to_s.gsub(/["\n]/, '"' => '\\"', "\n" => '\\n')
end
351 def self.create_provenance_graph(pdata, svgId, opts={})
352 if pdata.is_a? Array or pdata.is_a? ArvadosResourceList
355 p2[k[:uuid]] = k if k[:uuid]
360 unless pdata.is_a? Hash
361 raise "create_provenance_graph accepts Array or Hash for pdata only, pdata is #{pdata.class}"
364 gr = """strict digraph {
365 node [fontsize=10,fontname=\"Helvetica,Arial,sans-serif\"];
366 edge [fontsize=10,fontname=\"Helvetica,Arial,sans-serif\"];
370 if opts[:direction] == :bottom_up
371 gr += "edge [dir=back];"
375 pdata = pdata.stringify_keys
377 g = GenerateGraph.new(pdata, opts)
380 if !opts[:only_components] or k.start_with? "component_"
381 gr += g.generate_provenance_edges(k)
383 #gr += describe_node(k)
387 if !opts[:only_components]
388 gr += g.describe_jobs
392 Rails.logger.warn "#{e.inspect}"
393 Rails.logger.warn "#{e.backtrace.join("\n\t")}"
402 Open3.popen2("dot", "-Tsvg") do |stdin, stdout, wait_thr|
410 svg = svg.sub(/<\?xml.*?\?>/m, "")
411 svg = svg.sub(/<!DOCTYPE.*?>/m, "")
412 svg = svg.sub(/<svg /, "<svg id=\"#{svgId}\" ")
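# The block is called with (content_hash, uuid, key). Which of the first two
# argument positions is filled indicates whether the match is a content hash
# or an Arvados UUID: one holds the matched value, the other is always nil.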
418 def self.find_collections(sp, key=nil, &b)
421 sp.class.columns.each do |c|
422 find_collections(sp[c.name.to_sym], nil, &b)
426 find_collections(v, key || k, &b)
430 find_collections(v, key, &b)
433 if m = /[a-f0-9]{32}\+\d+/.match(sp)
435 elsif m = /[0-9a-z]{5}-4zz18-[0-9a-z]{15}/.match(sp)
441 def self.cr_input_pdhs cr
443 input_obj = cr[:mounts].andand[:"/var/lib/cwl/cwl.input.json"].andand[:content] || cr[:mounts]
445 find_collections input_obj do |col_hash, col_uuid, key|