1 module ProvenanceHelper
# Constructor: receives the provenance data (pdata) and the options hash (opts).
# NOTE(review): the body is not visible in this view. Other methods in this
# file read @pdata, @opts, @jobs and @visited, so presumably they are
# initialised here -- confirm.
4 def initialize(pdata, opts)
# Parse +uuid+ with Keep::Locator and return the locator, with
# strip_hints applied, as a String.
#
# Returns nil when the parse result (or the result of strip_hints) is
# nil, because each step is guarded with andand.
def self.collection_uuid(uuid)
  locator = Keep::Locator.parse(uuid)
  without_hints = locator.andand.strip_hints
  without_hints.andand.to_s
end
# NOTE(review): the enclosing `def` line is not visible here. Other methods
# in this file call a local `url_for` helper, which is presumably this
# method -- confirm.
# Base URL options (host, port, protocol) are read from the request object
# stored in @opts[:request].
17 p = { :host => @opts[:request].host,
18 :port => @opts[:request].port,
19 :protocol => @opts[:request].protocol }
# Hand the assembled options to the Rails route helpers to build the URL.
21 Rails.application.routes.url_helpers.url_for (p)
# Build the Graphviz attribute string that colours a node.
#
# n - index into the fixed five-entry fill-colour palette. nil selects
#     entry 0. An index outside the palette, including a negative
#     number, falls back to the default colour '666666'.
#
# Returns a String of comma-separated DOT attributes: style, color,
# fillcolor and fontcolor.
def determine_fillcolor(n)
  palette = %w(666666 669966 666699 666666 996666)
  index = n || 0
  # Ruby arrays accept negative indices and count from the end, which
  # would silently pick an unrelated palette entry; treat a negative
  # index as out of range instead.
  negative = index.is_a?(Numeric) && index < 0
  fillcolor = (!negative && palette[index]) || '666666'
  "style=\"filled\",color=\"#ffffff\",fillcolor=\"##{fillcolor}\",fontcolor=\"#ffffff\""
end
# Return the DOT node statement (a String ending in ";\n") for +uuid+.
#
# describe_opts keys read below: :pip, :label, :href (a hash with
# :controller and :id) and :shape.
29 def describe_node(uuid, describe_opts={})
# Fill colour comes from the explicit :pip option, else from @opts[:pips][uuid].
30 bgcolor = determine_fillcolor (describe_opts[:pip] || @opts[:pips].andand[uuid])
32 rsc = ArvadosBase::resource_class_for_uuid uuid
# Collection branch: taken when the uuid parses as a Keep locator or its
# resource class is Collection.
34 if GenerateGraph::collection_uuid(uuid) || rsc == Collection
35 if Collection.is_empty_blob_locator? uuid.to_s
# The empty blob gets a fixed label and no link.
37 return "\"#{uuid}\" [label=\"(empty collection)\"];\n"
# Otherwise the node is a box linking to the collections controller. Its
# label is the explicit :label, else the :name recorded in @pdata for this
# uuid, else the uuid itself.
40 href = url_for ({:controller => Collection.to_s.tableize,
44 return "\"#{uuid}\" [label=\"#{encode_quotes(describe_opts[:label] || (@pdata[uuid] and @pdata[uuid][:name]) || uuid)}\",shape=box,href=\"#{href}\",#{bgcolor}];\n"
# Non-collection nodes: an optional link target supplied by the caller.
47 if describe_opts[:href]
48 href = ",href=\"#{url_for ({:controller => describe_opts[:href][:controller],
50 :id => describe_opts[:href][:id] })}\""
# Generic node: label defaults to the uuid, shape defaults to 'box'.
52 return "\"#{uuid}\" [label=\"#{encode_quotes(describe_opts[:label] || uuid)}\",#{bgcolor},shape=#{describe_opts[:shape] || 'box'}#{href}];\n"
# NOTE(review): the enclosing `def` line is not visible here. This appears
# to be the body of the `job_uuid(job)` helper called elsewhere in this
# file -- confirm.
# MD5 digest of the job's script parameters (serialised as JSON); it is
# part of the combined-job keys built below.
57 d = Digest::MD5.hexdigest(job[:script_parameters].to_json)
# @opts[:combine_jobs] selects the key: script + digest, script + version +
# digest, or (the remaining assignment) the job's own uuid.
58 if @opts[:combine_jobs] == :script_only
59 uuid = "#{job[:script]}_#{d}"
60 elsif @opts[:combine_jobs] == :script_and_version
61 uuid = "#{job[:script]}_#{job[:script_version]}_#{d}"
63 uuid = "#{job[:uuid]}"
# Record the job under its key in @jobs, skipping jobs already listed.
66 @jobs[uuid] = [] unless @jobs[uuid]
67 @jobs[uuid] << job unless @jobs[uuid].include? job
# Build a DOT edge statement between the nodes +tail+ and +head+.
#
# extra - presumably a hash of edge attributes; the visible code emits
#         k="v", pairs with each value passed through encode_quotes
#         (the loop that yields k and v is not visible here -- confirm).
72 def edge(tail, head, extra)
# With @opts[:direction] == :bottom_up the two endpoints are written in
# reverse order (head first).
73 if @opts[:direction] == :bottom_up
74 gr = "\"#{encode_quotes head}\" -> \"#{encode_quotes tail}\""
76 gr = "\"#{encode_quotes tail}\" -> \"#{encode_quotes head}\""
# Append one attribute; the value is escaped, the key is emitted as-is.
82 gr += "#{k}=\"#{encode_quotes v}\","
# Build DOT nodes and edges for the script parameters +sp+ that feed the
# node +uuid+.
# NOTE(review): the iteration that yields k and v is not visible here;
# presumably it walks the entries of sp -- confirm.
90 def script_param_edges(uuid, sp)
# The value nodes below are only emitted when
# @opts[:all_script_parameters] is set.
94 if @opts[:all_script_parameters]
95 if v.is_a? Array or v.is_a? Hash
# Arrays and hashes are pretty-printed as JSON, with every newline replaced
# by the two characters \l and one more \l appended (in Graphviz labels \l
# ends a left-justified line).
96 encv = JSON.pretty_generate(v).gsub("\n", "\\l") + "\\l"
# The encoded value is used as the node id of a box node, which is then
# connected to uuid by an edge labelled with the parameter key.
100 gr += "\"#{encode_quotes encv}\" [shape=box];\n"
101 gr += edge(encv, uuid, {:label => k})
# NOTE(review): the enclosing `def` line is not visible here. This appears
# to be the body of the `cr_input_pdhs` helper called from
# generate_provenance_edges -- confirm.
# Prefer the :content of the "/var/lib/cwl/cwl.input.json" mount; when it is
# absent, fall back to the whole mounts hash.
109 input_obj = cr[:mounts].andand[:"/var/lib/cwl/cwl.input.json"].andand[:content] || cr[:mounts]
# Walk the input object; the block receives (col_hash, col_uuid, key) for
# each collection reference that find_collections yields.
111 ProvenanceHelper::find_collections input_obj do |col_hash, col_uuid, key|
# Build the DOT nodes and edges that surround one job: its input
# collections, docker image, script version, output and log.
#
# job       - job record, indexed with symbol keys such as
#             :script_parameters, :docker_image_locator, :output and :log.
# edge_opts - :no_output and :no_log suppress the output and log nodes.
#
# NOTE(review): the local `uuid` used as the job's node id is assigned on a
# line not visible here -- confirm where it comes from.
120 def job_edges job, edge_opts={}
# Inputs: every collection reference found in the script parameters. The
# block receives a content hash and a uuid; the visible lines add a node
# and a key-labelled edge for one or the other (the condition choosing
# between them is not visible here).
124 ProvenanceHelper::find_collections job[:script_parameters] do |collection_hash, collection_uuid, key|
126 gr += describe_node(collection_uuid)
127 gr += edge(collection_uuid, uuid, {:label => key})
129 gr += describe_node(collection_hash)
130 gr += edge(collection_hash, uuid, {:label => key})
# Docker image node, unless @opts[:no_docker] is set. Its label prefers
# runtime_constraints[:docker_image] over the raw locator.
134 if job[:docker_image_locator] and !@opts[:no_docker]
135 gr += describe_node(job[:docker_image_locator], {label: (job[:runtime_constraints].andand[:docker_image] || job[:docker_image_locator])})
136 gr += edge(job[:docker_image_locator], uuid, {label: "docker_image"})
# Optional node for the script version, labelled "git:<version>".
139 if @opts[:script_version_nodes]
140 gr += describe_node(job[:script_version], {:label => "git:#{job[:script_version]}"})
141 gr += edge(job[:script_version], uuid, {:label => "script_version"})
# Output collection: edge runs from the job to the output.
144 if job[:output] and !edge_opts[:no_output]
145 gr += describe_node(job[:output])
146 gr += edge(uuid, job[:output], {label: "output" })
# Log collection: edge runs from the job to the log.
149 if job[:log] and !edge_opts[:no_log]
150 gr += describe_node(job[:log])
151 gr += edge(uuid, job[:log], {label: "log"})
# Build the DOT nodes and edges describing the provenance of +uuid+.
#
# The visible branches handle pipeline components (uuids starting with
# "component_"), container requests, and "provenance" links stored in
# @pdata; for links the method recurses into the link's tail uuid.
# Visited uuids are remembered in @visited.
157 def generate_provenance_edges(uuid)
159 m = GenerateGraph::collection_uuid(uuid)
# Guard on missing, empty or already-visited uuids (the action taken is on
# a line not visible here).
162 if uuid.nil? or uuid.empty? or @visited[uuid]
169 @visited[uuid] = true
172 if uuid.start_with? "component_"
173 # Pipeline component inputs
# The component entry in @pdata refers to its job by uuid; the job record
# is looked up in @pdata as well.
174 job = @pdata[@pdata[uuid][:job].andand[:uuid]]
# The node label is the uuid with its first 38 characters dropped
# (presumably the "component_<uuid>_" prefix -- confirm).
177 gr += describe_node(job_uuid(job), {label: uuid[38..-1], pip: @opts[:pips].andand[job[:uuid]], shape: "oval",
178 href: {controller: 'jobs', id: job[:uuid]}})
# Output and log edges are suppressed here; the output is drawn below.
179 gr += job_edges job, {no_output: true, no_log: true}
182 # Pipeline component output
183 outuuid = @pdata[uuid][:output_uuid]
185 outcollection = @pdata[outuuid]
# When the output collection record is available, the output node is keyed
# by its portable data hash and labelled with the collection name.
187 gr += edge(job_uuid(job), outcollection[:portable_data_hash], {label: "output"})
188 gr += describe_node(outcollection[:portable_data_hash], {label: outcollection[:name]})
# Otherwise fall back to the job's own :output value.
190 elsif job and job[:output]
191 gr += describe_node(job[:output])
192 gr += edge(job_uuid(job), job[:output], {label: "output" })
# Not a component: dispatch on the resource class of the uuid.
195 rsc = ArvadosBase::resource_class_for_uuid uuid
199 gr += job_edges job if job
200 elsif rsc == ContainerRequest
# Collect the output collection uuids and input portable data hashes of
# this container request, then of its children.
206 col_uuids << cr[:output_uuid] if cr[:output_uuid]
207 col_pdhs += cr_input_pdhs(cr)
208 # Search for child CRs
209 if cr[:container_uuid]
210 child_crs = ContainerRequest.where(requesting_container_uuid: cr[:container_uuid])
211 child_crs.each do |child|
212 col_uuids << child[:output_uuid] if child[:output_uuid]
213 col_pdhs += cr_input_pdhs(child)
217 output_cols = {} # Indexed by UUID
218 input_cols = {} # Indexed by PDH
220 # Batch requests to get all related collections
221 Collection.filter([['uuid', 'in', col_uuids.uniq]]).each do |c|
222 output_cols[c[:uuid]] = c
# Input collections are fetched by portable data hash, excluding hashes
# already covered by the output collections. Several collections can share
# one hash, so input_cols maps each hash to an array.
224 output_pdhs = output_cols.values.map{|c| c[:portable_data_hash]}.uniq
225 Collection.filter([['portable_data_hash', 'in', col_pdhs - output_pdhs]]).each do |c|
226 if input_cols[c[:portable_data_hash]]
227 input_cols[c[:portable_data_hash]] << c
229 input_cols[c[:portable_data_hash]] = [c]
# NOTE(review): in the visible lines child_crs is only assigned inside the
# `if cr[:container_uuid]` branch; confirm that `.results` is valid when
# that branch is skipped.
235 all_cr_nodes = [cr] + child_crs.results
237 # First pass: add the CR nodes with their outputs, because they're
238 # referenced by UUID.
239 all_cr_nodes.each do |cr_node|
241 gr += describe_node(cr_node[:uuid], {href: {controller: 'container_requests',
243 label: cr_node[:name],
245 # Connect child CRs with the main one
247 gr += edge(cr_node[:uuid], cr[:uuid], {label: 'child'})
249 # Output collection node
250 if cr_node[:output_uuid] and output_cols[cr_node[:output_uuid]]
251 c = output_cols[cr_node[:output_uuid]]
# Remember the hash so the second pass does not draw it again as an input.
252 visited_pdhs << c[:portable_data_hash]
253 gr += describe_node(c[:portable_data_hash], {label: c[:name]})
254 gr += edge(cr_node[:uuid], c[:portable_data_hash], {label: 'output'})
258 # Second pass: add the input collection nodes.
259 all_cr_nodes.each do |cr_node|
260 cr_input_pdhs(cr_node).each do |pdh|
261 if not visited_pdhs.include?(pdh)
264 # First search for collections within the CR project
265 cols = input_cols[pdh].select{|x| x[:owner_uuid] == cr_node[:owner_uuid]}
267 # Search for any collection with this PDH
268 cols = input_cols[pdh]
# Label with the first distinct collection name; the line below appends a
# count of the remaining distinct names.
270 names = cols.collect{|x| x[:name]}.uniq
271 input_name = names.first
273 input_name += " + #{names.length - 1} others"
276 # No collection found by this PDH
279 gr += describe_node(pdh, {label: input_name})
281 gr += edge(pdh, cr_node[:uuid], {label: 'input'})
# Provenance links: for every @pdata entry whose head is this uuid, draw
# the tail node and an edge carrying the link name and a URL for the link,
# then recurse into the tail.
288 @pdata.each do |k, link|
289 if link[:head_uuid] == uuid.to_s and link[:link_class] == "provenance"
290 href = url_for ({:controller => Link.to_s.tableize,
292 :id => link[:uuid] })
294 gr += describe_node(link[:tail_uuid])
295 gr += edge(link[:head_uuid], link[:tail_uuid], {:label => link[:name], :href => href})
296 gr += generate_provenance_edges(link[:tail_uuid])
# NOTE(review): the enclosing `def` line and the loops that yield k, v and
# u are not visible here. This appears to be the body of `describe_jobs`
# (invoked from create_provenance_graph) iterating over @jobs -- confirm.
# Each job group k becomes one node whose href points at the jobs
# controller, followed by a query string of uuid%5b%5d=<uuid> entries
# separated by ";".
306 href = url_for ({:controller => Job.to_s.tableize,
309 gr += "\"#{k}\" [href=\"#{href}?"
313 gr += ";" unless gr.end_with? "?"
314 gr += "uuid%5b%5d=#{u[:uuid]}"
# OR together the @opts[:pips] values of the jobs in the group; the result
# is passed to determine_fillcolor below.
315 n |= @opts[:pips][u[:uuid]] if @opts[:pips] and @opts[:pips][u[:uuid]]
# The label starts as the script name of the first job in the group.
320 label = "#{v[0][:script]}"
# For "run-command" jobs whose :command parameter is an Array, the joined
# command line is used instead.
322 if label == "run-command" and v[0][:script_parameters][:command].is_a? Array
323 label = v[0][:script_parameters][:command].join(' ')
# When jobs are not combined, the finish time is added on a second line.
326 if not @opts[:combine_jobs]
327 label += "\\n#{v[0][:finished_at]}"
330 gr += encode_quotes label
332 gr += "\",#{determine_fillcolor n}];\n"
# Escape a value for embedding inside a double-quoted DOT string.
#
# value - any object; it is converted with to_s first, so nil becomes "".
#
# Returns a new String in which every double quote is preceded by a
# backslash and every newline is replaced by the two characters \n.
# Existing backslashes are left untouched, so sequences a caller has
# already escaped (script_param_edges passes \l) come through unchanged.
def encode_quotes(value)
  value.to_s.gsub("\"", "\\\"").gsub("\n", "\\n")
end
# Render provenance data as an inline SVG graph.
#
# pdata - an Array or ArvadosResourceList of records (re-indexed by each
#         record's :uuid) or a Hash; anything else raises a RuntimeError.
# svgId - value written into the id attribute of the resulting <svg> tag.
# opts  - options hash; keys read in the visible lines are :direction
#         (:bottom_up adds "edge [dir=back];") and :only_components
#         (restricts edge generation to keys starting with "component_"
#         and skips the job descriptions). The hash is also handed to
#         GenerateGraph.new.
#
# The visible steps: build a "strict digraph" DOT source, add the edges
# from GenerateGraph#generate_provenance_edges and the nodes from
# GenerateGraph#describe_jobs, log any exception `e` with
# Rails.logger.warn, run `dot -Tsvg` through Open3.popen2, then strip the
# XML declaration and DOCTYPE from the output and inject the id attribute.
# NOTE(review): the rescue clause, the popen2 body and the return value are
# on lines not visible here -- confirm before relying on them.
342 def self.create_provenance_graph(pdata, svgId, opts={})
# List-like input is converted to a hash keyed by uuid; records without a
# :uuid are skipped.
343 if pdata.is_a? Array or pdata.is_a? ArvadosResourceList
346 p2[k[:uuid]] = k if k[:uuid]
351 unless pdata.is_a? Hash
352 raise "create_provenance_graph accepts Array or Hash for pdata only, pdata is #{pdata.class}"
355 gr = """strict digraph {
356 node [fontsize=10,fontname=\"Helvetica,Arial,sans-serif\"];
357 edge [fontsize=10,fontname=\"Helvetica,Arial,sans-serif\"];
360 if opts[:direction] == :bottom_up
361 gr += "edge [dir=back];"
365 pdata = pdata.stringify_keys
367 g = GenerateGraph.new(pdata, opts)
370 if !opts[:only_components] or k.start_with? "component_"
371 gr += g.generate_provenance_edges(k)
373 #gr += describe_node(k)
377 if !opts[:only_components]
378 gr += g.describe_jobs
382 Rails.logger.warn "#{e.inspect}"
383 Rails.logger.warn "#{e.backtrace.join("\n\t")}"
392 Open3.popen2("dot", "-Tsvg") do |stdin, stdout, wait_thr|
400 svg = svg.sub(/<\?xml.*?\?>/m, "")
401 svg = svg.sub(/<!DOCTYPE.*?>/m, "")
402 svg = svg.sub(/<svg /, "<svg id=\"#{svgId}\" ")
406 # Position indicates whether it is a content hash or arvados uuid.
407 # One will hold a value, the other will always be nil.
408 def self.find_collections(sp, key=nil, &b)
411 sp.class.columns.each do |c|
412 find_collections(sp[c.name.to_sym], nil, &b)
416 find_collections(v, key || k, &b)
420 find_collections(v, key, &b)
423 if m = /[a-f0-9]{32}\+\d+/.match(sp)
425 elsif m = /[0-9a-z]{5}-4zz18-[0-9a-z]{15}/.match(sp)