# Color-coded graphical pipeline comparison, alternate approach to Tom's
# [arvados.git] / apps / workbench / app / helpers / provenance_helper.rb
require 'open3'

# Helpers that turn provenance records (jobs, collections and "provenance"
# links) into a Graphviz graph rendered as SVG.
module ProvenanceHelper

  # Accumulates the DOT statements (node descriptions and edges) for one
  # provenance graph.
  #
  # NOTE(review): values taken from the records (names, file names, script
  # parameters, link names) are interpolated into quoted DOT strings without
  # escaping; a value containing a double quote yields invalid DOT. Confirm
  # whether inputs are sanitized upstream.
  class GenerateGraph
    # pdata:: Hash of records indexed by uuid. create_provenance_graph builds
    #         it with Symbol keys, and generate_provenance_edges looks records
    #         up by Symbol.
    # opts::  Hash of options. Keys read by this class:
    #         :pips                  - Hash mapping uuid to an Integer passed
    #                                  to determine_fillcolor
    #         :combine_jobs          - :script_only, :script_and_version, or
    #                                  anything else for one node per job uuid
    #         :direction             - :bottom_up flips the edge direction
    #         :all_script_parameters - also draw non-collection scalar
    #                                  script parameters
    #         :script_version_nodes  - draw a node for each job's
    #                                  script_version
    def initialize(pdata, opts)
      @pdata = pdata
      @opts = opts
      @visited = {}
      @jobs = {}
    end

    # Returns the collection locator contained in +uuid+ (32 hex digits plus
    # an optional "+size"), with any further "+hint" suffixes stripped, or
    # nil when +uuid+ (converted with to_s) is not a collection locator.
    #
    # The pattern is anchored to the whole string (\A, \z) so that a
    # multi-line value that merely contains a locator on one of its lines is
    # not mistaken for a collection.
    def self.collection_uuid(uuid)
      m = /\A([a-f0-9]{32}(\+[0-9]+)?)(\+.*)?\z/.match(uuid.to_s)
      m ? m[1] : nil
    end

    # Returns the DOT attributes that fill a node for the value +n+
    # (1, 2 or 3), or an empty string for any other value.
    def determine_fillcolor(n)
      case n
      when 1 then "style=filled,fillcolor=\"#88ff88\""
      when 2 then "style=filled,fillcolor=\"#8888ff\""
      when 3 then "style=filled,fillcolor=\"#88ffff\""
      else ""
      end
    end

    # Returns the DOT node statement for +uuid+ (String or Symbol).
    #
    # Collections get a link to their page and are labelled, in order of
    # preference, with the record's :name, with up to three of its file
    # names, or with the resource class name. Anything else is emitted as a
    # bare node carrying only the fill colour.
    def describe_node(uuid)
      bgcolor = determine_fillcolor(lookup(@opts[:pips], uuid)) if @opts[:pips]

      rsc = ArvadosBase::resource_class_for_uuid uuid.to_s
      return "\"#{uuid}\" [#{bgcolor}];\n" unless rsc == Collection

      href = "/#{rsc.to_s.underscore.pluralize}/#{uuid}"

      record = lookup(@pdata, uuid)
      if record
        if record[:name]
          return "\"#{uuid}\" [label=\"#{record[:name]}\",href=\"#{href}\",shape=oval,#{bgcolor}];\n"
        end

        # Records may expose their files through a method or through [].
        files = record.respond_to?(:files) ? record.files : record[:files]
        if files
          # Each file entry is indexed with [1] to obtain the text shown.
          label = files.first(3).map { |f| f[1] }.join("\\n")
          label += "\\n&vellip;" if files.length > 3
          return "\"#{uuid}\" [label=\"#{label}\",href=\"#{href}\",shape=oval,#{bgcolor}];\n"
        end
      end

      "\"#{uuid}\" [label=\"#{rsc}\",href=\"#{href}\",#{bgcolor}];\n"
    end

    # Returns the node id used for +job+ according to @opts[:combine_jobs]
    # and remembers the job under that id so describe_jobs can emit the node
    # later.
    def job_uuid(job)
      uuid = case @opts[:combine_jobs]
             when :script_only
               "#{job[:script]}"
             when :script_and_version
               "#{job[:script]}_#{job[:script_version]}"
             else
               "#{job[:uuid]}"
             end

      members = (@jobs[uuid] ||= [])
      members << job unless members.include? job

      uuid
    end

    # Returns one DOT edge statement between +tail+ and +head+. The edge is
    # written head -> tail unless @opts[:direction] is :bottom_up. +extra+ is
    # a Hash of edge attributes; each is written as key="value" followed by
    # a comma.
    def edge(tail, head, extra)
      gr = if @opts[:direction] == :bottom_up
             "\"#{tail}\" -> \"#{head}\""
           else
             "\"#{head}\" -> \"#{tail}\""
           end
      unless extra.empty?
        gr += "[" + extra.map { |k, v| "#{k}=\"#{v}\"," }.join + "]"
      end
      gr + ";\n"
    end

    # Returns the DOT statements for the script parameters +sp+ of +job+.
    # +prefix+ is the label accumulated so far ("a::b", "a[0]", ...).
    #
    # Hashes and arrays are walked recursively. Collection locators become
    # edges and are followed with generate_provenance_edges. Non-collection
    # array elements are grouped into a single literal node; other scalars
    # are drawn only when @opts[:all_script_parameters] is set.
    def script_param_edges(job, prefix, sp)
      gr = ""
      # Scalars such as numbers or true do not respond to empty?, so only ask
      # containers and strings.
      return gr if !sp || (sp.respond_to?(:empty?) && sp.empty?)

      case sp
      when Hash
        sp.each do |k, v|
          key = prefix.size > 0 ? prefix + "::" + k.to_s : k
          gr += script_param_edges(job, key.to_s, v)
        end
      when Array
        literals = []
        sp.each_with_index do |v, i|
          if GenerateGraph::collection_uuid(v)
            gr += script_param_edges(job, "#{prefix}[#{i}]", v)
          else
            literals << v.to_s
          end
        end
        unless literals.empty?
          node = "['" + literals.join("', '") + "']"
          gr += "\"#{node}\" [label=\"#{node}\"];\n"
          gr += edge(job_uuid(job), node, {:label => prefix})
        end
      else
        m = GenerateGraph::collection_uuid(sp)
        if m
          gr += edge(job_uuid(job), m, {:label => prefix})
          gr += generate_provenance_edges(m)
        elsif @opts[:all_script_parameters]
          gr += "\"#{sp}\" [label=\"#{sp}\"];\n"
          gr += edge(job_uuid(job), sp, {:label => prefix})
        end
      end
      gr
    end

    # Returns the DOT statements reachable from +uuid+ (String or Symbol).
    #
    # Returns "" for nil, empty or already visited uuids. A uuid without a
    # record in @pdata is only described, not followed. For a collection the
    # jobs whose :output or :log equals it are followed; for a job its
    # script parameters are followed; finally every record that is a
    # "provenance" link whose :head_uuid is this uuid is followed to its
    # :tail_uuid.
    def generate_provenance_edges(uuid)
      m = GenerateGraph::collection_uuid(uuid)
      uuid = m if m

      # to_sym works for both String and Symbol input.
      uuid = uuid.to_sym if uuid

      return "" if !uuid || uuid.empty? || @visited[uuid]
      return describe_node(uuid) unless @pdata[uuid]

      @visited[uuid] = true
      gr = ""

      if m
        # uuid is a collection
        gr += describe_node(uuid)

        @pdata.each do |_k, job|
          if job[:output] == uuid.to_s
            gr += edge(uuid, job_uuid(job), {:label => "output"})
            gr += generate_provenance_edges(job[:uuid])
          end
          if job[:log] == uuid.to_s
            gr += edge(uuid, job_uuid(job), {:label => "log"})
            gr += generate_provenance_edges(job[:uuid])
          end
        end
      else
        # uuid is something else
        rsc = ArvadosBase::resource_class_for_uuid uuid.to_s

        if rsc == Job
          # Job nodes themselves are emitted later by describe_jobs.
          job = @pdata[uuid]
          gr += script_param_edges(job, "", job[:script_parameters])

          if @opts[:script_version_nodes]
            gr += describe_node(job[:script_version])
            gr += edge(job_uuid(job), job[:script_version], {:label => "script_version"})
          end
        else
          gr += describe_node(uuid)
        end
      end

      @pdata.each do |_k, link|
        if link[:head_uuid] == uuid.to_s && link[:link_class] == "provenance"
          gr += describe_node(link[:tail_uuid])
          gr += edge(link[:head_uuid], link[:tail_uuid], {:label => link[:name], :href => "/links/#{link[:uuid]}"})
          gr += generate_provenance_edges(link[:tail_uuid])
        end
      end

      gr
    end

    # Returns the DOT node statements for every job node id handed out by
    # job_uuid. Each node links to /jobs with the uuids of all jobs grouped
    # under it, is labelled with the first job's script (plus :finished_at
    # when jobs are not combined) and is filled according to the OR of the
    # grouped jobs' :pips values.
    def describe_jobs
      gr = ""
      @jobs.each do |k, v|
        query = ""
        n = 0
        v.each do |u|
          query += "uuid%5b%5d=#{u[:uuid]}&"
          pip = @opts[:pips] && @opts[:pips][u[:uuid].to_sym]
          n |= pip if pip
        end

        first = v[0]
        label = case @opts[:combine_jobs]
                when :script_only, :script_and_version
                  "#{first[:script]}"
                else
                  "#{first[:script]}\\n#{first[:finished_at]}"
                end

        gr += "\"#{k}\" [href=\"/jobs?#{query}\",label=\"#{label}\",#{determine_fillcolor n}];\n"
      end
      gr
    end

    private

    # Looks +uuid+ up in +table+ as given and, failing that, as a Symbol.
    # Callers pass String uuids (e.g. link[:tail_uuid]) while the tables
    # built in this file are keyed by Symbol.
    def lookup(table, uuid)
      table[uuid] || table[uuid.to_s.to_sym]
    end

  end

  # Renders the provenance graph for +pdata+ and returns it as an SVG string
  # with the XML declaration and DOCTYPE removed.
  #
  # pdata:: Hash of records indexed by uuid, or an Array /
  #         ArvadosResourceList of records, which is indexed here by each
  #         record's :uuid as a Symbol (records without :uuid are skipped).
  # opts::  options passed on to GenerateGraph; :direction => :bottom_up also
  #         adds "edge [dir=back];" to the graph.
  #
  # Raises RuntimeError when pdata is none of the accepted types. Runs the
  # external "dot" program.
  def self.create_provenance_graph(pdata, opts={})
    if pdata.is_a?(Array) || pdata.is_a?(ArvadosResourceList)
      indexed = {}
      pdata.each do |rec|
        indexed[rec[:uuid].to_sym] = rec if rec[:uuid]
      end
      pdata = indexed
    end

    unless pdata.is_a? Hash
      raise "create_provenance_graph accepts Array or Hash for pdata only, pdata is #{pdata.class}"
    end

    gr = "strict digraph {\n" \
         "node [fontsize=8,shape=box];\n" \
         "edge [fontsize=8];\n"

    gr += "edge [dir=back];" if opts[:direction] == :bottom_up

    g = GenerateGraph.new(pdata, opts)

    pdata.each_key do |k|
      gr += g.generate_provenance_edges(k)
    end

    gr += g.describe_jobs

    gr += "}"

    # capture2 feeds stdin and drains stdout itself, so neither pipe can
    # block the other. The exit status is ignored, as before.
    svg, _status = Open3.capture2("dot", "-Tsvg", :stdin_data => gr)

    svg.sub(/<\?xml.*?\?>/m, "").sub(/<!DOCTYPE.*?>/m, "")
  end

  # Returns an Array of the collection locators found anywhere in the
  # (possibly nested) script parameters +sp+, in traversal order. Hash keys
  # are not inspected, only values.
  def self.find_collections(sp)
    # Scalars such as numbers or true do not respond to empty?.
    return [] if !sp || (sp.respond_to?(:empty?) && sp.empty?)

    case sp
    when Hash
      sp.values.flat_map { |v| find_collections(v) }
    when Array
      sp.flat_map { |v| find_collections(v) }
    else
      m = GenerateGraph::collection_uuid(sp)
      m ? [m] : []
    end
  end
end