if params["tab_pane"] == "Provenance_graph"
@prov_svg = ProvenanceHelper::create_provenance_graph(@object.provenance, "provenance_svg",
{:request => request,
- :direction => :top_down,
+ :direction => "RL",
:combine_jobs => :script_only}) rescue nil
end
if params["tab_pane"] == "Used_by"
@used_by_svg = ProvenanceHelper::create_provenance_graph(@object.used_by, "used_by_svg",
{:request => request,
- :direction => :top_down,
+ :direction => "LR",
:combine_jobs => :script_only,
:pdata_only => true}) rescue nil
end
def generate_provenance(cr)
return if params['tab_pane'] != "Provenance"
- nodes = {cr[:uuid] => cr}
+ nodes = {}
child_crs = []
col_uuids = []
col_pdhs = []
end
end
- output_cols = {} # Indexed by UUID
- input_cols = {} # Indexed by PDH
+ if nodes.length == 0
+ nodes[cr[:uuid]] = cr
+ end
+
+ pdh_to_col = {} # Indexed by PDH
output_pdhs = []
# Batch requests to get all related collections
# First fetch output collections by UUID.
Collection.filter([['uuid', 'in', col_uuids.uniq]]).each do |c|
- output_cols[c[:uuid]] = c
output_pdhs << c[:portable_data_hash]
+ pdh_to_col[c[:portable_data_hash]] = c
+ nodes[c[:uuid]] = c
end
- # Then, get only input collections by PDH. There could be more than one collection
- # per PDH: the number of collections is used on the collection node label.
+ # Next, get input collections by PDH.
Collection.filter(
[['portable_data_hash', 'in', col_pdhs - output_pdhs]]).each do |c|
- if input_cols[c[:portable_data_hash]]
- input_cols[c[:portable_data_hash]] << c
- else
- input_cols[c[:portable_data_hash]] = [c]
- end
+ nodes[c[:portable_data_hash]] = c
end
@svg = ProvenanceHelper::create_provenance_graph(
nodes, "provenance_svg",
{
:request => request,
- :direction => :top_down,
- :output_collections => output_cols,
- :input_collections => input_cols,
- :cr_children_of => {
- cr[:uuid] => child_crs.select{|child| child[:uuid]},
- },
- })
+ :pdh_to_uuid => pdh_to_col,
+ }
+ )
end
def show_pane_list
@svg = ProvenanceHelper::create_provenance_graph nodes, "provenance_svg", {
:request => request,
- :direction => :top_down,
:all_script_parameters => true,
:script_version_nodes => true}
end
if provenance
@prov_svg = ProvenanceHelper::create_provenance_graph provenance, "provenance_svg", {
:request => request,
- :direction => :top_down,
:all_script_parameters => true,
:combine_jobs => :script_and_version,
:pips => pips,
gr
end
+ # Builds the graph text for one container request: a node for the request
+ # itself plus edges to the collections it mounts, its container image, its
+ # output collection and its log collection. A related object is only drawn
+ # when it is present in @pdata.
+ #
+ # cont      - container request record; read via [:uuid], [:name], [:mounts],
+ #             [:container_image], [:output_uuid] and [:log_uuid].
+ # edge_opts - optional hash; truthy :no_output / :no_log suppress the output
+ #             and log edges respectively.
+ #
+ # Returns the accumulated string of node and edge descriptions.
+ def cr_edges cont, edge_opts={}
+ uuid = cont[:uuid]
+ gr = ""
+
+ # Node for the container request itself, linked to its page and labelled with its name.
+ gr += describe_node(cont[:uuid], {href: {controller: 'container_requests',
+ id: cont[:uuid]},
+ shape: 'oval',
+ label: cont[:name]})
+
+ # Input edges: one per collection reference yielded for the mounts, labelled with the yielded key.
+ ProvenanceHelper::find_collections cont[:mounts] do |collection_hash, collection_uuid, key|
+ # When the caller supplied a mapping entry for this PDH, use that entry's
+ # UUID instead; clearing the hash means the PDH fallback branch below is
+ # never taken for a mapped collection.
+ if @opts[:pdh_to_uuid] and @opts[:pdh_to_uuid][collection_hash]
+ collection_uuid = @opts[:pdh_to_uuid][collection_hash].uuid
+ collection_hash = nil
+ end
+ if collection_uuid and @pdata[collection_uuid]
+ gr += describe_node(collection_uuid)
+ gr += edge(collection_uuid, uuid, {:label => key})
+ elsif collection_hash and @pdata[collection_hash]
+ gr += describe_node(collection_hash)
+ gr += edge(collection_hash, uuid, {:label => key})
+ end
+ end
+
+ # Container image edge, skipped when @opts[:no_docker] is set.
+ if cont[:container_image] and !@opts[:no_docker] and @pdata[cont[:container_image]]
+ gr += describe_node(cont[:container_image], {label: cont[:container_image]})
+ gr += edge(cont[:container_image], uuid, {label: "docker_image"})
+ end
+
+ # Output edge points from the request to its output collection.
+ if cont[:output_uuid] and !edge_opts[:no_output] and @pdata[cont[:output_uuid]]
+ gr += describe_node(cont[:output_uuid])
+ gr += edge(uuid, cont[:output_uuid], {label: "output" })
+ end
+
+ # Log edge points from the request to its log collection.
+ if cont[:log_uuid] and !edge_opts[:no_log] and @pdata[cont[:log_uuid]]
+ gr += describe_node(cont[:log_uuid])
+ gr += edge(uuid, cont[:log_uuid], {label: "log"})
+ end
+
+ gr
+ end
+
+ # Builds the graph text for one container: a node for the container itself
+ # plus edges to the collections it mounts, its container image, its output
+ # and its log. A related object is only drawn when it is present in @pdata.
+ #
+ # cont      - container record; read via [:uuid], [:mounts],
+ #             [:container_image], [:output] and [:log].
+ # edge_opts - optional hash; truthy :no_output / :no_log suppress the output
+ #             and log edges respectively.
+ #
+ # Returns the accumulated string of node and edge descriptions.
+ def container_edges cont, edge_opts={}
+ uuid = cont[:uuid]
+ gr = ""
+
+ # Node for the container itself, linked to its page (no explicit label is passed).
+ gr += describe_node(cont[:uuid], {href: {controller: 'containers',
+ id: cont[:uuid]},
+ shape: 'oval'})
+
+ # Input edges: prefer the collection UUID when it is known in @pdata,
+ # otherwise fall back to the portable data hash.
+ ProvenanceHelper::find_collections cont[:mounts] do |collection_hash, collection_uuid, key|
+ if collection_uuid and @pdata[collection_uuid]
+ gr += describe_node(collection_uuid)
+ gr += edge(collection_uuid, uuid, {:label => key})
+ elsif collection_hash and @pdata[collection_hash]
+ gr += describe_node(collection_hash)
+ gr += edge(collection_hash, uuid, {:label => key})
+ end
+ end
+
+ # Container image edge, skipped when @opts[:no_docker] is set.
+ if cont[:container_image] and !@opts[:no_docker] and @pdata[cont[:container_image]]
+ gr += describe_node(cont[:container_image], {label: cont[:container_image]})
+ gr += edge(cont[:container_image], uuid, {label: "docker_image"})
+ end
+
+ # Output edge points from the container to the value of its :output field.
+ if cont[:output] and !edge_opts[:no_output] and @pdata[cont[:output]]
+ gr += describe_node(cont[:output])
+ gr += edge(uuid, cont[:output], {label: "output" })
+ end
+
+ # Log edge points from the container to the value of its :log field.
+ if cont[:log] and !edge_opts[:no_log] and @pdata[cont[:log]]
+ gr += describe_node(cont[:log])
+ gr += edge(uuid, cont[:log], {label: "log"})
+ end
+
+ gr
+ end
+
def generate_provenance_edges(uuid)
gr = ""
m = GenerateGraph::collection_uuid(uuid)
gr += job_edges job if job
elsif rsc == ContainerRequest
cr = @pdata[uuid]
- if cr
- gr += describe_node(cr[:uuid], {href: {controller: 'container_requests',
- id: cr[:uuid]},
- label: cr[:name],
- shape: 'oval'})
- # Connect child CRs
- children = @opts[:cr_children_of].andand[cr[:uuid]]
- if children
- children.each do |child|
- gr += edge(child[:uuid], cr[:uuid], {label: 'child'})
- end
- end
- # Output collection node
- if cr[:output_uuid] and @opts[:output_collections][cr[:output_uuid]]
- c = @opts[:output_collections][cr[:output_uuid]]
- gr += describe_node(c[:portable_data_hash],
- {
- label: c[:name],
- col_uuid: c[:uuid],
- })
- gr += edge(cr[:uuid],
- c[:portable_data_hash],
- {label: 'output'})
- end
- # Input collection nodes
- output_pdhs = @opts[:output_collections].values.collect{|oc|
- oc[:portable_data_hash]}
- ProvenanceHelper::cr_input_pdhs(cr).each do |pdh|
- if not output_pdhs.include?(pdh)
- # Search for collections on the same project first
- cols = @opts[:input_collections][pdh].andand.select{|ic|
- ic[:owner_uuid] == cr[:owner_uuid]}
- if not cols or cols.empty?
- # Search for any collection with this PDH
- cols = @opts[:input_collections][pdh]
- end
- if cols
- names = cols.collect{|x| x[:name]}.uniq
- else
- names = ['(collection not found)']
- end
- input_name = names.first
- if names.length > 1
- input_name += " + #{names.length - 1} more"
- end
- gr += describe_node(pdh, {label: input_name})
- end
- gr += edge(pdh, cr[:uuid], {label: 'input'})
- end
- end
+ gr += cr_edges cr if cr
+ elsif rsc == Container
+ cr = @pdata[uuid]
+ gr += container_edges cr if cr
end
end
node [fontsize=10,fontname=\"Helvetica,Arial,sans-serif\"];
edge [fontsize=10,fontname=\"Helvetica,Arial,sans-serif\"];
"""
-
- if opts[:direction] == :bottom_up
- gr += "edge [dir=back];"
+ if ["LR", "RL"].include? opts[:direction]
+ gr += "rankdir=#{opts[:direction]};"
end
begin
|_. Argument |_. Type |_. Description |_. Location |_. Example |
{background:#ccffcc}.|uuid|string|The UUID of the Collection to untrash.|path||
|ensure_unique_name|boolean (default false)|Rename collection uniquely if untrashing it would fail with a unique name conflict.|query||
+
+
+h3. provenance
+
+Returns a list of objects in the database that directly or indirectly contributed to producing this collection, such as the container request that produced this collection as output.
+
+The general algorithm is:
+
+# Visit the container request that produced this collection (via @output_uuid@ or @log_uuid@ attributes of the container request)
+# Visit the input collections to that container request (via @mounts@ and @container_image@ of the container request)
+# Iterate until there are no more objects to visit
+
+Arguments:
+
+table(table table-bordered table-condensed).
+|_. Argument |_. Type |_. Description |_. Location |_. Example |
+{background:#ccffcc}.|uuid|string|The UUID of the Collection whose provenance to retrieve.|path||
+
+h3. used_by
+
+Returns a list of objects in the database that this collection directly or indirectly contributed to, such as containers that take this collection as input.
+
+The general algorithm is:
+
+# Visit containers that take this collection as input (via @mounts@ or @container_image@ of the container)
+# Visit collections produced by those containers (via @output@ or @log@ of the container)
+# Iterate until there are no more objects to visit
+
+Arguments:
+
+table(table table-bordered table-condensed).
+|_. Argument |_. Type |_. Description |_. Location |_. Example |
+{background:#ccffcc}.|uuid|string|The UUID of the Collection whose downstream usage to retrieve.|path||
end
- def find_collections(visited, sp, &b)
+ def find_collections(visited, sp, ignore_columns=[], &b)
case sp
when ArvadosModel
sp.class.columns.each do |c|
- find_collections(visited, sp[c.name.to_sym], &b) if c.name != "log"
+ find_collections(visited, sp[c.name.to_sym], &b) if !ignore_columns.include?(c.name)
end
when Hash
sp.each do |k, v|
return if visited[loc.to_s]
end
- logger.debug "visiting #{uuid}"
-
if loc
# uuid is a portable_data_hash
collections = Collection.readable_by(*@read_users).where(portable_data_hash: loc.to_s)
if direction == :search_up
# Search upstream for jobs where this locator is the output of some job
- Job.readable_by(*@read_users).where(output: loc.to_s).each do |job|
- search_edges(visited, job.uuid, :search_up)
+ if !Rails.configuration.API.DisabledAPIs.include?("jobs.list")
+ Job.readable_by(*@read_users).where(output: loc.to_s).each do |job|
+ search_edges(visited, job.uuid, :search_up)
+ end
+
+ Job.readable_by(*@read_users).where(log: loc.to_s).each do |job|
+ search_edges(visited, job.uuid, :search_up)
+ end
+ end
+
+ Container.readable_by(*@read_users).where(output: loc.to_s).each do |c|
+ search_edges(visited, c.uuid, :search_up)
end
- Job.readable_by(*@read_users).where(log: loc.to_s).each do |job|
- search_edges(visited, job.uuid, :search_up)
+ Container.readable_by(*@read_users).where(log: loc.to_s).each do |c|
+ search_edges(visited, c.uuid, :search_up)
end
elsif direction == :search_down
if loc.to_s == "d41d8cd98f00b204e9800998ecf8427e+0"
end
# Search downstream for jobs where this locator is in script_parameters
- Job.readable_by(*@read_users).where(["jobs.script_parameters like ?", "%#{loc.to_s}%"]).each do |job|
- search_edges(visited, job.uuid, :search_down)
+ if !Rails.configuration.API.DisabledAPIs.include?("jobs.list")
+ Job.readable_by(*@read_users).where(["jobs.script_parameters like ?", "%#{loc.to_s}%"]).each do |job|
+ search_edges(visited, job.uuid, :search_down)
+ end
+
+ Job.readable_by(*@read_users).where(["jobs.docker_image_locator = ?", "#{loc.to_s}"]).each do |job|
+ search_edges(visited, job.uuid, :search_down)
+ end
end
- Job.readable_by(*@read_users).where(["jobs.docker_image_locator = ?", "#{loc.to_s}"]).each do |job|
- search_edges(visited, job.uuid, :search_down)
+ Container.readable_by(*@read_users).where([Container.full_text_trgm + " like ?", "%#{loc.to_s}%"]).each do |c|
+ if c.output != loc.to_s && c.log != loc.to_s
+ search_edges(visited, c.uuid, :search_down)
+ end
end
end
else
search_edges(visited, job.output, direction)
end
end
+ elsif rsc == Container
+ c = Container.readable_by(*@read_users).where(uuid: uuid).limit(1).first
+ if c
+ visited[uuid] = c.as_api_response
+ if direction == :search_up
+ # Follow upstream collections referenced anywhere in the container record except its log and output
+ find_collections(visited, c, ignore_columns=["log", "output"]) do |hash, col_uuid|
+ search_edges(visited, hash, :search_up) if hash
+ search_edges(visited, col_uuid, :search_up) if col_uuid
+ end
+ elsif direction == :search_down
+ # Follow downstream container output
+ search_edges(visited, c.output, :search_down)
+ end
+ end
+ elsif rsc == ContainerRequest
+ c = ContainerRequest.readable_by(*@read_users).where(uuid: uuid).limit(1).first
+ if c
+ visited[uuid] = c.as_api_response
+ if direction == :search_up
+ # Follow upstream collections
+ find_collections(visited, c, ignore_columns=["log_uuid", "output_uuid"]) do |hash, col_uuid|
+ search_edges(visited, hash, :search_up) if hash
+ search_edges(visited, col_uuid, :search_up) if col_uuid
+ end
+ elsif direction == :search_down
+ # Follow downstream container request output
+ search_edges(visited, c.output_uuid, :search_down)
+ end
+ end
elsif rsc == Collection
- if c = Collection.readable_by(*@read_users).where(uuid: uuid).limit(1).first
- search_edges(visited, c.portable_data_hash, direction)
- visited[c.portable_data_hash] = c.as_api_response
+ c = Collection.readable_by(*@read_users).where(uuid: uuid).limit(1).first
+ if c
+ if direction == :search_up
+ visited[c.uuid] = c.as_api_response
+
+ if !Rails.configuration.API.DisabledAPIs.include?("jobs.list")
+ Job.readable_by(*@read_users).where(output: c.portable_data_hash).each do |job|
+ search_edges(visited, job.uuid, :search_up)
+ end
+
+ Job.readable_by(*@read_users).where(log: c.portable_data_hash).each do |job|
+ search_edges(visited, job.uuid, :search_up)
+ end
+ end
+
+ ContainerRequest.readable_by(*@read_users).where(output_uuid: uuid).each do |cr|
+ search_edges(visited, cr.uuid, :search_up)
+ end
+
+ ContainerRequest.readable_by(*@read_users).where(log_uuid: uuid).each do |cr|
+ search_edges(visited, cr.uuid, :search_up)
+ end
+ elsif direction == :search_down
+ search_edges(visited, c.portable_data_hash, :search_down)
+ end
end
elsif rsc != nil
rsc.where(uuid: uuid).each do |r|
+ # Responds (via send_json) with the hash of objects collected by
+ # search_edges while walking upstream (:search_up) from @object.
+ # The walk starts from the object's UUID when it has one and falls back
+ # to its portable data hash otherwise.
def provenance
visited = {}
- search_edges(visited, @object[:portable_data_hash], :search_up)
- search_edges(visited, @object[:uuid], :search_up)
+ if @object[:uuid]
+ search_edges(visited, @object[:uuid], :search_up)
+ else
+ search_edges(visited, @object[:portable_data_hash], :search_up)
+ end
send_json visited
end
+ # Responds (via send_json) with the hash of objects collected by
+ # search_edges while walking downstream (:search_down) from @object.
+ # The walk starts from the object's UUID when it has one and falls back
+ # to its portable data hash otherwise.
def used_by
visited = {}
- search_edges(visited, @object[:uuid], :search_down)
- search_edges(visited, @object[:portable_data_hash], :search_down)
+ if @object[:uuid]
+ search_edges(visited, @object[:uuid], :search_down)
+ else
+ search_edges(visited, @object[:portable_data_hash], :search_down)
+ end
send_json visited
end