Merge branch '15422-collections-prov-doc' refs #15422
authorPeter Amstutz <pamstutz@veritasgenetics.com>
Tue, 16 Jul 2019 20:06:03 +0000 (16:06 -0400)
committerPeter Amstutz <pamstutz@veritasgenetics.com>
Tue, 16 Jul 2019 20:06:03 +0000 (16:06 -0400)
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz@veritasgenetics.com>

apps/workbench/app/controllers/collections_controller.rb
apps/workbench/app/controllers/container_requests_controller.rb
apps/workbench/app/controllers/jobs_controller.rb
apps/workbench/app/controllers/pipeline_instances_controller.rb
apps/workbench/app/helpers/provenance_helper.rb
doc/api/methods/collections.html.textile.liquid
services/api/app/controllers/arvados/v1/collections_controller.rb

index 5141012443c33f9dc98492cf41535219d0f6a2a4..de20b8858ac6c5857eed076c507680ee478fab9c 100644 (file)
@@ -176,7 +176,7 @@ class CollectionsController < ApplicationController
     if params["tab_pane"] == "Provenance_graph"
       @prov_svg = ProvenanceHelper::create_provenance_graph(@object.provenance, "provenance_svg",
                                                             {:request => request,
-                                                             :direction => :top_down,
+                                                             :direction => "RL",
                                                              :combine_jobs => :script_only}) rescue nil
     end
 
@@ -217,7 +217,7 @@ class CollectionsController < ApplicationController
         if params["tab_pane"] == "Used_by"
           @used_by_svg = ProvenanceHelper::create_provenance_graph(@object.used_by, "used_by_svg",
                                                                    {:request => request,
-                                                                    :direction => :top_down,
+                                                                    :direction => "LR",
                                                                     :combine_jobs => :script_only,
                                                                     :pdata_only => true}) rescue nil
         end
index 385d9dc6d7292c8cc5d47cf18b94ecf5189b8354..587819dd442c2c1fbe9a8261b491dfc41cc83742 100644 (file)
@@ -11,7 +11,7 @@ class ContainerRequestsController < ApplicationController
   def generate_provenance(cr)
     return if params['tab_pane'] != "Provenance"
 
-    nodes = {cr[:uuid] => cr}
+    nodes = {}
     child_crs = []
     col_uuids = []
     col_pdhs = []
@@ -29,38 +29,33 @@ class ContainerRequestsController < ApplicationController
       end
     end
 
-    output_cols = {} # Indexed by UUID
-    input_cols = {} # Indexed by PDH
+    if nodes.length == 0
+      nodes[cr[:uuid]] = cr
+    end
+
+    pdh_to_col = {} # Indexed by PDH
     output_pdhs = []
 
     # Batch requests to get all related collections
     # First fetch output collections by UUID.
     Collection.filter([['uuid', 'in', col_uuids.uniq]]).each do |c|
-      output_cols[c[:uuid]] = c
       output_pdhs << c[:portable_data_hash]
+      pdh_to_col[c[:portable_data_hash]] = c
+      nodes[c[:uuid]] = c
     end
-    # Then, get only input collections by PDH. There could be more than one collection
-    # per PDH: the number of collections is used on the collection node label.
+    # Next, get input collections by PDH.
     Collection.filter(
       [['portable_data_hash', 'in', col_pdhs - output_pdhs]]).each do |c|
-      if input_cols[c[:portable_data_hash]]
-        input_cols[c[:portable_data_hash]] << c
-      else
-        input_cols[c[:portable_data_hash]] = [c]
-      end
+      nodes[c[:portable_data_hash]] = c
     end
 
     @svg = ProvenanceHelper::create_provenance_graph(
       nodes, "provenance_svg",
       {
         :request => request,
-        :direction => :top_down,
-        :output_collections => output_cols,
-        :input_collections => input_cols,
-        :cr_children_of => {
-          cr[:uuid] => child_crs.select{|child| child[:uuid]},
-        },
-      })
+        :pdh_to_uuid => pdh_to_col,
+      }
+    )
   end
 
   def show_pane_list
index 4f7bfcee53e5f11acceefc5c2a0d1707287143c1..bac1530d39a23fecf70077fe1e696c749a38b64b 100644 (file)
@@ -34,7 +34,6 @@ class JobsController < ApplicationController
 
     @svg = ProvenanceHelper::create_provenance_graph nodes, "provenance_svg", {
       :request => request,
-      :direction => :top_down,
       :all_script_parameters => true,
       :script_version_nodes => true}
   end
index c8863653a1b8f30433ce77d2570bd831cc7cfbd7..6431057ddf308855b73d9a582daa348f5c020a6e 100644 (file)
@@ -192,7 +192,6 @@ class PipelineInstancesController < ApplicationController
     if provenance
       @prov_svg = ProvenanceHelper::create_provenance_graph provenance, "provenance_svg", {
         :request => request,
-        :direction => :top_down,
         :all_script_parameters => true,
         :combine_jobs => :script_and_version,
         :pips => pips,
index 75261adbdaa55d6516ef92ab7e00015453864969..cef5cc7ee816db5e906537e6da643b33223466f7 100644 (file)
@@ -151,6 +151,83 @@ module ProvenanceHelper
       gr
     end
 
+    def cr_edges cont, edge_opts={}
+      uuid = cont[:uuid]
+      gr = ""
+
+      gr += describe_node(cont[:uuid], {href: {controller: 'container_requests',
+                                             id: cont[:uuid]},
+                                        shape: 'oval',
+                                        label: cont[:name]})
+
+      ProvenanceHelper::find_collections cont[:mounts] do |collection_hash, collection_uuid, key|
+        if @opts[:pdh_to_uuid] and @opts[:pdh_to_uuid][collection_hash]
+          collection_uuid = @opts[:pdh_to_uuid][collection_hash].uuid
+          collection_hash = nil
+        end
+        if collection_uuid and @pdata[collection_uuid]
+          gr += describe_node(collection_uuid)
+          gr += edge(collection_uuid, uuid, {:label => key})
+        elsif collection_hash and @pdata[collection_hash]
+          gr += describe_node(collection_hash)
+          gr += edge(collection_hash, uuid, {:label => key})
+        end
+      end
+
+      if cont[:container_image] and !@opts[:no_docker] and @pdata[cont[:container_image]]
+        gr += describe_node(cont[:container_image], {label: cont[:container_image]})
+        gr += edge(cont[:container_image], uuid, {label: "docker_image"})
+      end
+
+      if cont[:output_uuid] and !edge_opts[:no_output] and @pdata[cont[:output_uuid]]
+        gr += describe_node(cont[:output_uuid])
+        gr += edge(uuid, cont[:output_uuid], {label: "output" })
+      end
+
+      if cont[:log_uuid] and !edge_opts[:no_log] and @pdata[cont[:log_uuid]]
+        gr += describe_node(cont[:log_uuid])
+        gr += edge(uuid, cont[:log_uuid], {label: "log"})
+      end
+
+      gr
+    end
+
+    def container_edges cont, edge_opts={}
+      uuid = cont[:uuid]
+      gr = ""
+
+      gr += describe_node(cont[:uuid], {href: {controller: 'containers',
+                                             id: cont[:uuid]},
+                                      shape: 'oval'})
+
+      ProvenanceHelper::find_collections cont[:mounts] do |collection_hash, collection_uuid, key|
+        if collection_uuid and @pdata[collection_uuid]
+          gr += describe_node(collection_uuid)
+          gr += edge(collection_uuid, uuid, {:label => key})
+        elsif collection_hash and @pdata[collection_hash]
+          gr += describe_node(collection_hash)
+          gr += edge(collection_hash, uuid, {:label => key})
+        end
+      end
+
+      if cont[:container_image] and !@opts[:no_docker] and @pdata[cont[:container_image]]
+        gr += describe_node(cont[:container_image], {label: cont[:container_image]})
+        gr += edge(cont[:container_image], uuid, {label: "docker_image"})
+      end
+
+      if cont[:output] and !edge_opts[:no_output] and @pdata[cont[:output]]
+        gr += describe_node(cont[:output])
+        gr += edge(uuid, cont[:output], {label: "output" })
+      end
+
+      if cont[:log] and !edge_opts[:no_log] and @pdata[cont[:log]]
+        gr += describe_node(cont[:log])
+        gr += edge(uuid, cont[:log], {label: "log"})
+      end
+
+      gr
+    end
+
     def generate_provenance_edges(uuid)
       gr = ""
       m = GenerateGraph::collection_uuid(uuid)
@@ -196,56 +273,10 @@ module ProvenanceHelper
           gr += job_edges job if job
         elsif rsc == ContainerRequest
           cr = @pdata[uuid]
-          if cr
-            gr += describe_node(cr[:uuid], {href: {controller: 'container_requests',
-                                                   id: cr[:uuid]},
-                                            label: cr[:name],
-                                            shape: 'oval'})
-            # Connect child CRs
-            children = @opts[:cr_children_of].andand[cr[:uuid]]
-            if children
-              children.each do |child|
-                gr += edge(child[:uuid], cr[:uuid], {label: 'child'})
-              end
-            end
-            # Output collection node
-            if cr[:output_uuid] and @opts[:output_collections][cr[:output_uuid]]
-              c = @opts[:output_collections][cr[:output_uuid]]
-              gr += describe_node(c[:portable_data_hash],
-                                  {
-                                    label: c[:name],
-                                    col_uuid: c[:uuid],
-                                  })
-              gr += edge(cr[:uuid],
-                         c[:portable_data_hash],
-                         {label: 'output'})
-            end
-            # Input collection nodes
-            output_pdhs = @opts[:output_collections].values.collect{|oc|
-              oc[:portable_data_hash]}
-            ProvenanceHelper::cr_input_pdhs(cr).each do |pdh|
-              if not output_pdhs.include?(pdh)
-                # Search for collections on the same project first
-                cols = @opts[:input_collections][pdh].andand.select{|ic|
-                  ic[:owner_uuid] == cr[:owner_uuid]}
-                if not cols or cols.empty?
-                  # Search for any collection with this PDH
-                  cols = @opts[:input_collections][pdh]
-                end
-                if cols
-                  names = cols.collect{|x| x[:name]}.uniq
-                else
-                  names = ['(collection not found)']
-                end
-                input_name = names.first
-                if names.length > 1
-                  input_name += " + #{names.length - 1} more"
-                end
-                gr += describe_node(pdh, {label: input_name})
-              end
-              gr += edge(pdh, cr[:uuid], {label: 'input'})
-            end
-          end
+          gr += cr_edges cr if cr
+        elsif rsc == Container
+          cr = @pdata[uuid]
+          gr += container_edges cr if cr
         end
       end
 
@@ -320,9 +351,8 @@ module ProvenanceHelper
 node [fontsize=10,fontname=\"Helvetica,Arial,sans-serif\"];
 edge [fontsize=10,fontname=\"Helvetica,Arial,sans-serif\"];
 """
-
-    if opts[:direction] == :bottom_up
-      gr += "edge [dir=back];"
+    if ["LR", "RL"].include? opts[:direction]
+      gr += "rankdir=#{opts[:direction]};"
     end
 
     begin
index d611c5b1613ce4ba93c48039c6c335457c5584e4..9f60e2ff1699d50415dfbc73397418c855a72daa 100644 (file)
@@ -124,3 +124,36 @@ table(table table-bordered table-condensed).
 |_. Argument |_. Type |_. Description |_. Location |_. Example |
 {background:#ccffcc}.|uuid|string|The UUID of the Collection to untrash.|path||
 |ensure_unique_name|boolean (default false)|Rename collection uniquely if untrashing it would fail with a unique name conflict.|query||
+
+
+h3. provenance
+
+Returns a list of objects in the database that directly or indirectly contributed to producing this collection, such as the container request that produced this collection as output.
+
+The general algorithm is:
+
+# Visit the container request that produced this collection (via @output_uuid@ or @log_uuid@ attributes of the container request)
+# Visit the input collections to that container request (via @mounts@ and @container_image@ of the container request)
+# Iterate until there are no more objects to visit
+
+Arguments:
+
+table(table table-bordered table-condensed).
+|_. Argument |_. Type |_. Description |_. Location |_. Example |
+{background:#ccffcc}.|uuid|string|The UUID of the Collection whose provenance is requested.|path||
+
+h3. used_by
+
+Returns a list of objects in the database that this collection directly or indirectly contributed to, such as containers that take this collection as input.
+
+The general algorithm is:
+
+# Visit containers that take this collection as input (via @mounts@ or @container_image@ of the container)
+# Visit collections produced by those containers (via @output@ or @log@ of the container)
+# Iterate until there are no more objects to visit
+
+Arguments:
+
+table(table table-bordered table-condensed).
+|_. Argument |_. Type |_. Description |_. Location |_. Example |
+{background:#ccffcc}.|uuid|string|The UUID of the Collection whose usage is requested.|path||
index c771fcea796872541b59921badb5966601b5069c..a8ef4b91b4b018f863c0a156e784e05622995893 100644 (file)
@@ -96,11 +96,11 @@ class Arvados::V1::CollectionsController < ApplicationController
   end
 
 
-  def find_collections(visited, sp, &b)
+  def find_collections(visited, sp, ignore_columns=[], &b)
     case sp
     when ArvadosModel
       sp.class.columns.each do |c|
-        find_collections(visited, sp[c.name.to_sym], &b) if c.name != "log"
+        find_collections(visited, sp[c.name.to_sym], &b) if !ignore_columns.include?(c.name)
       end
     when Hash
       sp.each do |k, v|
@@ -129,8 +129,6 @@ class Arvados::V1::CollectionsController < ApplicationController
       return if visited[loc.to_s]
     end
 
-    logger.debug "visiting #{uuid}"
-
     if loc
       # uuid is a portable_data_hash
       collections = Collection.readable_by(*@read_users).where(portable_data_hash: loc.to_s)
@@ -154,12 +152,22 @@ class Arvados::V1::CollectionsController < ApplicationController
 
       if direction == :search_up
         # Search upstream for jobs where this locator is the output of some job
-        Job.readable_by(*@read_users).where(output: loc.to_s).each do |job|
-          search_edges(visited, job.uuid, :search_up)
+        if !Rails.configuration.API.DisabledAPIs.include?("jobs.list")
+          Job.readable_by(*@read_users).where(output: loc.to_s).each do |job|
+            search_edges(visited, job.uuid, :search_up)
+          end
+
+          Job.readable_by(*@read_users).where(log: loc.to_s).each do |job|
+            search_edges(visited, job.uuid, :search_up)
+          end
+        end
+
+        Container.readable_by(*@read_users).where(output: loc.to_s).each do |c|
+          search_edges(visited, c.uuid, :search_up)
         end
 
-        Job.readable_by(*@read_users).where(log: loc.to_s).each do |job|
-          search_edges(visited, job.uuid, :search_up)
+        Container.readable_by(*@read_users).where(log: loc.to_s).each do |c|
+          search_edges(visited, c.uuid, :search_up)
         end
       elsif direction == :search_down
         if loc.to_s == "d41d8cd98f00b204e9800998ecf8427e+0"
@@ -168,12 +176,20 @@ class Arvados::V1::CollectionsController < ApplicationController
         end
 
         # Search downstream for jobs where this locator is in script_parameters
-        Job.readable_by(*@read_users).where(["jobs.script_parameters like ?", "%#{loc.to_s}%"]).each do |job|
-          search_edges(visited, job.uuid, :search_down)
+        if !Rails.configuration.API.DisabledAPIs.include?("jobs.list")
+          Job.readable_by(*@read_users).where(["jobs.script_parameters like ?", "%#{loc.to_s}%"]).each do |job|
+            search_edges(visited, job.uuid, :search_down)
+          end
+
+          Job.readable_by(*@read_users).where(["jobs.docker_image_locator = ?", "#{loc.to_s}"]).each do |job|
+            search_edges(visited, job.uuid, :search_down)
+          end
         end
 
-        Job.readable_by(*@read_users).where(["jobs.docker_image_locator = ?", "#{loc.to_s}"]).each do |job|
-          search_edges(visited, job.uuid, :search_down)
+        Container.readable_by(*@read_users).where([Container.full_text_trgm + " like ?", "%#{loc.to_s}%"]).each do |c|
+          if c.output != loc.to_s && c.log != loc.to_s
+            search_edges(visited, c.uuid, :search_down)
+          end
         end
       end
     else
@@ -193,10 +209,62 @@ class Arvados::V1::CollectionsController < ApplicationController
             search_edges(visited, job.output, direction)
           end
         end
+      elsif rsc == Container
+        c = Container.readable_by(*@read_users).where(uuid: uuid).limit(1).first
+        if c
+          visited[uuid] = c.as_api_response
+          if direction == :search_up
+            # Follow upstream collections referenced by the container (all columns except log and output)
+            find_collections(visited, c, ignore_columns=["log", "output"]) do |hash, col_uuid|
+              search_edges(visited, hash, :search_up) if hash
+              search_edges(visited, col_uuid, :search_up) if col_uuid
+            end
+          elsif direction == :search_down
+            # Follow downstream container output
+            search_edges(visited, c.output, :search_down)
+          end
+        end
+      elsif rsc == ContainerRequest
+        c = ContainerRequest.readable_by(*@read_users).where(uuid: uuid).limit(1).first
+        if c
+          visited[uuid] = c.as_api_response
+          if direction == :search_up
+            # Follow upstream collections
+            find_collections(visited, c, ignore_columns=["log_uuid", "output_uuid"]) do |hash, col_uuid|
+              search_edges(visited, hash, :search_up) if hash
+              search_edges(visited, col_uuid, :search_up) if col_uuid
+            end
+          elsif direction == :search_down
+            # Follow downstream container request output
+            search_edges(visited, c.output_uuid, :search_down)
+          end
+        end
       elsif rsc == Collection
-        if c = Collection.readable_by(*@read_users).where(uuid: uuid).limit(1).first
-          search_edges(visited, c.portable_data_hash, direction)
-          visited[c.portable_data_hash] = c.as_api_response
+        c = Collection.readable_by(*@read_users).where(uuid: uuid).limit(1).first
+        if c
+          if direction == :search_up
+            visited[c.uuid] = c.as_api_response
+
+            if !Rails.configuration.API.DisabledAPIs.include?("jobs.list")
+              Job.readable_by(*@read_users).where(output: c.portable_data_hash).each do |job|
+                search_edges(visited, job.uuid, :search_up)
+              end
+
+              Job.readable_by(*@read_users).where(log: c.portable_data_hash).each do |job|
+                search_edges(visited, job.uuid, :search_up)
+              end
+            end
+
+            ContainerRequest.readable_by(*@read_users).where(output_uuid: uuid).each do |cr|
+              search_edges(visited, cr.uuid, :search_up)
+            end
+
+            ContainerRequest.readable_by(*@read_users).where(log_uuid: uuid).each do |cr|
+              search_edges(visited, cr.uuid, :search_up)
+            end
+          elsif direction == :search_down
+            search_edges(visited, c.portable_data_hash, :search_down)
+          end
         end
       elsif rsc != nil
         rsc.where(uuid: uuid).each do |r|
@@ -226,15 +294,21 @@ class Arvados::V1::CollectionsController < ApplicationController
 
   def provenance
     visited = {}
-    search_edges(visited, @object[:portable_data_hash], :search_up)
-    search_edges(visited, @object[:uuid], :search_up)
+    if @object[:uuid]
+      search_edges(visited, @object[:uuid], :search_up)
+    else
+      search_edges(visited, @object[:portable_data_hash], :search_up)
+    end
     send_json visited
   end
 
   def used_by
     visited = {}
-    search_edges(visited, @object[:uuid], :search_down)
-    search_edges(visited, @object[:portable_data_hash], :search_down)
+    if @object[:uuid]
+      search_edges(visited, @object[:uuid], :search_down)
+    else
+      search_edges(visited, @object[:portable_data_hash], :search_down)
+    end
     send_json visited
   end