4031: Search for collection dependencies even in the middle of script parameter
fields. Server side combines collections with the same PDH into a single entry
with a special name. Fix node descriptions and hyperlinks. Add more options to
eliminate certain node types from certain graphs to make them easier to read.
Handle both PDH and UUID when identifying collections.
diff --git a/apps/workbench/app/controllers/jobs_controller.rb b/apps/workbench/app/controllers/jobs_controller.rb
index 40f4378..00ce0a5 100644
--- a/apps/workbench/app/controllers/jobs_controller.rb
+++ b/apps/workbench/app/controllers/jobs_controller.rb
@@ -3,17 +3,29 @@ class JobsController < ApplicationController
def generate_provenance(jobs)
return if params['tab_pane'] != "Provenance"
- nodes = []
+ nodes = {}
collections = []
+ hashes = []
jobs.each do |j|
- nodes << j
- collections << j[:output]
- collections.concat(ProvenanceHelper::find_collections(j[:script_parameters]))
- nodes << {:uuid => j[:script_version]}
+ nodes[j[:uuid]] = j
+ hashes << j[:output]
+ ProvenanceHelper::find_collections(j[:script_parameters]) do |hash, uuid|
+ collections << uuid if uuid
+ hashes << hash if hash
+ end
+ nodes[j[:script_version]] = {:uuid => j[:script_version]}
Collection.where(uuid: collections).each do |c|
- nodes << c
+ nodes[c[:portable_data_hash]] = c
+ end
+ Collection.where(portable_data_hash: hashes).each do |c|
+ nodes[c[:portable_data_hash]] = c
+ end
+ nodes.each do |n|
+ puts "\n#{n.inspect}"
@svg = ProvenanceHelper::create_provenance_graph nodes, "provenance_svg", {
diff --git a/apps/workbench/app/controllers/pipeline_instances_controller.rb b/apps/workbench/app/controllers/pipeline_instances_controller.rb
index ffa0f5f..c94037b 100644
--- a/apps/workbench/app/controllers/pipeline_instances_controller.rb
+++ b/apps/workbench/app/controllers/pipeline_instances_controller.rb
@@ -172,7 +172,9 @@ class PipelineInstancesController < ApplicationController
:all_script_parameters => true,
:combine_jobs => :script_and_version,
:pips => pips,
- :only_components => true}
+ :only_components => true,
+ :no_docker => true,
+ :no_log => true}
diff --git a/apps/workbench/app/helpers/provenance_helper.rb b/apps/workbench/app/helpers/provenance_helper.rb
index bd34b48..89d9ee6 100644
--- a/apps/workbench/app/helpers/provenance_helper.rb
+++ b/apps/workbench/app/helpers/provenance_helper.rb
@@ -29,7 +29,9 @@ module ProvenanceHelper
def describe_node(uuid, describe_opts={})
bgcolor = determine_fillcolor (describe_opts[:pip] || @opts[:pips].andand[uuid])
- if GenerateGraph::collection_uuid(uuid)
+ rsc = ArvadosBase::resource_class_for_uuid uuid
+ if GenerateGraph::collection_uuid(uuid) || rsc == Collection
if Collection.is_empty_blob_locator? uuid.to_s
# special case
return "\"#{uuid}\" [label=\"(empty collection)\"];\n"
@@ -39,9 +41,15 @@ module ProvenanceHelper
:action => :show,
:id => uuid.to_s })
- return "\"#{uuid}\" [label=\"#{encode_quotes(describe_opts[:label] || @pdata[uuid][:name] || uuid)}\",shape=box,href=\"#{href}\",#{bgcolor}];\n"
+ return "\"#{uuid}\" [label=\"#{encode_quotes(describe_opts[:label] || (@pdata[uuid] and @pdata[uuid][:name]) || uuid)}\",shape=box,href=\"#{href}\",#{bgcolor}];\n"
- "\"#{uuid}\" [label=\"#{encode_quotes(describe_opts[:label] || uuid)}\",#{bgcolor},shape=#{describe_opts[:shape] || 'box'}];\n"
+ href = ""
+ if describe_opts[:href]
+ href = ",href=\"#{url_for ({:controller => describe_opts[:href][:controller],
+ :action => :show,
+ :id => describe_opts[:href][:id] })}\""
+ end
+ return "\"#{uuid}\" [label=\"#{encode_quotes(describe_opts[:label] || uuid)}\",#{bgcolor},shape=#{describe_opts[:shape] || 'box'}#{href}];\n"
@@ -63,15 +71,15 @@ module ProvenanceHelper
def edge(tail, head, extra)
if @opts[:direction] == :bottom_up
- gr = "\"#{head}\" -> \"#{tail}\""
+ gr = "\"#{encode_quotes head}\" -> \"#{encode_quotes tail}\""
- gr = "\"#{tail}\" -> \"#{head}\""
+ gr = "\"#{encode_quotes tail}\" -> \"#{encode_quotes head}\""
if extra.length > 0
gr += " ["
extra.each do |k, v|
- gr += "#{k}=\"#{v}\","
+ gr += "#{k}=\"#{encode_quotes v}\","
gr += "]"
@@ -79,50 +87,18 @@ module ProvenanceHelper
- def script_param_edges(uuid, prefix, sp)
+ def script_param_edges(uuid, sp)
gr = ""
- case sp
- when Hash
- sp.each do |k, v|
- if prefix.size > 0
- k = prefix + "::" + k.to_s
- end
- gr += script_param_edges(uuid, k.to_s, v)
- end
- when Array
- i = 0
- node = ""
- count = 0
- sp.each do |v|
- if GenerateGraph::collection_uuid(v)
- gr += script_param_edges(uuid, "#{prefix}[#{i}]", v)
- elsif @opts[:all_script_parameters]
- t = "#{v}"
- nl = (if (count+t.length) > 60 then "\\n" else " " end)
- count = 0 if (count+t.length) > 60
- node += "',#{nl}'" unless node == ""
- node = "['" if node == ""
- node += t
- count += t.length
+ sp.each do |k, v|
+ if @opts[:all_script_parameters]
+ if v.is_a? Array or v.is_a? Hash
+ encv = JSON.pretty_generate(v).gsub("\n", "\\l") + "\\l"
+ else
+ encv = v.to_json
- i += 1
- end
- unless node == ""
- node += "']"
- node_value = encode_quotes node
- gr += "\"#{node_value}\" [label=\"#{node_value}\"];\n"
- gr += edge(uuid, node_value, {:label => prefix})
- end
- when String
- return '' if sp.empty?
- m = GenerateGraph::collection_uuid(sp)
- if m and (@pdata[m] or (not @opts[:pdata_only]))
- gr += edge(m, uuid, {:label => prefix})
- elsif @opts[:all_script_parameters]
- sp_value = encode_quotes sp
- gr += "\"#{sp_value}\" [label=\"\\\"#{sp_value}\\\"\",shape=box];\n"
- gr += edge(sp_value, uuid, {:label => prefix})
+ gr += "\"#{encode_quotes encv}\" [shape=box];\n"
+ gr += edge(encv, uuid, {:label => k})
@@ -132,19 +108,32 @@ module ProvenanceHelper
uuid = job_uuid(job)
gr = ""
- gr += script_param_edges(uuid, "", job[:script_parameters])
- if job[:docker_image_locator]
+ ProvenanceHelper::find_collections job[:script_parameters] do |collection_hash, collection_uuid, key|
+ if collection_uuid
+ gr += describe_node(collection_uuid)
+ gr += edge(collection_uuid, uuid, {:label => key})
+ else
+ gr += describe_node(collection_hash)
+ gr += edge(collection_hash, uuid, {:label => key})
+ end
+ end
+ if job[:docker_image_locator] and !@opts[:no_docker]
gr += describe_node(job[:docker_image_locator], {label: (job[:runtime_constraints].andand[:docker_image] || job[:docker_image_locator])})
- gr += edge(job[:docker_image_locator], uuid, {:label => "docker_image"})
+ gr += edge(job[:docker_image_locator], uuid, {label: "docker_image"})
if @opts[:script_version_nodes]
- #gr += describe_node(job[:script_version])
+ gr += describe_node(job[:script_version], {:label => "git:#{job[:script_version]}"})
gr += edge(job[:script_version], uuid, {:label => "script_version"})
- gr += edge(uuid, job[:output], {label: "output" }) if job[:output] and !edge_opts[:no_output]
- #gr += edge(uuid, job[:log], {label: "log"}) if job[:log] and !edge_opts[:no_log]
+ if job[:output] and !edge_opts[:no_output]
+ gr += describe_node(job[:output])
+ gr += edge(uuid, job[:output], {label: "output" })
+ end
+ gr += edge(uuid, job[:log], {label: "log"}) if job[:log] and !edge_opts[:no_log]
@@ -168,24 +157,30 @@ module ProvenanceHelper
# Pipeline component inputs
job = @pdata[@pdata[uuid][:job].andand[:uuid]]
- gr += describe_node(job_uuid(job), {label: uuid[38..-1], pip: @opts[:pips].andand[job[:uuid]], shape: "oval"})
- gr += job_edges job, {no_output: true, no_log: true}
+ if job
+ gr += describe_node(job_uuid(job), {label: uuid[38..-1], pip: @opts[:pips].andand[job[:uuid]], shape: "oval",
+ href: {controller: 'jobs', id: job[:uuid]}})
+ gr += job_edges job, {no_output: true, no_log: true}
+ end
# Pipeline component output
outuuid = @pdata[uuid][:output_uuid]
- outcollection = @pdata[outuuid]
- gr += edge(job_uuid(job), outcollection[:portable_data_hash], {label: "output"}) if outuuid
- gr += describe_node(outcollection[:portable_data_hash], {label: outcollection[:name]})
+ if outuuid
+ outcollection = @pdata[outuuid]
+ if outcollection
+ gr += edge(job_uuid(job), outcollection[:portable_data_hash], {label: "output"})
+ gr += describe_node(outcollection[:portable_data_hash], {label: outcollection[:name]})
+ end
+ elsif job and job[:output]
+ gr += describe_node(job[:output])
+ gr += edge(job_uuid(job), job[:output], {label: "output" })
+ end
rsc = ArvadosBase::resource_class_for_uuid uuid
if rsc == Job
job = @pdata[uuid]
gr += job_edges job if job
- elsif rsc == Link
- # do nothing
- else
- gr += describe_node(uuid)
@@ -221,20 +216,25 @@ module ProvenanceHelper
gr += "\",label=\""
- if @opts[:combine_jobs] == :script_only
- gr += "#{v[0][:script]}"
- elsif @opts[:combine_jobs] == :script_and_version
- gr += "#{v[0][:script]}" # Just show the name but the nodes will be distinct
- else
- gr += "#{v[0][:script]}\\n#{v[0][:finished_at]}"
+ label = "#{v[0][:script]}"
+ if label == "run-command"
+ label = v[0][:script_parameters][:command].join(' ')
+ if not @opts[:combine_jobs]
+ label += "\\n#{v[0][:finished_at]}"
+ end
+ gr += encode_quotes label
gr += "\",#{determine_fillcolor n}];\n"
def encode_quotes value
- value.andand.gsub("\"", "\\\"")
+ value.andand.to_s.gsub("\"", "\\\"").gsub("\n", "\\n")
@@ -260,18 +260,27 @@ edge [fontsize=10,fontname=\"Helvetica,Arial,sans-serif\"];
gr += "edge [dir=back];"
- g = GenerateGraph.new(pdata, opts)
+ begin
+ pdata = pdata.stringify_keys
- pdata.each do |k, v|
- if !opts[:only_components] or k.start_with? "component_"
- gr += g.generate_provenance_edges(k)
- else
- #gr += describe_node(k)
+ g = GenerateGraph.new(pdata, opts)
+ pdata.each do |k, v|
+ if !opts[:only_components] or k.start_with? "component_"
+ gr += g.generate_provenance_edges(k)
+ else
+ #gr += describe_node(k)
+ end
+ end
+ if !opts[:only_components]
+ gr += g.describe_jobs
- end
- if !opts[:only_components]
- gr += g.describe_jobs
+ rescue => e
+ Rails.logger.warn "#{e.inspect}"
+ Rails.logger.warn "#{e.backtrace.join("\n\t")}"
+ raise
gr += "}"
@@ -292,25 +301,26 @@ edge [fontsize=10,fontname=\"Helvetica,Arial,sans-serif\"];
svg = svg.sub(/<svg /, "<svg id=\"#{svgId}\" ")
- def self.find_collections(sp, &b)
+ # returns hash, uuid
+ def self.find_collections(sp, key=nil, &b)
case sp
when ArvadosBase
sp.class.columns.each do |c|
- find_collections(sp[c.name.to_sym], &b)
+ find_collections(sp[c.name.to_sym], nil, &b)
when Hash
sp.each do |k, v|
- find_collections(v, &b)
+ find_collections(v, key || k, &b)
when Array
sp.each do |v|
- find_collections(v, &b)
+ find_collections(v, key, &b)
when String
if m = /[a-f0-9]{32}\+\d+/.match(sp)
- yield m[0], nil
+ yield m[0], nil, key
elsif m = /[0-9a-z]{5}-4zz18-[0-9a-z]{15}/.match(sp)
- yield nil, m[0]
+ yield nil, m[0], key
diff --git a/services/api/app/controllers/arvados/v1/collections_controller.rb b/services/api/app/controllers/arvados/v1/collections_controller.rb
index 1d0fc2d..dbcc046 100644
--- a/services/api/app/controllers/arvados/v1/collections_controller.rb
+++ b/services/api/app/controllers/arvados/v1/collections_controller.rb
@@ -39,20 +39,25 @@ class Arvados::V1::CollectionsController < ApplicationController
- def script_param_edges(visited, sp)
+ def find_collections(visited, sp, &b)
case sp
+ when ArvadosModel
+ sp.class.columns.each do |c|
+ find_collections(visited, sp[c.name.to_sym], &b) if c.name != "log"
+ end
when Hash
sp.each do |k, v|
- script_param_edges(visited, v)
+ find_collections(visited, v, &b)
when Array
sp.each do |v|
- script_param_edges(visited, v)
+ find_collections(visited, v, &b)
when String
- return if sp.empty?
- if loc = Keep::Locator.parse(sp)
- search_edges(visited, loc.to_s, :search_up)
+ if m = /[a-f0-9]{32}\+\d+/.match(sp)
+ yield m[0], nil
+ elsif m = /[0-9a-z]{5}-4zz18-[0-9a-z]{15}/.match(sp)
+ yield nil, m[0]
@@ -73,12 +78,20 @@ class Arvados::V1::CollectionsController < ApplicationController
# uuid is a portable_data_hash
c = Collection.readable_by(*@read_users).where(portable_data_hash: loc.to_s).all
if c.size == 1
- visited[loc.to_s] = c
+ visited[loc.to_s] = c[0]
elsif c.size > 1
- visited[loc.to_s] = {
- portable_data_hash: c[0].portable_data_hash,
- name: "#{c[0].name} + #{c.size-1} more"
- }
+ named = c.select {|n| not n.name.nil? and not n.name.empty? }
+ if named.any?
+ visited[loc.to_s] = {
+ portable_data_hash: c[0].portable_data_hash,
+ name: "#{named[0].name} + #{c.size-1} more"
+ }
+ else
+ visited[loc.to_s] = {
+ portable_data_hash: c[0].portable_data_hash,
+ name: loc.to_s
+ }
+ end
if direction == :search_up
@@ -100,6 +113,10 @@ class Arvados::V1::CollectionsController < ApplicationController
Job.readable_by(*@read_users).where(["jobs.script_parameters like ?", "%#{loc.to_s}%"]).each do |job|
search_edges(visited, job.uuid, :search_down)
+ Job.readable_by(*@read_users).where(["jobs.docker_image_locator = ?", "#{loc.to_s}"]).each do |job|
+ search_edges(visited, job.uuid, :search_down)
+ end
# uuid is a regular Arvados UUID
@@ -109,7 +126,10 @@ class Arvados::V1::CollectionsController < ApplicationController
visited[uuid] = job.as_api_response
if direction == :search_up
# Follow upstream collections referenced in the script parameters
- script_param_edges(visited, job.script_parameters)
+ find_collections(visited, job) do |hash, uuid|
+ search_edges(visited, hash, :search_up) if hash
+ search_edges(visited, uuid, :search_up) if uuid
+ end
elsif direction == :search_down
# Follow downstream job output
search_edges(visited, job.output, direction)
@@ -148,13 +168,15 @@ class Arvados::V1::CollectionsController < ApplicationController
def provenance
visited = {}
- search_edges(visited, @object[:uuid] || @object[:portable_data_hash], :search_up)
+ search_edges(visited, @object[:portable_data_hash], :search_up)
+ search_edges(visited, @object[:uuid], :search_up)
render json: visited
def used_by
visited = {}
- search_edges(visited, @object[:uuid] || @object[:portable_data_hash], :search_down)
+ search_edges(visited, @object[:uuid], :search_down)
+ search_edges(visited, @object[:portable_data_hash], :search_down)
render json: visited
commit 6d23c29612aedb12e35930cf37a9c3b36839359b
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Mon Oct 27 17:04:43 2014 -0400
4031: Refresh provenance helper graph generation to fix bugs and make better
use of available human-readable names, such as those of collections and components. Added tests.
diff --git a/apps/workbench/app/controllers/pipeline_instances_controller.rb b/apps/workbench/app/controllers/pipeline_instances_controller.rb
index a618d43..ffa0f5f 100644
--- a/apps/workbench/app/controllers/pipeline_instances_controller.rb
+++ b/apps/workbench/app/controllers/pipeline_instances_controller.rb
@@ -87,38 +87,67 @@ class PipelineInstancesController < ApplicationController
def graph(pipelines)
return nil, nil if params['tab_pane'] != "Graph"
- count = {}
provenance = {}
pips = {}
n = 1
+ # When comparing more than one pipeline, "pips" stores bit fields that
+ # indicates which objects are part of which pipelines.
pipelines.each do |p|
collections = []
+ hashes = []
+ jobs = []
+ p[:components].each do |k, v|
+ provenance["component_#{p[:uuid]}_#{k}"] = v
+ collections << v[:output_uuid] if v[:output_uuid]
+ jobs << v[:job][:uuid] if v[:job]
+ end
+ jobs = jobs.compact.uniq
+ if jobs.any?
+ Job.where(uuid: jobs).each do |j|
+ job_uuid = j.uuid
- p.components.each do |k, v|
- j = v[:job] || next
+ provenance[job_uuid] = j
+ pips[job_uuid] = 0 unless pips[job_uuid] != nil
+ pips[job_uuid] |= n
- uuid = j[:uuid].intern
- provenance[uuid] = j
- pips[uuid] = 0 unless pips[uuid] != nil
- pips[uuid] |= n
+ hashes << j[:output] if j[:output]
+ ProvenanceHelper::find_collections(j) do |hash, uuid|
+ collections << uuid if uuid
+ hashes << hash if hash
+ end
- collections << j[:output]
- ProvenanceHelper::find_collections(j[:script_parameters]).each do |k|
- collections << k
+ if j[:script_version]
+ script_uuid = j[:script_version]
+ provenance[script_uuid] = {:uuid => script_uuid}
+ pips[script_uuid] = 0 unless pips[script_uuid] != nil
+ pips[script_uuid] |= n
+ end
+ end
- uuid = j[:script_version].intern
- provenance[uuid] = {:uuid => uuid}
- pips[uuid] = 0 unless pips[uuid] != nil
- pips[uuid] |= n
+ hashes = hashes.compact.uniq
+ if hashes.any?
+ Collection.where(portable_data_hash: hashes).each do |c|
+ hash_uuid = c.portable_data_hash
+ provenance[hash_uuid] = c
+ pips[hash_uuid] = 0 unless pips[hash_uuid] != nil
+ pips[hash_uuid] |= n
+ end
- Collection.where(uuid: collections.compact).each do |c|
- uuid = c.uuid.intern
- provenance[uuid] = c
- pips[uuid] = 0 unless pips[uuid] != nil
- pips[uuid] |= n
+ collections = collections.compact.uniq
+ if collections.any?
+ Collection.where(uuid: collections).each do |c|
+ collection_uuid = c.uuid
+ provenance[collection_uuid] = c
+ pips[collection_uuid] = 0 unless pips[collection_uuid] != nil
+ pips[collection_uuid] |= n
+ end
n = n << 1
@@ -142,8 +171,8 @@ class PipelineInstancesController < ApplicationController
:request => request,
:all_script_parameters => true,
:combine_jobs => :script_and_version,
- :script_version_nodes => true,
- :pips => pips }
+ :pips => pips,
+ :only_components => true}
diff --git a/apps/workbench/app/helpers/provenance_helper.rb b/apps/workbench/app/helpers/provenance_helper.rb
index 4faad99..bd34b48 100644
--- a/apps/workbench/app/helpers/provenance_helper.rb
+++ b/apps/workbench/app/helpers/provenance_helper.rb
@@ -10,16 +10,7 @@ module ProvenanceHelper
def self.collection_uuid(uuid)
- m = CollectionsHelper.match(uuid)
- if m
- if m[2]
- return m[1]+m[2]
- else
- return m[1]
- end
- else
- nil
- end
+ Keep::Locator.parse(uuid).andand.strip_hints.andand.to_s
def url_for u
@@ -31,59 +22,27 @@ module ProvenanceHelper
def determine_fillcolor(n)
- fillcolor = %w(aaaaaa aaffaa aaaaff aaaaaa ffaaaa)[n || 0] || 'aaaaaa'
- "style=filled,fillcolor=\"##{fillcolor}\""
+ fillcolor = %w(666666 669966 666699 666666 996666)[n || 0] || '666666'
+ "style=\"filled\",color=\"#ffffff\",fillcolor=\"##{fillcolor}\",fontcolor=\"#ffffff\""
- def describe_node(uuid)
- uuid = uuid.to_sym
- bgcolor = determine_fillcolor @opts[:pips].andand[uuid]
+ def describe_node(uuid, describe_opts={})
+ bgcolor = determine_fillcolor (describe_opts[:pip] || @opts[:pips].andand[uuid])
+ if GenerateGraph::collection_uuid(uuid)
+ if Collection.is_empty_blob_locator? uuid.to_s
+ # special case
+ return "\"#{uuid}\" [label=\"(empty collection)\"];\n"
+ end
- rsc = ArvadosBase::resource_class_for_uuid uuid.to_s
- if rsc
- href = url_for ({:controller => rsc.to_s.tableize,
+ href = url_for ({:controller => Collection.to_s.tableize,
:action => :show,
:id => uuid.to_s })
- #"\"#{uuid}\" [label=\"#{rsc}\\n#{uuid}\",href=\"#{href}\"];\n"
- if rsc == Collection
- if Collection.is_empty_blob_locator? uuid.to_s
- # special case
- return "\"#{uuid}\" [label=\"(empty collection)\"];\n"
- end
- if @pdata[uuid]
- if @pdata[uuid][:name]
- return "\"#{uuid}\" [label=\"#{@pdata[uuid][:name]}\",href=\"#{href}\",shape=oval,#{bgcolor}];\n"
- else
- files = nil
- if @pdata[uuid].respond_to? :files
- files = @pdata[uuid].files
- elsif @pdata[uuid][:files]
- files = @pdata[uuid][:files]
- end
- if files
- i = 0
- label = ""
- while i < 3 and i < files.length
- label += "\\n" unless label == ""
- label += files[i][1]
- i += 1
- end
- if i < files.length
- label += "\\n⋮"
- end
- extra_s = @node_extra[uuid].andand.map { |k,v|
- "#{k}=\"#{v}\""
- }.andand.join ","
- return "\"#{uuid}\" [label=\"#{label}\",href=\"#{href}\",shape=oval,#{bgcolor},#{extra_s}];\n"
- end
- end
- end
- end
- return "\"#{uuid}\" [label=\"#{rsc}\",href=\"#{href}\",#{bgcolor}];\n"
+ return "\"#{uuid}\" [label=\"#{encode_quotes(describe_opts[:label] || @pdata[uuid][:name] || uuid)}\",shape=box,href=\"#{href}\",#{bgcolor}];\n"
+ else
+ "\"#{uuid}\" [label=\"#{encode_quotes(describe_opts[:label] || uuid)}\",#{bgcolor},shape=#{describe_opts[:shape] || 'box'}];\n"
- "\"#{uuid}\" [#{bgcolor}];\n"
def job_uuid(job)
@@ -104,10 +63,11 @@ module ProvenanceHelper
def edge(tail, head, extra)
if @opts[:direction] == :bottom_up
- gr = "\"#{tail}\" -> \"#{head}\""
- else
gr = "\"#{head}\" -> \"#{tail}\""
+ else
+ gr = "\"#{tail}\" -> \"#{head}\""
if extra.length > 0
gr += " ["
extra.each do |k, v|
@@ -119,15 +79,16 @@ module ProvenanceHelper
- def script_param_edges(job, prefix, sp)
+ def script_param_edges(uuid, prefix, sp)
gr = ""
case sp
when Hash
sp.each do |k, v|
if prefix.size > 0
k = prefix + "::" + k.to_s
- gr += script_param_edges(job, k.to_s, v)
+ gr += script_param_edges(uuid, k.to_s, v)
when Array
i = 0
@@ -135,7 +96,7 @@ module ProvenanceHelper
count = 0
sp.each do |v|
if GenerateGraph::collection_uuid(v)
- gr += script_param_edges(job, "#{prefix}[#{i}]", v)
+ gr += script_param_edges(uuid, "#{prefix}[#{i}]", v)
elsif @opts[:all_script_parameters]
t = "#{v}"
nl = (if (count+t.length) > 60 then "\\n" else " " end)
@@ -151,70 +112,76 @@ module ProvenanceHelper
node += "']"
node_value = encode_quotes node
gr += "\"#{node_value}\" [label=\"#{node_value}\"];\n"
- gr += edge(job_uuid(job), node_value, {:label => prefix})
+ gr += edge(uuid, node_value, {:label => prefix})
when String
return '' if sp.empty?
m = GenerateGraph::collection_uuid(sp)
- if m and (@pdata[m.intern] or (not @opts[:pdata_only]))
- gr += edge(job_uuid(job), m, {:label => prefix})
- gr += generate_provenance_edges(m)
+ if m and (@pdata[m] or (not @opts[:pdata_only]))
+ gr += edge(m, uuid, {:label => prefix})
elsif @opts[:all_script_parameters]
sp_value = encode_quotes sp
- gr += "\"#{sp_value}\" [label=\"#{sp_value}\"];\n"
- gr += edge(job_uuid(job), sp_value, {:label => prefix})
+ gr += "\"#{sp_value}\" [label=\"\\\"#{sp_value}\\\"\",shape=box];\n"
+ gr += edge(sp_value, uuid, {:label => prefix})
+ def job_edges job, edge_opts={}
+ uuid = job_uuid(job)
+ gr = ""
+ gr += script_param_edges(uuid, "", job[:script_parameters])
+ if job[:docker_image_locator]
+ gr += describe_node(job[:docker_image_locator], {label: (job[:runtime_constraints].andand[:docker_image] || job[:docker_image_locator])})
+ gr += edge(job[:docker_image_locator], uuid, {:label => "docker_image"})
+ end
+ if @opts[:script_version_nodes]
+ #gr += describe_node(job[:script_version])
+ gr += edge(job[:script_version], uuid, {:label => "script_version"})
+ end
+ gr += edge(uuid, job[:output], {label: "output" }) if job[:output] and !edge_opts[:no_output]
+ #gr += edge(uuid, job[:log], {label: "log"}) if job[:log] and !edge_opts[:no_log]
+ gr
+ end
def generate_provenance_edges(uuid)
gr = ""
m = GenerateGraph::collection_uuid(uuid)
uuid = m if m
- uuid = uuid.intern if uuid
- if (not uuid) or uuid.empty? or @visited[uuid]
+ if uuid.nil? or uuid.empty? or @visited[uuid]
return ""
- if not @pdata[uuid] then
- return describe_node(uuid)
+ if @pdata[uuid].nil?
+ return ""
@visited[uuid] = true
- if m
- # uuid is a collection
- if not Collection.is_empty_blob_locator? uuid.to_s
- @pdata.each do |k, job|
- if job[:output] == uuid.to_s
- extra = { label: 'output' }
- gr += edge(uuid, job_uuid(job), extra)
- gr += generate_provenance_edges(job[:uuid])
- end
- if job[:log] == uuid.to_s
- gr += edge(uuid, job_uuid(job), {:label => "log"})
- gr += generate_provenance_edges(job[:uuid])
- end
- end
- end
- gr += describe_node(uuid)
+ if uuid.start_with? "component_"
+ # Pipeline component inputs
+ job = @pdata[@pdata[uuid][:job].andand[:uuid]]
+ gr += describe_node(job_uuid(job), {label: uuid[38..-1], pip: @opts[:pips].andand[job[:uuid]], shape: "oval"})
+ gr += job_edges job, {no_output: true, no_log: true}
+ # Pipeline component output
+ outuuid = @pdata[uuid][:output_uuid]
+ outcollection = @pdata[outuuid]
+ gr += edge(job_uuid(job), outcollection[:portable_data_hash], {label: "output"}) if outuuid
+ gr += describe_node(outcollection[:portable_data_hash], {label: outcollection[:name]})
- # uuid is something else
- rsc = ArvadosBase::resource_class_for_uuid uuid.to_s
+ rsc = ArvadosBase::resource_class_for_uuid uuid
if rsc == Job
job = @pdata[uuid]
- if job
- gr += script_param_edges(job, "", job[:script_parameters])
- if @opts[:script_version_nodes]
- gr += describe_node(job[:script_version])
- gr += edge(job_uuid(job), job[:script_version], {:label => "script_version"})
- end
- end
+ gr += job_edges job if job
elsif rsc == Link
# do nothing
@@ -247,8 +214,9 @@ module ProvenanceHelper
n = 0
v.each do |u|
- gr += "uuid%5b%5d=#{u[:uuid]}&"
- n |= @opts[:pips][u[:uuid].intern] if @opts[:pips] and @opts[:pips][u[:uuid].intern]
+ gr += ";" unless gr.end_with? "?"
+ gr += "uuid%5b%5d=#{u[:uuid]}"
+ n |= @opts[:pips][u[:uuid]] if @opts[:pips] and @opts[:pips][u[:uuid]]
gr += "\",label=\""
@@ -274,7 +242,7 @@ module ProvenanceHelper
if pdata.is_a? Array or pdata.is_a? ArvadosResourceList
p2 = {}
pdata.each do |k|
- p2[k[:uuid].intern] = k if k[:uuid]
+ p2[k[:uuid]] = k if k[:uuid]
pdata = p2
@@ -284,8 +252,8 @@ module ProvenanceHelper
gr = """strict digraph {
-node [fontsize=10,shape=box];
-edge [fontsize=10];
+node [fontsize=10,fontname=\"Helvetica,Arial,sans-serif\"];
+edge [fontsize=10,fontname=\"Helvetica,Arial,sans-serif\"];
if opts[:direction] == :bottom_up
@@ -295,10 +263,16 @@ edge [fontsize=10];
g = GenerateGraph.new(pdata, opts)
pdata.each do |k, v|
- gr += g.generate_provenance_edges(k)
+ if !opts[:only_components] or k.start_with? "component_"
+ gr += g.generate_provenance_edges(k)
+ else
+ #gr += describe_node(k)
+ end
- gr += g.describe_jobs
+ if !opts[:only_components]
+ gr += g.describe_jobs
+ end
gr += "}"
svg = ""
@@ -318,25 +292,26 @@ edge [fontsize=10];
svg = svg.sub(/<svg /, "<svg id=\"#{svgId}\" ")
- def self.find_collections(sp)
- c = []
+ def self.find_collections(sp, &b)
case sp
+ when ArvadosBase
+ sp.class.columns.each do |c|
+ find_collections(sp[c.name.to_sym], &b)
+ end
when Hash
sp.each do |k, v|
- c.concat(find_collections(v))
+ find_collections(v, &b)
when Array
sp.each do |v|
- c.concat(find_collections(v))
+ find_collections(v, &b)
when String
- if !sp.empty?
- m = GenerateGraph::collection_uuid(sp)
- if m
- c << m
- end
+ if m = /[a-f0-9]{32}\+\d+/.match(sp)
+ yield m[0], nil
+ elsif m = /[0-9a-z]{5}-4zz18-[0-9a-z]{15}/.match(sp)
+ yield nil, m[0]
- c
diff --git a/apps/workbench/test/controllers/pipeline_instances_controller_test.rb b/apps/workbench/test/controllers/pipeline_instances_controller_test.rb
index d9f915b..70b7493 100644
--- a/apps/workbench/test/controllers/pipeline_instances_controller_test.rb
+++ b/apps/workbench/test/controllers/pipeline_instances_controller_test.rb
@@ -37,4 +37,182 @@ class PipelineInstancesControllerTest < ActionController::TestCase
{started_at: 6, finished_at: 8}]
assert_equal 6, determine_wallclock_runtime(r)
+ class RequestDuck
+ def self.host
+ "localhost"
+ end
+ def self.port
+ 8080
+ end
+ def self.protocol
+ "http"
+ end
+ end
+ test "generate graph" do
+ use_token 'admin'
+ pipeline_for_graph = {
+ state: 'Complete',
+ uuid: 'zzzzz-d1hrv-9fm8l10i9z2kqc9',
+ components: {
+ stage1: {
+ repository: 'foo',
+ script: 'hash',
+ script_version: 'master',
+ job: {uuid: 'zzzzz-8i9sb-graphstage10000'},
+ output_uuid: 'zzzzz-4zz18-bv31uwvy3neko22'
+ },
+ stage2: {
+ repository: 'foo',
+ script: 'hash2',
+ script_version: 'master',
+ script_parameters: {
+ input: 'fa7aeb5140e2848d39b416daeef4ffc5+45'
+ },
+ job: {uuid: 'zzzzz-8i9sb-graphstage20000'},
+ output_uuid: 'zzzzz-4zz18-uukreo9rbgwsujx'
+ }
+ }
+ }
+ @controller.params['tab_pane'] = "Graph"
+ provenance, pips = @controller.graph([pipeline_for_graph])
+ ['component_zzzzz-d1hrv-9fm8l10i9z2kqc9_stage1',
+ 'component_zzzzz-d1hrv-9fm8l10i9z2kqc9_stage2',
+ 'zzzzz-8i9sb-graphstage10000',
+ 'zzzzz-8i9sb-graphstage20000',
+ 'b519d9cb706a29fc7ea24dbea2f05851+93',
+ 'fa7aeb5140e2848d39b416daeef4ffc5+45',
+ 'zzzzz-4zz18-bv31uwvy3neko22',
+ 'zzzzz-4zz18-uukreo9rbgwsujx'].each do |k|
+ assert_not_nil provenance[k], "Expected key #{k} in provenance set"
+ assert_equal 1, pips[k], "Expected key #{k} in pips set" if !k.start_with? "component_"
+ end
+ prov_svg = ProvenanceHelper::create_provenance_graph provenance, "provenance_svg", {
+ :request => RequestDuck,
+ :all_script_parameters => true,
+ :combine_jobs => :script_and_version,
+ :pips => pips,
+ :only_components => true }
+ # hash -> owned_by_active
+ assert /hash_4fe459abe02d9b365932b8f5dc419439ab4e2577_99914b932bd37a50b983c5e7c90ae93b->fa7aeb5140e2848d39b416daeef4ffc5\+45/.match(prov_svg)
+ # owned_by_active -> hash2
+ assert /fa7aeb5140e2848d39b416daeef4ffc5\+45->hash2_4fe459abe02d9b365932b8f5dc419439ab4e2577_4900033ec5cfaf8a63566f3664aeaa70/.match(prov_svg)
+ #File::open "./tmp/stuff1.svg", "w" do |f|
+ # f.write "<?xml version=\"1.0\" ?>\n"
+ # f.write prov_svg
+ #end
+ end
+ test "generate graph compare" do
+ use_token 'admin'
+ pipeline_for_graph1 = {
+ state: 'Complete',
+ uuid: 'zzzzz-d1hrv-9fm8l10i9z2kqc9',
+ components: {
+ stage1: {
+ repository: 'foo',
+ script: 'hash',
+ script_version: 'master',
+ job: {uuid: 'zzzzz-8i9sb-graphstage10000'},
+ output_uuid: 'zzzzz-4zz18-bv31uwvy3neko22'
+ },
+ stage2: {
+ repository: 'foo',
+ script: 'hash2',
+ script_version: 'master',
+ script_parameters: {
+ input: 'fa7aeb5140e2848d39b416daeef4ffc5+45'
+ },
+ job: {uuid: 'zzzzz-8i9sb-graphstage20000'},
+ output_uuid: 'zzzzz-4zz18-uukreo9rbgwsujx'
+ }
+ }
+ }
+ pipeline_for_graph2 = {
+ state: 'Complete',
+ uuid: 'zzzzz-d1hrv-9fm8l10i9z2kqc0',
+ components: {
+ stage1: {
+ repository: 'foo',
+ script: 'hash',
+ script_version: 'master',
+ job: {uuid: 'zzzzz-8i9sb-graphstage10000'},
+ output_uuid: 'zzzzz-4zz18-bv31uwvy3neko22'
+ },
+ stage2: {
+ repository: 'foo',
+ script: 'hash2',
+ script_version: 'master',
+ script_parameters: {
+ },
+ job: {uuid: 'zzzzz-8i9sb-graphstage30000'},
+ output_uuid: 'zzzzz-4zz18-uukreo9rbgwsujj'
+ }
+ }
+ }
+ @controller.params['tab_pane'] = "Graph"
+ provenance, pips = @controller.graph([pipeline_for_graph1, pipeline_for_graph2])
+ [['component_zzzzz-d1hrv-9fm8l10i9z2kqc9_stage1', nil],
+ ['component_zzzzz-d1hrv-9fm8l10i9z2kqc9_stage2', nil],
+ ['component_zzzzz-d1hrv-9fm8l10i9z2kqc0_stage1', nil],
+ ['component_zzzzz-d1hrv-9fm8l10i9z2kqc0_stage2', nil],
+ ['zzzzz-8i9sb-graphstage10000', 3],
+ ['zzzzz-8i9sb-graphstage20000', 1],
+ ['zzzzz-8i9sb-graphstage30000', 2],
+ ['b519d9cb706a29fc7ea24dbea2f05851+93', 1],
+ ['fa7aeb5140e2848d39b416daeef4ffc5+45', 3],
+ ['ea10d51bcf88862dbcc36eb292017dfd+45', 2],
+ ['zzzzz-4zz18-bv31uwvy3neko22', 3],
+ ['zzzzz-4zz18-uukreo9rbgwsujx', 1],
+ ['zzzzz-4zz18-uukreo9rbgwsujj', 2]
+ ].each do |k|
+ assert_not_nil provenance[k[0]], "Expected key #{k[0]} in provenance set"
+ assert_equal k[1], pips[k[0]], "Expected key #{k} in pips" if !k[0].start_with? "component_"
+ end
+ prov_svg = ProvenanceHelper::create_provenance_graph provenance, "provenance_svg", {
+ :request => RequestDuck,
+ :all_script_parameters => true,
+ :combine_jobs => :script_and_version,
+ :pips => pips,
+ :only_components => true }
+ # owned_by_active -> hash2 (stuff)
+ assert /fa7aeb5140e2848d39b416daeef4ffc5\+45->hash2_4fe459abe02d9b365932b8f5dc419439ab4e2577_4900033ec5cfaf8a63566f3664aeaa70/.match(prov_svg)
+ # owned_by_active -> hash2 (stuff2)
+ assert /fa7aeb5140e2848d39b416daeef4ffc5\+45->hash2_4fe459abe02d9b365932b8f5dc419439ab4e2577_02a085407e751d00b5dc88f1bd5e8247/.match(prov_svg)
+ # hash2 (stuff) -> GPL
+ assert /hash2_4fe459abe02d9b365932b8f5dc419439ab4e2577_4900033ec5cfaf8a63566f3664aeaa70->b519d9cb706a29fc7ea24dbea2f05851\+93/.match(prov_svg)
+ # hash2 (stuff2) -> baz file
+ assert /hash2_4fe459abe02d9b365932b8f5dc419439ab4e2577_02a085407e751d00b5dc88f1bd5e8247->ea10d51bcf88862dbcc36eb292017dfd\+45/.match(prov_svg)
+ # File::open "./tmp/stuff2.svg", "w" do |f|
+ # f.write "<?xml version=\"1.0\" ?>\n"
+ # f.write prov_svg
+ # end
+ end
diff --git a/services/api/app/controllers/arvados/v1/collections_controller.rb b/services/api/app/controllers/arvados/v1/collections_controller.rb
index 45331a3..1d0fc2d 100644
--- a/services/api/app/controllers/arvados/v1/collections_controller.rb
+++ b/services/api/app/controllers/arvados/v1/collections_controller.rb
@@ -71,9 +71,13 @@ class Arvados::V1::CollectionsController < ApplicationController
if loc
# uuid is a portable_data_hash
- if c = Collection.readable_by(*@read_users).where(portable_data_hash: loc.to_s).limit(1).first
+ c = Collection.readable_by(*@read_users).where(portable_data_hash: loc.to_s).all
+ if c.size == 1
+ visited[loc.to_s] = c
+ elsif c.size > 1
visited[loc.to_s] = {
- portable_data_hash: c.portable_data_hash,
+ portable_data_hash: c[0].portable_data_hash,
+ name: "#{c[0].name} + #{c.size-1} more"
diff --git a/services/api/test/fixtures/collections.yml b/services/api/test/fixtures/collections.yml
index 045e1c7..379bebd 100644
--- a/services/api/test/fixtures/collections.yml
+++ b/services/api/test/fixtures/collections.yml
@@ -279,6 +279,24 @@ collection_with_files_in_subdir:
updated_at: 2014-02-03T17:22:54Z
manifest_text: ". 85877ca2d7e05498dd3d109baf2df106+95+A3a4e26a366ee7e4ed3e476ccf05354761be2e4ae at 545a9920 0:95:file_in_subdir1\n./subdir2/subdir3 2bbc341c702df4d8f42ec31f16c10120+64+A315d7e7bad2ce937e711fc454fae2d1194d14d64 at 545a9920 0:32:file1_in_subdir3.txt 32:32:file2_in_subdir3.txt\n./subdir2/subdir3/subdir4 2bbc341c702df4d8f42ec31f16c10120+64+A315d7e7bad2ce937e711fc454fae2d1194d14d64 at 545a9920 0:32:file1_in_subdir4.txt 32:32:file2_in_subdir4.txt"
+ uuid: zzzzz-4zz18-bv31uwvy3neko22
+ portable_data_hash: fa7aeb5140e2848d39b416daeef4ffc5+45
+ manifest_text: ". 37b51d194a7513e45b56f6524f2d51f2+3 0:3:bar\n"
+ name: bar_file
+ uuid: zzzzz-4zz18-uukreo9rbgwsujx
+ portable_data_hash: b519d9cb706a29fc7ea24dbea2f05851+93
+ manifest_text: ". 6a4ff0499484c6c79c95cd8c566bd25f+249025 0:249025:GNU_General_Public_License,_version_3.pdf\n"
+ name: "GNU General Public License, version 3"
+ uuid: zzzzz-4zz18-uukreo9rbgwsujj
+ portable_data_hash: ea10d51bcf88862dbcc36eb292017dfd+45
+ manifest_text: ". 73feffa4b7f6bb68e44cf984c85f6e88+3 0:3:baz\n"
+ name: "baz file"
# Test Helper trims the rest of the file
# Do not add your fixtures below this line as the rest of this file will be trimmed by test_helper
diff --git a/services/api/test/fixtures/jobs.yml b/services/api/test/fixtures/jobs.yml
index 1381078..dacddbb 100644
--- a/services/api/test/fixtures/jobs.yml
+++ b/services/api/test/fixtures/jobs.yml
@@ -176,6 +176,8 @@ previous_docker_job_run:
input: fa7aeb5140e2848d39b416daeef4ffc5+45
an_integer: "1"
+ runtime_constraints:
+ docker_image: arvados/test
success: true
output: ea10d51bcf88862dbcc36eb292017dfd+45
docker_image_locator: fa3c1a9cb6783f85f2ecda037e07b8c3+167
@@ -295,6 +297,40 @@ job_in_subproject:
script_version: 4fe459abe02d9b365932b8f5dc419439ab4e2577
state: Complete
+ uuid: zzzzz-8i9sb-graphstage10000
+ owner_uuid: zzzzz-j7d0g-v955i6s2oi1cbso
+ repository: foo
+ script: hash
+ script_version: 4fe459abe02d9b365932b8f5dc419439ab4e2577
+ state: Complete
+ output: fa7aeb5140e2848d39b416daeef4ffc5+45
+ uuid: zzzzz-8i9sb-graphstage20000
+ owner_uuid: zzzzz-j7d0g-v955i6s2oi1cbso
+ repository: foo
+ script: hash2
+ script_version: 4fe459abe02d9b365932b8f5dc419439ab4e2577
+ state: Complete
+ script_parameters:
+ input: fa7aeb5140e2848d39b416daeef4ffc5+45
+ input2: "stuff"
+ output: b519d9cb706a29fc7ea24dbea2f05851+93
+ uuid: zzzzz-8i9sb-graphstage30000
+ owner_uuid: zzzzz-j7d0g-v955i6s2oi1cbso
+ repository: foo
+ script: hash2
+ script_version: 4fe459abe02d9b365932b8f5dc419439ab4e2577
+ state: Complete
+ script_parameters:
+ input: fa7aeb5140e2848d39b416daeef4ffc5+45
+ input2: "stuff2"
+ output: ea10d51bcf88862dbcc36eb292017dfd+45
# Test Helper trims the rest of the file
# Do not add your fixtures below this line as the rest of this file will be trimmed by test_helper
