[ARVADOS] created: b50bbef5f429465fe71781be7edebbb8c4479d50
git at public.curoverse.com
git at public.curoverse.com
Mon Mar 24 02:19:40 EDT 2014
at b50bbef5f429465fe71781be7edebbb8c4479d50 (commit)
commit b50bbef5f429465fe71781be7edebbb8c4479d50
Author: Tom Clegg <tom at curoverse.com>
Date: Mon Mar 24 01:59:08 2014 -0400
Indicate which outputs are marked "persistent" on pipeline provenance graphs.
diff --git a/apps/workbench/app/controllers/pipeline_instances_controller.rb b/apps/workbench/app/controllers/pipeline_instances_controller.rb
index c2a398c..4231475 100644
--- a/apps/workbench/app/controllers/pipeline_instances_controller.rb
+++ b/apps/workbench/app/controllers/pipeline_instances_controller.rb
@@ -15,6 +15,12 @@ class PipelineInstancesController < ApplicationController
p.components.each do |k, v|
j = v[:job] || next
+ # The graph is interested in whether the component is
+ # indicated as persistent, more than whether the job
+ # satisfying it (which could have been reused, or someone
+ # else's) is.
+ j[:output_is_persistent] = v[:output_is_persistent]
+
uuid = j[:uuid].intern
provenance[uuid] = j
pips[uuid] = 0 unless pips[uuid] != nil
diff --git a/apps/workbench/app/helpers/provenance_helper.rb b/apps/workbench/app/helpers/provenance_helper.rb
index 66754d2..828fbfe 100644
--- a/apps/workbench/app/helpers/provenance_helper.rb
+++ b/apps/workbench/app/helpers/provenance_helper.rb
@@ -6,6 +6,7 @@ module ProvenanceHelper
@opts = opts
@visited = {}
@jobs = {}
+ @node_extra = {}
end
def self.collection_uuid(uuid)
@@ -41,7 +42,7 @@ module ProvenanceHelper
def describe_node(uuid)
uuid = uuid.to_sym
- bgcolor = determine_fillcolor @opts[:pips][uuid] if @opts[:pips]
+ bgcolor = determine_fillcolor @opts[:pips].andand[uuid]
rsc = ArvadosBase::resource_class_for_uuid uuid.to_s
if rsc
@@ -57,7 +58,7 @@ module ProvenanceHelper
#puts "empty!"
return "\"#{uuid}\" [label=\"(empty collection)\"];\n"
end
- puts "#{uuid.class} #{@pdata[uuid]}"
+ #puts "#{uuid.class} #{@pdata[uuid]}"
if @pdata[uuid]
#puts @pdata[uuid]
if @pdata[uuid][:name]
@@ -82,7 +83,10 @@ module ProvenanceHelper
label += "\\n⋮"
end
#puts "#{uuid} #{label} #{files}"
- return "\"#{uuid}\" [label=\"#{label}\",href=\"#{href}\",shape=oval,#{bgcolor}];\n"
+ extra_s = @node_extra[uuid].andand.map { |k,v|
+ "#{k}=\"#{v}\""
+ }.andand.join ","
+ return "\"#{uuid}\" [label=\"#{label}\",href=\"#{href}\",shape=oval,#{bgcolor},#{extra_s}];\n"
end
end
end
@@ -189,27 +193,31 @@ module ProvenanceHelper
@visited[uuid] = true
end
- #puts "visiting #{uuid}"
+ #puts "visiting #{uuid.inspect}"
- if m
+ if m
# uuid is a collection
- gr += describe_node(uuid)
-
- if m == :"d41d8cd98f00b204e9800998ecf8427e+0"
- # empty collection, don't follow any further
- return gr
- end
-
- @pdata.each do |k, job|
- if job[:output] == uuid.to_s
- gr += edge(uuid, job_uuid(job), {:label => "output"})
- gr += generate_provenance_edges(job[:uuid])
- end
- if job[:log] == uuid.to_s
- gr += edge(uuid, job_uuid(job), {:label => "log"})
- gr += generate_provenance_edges(job[:uuid])
+ if uuid != :"d41d8cd98f00b204e9800998ecf8427e+0"
+ # not the empty collection
+
+ @pdata.each do |k, job|
+ if job[:output] == uuid.to_s
+ extra = { label: 'output' }
+ if job[:output_is_persistent]
+ extra[:label] += ' (persistent)'
+ @node_extra[uuid] ||= {}
+ @node_extra[uuid][:penwidth] = 4
+ end
+ gr += edge(uuid, job_uuid(job), extra)
+ gr += generate_provenance_edges(job[:uuid])
+ end
+ if job[:log] == uuid.to_s
+ gr += edge(uuid, job_uuid(job), {:label => "log"})
+ gr += generate_provenance_edges(job[:uuid])
+ end
end
end
+ gr += describe_node(uuid)
else
# uuid is something else
rsc = ArvadosBase::resource_class_for_uuid uuid.to_s
@@ -313,7 +321,7 @@ edge [fontsize=10];
gr += "}"
svg = ""
- puts gr
+ #puts gr
require 'open3'
commit 381f3ce2529a027cc0eb1c402b94135711658f6b
Author: Tom Clegg <tom at curoverse.com>
Date: Mon Mar 24 00:55:04 2014 -0400
Make sure persistence links get added for persistent components, even
when reusing jobs that are not marked persistent or belong to other
users.
diff --git a/sdk/cli/arvados-cli.gemspec b/sdk/cli/arvados-cli.gemspec
index 5551fec..c43e3b8 100644
--- a/sdk/cli/arvados-cli.gemspec
+++ b/sdk/cli/arvados-cli.gemspec
@@ -18,6 +18,7 @@ Gem::Specification.new do |s|
s.executables << "arv-run-pipeline-instance"
s.executables << "arv-crunch-job"
s.executables << "arv-tag"
+ s.add_runtime_dependency 'arvados', '~> 0.1.0'
s.add_runtime_dependency 'google-api-client', '~> 0.6.3'
s.add_runtime_dependency 'activesupport', '~> 3.2', '>= 3.2.13'
s.add_runtime_dependency 'json', '~> 1.7', '>= 1.7.7'
diff --git a/sdk/cli/bin/arv-run-pipeline-instance b/sdk/cli/bin/arv-run-pipeline-instance
index 09f894a..7578abc 100755
--- a/sdk/cli/bin/arv-run-pipeline-instance
+++ b/sdk/cli/bin/arv-run-pipeline-instance
@@ -79,6 +79,7 @@ $arvados_api_token = ENV['ARVADOS_API_TOKEN'] or
abort "#{$0}: fatal: ARVADOS_API_TOKEN environment variable not set."
begin
+ require 'arvados'
require 'rubygems'
require 'json'
require 'pp'
@@ -89,7 +90,7 @@ rescue LoadError => l
abort <<-EOS
#{$0}: fatal: #{l.message}
Some runtime dependencies may be missing.
-Try: gem install pp google-api-client json trollop
+Try: gem install arvados pp google-api-client json trollop
EOS
end
@@ -225,6 +226,7 @@ $client ||= Google::APIClient.
:application_name => File.split($0).last,
:application_version => $application_version.to_s)
$arvados = $client.discovered_api('arvados', $arvados_api_version)
+$arv = Arvados.new api_version: 'v1'
class PipelineInstance
@@ -433,6 +435,9 @@ class WhRunPipelineInstance
moretodo = false
@components.each do |cname, c|
job = nil
+ c_already_finished = (c[:job] &&
+ c[:job][:uuid] &&
+ !c[:job][:success].nil?)
if !c[:job] and
c[:script_parameters].select { |pname, p| p.is_a? Hash }.empty?
# Job is fully specified (all parameter values are present) but
@@ -524,6 +529,41 @@ class WhRunPipelineInstance
end
end
end
+ unless c_already_finished
+ if c[:output_is_persistent]
+ # This is my first time discovering that the job
+ # succeeded. I need to make sure a resources/wants
+ # link is in place to protect the output from garbage
+ # collection. (Normally Crunch does this for me, but
+ # here I might be reusing the output of someone else's
+ # job and I need to make sure it's understood that the
+ # output is valuable to me, too.)
+ wanted = c[:job][:output]
+ debuglog "checking for existing persistence link for #{wanted}"
+ @my_user_uuid ||= $arv.user.current[:uuid]
+ links = $arv.link.list(limit: 1,
+ filters:
+ [%w(link_class = resources),
+ %w(name = wants),
+ %w(tail_uuid =) + [@my_user_uuid],
+ %w(head_uuid =) + [wanted]
+ ])[:items]
+ if links.any?
+ debuglog "link already exists, uuid #{links.first[:uuid]}"
+ else
+ newlink = $arv.link.create link: \
+ {
+ link_class: 'resources',
+ name: 'wants',
+ tail_kind: 'arvados#user',
+ tail_uuid: @my_user_uuid,
+ head_kind: 'arvados#collection',
+ head_uuid: wanted
+ }
+ debuglog "added link, uuid #{newlink[:uuid]}"
+ end
+ end
+ end
elsif c[:job][:running] ||
(!c[:job][:started_at] && !c[:job][:cancelled_at])
moretodo = true
commit 6a8e3decba6bf4cd890bf98241646d0d070990b7
Author: Tom Clegg <tom at curoverse.com>
Date: Sun Mar 23 23:07:40 2014 -0400
Propagate output_is_persistent flag from pipeline to job.
diff --git a/sdk/cli/bin/arv-run-pipeline-instance b/sdk/cli/bin/arv-run-pipeline-instance
index 1e62a52..09f894a 100755
--- a/sdk/cli/bin/arv-run-pipeline-instance
+++ b/sdk/cli/bin/arv-run-pipeline-instance
@@ -494,7 +494,8 @@ class WhRunPipelineInstance
job = JobCache.create(:script => c[:script],
:script_parameters => c[:script_parameters],
:runtime_constraints => c[:runtime_constraints] || {},
- :script_version => c[:script_version] || 'master')
+ :script_version => c[:script_version] || 'master',
+ :output_is_persistent => c[:output_is_persistent] || false)
if job
debuglog "component #{cname} new job #{job[:uuid]}"
c[:job] = job
commit 73e9bb7585add05c044a80c6811681d58b6148b4
Author: Tom Clegg <tom at curoverse.com>
Date: Sun Mar 23 23:03:32 2014 -0400
Add output_is_persistent flags to example pipeline template.
diff --git a/doc/examples/pipeline_templates/gatk-exome-fq-snp.json b/doc/examples/pipeline_templates/gatk-exome-fq-snp.json
index 3668f00..f84ad3f 100644
--- a/doc/examples/pipeline_templates/gatk-exome-fq-snp.json
+++ b/doc/examples/pipeline_templates/gatk-exome-fq-snp.json
@@ -11,7 +11,8 @@
],
"input":"d237a90bae3870b3b033aea1e99de4a9+10820+K@qr1hi"
},
- "script_version":"e820bd1c6890f93ea1a84ffd5730bbf0e3d8e153"
+ "script_version":"e820bd1c6890f93ea1a84ffd5730bbf0e3d8e153",
+ "output_is_persistent":false
},
"bwa-index":{
"script_version":"e820bd1c6890f93ea1a84ffd5730bbf0e3d8e153",
@@ -24,7 +25,8 @@
"value":"8b6e2c4916133e1d859c9e812861ce13+70",
"required":true
}
- }
+ },
+ "output_is_persistent":false
},
"bwa-aln":{
"script_version":"e820bd1c6890f93ea1a84ffd5730bbf0e3d8e153",
@@ -48,7 +50,8 @@
},
"runtime_constraints":{
"max_tasks_per_node":1
- }
+ },
+ "output_is_persistent":false
},
"picard-gatk2-prep":{
"script_version":"e820bd1c6890f93ea1a84ffd5730bbf0e3d8e153",
@@ -67,7 +70,8 @@
},
"runtime_constraints":{
"max_tasks_per_node":1
- }
+ },
+ "output_is_persistent":false
},
"GATK2-realign":{
"script_version":"e820bd1c6890f93ea1a84ffd5730bbf0e3d8e153",
@@ -97,7 +101,8 @@
},
"runtime_constraints":{
"max_tasks_per_node":2
- }
+ },
+ "output_is_persistent":false
},
"GATK2-bqsr":{
"script_version":"e820bd1c6890f93ea1a84ffd5730bbf0e3d8e153",
@@ -118,7 +123,8 @@
"value":"7e0a277d6d2353678a11f56bab3b13f2+87",
"required":true
}
- }
+ },
+ "output_is_persistent":false
},
"GATK2-merge-call":{
"script_version":"e820bd1c6890f93ea1a84ffd5730bbf0e3d8e153",
@@ -155,7 +161,8 @@
"200"
]
}
- }
+ },
+ "output_is_persistent":true
}
}
}
commit 69d0e98d74c221ba2161b4851c51dd47ee7a9409
Author: Tom Clegg <tom at curoverse.com>
Date: Sun Mar 23 23:02:43 2014 -0400
Add output_is_persistent attribute to job model.
diff --git a/services/api/app/models/job.rb b/services/api/app/models/job.rb
index 1f0ef75..a3fe08f 100644
--- a/services/api/app/models/job.rb
+++ b/services/api/app/models/job.rb
@@ -27,6 +27,7 @@ class Job < ArvadosModel
t.add :started_at
t.add :finished_at
t.add :output
+ t.add :output_is_persistent
t.add :success
t.add :running
t.add :is_locked_by_uuid
diff --git a/services/api/db/migrate/20140324024606_add_output_is_persistent_to_job.rb b/services/api/db/migrate/20140324024606_add_output_is_persistent_to_job.rb
new file mode 100644
index 0000000..04a03c0
--- /dev/null
+++ b/services/api/db/migrate/20140324024606_add_output_is_persistent_to_job.rb
@@ -0,0 +1,5 @@
+class AddOutputIsPersistentToJob < ActiveRecord::Migration
+ def change
+ add_column :jobs, :output_is_persistent, :boolean, null: false, default: false
+ end
+end
diff --git a/services/api/db/schema.rb b/services/api/db/schema.rb
index df6ea9b..5c2fe29 100644
--- a/services/api/db/schema.rb
+++ b/services/api/db/schema.rb
@@ -11,7 +11,7 @@
#
# It's strongly recommended to check this file into your version control system.
-ActiveRecord::Schema.define(:version => 20140129184311) do
+ActiveRecord::Schema.define(:version => 20140324024606) do
create_table "api_client_authorizations", :force => true do |t|
t.string "api_token", :null => false
@@ -182,13 +182,14 @@ ActiveRecord::Schema.define(:version => 20140129184311) do
t.boolean "running"
t.boolean "success"
t.string "output"
- t.datetime "created_at", :null => false
- t.datetime "updated_at", :null => false
+ t.datetime "created_at", :null => false
+ t.datetime "updated_at", :null => false
t.string "priority"
t.string "is_locked_by_uuid"
t.string "log"
t.text "tasks_summary"
t.text "runtime_constraints"
+ t.boolean "output_is_persistent", :default => false, :null => false
end
add_index "jobs", ["created_at"], :name => "index_jobs_on_created_at"
diff --git a/services/api/test/functional/arvados/v1/jobs_controller_test.rb b/services/api/test/functional/arvados/v1/jobs_controller_test.rb
index f68cbc2..7920bce 100644
--- a/services/api/test/functional/arvados/v1/jobs_controller_test.rb
+++ b/services/api/test/functional/arvados/v1/jobs_controller_test.rb
@@ -14,6 +14,8 @@ class Arvados::V1::JobsControllerTest < ActionController::TestCase
new_job = JSON.parse(@response.body)
assert_not_nil new_job['uuid']
assert_not_nil new_job['script_version'].match(/^[0-9a-f]{40}$/)
+ # Default: not persistent
+ assert_equal false, new_job['output_is_persistent']
end
test "normalize output and log uuids when creating job" do
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list