[ARVADOS] created: b50bbef5f429465fe71781be7edebbb8c4479d50

git at public.curoverse.com git at public.curoverse.com
Mon Mar 24 02:19:40 EDT 2014


        at  b50bbef5f429465fe71781be7edebbb8c4479d50 (commit)


commit b50bbef5f429465fe71781be7edebbb8c4479d50
Author: Tom Clegg <tom at curoverse.com>
Date:   Mon Mar 24 01:59:08 2014 -0400

    Indicate which outputs are marked "persistent" on pipeline provenance graphs.

diff --git a/apps/workbench/app/controllers/pipeline_instances_controller.rb b/apps/workbench/app/controllers/pipeline_instances_controller.rb
index c2a398c..4231475 100644
--- a/apps/workbench/app/controllers/pipeline_instances_controller.rb
+++ b/apps/workbench/app/controllers/pipeline_instances_controller.rb
@@ -15,6 +15,12 @@ class PipelineInstancesController < ApplicationController
       p.components.each do |k, v|
         j = v[:job] || next
 
+        # The graph is interested in whether the component is
+        # indicated as persistent, more than whether the job
+        # satisfying it (which could have been reused, or someone
+        # else's) is.
+        j[:output_is_persistent] = v[:output_is_persistent]
+
         uuid = j[:uuid].intern
         provenance[uuid] = j
         pips[uuid] = 0 unless pips[uuid] != nil
diff --git a/apps/workbench/app/helpers/provenance_helper.rb b/apps/workbench/app/helpers/provenance_helper.rb
index 66754d2..828fbfe 100644
--- a/apps/workbench/app/helpers/provenance_helper.rb
+++ b/apps/workbench/app/helpers/provenance_helper.rb
@@ -6,6 +6,7 @@ module ProvenanceHelper
       @opts = opts
       @visited = {}
       @jobs = {}
+      @node_extra = {}
     end
     
     def self.collection_uuid(uuid)
@@ -41,7 +42,7 @@ module ProvenanceHelper
 
     def describe_node(uuid)
       uuid = uuid.to_sym
-      bgcolor = determine_fillcolor @opts[:pips][uuid] if @opts[:pips]
+      bgcolor = determine_fillcolor @opts[:pips].andand[uuid]
 
       rsc = ArvadosBase::resource_class_for_uuid uuid.to_s
       if rsc
@@ -57,7 +58,7 @@ module ProvenanceHelper
             #puts "empty!"
             return "\"#{uuid}\" [label=\"(empty collection)\"];\n"
           end
-          puts "#{uuid.class} #{@pdata[uuid]}"
+          #puts "#{uuid.class} #{@pdata[uuid]}"
           if @pdata[uuid] 
             #puts @pdata[uuid]
             if @pdata[uuid][:name]
@@ -82,7 +83,10 @@ module ProvenanceHelper
                   label += "\\n⋮"
                 end
                 #puts "#{uuid} #{label} #{files}"
-                return "\"#{uuid}\" [label=\"#{label}\",href=\"#{href}\",shape=oval,#{bgcolor}];\n"
+                extra_s = @node_extra[uuid].andand.map { |k,v|
+                  "#{k}=\"#{v}\""
+                }.andand.join ","
+                return "\"#{uuid}\" [label=\"#{label}\",href=\"#{href}\",shape=oval,#{bgcolor},#{extra_s}];\n"
               end
             end  
           end
@@ -189,27 +193,31 @@ module ProvenanceHelper
         @visited[uuid] = true
       end
 
-      #puts "visiting #{uuid}"
+      #puts "visiting #{uuid.inspect}"
 
-      if m  
+      if m
         # uuid is a collection
-        gr += describe_node(uuid)
-
-        if m == :"d41d8cd98f00b204e9800998ecf8427e+0"
-          # empty collection, don't follow any further
-          return gr
-        end
-
-        @pdata.each do |k, job|
-          if job[:output] == uuid.to_s
-            gr += edge(uuid, job_uuid(job), {:label => "output"})
-            gr += generate_provenance_edges(job[:uuid])
-          end
-          if job[:log] == uuid.to_s
-            gr += edge(uuid, job_uuid(job), {:label => "log"})
-            gr += generate_provenance_edges(job[:uuid])
+        if uuid != :"d41d8cd98f00b204e9800998ecf8427e+0"
+          # not the empty collection
+
+          @pdata.each do |k, job|
+            if job[:output] == uuid.to_s
+              extra = { label: 'output' }
+              if job[:output_is_persistent]
+                extra[:label] += ' (persistent)'
+                @node_extra[uuid] ||= {}
+                @node_extra[uuid][:penwidth] = 4
+              end
+              gr += edge(uuid, job_uuid(job), extra)
+              gr += generate_provenance_edges(job[:uuid])
+            end
+            if job[:log] == uuid.to_s
+              gr += edge(uuid, job_uuid(job), {:label => "log"})
+              gr += generate_provenance_edges(job[:uuid])
+            end
           end
         end
+        gr += describe_node(uuid)
       else
         # uuid is something else
         rsc = ArvadosBase::resource_class_for_uuid uuid.to_s
@@ -313,7 +321,7 @@ edge [fontsize=10];
     gr += "}"
     svg = ""
 
-    puts gr
+    #puts gr
     
     require 'open3'
 

commit 381f3ce2529a027cc0eb1c402b94135711658f6b
Author: Tom Clegg <tom at curoverse.com>
Date:   Mon Mar 24 00:55:04 2014 -0400

    Make sure persistence links get added for persistent components, even
    when reusing jobs that are not marked persistent or belong to other
    users.

diff --git a/sdk/cli/arvados-cli.gemspec b/sdk/cli/arvados-cli.gemspec
index 5551fec..c43e3b8 100644
--- a/sdk/cli/arvados-cli.gemspec
+++ b/sdk/cli/arvados-cli.gemspec
@@ -18,6 +18,7 @@ Gem::Specification.new do |s|
   s.executables << "arv-run-pipeline-instance"
   s.executables << "arv-crunch-job"
   s.executables << "arv-tag"
+  s.add_runtime_dependency 'arvados', '~> 0.1.0'
   s.add_runtime_dependency 'google-api-client', '~> 0.6.3'
   s.add_runtime_dependency 'activesupport', '~> 3.2', '>= 3.2.13'
   s.add_runtime_dependency 'json', '~> 1.7', '>= 1.7.7'
diff --git a/sdk/cli/bin/arv-run-pipeline-instance b/sdk/cli/bin/arv-run-pipeline-instance
index 09f894a..7578abc 100755
--- a/sdk/cli/bin/arv-run-pipeline-instance
+++ b/sdk/cli/bin/arv-run-pipeline-instance
@@ -79,6 +79,7 @@ $arvados_api_token = ENV['ARVADOS_API_TOKEN'] or
   abort "#{$0}: fatal: ARVADOS_API_TOKEN environment variable not set."
 
 begin
+  require 'arvados'
   require 'rubygems'
   require 'json'
   require 'pp'
@@ -89,7 +90,7 @@ rescue LoadError => l
   abort <<-EOS
 #{$0}: fatal: #{l.message}
 Some runtime dependencies may be missing.
-Try: gem install pp google-api-client json trollop
+Try: gem install arvados pp google-api-client json trollop
   EOS
 end
 
@@ -225,6 +226,7 @@ $client ||= Google::APIClient.
       :application_name => File.split($0).last,
       :application_version => $application_version.to_s)
 $arvados = $client.discovered_api('arvados', $arvados_api_version)
+$arv = Arvados.new api_version: 'v1'
 
 
 class PipelineInstance
@@ -433,6 +435,9 @@ class WhRunPipelineInstance
       moretodo = false
       @components.each do |cname, c|
         job = nil
+        c_already_finished = (c[:job] &&
+                              c[:job][:uuid] &&
+                              !c[:job][:success].nil?)
         if !c[:job] and
             c[:script_parameters].select { |pname, p| p.is_a? Hash }.empty?
           # Job is fully specified (all parameter values are present) but
@@ -524,6 +529,41 @@ class WhRunPipelineInstance
                 end
               end
             end
+            unless c_already_finished
+              if c[:output_is_persistent]
+                # This is my first time discovering that the job
+                # succeeded. I need to make sure a resources/wants
+                # link is in place to protect the output from garbage
+                # collection. (Normally Crunch does this for me, but
+                # here I might be reusing the output of someone else's
+                # job and I need to make sure it's understood that the
+                # output is valuable to me, too.)
+                wanted = c[:job][:output]
+                debuglog "checking for existing persistence link for #{wanted}"
+                @my_user_uuid ||= $arv.user.current[:uuid]
+                links = $arv.link.list(limit: 1,
+                                       filters:
+                                       [%w(link_class = resources),
+                                        %w(name = wants),
+                                        %w(tail_uuid =) + [@my_user_uuid],
+                                        %w(head_uuid =) + [wanted]
+                                       ])[:items]
+                if links.any?
+                  debuglog "link already exists, uuid #{links.first[:uuid]}"
+                else
+                  newlink = $arv.link.create link: \
+                  {
+                    link_class: 'resources',
+                    name: 'wants',
+                    tail_kind: 'arvados#user',
+                    tail_uuid: @my_user_uuid,
+                    head_kind: 'arvados#collection',
+                    head_uuid: wanted
+                  }
+                  debuglog "added link, uuid #{newlink[:uuid]}"
+                end
+              end
+            end
           elsif c[:job][:running] ||
               (!c[:job][:started_at] && !c[:job][:cancelled_at])
             moretodo = true

commit 6a8e3decba6bf4cd890bf98241646d0d070990b7
Author: Tom Clegg <tom at curoverse.com>
Date:   Sun Mar 23 23:07:40 2014 -0400

    Propagate output_is_persistent flag from pipeline to job.

diff --git a/sdk/cli/bin/arv-run-pipeline-instance b/sdk/cli/bin/arv-run-pipeline-instance
index 1e62a52..09f894a 100755
--- a/sdk/cli/bin/arv-run-pipeline-instance
+++ b/sdk/cli/bin/arv-run-pipeline-instance
@@ -494,7 +494,8 @@ class WhRunPipelineInstance
               job = JobCache.create(:script => c[:script],
                                     :script_parameters => c[:script_parameters],
                                     :runtime_constraints => c[:runtime_constraints] || {},
-                                    :script_version => c[:script_version] || 'master')
+                                    :script_version => c[:script_version] || 'master',
+                                    :output_is_persistent => c[:output_is_persistent] || false)
               if job
                 debuglog "component #{cname} new job #{job[:uuid]}"
                 c[:job] = job

commit 73e9bb7585add05c044a80c6811681d58b6148b4
Author: Tom Clegg <tom at curoverse.com>
Date:   Sun Mar 23 23:03:32 2014 -0400

    Add output_is_persistent flags to example pipeline template.

diff --git a/doc/examples/pipeline_templates/gatk-exome-fq-snp.json b/doc/examples/pipeline_templates/gatk-exome-fq-snp.json
index 3668f00..f84ad3f 100644
--- a/doc/examples/pipeline_templates/gatk-exome-fq-snp.json
+++ b/doc/examples/pipeline_templates/gatk-exome-fq-snp.json
@@ -11,7 +11,8 @@
     ],
     "input":"d237a90bae3870b3b033aea1e99de4a9+10820+K at qr1hi"
    },
-   "script_version":"e820bd1c6890f93ea1a84ffd5730bbf0e3d8e153"
+   "script_version":"e820bd1c6890f93ea1a84ffd5730bbf0e3d8e153",
+   "output_is_persistent":false
   },
   "bwa-index":{
    "script_version":"e820bd1c6890f93ea1a84ffd5730bbf0e3d8e153",
@@ -24,7 +25,8 @@
      "value":"8b6e2c4916133e1d859c9e812861ce13+70",
      "required":true
     }
-   }
+   },
+   "output_is_persistent":false
   },
   "bwa-aln":{
    "script_version":"e820bd1c6890f93ea1a84ffd5730bbf0e3d8e153",
@@ -48,7 +50,8 @@
    },
    "runtime_constraints":{
     "max_tasks_per_node":1
-   }
+   },
+   "output_is_persistent":false
   },
   "picard-gatk2-prep":{
    "script_version":"e820bd1c6890f93ea1a84ffd5730bbf0e3d8e153",
@@ -67,7 +70,8 @@
    },
    "runtime_constraints":{
     "max_tasks_per_node":1
-   }
+   },
+   "output_is_persistent":false
   },
   "GATK2-realign":{
    "script_version":"e820bd1c6890f93ea1a84ffd5730bbf0e3d8e153",
@@ -97,7 +101,8 @@
    },
    "runtime_constraints":{
     "max_tasks_per_node":2
-   }
+   },
+   "output_is_persistent":false
   },
   "GATK2-bqsr":{
    "script_version":"e820bd1c6890f93ea1a84ffd5730bbf0e3d8e153",
@@ -118,7 +123,8 @@
      "value":"7e0a277d6d2353678a11f56bab3b13f2+87",
      "required":true
     }
-   }
+   },
+   "output_is_persistent":false
   },
   "GATK2-merge-call":{
    "script_version":"e820bd1c6890f93ea1a84ffd5730bbf0e3d8e153",
@@ -155,7 +161,8 @@
       "200"
      ]
     }
-   }
+   },
+   "output_is_persistent":true
   }
  }
 }

commit 69d0e98d74c221ba2161b4851c51dd47ee7a9409
Author: Tom Clegg <tom at curoverse.com>
Date:   Sun Mar 23 23:02:43 2014 -0400

    Add output_is_persistent attribute to job model.

diff --git a/services/api/app/models/job.rb b/services/api/app/models/job.rb
index 1f0ef75..a3fe08f 100644
--- a/services/api/app/models/job.rb
+++ b/services/api/app/models/job.rb
@@ -27,6 +27,7 @@ class Job < ArvadosModel
     t.add :started_at
     t.add :finished_at
     t.add :output
+    t.add :output_is_persistent
     t.add :success
     t.add :running
     t.add :is_locked_by_uuid
diff --git a/services/api/db/migrate/20140324024606_add_output_is_persistent_to_job.rb b/services/api/db/migrate/20140324024606_add_output_is_persistent_to_job.rb
new file mode 100644
index 0000000..04a03c0
--- /dev/null
+++ b/services/api/db/migrate/20140324024606_add_output_is_persistent_to_job.rb
@@ -0,0 +1,5 @@
+class AddOutputIsPersistentToJob < ActiveRecord::Migration
+  def change
+    add_column :jobs, :output_is_persistent, :boolean, null: false, default: false
+  end
+end
diff --git a/services/api/db/schema.rb b/services/api/db/schema.rb
index df6ea9b..5c2fe29 100644
--- a/services/api/db/schema.rb
+++ b/services/api/db/schema.rb
@@ -11,7 +11,7 @@
 #
 # It's strongly recommended to check this file into your version control system.
 
-ActiveRecord::Schema.define(:version => 20140129184311) do
+ActiveRecord::Schema.define(:version => 20140324024606) do
 
   create_table "api_client_authorizations", :force => true do |t|
     t.string   "api_token",                                           :null => false
@@ -182,13 +182,14 @@ ActiveRecord::Schema.define(:version => 20140129184311) do
     t.boolean  "running"
     t.boolean  "success"
     t.string   "output"
-    t.datetime "created_at",               :null => false
-    t.datetime "updated_at",               :null => false
+    t.datetime "created_at",                                  :null => false
+    t.datetime "updated_at",                                  :null => false
     t.string   "priority"
     t.string   "is_locked_by_uuid"
     t.string   "log"
     t.text     "tasks_summary"
     t.text     "runtime_constraints"
+    t.boolean  "output_is_persistent",     :default => false, :null => false
   end
 
   add_index "jobs", ["created_at"], :name => "index_jobs_on_created_at"
diff --git a/services/api/test/functional/arvados/v1/jobs_controller_test.rb b/services/api/test/functional/arvados/v1/jobs_controller_test.rb
index f68cbc2..7920bce 100644
--- a/services/api/test/functional/arvados/v1/jobs_controller_test.rb
+++ b/services/api/test/functional/arvados/v1/jobs_controller_test.rb
@@ -14,6 +14,8 @@ class Arvados::V1::JobsControllerTest < ActionController::TestCase
     new_job = JSON.parse(@response.body)
     assert_not_nil new_job['uuid']
     assert_not_nil new_job['script_version'].match(/^[0-9a-f]{40}$/)
+    # Default: not persistent
+    assert_equal false, new_job['output_is_persistent']
   end
 
   test "normalize output and log uuids when creating job" do

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list