[ARVADOS] updated: 2a64eae3cf8363c596feda5337ea20ce356ca11f

git at public.curoverse.com git at public.curoverse.com
Wed Jun 4 10:00:01 EDT 2014


Summary of changes:
 sdk/cli/bin/arv-run-pipeline-instance  |  2 +-
 services/api/script/crunch-dispatch.rb | 48 +++++++++++++++++++++++++---------
 2 files changed, 37 insertions(+), 13 deletions(-)

       via  2a64eae3cf8363c596feda5337ea20ce356ca11f (commit)
       via  114df81b90be76e6921b9f20c9ddb272567c82e1 (commit)
      from  a276e40691a8f96b321879de2279159ef08b804f (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit 2a64eae3cf8363c596feda5337ea20ce356ca11f
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Mon Jun 2 16:44:26 2014 -0400

    2955: crunch-dispatch now sends a clean environment to crunch-job. (cherry-picked from #2882)
    
    Conflicts:
    	services/api/script/crunch-dispatch.rb

diff --git a/services/api/script/crunch-dispatch.rb b/services/api/script/crunch-dispatch.rb
index 0e9e069..f49f21b 100755
--- a/services/api/script/crunch-dispatch.rb
+++ b/services/api/script/crunch-dispatch.rb
@@ -1,5 +1,7 @@
 #!/usr/bin/env ruby
 
+require 'trollop'
+
 include Process
 
 $warned = {}
@@ -20,6 +22,10 @@ if ENV["CRUNCH_DISPATCH_LOCKFILE"]
   end
 end
 
+$trollopts = Trollop::options do
+    opt :use_env, "Pass selected environment variables (PATH, PYTHONPATH, RUBYLIB, GEM_PATH, PERLLIB) to crunch-job"
+end
+
 ENV["RAILS_ENV"] = ARGV[0] || ENV["RAILS_ENV"] || "development"
 
 require File.dirname(__FILE__) + '/../config/boot'
@@ -152,9 +158,23 @@ class Dispatcher
       end
 
       if Server::Application.config.crunch_job_user
-        cmd_args.unshift("sudo", "-E", "-u",
-                         Server::Application.config.crunch_job_user,
-                         "PERLLIB=#{ENV['PERLLIB']}")
+        cmd_args.unshift("sudo", "-E", "-u", Server::Application.config.crunch_job_user)
+      end
+
+      cmd_args << "HOME=/dev/null"
+      cmd_args << "ARVADOS_API_HOST=#{ENV['ARVADOS_API_HOST']}"
+      cmd_args << "ARVADOS_API_HOST_INSECURE=#{ENV['ARVADOS_API_HOST_INSECURE']}" if ENV['ARVADOS_API_HOST_INSECURE']
+
+      ENV.each do |k, v|
+        cmd_args << "#{k}=#{v}" if k.starts_with? "CRUNCH_"
+      end
+
+      if $trollopts.use_env
+        cmd_args << "PATH=#{ENV['PATH']}"
+        cmd_args << "PYTHONPATH=#{ENV['PYTHONPATH']}"
+        cmd_args << "PERLLIB=#{ENV['PERLLIB']}"
+        cmd_args << "RUBYLIB=#{ENV['RUBYLIB']}"
+        cmd_args << "GEM_PATH=#{ENV['GEM_PATH']}"
       end
 
       job_auth = ApiClientAuthorization.
@@ -194,10 +214,10 @@ class Dispatcher
       cmd_args << '--git-dir'
       cmd_args << arvados_internal
 
-      $stderr.puts "dispatch: #{cmd_args.join ' '}"
+      $stderr.puts "dispatch: #{cmd_args}"
 
       begin
-        i, o, e, t = Open3.popen3(*cmd_args)
+        i, o, e, t = Open3.popen3({}, *cmd_args, { :unsetenv_others => true})
       rescue
         $stderr.puts "dispatch: popen3: #{$!}"
         sleep 1

commit 114df81b90be76e6921b9f20c9ddb272567c82e1
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Wed Jun 4 09:55:24 2014 -0400

    2955: Added check that an orphan job hasn't produced any logs for 5 minutes
    before automatically failing it.  arv-run-pipeline-instance will identify jobs
    that are running=false and success=false as failed even if finished_at is null.

diff --git a/sdk/cli/bin/arv-run-pipeline-instance b/sdk/cli/bin/arv-run-pipeline-instance
index fc636df..e9a7654 100755
--- a/sdk/cli/bin/arv-run-pipeline-instance
+++ b/sdk/cli/bin/arv-run-pipeline-instance
@@ -578,7 +578,7 @@ class WhRunPipelineInstance
     failed = 0
     @components.each do |cname, c|
       if c[:job]
-        if c[:job][:finished_at]
+        if c[:job][:finished_at] or (c[:job][:running] == false and c[:job][:success] == false)
           ended += 1
           if c[:job][:success] == true
             succeeded += 1
diff --git a/services/api/script/crunch-dispatch.rb b/services/api/script/crunch-dispatch.rb
index bde9b67..0e9e069 100755
--- a/services/api/script/crunch-dispatch.rb
+++ b/services/api/script/crunch-dispatch.rb
@@ -38,13 +38,17 @@ class Dispatcher
   def refresh_running
     Job.running.each do |jobrecord|
       if !@running[jobrecord.uuid]
-        # job is marked running, but not actually running. so fail it
-        jobrecord.running = false
-        jobrecord.finished_at ||= Time.now
-        if jobrecord.success.nil?
-          jobrecord.success = false
+        f = Log.filter(["object_uuid", "=", jobrecord.uuid]).limit(1).order("created_at desc").results.first
+        if (Time.now - f.created_at) > 300
+          # job is marked running, but not known to crunch-dispatcher, and
+          # hasn't produced any log entries for 5 minutes, so mark it as failed.
+          jobrecord.running = false
+          jobrecord.finished_at ||= Time.now
+          if jobrecord.success.nil?
+            jobrecord.success = false
+          end
+          jobrecord.save!
         end
-        jobrecord.save!
       end
     end
   end
@@ -382,7 +386,7 @@ class Dispatcher
           end
         end
       else
-        refresh_running unless did_recently(:refresh_running, 30.0)
+        refresh_running unless did_recently(:refresh_running, 60.0)
         refresh_todo unless did_recently(:refresh_todo, 1.0)
         update_node_status
         unless @todo.empty? or did_recently(:start_jobs, 1.0) or $signal[:term]

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list