[ARVADOS] updated: a46f0152c44fe20eba4db38858eaa2f99bae83f2
git at public.curoverse.com
git at public.curoverse.com
Thu Jun 5 12:43:14 EDT 2014
Summary of changes:
services/api/script/clean_orphan_jobs.rb | 37 ++++++++++++++++++++++++++++++++
services/api/script/crunch-dispatch.rb | 22 -------------------
2 files changed, 37 insertions(+), 22 deletions(-)
create mode 100755 services/api/script/clean_orphan_jobs.rb
via a46f0152c44fe20eba4db38858eaa2f99bae83f2 (commit)
from 1ec252c8087c1f167d969e26c584ff346f4ac457 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
commit a46f0152c44fe20eba4db38858eaa2f99bae83f2
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Thu Jun 5 12:43:09 2014 -0400
2955: Moved logic to clean jobs table from crunch-dispatch into a standalone
script.
diff --git a/services/api/script/clean_orphan_jobs.rb b/services/api/script/clean_orphan_jobs.rb
new file mode 100755
index 0000000..35673c3
--- /dev/null
+++ b/services/api/script/clean_orphan_jobs.rb
@@ -0,0 +1,37 @@
+#!/usr/bin/env ruby
+
+if ENV["CRUNCH_DISPATCH_LOCKFILE"]
+ lockfilename = ENV.delete "CRUNCH_DISPATCH_LOCKFILE"
+ lockfile = File.open(lockfilename, File::RDWR|File::CREAT, 0644)
+ unless lockfile.flock File::LOCK_EX|File::LOCK_NB
+ abort "Lock unavailable on #{lockfilename} - exit"
+ end
+end
+
+ENV["RAILS_ENV"] = ARGV[0] || ENV["RAILS_ENV"] || "development"
+
+require File.dirname(__FILE__) + '/../config/boot'
+require File.dirname(__FILE__) + '/../config/environment'
+
+def refresh_running
+ Job.running.each do |jobrecord|
+ f = Log.where("object_uuid=?", jobrecord.uuid).limit(1).order("created_at desc").first
+ if f
+ age = (Time.now - f.created_at)
+ if age > 300
+ $stderr.puts "dispatch: failing orphan job #{jobrecord.uuid}, last log is #{age} seconds old"
+ # job is still marked running, but its newest log entry is more than
+ # 300 seconds old, so treat it as orphaned and mark it as failed.
+ jobrecord.running = false
+ jobrecord.cancelled_at ||= Time.now
+ jobrecord.finished_at ||= Time.now
+ if jobrecord.success.nil?
+ jobrecord.success = false
+ end
+ jobrecord.save!
+ end
+ end
+ end
+end
+
+refresh_running
diff --git a/services/api/script/crunch-dispatch.rb b/services/api/script/crunch-dispatch.rb
index 9a8280f..87acb65 100755
--- a/services/api/script/crunch-dispatch.rb
+++ b/services/api/script/crunch-dispatch.rb
@@ -41,27 +41,6 @@ class Dispatcher
return act_as_system_user
end
- def refresh_running
- Job.running.each do |jobrecord|
- if !@running[jobrecord.uuid]
- f = Log.where("object_uuid=?", jobrecord.uuid).limit(1).order("created_at desc").first
- age = (Time.now - f.created_at)
- if age > 300
- $stderr.puts "dispatch: failing orphan job #{jobrecord.uuid}, last log is #{age} seconds old"
- # job is marked running, but not known to crunch-dispatcher, and
- # hasn't produced any log entries for 5 minutes, so mark it as failed.
- jobrecord.running = false
- jobrecord.canceled_at ||= Time.now
- jobrecord.finished_at ||= Time.now
- if jobrecord.success.nil?
- jobrecord.success = false
- end
- jobrecord.save!
- end
- end
- end
- end
-
def refresh_todo
@todo = Job.queue.select do |j| j.repository end
@todo_pipelines = PipelineInstance.queue
@@ -409,7 +388,6 @@ class Dispatcher
end
end
else
- refresh_running unless did_recently(:refresh_running, 60.0)
refresh_todo unless did_recently(:refresh_todo, 1.0)
update_node_status
unless @todo.empty? or did_recently(:start_jobs, 1.0) or $signal[:term]
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list