[ARVADOS] updated: e7d6f7ade14cd2c513ad10d39da23b926e47b679
git at public.curoverse.com
git at public.curoverse.com
Wed Aug 13 17:10:57 EDT 2014
Summary of changes:
services/api/script/crunch-dispatch.rb | 23 +++++++++++++----------
1 file changed, 13 insertions(+), 10 deletions(-)
via e7d6f7ade14cd2c513ad10d39da23b926e47b679 (commit)
from 7482410350df49dbddab05448b167bf61f14d71e (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email, so we list those
revisions in full below.
commit e7d6f7ade14cd2c513ad10d39da23b926e47b679
Author: Tom Clegg <tom at curoverse.com>
Date: Wed Aug 13 17:09:08 2014 -0400
Report unexpected slurm states as "down". No issue #
diff --git a/services/api/script/crunch-dispatch.rb b/services/api/script/crunch-dispatch.rb
index 154fcf3..ddc0f3a 100755
--- a/services/api/script/crunch-dispatch.rb
+++ b/services/api/script/crunch-dispatch.rb
@@ -68,25 +68,28 @@ class Dispatcher
begin
sinfo.split("\n").
each do |line|
- re = line.match /(\S+?):+(idle|alloc|down)/
+ re = line.match /(\S+?):+(idle|alloc|down)?/
next if !re
+ _, node_name, node_state = *re
+ node_state = 'down' unless %w(idle alloc down).include? node_state
+
# sinfo tells us about a node N times if it is shared by N partitions
- next if node_seen[re[1]]
- node_seen[re[1]] = true
+ next if node_seen[node_name]
+ node_seen[node_name] = true
# update our database (and cache) when a node's state changes
- if @node_state[re[1]] != re[2]
- @node_state[re[1]] = re[2]
- node = Node.where('hostname=?', re[1]).order(:last_ping_at).last
+ if @node_state[node_name] != node_state
+ @node_state[node_name] = node_state
+ node = Node.where('hostname=?', node_name).order(:last_ping_at).last
if node
- $stderr.puts "dispatch: update #{re[1]} state to #{re[2]}"
- node.info['slurm_state'] = re[2]
+ $stderr.puts "dispatch: update #{node_name} state to #{node_state}"
+ node.info['slurm_state'] = node_state
if not node.save
$stderr.puts "dispatch: failed to update #{node.uuid}: #{node.errors.messages}"
end
- elsif re[2] != 'down'
- $stderr.puts "dispatch: sinfo reports '#{re[1]}' is not down, but no node has that name"
+ elsif node_state != 'down'
+ $stderr.puts "dispatch: sinfo reports '#{node_name}' is not down, but no node has that name"
end
end
end
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list