[ARVADOS] created: 51422b1ccfa3b736a3ebdf2a8fc61695cbc64eb4
git at public.curoverse.com
git at public.curoverse.com
Tue Aug 12 16:41:44 EDT 2014
at 51422b1ccfa3b736a3ebdf2a8fc61695cbc64eb4 (commit)
commit 51422b1ccfa3b736a3ebdf2a8fc61695cbc64eb4
Author: Ward Vandewege <ward at curoverse.com>
Date: Tue Aug 12 16:40:38 2014 -0400
update_node_attributes: be aware of more possible slurm states
refs #3239
diff --git a/services/api/script/update_node_attributes.rb b/services/api/script/update_node_attributes.rb
index 3fac8de..b6b1720 100755
--- a/services/api/script/update_node_attributes.rb
+++ b/services/api/script/update_node_attributes.rb
@@ -41,7 +41,8 @@ while true
nodenames.each do |nodename|
if @slurm_state[nodename] != nodestate
- has_no_job = ! ['alloc','comp'].index(nodestate)
+ has_no_job = ! ['alloc','comp','draining','unknown'].index(nodestate)
+
node = Node.
where('slot_number=? and hostname=?',
nodename.match(/(\d+)$/)[1].to_i,
commit 1adc360342488fa931ccf7518ff9a04f462a36fe
Author: Ward Vandewege <ward at curoverse.com>
Date: Tue Aug 12 16:38:50 2014 -0400
crunch-dispatch: be aware of more possible slurm states
refs #3239
diff --git a/services/api/script/crunch-dispatch.rb b/services/api/script/crunch-dispatch.rb
index f7d01f0..f1d7aa4 100755
--- a/services/api/script/crunch-dispatch.rb
+++ b/services/api/script/crunch-dispatch.rb
@@ -68,10 +68,10 @@ class Dispatcher
begin
sinfo.split("\n").
each do |line|
- re = line.match /(\S+?):+(idle|alloc|down)/
+ re = line.match /(\S+?):+(idle|alloc|down|draining|drained|unknown)/
next if !re
- has_no_job = ! ['alloc','comp'].index(re[2])
+ has_no_job = ! ['alloc','comp','draining','unknown'].index(re[2])
# sinfo tells us about a node N times if it is shared by N partitions
next if node_seen[re[1]]
commit 3b613232f752df1b5ba520b5478cc776cea507b8
Author: Ward Vandewege <ward at curoverse.com>
Date: Mon Aug 11 16:33:57 2014 -0400
crunch_dispatch: when a Node object state is changed to not alloc/comp,
make sure to change the running_job_uuid field to nil.
refs #3239
diff --git a/services/api/script/crunch-dispatch.rb b/services/api/script/crunch-dispatch.rb
index 5a990f0..f7d01f0 100755
--- a/services/api/script/crunch-dispatch.rb
+++ b/services/api/script/crunch-dispatch.rb
@@ -71,6 +71,8 @@ class Dispatcher
re = line.match /(\S+?):+(idle|alloc|down)/
next if !re
+ has_no_job = ! ['alloc','comp'].index(re[2])
+
# sinfo tells us about a node N times if it is shared by N partitions
next if node_seen[re[1]]
node_seen[re[1]] = true
@@ -82,6 +84,7 @@ class Dispatcher
if node
$stderr.puts "dispatch: update #{re[1]} state to #{re[2]}"
node.info['slurm_state'] = re[2]
+ node.info['running_job_uuid'] = nil if has_no_job
if not node.save
$stderr.puts "dispatch: failed to update #{node.uuid}: #{node.errors.messages}"
end
commit 96f923788ee215d0ed1aa58e1652afd78c342d2e
Author: Ward Vandewege <ward at curoverse.com>
Date: Mon Aug 11 15:39:28 2014 -0400
Nonexistent node object is not a fatal state; node manager will create
those objects as needed.
refs #3239
diff --git a/services/api/script/update_node_attributes.rb b/services/api/script/update_node_attributes.rb
index 2d6bd0d..3fac8de 100755
--- a/services/api/script/update_node_attributes.rb
+++ b/services/api/script/update_node_attributes.rb
@@ -47,7 +47,7 @@ while true
nodename.match(/(\d+)$/)[1].to_i,
nodename).
first
- raise "Fatal: Node does not exist: #{nodename}" if !node
+ next if !node
puts "Node #{node.uuid} slot #{node.slot_number} name #{node.hostname} state #{nodestate}#{' (has_no_job)' if has_no_job}"
node_info_was = node.info.dup
commit eb430e776034afae40b175e3ed3e97a76dcd67da
Author: Ward Vandewege <ward at curoverse.com>
Date: Mon Aug 11 15:37:41 2014 -0400
Only string keys (not symbols) in the info hash.
refs #3239
diff --git a/services/api/script/update_node_attributes.rb b/services/api/script/update_node_attributes.rb
index 2a0bccc..2d6bd0d 100755
--- a/services/api/script/update_node_attributes.rb
+++ b/services/api/script/update_node_attributes.rb
@@ -51,8 +51,8 @@ while true
puts "Node #{node.uuid} slot #{node.slot_number} name #{node.hostname} state #{nodestate}#{' (has_no_job)' if has_no_job}"
node_info_was = node.info.dup
- node.info[:slurm_state] = nodestate
- node.info[:running_job_uuid] = nil if has_no_job
+ node.info['slurm_state'] = nodestate
+ node.info['running_job_uuid'] = nil if has_no_job
if node_info_was != node.info and not node.save
raise "Fail: update node #{node.uuid} state #{nodestate}"
end
@@ -91,8 +91,8 @@ while true
first
raise "Fatal: Node does not exist: #{nodename}" if !node
puts "Node #{node.uuid} slot #{node.slot_number} name #{node.hostname} running_job_uuid #{running_job_uuid}"
- if node.info[:running_job_uuid] != running_job_uuid
- node.info[:running_job_uuid] = running_job_uuid
+ if node.info['running_job_uuid'] != running_job_uuid
+ node.info['running_job_uuid'] = running_job_uuid
if not node.save
raise "Fail: update node #{node.uuid} running_job_uuid #{running_job_uuid}"
end
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list