[ARVADOS] created: 51422b1ccfa3b736a3ebdf2a8fc61695cbc64eb4

git at public.curoverse.com git at public.curoverse.com
Tue Aug 12 16:41:44 EDT 2014


        at  51422b1ccfa3b736a3ebdf2a8fc61695cbc64eb4 (commit)


commit 51422b1ccfa3b736a3ebdf2a8fc61695cbc64eb4
Author: Ward Vandewege <ward at curoverse.com>
Date:   Tue Aug 12 16:40:38 2014 -0400

    update_node_attributes: be aware of more possible slurm states
    
    refs #3239

diff --git a/services/api/script/update_node_attributes.rb b/services/api/script/update_node_attributes.rb
index 3fac8de..b6b1720 100755
--- a/services/api/script/update_node_attributes.rb
+++ b/services/api/script/update_node_attributes.rb
@@ -41,7 +41,8 @@ while true
 
     nodenames.each do |nodename|
       if @slurm_state[nodename] != nodestate
-        has_no_job = ! ['alloc','comp'].index(nodestate)
+        has_no_job = ! ['alloc','comp','draining','unknown'].index(nodestate)
+
         node = Node.
           where('slot_number=? and hostname=?',
                 nodename.match(/(\d+)$/)[1].to_i,

commit 1adc360342488fa931ccf7518ff9a04f462a36fe
Author: Ward Vandewege <ward at curoverse.com>
Date:   Tue Aug 12 16:38:50 2014 -0400

    crunch-dispatch: be aware of more possible slurm states
    
    refs #3239

diff --git a/services/api/script/crunch-dispatch.rb b/services/api/script/crunch-dispatch.rb
index f7d01f0..f1d7aa4 100755
--- a/services/api/script/crunch-dispatch.rb
+++ b/services/api/script/crunch-dispatch.rb
@@ -68,10 +68,10 @@ class Dispatcher
       begin
         sinfo.split("\n").
           each do |line|
-          re = line.match /(\S+?):+(idle|alloc|down)/
+          re = line.match /(\S+?):+(idle|alloc|down|draining|drained|unknown)/
           next if !re
 
-          has_no_job = ! ['alloc','comp'].index(re[2])
+          has_no_job = ! ['alloc','comp','draining','unknown'].index(re[2])
 
           # sinfo tells us about a node N times if it is shared by N partitions
           next if node_seen[re[1]]

commit 3b613232f752df1b5ba520b5478cc776cea507b8
Author: Ward Vandewege <ward at curoverse.com>
Date:   Mon Aug 11 16:33:57 2014 -0400

    crunch_dispatch: when a Node object state is changed to not alloc/comp,
    make sure to change the running_job_uuid field to nil.
    
    refs #3239

diff --git a/services/api/script/crunch-dispatch.rb b/services/api/script/crunch-dispatch.rb
index 5a990f0..f7d01f0 100755
--- a/services/api/script/crunch-dispatch.rb
+++ b/services/api/script/crunch-dispatch.rb
@@ -71,6 +71,8 @@ class Dispatcher
           re = line.match /(\S+?):+(idle|alloc|down)/
           next if !re
 
+          has_no_job = ! ['alloc','comp'].index(re[2])
+
           # sinfo tells us about a node N times if it is shared by N partitions
           next if node_seen[re[1]]
           node_seen[re[1]] = true
@@ -82,6 +84,7 @@ class Dispatcher
             if node
               $stderr.puts "dispatch: update #{re[1]} state to #{re[2]}"
               node.info['slurm_state'] = re[2]
+              node.info['running_job_uuid'] = nil if has_no_job
               if not node.save
                 $stderr.puts "dispatch: failed to update #{node.uuid}: #{node.errors.messages}"
               end

commit 96f923788ee215d0ed1aa58e1652afd78c342d2e
Author: Ward Vandewege <ward at curoverse.com>
Date:   Mon Aug 11 15:39:28 2014 -0400

    Non-existent node object is not a fatal state; node manager will create
    those objects as needed.
    
    refs #3239

diff --git a/services/api/script/update_node_attributes.rb b/services/api/script/update_node_attributes.rb
index 2d6bd0d..3fac8de 100755
--- a/services/api/script/update_node_attributes.rb
+++ b/services/api/script/update_node_attributes.rb
@@ -47,7 +47,7 @@ while true
                 nodename.match(/(\d+)$/)[1].to_i,
                 nodename).
           first
-        raise "Fatal: Node does not exist: #{nodename}" if !node
+        next if !node
 
         puts "Node #{node.uuid} slot #{node.slot_number} name #{node.hostname} state #{nodestate}#{' (has_no_job)' if has_no_job}"
         node_info_was = node.info.dup

commit eb430e776034afae40b175e3ed3e97a76dcd67da
Author: Ward Vandewege <ward at curoverse.com>
Date:   Mon Aug 11 15:37:41 2014 -0400

    Only string keys (not symbols) in the info hash.
    
    refs #3239

diff --git a/services/api/script/update_node_attributes.rb b/services/api/script/update_node_attributes.rb
index 2a0bccc..2d6bd0d 100755
--- a/services/api/script/update_node_attributes.rb
+++ b/services/api/script/update_node_attributes.rb
@@ -51,8 +51,8 @@ while true
 
         puts "Node #{node.uuid} slot #{node.slot_number} name #{node.hostname} state #{nodestate}#{' (has_no_job)' if has_no_job}"
         node_info_was = node.info.dup
-        node.info[:slurm_state] = nodestate
-        node.info[:running_job_uuid] = nil if has_no_job
+        node.info['slurm_state'] = nodestate
+        node.info['running_job_uuid'] = nil if has_no_job
         if node_info_was != node.info and not node.save
           raise "Fail: update node #{node.uuid} state #{nodestate}"
         end
@@ -91,8 +91,8 @@ while true
           first
         raise "Fatal: Node does not exist: #{nodename}" if !node
         puts "Node #{node.uuid} slot #{node.slot_number} name #{node.hostname} running_job_uuid #{running_job_uuid}"
-        if node.info[:running_job_uuid] != running_job_uuid
-          node.info[:running_job_uuid] = running_job_uuid
+        if node.info['running_job_uuid'] != running_job_uuid
+          node.info['running_job_uuid'] = running_job_uuid
           if not node.save
             raise "Fail: update node #{node.uuid} running_job_uuid #{running_job_uuid}"
           end

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list