[ARVADOS] created: 19ad5c59b064088c58136f5387fdf029b754ee36

Git user git at public.curoverse.com
Mon Sep 12 15:17:31 EDT 2016


        at  19ad5c59b064088c58136f5387fdf029b754ee36 (commit)


commit 19ad5c59b064088c58136f5387fdf029b754ee36
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Mon Sep 12 15:17:23 2016 -0400

    10004: Add check_sinfo() to end srun_sync() if any of the allocated nodes are
    no longer "alloc" according to sinfo.

diff --git a/sdk/cli/bin/crunch-job b/sdk/cli/bin/crunch-job
index 39238b0..48f9669 100755
--- a/sdk/cli/bin/crunch-job
+++ b/sdk/cli/bin/crunch-job
@@ -355,6 +355,7 @@ my @jobstep_done = ();
 my @jobstep_tomerge = ();
 my $jobstep_tomerge_level = 0;
 my $squeue_checked = 0;
+my $sinfo_checked = 0;  # time() of the most recent `sinfo` call in check_sinfo(); 0 until the first call
 my $latest_refresh = scalar time;
 
 
@@ -1401,6 +1402,29 @@ sub check_squeue
   }
 }
 
+sub check_sinfo
+{
+  # Set $main::please_freeze if sinfo reports any node in
+  # $ENV{SLURM_NODELIST} in a state other than "alloc" or "alloc*".
+  # Do not call `sinfo` more than once every 15 seconds.
+  return if $sinfo_checked > time - 15;
+  $sinfo_checked = time;
+
+  my @sinfo = `sinfo --nodes=\Q$ENV{SLURM_NODELIST}\E --noheader -o "%t"`;
+  if ($? != 0)
+  {
+    # Node states are unknown; skip this round and retry after the interval.
+    Log(undef, "warning: sinfo exit status $? ($!)");
+    return;
+  }
+  chomp @sinfo;  # removes only a trailing newline, unlike chop
+
+  foreach my $state (@sinfo)
+  {
+    # States are strings, so use ne; numeric != sees every state as 0.
+    $main::please_freeze = 1 if $state ne "alloc" && $state ne "alloc*";
+  }
+}
 
 sub release_allocation
 {
@@ -1906,7 +1930,6 @@ sub freezeunquote
   return $s;
 }
 
-
 sub srun_sync
 {
   my $srunargs = shift;
@@ -1961,6 +1984,7 @@ sub srun_sync
     if (!$busy || ($latest_refresh + 2 < scalar time)) {
       check_refresh_wanted();
       check_squeue();
+      check_sinfo();
     }
     if (!$busy) {
       select(undef, undef, undef, 0.1);

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list