[ARVADOS] updated: b43c667b8457fbc10a7a46e5e0d7da9e5d21d28e

git at public.curoverse.com git at public.curoverse.com
Mon Jun 1 13:54:02 EDT 2015


Summary of changes:
 sdk/cli/bin/crunch-job | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

       via  b43c667b8457fbc10a7a46e5e0d7da9e5d21d28e (commit)
       via  6fefc722b116a713c068e8f91cb97910d4c0c972 (commit)
      from  a1b5da5e536e8bfc58187965d11312d1fe883972 (commit)

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.


commit b43c667b8457fbc10a7a46e5e0d7da9e5d21d28e
Author: Tom Clegg <tom at curoverse.com>
Date:   Sun May 31 05:48:22 2015 -0400

    6146: Better log message.

diff --git a/sdk/cli/bin/crunch-job b/sdk/cli/bin/crunch-job
index 33407fb..786e18f 100755
--- a/sdk/cli/bin/crunch-job
+++ b/sdk/cli/bin/crunch-job
@@ -1284,7 +1284,11 @@ sub check_squeue
         && $jobstep->{killtime} <= time
         && $jobstep->{stderr_at} < $last_squeue_check)
     {
-      Log($jobstep->{jobstep}, "killing orphaned srun process $pid (task disappeared from slurm queue)");
+      my $sincewhen = "";
+      if ($jobstep->{stderr_at}) {
+        $sincewhen = " in last " . (time - $jobstep->{stderr_at}) . "s";
+      }
+      Log($jobstep->{jobstep}, "killing orphaned srun process $pid (task not in slurm queue, no stderr received$sincewhen)");
       killem ($pid);
     }
   }

commit 6fefc722b116a713c068e8f91cb97910d4c0c972
Author: Tom Clegg <tom at curoverse.com>
Date:   Sun May 31 05:32:55 2015 -0400

    6146: Use new SLURM_JOB_ID env var instead of old SLURM_JOBID

diff --git a/sdk/cli/bin/crunch-job b/sdk/cli/bin/crunch-job
index c536da6..33407fb 100755
--- a/sdk/cli/bin/crunch-job
+++ b/sdk/cli/bin/crunch-job
@@ -139,7 +139,7 @@ if (defined $job_api_token) {
   $ENV{ARVADOS_API_TOKEN} = $job_api_token;
 }
 
-my $have_slurm = exists $ENV{SLURM_JOBID} && exists $ENV{SLURM_NODELIST};
+my $have_slurm = exists $ENV{SLURM_JOB_ID} && exists $ENV{SLURM_NODELIST};
 
 
 $SIG{'USR1'} = sub
@@ -1296,7 +1296,7 @@ sub check_squeue
   }
 
   # get a list of steps still running
-  my @squeue = `squeue --jobs=\Q$ENV{SLURM_JOBID}\E --steps --format='%i %j' --noheader`;
+  my @squeue = `squeue --jobs=\Q$ENV{SLURM_JOB_ID}\E --steps --format='%i %j' --noheader`;
   if ($? != 0)
   {
     Log(undef, "warning: squeue exit status $? ($!)");
@@ -1310,7 +1310,7 @@ sub check_squeue
   {
     if (/^(\d+)\.(\d+) (\S+)/)
     {
-      if ($1 eq $ENV{SLURM_JOBID})
+      if ($1 eq $ENV{SLURM_JOB_ID})
       {
 	$ok{$3} = 1;
       }
@@ -1344,7 +1344,7 @@ sub release_allocation
   if ($have_slurm)
   {
     Log (undef, "release job allocation");
-    system "scancel $ENV{SLURM_JOBID}";
+    system "scancel $ENV{SLURM_JOB_ID}";
   }
 }
 

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list