[ARVADOS] updated: 5d981be8cc26f09868904d41b0ebbff12bba1d3b

Git user git at public.curoverse.com
Fri Apr 1 15:40:47 EDT 2016


Summary of changes:
 sdk/cli/bin/crunch-job | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

       via  5d981be8cc26f09868904d41b0ebbff12bba1d3b (commit)
      from  a925c8936cba33e6d2504bd984b52750fafc638a (commit)

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.


commit 5d981be8cc26f09868904d41b0ebbff12bba1d3b
Author: Brett Smith <brett at curoverse.com>
Date:   Fri Apr 1 15:40:13 2016 -0400

    8811: Fixup preprocess_stderr bypass.

diff --git a/sdk/cli/bin/crunch-job b/sdk/cli/bin/crunch-job
index 168f238..05164ac 100755
--- a/sdk/cli/bin/crunch-job
+++ b/sdk/cli/bin/crunch-job
@@ -1473,6 +1473,9 @@ sub readfrompipes
 sub preprocess_stderr
 {
   my $jobstepidx = shift;
+  # slotindex is only defined for children running Arvados job tasks.
+  # Be prepared to handle the undef case (for setup srun calls, etc.).
+  my $job_slot_index = $jobstep[$jobstepidx]->{slotindex};
 
   while ($jobstep[$jobstepidx]->{stderr} =~ /^(.*?)\n/) {
     my $line = $1;
@@ -1482,19 +1485,16 @@ sub preprocess_stderr
       # whoa.
       $main::please_freeze = 1;
     }
-    elsif (!exists $jobstep[$jobstepidx]->{slotindex}) {
-      # Skip the following tempfail checks if this srun proc isn't
-      # attached to a particular worker slot.
-    }
     elsif ($line =~ /srun: error: (Node failure on|Aborting, .*\bio error\b)/) {
-      my $job_slot_index = $jobstep[$jobstepidx]->{slotindex};
-      $slot[$job_slot_index]->{node}->{fail_count}++;
       $jobstep[$jobstepidx]->{tempfail} = 1;
-      ban_node_by_slot($job_slot_index);
+      if (defined($job_slot_index)) {
+        $slot[$job_slot_index]->{node}->{fail_count}++;
+        ban_node_by_slot($job_slot_index);
+      }
     }
     elsif ($line =~ /srun: error: (Unable to create job step|.*: Communication connection failure)/) {
       $jobstep[$jobstepidx]->{tempfail} = 1;
-      ban_node_by_slot($jobstep[$jobstepidx]->{slotindex});
+      ban_node_by_slot($job_slot_index) if (defined($job_slot_index));
     }
     elsif ($line =~ /\bKeep(Read|Write|Request)Error:/) {
       $jobstep[$jobstepidx]->{tempfail} = 1;

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list