[ARVADOS] updated: 5d981be8cc26f09868904d41b0ebbff12bba1d3b
Git user
git at public.curoverse.com
Fri Apr 1 15:40:47 EDT 2016
Summary of changes:
sdk/cli/bin/crunch-job | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
via 5d981be8cc26f09868904d41b0ebbff12bba1d3b (commit)
from a925c8936cba33e6d2504bd984b52750fafc638a (commit)
Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.
commit 5d981be8cc26f09868904d41b0ebbff12bba1d3b
Author: Brett Smith <brett at curoverse.com>
Date: Fri Apr 1 15:40:13 2016 -0400
8811: Fixup preprocess_stderr bypass.
diff --git a/sdk/cli/bin/crunch-job b/sdk/cli/bin/crunch-job
index 168f238..05164ac 100755
--- a/sdk/cli/bin/crunch-job
+++ b/sdk/cli/bin/crunch-job
@@ -1473,6 +1473,9 @@ sub readfrompipes
sub preprocess_stderr
{
my $jobstepidx = shift;
+ # slotindex is only defined for children running Arvados job tasks.
+ # Be prepared to handle the undef case (for setup srun calls, etc.).
+ my $job_slot_index = $jobstep[$jobstepidx]->{slotindex};
while ($jobstep[$jobstepidx]->{stderr} =~ /^(.*?)\n/) {
my $line = $1;
@@ -1482,19 +1485,16 @@ sub preprocess_stderr
# whoa.
$main::please_freeze = 1;
}
- elsif (!exists $jobstep[$jobstepidx]->{slotindex}) {
- # Skip the following tempfail checks if this srun proc isn't
- # attached to a particular worker slot.
- }
elsif ($line =~ /srun: error: (Node failure on|Aborting, .*\bio error\b)/) {
- my $job_slot_index = $jobstep[$jobstepidx]->{slotindex};
- $slot[$job_slot_index]->{node}->{fail_count}++;
$jobstep[$jobstepidx]->{tempfail} = 1;
- ban_node_by_slot($job_slot_index);
+ if (defined($job_slot_index)) {
+ $slot[$job_slot_index]->{node}->{fail_count}++;
+ ban_node_by_slot($job_slot_index);
+ }
}
elsif ($line =~ /srun: error: (Unable to create job step|.*: Communication connection failure)/) {
$jobstep[$jobstepidx]->{tempfail} = 1;
- ban_node_by_slot($jobstep[$jobstepidx]->{slotindex});
+ ban_node_by_slot($job_slot_index) if (defined($job_slot_index));
}
elsif ($line =~ /\bKeep(Read|Write|Request)Error:/) {
$jobstep[$jobstepidx]->{tempfail} = 1;
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits mailing list