[ARVADOS] created: bc8139e01e7c5d08bf045a2c1604e6e2dc6b4572
Git user
git at public.curoverse.com
Mon Sep 12 22:13:17 EDT 2016
at bc8139e01e7c5d08bf045a2c1604e6e2dc6b4572 (commit)
commit bc8139e01e7c5d08bf045a2c1604e6e2dc6b4572
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Mon Sep 12 22:13:11 2016 -0400
9924: Set $working_slot_count = 0 on the "Unable to confirm allocation" error so that
crunch-job exits with EX_RETRY_UNLOCKED instead of exit 1 (simple failure).
diff --git a/sdk/cli/bin/crunch-job b/sdk/cli/bin/crunch-job
index e0aff31..948b77e 100755
--- a/sdk/cli/bin/crunch-job
+++ b/sdk/cli/bin/crunch-job
@@ -1512,6 +1512,7 @@ sub preprocess_stderr
if ($line =~ /srun: error: (SLURM job $ENV{SLURM_JOB_ID} has expired|Unable to confirm allocation for job $ENV{SLURM_JOB_ID})/) {
# whoa.
$main::please_freeze = 1;
+ $working_slot_count = 0;
}
elsif ($line =~ /srun: error: (Node failure on|Aborting, .*\bio error\b)/) {
$jobstep[$jobstepidx]->{tempfail} = 1;
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list