[ARVADOS] created: 3365d47ab4f504a1e849852691313cddd89d0f15
git at public.curoverse.com
git at public.curoverse.com
Wed Mar 18 12:19:57 EDT 2015
at 3365d47ab4f504a1e849852691313cddd89d0f15 (commit)
commit 3365d47ab4f504a1e849852691313cddd89d0f15
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Wed Mar 18 12:23:21 2015 -0400
5500: Add SLURM "Communication connection failure" to pattern of temporary node
failures. "pip install" failure returns temporary error status (111) so the
task can be retried.
diff --git a/sdk/cli/bin/crunch-job b/sdk/cli/bin/crunch-job
index cc47bbe..d40df90 100755
--- a/sdk/cli/bin/crunch-job
+++ b/sdk/cli/bin/crunch-job
@@ -1232,7 +1232,7 @@ sub preprocess_stderr
# whoa.
$main::please_freeze = 1;
}
- elsif ($line =~ /srun: error: (Node failure on|Unable to create job step) /) {
+ elsif ($line =~ /srun: error: (Node failure on|Unable to create job step|.*: Communication connection failure)/) {
$jobstep[$job]->{node_fail} = 1;
ban_node_by_slot($jobstep[$job]->{slotindex});
}
@@ -1876,9 +1876,9 @@ if (@ARGV) {
my $venv_dir = "$job_work/.arvados.venv";
my $venv_built = -e "$venv_dir/bin/activate";
if ((!$venv_built) and (-d $python_src) and can_run("virtualenv")) {
- shell_or_die("virtualenv", "--quiet", "--system-site-packages",
+ shell_or_die(undef, "virtualenv", "--quiet", "--system-site-packages",
"--python=python2.7", $venv_dir);
- shell_or_die("$venv_dir/bin/pip", "--quiet", "install", "-I", $python_src);
+ shell_or_die(111, "$venv_dir/bin/pip", "--quiet", "install", "-I", $python_src);
$venv_built = 1;
$Log->("Built Python SDK virtualenv");
}
@@ -1974,12 +1974,12 @@ if ((-d $python_dir) and can_run("python2.7") and
}
if (-e "$destdir/crunch_scripts/install") {
- shell_or_die ("$destdir/crunch_scripts/install", $install_dir);
+ shell_or_die (undef, "$destdir/crunch_scripts/install", $install_dir);
} elsif (!-e "./install.sh" && -e "./tests/autotests.sh") {
# Old version
- shell_or_die ("./tests/autotests.sh", $install_dir);
+ shell_or_die (undef, "./tests/autotests.sh", $install_dir);
} elsif (-e "./install.sh") {
- shell_or_die ("./install.sh", $install_dir);
+ shell_or_die (undef, "./install.sh", $install_dir);
}
if ($commit) {
@@ -2000,15 +2000,24 @@ sub can_run {
sub shell_or_die
{
+ my $tempfail = shift;
+
if ($ENV{"DEBUG"}) {
print STDERR "@_\n";
}
if (system (@_) != 0) {
my $err = $!;
- my $exitstatus = sprintf("exit %d signal %d", $? >> 8, $? & 0x7f);
+ my $code = $?;
+ my $exitstatus = sprintf("exit %d signal %d", $code >> 8, $code & 0x7f);
open STDERR, ">&STDERR_ORIG";
system ("cat $destdir.log >&2");
- die "@_ failed ($err): $exitstatus";
+ print STDERR "@_ failed ($err): $exitstatus";
+ if ($tempfail) {
+ exit $tempfail;
+ }
+ else {
+ exit ($code >> 8);
+ }
}
}
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list