[ARVADOS] created: 1.1.1-213-g91b19e3
Git user
git at public.curoverse.com
Mon Dec 11 12:45:42 EST 2017
at 91b19e3f339dcf1e8edfdd2feff8fb399dbb4067 (commit)
commit 91b19e3f339dcf1e8edfdd2feff8fb399dbb4067
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Mon Dec 11 12:42:57 2017 -0500
12550: Fix race: read buffered data from stderr after child exits.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/sdk/cli/bin/crunch-job b/sdk/cli/bin/crunch-job
index f2e9fc2..86a1da8 100755
--- a/sdk/cli/bin/crunch-job
+++ b/sdk/cli/bin/crunch-job
@@ -1188,6 +1188,8 @@ sub reapchildren
. $slot[$proc{$pid}->{slot}]->{cpu});
my $jobstepidx = $proc{$pid}->{jobstepidx};
+ readfrompipes_after_exit ($jobstepidx);
+
$children_reaped++;
my $elapsed = time - $proc{$pid}->{time};
my $Jobstep = $jobstep[$jobstepidx];
@@ -1259,7 +1261,6 @@ sub reapchildren
$Jobstep->{finishtime} = time;
$Jobstep->{'arvados_task'}->{finished_at} = strftime "%Y-%m-%dT%H:%M:%SZ", gmtime($Jobstep->{finishtime});
retry_op(sub { $Jobstep->{'arvados_task'}->save; }, "job_tasks.update API");
- process_stderr_final ($jobstepidx);
Log ($jobstepidx, sprintf("task output (%d bytes): %s",
length($Jobstep->{'arvados_task'}->{output}),
$Jobstep->{'arvados_task'}->{output}));
@@ -1562,15 +1563,32 @@ sub preprocess_stderr
}
-sub process_stderr_final
+# Read whatever is still available on its stderr+stdout pipes after
+# the given child process has exited.
+sub readfrompipes_after_exit
{
- my $jobstepidx = shift;
- preprocess_stderr ($jobstepidx);
+ my $jobstepidx = shift;
+
+ # The fact that the child has exited allows some convenient
+ # simplifications: (1) all data must have already been written, so
+ # there's no need to wait for more once sysread returns 0; (2) the
+ # total amount of data available is bounded by the pipe buffer size,
+ # so it's safe to read everything into one string.
+ while (0 < sysread ($reader{$jobstepidx}, $buf, 65536)) {
+ $jobstep[$jobstepidx]->{stderr_at} = time;
+ $jobstep[$jobstepidx]->{stderr} .= $buf;
+ }
+ if ($jobstep[$jobstepidx]->{stdout_r}) {
+ while (0 < sysread ($jobstep[$jobstepidx]->{stdout_r}, $buf, 65536)) {
+ $jobstep[$jobstepidx]->{stdout_captured} .= $buf;
+ }
+ }
+ preprocess_stderr ($jobstepidx);
- map {
- Log ($jobstepidx, "stderr $_");
- } split ("\n", $jobstep[$jobstepidx]->{stderr});
- $jobstep[$jobstepidx]->{stderr} = '';
+ map {
+ Log ($jobstepidx, "stderr $_");
+ } split ("\n", $jobstep[$jobstepidx]->{stderr});
+ $jobstep[$jobstepidx]->{stderr} = '';
}
sub fetch_block
@@ -2036,8 +2054,7 @@ sub srun_sync
}
my $exited = $?;
- 1 while readfrompipes();
- process_stderr_final ($jobstepidx);
+ readfrompipes_after_exit ($jobstepidx);
Log (undef, "$label: exit ".exit_status_s($exited));
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list