[ARVADOS] created: 1.1.1-213-g91b19e3

Git user git at public.curoverse.com
Mon Dec 11 12:45:42 EST 2017


        at  91b19e3f339dcf1e8edfdd2feff8fb399dbb4067 (commit)


commit 91b19e3f339dcf1e8edfdd2feff8fb399dbb4067
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Mon Dec 11 12:42:57 2017 -0500

    12550: Fix race: read buffered data from stderr after child exits.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/sdk/cli/bin/crunch-job b/sdk/cli/bin/crunch-job
index f2e9fc2..86a1da8 100755
--- a/sdk/cli/bin/crunch-job
+++ b/sdk/cli/bin/crunch-job
@@ -1188,6 +1188,8 @@ sub reapchildren
                     . $slot[$proc{$pid}->{slot}]->{cpu});
     my $jobstepidx = $proc{$pid}->{jobstepidx};
 
+    readfrompipes_after_exit ($jobstepidx);
+
     $children_reaped++;
     my $elapsed = time - $proc{$pid}->{time};
     my $Jobstep = $jobstep[$jobstepidx];
@@ -1259,7 +1261,6 @@ sub reapchildren
     $Jobstep->{finishtime} = time;
     $Jobstep->{'arvados_task'}->{finished_at} = strftime "%Y-%m-%dT%H:%M:%SZ", gmtime($Jobstep->{finishtime});
     retry_op(sub { $Jobstep->{'arvados_task'}->save; }, "job_tasks.update API");
-    process_stderr_final ($jobstepidx);
     Log ($jobstepidx, sprintf("task output (%d bytes): %s",
                               length($Jobstep->{'arvados_task'}->{output}),
                               $Jobstep->{'arvados_task'}->{output}));
@@ -1562,15 +1563,32 @@ sub preprocess_stderr
 }
 
 
-sub process_stderr_final
+# Read whatever is still available on its stderr+stdout pipes after
+# the given child process has exited.
+sub readfrompipes_after_exit
 {
-  my $jobstepidx = shift;
-  preprocess_stderr ($jobstepidx);
+    my $jobstepidx = shift;
+
+    # The fact that the child has exited allows some convenient
+    # simplifications: (1) all data must have already been written, so
+    # there's no need to wait for more once sysread returns 0; (2) the
+    # total amount of data available is bounded by the pipe buffer size,
+    # so it's safe to read everything into one string.
+    while (0 < sysread ($reader{$jobstepidx}, $buf, 65536)) {
+        $jobstep[$jobstepidx]->{stderr_at} = time;
+        $jobstep[$jobstepidx]->{stderr} .= $buf;
+    }
+    if ($jobstep[$jobstepidx]->{stdout_r}) {
+        while (0 < sysread ($jobstep[$jobstepidx]->{stdout_r}, $buf, 65536)) {
+            $jobstep[$jobstepidx]->{stdout_captured} .= $buf;
+        }
+    }
+    preprocess_stderr ($jobstepidx);
 
-  map {
-    Log ($jobstepidx, "stderr $_");
-  } split ("\n", $jobstep[$jobstepidx]->{stderr});
-  $jobstep[$jobstepidx]->{stderr} = '';
+    map {
+        Log ($jobstepidx, "stderr $_");
+    } split ("\n", $jobstep[$jobstepidx]->{stderr});
+    $jobstep[$jobstepidx]->{stderr} = '';
 }
 
 sub fetch_block
@@ -2036,8 +2054,7 @@ sub srun_sync
   }
   my $exited = $?;
 
-  1 while readfrompipes();
-  process_stderr_final ($jobstepidx);
+  readfrompipes_after_exit ($jobstepidx);
 
   Log (undef, "$label: exit ".exit_status_s($exited));
 

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list