[ARVADOS] updated: 386faadf691e444b71d6c96e7c00792d9a0ba2c7
Git user
git at public.curoverse.com
Thu Mar 16 13:25:32 EDT 2017
Summary of changes:
services/crunch-run/crunchrun.go | 35 ++++++++++++++++++++++-------------
services/crunch-run/crunchrun_test.go | 6 ++++--
2 files changed, 26 insertions(+), 15 deletions(-)
via 386faadf691e444b71d6c96e7c00792d9a0ba2c7 (commit)
from 8513e042b0033599146546bd3a2ad903c67c9ff5 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
commit 386faadf691e444b71d6c96e7c00792d9a0ba2c7
Author: Tom Clegg <tom at curoverse.com>
Date: Thu Mar 16 13:24:58 2017 -0400
10218: Wait for container to be started (not just created) before trying to cancel it.
diff --git a/services/crunch-run/crunchrun.go b/services/crunch-run/crunchrun.go
index f71ec4d..9ca4f80 100644
--- a/services/crunch-run/crunchrun.go
+++ b/services/crunch-run/crunchrun.go
@@ -91,8 +91,6 @@ type ContainerRunner struct {
CleanupTempDir []string
Binds []string
OutputPDH *string
- CancelLock sync.Mutex
- Cancelled bool
SigChan chan os.Signal
ArvMountExit chan error
finalState string
@@ -115,6 +113,10 @@ type ContainerRunner struct {
// parent to be X" feature even on sites where the "specify
// cgroup parent" feature breaks.
setCgroupParent string
+
+ cStateLock sync.Mutex
+ cStarted bool // StartContainer() succeeded
+ cCancelled bool // StopContainer() invoked
}
// SetupSignals sets up signal handling to gracefully terminate the underlying
@@ -127,6 +129,7 @@ func (runner *ContainerRunner) SetupSignals() {
go func(sig chan os.Signal) {
<-sig
+ log.Print("signal handler calling runner.stop()")
runner.stop()
signal.Stop(sig)
}(runner.SigChan)
@@ -134,13 +137,13 @@ func (runner *ContainerRunner) SetupSignals() {
// stop the underlying Docker container.
func (runner *ContainerRunner) stop() {
- runner.CancelLock.Lock()
- defer runner.CancelLock.Unlock()
- if runner.Cancelled {
+ runner.cStateLock.Lock()
+ defer runner.cStateLock.Unlock()
+ if runner.cCancelled {
return
}
- runner.Cancelled = true
- if runner.ContainerID != "" {
+ runner.cCancelled = true
+ if runner.cStarted {
err := runner.Docker.StopContainer(runner.ContainerID, 10)
if err != nil {
log.Printf("StopContainer failed: %s", err)
@@ -648,10 +651,16 @@ func (runner *ContainerRunner) CreateContainer() error {
// StartContainer starts the docker container created by CreateContainer.
func (runner *ContainerRunner) StartContainer() error {
runner.CrunchLog.Printf("Starting Docker container id '%s'", runner.ContainerID)
+ runner.cStateLock.Lock()
+ defer runner.cStateLock.Unlock()
+ if runner.cCancelled {
+ return ErrCancelled
+ }
err := runner.Docker.StartContainer(runner.ContainerID, &runner.HostConfig)
if err != nil {
return fmt.Errorf("could not start container: %v", err)
}
+ runner.cStarted = true
return nil
}
@@ -896,9 +905,9 @@ func (runner *ContainerRunner) CommitLogs() error {
// UpdateContainerRunning updates the container state to "Running"
func (runner *ContainerRunner) UpdateContainerRunning() error {
- runner.CancelLock.Lock()
- defer runner.CancelLock.Unlock()
- if runner.Cancelled {
+ runner.cStateLock.Lock()
+ defer runner.cStateLock.Unlock()
+ if runner.cCancelled {
return ErrCancelled
}
return runner.ArvClient.Update("containers", runner.Container.UUID,
@@ -942,9 +951,9 @@ func (runner *ContainerRunner) UpdateContainerFinal() error {
// IsCancelled returns the value of Cancelled, with goroutine safety.
func (runner *ContainerRunner) IsCancelled() bool {
- runner.CancelLock.Lock()
- defer runner.CancelLock.Unlock()
- return runner.Cancelled
+ runner.cStateLock.Lock()
+ defer runner.cStateLock.Unlock()
+ return runner.cCancelled
}
// NewArvLogWriter creates an ArvLogWriter
diff --git a/services/crunch-run/crunchrun_test.go b/services/crunch-run/crunchrun_test.go
index 1286659..fcf2456 100644
--- a/services/crunch-run/crunchrun_test.go
+++ b/services/crunch-run/crunchrun_test.go
@@ -11,6 +11,7 @@ import (
"os"
"os/exec"
"path/filepath"
+ "runtime/pprof"
"sort"
"strings"
"sync"
@@ -525,7 +526,7 @@ func (s *TestSuite) TestUpdateContainerCancelled(c *C) {
api := &ArvTestClient{}
kc := &KeepTestClient{}
cr := NewContainerRunner(api, kc, nil, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
- cr.Cancelled = true
+ cr.cCancelled = true
cr.finalState = "Cancelled"
err := cr.UpdateContainerFinal()
@@ -748,7 +749,7 @@ func (s *TestSuite) TestFullRunSetCwd(c *C) {
func (s *TestSuite) TestStopOnSignal(c *C) {
s.testStopContainer(c, func(cr *ContainerRunner) {
go func() {
- for cr.ContainerID == "" {
+ for !cr.cStarted {
time.Sleep(time.Millisecond)
}
cr.SigChan <- syscall.SIGINT
@@ -802,6 +803,7 @@ func (s *TestSuite) testStopContainer(c *C, setup func(cr *ContainerRunner)) {
}()
select {
case <-time.After(20 * time.Second):
+ pprof.Lookup("goroutine").WriteTo(os.Stderr, 1)
c.Fatal("timed out")
case err = <-done:
c.Check(err, IsNil)
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list