[arvados] created: 2.7.0-5522-gb015c9e45f
git repository hosting
git at public.arvados.org
Wed Dec 6 19:20:49 UTC 2023
at b015c9e45f2a81b7069e5ecde3e0e9e0c5c619fa (commit)
commit b015c9e45f2a81b7069e5ecde3e0e9e0c5c619fa
Author: Tom Clegg <tom at curii.com>
Date: Wed Dec 6 14:01:00 2023 -0500
21258: Fix "guaranteed broken" stub instance not actually being broken.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/lib/dispatchcloud/dispatcher_test.go b/lib/dispatchcloud/dispatcher_test.go
index e2f0db1efb..51c2c3d6a3 100644
--- a/lib/dispatchcloud/dispatcher_test.go
+++ b/lib/dispatchcloud/dispatcher_test.go
@@ -217,6 +217,7 @@ func (s *DispatcherSuite) TestDispatchToStubDriver(c *check.C) {
return test.CapacityError{InstanceTypeSpecific: true}
}
n := atomic.AddInt32(&vmCount, 1)
+ c.Logf("SetupVM: instance %s n=%d", stubvm.Instance(), n)
stubvm.Boot = time.Now().Add(time.Duration(rand.Int63n(int64(5 * time.Millisecond))))
stubvm.CrunchRunDetachDelay = time.Duration(rand.Int63n(int64(10 * time.Millisecond)))
stubvm.ExecuteContainer = executeContainer
@@ -238,7 +239,7 @@ func (s *DispatcherSuite) TestDispatchToStubDriver(c *check.C) {
// 1 instance is completely broken, ensuring
// the boot_outcomes{outcome="failure"} metric
// is not zero
- stubvm.CrunchRunCrashRate = 1
+ stubvm.Broken = time.Now()
default:
stubvm.CrunchRunCrashRate = 0.1
stubvm.ArvMountDeadlockRate = 0.1
commit 95e0edffd5914e136fe9baac16c99858306aeeba
Author: Tom Clegg <tom at curii.com>
Date: Wed Dec 6 12:47:53 2023 -0500
21258: Fix "container completed twice" testing bug.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/lib/dispatchcloud/test/stub_driver.go b/lib/dispatchcloud/test/stub_driver.go
index 0a74d97606..2265be6e16 100644
--- a/lib/dispatchcloud/test/stub_driver.go
+++ b/lib/dispatchcloud/test/stub_driver.go
@@ -239,6 +239,8 @@ type StubVM struct {
killing map[string]bool
lastPID int64
deadlocked string
+ stubprocs sync.WaitGroup
+ destroying bool
sync.Mutex
}
@@ -267,6 +269,17 @@ func (svm *StubVM) Instance() stubInstance {
}
func (svm *StubVM) Exec(env map[string]string, command string, stdin io.Reader, stdout, stderr io.Writer) uint32 {
+ // Ensure we don't start any new stubprocs after Destroy()
+ // has started Wait()ing for stubprocs to end.
+ svm.Lock()
+ if svm.destroying {
+ svm.Unlock()
+ return 1
+ }
+ svm.stubprocs.Add(1)
+ defer svm.stubprocs.Done()
+ svm.Unlock()
+
stdinData, err := ioutil.ReadAll(stdin)
if err != nil {
fmt.Fprintf(stderr, "error reading stdin: %s\n", err)
@@ -304,7 +317,15 @@ func (svm *StubVM) Exec(env map[string]string, command string, stdin io.Reader,
pid := svm.lastPID
svm.running[uuid] = stubProcess{pid: pid}
svm.Unlock()
+
time.Sleep(svm.CrunchRunDetachDelay)
+
+ svm.Lock()
+ defer svm.Unlock()
+ if svm.destroying {
+ fmt.Fprint(stderr, "crunch-run: killed by system shutdown\n")
+ return 9
+ }
fmt.Fprintf(stderr, "starting %s\n", uuid)
logger := svm.sis.logger.WithFields(logrus.Fields{
"Instance": svm.id,
@@ -312,13 +333,18 @@ func (svm *StubVM) Exec(env map[string]string, command string, stdin io.Reader,
"PID": pid,
})
logger.Printf("[test] starting crunch-run stub")
+ svm.stubprocs.Add(1)
go func() {
+ defer svm.stubprocs.Done()
var ctr arvados.Container
var started, completed bool
defer func() {
logger.Print("[test] exiting crunch-run stub")
svm.Lock()
defer svm.Unlock()
+ if svm.destroying {
+ return
+ }
if svm.running[uuid].pid != pid {
bugf := svm.sis.driver.Bugf
if bugf == nil {
@@ -358,8 +384,10 @@ func (svm *StubVM) Exec(env map[string]string, command string, stdin io.Reader,
svm.Lock()
killed := svm.killing[uuid]
+ delete(svm.killing, uuid)
+ destroying := svm.destroying
svm.Unlock()
- if killed || wantCrashEarly {
+ if killed || wantCrashEarly || destroying {
return
}
@@ -451,6 +479,10 @@ func (si stubInstance) Destroy() error {
if math_rand.Float64() < si.svm.sis.driver.ErrorRateDestroy {
return errors.New("instance could not be destroyed")
}
+ si.svm.Lock()
+ si.svm.destroying = true
+ si.svm.Unlock()
+ si.svm.stubprocs.Wait()
si.svm.SSHService.Close()
sis.mtx.Lock()
defer sis.mtx.Unlock()
commit 5f8b831515460a7fda4068cb3b2d8a4fa2aaccce
Author: Tom Clegg <tom at curii.com>
Date: Wed Dec 6 10:22:24 2023 -0500
21258: Fix flaky boot_outcomes metric test.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/lib/dispatchcloud/dispatcher_test.go b/lib/dispatchcloud/dispatcher_test.go
index 33d7f4e9ac..e2f0db1efb 100644
--- a/lib/dispatchcloud/dispatcher_test.go
+++ b/lib/dispatchcloud/dispatcher_test.go
@@ -208,7 +208,7 @@ func (s *DispatcherSuite) TestDispatchToStubDriver(c *check.C) {
return int(rand.Uint32() & 0x3)
}
var countCapacityErrors int64
- n := 0
+ vmCount := int32(0)
s.stubDriver.Queue = queue
s.stubDriver.SetupVM = func(stubvm *test.StubVM) error {
if pt := stubvm.Instance().ProviderType(); pt == test.InstanceType(6).ProviderType {
@@ -216,7 +216,7 @@ func (s *DispatcherSuite) TestDispatchToStubDriver(c *check.C) {
atomic.AddInt64(&countCapacityErrors, 1)
return test.CapacityError{InstanceTypeSpecific: true}
}
- n++
+ n := atomic.AddInt32(&vmCount, 1)
stubvm.Boot = time.Now().Add(time.Duration(rand.Int63n(int64(5 * time.Millisecond))))
stubvm.CrunchRunDetachDelay = time.Duration(rand.Int63n(int64(10 * time.Millisecond)))
stubvm.ExecuteContainer = executeContainer
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits mailing list