[arvados] updated: 2.6.0-470-g4c36c4004

git repository hosting git at public.arvados.org
Wed Aug 9 21:09:03 UTC 2023


Summary of changes:
 lib/dispatchcloud/dispatcher_test.go          |  2 ++
 lib/dispatchcloud/scheduler/run_queue_test.go |  6 ------
 lib/dispatchcloud/test/stub_driver.go         | 11 +++++++++++
 lib/dispatchcloud/worker/pool.go              |  4 ++++
 lib/dispatchcloud/worker/worker.go            |  2 ++
 5 files changed, 19 insertions(+), 6 deletions(-)

       via  4c36c40047be255a2819adeca44c5f1fea860bad (commit)
       via  e5394906b154b630699c0edd4add36eca34611b3 (commit)
      from  9a857a4f86dc4cbe5d8214bfb3ea9e8d4dac6a76 (commit)

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.


commit 4c36c40047be255a2819adeca44c5f1fea860bad
Author: Tom Clegg <tom at curii.com>
Date:   Wed Aug 9 17:07:35 2023 -0400

    20457: Fix flaky test.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/lib/dispatchcloud/dispatcher_test.go b/lib/dispatchcloud/dispatcher_test.go
index ea2611959..a981d83a2 100644
--- a/lib/dispatchcloud/dispatcher_test.go
+++ b/lib/dispatchcloud/dispatcher_test.go
@@ -368,6 +368,7 @@ func (s *DispatcherSuite) TestInstancesAPI(c *check.C) {
 	sr := getInstances()
 	c.Check(len(sr.Items), check.Equals, 0)
 
+	s.stubDriver.ErrorRateCreate = 0
 	ch := s.disp.pool.Subscribe()
 	defer s.disp.pool.Unsubscribe(ch)
 	ok := s.disp.pool.Create(test.InstanceType(1))

commit e5394906b154b630699c0edd4add36eca34611b3
Author: Tom Clegg <tom at curii.com>
Date:   Wed Aug 9 17:06:40 2023 -0400

    20457: Exercise quota handling in dispatcher chaos test.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/lib/dispatchcloud/dispatcher_test.go b/lib/dispatchcloud/dispatcher_test.go
index 4583a596e..ea2611959 100644
--- a/lib/dispatchcloud/dispatcher_test.go
+++ b/lib/dispatchcloud/dispatcher_test.go
@@ -52,6 +52,7 @@ func (s *DispatcherSuite) SetUpTest(c *check.C) {
 		ErrorRateCreate:           0.1,
 		ErrorRateDestroy:          0.1,
 		MinTimeBetweenCreateCalls: time.Millisecond,
+		QuotaMaxInstances:         10,
 	}
 
 	// We need the postgresql connection info from the integration
diff --git a/lib/dispatchcloud/scheduler/run_queue_test.go b/lib/dispatchcloud/scheduler/run_queue_test.go
index f407ac848..4359ae03b 100644
--- a/lib/dispatchcloud/scheduler/run_queue_test.go
+++ b/lib/dispatchcloud/scheduler/run_queue_test.go
@@ -29,12 +29,6 @@ var (
 	}()
 )
 
-type stubQuotaError struct {
-	error
-}
-
-func (stubQuotaError) IsQuotaError() bool { return true }
-
 type stubPool struct {
 	notify    <-chan struct{}
 	unalloc   map[arvados.InstanceType]int // idle+booting+unknown
diff --git a/lib/dispatchcloud/test/stub_driver.go b/lib/dispatchcloud/test/stub_driver.go
index 5ca83d263..826e5c1af 100644
--- a/lib/dispatchcloud/test/stub_driver.go
+++ b/lib/dispatchcloud/test/stub_driver.go
@@ -54,6 +54,8 @@ type StubDriver struct {
 	MinTimeBetweenCreateCalls    time.Duration
 	MinTimeBetweenInstancesCalls time.Duration
 
+	QuotaMaxInstances int
+
 	// If true, Create and Destroy calls block until Release() is
 	// called.
 	HoldCloudOps bool
@@ -124,6 +126,9 @@ func (sis *StubInstanceSet) Create(it arvados.InstanceType, image cloud.ImageID,
 	if math_rand.Float64() < sis.driver.ErrorRateCreate {
 		return nil, fmt.Errorf("StubInstanceSet: rand < ErrorRateCreate %f", sis.driver.ErrorRateCreate)
 	}
+	if max := sis.driver.QuotaMaxInstances; max > 0 && len(sis.servers) >= max {
+		return nil, QuotaError{fmt.Errorf("StubInstanceSet: reached QuotaMaxInstances %d", max)}
+	}
 	sis.allowCreateCall = time.Now().Add(sis.driver.MinTimeBetweenCreateCalls)
 	ak := sis.driver.AuthorizedKeys
 	if authKey != nil {
@@ -489,3 +494,9 @@ func copyTags(src cloud.InstanceTags) cloud.InstanceTags {
 func (si stubInstance) PriceHistory(arvados.InstanceType) []cloud.InstancePrice {
 	return nil
 }
+
+type QuotaError struct {
+	error
+}
+
+func (QuotaError) IsQuotaError() bool { return true }
diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index f79bad98f..15b0dbcde 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -1053,6 +1053,10 @@ func (wp *Pool) sync(threshold time.Time, instances []cloud.Instance) {
 	}
 
 	if wp.atQuotaUntilFewerInstances > len(wp.workers)+len(wp.creating) {
+		// After syncing, there are fewer instances (including
+		// pending creates) than there were last time we saw a
+		// quota error.  This might mean it's now possible to
+		// create new instances.  Reset our "at quota" state.
 		wp.atQuotaUntilFewerInstances = 0
 	}
 
diff --git a/lib/dispatchcloud/worker/worker.go b/lib/dispatchcloud/worker/worker.go
index 8b4be1a3c..7d94146cf 100644
--- a/lib/dispatchcloud/worker/worker.go
+++ b/lib/dispatchcloud/worker/worker.go
@@ -639,10 +639,12 @@ func (wkr *worker) Close() {
 	for uuid, rr := range wkr.running {
 		wkr.logger.WithField("ContainerUUID", uuid).Info("crunch-run process abandoned")
 		rr.Close()
+		delete(wkr.running, uuid)
 	}
 	for uuid, rr := range wkr.starting {
 		wkr.logger.WithField("ContainerUUID", uuid).Info("crunch-run process abandoned")
 		rr.Close()
+		delete(wkr.starting, uuid)
 	}
 }
 

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list