[arvados] created: 2.7.0-5593-g9251549928

git repository hosting git at public.arvados.org
Thu Dec 7 19:01:29 UTC 2023


        at  9251549928dd5206d4a14e5f9811caa66aa64c65 (commit)


commit 9251549928dd5206d4a14e5f9811caa66aa64c65
Author: Tom Clegg <tom at curii.com>
Date:   Thu Dec 7 14:01:09 2023 -0500

    21254: Fix racy keep-balance test.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/services/keep-balance/balance_run_test.go b/services/keep-balance/balance_run_test.go
index b7b3fb6123..fefd2c6c1b 100644
--- a/services/keep-balance/balance_run_test.go
+++ b/services/keep-balance/balance_run_test.go
@@ -15,6 +15,7 @@ import (
 	"os"
 	"strings"
 	"sync"
+	"syscall"
 	"time"
 
 	"git.arvados.org/arvados.git/lib/config"
@@ -639,7 +640,7 @@ func (s *runSuite) TestRunForever(c *check.C) {
 
 	ctx, cancel := context.WithCancel(context.Background())
 	defer cancel()
-	s.config.Collections.BalancePeriod = arvados.Duration(time.Millisecond)
+	s.config.Collections.BalancePeriod = arvados.Duration(100 * time.Millisecond)
 	srv := s.newServer(&opts)
 
 	done := make(chan bool)
@@ -648,14 +649,34 @@ func (s *runSuite) TestRunForever(c *check.C) {
 		close(done)
 	}()
 
+	procself, err := os.FindProcess(os.Getpid())
+	c.Assert(err, check.IsNil)
+
 	// Each run should send 4 pull lists + 4 trash lists. The
 	// first run should also send 4 empty trash lists at
 	// startup. We should complete all four runs in much less than
 	// a second.
+	completedRuns := 0
 	for t0 := time.Now(); time.Since(t0) < 10*time.Second; {
-		if pullReqs.Count() >= 16 && trashReqs.Count() == pullReqs.Count()+4 {
+		pulls := pullReqs.Count()
+		if pulls >= 16 && trashReqs.Count() == pulls+4 {
 			break
 		}
+		if pulls > 4 {
+			// Once the 2nd run has started automatically
+			// (indicating that our BalancePeriod is
+			// working) we switch to a long wait time to
+			// effectively stop the timed runs, and
+			// instead start sending a single SIGUSR1 at
+			// the end of each (2nd or 3rd) run, to ensure
+			// we get exactly 4 runs in total.
+			srv.Cluster.Collections.BalancePeriod = arvados.Duration(time.Minute)
+			if pulls%4 == 0 && pulls <= 12 && pulls/4 > completedRuns {
+				completedRuns = pulls / 4
+				c.Logf("completed run %d, sending SIGUSR1 to trigger next run", completedRuns)
+				procself.Signal(syscall.SIGUSR1)
+			}
+		}
 		time.Sleep(time.Millisecond)
 	}
 	cancel()
diff --git a/services/keep-balance/server.go b/services/keep-balance/server.go
index 480791ffa2..7a59c1e8c0 100644
--- a/services/keep-balance/server.go
+++ b/services/keep-balance/server.go
@@ -100,6 +100,7 @@ func (srv *Server) runForever(ctx context.Context) error {
 
 	sigUSR1 := make(chan os.Signal, 1)
 	signal.Notify(sigUSR1, syscall.SIGUSR1)
+	defer signal.Stop(sigUSR1)
 
 	logger.Info("acquiring service lock")
 	dblock.KeepBalanceService.Lock(ctx, func(context.Context) (*sqlx.DB, error) { return srv.DB, nil })
@@ -126,7 +127,6 @@ func (srv *Server) runForever(ctx context.Context) error {
 
 		select {
 		case <-ctx.Done():
-			signal.Stop(sigUSR1)
 			return nil
 		case <-ticker.C:
 			logger.Print("timer went off")
@@ -135,8 +135,7 @@ func (srv *Server) runForever(ctx context.Context) error {
 			// Reset the timer so we don't start the N+1st
 			// run too soon after the Nth run is triggered
 			// by SIGUSR1.
-			ticker.Stop()
-			ticker = time.NewTicker(time.Duration(srv.Cluster.Collections.BalancePeriod))
+			ticker.Reset(time.Duration(srv.Cluster.Collections.BalancePeriod))
 		}
 		logger.Print("starting next run")
 	}

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list