[arvados] created: 2.5.0-291-g9f4f850c2

git repository hosting git at public.arvados.org
Thu Mar 23 19:04:47 UTC 2023


        at  9f4f850c2818cbf5eea45e30d27b13c20bd2be0c (commit)


commit 9f4f850c2818cbf5eea45e30d27b13c20bd2be0c
Author: Tom Clegg <tom at curii.com>
Date:   Thu Mar 23 14:36:55 2023 -0400

    20242: Trash only one when identical replicas are eligible to trash.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/services/keep-balance/balance.go b/services/keep-balance/balance.go
index 33c907c20..215c5e1b5 100644
--- a/services/keep-balance/balance.go
+++ b/services/keep-balance/balance.go
@@ -829,19 +829,49 @@ func (bal *Balancer) balanceBlock(blkid arvados.SizedDigest, blk *BlockState) ba
 	}
 	blockState := computeBlockState(slots, nil, len(blk.Replicas), 0)
 
-	var lost bool
-	var changes []string
+	// Sort the slots by rendezvous order. This ensures "trash the
+	// first of N replicas with identical timestamps" is
+	// predictable (helpful for testing) and well distributed
+	// across servers.
+	sort.Slice(slots, func(i, j int) bool {
+		si, sj := slots[i], slots[j]
+		if orderi, orderj := srvRendezvous[si.mnt.KeepService], srvRendezvous[sj.mnt.KeepService]; orderi != orderj {
+			return orderi < orderj
+		} else {
+			return rendezvousLess(si.mnt.UUID, sj.mnt.UUID, blkid)
+		}
+	})
+
+	var (
+		lost         bool
+		changes      []string
+		trashedMtime = make(map[int64]bool, len(slots))
+	)
 	for _, slot := range slots {
 		// TODO: request a Touch if Mtime is duplicated.
 		var change int
 		switch {
 		case !slot.want && slot.repl != nil && slot.repl.Mtime < bal.MinMtime:
-			slot.mnt.KeepService.AddTrash(Trash{
-				SizedDigest: blkid,
-				Mtime:       slot.repl.Mtime,
-				From:        slot.mnt,
-			})
-			change = changeTrash
+			if trashedMtime[slot.repl.Mtime] {
+				// Don't trash multiple replicas with
+				// identical timestamps. If they are
+				// multiple views of the same backing
+				// storage, asking both servers to
+				// trash is redundant and can cause
+				// races (see #20242). If they are
+				// distinct replicas that happen to
+				// have identical timestamps, we'll
+				// get this one on the next sweep.
+				change = changeNone
+			} else {
+				slot.mnt.KeepService.AddTrash(Trash{
+					SizedDigest: blkid,
+					Mtime:       slot.repl.Mtime,
+					From:        slot.mnt,
+				})
+				change = changeTrash
+				trashedMtime[slot.repl.Mtime] = true
+			}
 		case slot.repl == nil && slot.want && len(blk.Replicas) == 0:
 			lost = true
 			change = changeNone
diff --git a/services/keep-balance/balance_test.go b/services/keep-balance/balance_test.go
index 6626609b5..f9fca1431 100644
--- a/services/keep-balance/balance_test.go
+++ b/services/keep-balance/balance_test.go
@@ -321,6 +321,35 @@ func (bal *balancerSuite) TestDecreaseReplTimestampCollision(c *check.C) {
 		desired:    map[string]int{"default": 2},
 		current:    slots{0, 1, 2},
 		timestamps: []int64{12345678, 10000000, 10000000}})
+	bal.try(c, tester{
+		desired:     map[string]int{"default": 0},
+		current:     slots{0, 1, 2},
+		timestamps:  []int64{12345678, 12345678, 12345678},
+		shouldTrash: slots{0},
+		shouldTrashMounts: []string{
+			bal.srvs[bal.knownRendezvous[0][0]].mounts[0].UUID}})
+	bal.try(c, tester{
+		desired:     map[string]int{"default": 2},
+		current:     slots{0, 1, 2, 5, 6},
+		timestamps:  []int64{12345678, 12345679, 10000000, 10000000, 10000000},
+		shouldTrash: slots{2},
+		shouldTrashMounts: []string{
+			bal.srvs[bal.knownRendezvous[0][2]].mounts[0].UUID}})
+	bal.try(c, tester{
+		desired:     map[string]int{"default": 2},
+		current:     slots{0, 1, 2, 5, 6},
+		timestamps:  []int64{12345678, 12345679, 12345671, 10000000, 10000000},
+		shouldTrash: slots{2, 5},
+		shouldTrashMounts: []string{
+			bal.srvs[bal.knownRendezvous[0][2]].mounts[0].UUID,
+			bal.srvs[bal.knownRendezvous[0][5]].mounts[0].UUID}})
+	bal.try(c, tester{
+		desired:     map[string]int{"default": 2},
+		current:     slots{0, 1, 2, 5, 6},
+		timestamps:  []int64{12345678, 12345679, 12345679, 10000000, 10000000},
+		shouldTrash: slots{5},
+		shouldTrashMounts: []string{
+			bal.srvs[bal.knownRendezvous[0][5]].mounts[0].UUID}})
 }
 
 func (bal *balancerSuite) TestDecreaseReplBlockTooNew(c *check.C) {

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list