[arvados] created: 2.5.0-291-g9f4f850c2
git repository hosting
git at public.arvados.org
Thu Mar 23 19:04:47 UTC 2023
at 9f4f850c2818cbf5eea45e30d27b13c20bd2be0c (commit)
commit 9f4f850c2818cbf5eea45e30d27b13c20bd2be0c
Author: Tom Clegg <tom at curii.com>
Date: Thu Mar 23 14:36:55 2023 -0400
20242: Trash only one when identical replicas are eligible to trash.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/services/keep-balance/balance.go b/services/keep-balance/balance.go
index 33c907c20..215c5e1b5 100644
--- a/services/keep-balance/balance.go
+++ b/services/keep-balance/balance.go
@@ -829,19 +829,49 @@ func (bal *Balancer) balanceBlock(blkid arvados.SizedDigest, blk *BlockState) ba
}
blockState := computeBlockState(slots, nil, len(blk.Replicas), 0)
- var lost bool
- var changes []string
+ // Sort the slots by rendezvous order. This ensures "trash the
+ // first of N replicas with identical timestamps" is
+ // predictable (helpful for testing) and well distributed
+ // across servers.
+ sort.Slice(slots, func(i, j int) bool {
+ si, sj := slots[i], slots[j]
+ if orderi, orderj := srvRendezvous[si.mnt.KeepService], srvRendezvous[sj.mnt.KeepService]; orderi != orderj {
+ return orderi < orderj
+ } else {
+ return rendezvousLess(si.mnt.UUID, sj.mnt.UUID, blkid)
+ }
+ })
+
+ var (
+ lost bool
+ changes []string
+ trashedMtime = make(map[int64]bool, len(slots))
+ )
for _, slot := range slots {
// TODO: request a Touch if Mtime is duplicated.
var change int
switch {
case !slot.want && slot.repl != nil && slot.repl.Mtime < bal.MinMtime:
- slot.mnt.KeepService.AddTrash(Trash{
- SizedDigest: blkid,
- Mtime: slot.repl.Mtime,
- From: slot.mnt,
- })
- change = changeTrash
+ if trashedMtime[slot.repl.Mtime] {
+ // Don't trash multiple replicas with
+ // identical timestamps. If they are
+ // multiple views of the same backing
+ // storage, asking both servers to
+ // trash is redundant and can cause
+ // races (see #20242). If they are
+ // distinct replicas that happen to
+ // have identical timestamps, we'll
+ // get this one on the next sweep.
+ change = changeNone
+ } else {
+ slot.mnt.KeepService.AddTrash(Trash{
+ SizedDigest: blkid,
+ Mtime: slot.repl.Mtime,
+ From: slot.mnt,
+ })
+ change = changeTrash
+ trashedMtime[slot.repl.Mtime] = true
+ }
case slot.repl == nil && slot.want && len(blk.Replicas) == 0:
lost = true
change = changeNone
diff --git a/services/keep-balance/balance_test.go b/services/keep-balance/balance_test.go
index 6626609b5..f9fca1431 100644
--- a/services/keep-balance/balance_test.go
+++ b/services/keep-balance/balance_test.go
@@ -321,6 +321,35 @@ func (bal *balancerSuite) TestDecreaseReplTimestampCollision(c *check.C) {
desired: map[string]int{"default": 2},
current: slots{0, 1, 2},
timestamps: []int64{12345678, 10000000, 10000000}})
+ bal.try(c, tester{
+ desired: map[string]int{"default": 0},
+ current: slots{0, 1, 2},
+ timestamps: []int64{12345678, 12345678, 12345678},
+ shouldTrash: slots{0},
+ shouldTrashMounts: []string{
+ bal.srvs[bal.knownRendezvous[0][0]].mounts[0].UUID}})
+ bal.try(c, tester{
+ desired: map[string]int{"default": 2},
+ current: slots{0, 1, 2, 5, 6},
+ timestamps: []int64{12345678, 12345679, 10000000, 10000000, 10000000},
+ shouldTrash: slots{2},
+ shouldTrashMounts: []string{
+ bal.srvs[bal.knownRendezvous[0][2]].mounts[0].UUID}})
+ bal.try(c, tester{
+ desired: map[string]int{"default": 2},
+ current: slots{0, 1, 2, 5, 6},
+ timestamps: []int64{12345678, 12345679, 12345671, 10000000, 10000000},
+ shouldTrash: slots{2, 5},
+ shouldTrashMounts: []string{
+ bal.srvs[bal.knownRendezvous[0][2]].mounts[0].UUID,
+ bal.srvs[bal.knownRendezvous[0][5]].mounts[0].UUID}})
+ bal.try(c, tester{
+ desired: map[string]int{"default": 2},
+ current: slots{0, 1, 2, 5, 6},
+ timestamps: []int64{12345678, 12345679, 12345679, 10000000, 10000000},
+ shouldTrash: slots{5},
+ shouldTrashMounts: []string{
+ bal.srvs[bal.knownRendezvous[0][5]].mounts[0].UUID}})
}
func (bal *balancerSuite) TestDecreaseReplBlockTooNew(c *check.C) {
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list