[ARVADOS] created: 1.1.4-352-gc02297f
Git user
git at public.curoverse.com
Wed Jun 6 13:15:37 EDT 2018
at c02297f8766827d827c34260f75e438e806e9c9e (commit)
commit c02297f8766827d827c34260f75e438e806e9c9e
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Wed Jun 6 13:14:26 2018 -0400
13427: Handle same backend device mounted RW in multiple places.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/services/keep-balance/balance.go b/services/keep-balance/balance.go
index 92ee3c2..5aad8e6 100644
--- a/services/keep-balance/balance.go
+++ b/services/keep-balance/balance.go
@@ -576,11 +576,12 @@ func (bal *Balancer) balanceBlock(blkid arvados.SizedDigest, blk *BlockState) ba
}
})
- // Servers and mounts (with or without existing
+ // Servers/mounts/devices (with or without existing
// replicas) that are part of the best achievable
// layout for this storage class.
wantSrv := map[*KeepService]bool{}
wantMnt := map[*KeepMount]bool{}
+ wantDev := map[string]bool{}
// Positions (with existing replicas) that have been
// protected (via unsafeToDelete) to ensure we don't
// reduce replication below desired level when
@@ -592,6 +593,12 @@ func (bal *Balancer) balanceBlock(blkid arvados.SizedDigest, blk *BlockState) ba
// and returns true if all requirements are met.
trySlot := func(i int) bool {
slot := slots[i]
+ if wantDev[slot.mnt.DeviceID] {
+ // Already allocated a replica to this
+ // backend device, possibly on a
+ // different server.
+ return false
+ }
if len(protMnt) < desired && slot.repl != nil {
unsafeToDelete[slot.repl.Mtime] = true
protMnt[slot.mnt] = true
@@ -600,6 +607,9 @@ func (bal *Balancer) balanceBlock(blkid arvados.SizedDigest, blk *BlockState) ba
slots[i].want = true
wantSrv[slot.mnt.KeepService] = true
wantMnt[slot.mnt] = true
+ if slot.mnt.DeviceID != "" {
+ wantDev[slot.mnt.DeviceID] = true
+ }
}
return len(protMnt) >= desired && len(wantMnt) >= desired
}
@@ -644,6 +654,16 @@ func (bal *Balancer) balanceBlock(blkid arvados.SizedDigest, blk *BlockState) ba
cs.unachievable = true
classState[class] = cs
}
+
+ // Avoid deleting wanted replicas from devices that
+ // are mounted on multiple servers -- even if they
+ // haven't already been added to unsafeToDelete
+ // because the servers report different Mtimes.
+ for _, slot := range slots {
+ if slot.repl != nil && wantDev[slot.mnt.DeviceID] {
+ unsafeToDelete[slot.repl.Mtime] = true
+ }
+ }
}
// TODO: If multiple replicas are trashable, prefer the oldest
diff --git a/services/keep-balance/balance_test.go b/services/keep-balance/balance_test.go
index 94c4cd2..602c42e 100644
--- a/services/keep-balance/balance_test.go
+++ b/services/keep-balance/balance_test.go
@@ -259,6 +259,57 @@ func (bal *balancerSuite) TestDedupDevices(c *check.C) {
shouldPull: slots{2}})
}
+func (bal *balancerSuite) TestDeviceRWMountedByMultipleServers(c *check.C) {
+ bal.srvs[0].mounts[0].KeepMount.DeviceID = "abcdef"
+ bal.srvs[9].mounts[0].KeepMount.DeviceID = "abcdef"
+ bal.srvs[14].mounts[0].KeepMount.DeviceID = "abcdef"
+ // block 0 belongs on servers 3 and e, which have different
+ // device IDs.
+ bal.try(c, tester{
+ known: 0,
+ desired: map[string]int{"default": 2},
+ current: slots{1},
+ shouldPull: slots{0}})
+ // block 1 belongs on servers 0 and 9, which both report
+ // having a replica, but the replicas are on the same device
+ // ID -- so we should pull to the third position (7).
+ bal.try(c, tester{
+ known: 1,
+ desired: map[string]int{"default": 2},
+ current: slots{0, 1},
+ shouldPull: slots{2}})
+ // block 1 can be pulled to the doubly-mounted device, but the
+ // pull should only be done on the first of the two servers.
+ bal.try(c, tester{
+ known: 1,
+ desired: map[string]int{"default": 2},
+ current: slots{2},
+ shouldPull: slots{0}})
+ // block 0 has one replica on a single device mounted on two
+ // servers (e,9 at positions 1,9). Trashing the replica on 9
+ // would lose the block.
+ bal.try(c, tester{
+ known: 0,
+ desired: map[string]int{"default": 2},
+ current: slots{1, 9},
+ shouldPull: slots{0}})
+ // block 0 is overreplicated, but the second and third
+ // replicas are the same replica according to DeviceID
+ // (despite different Mtimes). Don't trash the third replica.
+ bal.try(c, tester{
+ known: 0,
+ desired: map[string]int{"default": 2},
+ current: slots{0, 1, 9}})
+ // block 0 is overreplicated; the third and fifth replicas are
+ // extra, but the fourth is another view of the second and
+ // shouldn't be trashed.
+ bal.try(c, tester{
+ known: 0,
+ desired: map[string]int{"default": 2},
+ current: slots{0, 1, 5, 9, 12},
+ shouldTrash: slots{5, 12}})
+}
+
func (bal *balancerSuite) TestChangeStorageClasses(c *check.C) {
// For known blocks 0/1/2/3, server 9 is slot 9/1/14/0 in
// probe order. For these tests we give it two mounts, one
commit c4aaa3492128b1f438d4992223f65ddf0aec084d
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Mon Jun 4 22:10:37 2018 -0400
13427: Ignore readonly devices mounted read-write elsewhere.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/services/keep-balance/balance.go b/services/keep-balance/balance.go
index d6a2dde..92ee3c2 100644
--- a/services/keep-balance/balance.go
+++ b/services/keep-balance/balance.go
@@ -95,6 +95,7 @@ func (bal *Balancer) Run(config Config, runOptions RunOptions) (nextRunOptions R
return
}
}
+ bal.dedupDevices()
if err = bal.CheckSanityEarly(&config.Client); err != nil {
return
@@ -169,6 +170,30 @@ func (bal *Balancer) DiscoverKeepServices(c *arvados.Client, okTypes []string) e
})
}
+func (bal *Balancer) dedupDevices() {
+ rwdev := map[string]*KeepService{}
+ for _, srv := range bal.KeepServices {
+ for _, mnt := range srv.mounts {
+ if !mnt.ReadOnly && mnt.DeviceID != "" {
+ rwdev[mnt.DeviceID] = srv
+ }
+ }
+ }
+ // Drop the readonly mounts whose device is mounted RW
+ // elsewhere.
+ for _, srv := range bal.KeepServices {
+ var dedup []*KeepMount
+ for _, mnt := range srv.mounts {
+ if mnt.ReadOnly && rwdev[mnt.DeviceID] != nil {
+ bal.logf("skipping srv %s readonly mount %q because same device %q is mounted read-write on srv %s", srv, mnt.UUID, mnt.DeviceID, rwdev[mnt.DeviceID])
+ } else {
+ dedup = append(dedup, mnt)
+ }
+ }
+ srv.mounts = dedup
+ }
+}
+
// CheckSanityEarly checks for configuration and runtime errors that
// can be detected before GetCurrentState() and ComputeChangeSets()
// are called.
diff --git a/services/keep-balance/balance_test.go b/services/keep-balance/balance_test.go
index cfdd47f..94c4cd2 100644
--- a/services/keep-balance/balance_test.go
+++ b/services/keep-balance/balance_test.go
@@ -245,6 +245,20 @@ func (bal *balancerSuite) TestDecreaseReplBlockTooNew(c *check.C) {
shouldTrash: slots{2}})
}
+func (bal *balancerSuite) TestDedupDevices(c *check.C) {
+ bal.srvs[3].mounts[0].KeepMount.ReadOnly = true
+ bal.srvs[3].mounts[0].KeepMount.DeviceID = "abcdef"
+ bal.srvs[14].mounts[0].KeepMount.DeviceID = "abcdef"
+ c.Check(len(bal.srvs[3].mounts), check.Equals, 1)
+ bal.dedupDevices()
+ c.Check(len(bal.srvs[3].mounts), check.Equals, 0)
+ bal.try(c, tester{
+ known: 0,
+ desired: map[string]int{"default": 2},
+ current: slots{1},
+ shouldPull: slots{2}})
+}
+
func (bal *balancerSuite) TestChangeStorageClasses(c *check.C) {
// For known blocks 0/1/2/3, server 9 is slot 9/1/14/0 in
// probe order. For these tests we give it two mounts, one
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list