[ARVADOS] created: 2.1.0-1696-ge16866d0f

Git user git at public.arvados.org
Fri Dec 3 15:55:10 UTC 2021


        at  e16866d0f398f6c61f11e2ecdf473d47100329c0 (commit)


commit e16866d0f398f6c61f11e2ecdf473d47100329c0
Author: Tom Clegg <tom at curii.com>
Date:   Fri Dec 3 10:54:59 2021 -0500

    18547: Use volume UUID instead of DeviceID to deduplicate mounts.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
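
Why the switch: as the removed guards below show, a mount's DeviceID can be
empty depending on the volume driver, so deduplication keyed on DeviceID had
to special-case the empty string and could not recognize shared volumes that
don't report one. Every mount carries a volume UUID, and mounts of the same
volume on different servers share that UUID (the test changes below rely on
exactly this), so keying on the UUID works for every driver. A minimal
self-contained sketch of the cleanupMounts() idea, using simplified stand-ins
for the real KeepMount/KeepService types (server names and the volume UUID
are made up for the example):

    package main

    import "fmt"

    // Simplified stand-ins for the real KeepMount/KeepService types.
    type Mount struct {
        UUID     string // volume UUID; set for every mount and shared
                        // by all mounts of the same backend volume
        ReadOnly bool
    }

    type Server struct {
        Name   string
        Mounts []*Mount
    }

    func main() {
        // One volume mounted read-write on srv-a and read-only on srv-b.
        vol := "zzzzz-ivpuk-000000000000000"
        servers := []*Server{
            {Name: "srv-a", Mounts: []*Mount{{UUID: vol}}},
            {Name: "srv-b", Mounts: []*Mount{{UUID: vol, ReadOnly: true}}},
        }

        // Pass 1: index read-write mounts by volume UUID.
        rwdev := map[string]*Server{}
        for _, srv := range servers {
            for _, mnt := range srv.Mounts {
                if !mnt.ReadOnly {
                    rwdev[mnt.UUID] = srv
                }
            }
        }

        // Pass 2: a read-only mount of a volume that is writable
        // elsewhere is redundant for balancing and can be skipped.
        for _, srv := range servers {
            for _, mnt := range srv.Mounts {
                if mnt.ReadOnly && rwdev[mnt.UUID] != nil {
                    fmt.Printf("skip %s readonly mount %s (read-write on %s)\n",
                        srv.Name, mnt.UUID, rwdev[mnt.UUID].Name)
                }
            }
        }
    }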

diff --git a/services/keep-balance/balance.go b/services/keep-balance/balance.go
index bb590e13b..fa01d512b 100644
--- a/services/keep-balance/balance.go
+++ b/services/keep-balance/balance.go
@@ -217,8 +217,8 @@ func (bal *Balancer) cleanupMounts() {
 	rwdev := map[string]*KeepService{}
 	for _, srv := range bal.KeepServices {
 		for _, mnt := range srv.mounts {
-			if !mnt.ReadOnly && mnt.DeviceID != "" {
-				rwdev[mnt.DeviceID] = srv
+			if !mnt.ReadOnly {
+				rwdev[mnt.UUID] = srv
 			}
 		}
 	}
@@ -227,8 +227,8 @@ func (bal *Balancer) cleanupMounts() {
 	for _, srv := range bal.KeepServices {
 		var dedup []*KeepMount
 		for _, mnt := range srv.mounts {
-			if mnt.ReadOnly && rwdev[mnt.DeviceID] != nil {
-				bal.logf("skipping srv %s readonly mount %q because same device %q is mounted read-write on srv %s", srv, mnt.UUID, mnt.DeviceID, rwdev[mnt.DeviceID])
+			if mnt.ReadOnly && rwdev[mnt.UUID] != nil {
+				bal.logf("skipping srv %s readonly mount %q because same volume is mounted read-write on srv %s", srv, mnt.UUID, rwdev[mnt.UUID])
 			} else {
 				dedup = append(dedup, mnt)
 			}
@@ -357,12 +357,10 @@ func (bal *Balancer) GetCurrentState(ctx context.Context, c *arvados.Client, pag
 	deviceMount := map[string]*KeepMount{}
 	for _, srv := range bal.KeepServices {
 		for _, mnt := range srv.mounts {
-			equiv := deviceMount[mnt.DeviceID]
+			equiv := deviceMount[mnt.UUID]
 			if equiv == nil {
 				equiv = mnt
-				if mnt.DeviceID != "" {
-					deviceMount[mnt.DeviceID] = equiv
-				}
+				deviceMount[mnt.UUID] = equiv
 			}
 			equivMount[equiv] = append(equivMount[equiv], mnt)
 		}
@@ -667,7 +665,7 @@ func (bal *Balancer) balanceBlock(blkid arvados.SizedDigest, blk *BlockState) ba
 				// new/remaining replicas uniformly
 				// across qualifying mounts on a given
 				// server.
-				return rendezvousLess(si.mnt.DeviceID, sj.mnt.DeviceID, blkid)
+				return rendezvousLess(si.mnt.UUID, sj.mnt.UUID, blkid)
 			}
 		})
 
@@ -692,7 +690,7 @@ func (bal *Balancer) balanceBlock(blkid arvados.SizedDigest, blk *BlockState) ba
 		// and returns true if all requirements are met.
 		trySlot := func(i int) bool {
 			slot := slots[i]
-			if wantMnt[slot.mnt] || wantDev[slot.mnt.DeviceID] {
+			if wantMnt[slot.mnt] || wantDev[slot.mnt.UUID] {
 				// Already allocated a replica to this
 				// backend device, possibly on a
 				// different server.
@@ -707,9 +705,7 @@ func (bal *Balancer) balanceBlock(blkid arvados.SizedDigest, blk *BlockState) ba
 				slots[i].want = true
 				wantSrv[slot.mnt.KeepService] = true
 				wantMnt[slot.mnt] = true
-				if slot.mnt.DeviceID != "" {
-					wantDev[slot.mnt.DeviceID] = true
-				}
+				wantDev[slot.mnt.UUID] = true
 				replWant += slot.mnt.Replication
 			}
 			return replProt >= desired && replWant >= desired
@@ -751,7 +747,7 @@ func (bal *Balancer) balanceBlock(blkid arvados.SizedDigest, blk *BlockState) ba
 		// haven't already been added to unsafeToDelete
 		// because the servers report different Mtimes.
 		for _, slot := range slots {
-			if slot.repl != nil && wantDev[slot.mnt.DeviceID] {
+			if slot.repl != nil && wantDev[slot.mnt.UUID] {
 				unsafeToDelete[slot.repl.Mtime] = true
 			}
 		}
@@ -834,7 +830,7 @@ func computeBlockState(slots []slot, onlyCount map[*KeepMount]bool, have, needRe
 		if onlyCount != nil && !onlyCount[slot.mnt] {
 			continue
 		}
-		if countedDev[slot.mnt.DeviceID] {
+		if countedDev[slot.mnt.UUID] {
 			continue
 		}
 		switch {
@@ -848,9 +844,7 @@ func computeBlockState(slots []slot, onlyCount map[*KeepMount]bool, have, needRe
 			bbs.pulling++
 			repl += slot.mnt.Replication
 		}
-		if slot.mnt.DeviceID != "" {
-			countedDev[slot.mnt.DeviceID] = true
-		}
+		countedDev[slot.mnt.UUID] = true
 	}
 	if repl < needRepl {
 		bbs.unachievable = true
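
The GetCurrentState() hunk above applies the same key change to index
collection: mounts sharing a volume UUID are grouped under one representative
mount, so each backend volume's block index is requested and counted once,
and the empty-DeviceID guard disappears because the UUID is always set. A
sketch of that grouping under the same simplified assumptions (stand-in
types, hypothetical UUIDs and server names):

    package main

    import "fmt"

    // Simplified stand-in for arvados.KeepMount.
    type Mount struct {
        UUID string // volume UUID, shared by every mount of the volume
        Srv  string // mounting server (for the printout only)
    }

    func main() {
        mounts := []*Mount{
            {UUID: "zzzzz-ivpuk-000000000000000", Srv: "srv-a"},
            {UUID: "zzzzz-ivpuk-000000000000000", Srv: "srv-b"},
            {UUID: "zzzzz-ivpuk-111111111111111", Srv: "srv-a"},
        }

        repr := map[string]*Mount{}     // volume UUID -> first mount seen
        groups := map[*Mount][]*Mount{} // representative -> equivalent mounts
        for _, mnt := range mounts {
            r := repr[mnt.UUID]
            if r == nil {
                r = mnt
                repr[mnt.UUID] = r
            }
            groups[r] = append(groups[r], mnt)
        }

        // The shared volume forms one group of two mounts: its index
        // would be fetched once and attributed to both mounts.
        for r, g := range groups {
            fmt.Printf("volume %s: %d equivalent mount(s)\n", r.UUID, len(g))
        }
    }
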
diff --git a/services/keep-balance/balance_test.go b/services/keep-balance/balance_test.go
index c529ac150..df04145b9 100644
--- a/services/keep-balance/balance_test.go
+++ b/services/keep-balance/balance_test.go
@@ -167,8 +167,8 @@ func (bal *balancerSuite) testMultipleViews(c *check.C, readonly bool) {
 		srv.mounts[0].KeepMount.DeviceID = fmt.Sprintf("writable-by-srv-%x", i)
 		srv.mounts = append(srv.mounts, &KeepMount{
 			KeepMount: arvados.KeepMount{
-				DeviceID:       fmt.Sprintf("writable-by-srv-%x", (i+1)%len(bal.srvs)),
-				UUID:           fmt.Sprintf("zzzzz-mount-%015x", i<<16),
+				DeviceID:       bal.srvs[(i+1)%len(bal.srvs)].mounts[0].KeepMount.DeviceID,
+				UUID:           bal.srvs[(i+1)%len(bal.srvs)].mounts[0].KeepMount.UUID,
 				ReadOnly:       readonly,
 				Replication:    1,
 				StorageClasses: map[string]bool{"default": true},
@@ -347,6 +347,7 @@ func (bal *balancerSuite) TestDecreaseReplBlockTooNew(c *check.C) {
 func (bal *balancerSuite) TestCleanupMounts(c *check.C) {
 	bal.srvs[3].mounts[0].KeepMount.ReadOnly = true
 	bal.srvs[3].mounts[0].KeepMount.DeviceID = "abcdef"
+	bal.srvs[14].mounts[0].KeepMount.UUID = bal.srvs[3].mounts[0].KeepMount.UUID
 	bal.srvs[14].mounts[0].KeepMount.DeviceID = "abcdef"
 	c.Check(len(bal.srvs[3].mounts), check.Equals, 1)
 	bal.cleanupMounts()
@@ -485,32 +486,32 @@ func (bal *balancerSuite) TestVolumeReplication(c *check.C) {
 }
 
 func (bal *balancerSuite) TestDeviceRWMountedByMultipleServers(c *check.C) {
-	bal.srvs[0].mounts[0].KeepMount.DeviceID = "abcdef"
-	bal.srvs[9].mounts[0].KeepMount.DeviceID = "abcdef"
-	bal.srvs[14].mounts[0].KeepMount.DeviceID = "abcdef"
+	dupUUID := bal.srvs[0].mounts[0].KeepMount.UUID
+	bal.srvs[9].mounts[0].KeepMount.UUID = dupUUID
+	bal.srvs[14].mounts[0].KeepMount.UUID = dupUUID
 	// block 0 belongs on servers 3 and e, which have different
-	// device IDs.
+	// UUIDs.
 	bal.try(c, tester{
 		known:      0,
 		desired:    map[string]int{"default": 2},
 		current:    slots{1},
 		shouldPull: slots{0}})
 	// block 1 belongs on servers 0 and 9, which both report
-	// having a replica, but the replicas are on the same device
-	// ID -- so we should pull to the third position (7).
+	// having a replica, but the replicas are on the same volume
+	// -- so we should pull to the third position (7).
 	bal.try(c, tester{
 		known:      1,
 		desired:    map[string]int{"default": 2},
 		current:    slots{0, 1},
 		shouldPull: slots{2}})
-	// block 1 can be pulled to the doubly-mounted device, but the
+	// block 1 can be pulled to the doubly-mounted volume, but the
 	// pull should only be done on the first of the two servers.
 	bal.try(c, tester{
 		known:      1,
 		desired:    map[string]int{"default": 2},
 		current:    slots{2},
 		shouldPull: slots{0}})
-	// block 0 has one replica on a single device mounted on two
+	// block 0 has one replica on a single volume mounted on two
 	// servers (e,9 at positions 1,9). Trashing the replica on 9
 	// would lose the block.
 	bal.try(c, tester{
@@ -523,7 +524,7 @@ func (bal *balancerSuite) TestDeviceRWMountedByMultipleServers(c *check.C) {
 			pulling: 1,
 		}})
 	// block 0 is overreplicated, but the second and third
-	// replicas are the same replica according to DeviceID
+	// replicas are the same replica according to volume UUID
 	// (despite different Mtimes). Don't trash the third replica.
 	bal.try(c, tester{
 		known:   0,
@@ -595,7 +596,7 @@ func (bal *balancerSuite) TestChangeStorageClasses(c *check.C) {
 		desired:          map[string]int{"default": 2, "special": 1},
 		current:          slots{0, 1},
 		shouldPull:       slots{9},
-		shouldPullMounts: []string{"zzzzz-mount-special00000009"}})
+		shouldPullMounts: []string{"zzzzz-mount-special20000009"}})
 	// If some storage classes are not satisfied, don't trash any
 	// excess replicas. (E.g., if someone desires repl=1 on
 	// class=durable, and we have two copies on class=volatile, we
@@ -605,7 +606,7 @@ func (bal *balancerSuite) TestChangeStorageClasses(c *check.C) {
 		desired:          map[string]int{"special": 1},
 		current:          slots{0, 1},
 		shouldPull:       slots{9},
-		shouldPullMounts: []string{"zzzzz-mount-special00000009"}})
+		shouldPullMounts: []string{"zzzzz-mount-special20000009"}})
 	// Once storage classes are satisfied, trash excess replicas
 	// that appear earlier in probe order but aren't needed to
 	// satisfy the desired classes.
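
The test comments above spell out the invariant the balance.go changes
enforce: replicas reported by different servers count as one replica when
their mounts share a volume UUID, both when measuring achieved replication
and when deciding what is safe to trash. A minimal sketch of that counting
rule, simplified from computeBlockState() (stand-in types, hypothetical
UUID):

    package main

    import "fmt"

    // Simplified stand-in for a mount reporting a replica of a block.
    type Mount struct {
        UUID        string // volume UUID
        Replication int
    }

    func effectiveReplication(replicas []*Mount) int {
        counted := map[string]bool{} // volume UUIDs already counted
        repl := 0
        for _, mnt := range replicas {
            if counted[mnt.UUID] {
                continue // same backend volume, not extra durability
            }
            counted[mnt.UUID] = true
            repl += mnt.Replication
        }
        return repl
    }

    func main() {
        shared := "zzzzz-ivpuk-000000000000000"
        // Two servers report the block, but both mounts are the same
        // backend volume, so it provides replication 1, not 2.
        fmt.Println(effectiveReplication([]*Mount{
            {UUID: shared, Replication: 1},
            {UUID: shared, Replication: 1},
        }))
    }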

-----------------------------------------------------------------------


hooks/post-receive