[ARVADOS] created: 1.3.0-827-g6c5852fb1

Git user git at public.curoverse.com
Fri Apr 26 20:07:51 UTC 2019


        at  6c5852fb18c0b6422c079c6fee66891a273ad089 (commit)


commit 6c5852fb18c0b6422c079c6fee66891a273ad089
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Fri Apr 26 16:06:39 2019 -0400

    15148: Include list of affected PDHs in LostBlocksFile.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/services/keep-balance/balance.go b/services/keep-balance/balance.go
index fc5812cde..08a6c5881 100644
--- a/services/keep-balance/balance.go
+++ b/services/keep-balance/balance.go
@@ -450,7 +450,13 @@ func (bal *Balancer) addCollection(coll arvados.Collection) error {
 		repl = *coll.ReplicationDesired
 	}
 	debugf("%v: %d block x%d", coll.UUID, len(blkids), repl)
-	bal.BlockStateMap.IncreaseDesired(coll.StorageClassesDesired, repl, blkids)
+	// Pass pdh to IncreaseDesired only if LostBlocksFile is being
+	// written -- otherwise it's just a waste of memory.
+	pdh := ""
+	if bal.LostBlocksFile != "" {
+		pdh = coll.PortableDataHash
+	}
+	bal.BlockStateMap.IncreaseDesired(pdh, coll.StorageClassesDesired, repl, blkids)
 	return nil
 }
 
@@ -924,7 +930,11 @@ func (bal *Balancer) collectStatistics(results <-chan balanceResult) {
 			s.lost.replicas -= surplus
 			s.lost.blocks++
 			s.lost.bytes += bytes * int64(-surplus)
-			fmt.Fprintf(bal.lostBlocks, "%s\n", strings.SplitN(string(result.blkid), "+", 2)[0])
+			fmt.Fprintf(bal.lostBlocks, "%s", strings.SplitN(string(result.blkid), "+", 2)[0])
+			for pdh := range result.blk.Refs {
+				fmt.Fprintf(bal.lostBlocks, " %s", pdh)
+			}
+			fmt.Fprint(bal.lostBlocks, "\n")
 		case surplus < 0:
 			s.underrep.replicas -= surplus
 			s.underrep.blocks++
diff --git a/services/keep-balance/balance_run_test.go b/services/keep-balance/balance_run_test.go
index ee7aeb9c8..db530bc49 100644
--- a/services/keep-balance/balance_run_test.go
+++ b/services/keep-balance/balance_run_test.go
@@ -452,7 +452,7 @@ func (s *runSuite) TestWriteLostBlocks(c *check.C) {
 	c.Check(err, check.IsNil)
 	lost, err := ioutil.ReadFile(lostf.Name())
 	c.Assert(err, check.IsNil)
-	c.Check(string(lost), check.Equals, "37b51d194a7513e45b56f6524f2d51f2\n")
+	c.Check(string(lost), check.Equals, "37b51d194a7513e45b56f6524f2d51f2 fa7aeb5140e2848d39b416daeef4ffc5+45\n")
 }
 
 func (s *runSuite) TestDryRun(c *check.C) {
diff --git a/services/keep-balance/block_state.go b/services/keep-balance/block_state.go
index 46e69059c..d9338d0f9 100644
--- a/services/keep-balance/block_state.go
+++ b/services/keep-balance/block_state.go
@@ -23,6 +23,7 @@ type Replica struct {
 // replicas actually stored (according to the keepstore indexes we
 // know about).
 type BlockState struct {
+	Refs     map[string]bool // pdh => true (only tracked when len(Replicas)==0)
 	RefCount int
 	Replicas []Replica
 	Desired  map[string]int
@@ -40,9 +41,21 @@ var defaultClasses = []string{"default"}
 
 func (bs *BlockState) addReplica(r Replica) {
 	bs.Replicas = append(bs.Replicas, r)
+	// Free up memory wasted by tracking PDHs that will never be
+	// reported (see comment in increaseDesired)
+	bs.Refs = nil
 }
 
-func (bs *BlockState) increaseDesired(classes []string, n int) {
+func (bs *BlockState) increaseDesired(pdh string, classes []string, n int) {
+	if pdh != "" && len(bs.Replicas) == 0 {
+		// Note we only track PDHs if there's a possibility
+		// that we will report the list of referring PDHs,
+		// i.e., if we haven't yet seen a replica.
+		if bs.Refs == nil {
+			bs.Refs = map[string]bool{}
+		}
+		bs.Refs[pdh] = true
+	}
 	bs.RefCount++
 	if len(classes) == 0 {
 		classes = defaultClasses
@@ -109,11 +122,14 @@ func (bsm *BlockStateMap) AddReplicas(mnt *KeepMount, idx []arvados.KeepServiceI
 
 // IncreaseDesired updates the map to indicate the desired replication
 // for the given blocks in the given storage class is at least n.
-func (bsm *BlockStateMap) IncreaseDesired(classes []string, n int, blocks []arvados.SizedDigest) {
+//
+// If pdh is non-empty, it will be tracked and reported in the "lost
+// blocks" report.
+func (bsm *BlockStateMap) IncreaseDesired(pdh string, classes []string, n int, blocks []arvados.SizedDigest) {
 	bsm.mutex.Lock()
 	defer bsm.mutex.Unlock()
 
 	for _, blkid := range blocks {
-		bsm.get(blkid).increaseDesired(classes, n)
+		bsm.get(blkid).increaseDesired(pdh, classes, n)
 	}
 }

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list