[ARVADOS] created: 1.3.0-827-g6c5852fb1
Git user
git at public.curoverse.com
Fri Apr 26 20:07:51 UTC 2019
at 6c5852fb18c0b6422c079c6fee66891a273ad089 (commit)
commit 6c5852fb18c0b6422c079c6fee66891a273ad089
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Fri Apr 26 16:06:39 2019 -0400
15148: Include list of affected PDHs in LostBlocksFile.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/services/keep-balance/balance.go b/services/keep-balance/balance.go
index fc5812cde..08a6c5881 100644
--- a/services/keep-balance/balance.go
+++ b/services/keep-balance/balance.go
@@ -450,7 +450,13 @@ func (bal *Balancer) addCollection(coll arvados.Collection) error {
repl = *coll.ReplicationDesired
}
debugf("%v: %d block x%d", coll.UUID, len(blkids), repl)
- bal.BlockStateMap.IncreaseDesired(coll.StorageClassesDesired, repl, blkids)
+ // Pass pdh to IncreaseDesired only if LostBlocksFile is being
+ // written -- otherwise it's just a waste of memory.
+ pdh := ""
+ if bal.LostBlocksFile != "" {
+ pdh = coll.PortableDataHash
+ }
+ bal.BlockStateMap.IncreaseDesired(pdh, coll.StorageClassesDesired, repl, blkids)
return nil
}
@@ -924,7 +930,11 @@ func (bal *Balancer) collectStatistics(results <-chan balanceResult) {
s.lost.replicas -= surplus
s.lost.blocks++
s.lost.bytes += bytes * int64(-surplus)
- fmt.Fprintf(bal.lostBlocks, "%s\n", strings.SplitN(string(result.blkid), "+", 2)[0])
+ fmt.Fprintf(bal.lostBlocks, "%s", strings.SplitN(string(result.blkid), "+", 2)[0])
+ for pdh := range result.blk.Refs {
+ fmt.Fprintf(bal.lostBlocks, " %s", pdh)
+ }
+ fmt.Fprint(bal.lostBlocks, "\n")
case surplus < 0:
s.underrep.replicas -= surplus
s.underrep.blocks++
diff --git a/services/keep-balance/balance_run_test.go b/services/keep-balance/balance_run_test.go
index ee7aeb9c8..db530bc49 100644
--- a/services/keep-balance/balance_run_test.go
+++ b/services/keep-balance/balance_run_test.go
@@ -452,7 +452,7 @@ func (s *runSuite) TestWriteLostBlocks(c *check.C) {
c.Check(err, check.IsNil)
lost, err := ioutil.ReadFile(lostf.Name())
c.Assert(err, check.IsNil)
- c.Check(string(lost), check.Equals, "37b51d194a7513e45b56f6524f2d51f2\n")
+ c.Check(string(lost), check.Equals, "37b51d194a7513e45b56f6524f2d51f2 fa7aeb5140e2848d39b416daeef4ffc5+45\n")
}
func (s *runSuite) TestDryRun(c *check.C) {
diff --git a/services/keep-balance/block_state.go b/services/keep-balance/block_state.go
index 46e69059c..d9338d0f9 100644
--- a/services/keep-balance/block_state.go
+++ b/services/keep-balance/block_state.go
@@ -23,6 +23,7 @@ type Replica struct {
// replicas actually stored (according to the keepstore indexes we
// know about).
type BlockState struct {
+ Refs map[string]bool // pdh => true (only tracked when len(Replicas)==0)
RefCount int
Replicas []Replica
Desired map[string]int
@@ -40,9 +41,21 @@ var defaultClasses = []string{"default"}
func (bs *BlockState) addReplica(r Replica) {
bs.Replicas = append(bs.Replicas, r)
+ // Free up memory wasted by tracking PDHs that will never be
+ // reported (see comment in increaseDesired)
+ bs.Refs = nil
}
-func (bs *BlockState) increaseDesired(classes []string, n int) {
+func (bs *BlockState) increaseDesired(pdh string, classes []string, n int) {
+ if pdh != "" && len(bs.Replicas) == 0 {
+ // Note we only track PDHs if there's a possibility
+ // that we will report the list of referring PDHs,
+ // i.e., if we haven't yet seen a replica.
+ if bs.Refs == nil {
+ bs.Refs = map[string]bool{}
+ }
+ bs.Refs[pdh] = true
+ }
bs.RefCount++
if len(classes) == 0 {
classes = defaultClasses
@@ -109,11 +122,14 @@ func (bsm *BlockStateMap) AddReplicas(mnt *KeepMount, idx []arvados.KeepServiceI
// IncreaseDesired updates the map to indicate the desired replication
// for the given blocks in the given storage class is at least n.
-func (bsm *BlockStateMap) IncreaseDesired(classes []string, n int, blocks []arvados.SizedDigest) {
+//
+// If pdh is non-empty, it will be tracked and reported in the "lost
+// blocks" report.
+func (bsm *BlockStateMap) IncreaseDesired(pdh string, classes []string, n int, blocks []arvados.SizedDigest) {
bsm.mutex.Lock()
defer bsm.mutex.Unlock()
for _, blkid := range blocks {
- bsm.get(blkid).increaseDesired(classes, n)
+ bsm.get(blkid).increaseDesired(pdh, classes, n)
}
}
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits mailing list