[ARVADOS] updated: 8e1a832244c109351a20a7181da0f65c73e63987

git at public.curoverse.com git at public.curoverse.com
Mon Mar 16 17:15:50 EDT 2015


Summary of changes:
 sdk/go/blockdigest/blockdigest.go                  |   3 +-
 sdk/go/blockdigest/testing.go                      |  12 ++
 sdk/go/logger/main/testlogger.go                   |  29 ---
 services/datamanager/collection/collection.go      |  25 +--
 services/datamanager/collection/collection_test.go | 124 ++++++++++++
 services/datamanager/collection/testing.go         |  60 ++++++
 services/datamanager/datamanager.go                |  22 +--
 .../datamanager/summary/{summary.go => file.go}    |  54 +----
 services/datamanager/summary/summary.go            | 219 +++++++++++----------
 services/datamanager/summary/summary_test.go       | 213 ++++++++++++++++++++
 10 files changed, 551 insertions(+), 210 deletions(-)
 create mode 100644 sdk/go/blockdigest/testing.go
 delete mode 100644 sdk/go/logger/main/testlogger.go
 create mode 100644 services/datamanager/collection/collection_test.go
 create mode 100644 services/datamanager/collection/testing.go
 copy services/datamanager/summary/{summary.go => file.go} (59%)
 create mode 100644 services/datamanager/summary/summary_test.go

       via  8e1a832244c109351a20a7181da0f65c73e63987 (commit)
       via  290ddb6fb0776106cbd68a68f5753452437f357f (commit)
       via  b1233abce0628266a1805e52d9f9fc651c2a5c59 (commit)
       via  f09da35ed284ebc1ed1e941f3e3cb63b06a35d51 (commit)
       via  a56406d730f2a07dd442b9e99ef9dab7b7d81895 (commit)
      from  a5a4f79e91aa8bba1794394646808f6d4c444661 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit 8e1a832244c109351a20a7181da0f65c73e63987
Author: mishaz <misha at curoverse.com>
Date:   Mon Mar 16 20:42:19 2015 +0000

    Added a couple helper methods to remove boilerplate from unittests, to make them more readable.

diff --git a/services/datamanager/collection/testing.go b/services/datamanager/collection/testing.go
index 1d3c2e8..f3c1f47 100644
--- a/services/datamanager/collection/testing.go
+++ b/services/datamanager/collection/testing.go
@@ -44,3 +44,17 @@ func MakeTestReadCollections(specs []TestCollectionSpec) (rc ReadCollections) {
 	}
 	return
 }
+
+// Returns a slice giving the collection index of each collection that
+// was passed in to MakeTestReadCollections. rc.Summarize() must be
+// called before this method, since Summarize() assigns an index to
+// each collection.
+func (rc ReadCollections) CollectionIndicesForTesting() (indices []int) {
+	// TODO(misha): Assert that rc.Summarize() has been called.
+	numCollections := len(rc.CollectionIndexToUuid)
+	indices = make([]int, numCollections)
+	for i := 0; i < numCollections; i++ {
+		indices[i] = rc.CollectionUuidToIndex[fmt.Sprintf("col%d", i)]
+	}
+	return
+}
diff --git a/services/datamanager/summary/summary_test.go b/services/datamanager/summary/summary_test.go
index c86d3f2..05db2fd 100644
--- a/services/datamanager/summary/summary_test.go
+++ b/services/datamanager/summary/summary_test.go
@@ -36,6 +36,17 @@ func (cis CollectionIndexSet) ToSlice() (ints []int) {
 	return
 }
 
+// Takes a map from block digest to replication level and represents
+// it in a keep.ReadServers structure.
+func SpecifyReplication(digestToReplication map[int]int) (rs keep.ReadServers) {
+	rs.BlockToServers = make(map[blockdigest.BlockDigest][]keep.BlockServerInfo)
+	for digest, replication := range digestToReplication {
+		rs.BlockToServers[blockdigest.MakeTestBlockDigest(digest)] =
+			make([]keep.BlockServerInfo, replication)
+	}
+	return
+}
+
 // Verifies that
 // blocks.ToCollectionIndexSet(rc.BlockToCollectionIndices) returns
 // expectedCollections.
@@ -81,30 +92,12 @@ func TestToCollectionIndexSet(t *testing.T) {
 
 func TestSimpleSummary(t *testing.T) {
 	rc := collection.MakeTestReadCollections([]collection.TestCollectionSpec{
-		collection.TestCollectionSpec{
-			ReplicationLevel: 1,
-			Blocks:           []int{1, 2},
-		},
+		collection.TestCollectionSpec{ReplicationLevel: 1, Blocks: []int{1, 2}},
 	})
-
 	rc.Summarize()
+	cIndex := rc.CollectionIndicesForTesting()
 
-	// The internals aren't actually examined, so we can reuse the same one.
-	dummyBlockServerInfo := keep.BlockServerInfo{}
-
-	blockDigest1 := blockdigest.MakeTestBlockDigest(1)
-	blockDigest2 := blockdigest.MakeTestBlockDigest(2)
-
-	keepInfo := keep.ReadServers{
-		BlockToServers: map[blockdigest.BlockDigest][]keep.BlockServerInfo{
-			blockDigest1: []keep.BlockServerInfo{dummyBlockServerInfo},
-			blockDigest2: []keep.BlockServerInfo{dummyBlockServerInfo},
-		},
-	}
-
-	returnedSummary := SummarizeReplication(rc, keepInfo)
-
-	c := rc.UuidToCollection["col0"]
+	keepInfo := SpecifyReplication(map[int]int{1: 1, 2: 1})
 
 	expectedSummary := ReplicationSummary{
 		CollectionBlocksNotInKeep:  BlockSet{},
@@ -113,13 +106,14 @@ func TestSimpleSummary(t *testing.T) {
 		CorrectlyReplicatedBlocks:  BlockSetFromSlice([]int{1, 2}),
 		KeepBlocksNotInCollections: BlockSet{},
 
-		CollectionsNotFullyInKeep:  CollectionIndexSet{},
-		UnderReplicatedCollections: CollectionIndexSet{},
-		OverReplicatedCollections:  CollectionIndexSet{},
-		CorrectlyReplicatedCollections: CollectionIndexSetFromSlice(
-			[]int{rc.CollectionUuidToIndex[c.Uuid]}),
+		CollectionsNotFullyInKeep:      CollectionIndexSet{},
+		UnderReplicatedCollections:     CollectionIndexSet{},
+		OverReplicatedCollections:      CollectionIndexSet{},
+		CorrectlyReplicatedCollections: CollectionIndexSetFromSlice([]int{cIndex[0]}),
 	}
 
+	returnedSummary := SummarizeReplication(rc, keepInfo)
+
 	if !reflect.DeepEqual(returnedSummary, expectedSummary) {
 		t.Fatalf("Expected returnedSummary to look like %+v but instead it is %+v", expectedSummary, returnedSummary)
 	}
@@ -127,28 +121,12 @@ func TestSimpleSummary(t *testing.T) {
 
 func TestMissingBlock(t *testing.T) {
 	rc := collection.MakeTestReadCollections([]collection.TestCollectionSpec{
-		collection.TestCollectionSpec{
-			ReplicationLevel: 1,
-			Blocks:           []int{1, 2},
-		},
+		collection.TestCollectionSpec{ReplicationLevel: 1, Blocks: []int{1, 2}},
 	})
-
 	rc.Summarize()
+	cIndex := rc.CollectionIndicesForTesting()
 
-	// The internals aren't actually examined, so we can reuse the same one.
-	dummyBlockServerInfo := keep.BlockServerInfo{}
-
-	blockDigest1 := blockdigest.MakeTestBlockDigest(1)
-
-	keepInfo := keep.ReadServers{
-		BlockToServers: map[blockdigest.BlockDigest][]keep.BlockServerInfo{
-			blockDigest1: []keep.BlockServerInfo{dummyBlockServerInfo},
-		},
-	}
-
-	returnedSummary := SummarizeReplication(rc, keepInfo)
-
-	c := rc.UuidToCollection["col0"]
+	keepInfo := SpecifyReplication(map[int]int{1: 1})
 
 	expectedSummary := ReplicationSummary{
 		CollectionBlocksNotInKeep:  BlockSetFromSlice([]int{2}),
@@ -157,44 +135,29 @@ func TestMissingBlock(t *testing.T) {
 		CorrectlyReplicatedBlocks:  BlockSetFromSlice([]int{1}),
 		KeepBlocksNotInCollections: BlockSet{},
 
-		CollectionsNotFullyInKeep: CollectionIndexSetFromSlice(
-			[]int{rc.CollectionUuidToIndex[c.Uuid]}),
+		CollectionsNotFullyInKeep:      CollectionIndexSetFromSlice([]int{cIndex[0]}),
 		UnderReplicatedCollections:     CollectionIndexSet{},
 		OverReplicatedCollections:      CollectionIndexSet{},
 		CorrectlyReplicatedCollections: CollectionIndexSet{},
 	}
 
+	returnedSummary := SummarizeReplication(rc, keepInfo)
+
 	if !reflect.DeepEqual(returnedSummary, expectedSummary) {
-		t.Fatalf("Expected returnedSummary to look like %+v but instead it is %+v", expectedSummary, returnedSummary)
+		t.Fatalf("Expected returnedSummary to look like %+v but instead it is %+v",
+			expectedSummary,
+			returnedSummary)
 	}
 }
 
 func TestUnderAndOverReplicatedBlocks(t *testing.T) {
 	rc := collection.MakeTestReadCollections([]collection.TestCollectionSpec{
-		collection.TestCollectionSpec{
-			ReplicationLevel: 2,
-			Blocks:           []int{1, 2},
-		},
+		collection.TestCollectionSpec{ReplicationLevel: 2, Blocks: []int{1, 2}},
 	})
-
 	rc.Summarize()
+	cIndex := rc.CollectionIndicesForTesting()
 
-	// The internals aren't actually examined, so we can reuse the same one.
-	dummyBlockServerInfo := keep.BlockServerInfo{}
-
-	blockDigest1 := blockdigest.MakeTestBlockDigest(1)
-	blockDigest2 := blockdigest.MakeTestBlockDigest(2)
-
-	keepInfo := keep.ReadServers{
-		BlockToServers: map[blockdigest.BlockDigest][]keep.BlockServerInfo{
-			blockDigest1: []keep.BlockServerInfo{dummyBlockServerInfo},
-			blockDigest2: []keep.BlockServerInfo{dummyBlockServerInfo, dummyBlockServerInfo, dummyBlockServerInfo},
-		},
-	}
-
-	returnedSummary := SummarizeReplication(rc, keepInfo)
-
-	c := rc.UuidToCollection["col0"]
+	keepInfo := SpecifyReplication(map[int]int{1: 1, 2: 3})
 
 	expectedSummary := ReplicationSummary{
 		CollectionBlocksNotInKeep:  BlockSet{},
@@ -203,56 +166,31 @@ func TestUnderAndOverReplicatedBlocks(t *testing.T) {
 		CorrectlyReplicatedBlocks:  BlockSet{},
 		KeepBlocksNotInCollections: BlockSet{},
 
-		CollectionsNotFullyInKeep: CollectionIndexSet{},
-		UnderReplicatedCollections: CollectionIndexSetFromSlice(
-			[]int{rc.CollectionUuidToIndex[c.Uuid]}),
-		OverReplicatedCollections: CollectionIndexSetFromSlice(
-			[]int{rc.CollectionUuidToIndex[c.Uuid]}),
+		CollectionsNotFullyInKeep:      CollectionIndexSet{},
+		UnderReplicatedCollections:     CollectionIndexSetFromSlice([]int{cIndex[0]}),
+		OverReplicatedCollections:      CollectionIndexSetFromSlice([]int{cIndex[0]}),
 		CorrectlyReplicatedCollections: CollectionIndexSet{},
 	}
 
+	returnedSummary := SummarizeReplication(rc, keepInfo)
+
 	if !reflect.DeepEqual(returnedSummary, expectedSummary) {
-		t.Fatalf("Expected returnedSummary to look like %+v but instead it is %+v", expectedSummary, returnedSummary)
+		t.Fatalf("Expected returnedSummary to look like %+v but instead it is %+v",
+			expectedSummary,
+			returnedSummary)
 	}
 }
 
 func TestMixedReplication(t *testing.T) {
 	rc := collection.MakeTestReadCollections([]collection.TestCollectionSpec{
-		collection.TestCollectionSpec{
-			ReplicationLevel: 1,
-			Blocks:           []int{1, 2},
-		},
-		collection.TestCollectionSpec{
-			ReplicationLevel: 1,
-			Blocks:           []int{3, 4},
-		},
-		collection.TestCollectionSpec{
-			ReplicationLevel: 2,
-			Blocks:           []int{5, 6},
-		},
+		collection.TestCollectionSpec{ReplicationLevel: 1, Blocks: []int{1, 2}},
+		collection.TestCollectionSpec{ReplicationLevel: 1, Blocks: []int{3, 4}},
+		collection.TestCollectionSpec{ReplicationLevel: 2, Blocks: []int{5, 6}},
 	})
-
 	rc.Summarize()
+	cIndex := rc.CollectionIndicesForTesting()
 
-	// The internals aren't actually examined, so we can reuse the same one.
-	dummyBlockServerInfo := keep.BlockServerInfo{}
-
-	keepInfo := keep.ReadServers{
-		BlockToServers: map[blockdigest.BlockDigest][]keep.BlockServerInfo{
-			blockdigest.MakeTestBlockDigest(1): []keep.BlockServerInfo{dummyBlockServerInfo},
-			blockdigest.MakeTestBlockDigest(2): []keep.BlockServerInfo{dummyBlockServerInfo},
-			blockdigest.MakeTestBlockDigest(3): []keep.BlockServerInfo{dummyBlockServerInfo},
-			blockdigest.MakeTestBlockDigest(5): []keep.BlockServerInfo{dummyBlockServerInfo},
-			blockdigest.MakeTestBlockDigest(6): []keep.BlockServerInfo{dummyBlockServerInfo, dummyBlockServerInfo, dummyBlockServerInfo},
-			blockdigest.MakeTestBlockDigest(7): []keep.BlockServerInfo{dummyBlockServerInfo, dummyBlockServerInfo},
-		},
-	}
-
-	returnedSummary := SummarizeReplication(rc, keepInfo)
-
-	c0 := rc.UuidToCollection["col0"]
-	c1 := rc.UuidToCollection["col1"]
-	c2 := rc.UuidToCollection["col2"]
+	keepInfo := SpecifyReplication(map[int]int{1: 1, 2: 1, 3: 1, 5: 1, 6: 3, 7: 2})
 
 	expectedSummary := ReplicationSummary{
 		CollectionBlocksNotInKeep:  BlockSetFromSlice([]int{4}),
@@ -261,16 +199,14 @@ func TestMixedReplication(t *testing.T) {
 		CorrectlyReplicatedBlocks:  BlockSetFromSlice([]int{1, 2, 3}),
 		KeepBlocksNotInCollections: BlockSetFromSlice([]int{7}),
 
-		CollectionsNotFullyInKeep: CollectionIndexSetFromSlice(
-			[]int{rc.CollectionUuidToIndex[c1.Uuid]}),
-		UnderReplicatedCollections: CollectionIndexSetFromSlice(
-			[]int{rc.CollectionUuidToIndex[c2.Uuid]}),
-		OverReplicatedCollections: CollectionIndexSetFromSlice(
-			[]int{rc.CollectionUuidToIndex[c2.Uuid]}),
-		CorrectlyReplicatedCollections: CollectionIndexSetFromSlice(
-			[]int{rc.CollectionUuidToIndex[c0.Uuid]}),
+		CollectionsNotFullyInKeep:      CollectionIndexSetFromSlice([]int{cIndex[1]}),
+		UnderReplicatedCollections:     CollectionIndexSetFromSlice([]int{cIndex[2]}),
+		OverReplicatedCollections:      CollectionIndexSetFromSlice([]int{cIndex[2]}),
+		CorrectlyReplicatedCollections: CollectionIndexSetFromSlice([]int{cIndex[0]}),
 	}
 
+	returnedSummary := SummarizeReplication(rc, keepInfo)
+
 	if !reflect.DeepEqual(returnedSummary, expectedSummary) {
 		t.Fatalf("Expected returnedSummary to look like: \n%+v but instead it is: \n%+v. Index to UUID is %v. BlockToCollectionIndices is %v.", expectedSummary, returnedSummary, rc.CollectionIndexToUuid, rc.BlockToCollectionIndices)
 	}

commit 290ddb6fb0776106cbd68a68f5753452437f357f
Author: mishaz <misha at curoverse.com>
Date:   Mon Mar 16 18:44:08 2015 +0000

    Added some tests.

diff --git a/services/datamanager/summary/summary_test.go b/services/datamanager/summary/summary_test.go
index fb9e18b..c86d3f2 100644
--- a/services/datamanager/summary/summary_test.go
+++ b/services/datamanager/summary/summary_test.go
@@ -5,6 +5,7 @@ import (
 	"git.curoverse.com/arvados.git/services/datamanager/collection"
 	"git.curoverse.com/arvados.git/services/datamanager/keep"
 	"reflect"
+	"sort"
 	"testing"
 )
 
@@ -31,9 +32,13 @@ func (cis CollectionIndexSet) ToSlice() (ints []int) {
 		ints[i] = collectionIndex
 		i++
 	}
+	sort.Ints(ints)
 	return
 }
 
+// Verifies that
+// blocks.ToCollectionIndexSet(rc.BlockToCollectionIndices) returns
+// expectedCollections.
 func VerifyToCollectionIndexSet(
 	t *testing.T,
 	blocks []int,
@@ -62,11 +67,16 @@ func VerifyToCollectionIndexSet(
 }
 
 func TestToCollectionIndexSet(t *testing.T) {
+	VerifyToCollectionIndexSet(t, []int{6}, map[int][]int{6: []int{0}}, []int{0})
 	VerifyToCollectionIndexSet(t, []int{4}, map[int][]int{4: []int{1}}, []int{1})
 	VerifyToCollectionIndexSet(t, []int{4}, map[int][]int{4: []int{1, 9}}, []int{1, 9})
 	VerifyToCollectionIndexSet(t, []int{5, 6},
 		map[int][]int{5: []int{2, 3}, 6: []int{3, 4}},
 		[]int{2, 3, 4})
+	VerifyToCollectionIndexSet(t, []int{5, 6},
+		map[int][]int{5: []int{8}, 6: []int{4}},
+		[]int{4, 8})
+	VerifyToCollectionIndexSet(t, []int{6}, map[int][]int{5: []int{0}}, []int{})
 }
 
 func TestSimpleSummary(t *testing.T) {

commit b1233abce0628266a1805e52d9f9fc651c2a5c59
Author: mishaz <misha at curoverse.com>
Date:   Mon Mar 16 18:31:10 2015 +0000

    Cleaned up test.

diff --git a/services/datamanager/summary/summary_test.go b/services/datamanager/summary/summary_test.go
index ff73b87..fb9e18b 100644
--- a/services/datamanager/summary/summary_test.go
+++ b/services/datamanager/summary/summary_test.go
@@ -62,14 +62,13 @@ func VerifyToCollectionIndexSet(
 }
 
 func TestToCollectionIndexSet(t *testing.T) {
-	VerifyToCollectionIndexSet(t, []int{4}, map[int][]int{4:[]int{1}}, []int{1})
-	VerifyToCollectionIndexSet(t, []int{4}, map[int][]int{4:[]int{1,9}}, []int{1,9})
-	VerifyToCollectionIndexSet(t, []int{5,6},
-		map[int][]int{5:[]int{2,3}, 6:[]int{3,4}},
-		[]int{2,3,4})
+	VerifyToCollectionIndexSet(t, []int{4}, map[int][]int{4: []int{1}}, []int{1})
+	VerifyToCollectionIndexSet(t, []int{4}, map[int][]int{4: []int{1, 9}}, []int{1, 9})
+	VerifyToCollectionIndexSet(t, []int{5, 6},
+		map[int][]int{5: []int{2, 3}, 6: []int{3, 4}},
+		[]int{2, 3, 4})
 }
 
-
 func TestSimpleSummary(t *testing.T) {
 	rc := collection.MakeTestReadCollections([]collection.TestCollectionSpec{
 		collection.TestCollectionSpec{
@@ -98,10 +97,10 @@ func TestSimpleSummary(t *testing.T) {
 	c := rc.UuidToCollection["col0"]
 
 	expectedSummary := ReplicationSummary{
-		CollectionBlocksNotInKeep: BlockSet{},
-		UnderReplicatedBlocks:     BlockSet{},
-		OverReplicatedBlocks:      BlockSet{},
-		CorrectlyReplicatedBlocks: BlockSetFromSlice([]int{1,2}),
+		CollectionBlocksNotInKeep:  BlockSet{},
+		UnderReplicatedBlocks:      BlockSet{},
+		OverReplicatedBlocks:       BlockSet{},
+		CorrectlyReplicatedBlocks:  BlockSetFromSlice([]int{1, 2}),
 		KeepBlocksNotInCollections: BlockSet{},
 
 		CollectionsNotFullyInKeep:  CollectionIndexSet{},
@@ -142,10 +141,10 @@ func TestMissingBlock(t *testing.T) {
 	c := rc.UuidToCollection["col0"]
 
 	expectedSummary := ReplicationSummary{
-		CollectionBlocksNotInKeep: BlockSetFromSlice([]int{2}),
-		UnderReplicatedBlocks: BlockSet{},
-		OverReplicatedBlocks:  BlockSet{},
-		CorrectlyReplicatedBlocks: BlockSetFromSlice([]int{1}),
+		CollectionBlocksNotInKeep:  BlockSetFromSlice([]int{2}),
+		UnderReplicatedBlocks:      BlockSet{},
+		OverReplicatedBlocks:       BlockSet{},
+		CorrectlyReplicatedBlocks:  BlockSetFromSlice([]int{1}),
 		KeepBlocksNotInCollections: BlockSet{},
 
 		CollectionsNotFullyInKeep: CollectionIndexSetFromSlice(
@@ -188,9 +187,9 @@ func TestUnderAndOverReplicatedBlocks(t *testing.T) {
 	c := rc.UuidToCollection["col0"]
 
 	expectedSummary := ReplicationSummary{
-		CollectionBlocksNotInKeep: BlockSet{},
-		UnderReplicatedBlocks: BlockSetFromSlice([]int{1}),
-		OverReplicatedBlocks: BlockSetFromSlice([]int{2}),
+		CollectionBlocksNotInKeep:  BlockSet{},
+		UnderReplicatedBlocks:      BlockSetFromSlice([]int{1}),
+		OverReplicatedBlocks:       BlockSetFromSlice([]int{2}),
 		CorrectlyReplicatedBlocks:  BlockSet{},
 		KeepBlocksNotInCollections: BlockSet{},
 
@@ -246,10 +245,10 @@ func TestMixedReplication(t *testing.T) {
 	c2 := rc.UuidToCollection["col2"]
 
 	expectedSummary := ReplicationSummary{
-		CollectionBlocksNotInKeep: BlockSetFromSlice([]int{4}),
-		UnderReplicatedBlocks: BlockSetFromSlice([]int{5}),
-		OverReplicatedBlocks: BlockSetFromSlice([]int{6}),
-		CorrectlyReplicatedBlocks: BlockSetFromSlice([]int{1,2,3}),
+		CollectionBlocksNotInKeep:  BlockSetFromSlice([]int{4}),
+		UnderReplicatedBlocks:      BlockSetFromSlice([]int{5}),
+		OverReplicatedBlocks:       BlockSetFromSlice([]int{6}),
+		CorrectlyReplicatedBlocks:  BlockSetFromSlice([]int{1, 2, 3}),
 		KeepBlocksNotInCollections: BlockSetFromSlice([]int{7}),
 
 		CollectionsNotFullyInKeep: CollectionIndexSetFromSlice(
@@ -262,12 +261,6 @@ func TestMixedReplication(t *testing.T) {
 			[]int{rc.CollectionUuidToIndex[c0.Uuid]}),
 	}
 
-	tempCis := make(CollectionIndexSet)
-	returnedSummary.CollectionBlocksNotInKeep.ToCollectionIndexSet(rc, &tempCis)
-	t.Logf("blocks not in keep: %v, collections not fully in keep: %v",
-		returnedSummary.CollectionBlocksNotInKeep,
-		tempCis)
-
 	if !reflect.DeepEqual(returnedSummary, expectedSummary) {
 		t.Fatalf("Expected returnedSummary to look like: \n%+v but instead it is: \n%+v. Index to UUID is %v. BlockToCollectionIndices is %v.", expectedSummary, returnedSummary, rc.CollectionIndexToUuid, rc.BlockToCollectionIndices)
 	}

commit f09da35ed284ebc1ed1e941f3e3cb63b06a35d51
Author: mishaz <misha at curoverse.com>
Date:   Mon Mar 16 18:10:39 2015 +0000

    Fixed bug in BlockSet.ToCollectionIndexSet.
    Lots of unittests in summary_test.go.

diff --git a/services/datamanager/summary/file.go b/services/datamanager/summary/file.go
index 3a853bc..15c3699 100644
--- a/services/datamanager/summary/file.go
+++ b/services/datamanager/summary/file.go
@@ -14,6 +14,13 @@ import (
 	"os"
 )
 
+// Used to locally cache data read from servers to reduce execution
+// time when developing. Not for use in production.
+type serializedData struct {
+	ReadCollections collection.ReadCollections
+	KeepServerInfo  keep.ReadServers
+}
+
 var (
 	writeDataTo  string
 	readDataFrom string
diff --git a/services/datamanager/summary/summary.go b/services/datamanager/summary/summary.go
index 71d1aff..37e9373 100644
--- a/services/datamanager/summary/summary.go
+++ b/services/datamanager/summary/summary.go
@@ -16,37 +16,20 @@ func (bs BlockSet) Insert(digest blockdigest.BlockDigest) {
 	bs[digest] = struct{}{}
 }
 
-func BlockSetFromSlice(digests []blockdigest.BlockDigest) (bs BlockSet) {
-	bs = make(BlockSet)
-	for _, digest := range digests {
-		bs.Insert(digest)
-	}
-	return
-}
-
-// We use the collection index to save space. To convert to & from the
-// uuid, use collection.ReadCollections' fields CollectionIndexToUuid
-// and CollectionUuidToIndex.
+// We use the collection index to save space. To convert to and from
+// the uuid, use collection.ReadCollections' fields
+// CollectionIndexToUuid and CollectionUuidToIndex.
 type CollectionIndexSet map[int]struct{}
 
 func (cis CollectionIndexSet) Insert(collectionIndex int) {
 	cis[collectionIndex] = struct{}{}
 }
 
-func CollectionIndexSetFromSlice(indices []int) (cis CollectionIndexSet) {
-	cis = make(CollectionIndexSet)
-	for _, index := range indices {
-		cis.Insert(index)
-	}
-	return
-}
-
-
 func (bs BlockSet) ToCollectionIndexSet(
 	readCollections collection.ReadCollections,
 	collectionIndexSet *CollectionIndexSet) {
 	for block := range bs {
-		for collectionIndex := range readCollections.BlockToCollectionIndices[block] {
+		for _,collectionIndex := range readCollections.BlockToCollectionIndices[block] {
 			collectionIndexSet.Insert(collectionIndex)
 		}
 	}
@@ -65,6 +48,7 @@ type ReplicationSummary struct {
 	CorrectlyReplicatedCollections CollectionIndexSet
 }
 
+// This struct counts the elements in each set in ReplicationSummary.
 type ReplicationSummaryCounts struct {
 	CollectionBlocksNotInKeep      int
 	UnderReplicatedBlocks          int
@@ -77,12 +61,9 @@ type ReplicationSummaryCounts struct {
 	CorrectlyReplicatedCollections int
 }
 
-type serializedData struct {
-	ReadCollections collection.ReadCollections
-	KeepServerInfo  keep.ReadServers
-}
-
 func (rs ReplicationSummary) ComputeCounts() (rsc ReplicationSummaryCounts) {
+	// TODO(misha): Consider replacing this brute-force approach by
+	// iterating through the fields using reflection.
 	rsc.CollectionBlocksNotInKeep = len(rs.CollectionBlocksNotInKeep)
 	rsc.UnderReplicatedBlocks = len(rs.UnderReplicatedBlocks)
 	rsc.OverReplicatedBlocks = len(rs.OverReplicatedBlocks)
diff --git a/services/datamanager/summary/summary_test.go b/services/datamanager/summary/summary_test.go
new file mode 100644
index 0000000..ff73b87
--- /dev/null
+++ b/services/datamanager/summary/summary_test.go
@@ -0,0 +1,274 @@
+package summary
+
+import (
+	"git.curoverse.com/arvados.git/sdk/go/blockdigest"
+	"git.curoverse.com/arvados.git/services/datamanager/collection"
+	"git.curoverse.com/arvados.git/services/datamanager/keep"
+	"reflect"
+	"testing"
+)
+
+func BlockSetFromSlice(digests []int) (bs BlockSet) {
+	bs = make(BlockSet)
+	for _, digest := range digests {
+		bs.Insert(blockdigest.MakeTestBlockDigest(digest))
+	}
+	return
+}
+
+func CollectionIndexSetFromSlice(indices []int) (cis CollectionIndexSet) {
+	cis = make(CollectionIndexSet)
+	for _, index := range indices {
+		cis.Insert(index)
+	}
+	return
+}
+
+func (cis CollectionIndexSet) ToSlice() (ints []int) {
+	ints = make([]int, len(cis))
+	i := 0
+	for collectionIndex := range cis {
+		ints[i] = collectionIndex
+		i++
+	}
+	return
+}
+
+func VerifyToCollectionIndexSet(
+	t *testing.T,
+	blocks []int,
+	blockToCollectionIndices map[int][]int,
+	expectedCollections []int) {
+
+	expected := CollectionIndexSetFromSlice(expectedCollections)
+
+	rc := collection.ReadCollections{
+		BlockToCollectionIndices: map[blockdigest.BlockDigest][]int{},
+	}
+	for digest, indices := range blockToCollectionIndices {
+		rc.BlockToCollectionIndices[blockdigest.MakeTestBlockDigest(digest)] = indices
+	}
+
+	returned := make(CollectionIndexSet)
+	BlockSetFromSlice(blocks).ToCollectionIndexSet(rc, &returned)
+
+	if !reflect.DeepEqual(returned, expected) {
+		t.Errorf("Expected %v.ToCollectionIndexSet(%v) to return \n %v \n but instead received \n %v",
+			blocks,
+			blockToCollectionIndices,
+			expectedCollections,
+			returned.ToSlice())
+	}
+}
+
+func TestToCollectionIndexSet(t *testing.T) {
+	VerifyToCollectionIndexSet(t, []int{4}, map[int][]int{4:[]int{1}}, []int{1})
+	VerifyToCollectionIndexSet(t, []int{4}, map[int][]int{4:[]int{1,9}}, []int{1,9})
+	VerifyToCollectionIndexSet(t, []int{5,6},
+		map[int][]int{5:[]int{2,3}, 6:[]int{3,4}},
+		[]int{2,3,4})
+}
+
+
+func TestSimpleSummary(t *testing.T) {
+	rc := collection.MakeTestReadCollections([]collection.TestCollectionSpec{
+		collection.TestCollectionSpec{
+			ReplicationLevel: 1,
+			Blocks:           []int{1, 2},
+		},
+	})
+
+	rc.Summarize()
+
+	// The internals aren't actually examined, so we can reuse the same one.
+	dummyBlockServerInfo := keep.BlockServerInfo{}
+
+	blockDigest1 := blockdigest.MakeTestBlockDigest(1)
+	blockDigest2 := blockdigest.MakeTestBlockDigest(2)
+
+	keepInfo := keep.ReadServers{
+		BlockToServers: map[blockdigest.BlockDigest][]keep.BlockServerInfo{
+			blockDigest1: []keep.BlockServerInfo{dummyBlockServerInfo},
+			blockDigest2: []keep.BlockServerInfo{dummyBlockServerInfo},
+		},
+	}
+
+	returnedSummary := SummarizeReplication(rc, keepInfo)
+
+	c := rc.UuidToCollection["col0"]
+
+	expectedSummary := ReplicationSummary{
+		CollectionBlocksNotInKeep: BlockSet{},
+		UnderReplicatedBlocks:     BlockSet{},
+		OverReplicatedBlocks:      BlockSet{},
+		CorrectlyReplicatedBlocks: BlockSetFromSlice([]int{1,2}),
+		KeepBlocksNotInCollections: BlockSet{},
+
+		CollectionsNotFullyInKeep:  CollectionIndexSet{},
+		UnderReplicatedCollections: CollectionIndexSet{},
+		OverReplicatedCollections:  CollectionIndexSet{},
+		CorrectlyReplicatedCollections: CollectionIndexSetFromSlice(
+			[]int{rc.CollectionUuidToIndex[c.Uuid]}),
+	}
+
+	if !reflect.DeepEqual(returnedSummary, expectedSummary) {
+		t.Fatalf("Expected returnedSummary to look like %+v but instead it is %+v", expectedSummary, returnedSummary)
+	}
+}
+
+func TestMissingBlock(t *testing.T) {
+	rc := collection.MakeTestReadCollections([]collection.TestCollectionSpec{
+		collection.TestCollectionSpec{
+			ReplicationLevel: 1,
+			Blocks:           []int{1, 2},
+		},
+	})
+
+	rc.Summarize()
+
+	// The internals aren't actually examined, so we can reuse the same one.
+	dummyBlockServerInfo := keep.BlockServerInfo{}
+
+	blockDigest1 := blockdigest.MakeTestBlockDigest(1)
+
+	keepInfo := keep.ReadServers{
+		BlockToServers: map[blockdigest.BlockDigest][]keep.BlockServerInfo{
+			blockDigest1: []keep.BlockServerInfo{dummyBlockServerInfo},
+		},
+	}
+
+	returnedSummary := SummarizeReplication(rc, keepInfo)
+
+	c := rc.UuidToCollection["col0"]
+
+	expectedSummary := ReplicationSummary{
+		CollectionBlocksNotInKeep: BlockSetFromSlice([]int{2}),
+		UnderReplicatedBlocks: BlockSet{},
+		OverReplicatedBlocks:  BlockSet{},
+		CorrectlyReplicatedBlocks: BlockSetFromSlice([]int{1}),
+		KeepBlocksNotInCollections: BlockSet{},
+
+		CollectionsNotFullyInKeep: CollectionIndexSetFromSlice(
+			[]int{rc.CollectionUuidToIndex[c.Uuid]}),
+		UnderReplicatedCollections:     CollectionIndexSet{},
+		OverReplicatedCollections:      CollectionIndexSet{},
+		CorrectlyReplicatedCollections: CollectionIndexSet{},
+	}
+
+	if !reflect.DeepEqual(returnedSummary, expectedSummary) {
+		t.Fatalf("Expected returnedSummary to look like %+v but instead it is %+v", expectedSummary, returnedSummary)
+	}
+}
+
+func TestUnderAndOverReplicatedBlocks(t *testing.T) {
+	rc := collection.MakeTestReadCollections([]collection.TestCollectionSpec{
+		collection.TestCollectionSpec{
+			ReplicationLevel: 2,
+			Blocks:           []int{1, 2},
+		},
+	})
+
+	rc.Summarize()
+
+	// The internals aren't actually examined, so we can reuse the same one.
+	dummyBlockServerInfo := keep.BlockServerInfo{}
+
+	blockDigest1 := blockdigest.MakeTestBlockDigest(1)
+	blockDigest2 := blockdigest.MakeTestBlockDigest(2)
+
+	keepInfo := keep.ReadServers{
+		BlockToServers: map[blockdigest.BlockDigest][]keep.BlockServerInfo{
+			blockDigest1: []keep.BlockServerInfo{dummyBlockServerInfo},
+			blockDigest2: []keep.BlockServerInfo{dummyBlockServerInfo, dummyBlockServerInfo, dummyBlockServerInfo},
+		},
+	}
+
+	returnedSummary := SummarizeReplication(rc, keepInfo)
+
+	c := rc.UuidToCollection["col0"]
+
+	expectedSummary := ReplicationSummary{
+		CollectionBlocksNotInKeep: BlockSet{},
+		UnderReplicatedBlocks: BlockSetFromSlice([]int{1}),
+		OverReplicatedBlocks: BlockSetFromSlice([]int{2}),
+		CorrectlyReplicatedBlocks:  BlockSet{},
+		KeepBlocksNotInCollections: BlockSet{},
+
+		CollectionsNotFullyInKeep: CollectionIndexSet{},
+		UnderReplicatedCollections: CollectionIndexSetFromSlice(
+			[]int{rc.CollectionUuidToIndex[c.Uuid]}),
+		OverReplicatedCollections: CollectionIndexSetFromSlice(
+			[]int{rc.CollectionUuidToIndex[c.Uuid]}),
+		CorrectlyReplicatedCollections: CollectionIndexSet{},
+	}
+
+	if !reflect.DeepEqual(returnedSummary, expectedSummary) {
+		t.Fatalf("Expected returnedSummary to look like %+v but instead it is %+v", expectedSummary, returnedSummary)
+	}
+}
+
+func TestMixedReplication(t *testing.T) {
+	rc := collection.MakeTestReadCollections([]collection.TestCollectionSpec{
+		collection.TestCollectionSpec{
+			ReplicationLevel: 1,
+			Blocks:           []int{1, 2},
+		},
+		collection.TestCollectionSpec{
+			ReplicationLevel: 1,
+			Blocks:           []int{3, 4},
+		},
+		collection.TestCollectionSpec{
+			ReplicationLevel: 2,
+			Blocks:           []int{5, 6},
+		},
+	})
+
+	rc.Summarize()
+
+	// The internals aren't actually examined, so we can reuse the same one.
+	dummyBlockServerInfo := keep.BlockServerInfo{}
+
+	keepInfo := keep.ReadServers{
+		BlockToServers: map[blockdigest.BlockDigest][]keep.BlockServerInfo{
+			blockdigest.MakeTestBlockDigest(1): []keep.BlockServerInfo{dummyBlockServerInfo},
+			blockdigest.MakeTestBlockDigest(2): []keep.BlockServerInfo{dummyBlockServerInfo},
+			blockdigest.MakeTestBlockDigest(3): []keep.BlockServerInfo{dummyBlockServerInfo},
+			blockdigest.MakeTestBlockDigest(5): []keep.BlockServerInfo{dummyBlockServerInfo},
+			blockdigest.MakeTestBlockDigest(6): []keep.BlockServerInfo{dummyBlockServerInfo, dummyBlockServerInfo, dummyBlockServerInfo},
+			blockdigest.MakeTestBlockDigest(7): []keep.BlockServerInfo{dummyBlockServerInfo, dummyBlockServerInfo},
+		},
+	}
+
+	returnedSummary := SummarizeReplication(rc, keepInfo)
+
+	c0 := rc.UuidToCollection["col0"]
+	c1 := rc.UuidToCollection["col1"]
+	c2 := rc.UuidToCollection["col2"]
+
+	expectedSummary := ReplicationSummary{
+		CollectionBlocksNotInKeep: BlockSetFromSlice([]int{4}),
+		UnderReplicatedBlocks: BlockSetFromSlice([]int{5}),
+		OverReplicatedBlocks: BlockSetFromSlice([]int{6}),
+		CorrectlyReplicatedBlocks: BlockSetFromSlice([]int{1,2,3}),
+		KeepBlocksNotInCollections: BlockSetFromSlice([]int{7}),
+
+		CollectionsNotFullyInKeep: CollectionIndexSetFromSlice(
+			[]int{rc.CollectionUuidToIndex[c1.Uuid]}),
+		UnderReplicatedCollections: CollectionIndexSetFromSlice(
+			[]int{rc.CollectionUuidToIndex[c2.Uuid]}),
+		OverReplicatedCollections: CollectionIndexSetFromSlice(
+			[]int{rc.CollectionUuidToIndex[c2.Uuid]}),
+		CorrectlyReplicatedCollections: CollectionIndexSetFromSlice(
+			[]int{rc.CollectionUuidToIndex[c0.Uuid]}),
+	}
+
+	tempCis := make(CollectionIndexSet)
+	returnedSummary.CollectionBlocksNotInKeep.ToCollectionIndexSet(rc, &tempCis)
+	t.Logf("blocks not in keep: %v, collections not fully in keep: %v",
+		returnedSummary.CollectionBlocksNotInKeep,
+		tempCis)
+
+	if !reflect.DeepEqual(returnedSummary, expectedSummary) {
+		t.Fatalf("Expected returnedSummary to look like: \n%+v but instead it is: \n%+v. Index to UUID is %v. BlockToCollectionIndices is %v.", expectedSummary, returnedSummary, rc.CollectionIndexToUuid, rc.BlockToCollectionIndices)
+	}
+}

commit a56406d730f2a07dd442b9e99ef9dab7b7d81895
Author: mishaz <misha at curoverse.com>
Date:   Sat Feb 28 02:36:46 2015 +0000

    Added lots of unit tests.
    Switched collections.ReadCollections.BlockToCollectionIndex to collections.ReadCollections.BlockToCollectionIndices since a block can belong to more than one collection.
    Made collection.Summarize a method of ReadCollections.
    Made a couple testing libraries in blockdigest and collection.
    Moved collection.MakeBlockDigest() to blockdigest.MakeTestBlockDigest() (in the testing library).
    Created collection.MakeTestReadCollections to simply writing and reading tests (in the testing library).
    Created the BlockSet and CollectionIndexSet types to hide some of the awkwardness of using maps as sets and added FromSlice methods.
    Moved functions for reading and writing data locally to separate file.
    Created separate ReplicationSummaryCounts struct and PrettyPrint method.
    Added stats for Collections in addition to Blocks.

diff --git a/sdk/go/blockdigest/blockdigest.go b/sdk/go/blockdigest/blockdigest.go
index ad2e365..5c29b90 100644
--- a/sdk/go/blockdigest/blockdigest.go
+++ b/sdk/go/blockdigest/blockdigest.go
@@ -1,5 +1,4 @@
-/* Stores a Block Locator Digest compactly. Can be used as a map key. */
-
+// Stores a Block Locator Digest compactly. Can be used as a map key.
 package blockdigest
 
 import (
diff --git a/sdk/go/blockdigest/testing.go b/sdk/go/blockdigest/testing.go
new file mode 100644
index 0000000..eae3f3c
--- /dev/null
+++ b/sdk/go/blockdigest/testing.go
@@ -0,0 +1,12 @@
+// Code used for testing only.
+
+package blockdigest
+
+import (
+	"fmt"
+)
+
+// Just used for testing when we need some distinct BlockDigests
+func MakeTestBlockDigest(i int) BlockDigest {
+	return AssertFromString(fmt.Sprintf("%032x", i))
+}
diff --git a/sdk/go/logger/main/testlogger.go b/sdk/go/logger/main/testlogger.go
deleted file mode 100644
index 6cd7dfb..0000000
--- a/sdk/go/logger/main/testlogger.go
+++ /dev/null
@@ -1,29 +0,0 @@
-// This binary tests the logger package.
-// It's not a standard unit test. Instead it writes to the actual log
-// and you have to clean up after it.
-
-package main
-
-import (
-	"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
-	"git.curoverse.com/arvados.git/sdk/go/logger"
-	"log"
-)
-
-func main() {
-	arv, err := arvadosclient.MakeArvadosClient()
-	if err != nil {
-		log.Fatalf("Error setting up arvados client %v", err)
-	}
-
-	l := logger.NewLogger(logger.LoggerParams{Client: arv,
-		EventType: "experimental-logger-testing",
-		// No minimum write interval
-	})
-
-	{
-		properties, _ := l.Edit()
-		properties["Ninja"] = "Misha"
-	}
-	l.Record()
-}
diff --git a/services/datamanager/collection/collection.go b/services/datamanager/collection/collection.go
index a3f1e31..32cdcfb 100644
--- a/services/datamanager/collection/collection.go
+++ b/services/datamanager/collection/collection.go
@@ -1,4 +1,4 @@
-/* Deals with parsing Collection responses from API Server. */
+// Deals with parsing Collection responses from API Server.
 
 package collection
 
@@ -38,13 +38,13 @@ type Collection struct {
 }
 
 type ReadCollections struct {
-	ReadAllCollections     bool
-	UuidToCollection       map[string]Collection
-	OwnerToCollectionSize  map[string]int
-	BlockToReplication     map[blockdigest.BlockDigest]int
-	CollectionUuidToIndex  map[string]int
-	CollectionIndexToUuid  []string
-	BlockToCollectionIndex map[blockdigest.BlockDigest]int
+	ReadAllCollections       bool
+	UuidToCollection         map[string]Collection
+	OwnerToCollectionSize    map[string]int
+	BlockToReplication       map[blockdigest.BlockDigest]int
+	CollectionUuidToIndex    map[string]int
+	CollectionIndexToUuid    []string
+	BlockToCollectionIndices map[blockdigest.BlockDigest][]int
 }
 
 type GetCollectionsParams struct {
@@ -98,7 +98,7 @@ func WriteHeapProfile() {
 
 func GetCollectionsAndSummarize(params GetCollectionsParams) (results ReadCollections) {
 	results = GetCollections(params)
-	Summarize(&results)
+	results.Summarize()
 
 	if params.Logger != nil {
 		params.Logger.Update(func(p map[string]interface{}, e map[string]interface{}) {
@@ -295,13 +295,13 @@ func ProcessCollections(arvLogger *logger.Logger,
 	return
 }
 
-func Summarize(readCollections *ReadCollections) {
+func (readCollections *ReadCollections) Summarize() {
 	readCollections.OwnerToCollectionSize = make(map[string]int)
 	readCollections.BlockToReplication = make(map[blockdigest.BlockDigest]int)
 	numCollections := len(readCollections.UuidToCollection)
 	readCollections.CollectionUuidToIndex = make(map[string]int, numCollections)
 	readCollections.CollectionIndexToUuid = make([]string, 0, numCollections)
-	readCollections.BlockToCollectionIndex = make(map[blockdigest.BlockDigest]int)
+	readCollections.BlockToCollectionIndices = make(map[blockdigest.BlockDigest][]int)
 
 	for _, coll := range readCollections.UuidToCollection {
 		collectionIndex := len(readCollections.CollectionIndexToUuid)
@@ -313,7 +313,8 @@ func Summarize(readCollections *ReadCollections) {
 			readCollections.OwnerToCollectionSize[coll.OwnerUuid] + coll.TotalSize
 
 		for block, _ := range coll.BlockDigestToSize {
-			readCollections.BlockToCollectionIndex[block] = collectionIndex
+			readCollections.BlockToCollectionIndices[block] =
+				append(readCollections.BlockToCollectionIndices[block], collectionIndex)
 			storedReplication := readCollections.BlockToReplication[block]
 			if coll.ReplicationLevel > storedReplication {
 				readCollections.BlockToReplication[block] = coll.ReplicationLevel
diff --git a/services/datamanager/collection/collection_test.go b/services/datamanager/collection/collection_test.go
new file mode 100644
index 0000000..9117321
--- /dev/null
+++ b/services/datamanager/collection/collection_test.go
@@ -0,0 +1,124 @@
+package collection
+
+import (
+	"git.curoverse.com/arvados.git/sdk/go/blockdigest"
+	"reflect"
+	"testing"
+)
+
+// This captures the result we expect from
+// ReadCollections.Summarize().  Because CollectionUuidToIndex is
+// indeterminate, we replace BlockToCollectionIndices with
+// BlockToCollectionUuids.
+type ExpectedSummary struct {
+	OwnerToCollectionSize  map[string]int
+	BlockToReplication     map[blockdigest.BlockDigest]int
+	BlockToCollectionUuids map[blockdigest.BlockDigest][]string
+}
+
+func CompareSummarizedReadCollections(t *testing.T,
+	summarized ReadCollections,
+	expected ExpectedSummary) {
+
+	if !reflect.DeepEqual(summarized.OwnerToCollectionSize,
+		expected.OwnerToCollectionSize) {
+		t.Fatalf("Expected summarized OwnerToCollectionSize to look like %+v but instead it is %+v",
+			expected.OwnerToCollectionSize,
+			summarized.OwnerToCollectionSize)
+	}
+
+	if !reflect.DeepEqual(summarized.BlockToReplication,
+		expected.BlockToReplication) {
+		t.Fatalf("Expected summarized BlockToReplication to look like %+v but instead it is %+v",
+			expected.BlockToReplication,
+			summarized.BlockToReplication)
+	}
+
+	summarizedBlockToCollectionUuids :=
+		make(map[blockdigest.BlockDigest]map[string]struct{})
+	for digest, indices := range summarized.BlockToCollectionIndices {
+		uuidSet := make(map[string]struct{})
+		summarizedBlockToCollectionUuids[digest] = uuidSet
+		for _, index := range indices {
+			uuidSet[summarized.CollectionIndexToUuid[index]] = struct{}{}
+		}
+	}
+
+	expectedBlockToCollectionUuids :=
+		make(map[blockdigest.BlockDigest]map[string]struct{})
+	for digest, uuidSlice := range expected.BlockToCollectionUuids {
+		uuidSet := make(map[string]struct{})
+		expectedBlockToCollectionUuids[digest] = uuidSet
+		for _, uuid := range uuidSlice {
+			uuidSet[uuid] = struct{}{}
+		}
+	}
+
+	if !reflect.DeepEqual(summarizedBlockToCollectionUuids,
+		expectedBlockToCollectionUuids) {
+		t.Fatalf("Expected summarized BlockToCollectionUuids to look like %+v but instead it is %+v", expectedBlockToCollectionUuids, summarizedBlockToCollectionUuids)
+	}
+}
+
+func TestSummarizeSimple(t *testing.T) {
+	rc := MakeTestReadCollections([]TestCollectionSpec{TestCollectionSpec{
+		ReplicationLevel: 5,
+		Blocks: []int{1, 2},
+	}})
+
+	rc.Summarize()
+
+	c := rc.UuidToCollection["col0"]
+
+	blockDigest1 := blockdigest.MakeTestBlockDigest(1)
+	blockDigest2 := blockdigest.MakeTestBlockDigest(2)
+
+	expected := ExpectedSummary{
+		OwnerToCollectionSize:  map[string]int{c.OwnerUuid: c.TotalSize},
+		BlockToReplication:     map[blockdigest.BlockDigest]int{blockDigest1: 5, blockDigest2: 5},
+		BlockToCollectionUuids: map[blockdigest.BlockDigest][]string{blockDigest1: []string{c.Uuid}, blockDigest2: []string{c.Uuid}},
+	}
+
+	CompareSummarizedReadCollections(t, rc, expected)
+}
+
+func TestSummarizeOverlapping(t *testing.T) {
+	rc := MakeTestReadCollections([]TestCollectionSpec{
+		TestCollectionSpec{
+			ReplicationLevel: 5,
+			Blocks: []int{1, 2},
+		},
+		TestCollectionSpec{
+			ReplicationLevel: 8,
+			Blocks: []int{2, 3},
+		},
+	})
+
+	rc.Summarize()
+
+	c0 := rc.UuidToCollection["col0"]
+	c1 := rc.UuidToCollection["col1"]
+
+	blockDigest1 := blockdigest.MakeTestBlockDigest(1)
+	blockDigest2 := blockdigest.MakeTestBlockDigest(2)
+	blockDigest3 := blockdigest.MakeTestBlockDigest(3)
+
+	expected := ExpectedSummary{
+		OwnerToCollectionSize: map[string]int{
+			c0.OwnerUuid: c0.TotalSize,
+			c1.OwnerUuid: c1.TotalSize,
+		},
+		BlockToReplication: map[blockdigest.BlockDigest]int{
+			blockDigest1: 5,
+			blockDigest2: 8,
+			blockDigest3: 8,
+		},
+		BlockToCollectionUuids: map[blockdigest.BlockDigest][]string{
+			blockDigest1: []string{c0.Uuid},
+			blockDigest2: []string{c0.Uuid, c1.Uuid},
+			blockDigest3: []string{c1.Uuid},
+		},
+	}
+
+	CompareSummarizedReadCollections(t, rc, expected)
+}
diff --git a/services/datamanager/collection/testing.go b/services/datamanager/collection/testing.go
new file mode 100644
index 0000000..1d3c2e8
--- /dev/null
+++ b/services/datamanager/collection/testing.go
@@ -0,0 +1,46 @@
+// Code used for testing only.
+
+package collection
+
+import (
+	"fmt"
+	"git.curoverse.com/arvados.git/sdk/go/blockdigest"
+)
+
+type TestCollectionSpec struct {
+	// The desired replication level
+	ReplicationLevel int
+	// Blocks this contains, represented by ints. Ints repeated will
+	// still only represent one block
+	Blocks []int
+}
+
+// Creates a ReadCollections object for testing based on the give
+// specs.  Only the ReadAllCollections and UuidToCollection fields are
+// populated.  To populate other fields call rc.Summarize().
+func MakeTestReadCollections(specs []TestCollectionSpec) (rc ReadCollections) {
+	rc = ReadCollections{
+		ReadAllCollections: true,
+		UuidToCollection:   map[string]Collection{},
+	}
+
+	for i, spec := range specs {
+		c := Collection{
+			Uuid:              fmt.Sprintf("col%d", i),
+			OwnerUuid:         fmt.Sprintf("owner%d", i),
+			ReplicationLevel:  spec.ReplicationLevel,
+			BlockDigestToSize: map[blockdigest.BlockDigest]int{},
+		}
+		rc.UuidToCollection[c.Uuid] = c
+		for _, j := range spec.Blocks {
+			c.BlockDigestToSize[blockdigest.MakeTestBlockDigest(j)] = j
+		}
+		// We compute the size in a separate loop because the value
+		// computed in the above loop would be invalid if c.Blocks
+		// contained duplicates.
+		for _, size := range c.BlockDigestToSize {
+			c.TotalSize += size
+		}
+	}
+	return
+}
diff --git a/services/datamanager/datamanager.go b/services/datamanager/datamanager.go
index ce8114a..cd141ee 100644
--- a/services/datamanager/datamanager.go
+++ b/services/datamanager/datamanager.go
@@ -104,23 +104,13 @@ func singlerun() {
 	summary.MaybeWriteData(arvLogger, readCollections, keepServerInfo)
 
 	replicationSummary :=
-		summary.SummarizeReplication(arvLogger, readCollections, keepServerInfo)
-
-	log.Printf("Replication Counts:" +
-		"\nBlocks In Collections: %d, " +
-		"\nBlocks In Keep: %d, " +
-		"\nMissing From Keep: %d, " +
-		"\nUnder Replicated: %d, " +
-		"\nOver Replicated: %d, " +
-		"\nReplicated Just Right: %d, " +
-		"\nNot In Any Collection: %d.",
+		summary.SummarizeReplication(readCollections, keepServerInfo)
+
+	log.Printf("Blocks In Collections: %d, " +
+		"\nBlocks In Keep: %d.",
 		len(readCollections.BlockToReplication),
-		len(keepServerInfo.BlockToServers),
-		len(replicationSummary.CollectionBlocksNotInKeep),
-		len(replicationSummary.UnderReplicatedBlocks),
-		len(replicationSummary.OverReplicatedBlocks),
-		len(replicationSummary.CorrectlyReplicatedBlocks),
-		len(replicationSummary.KeepBlocksNotInCollections))
+		len(keepServerInfo.BlockToServers))
+	log.Println(replicationSummary.ComputeCounts().PrettyPrint())
 
 	// Log that we're finished. We force the recording, since go will
 	// not wait for the timer before exiting.
diff --git a/services/datamanager/summary/summary.go b/services/datamanager/summary/file.go
similarity index 57%
copy from services/datamanager/summary/summary.go
copy to services/datamanager/summary/file.go
index 6698062..3a853bc 100644
--- a/services/datamanager/summary/summary.go
+++ b/services/datamanager/summary/file.go
@@ -1,4 +1,4 @@
-/* Computes Summary based on data read from API server. */
+// Handles writing data to and reading data from disk to speed up development.
 
 package summary
 
@@ -6,7 +6,6 @@ import (
 	"encoding/gob"
 	"flag"
 	"fmt"
-	"git.curoverse.com/arvados.git/sdk/go/blockdigest"
 	"git.curoverse.com/arvados.git/sdk/go/logger"
 	"git.curoverse.com/arvados.git/services/datamanager/collection"
 	"git.curoverse.com/arvados.git/services/datamanager/keep"
@@ -15,19 +14,6 @@ import (
 	"os"
 )
 
-type ReplicationSummary struct {
-	CollectionBlocksNotInKeep  map[blockdigest.BlockDigest]struct{}
-	UnderReplicatedBlocks      map[blockdigest.BlockDigest]struct{}
-	OverReplicatedBlocks       map[blockdigest.BlockDigest]struct{}
-	CorrectlyReplicatedBlocks  map[blockdigest.BlockDigest]struct{}
-	KeepBlocksNotInCollections map[blockdigest.BlockDigest]struct{}
-}
-
-type serializedData struct {
-	ReadCollections collection.ReadCollections
-	KeepServerInfo  keep.ReadServers
-}
-
 var (
 	writeDataTo  string
 	readDataFrom string
@@ -46,7 +32,8 @@ func init() {
 
 // Writes data we've read to a file.
 //
-// This is useful for development, so that we don't need to read all our data from the network every time we tweak something.
+// This is useful for development, so that we don't need to read all
+// our data from the network every time we tweak something.
 //
 // This should not be used outside of development, since you'll be
 // working with stale data.
@@ -77,9 +64,10 @@ func MaybeWriteData(arvLogger *logger.Logger,
 	}
 }
 
-// Reads data that we've read to a file.
+// Reads data that we've written to a file.
 //
-// This is useful for development, so that we don't need to read all our data from the network every time we tweak something.
+// This is useful for development, so that we don't need to read all
+// our data from the network every time we tweak something.
 //
 // This should not be used outside of development, since you'll be
 // working with stale data.
@@ -106,7 +94,7 @@ func MaybeReadData(arvLogger *logger.Logger,
 
 		// re-summarize data, so that we can update our summarizing
 		// functions without needing to do all our network i/o
-		collection.Summarize(&data.ReadCollections)
+		data.ReadCollections.Summarize()
 		keep.ComputeBlockReplicationCounts(&data.KeepServerInfo)
 
 		*readCollections = data.ReadCollections
@@ -115,34 +103,3 @@ func MaybeReadData(arvLogger *logger.Logger,
 		return true
 	}
 }
-
-func SummarizeReplication(arvLogger *logger.Logger,
-	readCollections collection.ReadCollections,
-	keepServerInfo keep.ReadServers) (rs ReplicationSummary) {
-	rs.CollectionBlocksNotInKeep = make(map[blockdigest.BlockDigest]struct{})
-	rs.UnderReplicatedBlocks = make(map[blockdigest.BlockDigest]struct{})
-	rs.OverReplicatedBlocks = make(map[blockdigest.BlockDigest]struct{})
-	rs.CorrectlyReplicatedBlocks = make(map[blockdigest.BlockDigest]struct{})
-	rs.KeepBlocksNotInCollections = make(map[blockdigest.BlockDigest]struct{})
-
-	for block, requestedReplication := range readCollections.BlockToReplication {
-		actualReplication := len(keepServerInfo.BlockToServers[block])
-		if actualReplication == 0 {
-			rs.CollectionBlocksNotInKeep[block] = struct{}{}
-		} else if actualReplication < requestedReplication {
-			rs.UnderReplicatedBlocks[block] = struct{}{}
-		} else if actualReplication > requestedReplication {
-			rs.OverReplicatedBlocks[block] = struct{}{}
-		} else {
-			rs.CorrectlyReplicatedBlocks[block] = struct{}{}
-		}
-	}
-
-	for block, _ := range keepServerInfo.BlockToServers {
-		if 0 == readCollections.BlockToReplication[block] {
-			rs.KeepBlocksNotInCollections[block] = struct{}{}
-		}
-	}
-
-	return rs
-}
diff --git a/services/datamanager/summary/summary.go b/services/datamanager/summary/summary.go
index 6698062..71d1aff 100644
--- a/services/datamanager/summary/summary.go
+++ b/services/datamanager/summary/summary.go
@@ -1,146 +1,172 @@
-/* Computes Summary based on data read from API server. */
-
+// Summarizes Collection Data and Keep Server Contents.
 package summary
 
+// TODO(misha): Check size of blocks as well as their digest.
+
 import (
-	"encoding/gob"
-	"flag"
 	"fmt"
 	"git.curoverse.com/arvados.git/sdk/go/blockdigest"
-	"git.curoverse.com/arvados.git/sdk/go/logger"
 	"git.curoverse.com/arvados.git/services/datamanager/collection"
 	"git.curoverse.com/arvados.git/services/datamanager/keep"
-	"git.curoverse.com/arvados.git/services/datamanager/loggerutil"
-	"log"
-	"os"
 )
 
-type ReplicationSummary struct {
-	CollectionBlocksNotInKeep  map[blockdigest.BlockDigest]struct{}
-	UnderReplicatedBlocks      map[blockdigest.BlockDigest]struct{}
-	OverReplicatedBlocks       map[blockdigest.BlockDigest]struct{}
-	CorrectlyReplicatedBlocks  map[blockdigest.BlockDigest]struct{}
-	KeepBlocksNotInCollections map[blockdigest.BlockDigest]struct{}
+type BlockSet map[blockdigest.BlockDigest]struct{}
+
+func (bs BlockSet) Insert(digest blockdigest.BlockDigest) {
+	bs[digest] = struct{}{}
 }
 
-type serializedData struct {
-	ReadCollections collection.ReadCollections
-	KeepServerInfo  keep.ReadServers
+func BlockSetFromSlice(digests []blockdigest.BlockDigest) (bs BlockSet) {
+	bs = make(BlockSet)
+	for _, digest := range digests {
+		bs.Insert(digest)
+	}
+	return
 }
 
-var (
-	writeDataTo  string
-	readDataFrom string
-)
+// We use the collection index to save space. To convert to & from the
+// uuid, use collection.ReadCollections' fields CollectionIndexToUuid
+// and CollectionUuidToIndex.
+type CollectionIndexSet map[int]struct{}
+
+func (cis CollectionIndexSet) Insert(collectionIndex int) {
+	cis[collectionIndex] = struct{}{}
+}
 
-func init() {
-	flag.StringVar(&writeDataTo,
-		"write-data-to",
-		"",
-		"Write summary of data received to this file. Used for development only.")
-	flag.StringVar(&readDataFrom,
-		"read-data-from",
-		"",
-		"Avoid network i/o and read summary data from this file instead. Used for development only.")
+func CollectionIndexSetFromSlice(indices []int) (cis CollectionIndexSet) {
+	cis = make(CollectionIndexSet)
+	for _, index := range indices {
+		cis.Insert(index)
+	}
+	return
 }
 
-// Writes data we've read to a file.
-//
-// This is useful for development, so that we don't need to read all our data from the network every time we tweak something.
-//
-// This should not be used outside of development, since you'll be
-// working with stale data.
-func MaybeWriteData(arvLogger *logger.Logger,
+
+func (bs BlockSet) ToCollectionIndexSet(
 	readCollections collection.ReadCollections,
-	keepServerInfo keep.ReadServers) bool {
-	if writeDataTo == "" {
-		return false
-	} else {
-		summaryFile, err := os.Create(writeDataTo)
-		if err != nil {
-			loggerutil.FatalWithMessage(arvLogger,
-				fmt.Sprintf("Failed to open %s: %v", writeDataTo, err))
-		}
-		defer summaryFile.Close()
-
-		enc := gob.NewEncoder(summaryFile)
-		data := serializedData{
-			ReadCollections: readCollections,
-			KeepServerInfo:  keepServerInfo}
-		err = enc.Encode(data)
-		if err != nil {
-			loggerutil.FatalWithMessage(arvLogger,
-				fmt.Sprintf("Failed to write summary data: %v", err))
+	collectionIndexSet *CollectionIndexSet) {
+	for block := range bs {
+		for collectionIndex := range readCollections.BlockToCollectionIndices[block] {
+			collectionIndexSet.Insert(collectionIndex)
 		}
-		log.Printf("Wrote summary data to: %s", writeDataTo)
-		return true
 	}
 }
 
-// Reads data that we've read to a file.
-//
-// This is useful for development, so that we don't need to read all our data from the network every time we tweak something.
-//
-// This should not be used outside of development, since you'll be
-// working with stale data.
-func MaybeReadData(arvLogger *logger.Logger,
-	readCollections *collection.ReadCollections,
-	keepServerInfo *keep.ReadServers) bool {
-	if readDataFrom == "" {
-		return false
-	} else {
-		summaryFile, err := os.Open(readDataFrom)
-		if err != nil {
-			loggerutil.FatalWithMessage(arvLogger,
-				fmt.Sprintf("Failed to open %s: %v", readDataFrom, err))
-		}
-		defer summaryFile.Close()
-
-		dec := gob.NewDecoder(summaryFile)
-		data := serializedData{}
-		err = dec.Decode(&data)
-		if err != nil {
-			loggerutil.FatalWithMessage(arvLogger,
-				fmt.Sprintf("Failed to read summary data: %v", err))
-		}
+type ReplicationSummary struct {
+	CollectionBlocksNotInKeep  BlockSet
+	UnderReplicatedBlocks      BlockSet
+	OverReplicatedBlocks       BlockSet
+	CorrectlyReplicatedBlocks  BlockSet
+	KeepBlocksNotInCollections BlockSet
+
+	CollectionsNotFullyInKeep      CollectionIndexSet
+	UnderReplicatedCollections     CollectionIndexSet
+	OverReplicatedCollections      CollectionIndexSet
+	CorrectlyReplicatedCollections CollectionIndexSet
+}
 
-		// re-summarize data, so that we can update our summarizing
-		// functions without needing to do all our network i/o
-		collection.Summarize(&data.ReadCollections)
-		keep.ComputeBlockReplicationCounts(&data.KeepServerInfo)
+type ReplicationSummaryCounts struct {
+	CollectionBlocksNotInKeep      int
+	UnderReplicatedBlocks          int
+	OverReplicatedBlocks           int
+	CorrectlyReplicatedBlocks      int
+	KeepBlocksNotInCollections     int
+	CollectionsNotFullyInKeep      int
+	UnderReplicatedCollections     int
+	OverReplicatedCollections      int
+	CorrectlyReplicatedCollections int
+}
 
-		*readCollections = data.ReadCollections
-		*keepServerInfo = data.KeepServerInfo
-		log.Printf("Read summary data from: %s", readDataFrom)
-		return true
-	}
+type serializedData struct {
+	ReadCollections collection.ReadCollections
+	KeepServerInfo  keep.ReadServers
 }
 
-func SummarizeReplication(arvLogger *logger.Logger,
-	readCollections collection.ReadCollections,
+func (rs ReplicationSummary) ComputeCounts() (rsc ReplicationSummaryCounts) {
+	rsc.CollectionBlocksNotInKeep = len(rs.CollectionBlocksNotInKeep)
+	rsc.UnderReplicatedBlocks = len(rs.UnderReplicatedBlocks)
+	rsc.OverReplicatedBlocks = len(rs.OverReplicatedBlocks)
+	rsc.CorrectlyReplicatedBlocks = len(rs.CorrectlyReplicatedBlocks)
+	rsc.KeepBlocksNotInCollections = len(rs.KeepBlocksNotInCollections)
+	rsc.CollectionsNotFullyInKeep = len(rs.CollectionsNotFullyInKeep)
+	rsc.UnderReplicatedCollections = len(rs.UnderReplicatedCollections)
+	rsc.OverReplicatedCollections = len(rs.OverReplicatedCollections)
+	rsc.CorrectlyReplicatedCollections = len(rs.CorrectlyReplicatedCollections)
+	return rsc
+}
+
+func (rsc ReplicationSummaryCounts) PrettyPrint() string {
+	return fmt.Sprintf("Replication Block Counts:"+
+		"\n Missing From Keep: %d, "+
+		"\n Under Replicated: %d, "+
+		"\n Over Replicated: %d, "+
+		"\n Replicated Just Right: %d, "+
+		"\n Not In Any Collection: %d. "+
+		"\nReplication Collection Counts:"+
+		"\n Missing From Keep: %d, "+
+		"\n Under Replicated: %d, "+
+		"\n Over Replicated: %d, "+
+		"\n Replicated Just Right: %d.",
+		rsc.CollectionBlocksNotInKeep,
+		rsc.UnderReplicatedBlocks,
+		rsc.OverReplicatedBlocks,
+		rsc.CorrectlyReplicatedBlocks,
+		rsc.KeepBlocksNotInCollections,
+		rsc.CollectionsNotFullyInKeep,
+		rsc.UnderReplicatedCollections,
+		rsc.OverReplicatedCollections,
+		rsc.CorrectlyReplicatedCollections)
+}
+
+func SummarizeReplication(readCollections collection.ReadCollections,
 	keepServerInfo keep.ReadServers) (rs ReplicationSummary) {
-	rs.CollectionBlocksNotInKeep = make(map[blockdigest.BlockDigest]struct{})
-	rs.UnderReplicatedBlocks = make(map[blockdigest.BlockDigest]struct{})
-	rs.OverReplicatedBlocks = make(map[blockdigest.BlockDigest]struct{})
-	rs.CorrectlyReplicatedBlocks = make(map[blockdigest.BlockDigest]struct{})
-	rs.KeepBlocksNotInCollections = make(map[blockdigest.BlockDigest]struct{})
+	rs.CollectionBlocksNotInKeep = make(BlockSet)
+	rs.UnderReplicatedBlocks = make(BlockSet)
+	rs.OverReplicatedBlocks = make(BlockSet)
+	rs.CorrectlyReplicatedBlocks = make(BlockSet)
+	rs.KeepBlocksNotInCollections = make(BlockSet)
+	rs.CollectionsNotFullyInKeep = make(CollectionIndexSet)
+	rs.UnderReplicatedCollections = make(CollectionIndexSet)
+	rs.OverReplicatedCollections = make(CollectionIndexSet)
+	rs.CorrectlyReplicatedCollections = make(CollectionIndexSet)
 
 	for block, requestedReplication := range readCollections.BlockToReplication {
 		actualReplication := len(keepServerInfo.BlockToServers[block])
 		if actualReplication == 0 {
-			rs.CollectionBlocksNotInKeep[block] = struct{}{}
+			rs.CollectionBlocksNotInKeep.Insert(block)
 		} else if actualReplication < requestedReplication {
-			rs.UnderReplicatedBlocks[block] = struct{}{}
+			rs.UnderReplicatedBlocks.Insert(block)
 		} else if actualReplication > requestedReplication {
-			rs.OverReplicatedBlocks[block] = struct{}{}
+			rs.OverReplicatedBlocks.Insert(block)
 		} else {
-			rs.CorrectlyReplicatedBlocks[block] = struct{}{}
+			rs.CorrectlyReplicatedBlocks.Insert(block)
 		}
 	}
 
 	for block, _ := range keepServerInfo.BlockToServers {
 		if 0 == readCollections.BlockToReplication[block] {
-			rs.KeepBlocksNotInCollections[block] = struct{}{}
+			rs.KeepBlocksNotInCollections.Insert(block)
+		}
+	}
+
+	rs.CollectionBlocksNotInKeep.ToCollectionIndexSet(readCollections,
+		&rs.CollectionsNotFullyInKeep)
+	// Since different collections can specify different replication
+	// levels, the fact that a block is under-replicated does not imply
+	// that all collections that it belongs to are under-replicated, but
+	// we'll ignore that for now.
+	// TODO(misha): Fix this and report the correct set of collections.
+	rs.UnderReplicatedBlocks.ToCollectionIndexSet(readCollections,
+		&rs.UnderReplicatedCollections)
+	rs.OverReplicatedBlocks.ToCollectionIndexSet(readCollections,
+		&rs.OverReplicatedCollections)
+
+	for i := range readCollections.CollectionIndexToUuid {
+		if _, notInKeep := rs.CollectionsNotFullyInKeep[i]; notInKeep {
+		} else if _, underReplicated := rs.UnderReplicatedCollections[i]; underReplicated {
+		} else if _, overReplicated := rs.OverReplicatedCollections[i]; overReplicated {
+		} else {
+			rs.CorrectlyReplicatedCollections.Insert(i)
 		}
 	}
 

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list