[ARVADOS] created: c63ff55687f32dfdff01b9827b411b3757d48ee7

git at public.curoverse.com git at public.curoverse.com
Thu Jun 4 17:17:56 EDT 2015


        at  c63ff55687f32dfdff01b9827b411b3757d48ee7 (commit)


commit c63ff55687f32dfdff01b9827b411b3757d48ee7
Author: mishaz <misha at curoverse.com>
Date:   Thu Jun 4 21:11:01 2015 +0000

    Added size to block locators, touching most of the code.
    Moved BlockLocator (and associated parsing code) from sdk/go/manifest to sdk/go/blockdigest.
    Added some helper methods, some for testing.
    Some gofmt cleanup.

diff --git a/sdk/go/blockdigest/blockdigest.go b/sdk/go/blockdigest/blockdigest.go
index 5c29b90..d2f1c60 100644
--- a/sdk/go/blockdigest/blockdigest.go
+++ b/sdk/go/blockdigest/blockdigest.go
@@ -4,9 +4,14 @@ package blockdigest
 import (
 	"fmt"
 	"log"
+	"regexp"
 	"strconv"
+	"strings"
 )
 
+var LocatorPattern = regexp.MustCompile(
+	"^[0-9a-fA-F]{32}\\+[0-9]+(\\+[A-Z][A-Za-z0-9 at _-]+)*$")
+
 // Stores a Block Locator Digest compactly, up to 128 bits.
 // Can be used as a map key.
 type BlockDigest struct {
@@ -14,10 +19,25 @@ type BlockDigest struct {
 	L uint64
 }
 
+type DigestWithSize struct {
+	Digest BlockDigest
+	Size   uint32
+}
+
+type BlockLocator struct {
+	Digest BlockDigest
+	Size   int
+	Hints  []string
+}
+
 func (d BlockDigest) String() string {
 	return fmt.Sprintf("%016x%016x", d.H, d.L)
 }
 
+func (w DigestWithSize) String() string {
+	return fmt.Sprintf("%s+%d", w.Digest.String(), w.Size)
+}
+
 // Will create a new BlockDigest unless an error is encountered.
 func FromString(s string) (dig BlockDigest, err error) {
 	if len(s) != 32 {
@@ -46,3 +66,34 @@ func AssertFromString(s string) BlockDigest {
 	}
 	return d
 }
+
+func IsBlockLocator(s string) bool {
+	return LocatorPattern.MatchString(s)
+}
+
+func ParseBlockLocator(s string) (b BlockLocator, err error) {
+	if !LocatorPattern.MatchString(s) {
+		err = fmt.Errorf("String \"%s\" does not match BlockLocator pattern "+
+			"\"%s\".",
+			s,
+			LocatorPattern.String())
+	} else {
+		tokens := strings.Split(s, "+")
+		var blockSize int64
+		var blockDigest BlockDigest
+		// We expect both of the following to succeed since LocatorPattern
+		// restricts the strings appropriately.
+		blockDigest, err = FromString(tokens[0])
+		if err != nil {
+			return
+		}
+		blockSize, err = strconv.ParseInt(tokens[1], 10, 0)
+		if err != nil {
+			return
+		}
+		b.Digest = blockDigest
+		b.Size = int(blockSize)
+		b.Hints = tokens[2:]
+	}
+	return
+}
diff --git a/sdk/go/blockdigest/blockdigest_test.go b/sdk/go/blockdigest/blockdigest_test.go
index 068a138..017aaa4 100644
--- a/sdk/go/blockdigest/blockdigest_test.go
+++ b/sdk/go/blockdigest/blockdigest_test.go
@@ -2,10 +2,37 @@ package blockdigest
 
 import (
 	"fmt"
+	"runtime"
 	"strings"
 	"testing"
 )
 
+func getStackTrace() string {
+	buf := make([]byte, 1000)
+	bytes_written := runtime.Stack(buf, false)
+	return "Stack Trace:\n" + string(buf[:bytes_written])
+}
+
+func expectEqual(t *testing.T, actual interface{}, expected interface{}) {
+	if actual != expected {
+		t.Fatalf("Expected %v but received %v instead. %s",
+			expected,
+			actual,
+			getStackTrace())
+	}
+}
+
+func expectStringSlicesEqual(t *testing.T, actual []string, expected []string) {
+	if len(actual) != len(expected) {
+		t.Fatalf("Expected %v (length %d), but received %v (length %d) instead. %s", expected, len(expected), actual, len(actual), getStackTrace())
+	}
+	for i := range actual {
+		if actual[i] != expected[i] {
+			t.Fatalf("Expected %v but received %v instead (first disagreement at position %d). %s", expected, actual, i, getStackTrace())
+		}
+	}
+}
+
 func expectValidDigestString(t *testing.T, s string) {
 	bd, err := FromString(s)
 	if err != nil {
@@ -13,7 +40,7 @@ func expectValidDigestString(t *testing.T, s string) {
 	}
 
 	expected := strings.ToLower(s)
-		
+
 	if expected != bd.String() {
 		t.Fatalf("Expected %s to be returned by FromString(%s).String() but instead we received %s", expected, s, bd.String())
 	}
@@ -26,6 +53,26 @@ func expectInvalidDigestString(t *testing.T, s string) {
 	}
 }
 
+func expectBlockLocator(t *testing.T, actual BlockLocator, expected BlockLocator) {
+	expectEqual(t, actual.Digest, expected.Digest)
+	expectEqual(t, actual.Size, expected.Size)
+	expectStringSlicesEqual(t, actual.Hints, expected.Hints)
+}
+
+func expectLocatorPatternMatch(t *testing.T, s string) {
+	if !LocatorPattern.MatchString(s) {
+		t.Fatalf("Expected \"%s\" to match locator pattern but it did not.",
+			s)
+	}
+}
+
+func expectLocatorPatternFail(t *testing.T, s string) {
+	if LocatorPattern.MatchString(s) {
+		t.Fatalf("Expected \"%s\" to fail locator pattern but it passed.",
+			s)
+	}
+}
+
 func TestValidDigestStrings(t *testing.T) {
 	expectValidDigestString(t, "01234567890123456789abcdefabcdef")
 	expectValidDigestString(t, "01234567890123456789ABCDEFABCDEF")
@@ -49,7 +96,7 @@ func TestBlockDigestGetsPrettyPrintedByPrintf(t *testing.T) {
 	input := "01234567890123456789abcdefabcdef"
 	prettyPrinted := fmt.Sprintf("%v", AssertFromString(input))
 	if prettyPrinted != input {
-		t.Fatalf("Expected blockDigest produced from \"%s\" to be printed as " +
+		t.Fatalf("Expected blockDigest produced from \"%s\" to be printed as "+
 			"\"%s\", but instead it was printed as %s",
 			input, input, prettyPrinted)
 	}
@@ -58,13 +105,13 @@ func TestBlockDigestGetsPrettyPrintedByPrintf(t *testing.T) {
 func TestBlockDigestGetsPrettyPrintedByPrintfInNestedStructs(t *testing.T) {
 	input := "01234567890123456789abcdefabcdef"
 	value := 42
-	nested := struct{
+	nested := struct {
 		// Fun trivia fact: If this field was called "digest" instead of
 		// "Digest", then it would not be exported and String() would
 		// never get called on it and our output would look very
 		// different.
 		Digest BlockDigest
-		value int
+		value  int
 	}{
 		AssertFromString(input),
 		value,
@@ -72,8 +119,44 @@ func TestBlockDigestGetsPrettyPrintedByPrintfInNestedStructs(t *testing.T) {
 	prettyPrinted := fmt.Sprintf("%+v", nested)
 	expected := fmt.Sprintf("{Digest:%s value:%d}", input, value)
 	if prettyPrinted != expected {
-		t.Fatalf("Expected blockDigest produced from \"%s\" to be printed as " +
+		t.Fatalf("Expected blockDigest produced from \"%s\" to be printed as "+
 			"\"%s\", but instead it was printed as %s",
 			input, expected, prettyPrinted)
 	}
 }
+
+func TestLocatorPatternBasic(t *testing.T) {
+	expectLocatorPatternMatch(t, "12345678901234567890123456789012+12345")
+	expectLocatorPatternMatch(t, "A2345678901234abcdefababdeffdfdf+12345")
+	expectLocatorPatternMatch(t, "12345678901234567890123456789012+12345+A1")
+	expectLocatorPatternMatch(t,
+		"12345678901234567890123456789012+12345+A1+B123wxyz at _-")
+	expectLocatorPatternMatch(t,
+		"12345678901234567890123456789012+12345+A1+B123wxyz at _-+C@")
+
+	expectLocatorPatternFail(t, "12345678901234567890123456789012")
+	expectLocatorPatternFail(t, "12345678901234567890123456789012+")
+	expectLocatorPatternFail(t, "12345678901234567890123456789012+12345+")
+	expectLocatorPatternFail(t, "1234567890123456789012345678901+12345")
+	expectLocatorPatternFail(t, "123456789012345678901234567890123+12345")
+	expectLocatorPatternFail(t, "g2345678901234abcdefababdeffdfdf+12345")
+	expectLocatorPatternFail(t, "12345678901234567890123456789012+12345 ")
+	expectLocatorPatternFail(t, "12345678901234567890123456789012+12345+1")
+	expectLocatorPatternFail(t, "12345678901234567890123456789012+12345+1A")
+	expectLocatorPatternFail(t, "12345678901234567890123456789012+12345+A")
+	expectLocatorPatternFail(t, "12345678901234567890123456789012+12345+a1")
+	expectLocatorPatternFail(t, "12345678901234567890123456789012+12345+A1+")
+	expectLocatorPatternFail(t, "12345678901234567890123456789012+12345+A1+B")
+	expectLocatorPatternFail(t, "12345678901234567890123456789012+12345+A+B2")
+}
+
+func TestParseBlockLocatorSimple(t *testing.T) {
+	b, err := ParseBlockLocator("365f83f5f808896ec834c8b595288735+2310+K at qr1hi+Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf")
+	if err != nil {
+		t.Fatalf("Unexpected error parsing block locator: %v", err)
+	}
+	expectBlockLocator(t, b, BlockLocator{Digest: AssertFromString("365f83f5f808896ec834c8b595288735"),
+		Size: 2310,
+		Hints: []string{"K at qr1hi",
+			"Af0c9a66381f3b028677411926f0be1c6282fe67c at 542b5ddf"}})
+}
diff --git a/sdk/go/blockdigest/testing.go b/sdk/go/blockdigest/testing.go
index eae3f3c..40f08ce 100644
--- a/sdk/go/blockdigest/testing.go
+++ b/sdk/go/blockdigest/testing.go
@@ -2,11 +2,15 @@
 
 package blockdigest
 
-import (
-	"fmt"
-)
-
 // Just used for testing when we need some distinct BlockDigests
 func MakeTestBlockDigest(i int) BlockDigest {
-	return AssertFromString(fmt.Sprintf("%032x", i))
+	return BlockDigest{L: uint64(i)}
+}
+
+func MakeTestDigestSpecifySize(i int, s int) DigestWithSize {
+	return DigestWithSize{Digest: BlockDigest{L: uint64(i)}, Size: uint32(s)}
+}
+
+func MakeTestDigestWithSize(i int) DigestWithSize {
+	return MakeTestDigestSpecifySize(i, i)
 }
diff --git a/sdk/go/manifest/manifest.go b/sdk/go/manifest/manifest.go
index f6698c6..4e816cd 100644
--- a/sdk/go/manifest/manifest.go
+++ b/sdk/go/manifest/manifest.go
@@ -5,27 +5,15 @@
 package manifest
 
 import (
-	"fmt"
 	"git.curoverse.com/arvados.git/sdk/go/blockdigest"
 	"log"
-	"regexp"
-	"strconv"
 	"strings"
 )
 
-var LocatorPattern = regexp.MustCompile(
-	"^[0-9a-fA-F]{32}\\+[0-9]+(\\+[A-Z][A-Za-z0-9 at _-]+)*$")
-
 type Manifest struct {
 	Text string
 }
 
-type BlockLocator struct {
-	Digest blockdigest.BlockDigest
-	Size   int
-	Hints  []string
-}
-
 // Represents a single line from a manifest.
 type ManifestStream struct {
 	StreamName string
@@ -33,40 +21,13 @@ type ManifestStream struct {
 	Files      []string
 }
 
-func ParseBlockLocator(s string) (b BlockLocator, err error) {
-	if !LocatorPattern.MatchString(s) {
-		err = fmt.Errorf("String \"%s\" does not match BlockLocator pattern "+
-			"\"%s\".",
-			s,
-			LocatorPattern.String())
-	} else {
-		tokens := strings.Split(s, "+")
-		var blockSize int64
-		var blockDigest blockdigest.BlockDigest
-		// We expect both of the following to succeed since LocatorPattern
-		// restricts the strings appropriately.
-		blockDigest, err = blockdigest.FromString(tokens[0])
-		if err != nil {
-			return
-		}
-		blockSize, err = strconv.ParseInt(tokens[1], 10, 0)
-		if err != nil {
-			return
-		}
-		b.Digest = blockDigest
-		b.Size = int(blockSize)
-		b.Hints = tokens[2:]
-	}
-	return
-}
-
 func parseManifestStream(s string) (m ManifestStream) {
 	tokens := strings.Split(s, " ")
 	m.StreamName = tokens[0]
 	tokens = tokens[1:]
 	var i int
 	for i = range tokens {
-		if !LocatorPattern.MatchString(tokens[i]) {
+		if !blockdigest.IsBlockLocator(tokens[i]) {
 			break
 		}
 	}
@@ -100,12 +61,12 @@ func (m *Manifest) StreamIter() <-chan ManifestStream {
 // Blocks may appear mulitple times within the same manifest if they
 // are used by multiple files. In that case this Iterator will output
 // the same block multiple times.
-func (m *Manifest) BlockIterWithDuplicates() <-chan BlockLocator {
-	blockChannel := make(chan BlockLocator)
+func (m *Manifest) BlockIterWithDuplicates() <-chan blockdigest.BlockLocator {
+	blockChannel := make(chan blockdigest.BlockLocator)
 	go func(streamChannel <-chan ManifestStream) {
 		for m := range streamChannel {
 			for _, block := range m.Blocks {
-				if b, err := ParseBlockLocator(block); err == nil {
+				if b, err := blockdigest.ParseBlockLocator(block); err == nil {
 					blockChannel <- b
 				} else {
 					log.Printf("ERROR: Failed to parse block: %v", err)
diff --git a/sdk/go/manifest/manifest_test.go b/sdk/go/manifest/manifest_test.go
index c1bfb14..8cfe3d9 100644
--- a/sdk/go/manifest/manifest_test.go
+++ b/sdk/go/manifest/manifest_test.go
@@ -7,14 +7,14 @@ import (
 	"testing"
 )
 
-func getStackTrace() (string) {
+func getStackTrace() string {
 	buf := make([]byte, 1000)
 	bytes_written := runtime.Stack(buf, false)
 	return "Stack Trace:\n" + string(buf[:bytes_written])
 }
 
 func expectFromChannel(t *testing.T, c <-chan string, expected string) {
-	actual, ok := <- c
+	actual, ok := <-c
 	if !ok {
 		t.Fatalf("Expected to receive %s but channel was closed. %s",
 			expected,
@@ -29,7 +29,7 @@ func expectFromChannel(t *testing.T, c <-chan string, expected string) {
 }
 
 func expectChannelClosed(t *testing.T, c <-chan interface{}) {
-	received, ok := <- c
+	received, ok := <-c
 	if ok {
 		t.Fatalf("Expected channel to be closed, but received %v instead. %s",
 			received,
@@ -63,67 +63,17 @@ func expectManifestStream(t *testing.T, actual ManifestStream, expected Manifest
 	expectStringSlicesEqual(t, actual.Files, expected.Files)
 }
 
-func expectBlockLocator(t *testing.T, actual BlockLocator, expected BlockLocator) {
+func expectBlockLocator(t *testing.T, actual blockdigest.BlockLocator, expected blockdigest.BlockLocator) {
 	expectEqual(t, actual.Digest, expected.Digest)
 	expectEqual(t, actual.Size, expected.Size)
 	expectStringSlicesEqual(t, actual.Hints, expected.Hints)
 }
 
-func expectLocatorPatternMatch(t *testing.T, s string) {
-	if !LocatorPattern.MatchString(s) {
-		t.Fatalf("Expected \"%s\" to match locator pattern but it did not.",
-			s)
-	}
-}
-
-func expectLocatorPatternFail(t *testing.T, s string) {
-	if LocatorPattern.MatchString(s) {
-		t.Fatalf("Expected \"%s\" to fail locator pattern but it passed.",
-			s)
-	}
-}
-
-func TestLocatorPatternBasic(t *testing.T) {
-	expectLocatorPatternMatch(t, "12345678901234567890123456789012+12345")
-	expectLocatorPatternMatch(t, "A2345678901234abcdefababdeffdfdf+12345")
-	expectLocatorPatternMatch(t, "12345678901234567890123456789012+12345+A1")
-	expectLocatorPatternMatch(t,
-		"12345678901234567890123456789012+12345+A1+B123wxyz at _-")
-	expectLocatorPatternMatch(t,
-		"12345678901234567890123456789012+12345+A1+B123wxyz at _-+C@")
-
-	expectLocatorPatternFail(t,  "12345678901234567890123456789012")
-	expectLocatorPatternFail(t,  "12345678901234567890123456789012+")
-	expectLocatorPatternFail(t,  "12345678901234567890123456789012+12345+")
-	expectLocatorPatternFail(t,  "1234567890123456789012345678901+12345")
-	expectLocatorPatternFail(t,  "123456789012345678901234567890123+12345")
-	expectLocatorPatternFail(t,  "g2345678901234abcdefababdeffdfdf+12345")
-	expectLocatorPatternFail(t,  "12345678901234567890123456789012+12345 ")
-	expectLocatorPatternFail(t,  "12345678901234567890123456789012+12345+1")
-	expectLocatorPatternFail(t,  "12345678901234567890123456789012+12345+1A")
-	expectLocatorPatternFail(t,  "12345678901234567890123456789012+12345+A")
-	expectLocatorPatternFail(t,  "12345678901234567890123456789012+12345+a1")
-	expectLocatorPatternFail(t,  "12345678901234567890123456789012+12345+A1+")
-	expectLocatorPatternFail(t,  "12345678901234567890123456789012+12345+A1+B")
-	expectLocatorPatternFail(t,  "12345678901234567890123456789012+12345+A+B2")
-}
-
 func TestParseManifestStreamSimple(t *testing.T) {
 	m := parseManifestStream(". 365f83f5f808896ec834c8b595288735+2310+K at qr1hi+Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf 0:2310:qr1hi-8i9sb-ienvmpve1a0vpoi.log.txt")
 	expectManifestStream(t, m, ManifestStream{StreamName: ".",
 		Blocks: []string{"365f83f5f808896ec834c8b595288735+2310+K at qr1hi+Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf"},
-		Files: []string{"0:2310:qr1hi-8i9sb-ienvmpve1a0vpoi.log.txt"}})
-}
-
-func TestParseBlockLocatorSimple(t *testing.T) {
-	b, err := ParseBlockLocator("365f83f5f808896ec834c8b595288735+2310+K at qr1hi+Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf")
-	if err != nil {
-		t.Fatalf("Unexpected error parsing block locator: %v", err)
-	}
-	expectBlockLocator(t, b, BlockLocator{Digest: blockdigest.AssertFromString("365f83f5f808896ec834c8b595288735"),
-		Size: 2310,
-		Hints: []string{"K at qr1hi",
-			"Af0c9a66381f3b028677411926f0be1c6282fe67c at 542b5ddf"}})
+		Files:  []string{"0:2310:qr1hi-8i9sb-ienvmpve1a0vpoi.log.txt"}})
 }
 
 func TestStreamIterShortManifestWithBlankStreams(t *testing.T) {
@@ -139,9 +89,9 @@ func TestStreamIterShortManifestWithBlankStreams(t *testing.T) {
 		firstStream,
 		ManifestStream{StreamName: ".",
 			Blocks: []string{"b746e3d2104645f2f64cd3cc69dd895d+15693477+E2866e643690156651c03d876e638e674dcd79475 at 5441920c"},
-			Files: []string{"0:15893477:chr10_band0_s0_e3000000.fj"}})
+			Files:  []string{"0:15893477:chr10_band0_s0_e3000000.fj"}})
 
-	received, ok := <- streamIter
+	received, ok := <-streamIter
 	if ok {
 		t.Fatalf("Expected streamIter to be closed, but received %v instead.",
 			received)
@@ -159,20 +109,20 @@ func TestBlockIterLongManifest(t *testing.T) {
 	firstBlock := <-blockChannel
 	expectBlockLocator(t,
 		firstBlock,
-		BlockLocator{Digest: blockdigest.AssertFromString("b746e3d2104645f2f64cd3cc69dd895d"),
-			Size: 15693477,
+		blockdigest.BlockLocator{Digest: blockdigest.AssertFromString("b746e3d2104645f2f64cd3cc69dd895d"),
+			Size:  15693477,
 			Hints: []string{"E2866e643690156651c03d876e638e674dcd79475 at 5441920c"}})
 	blocksRead := 1
-	var lastBlock BlockLocator
+	var lastBlock blockdigest.BlockLocator
 	for lastBlock = range blockChannel {
 		//log.Printf("Blocks Read: %d", blocksRead)
-	 	blocksRead++
+		blocksRead++
 	}
 	expectEqual(t, blocksRead, 853)
 
 	expectBlockLocator(t,
 		lastBlock,
-		BlockLocator{Digest: blockdigest.AssertFromString("f9ce82f59e5908d2d70e18df9679b469"),
-			Size: 31367794,
+		blockdigest.BlockLocator{Digest: blockdigest.AssertFromString("f9ce82f59e5908d2d70e18df9679b469"),
+			Size:  31367794,
 			Hints: []string{"E53f903684239bcc114f7bf8ff9bd6089f33058db at 5441920c"}})
 }
diff --git a/services/datamanager/collection/collection.go b/services/datamanager/collection/collection.go
index ddc4f95..ed6df9d 100644
--- a/services/datamanager/collection/collection.go
+++ b/services/datamanager/collection/collection.go
@@ -42,10 +42,10 @@ type ReadCollections struct {
 	ReadAllCollections       bool
 	UuidToCollection         map[string]Collection
 	OwnerToCollectionSize    map[string]int
-	BlockToReplication       map[blockdigest.BlockDigest]int
+	BlockToReplication       map[blockdigest.DigestWithSize]int
 	CollectionUuidToIndex    map[string]int
 	CollectionIndexToUuid    []string
-	BlockToCollectionIndices map[blockdigest.BlockDigest][]int
+	BlockToCollectionIndices map[blockdigest.DigestWithSize][]int
 }
 
 type GetCollectionsParams struct {
@@ -283,11 +283,11 @@ func ProcessCollections(arvLogger *logger.Logger,
 
 func (readCollections *ReadCollections) Summarize(arvLogger *logger.Logger) {
 	readCollections.OwnerToCollectionSize = make(map[string]int)
-	readCollections.BlockToReplication = make(map[blockdigest.BlockDigest]int)
+	readCollections.BlockToReplication = make(map[blockdigest.DigestWithSize]int)
 	numCollections := len(readCollections.UuidToCollection)
 	readCollections.CollectionUuidToIndex = make(map[string]int, numCollections)
 	readCollections.CollectionIndexToUuid = make([]string, 0, numCollections)
-	readCollections.BlockToCollectionIndices = make(map[blockdigest.BlockDigest][]int)
+	readCollections.BlockToCollectionIndices = make(map[blockdigest.DigestWithSize][]int)
 
 	for _, coll := range readCollections.UuidToCollection {
 		collectionIndex := len(readCollections.CollectionIndexToUuid)
@@ -298,12 +298,14 @@ func (readCollections *ReadCollections) Summarize(arvLogger *logger.Logger) {
 		readCollections.OwnerToCollectionSize[coll.OwnerUuid] =
 			readCollections.OwnerToCollectionSize[coll.OwnerUuid] + coll.TotalSize
 
-		for block, _ := range coll.BlockDigestToSize {
-			readCollections.BlockToCollectionIndices[block] =
-				append(readCollections.BlockToCollectionIndices[block], collectionIndex)
-			storedReplication := readCollections.BlockToReplication[block]
+		for block, size := range coll.BlockDigestToSize {
+			locator := blockdigest.DigestWithSize{Digest: block, Size: uint32(size)}
+			readCollections.BlockToCollectionIndices[locator] =
+				append(readCollections.BlockToCollectionIndices[locator],
+					collectionIndex)
+			storedReplication := readCollections.BlockToReplication[locator]
 			if coll.ReplicationLevel > storedReplication {
-				readCollections.BlockToReplication[block] = coll.ReplicationLevel
+				readCollections.BlockToReplication[locator] = coll.ReplicationLevel
 			}
 		}
 	}
diff --git a/services/datamanager/collection/collection_test.go b/services/datamanager/collection/collection_test.go
index 3dc8e37..4af5d4c 100644
--- a/services/datamanager/collection/collection_test.go
+++ b/services/datamanager/collection/collection_test.go
@@ -21,8 +21,8 @@ var _ = Suite(&MySuite{})
 // BlockToCollectionUuids.
 type ExpectedSummary struct {
 	OwnerToCollectionSize  map[string]int
-	BlockToReplication     map[blockdigest.BlockDigest]int
-	BlockToCollectionUuids map[blockdigest.BlockDigest][]string
+	BlockToReplication     map[blockdigest.DigestWithSize]int
+	BlockToCollectionUuids map[blockdigest.DigestWithSize][]string
 }
 
 func CompareSummarizedReadCollections(c *C,
@@ -36,7 +36,7 @@ func CompareSummarizedReadCollections(c *C,
 		expected.BlockToReplication)
 
 	summarizedBlockToCollectionUuids :=
-		make(map[blockdigest.BlockDigest]map[string]struct{})
+		make(map[blockdigest.DigestWithSize]map[string]struct{})
 	for digest, indices := range summarized.BlockToCollectionIndices {
 		uuidSet := make(map[string]struct{})
 		summarizedBlockToCollectionUuids[digest] = uuidSet
@@ -46,7 +46,7 @@ func CompareSummarizedReadCollections(c *C,
 	}
 
 	expectedBlockToCollectionUuids :=
-		make(map[blockdigest.BlockDigest]map[string]struct{})
+		make(map[blockdigest.DigestWithSize]map[string]struct{})
 	for digest, uuidSlice := range expected.BlockToCollectionUuids {
 		uuidSet := make(map[string]struct{})
 		expectedBlockToCollectionUuids[digest] = uuidSet
@@ -69,13 +69,13 @@ func (s *MySuite) TestSummarizeSimple(checker *C) {
 
 	c := rc.UuidToCollection["col0"]
 
-	blockDigest1 := blockdigest.MakeTestBlockDigest(1)
-	blockDigest2 := blockdigest.MakeTestBlockDigest(2)
+	blockDigest1 := blockdigest.MakeTestDigestWithSize(1)
+	blockDigest2 := blockdigest.MakeTestDigestWithSize(2)
 
 	expected := ExpectedSummary{
 		OwnerToCollectionSize:  map[string]int{c.OwnerUuid: c.TotalSize},
-		BlockToReplication:     map[blockdigest.BlockDigest]int{blockDigest1: 5, blockDigest2: 5},
-		BlockToCollectionUuids: map[blockdigest.BlockDigest][]string{blockDigest1: []string{c.Uuid}, blockDigest2: []string{c.Uuid}},
+		BlockToReplication:     map[blockdigest.DigestWithSize]int{blockDigest1: 5, blockDigest2: 5},
+		BlockToCollectionUuids: map[blockdigest.DigestWithSize][]string{blockDigest1: []string{c.Uuid}, blockDigest2: []string{c.Uuid}},
 	}
 
 	CompareSummarizedReadCollections(checker, rc, expected)
@@ -98,21 +98,21 @@ func (s *MySuite) TestSummarizeOverlapping(checker *C) {
 	c0 := rc.UuidToCollection["col0"]
 	c1 := rc.UuidToCollection["col1"]
 
-	blockDigest1 := blockdigest.MakeTestBlockDigest(1)
-	blockDigest2 := blockdigest.MakeTestBlockDigest(2)
-	blockDigest3 := blockdigest.MakeTestBlockDigest(3)
+	blockDigest1 := blockdigest.MakeTestDigestWithSize(1)
+	blockDigest2 := blockdigest.MakeTestDigestWithSize(2)
+	blockDigest3 := blockdigest.MakeTestDigestWithSize(3)
 
 	expected := ExpectedSummary{
 		OwnerToCollectionSize: map[string]int{
 			c0.OwnerUuid: c0.TotalSize,
 			c1.OwnerUuid: c1.TotalSize,
 		},
-		BlockToReplication: map[blockdigest.BlockDigest]int{
+		BlockToReplication: map[blockdigest.DigestWithSize]int{
 			blockDigest1: 5,
 			blockDigest2: 8,
 			blockDigest3: 8,
 		},
-		BlockToCollectionUuids: map[blockdigest.BlockDigest][]string{
+		BlockToCollectionUuids: map[blockdigest.DigestWithSize][]string{
 			blockDigest1: []string{c0.Uuid},
 			blockDigest2: []string{c0.Uuid, c1.Uuid},
 			blockDigest3: []string{c1.Uuid},
diff --git a/services/datamanager/keep/keep.go b/services/datamanager/keep/keep.go
index e1c9c29..c666337 100644
--- a/services/datamanager/keep/keep.go
+++ b/services/datamanager/keep/keep.go
@@ -10,7 +10,6 @@ import (
 	"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
 	"git.curoverse.com/arvados.git/sdk/go/blockdigest"
 	"git.curoverse.com/arvados.git/sdk/go/logger"
-	"git.curoverse.com/arvados.git/sdk/go/manifest"
 	"git.curoverse.com/arvados.git/services/datamanager/loggerutil"
 	"io/ioutil"
 	"log"
@@ -29,20 +28,18 @@ type ServerAddress struct {
 
 // Info about a particular block returned by the server
 type BlockInfo struct {
-	Digest blockdigest.BlockDigest
-	Size   int
+	Digest blockdigest.DigestWithSize
 	Mtime  int64 // TODO(misha): Replace this with a timestamp.
 }
 
 // Info about a specified block given by a server
 type BlockServerInfo struct {
 	ServerIndex int
-	Size        int
 	Mtime       int64 // TODO(misha): Replace this with a timestamp.
 }
 
 type ServerContents struct {
-	BlockDigestToInfo map[blockdigest.BlockDigest]BlockInfo
+	BlockDigestToInfo map[blockdigest.DigestWithSize]BlockInfo
 }
 
 type ServerResponse struct {
@@ -55,7 +52,7 @@ type ReadServers struct {
 	KeepServerIndexToAddress []ServerAddress
 	KeepServerAddressToIndex map[ServerAddress]int
 	ServerToContents         map[ServerAddress]ServerContents
-	BlockToServers           map[blockdigest.BlockDigest][]BlockServerInfo
+	BlockToServers           map[blockdigest.DigestWithSize][]BlockServerInfo
 	BlockReplicationCounts   map[int]int
 }
 
@@ -192,7 +189,7 @@ func GetKeepServers(params GetKeepServersParams) (results ReadServers) {
 	}
 
 	results.ServerToContents = make(map[ServerAddress]ServerContents)
-	results.BlockToServers = make(map[blockdigest.BlockDigest][]BlockServerInfo)
+	results.BlockToServers = make(map[blockdigest.DigestWithSize][]BlockServerInfo)
 
 	// Read all the responses
 	for i := range sdkResponse.KeepServers {
@@ -207,7 +204,6 @@ func GetKeepServers(params GetKeepServersParams) (results ReadServers) {
 			results.BlockToServers[blockInfo.Digest] = append(
 				results.BlockToServers[blockInfo.Digest],
 				BlockServerInfo{ServerIndex: serverIndex,
-					Size:  blockInfo.Size,
 					Mtime: blockInfo.Mtime})
 		}
 	}
@@ -331,7 +327,7 @@ func ReadServerResponse(arvLogger *logger.Logger,
 
 	response.Address = keepServer
 	response.Contents.BlockDigestToInfo =
-		make(map[blockdigest.BlockDigest]BlockInfo)
+		make(map[blockdigest.DigestWithSize]BlockInfo)
 	scanner := bufio.NewScanner(resp.Body)
 	numLines, numDuplicates, numSizeDisagreements := 0, 0, 0
 	for scanner.Scan() {
@@ -348,33 +344,8 @@ func ReadServerResponse(arvLogger *logger.Logger,
 		if storedBlock, ok := response.Contents.BlockDigestToInfo[blockInfo.Digest]; ok {
 			// This server returned multiple lines containing the same block digest.
 			numDuplicates += 1
-			if storedBlock.Size != blockInfo.Size {
-				numSizeDisagreements += 1
-				// TODO(misha): Consider failing here.
-				message := fmt.Sprintf("Saw different sizes for the same block "+
-					"on %s: %+v %+v",
-					keepServer.String(),
-					storedBlock,
-					blockInfo)
-				log.Println(message)
-				if arvLogger != nil {
-					arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
-						keepInfo := logger.GetOrCreateMap(p, "keep_info")
-						serverInfo := keepInfo[keepServer.Uuid].(map[string]interface{})
-						var error_list []string
-						read_error_list, has_list := serverInfo["error_list"]
-						if has_list {
-							error_list = read_error_list.([]string)
-						} // If we didn't have the list, error_list is already an empty list
-						serverInfo["error_list"] = append(error_list, message)
-					})
-				}
-			}
-			// Keep the block that is bigger, or the block that's newer in
-			// the case of a size tie.
-			if storedBlock.Size < blockInfo.Size ||
-				(storedBlock.Size == blockInfo.Size &&
-					storedBlock.Mtime < blockInfo.Mtime) {
+			// Keep the block that's newer.
+			if storedBlock.Mtime < blockInfo.Mtime {
 				response.Contents.BlockDigestToInfo[blockInfo.Digest] = blockInfo
 			}
 		} else {
@@ -419,8 +390,8 @@ func parseBlockInfoFromIndexLine(indexLine string) (blockInfo BlockInfo, err err
 			tokens)
 	}
 
-	var locator manifest.BlockLocator
-	if locator, err = manifest.ParseBlockLocator(tokens[0]); err != nil {
+	var locator blockdigest.BlockLocator
+	if locator, err = blockdigest.ParseBlockLocator(tokens[0]); err != nil {
 		err = fmt.Errorf("%v Received error while parsing line \"%s\"",
 			err, indexLine)
 		return
@@ -436,8 +407,9 @@ func parseBlockInfoFromIndexLine(indexLine string) (blockInfo BlockInfo, err err
 	if err != nil {
 		return
 	}
-	blockInfo.Digest = locator.Digest
-	blockInfo.Size = locator.Size
+	blockInfo.Digest =
+		blockdigest.DigestWithSize{Digest: locator.Digest,
+			Size: uint32(locator.Size)}
 	return
 }
 
diff --git a/services/datamanager/summary/pull_list.go b/services/datamanager/summary/pull_list.go
index 726f2c6..e9bd5d1 100644
--- a/services/datamanager/summary/pull_list.go
+++ b/services/datamanager/summary/pull_list.go
@@ -14,13 +14,11 @@ import (
 	"strings"
 )
 
-type Locator struct {
-	Digest blockdigest.BlockDigest
-	// TODO(misha): Add size field to the Locator (and MarshalJSON() below)
-}
+type Locator blockdigest.DigestWithSize
 
 func (l Locator) MarshalJSON() ([]byte, error) {
-	return []byte("\"" + l.Digest.String() + "\""), nil
+	//return []byte("\"" + l.Digest.String() + "\""), nil
+	return []byte("\"" + blockdigest.DigestWithSize(l).String() + "\""), nil
 }
 
 // One entry in the Pull List
@@ -32,15 +30,24 @@ type PullRequest struct {
 // The Pull List for a particular server
 type PullList []PullRequest
 
-// PullListByDigest implements sort.Interface for PullList based on
+// PullListByLocator implements sort.Interface for PullList based on
 // the Digest.
-type PullListByDigest PullList
+type PullListByLocator PullList
 
-func (a PullListByDigest) Len() int      { return len(a) }
-func (a PullListByDigest) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
-func (a PullListByDigest) Less(i, j int) bool {
+func (a PullListByLocator) Len() int      { return len(a) }
+func (a PullListByLocator) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
+func (a PullListByLocator) Less(i, j int) bool {
 	di, dj := a[i].Locator.Digest, a[j].Locator.Digest
-	return di.H < dj.H || (di.H == dj.H && di.L < dj.L)
+	if di.H < dj.H {
+		return true
+	} else if di.H == dj.H {
+		if di.L < dj.L {
+			return true
+		} else if di.L == dj.L {
+			return a[i].Locator.Size < a[j].Locator.Size
+		}
+	}
+	return false
 }
 
 // For a given under-replicated block, this structure represents which
@@ -55,7 +62,7 @@ type PullServers struct {
 // each under-replicated block.
 func ComputePullServers(kc *keepclient.KeepClient,
 	keepServerInfo *keep.ReadServers,
-	blockToDesiredReplication map[blockdigest.BlockDigest]int,
+	blockToDesiredReplication map[blockdigest.DigestWithSize]int,
 	underReplicated BlockSet) (m map[Locator]PullServers) {
 	m = map[Locator]PullServers{}
 	// We use CanonicalString to avoid filling memory with dupicate
@@ -91,7 +98,7 @@ func ComputePullServers(kc *keepclient.KeepClient,
 				roots := keepclient.NewRootSorter(kc.LocalRoots(),
 					block.String()).GetSortedRoots()
 
-				l := Locator{Digest: block}
+				l := Locator(block)
 				m[l] = CreatePullServers(cs, serverHasBlock, writableServers,
 					roots, numCopiesMissing)
 			}
diff --git a/services/datamanager/summary/pull_list_test.go b/services/datamanager/summary/pull_list_test.go
index 692af5c..9628a32 100644
--- a/services/datamanager/summary/pull_list_test.go
+++ b/services/datamanager/summary/pull_list_test.go
@@ -29,13 +29,13 @@ func stringSet(slice ...string) (m map[string]struct{}) {
 
 func (s *MySuite) TestPullListPrintsJSONCorrectly(c *C) {
 	pl := PullList{PullRequest{
-		Locator: Locator{Digest: blockdigest.MakeTestBlockDigest(0xBadBeef)},
+		Locator: Locator(blockdigest.MakeTestDigestSpecifySize(0xBadBeef, 56789)),
 		Servers: []string{"keep0.qr1hi.arvadosapi.com:25107",
 			"keep1.qr1hi.arvadosapi.com:25108"}}}
 
 	b, err := json.Marshal(pl)
 	c.Assert(err, IsNil)
-	expectedOutput := `[{"locator":"0000000000000000000000000badbeef",` +
+	expectedOutput := `[{"locator":"0000000000000000000000000badbeef+56789",` +
 		`"servers":["keep0.qr1hi.arvadosapi.com:25107",` +
 		`"keep1.qr1hi.arvadosapi.com:25108"]}]`
 	c.Check(string(b), Equals, expectedOutput)
@@ -129,10 +129,10 @@ func (c *pullListMapEqualsChecker) Check(params []interface{}, names []string) (
 	}
 
 	for _, v := range obtained {
-		sort.Sort(PullListByDigest(v))
+		sort.Sort(PullListByLocator(v))
 	}
 	for _, v := range expected {
-		sort.Sort(PullListByDigest(v))
+		sort.Sort(PullListByLocator(v))
 	}
 
 	return DeepEquals.Check(params, names)
diff --git a/services/datamanager/summary/summary.go b/services/datamanager/summary/summary.go
index 8621d55..efb6061 100644
--- a/services/datamanager/summary/summary.go
+++ b/services/datamanager/summary/summary.go
@@ -11,10 +11,10 @@ import (
 	"sort"
 )
 
-type BlockSet map[blockdigest.BlockDigest]struct{}
+type BlockSet map[blockdigest.DigestWithSize]struct{}
 
 // Adds a single block to the set.
-func (bs BlockSet) Insert(digest blockdigest.BlockDigest) {
+func (bs BlockSet) Insert(digest blockdigest.DigestWithSize) {
 	bs[digest] = struct{}{}
 }
 
@@ -112,7 +112,7 @@ func (rlbs ReplicationLevelBlockSetMap) GetOrCreate(
 // Adds a block to the set for a given replication level.
 func (rlbs ReplicationLevelBlockSetMap) Insert(
 	repLevels ReplicationLevels,
-	block blockdigest.BlockDigest) {
+	block blockdigest.DigestWithSize) {
 	rlbs.GetOrCreate(repLevels).Insert(block)
 }
 
diff --git a/services/datamanager/summary/summary_test.go b/services/datamanager/summary/summary_test.go
index 04ca5a5..ea76df4 100644
--- a/services/datamanager/summary/summary_test.go
+++ b/services/datamanager/summary/summary_test.go
@@ -12,7 +12,7 @@ import (
 func BlockSetFromSlice(digests []int) (bs BlockSet) {
 	bs = make(BlockSet)
 	for _, digest := range digests {
-		bs.Insert(blockdigest.MakeTestBlockDigest(digest))
+		bs.Insert(blockdigest.MakeTestDigestWithSize(digest))
 	}
 	return
 }
@@ -46,9 +46,9 @@ func SummarizeReplication(readCollections collection.ReadCollections,
 // Takes a map from block digest to replication level and represents
 // it in a keep.ReadServers structure.
 func SpecifyReplication(digestToReplication map[int]int) (rs keep.ReadServers) {
-	rs.BlockToServers = make(map[blockdigest.BlockDigest][]keep.BlockServerInfo)
+	rs.BlockToServers = make(map[blockdigest.DigestWithSize][]keep.BlockServerInfo)
 	for digest, replication := range digestToReplication {
-		rs.BlockToServers[blockdigest.MakeTestBlockDigest(digest)] =
+		rs.BlockToServers[blockdigest.MakeTestDigestWithSize(digest)] =
 			make([]keep.BlockServerInfo, replication)
 	}
 	return
@@ -66,10 +66,10 @@ func VerifyToCollectionIndexSet(
 	expected := CollectionIndexSetFromSlice(expectedCollections)
 
 	rc := collection.ReadCollections{
-		BlockToCollectionIndices: map[blockdigest.BlockDigest][]int{},
+		BlockToCollectionIndices: map[blockdigest.DigestWithSize][]int{},
 	}
 	for digest, indices := range blockToCollectionIndices {
-		rc.BlockToCollectionIndices[blockdigest.MakeTestBlockDigest(digest)] = indices
+		rc.BlockToCollectionIndices[blockdigest.MakeTestDigestWithSize(digest)] = indices
 	}
 
 	returned := make(CollectionIndexSet)

commit 47d1bdc0960af5bfc8f2793c352f60483539c389
Author: mishaz <misha at curoverse.com>
Date:   Tue Jun 2 22:42:59 2015 +0000

    Added string to error message to help with debugging.

diff --git a/services/datamanager/keep/keep.go b/services/datamanager/keep/keep.go
index 2c4eff5..e1c9c29 100644
--- a/services/datamanager/keep/keep.go
+++ b/services/datamanager/keep/keep.go
@@ -421,6 +421,8 @@ func parseBlockInfoFromIndexLine(indexLine string) (blockInfo BlockInfo, err err
 
 	var locator manifest.BlockLocator
 	if locator, err = manifest.ParseBlockLocator(tokens[0]); err != nil {
+		err = fmt.Errorf("%v Received error while parsing line \"%s\"",
+			err, indexLine)
 		return
 	}
 	if len(locator.Hints) > 0 {

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list