[ARVADOS] created: c63ff55687f32dfdff01b9827b411b3757d48ee7
git at public.curoverse.com
git at public.curoverse.com
Thu Jun 4 17:17:56 EDT 2015
at c63ff55687f32dfdff01b9827b411b3757d48ee7 (commit)
commit c63ff55687f32dfdff01b9827b411b3757d48ee7
Author: mishaz <misha at curoverse.com>
Date: Thu Jun 4 21:11:01 2015 +0000
Added size to block locators, touching most of the code.
Moved BlockLocator (and associated parsing code) from sdk/go/manifest to sdk/go/blockdigest.
Added some helper methods, some for testing.
Some gofmt cleanup.
diff --git a/sdk/go/blockdigest/blockdigest.go b/sdk/go/blockdigest/blockdigest.go
index 5c29b90..d2f1c60 100644
--- a/sdk/go/blockdigest/blockdigest.go
+++ b/sdk/go/blockdigest/blockdigest.go
@@ -4,9 +4,14 @@ package blockdigest
import (
"fmt"
"log"
+ "regexp"
"strconv"
+ "strings"
)
+var LocatorPattern = regexp.MustCompile(
+ "^[0-9a-fA-F]{32}\\+[0-9]+(\\+[A-Z][A-Za-z0-9 at _-]+)*$")
+
// Stores a Block Locator Digest compactly, up to 128 bits.
// Can be used as a map key.
type BlockDigest struct {
@@ -14,10 +19,25 @@ type BlockDigest struct {
L uint64
}
+type DigestWithSize struct {
+ Digest BlockDigest
+ Size uint32
+}
+
+type BlockLocator struct {
+ Digest BlockDigest
+ Size int
+ Hints []string
+}
+
func (d BlockDigest) String() string {
return fmt.Sprintf("%016x%016x", d.H, d.L)
}
+func (w DigestWithSize) String() string {
+ return fmt.Sprintf("%s+%d", w.Digest.String(), w.Size)
+}
+
// Will create a new BlockDigest unless an error is encountered.
func FromString(s string) (dig BlockDigest, err error) {
if len(s) != 32 {
@@ -46,3 +66,34 @@ func AssertFromString(s string) BlockDigest {
}
return d
}
+
+func IsBlockLocator(s string) bool {
+ return LocatorPattern.MatchString(s)
+}
+
+func ParseBlockLocator(s string) (b BlockLocator, err error) {
+ if !LocatorPattern.MatchString(s) {
+ err = fmt.Errorf("String \"%s\" does not match BlockLocator pattern "+
+ "\"%s\".",
+ s,
+ LocatorPattern.String())
+ } else {
+ tokens := strings.Split(s, "+")
+ var blockSize int64
+ var blockDigest BlockDigest
+ // We expect both of the following to succeed since LocatorPattern
+ // restricts the strings appropriately.
+ blockDigest, err = FromString(tokens[0])
+ if err != nil {
+ return
+ }
+ blockSize, err = strconv.ParseInt(tokens[1], 10, 0)
+ if err != nil {
+ return
+ }
+ b.Digest = blockDigest
+ b.Size = int(blockSize)
+ b.Hints = tokens[2:]
+ }
+ return
+}
diff --git a/sdk/go/blockdigest/blockdigest_test.go b/sdk/go/blockdigest/blockdigest_test.go
index 068a138..017aaa4 100644
--- a/sdk/go/blockdigest/blockdigest_test.go
+++ b/sdk/go/blockdigest/blockdigest_test.go
@@ -2,10 +2,37 @@ package blockdigest
import (
"fmt"
+ "runtime"
"strings"
"testing"
)
+func getStackTrace() string {
+ buf := make([]byte, 1000)
+ bytes_written := runtime.Stack(buf, false)
+ return "Stack Trace:\n" + string(buf[:bytes_written])
+}
+
+func expectEqual(t *testing.T, actual interface{}, expected interface{}) {
+ if actual != expected {
+ t.Fatalf("Expected %v but received %v instead. %s",
+ expected,
+ actual,
+ getStackTrace())
+ }
+}
+
+func expectStringSlicesEqual(t *testing.T, actual []string, expected []string) {
+ if len(actual) != len(expected) {
+ t.Fatalf("Expected %v (length %d), but received %v (length %d) instead. %s", expected, len(expected), actual, len(actual), getStackTrace())
+ }
+ for i := range actual {
+ if actual[i] != expected[i] {
+ t.Fatalf("Expected %v but received %v instead (first disagreement at position %d). %s", expected, actual, i, getStackTrace())
+ }
+ }
+}
+
func expectValidDigestString(t *testing.T, s string) {
bd, err := FromString(s)
if err != nil {
@@ -13,7 +40,7 @@ func expectValidDigestString(t *testing.T, s string) {
}
expected := strings.ToLower(s)
-
+
if expected != bd.String() {
t.Fatalf("Expected %s to be returned by FromString(%s).String() but instead we received %s", expected, s, bd.String())
}
@@ -26,6 +53,26 @@ func expectInvalidDigestString(t *testing.T, s string) {
}
}
+func expectBlockLocator(t *testing.T, actual BlockLocator, expected BlockLocator) {
+ expectEqual(t, actual.Digest, expected.Digest)
+ expectEqual(t, actual.Size, expected.Size)
+ expectStringSlicesEqual(t, actual.Hints, expected.Hints)
+}
+
+func expectLocatorPatternMatch(t *testing.T, s string) {
+ if !LocatorPattern.MatchString(s) {
+ t.Fatalf("Expected \"%s\" to match locator pattern but it did not.",
+ s)
+ }
+}
+
+func expectLocatorPatternFail(t *testing.T, s string) {
+ if LocatorPattern.MatchString(s) {
+ t.Fatalf("Expected \"%s\" to fail locator pattern but it passed.",
+ s)
+ }
+}
+
func TestValidDigestStrings(t *testing.T) {
expectValidDigestString(t, "01234567890123456789abcdefabcdef")
expectValidDigestString(t, "01234567890123456789ABCDEFABCDEF")
@@ -49,7 +96,7 @@ func TestBlockDigestGetsPrettyPrintedByPrintf(t *testing.T) {
input := "01234567890123456789abcdefabcdef"
prettyPrinted := fmt.Sprintf("%v", AssertFromString(input))
if prettyPrinted != input {
- t.Fatalf("Expected blockDigest produced from \"%s\" to be printed as " +
+ t.Fatalf("Expected blockDigest produced from \"%s\" to be printed as "+
"\"%s\", but instead it was printed as %s",
input, input, prettyPrinted)
}
@@ -58,13 +105,13 @@ func TestBlockDigestGetsPrettyPrintedByPrintf(t *testing.T) {
func TestBlockDigestGetsPrettyPrintedByPrintfInNestedStructs(t *testing.T) {
input := "01234567890123456789abcdefabcdef"
value := 42
- nested := struct{
+ nested := struct {
// Fun trivia fact: If this field was called "digest" instead of
// "Digest", then it would not be exported and String() would
// never get called on it and our output would look very
// different.
Digest BlockDigest
- value int
+ value int
}{
AssertFromString(input),
value,
@@ -72,8 +119,44 @@ func TestBlockDigestGetsPrettyPrintedByPrintfInNestedStructs(t *testing.T) {
prettyPrinted := fmt.Sprintf("%+v", nested)
expected := fmt.Sprintf("{Digest:%s value:%d}", input, value)
if prettyPrinted != expected {
- t.Fatalf("Expected blockDigest produced from \"%s\" to be printed as " +
+ t.Fatalf("Expected blockDigest produced from \"%s\" to be printed as "+
"\"%s\", but instead it was printed as %s",
input, expected, prettyPrinted)
}
}
+
+func TestLocatorPatternBasic(t *testing.T) {
+ expectLocatorPatternMatch(t, "12345678901234567890123456789012+12345")
+ expectLocatorPatternMatch(t, "A2345678901234abcdefababdeffdfdf+12345")
+ expectLocatorPatternMatch(t, "12345678901234567890123456789012+12345+A1")
+ expectLocatorPatternMatch(t,
+ "12345678901234567890123456789012+12345+A1+B123wxyz at _-")
+ expectLocatorPatternMatch(t,
+ "12345678901234567890123456789012+12345+A1+B123wxyz at _-+C@")
+
+ expectLocatorPatternFail(t, "12345678901234567890123456789012")
+ expectLocatorPatternFail(t, "12345678901234567890123456789012+")
+ expectLocatorPatternFail(t, "12345678901234567890123456789012+12345+")
+ expectLocatorPatternFail(t, "1234567890123456789012345678901+12345")
+ expectLocatorPatternFail(t, "123456789012345678901234567890123+12345")
+ expectLocatorPatternFail(t, "g2345678901234abcdefababdeffdfdf+12345")
+ expectLocatorPatternFail(t, "12345678901234567890123456789012+12345 ")
+ expectLocatorPatternFail(t, "12345678901234567890123456789012+12345+1")
+ expectLocatorPatternFail(t, "12345678901234567890123456789012+12345+1A")
+ expectLocatorPatternFail(t, "12345678901234567890123456789012+12345+A")
+ expectLocatorPatternFail(t, "12345678901234567890123456789012+12345+a1")
+ expectLocatorPatternFail(t, "12345678901234567890123456789012+12345+A1+")
+ expectLocatorPatternFail(t, "12345678901234567890123456789012+12345+A1+B")
+ expectLocatorPatternFail(t, "12345678901234567890123456789012+12345+A+B2")
+}
+
+func TestParseBlockLocatorSimple(t *testing.T) {
+ b, err := ParseBlockLocator("365f83f5f808896ec834c8b595288735+2310+K at qr1hi+Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf")
+ if err != nil {
+ t.Fatalf("Unexpected error parsing block locator: %v", err)
+ }
+ expectBlockLocator(t, b, BlockLocator{Digest: AssertFromString("365f83f5f808896ec834c8b595288735"),
+ Size: 2310,
+ Hints: []string{"K at qr1hi",
+ "Af0c9a66381f3b028677411926f0be1c6282fe67c at 542b5ddf"}})
+}
diff --git a/sdk/go/blockdigest/testing.go b/sdk/go/blockdigest/testing.go
index eae3f3c..40f08ce 100644
--- a/sdk/go/blockdigest/testing.go
+++ b/sdk/go/blockdigest/testing.go
@@ -2,11 +2,15 @@
package blockdigest
-import (
- "fmt"
-)
-
// Just used for testing when we need some distinct BlockDigests
func MakeTestBlockDigest(i int) BlockDigest {
- return AssertFromString(fmt.Sprintf("%032x", i))
+ return BlockDigest{L: uint64(i)}
+}
+
+func MakeTestDigestSpecifySize(i int, s int) DigestWithSize {
+ return DigestWithSize{Digest: BlockDigest{L: uint64(i)}, Size: uint32(s)}
+}
+
+func MakeTestDigestWithSize(i int) DigestWithSize {
+ return MakeTestDigestSpecifySize(i, i)
}
diff --git a/sdk/go/manifest/manifest.go b/sdk/go/manifest/manifest.go
index f6698c6..4e816cd 100644
--- a/sdk/go/manifest/manifest.go
+++ b/sdk/go/manifest/manifest.go
@@ -5,27 +5,15 @@
package manifest
import (
- "fmt"
"git.curoverse.com/arvados.git/sdk/go/blockdigest"
"log"
- "regexp"
- "strconv"
"strings"
)
-var LocatorPattern = regexp.MustCompile(
- "^[0-9a-fA-F]{32}\\+[0-9]+(\\+[A-Z][A-Za-z0-9 at _-]+)*$")
-
type Manifest struct {
Text string
}
-type BlockLocator struct {
- Digest blockdigest.BlockDigest
- Size int
- Hints []string
-}
-
// Represents a single line from a manifest.
type ManifestStream struct {
StreamName string
@@ -33,40 +21,13 @@ type ManifestStream struct {
Files []string
}
-func ParseBlockLocator(s string) (b BlockLocator, err error) {
- if !LocatorPattern.MatchString(s) {
- err = fmt.Errorf("String \"%s\" does not match BlockLocator pattern "+
- "\"%s\".",
- s,
- LocatorPattern.String())
- } else {
- tokens := strings.Split(s, "+")
- var blockSize int64
- var blockDigest blockdigest.BlockDigest
- // We expect both of the following to succeed since LocatorPattern
- // restricts the strings appropriately.
- blockDigest, err = blockdigest.FromString(tokens[0])
- if err != nil {
- return
- }
- blockSize, err = strconv.ParseInt(tokens[1], 10, 0)
- if err != nil {
- return
- }
- b.Digest = blockDigest
- b.Size = int(blockSize)
- b.Hints = tokens[2:]
- }
- return
-}
-
func parseManifestStream(s string) (m ManifestStream) {
tokens := strings.Split(s, " ")
m.StreamName = tokens[0]
tokens = tokens[1:]
var i int
for i = range tokens {
- if !LocatorPattern.MatchString(tokens[i]) {
+ if !blockdigest.IsBlockLocator(tokens[i]) {
break
}
}
@@ -100,12 +61,12 @@ func (m *Manifest) StreamIter() <-chan ManifestStream {
// Blocks may appear mulitple times within the same manifest if they
// are used by multiple files. In that case this Iterator will output
// the same block multiple times.
-func (m *Manifest) BlockIterWithDuplicates() <-chan BlockLocator {
- blockChannel := make(chan BlockLocator)
+func (m *Manifest) BlockIterWithDuplicates() <-chan blockdigest.BlockLocator {
+ blockChannel := make(chan blockdigest.BlockLocator)
go func(streamChannel <-chan ManifestStream) {
for m := range streamChannel {
for _, block := range m.Blocks {
- if b, err := ParseBlockLocator(block); err == nil {
+ if b, err := blockdigest.ParseBlockLocator(block); err == nil {
blockChannel <- b
} else {
log.Printf("ERROR: Failed to parse block: %v", err)
diff --git a/sdk/go/manifest/manifest_test.go b/sdk/go/manifest/manifest_test.go
index c1bfb14..8cfe3d9 100644
--- a/sdk/go/manifest/manifest_test.go
+++ b/sdk/go/manifest/manifest_test.go
@@ -7,14 +7,14 @@ import (
"testing"
)
-func getStackTrace() (string) {
+func getStackTrace() string {
buf := make([]byte, 1000)
bytes_written := runtime.Stack(buf, false)
return "Stack Trace:\n" + string(buf[:bytes_written])
}
func expectFromChannel(t *testing.T, c <-chan string, expected string) {
- actual, ok := <- c
+ actual, ok := <-c
if !ok {
t.Fatalf("Expected to receive %s but channel was closed. %s",
expected,
@@ -29,7 +29,7 @@ func expectFromChannel(t *testing.T, c <-chan string, expected string) {
}
func expectChannelClosed(t *testing.T, c <-chan interface{}) {
- received, ok := <- c
+ received, ok := <-c
if ok {
t.Fatalf("Expected channel to be closed, but received %v instead. %s",
received,
@@ -63,67 +63,17 @@ func expectManifestStream(t *testing.T, actual ManifestStream, expected Manifest
expectStringSlicesEqual(t, actual.Files, expected.Files)
}
-func expectBlockLocator(t *testing.T, actual BlockLocator, expected BlockLocator) {
+func expectBlockLocator(t *testing.T, actual blockdigest.BlockLocator, expected blockdigest.BlockLocator) {
expectEqual(t, actual.Digest, expected.Digest)
expectEqual(t, actual.Size, expected.Size)
expectStringSlicesEqual(t, actual.Hints, expected.Hints)
}
-func expectLocatorPatternMatch(t *testing.T, s string) {
- if !LocatorPattern.MatchString(s) {
- t.Fatalf("Expected \"%s\" to match locator pattern but it did not.",
- s)
- }
-}
-
-func expectLocatorPatternFail(t *testing.T, s string) {
- if LocatorPattern.MatchString(s) {
- t.Fatalf("Expected \"%s\" to fail locator pattern but it passed.",
- s)
- }
-}
-
-func TestLocatorPatternBasic(t *testing.T) {
- expectLocatorPatternMatch(t, "12345678901234567890123456789012+12345")
- expectLocatorPatternMatch(t, "A2345678901234abcdefababdeffdfdf+12345")
- expectLocatorPatternMatch(t, "12345678901234567890123456789012+12345+A1")
- expectLocatorPatternMatch(t,
- "12345678901234567890123456789012+12345+A1+B123wxyz at _-")
- expectLocatorPatternMatch(t,
- "12345678901234567890123456789012+12345+A1+B123wxyz at _-+C@")
-
- expectLocatorPatternFail(t, "12345678901234567890123456789012")
- expectLocatorPatternFail(t, "12345678901234567890123456789012+")
- expectLocatorPatternFail(t, "12345678901234567890123456789012+12345+")
- expectLocatorPatternFail(t, "1234567890123456789012345678901+12345")
- expectLocatorPatternFail(t, "123456789012345678901234567890123+12345")
- expectLocatorPatternFail(t, "g2345678901234abcdefababdeffdfdf+12345")
- expectLocatorPatternFail(t, "12345678901234567890123456789012+12345 ")
- expectLocatorPatternFail(t, "12345678901234567890123456789012+12345+1")
- expectLocatorPatternFail(t, "12345678901234567890123456789012+12345+1A")
- expectLocatorPatternFail(t, "12345678901234567890123456789012+12345+A")
- expectLocatorPatternFail(t, "12345678901234567890123456789012+12345+a1")
- expectLocatorPatternFail(t, "12345678901234567890123456789012+12345+A1+")
- expectLocatorPatternFail(t, "12345678901234567890123456789012+12345+A1+B")
- expectLocatorPatternFail(t, "12345678901234567890123456789012+12345+A+B2")
-}
-
func TestParseManifestStreamSimple(t *testing.T) {
m := parseManifestStream(". 365f83f5f808896ec834c8b595288735+2310+K at qr1hi+Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf 0:2310:qr1hi-8i9sb-ienvmpve1a0vpoi.log.txt")
expectManifestStream(t, m, ManifestStream{StreamName: ".",
Blocks: []string{"365f83f5f808896ec834c8b595288735+2310+K at qr1hi+Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf"},
- Files: []string{"0:2310:qr1hi-8i9sb-ienvmpve1a0vpoi.log.txt"}})
-}
-
-func TestParseBlockLocatorSimple(t *testing.T) {
- b, err := ParseBlockLocator("365f83f5f808896ec834c8b595288735+2310+K at qr1hi+Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf")
- if err != nil {
- t.Fatalf("Unexpected error parsing block locator: %v", err)
- }
- expectBlockLocator(t, b, BlockLocator{Digest: blockdigest.AssertFromString("365f83f5f808896ec834c8b595288735"),
- Size: 2310,
- Hints: []string{"K at qr1hi",
- "Af0c9a66381f3b028677411926f0be1c6282fe67c at 542b5ddf"}})
+ Files: []string{"0:2310:qr1hi-8i9sb-ienvmpve1a0vpoi.log.txt"}})
}
func TestStreamIterShortManifestWithBlankStreams(t *testing.T) {
@@ -139,9 +89,9 @@ func TestStreamIterShortManifestWithBlankStreams(t *testing.T) {
firstStream,
ManifestStream{StreamName: ".",
Blocks: []string{"b746e3d2104645f2f64cd3cc69dd895d+15693477+E2866e643690156651c03d876e638e674dcd79475 at 5441920c"},
- Files: []string{"0:15893477:chr10_band0_s0_e3000000.fj"}})
+ Files: []string{"0:15893477:chr10_band0_s0_e3000000.fj"}})
- received, ok := <- streamIter
+ received, ok := <-streamIter
if ok {
t.Fatalf("Expected streamIter to be closed, but received %v instead.",
received)
@@ -159,20 +109,20 @@ func TestBlockIterLongManifest(t *testing.T) {
firstBlock := <-blockChannel
expectBlockLocator(t,
firstBlock,
- BlockLocator{Digest: blockdigest.AssertFromString("b746e3d2104645f2f64cd3cc69dd895d"),
- Size: 15693477,
+ blockdigest.BlockLocator{Digest: blockdigest.AssertFromString("b746e3d2104645f2f64cd3cc69dd895d"),
+ Size: 15693477,
Hints: []string{"E2866e643690156651c03d876e638e674dcd79475 at 5441920c"}})
blocksRead := 1
- var lastBlock BlockLocator
+ var lastBlock blockdigest.BlockLocator
for lastBlock = range blockChannel {
//log.Printf("Blocks Read: %d", blocksRead)
- blocksRead++
+ blocksRead++
}
expectEqual(t, blocksRead, 853)
expectBlockLocator(t,
lastBlock,
- BlockLocator{Digest: blockdigest.AssertFromString("f9ce82f59e5908d2d70e18df9679b469"),
- Size: 31367794,
+ blockdigest.BlockLocator{Digest: blockdigest.AssertFromString("f9ce82f59e5908d2d70e18df9679b469"),
+ Size: 31367794,
Hints: []string{"E53f903684239bcc114f7bf8ff9bd6089f33058db at 5441920c"}})
}
diff --git a/services/datamanager/collection/collection.go b/services/datamanager/collection/collection.go
index ddc4f95..ed6df9d 100644
--- a/services/datamanager/collection/collection.go
+++ b/services/datamanager/collection/collection.go
@@ -42,10 +42,10 @@ type ReadCollections struct {
ReadAllCollections bool
UuidToCollection map[string]Collection
OwnerToCollectionSize map[string]int
- BlockToReplication map[blockdigest.BlockDigest]int
+ BlockToReplication map[blockdigest.DigestWithSize]int
CollectionUuidToIndex map[string]int
CollectionIndexToUuid []string
- BlockToCollectionIndices map[blockdigest.BlockDigest][]int
+ BlockToCollectionIndices map[blockdigest.DigestWithSize][]int
}
type GetCollectionsParams struct {
@@ -283,11 +283,11 @@ func ProcessCollections(arvLogger *logger.Logger,
func (readCollections *ReadCollections) Summarize(arvLogger *logger.Logger) {
readCollections.OwnerToCollectionSize = make(map[string]int)
- readCollections.BlockToReplication = make(map[blockdigest.BlockDigest]int)
+ readCollections.BlockToReplication = make(map[blockdigest.DigestWithSize]int)
numCollections := len(readCollections.UuidToCollection)
readCollections.CollectionUuidToIndex = make(map[string]int, numCollections)
readCollections.CollectionIndexToUuid = make([]string, 0, numCollections)
- readCollections.BlockToCollectionIndices = make(map[blockdigest.BlockDigest][]int)
+ readCollections.BlockToCollectionIndices = make(map[blockdigest.DigestWithSize][]int)
for _, coll := range readCollections.UuidToCollection {
collectionIndex := len(readCollections.CollectionIndexToUuid)
@@ -298,12 +298,14 @@ func (readCollections *ReadCollections) Summarize(arvLogger *logger.Logger) {
readCollections.OwnerToCollectionSize[coll.OwnerUuid] =
readCollections.OwnerToCollectionSize[coll.OwnerUuid] + coll.TotalSize
- for block, _ := range coll.BlockDigestToSize {
- readCollections.BlockToCollectionIndices[block] =
- append(readCollections.BlockToCollectionIndices[block], collectionIndex)
- storedReplication := readCollections.BlockToReplication[block]
+ for block, size := range coll.BlockDigestToSize {
+ locator := blockdigest.DigestWithSize{Digest: block, Size: uint32(size)}
+ readCollections.BlockToCollectionIndices[locator] =
+ append(readCollections.BlockToCollectionIndices[locator],
+ collectionIndex)
+ storedReplication := readCollections.BlockToReplication[locator]
if coll.ReplicationLevel > storedReplication {
- readCollections.BlockToReplication[block] = coll.ReplicationLevel
+ readCollections.BlockToReplication[locator] = coll.ReplicationLevel
}
}
}
diff --git a/services/datamanager/collection/collection_test.go b/services/datamanager/collection/collection_test.go
index 3dc8e37..4af5d4c 100644
--- a/services/datamanager/collection/collection_test.go
+++ b/services/datamanager/collection/collection_test.go
@@ -21,8 +21,8 @@ var _ = Suite(&MySuite{})
// BlockToCollectionUuids.
type ExpectedSummary struct {
OwnerToCollectionSize map[string]int
- BlockToReplication map[blockdigest.BlockDigest]int
- BlockToCollectionUuids map[blockdigest.BlockDigest][]string
+ BlockToReplication map[blockdigest.DigestWithSize]int
+ BlockToCollectionUuids map[blockdigest.DigestWithSize][]string
}
func CompareSummarizedReadCollections(c *C,
@@ -36,7 +36,7 @@ func CompareSummarizedReadCollections(c *C,
expected.BlockToReplication)
summarizedBlockToCollectionUuids :=
- make(map[blockdigest.BlockDigest]map[string]struct{})
+ make(map[blockdigest.DigestWithSize]map[string]struct{})
for digest, indices := range summarized.BlockToCollectionIndices {
uuidSet := make(map[string]struct{})
summarizedBlockToCollectionUuids[digest] = uuidSet
@@ -46,7 +46,7 @@ func CompareSummarizedReadCollections(c *C,
}
expectedBlockToCollectionUuids :=
- make(map[blockdigest.BlockDigest]map[string]struct{})
+ make(map[blockdigest.DigestWithSize]map[string]struct{})
for digest, uuidSlice := range expected.BlockToCollectionUuids {
uuidSet := make(map[string]struct{})
expectedBlockToCollectionUuids[digest] = uuidSet
@@ -69,13 +69,13 @@ func (s *MySuite) TestSummarizeSimple(checker *C) {
c := rc.UuidToCollection["col0"]
- blockDigest1 := blockdigest.MakeTestBlockDigest(1)
- blockDigest2 := blockdigest.MakeTestBlockDigest(2)
+ blockDigest1 := blockdigest.MakeTestDigestWithSize(1)
+ blockDigest2 := blockdigest.MakeTestDigestWithSize(2)
expected := ExpectedSummary{
OwnerToCollectionSize: map[string]int{c.OwnerUuid: c.TotalSize},
- BlockToReplication: map[blockdigest.BlockDigest]int{blockDigest1: 5, blockDigest2: 5},
- BlockToCollectionUuids: map[blockdigest.BlockDigest][]string{blockDigest1: []string{c.Uuid}, blockDigest2: []string{c.Uuid}},
+ BlockToReplication: map[blockdigest.DigestWithSize]int{blockDigest1: 5, blockDigest2: 5},
+ BlockToCollectionUuids: map[blockdigest.DigestWithSize][]string{blockDigest1: []string{c.Uuid}, blockDigest2: []string{c.Uuid}},
}
CompareSummarizedReadCollections(checker, rc, expected)
@@ -98,21 +98,21 @@ func (s *MySuite) TestSummarizeOverlapping(checker *C) {
c0 := rc.UuidToCollection["col0"]
c1 := rc.UuidToCollection["col1"]
- blockDigest1 := blockdigest.MakeTestBlockDigest(1)
- blockDigest2 := blockdigest.MakeTestBlockDigest(2)
- blockDigest3 := blockdigest.MakeTestBlockDigest(3)
+ blockDigest1 := blockdigest.MakeTestDigestWithSize(1)
+ blockDigest2 := blockdigest.MakeTestDigestWithSize(2)
+ blockDigest3 := blockdigest.MakeTestDigestWithSize(3)
expected := ExpectedSummary{
OwnerToCollectionSize: map[string]int{
c0.OwnerUuid: c0.TotalSize,
c1.OwnerUuid: c1.TotalSize,
},
- BlockToReplication: map[blockdigest.BlockDigest]int{
+ BlockToReplication: map[blockdigest.DigestWithSize]int{
blockDigest1: 5,
blockDigest2: 8,
blockDigest3: 8,
},
- BlockToCollectionUuids: map[blockdigest.BlockDigest][]string{
+ BlockToCollectionUuids: map[blockdigest.DigestWithSize][]string{
blockDigest1: []string{c0.Uuid},
blockDigest2: []string{c0.Uuid, c1.Uuid},
blockDigest3: []string{c1.Uuid},
diff --git a/services/datamanager/keep/keep.go b/services/datamanager/keep/keep.go
index e1c9c29..c666337 100644
--- a/services/datamanager/keep/keep.go
+++ b/services/datamanager/keep/keep.go
@@ -10,7 +10,6 @@ import (
"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
"git.curoverse.com/arvados.git/sdk/go/blockdigest"
"git.curoverse.com/arvados.git/sdk/go/logger"
- "git.curoverse.com/arvados.git/sdk/go/manifest"
"git.curoverse.com/arvados.git/services/datamanager/loggerutil"
"io/ioutil"
"log"
@@ -29,20 +28,18 @@ type ServerAddress struct {
// Info about a particular block returned by the server
type BlockInfo struct {
- Digest blockdigest.BlockDigest
- Size int
+ Digest blockdigest.DigestWithSize
Mtime int64 // TODO(misha): Replace this with a timestamp.
}
// Info about a specified block given by a server
type BlockServerInfo struct {
ServerIndex int
- Size int
Mtime int64 // TODO(misha): Replace this with a timestamp.
}
type ServerContents struct {
- BlockDigestToInfo map[blockdigest.BlockDigest]BlockInfo
+ BlockDigestToInfo map[blockdigest.DigestWithSize]BlockInfo
}
type ServerResponse struct {
@@ -55,7 +52,7 @@ type ReadServers struct {
KeepServerIndexToAddress []ServerAddress
KeepServerAddressToIndex map[ServerAddress]int
ServerToContents map[ServerAddress]ServerContents
- BlockToServers map[blockdigest.BlockDigest][]BlockServerInfo
+ BlockToServers map[blockdigest.DigestWithSize][]BlockServerInfo
BlockReplicationCounts map[int]int
}
@@ -192,7 +189,7 @@ func GetKeepServers(params GetKeepServersParams) (results ReadServers) {
}
results.ServerToContents = make(map[ServerAddress]ServerContents)
- results.BlockToServers = make(map[blockdigest.BlockDigest][]BlockServerInfo)
+ results.BlockToServers = make(map[blockdigest.DigestWithSize][]BlockServerInfo)
// Read all the responses
for i := range sdkResponse.KeepServers {
@@ -207,7 +204,6 @@ func GetKeepServers(params GetKeepServersParams) (results ReadServers) {
results.BlockToServers[blockInfo.Digest] = append(
results.BlockToServers[blockInfo.Digest],
BlockServerInfo{ServerIndex: serverIndex,
- Size: blockInfo.Size,
Mtime: blockInfo.Mtime})
}
}
@@ -331,7 +327,7 @@ func ReadServerResponse(arvLogger *logger.Logger,
response.Address = keepServer
response.Contents.BlockDigestToInfo =
- make(map[blockdigest.BlockDigest]BlockInfo)
+ make(map[blockdigest.DigestWithSize]BlockInfo)
scanner := bufio.NewScanner(resp.Body)
numLines, numDuplicates, numSizeDisagreements := 0, 0, 0
for scanner.Scan() {
@@ -348,33 +344,8 @@ func ReadServerResponse(arvLogger *logger.Logger,
if storedBlock, ok := response.Contents.BlockDigestToInfo[blockInfo.Digest]; ok {
// This server returned multiple lines containing the same block digest.
numDuplicates += 1
- if storedBlock.Size != blockInfo.Size {
- numSizeDisagreements += 1
- // TODO(misha): Consider failing here.
- message := fmt.Sprintf("Saw different sizes for the same block "+
- "on %s: %+v %+v",
- keepServer.String(),
- storedBlock,
- blockInfo)
- log.Println(message)
- if arvLogger != nil {
- arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
- keepInfo := logger.GetOrCreateMap(p, "keep_info")
- serverInfo := keepInfo[keepServer.Uuid].(map[string]interface{})
- var error_list []string
- read_error_list, has_list := serverInfo["error_list"]
- if has_list {
- error_list = read_error_list.([]string)
- } // If we didn't have the list, error_list is already an empty list
- serverInfo["error_list"] = append(error_list, message)
- })
- }
- }
- // Keep the block that is bigger, or the block that's newer in
- // the case of a size tie.
- if storedBlock.Size < blockInfo.Size ||
- (storedBlock.Size == blockInfo.Size &&
- storedBlock.Mtime < blockInfo.Mtime) {
+ // Keep the block that's newer.
+ if storedBlock.Mtime < blockInfo.Mtime {
response.Contents.BlockDigestToInfo[blockInfo.Digest] = blockInfo
}
} else {
@@ -419,8 +390,8 @@ func parseBlockInfoFromIndexLine(indexLine string) (blockInfo BlockInfo, err err
tokens)
}
- var locator manifest.BlockLocator
- if locator, err = manifest.ParseBlockLocator(tokens[0]); err != nil {
+ var locator blockdigest.BlockLocator
+ if locator, err = blockdigest.ParseBlockLocator(tokens[0]); err != nil {
err = fmt.Errorf("%v Received error while parsing line \"%s\"",
err, indexLine)
return
@@ -436,8 +407,9 @@ func parseBlockInfoFromIndexLine(indexLine string) (blockInfo BlockInfo, err err
if err != nil {
return
}
- blockInfo.Digest = locator.Digest
- blockInfo.Size = locator.Size
+ blockInfo.Digest =
+ blockdigest.DigestWithSize{Digest: locator.Digest,
+ Size: uint32(locator.Size)}
return
}
diff --git a/services/datamanager/summary/pull_list.go b/services/datamanager/summary/pull_list.go
index 726f2c6..e9bd5d1 100644
--- a/services/datamanager/summary/pull_list.go
+++ b/services/datamanager/summary/pull_list.go
@@ -14,13 +14,11 @@ import (
"strings"
)
-type Locator struct {
- Digest blockdigest.BlockDigest
- // TODO(misha): Add size field to the Locator (and MarshalJSON() below)
-}
+type Locator blockdigest.DigestWithSize
func (l Locator) MarshalJSON() ([]byte, error) {
- return []byte("\"" + l.Digest.String() + "\""), nil
+ //return []byte("\"" + l.Digest.String() + "\""), nil
+ return []byte("\"" + blockdigest.DigestWithSize(l).String() + "\""), nil
}
// One entry in the Pull List
@@ -32,15 +30,24 @@ type PullRequest struct {
// The Pull List for a particular server
type PullList []PullRequest
-// PullListByDigest implements sort.Interface for PullList based on
+// PullListByLocator implements sort.Interface for PullList based on
// the Digest.
-type PullListByDigest PullList
+type PullListByLocator PullList
-func (a PullListByDigest) Len() int { return len(a) }
-func (a PullListByDigest) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
-func (a PullListByDigest) Less(i, j int) bool {
+func (a PullListByLocator) Len() int { return len(a) }
+func (a PullListByLocator) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
+func (a PullListByLocator) Less(i, j int) bool {
di, dj := a[i].Locator.Digest, a[j].Locator.Digest
- return di.H < dj.H || (di.H == dj.H && di.L < dj.L)
+ if di.H < dj.H {
+ return true
+ } else if di.H == dj.H {
+ if di.L < dj.L {
+ return true
+ } else if di.L == dj.L {
+ return a[i].Locator.Size < a[j].Locator.Size
+ }
+ }
+ return false
}
// For a given under-replicated block, this structure represents which
@@ -55,7 +62,7 @@ type PullServers struct {
// each under-replicated block.
func ComputePullServers(kc *keepclient.KeepClient,
keepServerInfo *keep.ReadServers,
- blockToDesiredReplication map[blockdigest.BlockDigest]int,
+ blockToDesiredReplication map[blockdigest.DigestWithSize]int,
underReplicated BlockSet) (m map[Locator]PullServers) {
m = map[Locator]PullServers{}
// We use CanonicalString to avoid filling memory with dupicate
@@ -91,7 +98,7 @@ func ComputePullServers(kc *keepclient.KeepClient,
roots := keepclient.NewRootSorter(kc.LocalRoots(),
block.String()).GetSortedRoots()
- l := Locator{Digest: block}
+ l := Locator(block)
m[l] = CreatePullServers(cs, serverHasBlock, writableServers,
roots, numCopiesMissing)
}
diff --git a/services/datamanager/summary/pull_list_test.go b/services/datamanager/summary/pull_list_test.go
index 692af5c..9628a32 100644
--- a/services/datamanager/summary/pull_list_test.go
+++ b/services/datamanager/summary/pull_list_test.go
@@ -29,13 +29,13 @@ func stringSet(slice ...string) (m map[string]struct{}) {
func (s *MySuite) TestPullListPrintsJSONCorrectly(c *C) {
pl := PullList{PullRequest{
- Locator: Locator{Digest: blockdigest.MakeTestBlockDigest(0xBadBeef)},
+ Locator: Locator(blockdigest.MakeTestDigestSpecifySize(0xBadBeef, 56789)),
Servers: []string{"keep0.qr1hi.arvadosapi.com:25107",
"keep1.qr1hi.arvadosapi.com:25108"}}}
b, err := json.Marshal(pl)
c.Assert(err, IsNil)
- expectedOutput := `[{"locator":"0000000000000000000000000badbeef",` +
+ expectedOutput := `[{"locator":"0000000000000000000000000badbeef+56789",` +
`"servers":["keep0.qr1hi.arvadosapi.com:25107",` +
`"keep1.qr1hi.arvadosapi.com:25108"]}]`
c.Check(string(b), Equals, expectedOutput)
@@ -129,10 +129,10 @@ func (c *pullListMapEqualsChecker) Check(params []interface{}, names []string) (
}
for _, v := range obtained {
- sort.Sort(PullListByDigest(v))
+ sort.Sort(PullListByLocator(v))
}
for _, v := range expected {
- sort.Sort(PullListByDigest(v))
+ sort.Sort(PullListByLocator(v))
}
return DeepEquals.Check(params, names)
diff --git a/services/datamanager/summary/summary.go b/services/datamanager/summary/summary.go
index 8621d55..efb6061 100644
--- a/services/datamanager/summary/summary.go
+++ b/services/datamanager/summary/summary.go
@@ -11,10 +11,10 @@ import (
"sort"
)
-type BlockSet map[blockdigest.BlockDigest]struct{}
+type BlockSet map[blockdigest.DigestWithSize]struct{}
// Adds a single block to the set.
-func (bs BlockSet) Insert(digest blockdigest.BlockDigest) {
+func (bs BlockSet) Insert(digest blockdigest.DigestWithSize) {
bs[digest] = struct{}{}
}
@@ -112,7 +112,7 @@ func (rlbs ReplicationLevelBlockSetMap) GetOrCreate(
// Adds a block to the set for a given replication level.
func (rlbs ReplicationLevelBlockSetMap) Insert(
repLevels ReplicationLevels,
- block blockdigest.BlockDigest) {
+ block blockdigest.DigestWithSize) {
rlbs.GetOrCreate(repLevels).Insert(block)
}
diff --git a/services/datamanager/summary/summary_test.go b/services/datamanager/summary/summary_test.go
index 04ca5a5..ea76df4 100644
--- a/services/datamanager/summary/summary_test.go
+++ b/services/datamanager/summary/summary_test.go
@@ -12,7 +12,7 @@ import (
func BlockSetFromSlice(digests []int) (bs BlockSet) {
bs = make(BlockSet)
for _, digest := range digests {
- bs.Insert(blockdigest.MakeTestBlockDigest(digest))
+ bs.Insert(blockdigest.MakeTestDigestWithSize(digest))
}
return
}
@@ -46,9 +46,9 @@ func SummarizeReplication(readCollections collection.ReadCollections,
// Takes a map from block digest to replication level and represents
// it in a keep.ReadServers structure.
func SpecifyReplication(digestToReplication map[int]int) (rs keep.ReadServers) {
- rs.BlockToServers = make(map[blockdigest.BlockDigest][]keep.BlockServerInfo)
+ rs.BlockToServers = make(map[blockdigest.DigestWithSize][]keep.BlockServerInfo)
for digest, replication := range digestToReplication {
- rs.BlockToServers[blockdigest.MakeTestBlockDigest(digest)] =
+ rs.BlockToServers[blockdigest.MakeTestDigestWithSize(digest)] =
make([]keep.BlockServerInfo, replication)
}
return
@@ -66,10 +66,10 @@ func VerifyToCollectionIndexSet(
expected := CollectionIndexSetFromSlice(expectedCollections)
rc := collection.ReadCollections{
- BlockToCollectionIndices: map[blockdigest.BlockDigest][]int{},
+ BlockToCollectionIndices: map[blockdigest.DigestWithSize][]int{},
}
for digest, indices := range blockToCollectionIndices {
- rc.BlockToCollectionIndices[blockdigest.MakeTestBlockDigest(digest)] = indices
+ rc.BlockToCollectionIndices[blockdigest.MakeTestDigestWithSize(digest)] = indices
}
returned := make(CollectionIndexSet)
commit 47d1bdc0960af5bfc8f2793c352f60483539c389
Author: mishaz <misha at curoverse.com>
Date: Tue Jun 2 22:42:59 2015 +0000
Added string to error message to help with debugging.
diff --git a/services/datamanager/keep/keep.go b/services/datamanager/keep/keep.go
index 2c4eff5..e1c9c29 100644
--- a/services/datamanager/keep/keep.go
+++ b/services/datamanager/keep/keep.go
@@ -421,6 +421,8 @@ func parseBlockInfoFromIndexLine(indexLine string) (blockInfo BlockInfo, err err
var locator manifest.BlockLocator
if locator, err = manifest.ParseBlockLocator(tokens[0]); err != nil {
+ err = fmt.Errorf("%v Received error while parsing line \"%s\"",
+ err, indexLine)
return
}
if len(locator.Hints) > 0 {
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list