Added size to block locators, touching most of the code.
author mishaz <misha@curoverse.com>
Thu, 4 Jun 2015 21:11:01 +0000 (21:11 +0000)
committer mishaz <misha@curoverse.com>
Thu, 4 Jun 2015 21:11:01 +0000 (21:11 +0000)
Moved BlockLocator (and associated parsing code) from sdk/go/manifest to sdk/go/blockdigest.
Added some helper methods, some for testing.
Some gofmt cleanup.
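
A minimal usage sketch, not part of the committed diff, showing how the relocated parsing API and the new DigestWithSize key fit together; it uses only the import path and identifiers that appear in the diffs below, and the sample locator string comes from the new test case.

package main

import (
        "fmt"
        "log"

        "git.curoverse.com/arvados.git/sdk/go/blockdigest"
)

func main() {
        // Locator parsing now lives in blockdigest and returns the size
        // alongside the digest and hints.
        loc, err := blockdigest.ParseBlockLocator(
                "365f83f5f808896ec834c8b595288735+2310+K@qr1hi")
        if err != nil {
                log.Fatalf("unexpected parse error: %v", err)
        }

        // DigestWithSize is the compact (digest, size) key that datamanager
        // now uses in place of a bare BlockDigest; Size is stored as uint32.
        key := blockdigest.DigestWithSize{
                Digest: loc.Digest,
                Size:   uint32(loc.Size),
        }
        fmt.Println(key.String()) // 365f83f5f808896ec834c8b595288735+2310
}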

12 files changed:
sdk/go/blockdigest/blockdigest.go
sdk/go/blockdigest/blockdigest_test.go
sdk/go/blockdigest/testing.go
sdk/go/manifest/manifest.go
sdk/go/manifest/manifest_test.go
services/datamanager/collection/collection.go
services/datamanager/collection/collection_test.go
services/datamanager/keep/keep.go
services/datamanager/summary/pull_list.go
services/datamanager/summary/pull_list_test.go
services/datamanager/summary/summary.go
services/datamanager/summary/summary_test.go

sdk/go/blockdigest/blockdigest.go
index 5c29b902f2727b7353e66d1f5da2fe934eb4e8e6..d2f1c60ba93889614de18756dcf3ed6608f0b5fa 100644 (file)
@@ -4,9 +4,14 @@ package blockdigest
 import (
        "fmt"
        "log"
+       "regexp"
        "strconv"
+       "strings"
 )
 
+var LocatorPattern = regexp.MustCompile(
+       "^[0-9a-fA-F]{32}\\+[0-9]+(\\+[A-Z][A-Za-z0-9@_-]+)*$")
+
 // Stores a Block Locator Digest compactly, up to 128 bits.
 // Can be used as a map key.
 type BlockDigest struct {
@@ -14,10 +19,25 @@ type BlockDigest struct {
        L uint64
 }
 
+type DigestWithSize struct {
+       Digest BlockDigest
+       Size   uint32
+}
+
+type BlockLocator struct {
+       Digest BlockDigest
+       Size   int
+       Hints  []string
+}
+
 func (d BlockDigest) String() string {
        return fmt.Sprintf("%016x%016x", d.H, d.L)
 }
 
+func (w DigestWithSize) String() string {
+       return fmt.Sprintf("%s+%d", w.Digest.String(), w.Size)
+}
+
 // Will create a new BlockDigest unless an error is encountered.
 func FromString(s string) (dig BlockDigest, err error) {
        if len(s) != 32 {
@@ -46,3 +66,34 @@ func AssertFromString(s string) BlockDigest {
        }
        return d
 }
+
+func IsBlockLocator(s string) bool {
+       return LocatorPattern.MatchString(s)
+}
+
+func ParseBlockLocator(s string) (b BlockLocator, err error) {
+       if !LocatorPattern.MatchString(s) {
+               err = fmt.Errorf("String \"%s\" does not match BlockLocator pattern "+
+                       "\"%s\".",
+                       s,
+                       LocatorPattern.String())
+       } else {
+               tokens := strings.Split(s, "+")
+               var blockSize int64
+               var blockDigest BlockDigest
+               // We expect both of the following to succeed since LocatorPattern
+               // restricts the strings appropriately.
+               blockDigest, err = FromString(tokens[0])
+               if err != nil {
+                       return
+               }
+               blockSize, err = strconv.ParseInt(tokens[1], 10, 0)
+               if err != nil {
+                       return
+               }
+               b.Digest = blockDigest
+               b.Size = int(blockSize)
+               b.Hints = tokens[2:]
+       }
+       return
+}
sdk/go/blockdigest/blockdigest_test.go
index 068a1385aef5e4f714e5282235da621fb4e970b4..017aaa47101c9a2e1971e7694abac24bc77f1e06 100644 (file)
@@ -2,10 +2,37 @@ package blockdigest
 
 import (
        "fmt"
+       "runtime"
        "strings"
        "testing"
 )
 
+func getStackTrace() string {
+       buf := make([]byte, 1000)
+       bytes_written := runtime.Stack(buf, false)
+       return "Stack Trace:\n" + string(buf[:bytes_written])
+}
+
+func expectEqual(t *testing.T, actual interface{}, expected interface{}) {
+       if actual != expected {
+               t.Fatalf("Expected %v but received %v instead. %s",
+                       expected,
+                       actual,
+                       getStackTrace())
+       }
+}
+
+func expectStringSlicesEqual(t *testing.T, actual []string, expected []string) {
+       if len(actual) != len(expected) {
+               t.Fatalf("Expected %v (length %d), but received %v (length %d) instead. %s", expected, len(expected), actual, len(actual), getStackTrace())
+       }
+       for i := range actual {
+               if actual[i] != expected[i] {
+                       t.Fatalf("Expected %v but received %v instead (first disagreement at position %d). %s", expected, actual, i, getStackTrace())
+               }
+       }
+}
+
 func expectValidDigestString(t *testing.T, s string) {
        bd, err := FromString(s)
        if err != nil {
@@ -13,7 +40,7 @@ func expectValidDigestString(t *testing.T, s string) {
        }
 
        expected := strings.ToLower(s)
-               
+
        if expected != bd.String() {
                t.Fatalf("Expected %s to be returned by FromString(%s).String() but instead we received %s", expected, s, bd.String())
        }
@@ -26,6 +53,26 @@ func expectInvalidDigestString(t *testing.T, s string) {
        }
 }
 
+func expectBlockLocator(t *testing.T, actual BlockLocator, expected BlockLocator) {
+       expectEqual(t, actual.Digest, expected.Digest)
+       expectEqual(t, actual.Size, expected.Size)
+       expectStringSlicesEqual(t, actual.Hints, expected.Hints)
+}
+
+func expectLocatorPatternMatch(t *testing.T, s string) {
+       if !LocatorPattern.MatchString(s) {
+               t.Fatalf("Expected \"%s\" to match locator pattern but it did not.",
+                       s)
+       }
+}
+
+func expectLocatorPatternFail(t *testing.T, s string) {
+       if LocatorPattern.MatchString(s) {
+               t.Fatalf("Expected \"%s\" to fail locator pattern but it passed.",
+                       s)
+       }
+}
+
 func TestValidDigestStrings(t *testing.T) {
        expectValidDigestString(t, "01234567890123456789abcdefabcdef")
        expectValidDigestString(t, "01234567890123456789ABCDEFABCDEF")
@@ -49,7 +96,7 @@ func TestBlockDigestGetsPrettyPrintedByPrintf(t *testing.T) {
        input := "01234567890123456789abcdefabcdef"
        prettyPrinted := fmt.Sprintf("%v", AssertFromString(input))
        if prettyPrinted != input {
-               t.Fatalf("Expected blockDigest produced from \"%s\" to be printed as " +
+               t.Fatalf("Expected blockDigest produced from \"%s\" to be printed as "+
                        "\"%s\", but instead it was printed as %s",
                        input, input, prettyPrinted)
        }
@@ -58,13 +105,13 @@ func TestBlockDigestGetsPrettyPrintedByPrintf(t *testing.T) {
 func TestBlockDigestGetsPrettyPrintedByPrintfInNestedStructs(t *testing.T) {
        input := "01234567890123456789abcdefabcdef"
        value := 42
-       nested := struct{
+       nested := struct {
                // Fun trivia fact: If this field was called "digest" instead of
                // "Digest", then it would not be exported and String() would
                // never get called on it and our output would look very
                // different.
                Digest BlockDigest
-               value int
+               value  int
        }{
                AssertFromString(input),
                value,
@@ -72,8 +119,44 @@ func TestBlockDigestGetsPrettyPrintedByPrintfInNestedStructs(t *testing.T) {
        prettyPrinted := fmt.Sprintf("%+v", nested)
        expected := fmt.Sprintf("{Digest:%s value:%d}", input, value)
        if prettyPrinted != expected {
-               t.Fatalf("Expected blockDigest produced from \"%s\" to be printed as " +
+               t.Fatalf("Expected blockDigest produced from \"%s\" to be printed as "+
                        "\"%s\", but instead it was printed as %s",
                        input, expected, prettyPrinted)
        }
 }
+
+func TestLocatorPatternBasic(t *testing.T) {
+       expectLocatorPatternMatch(t, "12345678901234567890123456789012+12345")
+       expectLocatorPatternMatch(t, "A2345678901234abcdefababdeffdfdf+12345")
+       expectLocatorPatternMatch(t, "12345678901234567890123456789012+12345+A1")
+       expectLocatorPatternMatch(t,
+               "12345678901234567890123456789012+12345+A1+B123wxyz@_-")
+       expectLocatorPatternMatch(t,
+               "12345678901234567890123456789012+12345+A1+B123wxyz@_-+C@")
+
+       expectLocatorPatternFail(t, "12345678901234567890123456789012")
+       expectLocatorPatternFail(t, "12345678901234567890123456789012+")
+       expectLocatorPatternFail(t, "12345678901234567890123456789012+12345+")
+       expectLocatorPatternFail(t, "1234567890123456789012345678901+12345")
+       expectLocatorPatternFail(t, "123456789012345678901234567890123+12345")
+       expectLocatorPatternFail(t, "g2345678901234abcdefababdeffdfdf+12345")
+       expectLocatorPatternFail(t, "12345678901234567890123456789012+12345 ")
+       expectLocatorPatternFail(t, "12345678901234567890123456789012+12345+1")
+       expectLocatorPatternFail(t, "12345678901234567890123456789012+12345+1A")
+       expectLocatorPatternFail(t, "12345678901234567890123456789012+12345+A")
+       expectLocatorPatternFail(t, "12345678901234567890123456789012+12345+a1")
+       expectLocatorPatternFail(t, "12345678901234567890123456789012+12345+A1+")
+       expectLocatorPatternFail(t, "12345678901234567890123456789012+12345+A1+B")
+       expectLocatorPatternFail(t, "12345678901234567890123456789012+12345+A+B2")
+}
+
+func TestParseBlockLocatorSimple(t *testing.T) {
+       b, err := ParseBlockLocator("365f83f5f808896ec834c8b595288735+2310+K@qr1hi+Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf")
+       if err != nil {
+               t.Fatalf("Unexpected error parsing block locator: %v", err)
+       }
+       expectBlockLocator(t, b, BlockLocator{Digest: AssertFromString("365f83f5f808896ec834c8b595288735"),
+               Size: 2310,
+               Hints: []string{"K@qr1hi",
+                       "Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf"}})
+}
sdk/go/blockdigest/testing.go
index eae3f3ce2d6cbfbcaef788affef25d0aa2e23082..40f08ce63d0bc9a4201f7f66054b817c0d764f88 100644 (file)
@@ -2,11 +2,15 @@
 
 package blockdigest
 
-import (
-       "fmt"
-)
-
 // Just used for testing when we need some distinct BlockDigests
 func MakeTestBlockDigest(i int) BlockDigest {
-       return AssertFromString(fmt.Sprintf("%032x", i))
+       return BlockDigest{L: uint64(i)}
+}
+
+func MakeTestDigestSpecifySize(i int, s int) DigestWithSize {
+       return DigestWithSize{Digest: BlockDigest{L: uint64(i)}, Size: uint32(s)}
+}
+
+func MakeTestDigestWithSize(i int) DigestWithSize {
+       return MakeTestDigestSpecifySize(i, i)
 }
sdk/go/manifest/manifest.go
index f6698c67d2f2436ff38b9e4c6f641e62f3b3110e..4e816cd73b30abbb7afce9f4b51d3767a897cfa8 100644 (file)
@@ -5,27 +5,15 @@
 package manifest
 
 import (
-       "fmt"
        "git.curoverse.com/arvados.git/sdk/go/blockdigest"
        "log"
-       "regexp"
-       "strconv"
        "strings"
 )
 
-var LocatorPattern = regexp.MustCompile(
-       "^[0-9a-fA-F]{32}\\+[0-9]+(\\+[A-Z][A-Za-z0-9@_-]+)*$")
-
 type Manifest struct {
        Text string
 }
 
-type BlockLocator struct {
-       Digest blockdigest.BlockDigest
-       Size   int
-       Hints  []string
-}
-
 // Represents a single line from a manifest.
 type ManifestStream struct {
        StreamName string
@@ -33,40 +21,13 @@ type ManifestStream struct {
        Files      []string
 }
 
-func ParseBlockLocator(s string) (b BlockLocator, err error) {
-       if !LocatorPattern.MatchString(s) {
-               err = fmt.Errorf("String \"%s\" does not match BlockLocator pattern "+
-                       "\"%s\".",
-                       s,
-                       LocatorPattern.String())
-       } else {
-               tokens := strings.Split(s, "+")
-               var blockSize int64
-               var blockDigest blockdigest.BlockDigest
-               // We expect both of the following to succeed since LocatorPattern
-               // restricts the strings appropriately.
-               blockDigest, err = blockdigest.FromString(tokens[0])
-               if err != nil {
-                       return
-               }
-               blockSize, err = strconv.ParseInt(tokens[1], 10, 0)
-               if err != nil {
-                       return
-               }
-               b.Digest = blockDigest
-               b.Size = int(blockSize)
-               b.Hints = tokens[2:]
-       }
-       return
-}
-
 func parseManifestStream(s string) (m ManifestStream) {
        tokens := strings.Split(s, " ")
        m.StreamName = tokens[0]
        tokens = tokens[1:]
        var i int
        for i = range tokens {
-               if !LocatorPattern.MatchString(tokens[i]) {
+               if !blockdigest.IsBlockLocator(tokens[i]) {
                        break
                }
        }
@@ -100,12 +61,12 @@ func (m *Manifest) StreamIter() <-chan ManifestStream {
 // Blocks may appear mulitple times within the same manifest if they
 // are used by multiple files. In that case this Iterator will output
 // the same block multiple times.
-func (m *Manifest) BlockIterWithDuplicates() <-chan BlockLocator {
-       blockChannel := make(chan BlockLocator)
+func (m *Manifest) BlockIterWithDuplicates() <-chan blockdigest.BlockLocator {
+       blockChannel := make(chan blockdigest.BlockLocator)
        go func(streamChannel <-chan ManifestStream) {
                for m := range streamChannel {
                        for _, block := range m.Blocks {
-                               if b, err := ParseBlockLocator(block); err == nil {
+                               if b, err := blockdigest.ParseBlockLocator(block); err == nil {
                                        blockChannel <- b
                                } else {
                                        log.Printf("ERROR: Failed to parse block: %v", err)
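
A minimal consumer sketch, not part of the committed diff, for the updated iterator signature; the one-line manifest text is made up but follows the stream format exercised by the existing tests.

package main

import (
        "fmt"

        "git.curoverse.com/arvados.git/sdk/go/manifest"
)

func main() {
        m := manifest.Manifest{
                Text: ". 365f83f5f808896ec834c8b595288735+2310 0:2310:file.txt\n",
        }
        // Each value is now a blockdigest.BlockLocator, so the size travels
        // with the digest instead of being re-parsed by callers.
        for block := range m.BlockIterWithDuplicates() {
                fmt.Printf("%v size=%d hints=%d\n",
                        block.Digest, block.Size, len(block.Hints))
        }
}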
sdk/go/manifest/manifest_test.go
index c1bfb14e1460757ed3991dc095e09c5fef46372e..8cfe3d907721e4f7d33048dfa16ef91e38dec12d 100644 (file)
@@ -7,14 +7,14 @@ import (
        "testing"
 )
 
-func getStackTrace() (string) {
+func getStackTrace() string {
        buf := make([]byte, 1000)
        bytes_written := runtime.Stack(buf, false)
        return "Stack Trace:\n" + string(buf[:bytes_written])
 }
 
 func expectFromChannel(t *testing.T, c <-chan string, expected string) {
-       actual, ok := <- c
+       actual, ok := <-c
        if !ok {
                t.Fatalf("Expected to receive %s but channel was closed. %s",
                        expected,
@@ -29,7 +29,7 @@ func expectFromChannel(t *testing.T, c <-chan string, expected string) {
 }
 
 func expectChannelClosed(t *testing.T, c <-chan interface{}) {
-       received, ok := <- c
+       received, ok := <-c
        if ok {
                t.Fatalf("Expected channel to be closed, but received %v instead. %s",
                        received,
@@ -63,67 +63,17 @@ func expectManifestStream(t *testing.T, actual ManifestStream, expected Manifest
        expectStringSlicesEqual(t, actual.Files, expected.Files)
 }
 
-func expectBlockLocator(t *testing.T, actual BlockLocator, expected BlockLocator) {
+func expectBlockLocator(t *testing.T, actual blockdigest.BlockLocator, expected blockdigest.BlockLocator) {
        expectEqual(t, actual.Digest, expected.Digest)
        expectEqual(t, actual.Size, expected.Size)
        expectStringSlicesEqual(t, actual.Hints, expected.Hints)
 }
 
-func expectLocatorPatternMatch(t *testing.T, s string) {
-       if !LocatorPattern.MatchString(s) {
-               t.Fatalf("Expected \"%s\" to match locator pattern but it did not.",
-                       s)
-       }
-}
-
-func expectLocatorPatternFail(t *testing.T, s string) {
-       if LocatorPattern.MatchString(s) {
-               t.Fatalf("Expected \"%s\" to fail locator pattern but it passed.",
-                       s)
-       }
-}
-
-func TestLocatorPatternBasic(t *testing.T) {
-       expectLocatorPatternMatch(t, "12345678901234567890123456789012+12345")
-       expectLocatorPatternMatch(t, "A2345678901234abcdefababdeffdfdf+12345")
-       expectLocatorPatternMatch(t, "12345678901234567890123456789012+12345+A1")
-       expectLocatorPatternMatch(t,
-               "12345678901234567890123456789012+12345+A1+B123wxyz@_-")
-       expectLocatorPatternMatch(t,
-               "12345678901234567890123456789012+12345+A1+B123wxyz@_-+C@")
-
-       expectLocatorPatternFail(t,  "12345678901234567890123456789012")
-       expectLocatorPatternFail(t,  "12345678901234567890123456789012+")
-       expectLocatorPatternFail(t,  "12345678901234567890123456789012+12345+")
-       expectLocatorPatternFail(t,  "1234567890123456789012345678901+12345")
-       expectLocatorPatternFail(t,  "123456789012345678901234567890123+12345")
-       expectLocatorPatternFail(t,  "g2345678901234abcdefababdeffdfdf+12345")
-       expectLocatorPatternFail(t,  "12345678901234567890123456789012+12345 ")
-       expectLocatorPatternFail(t,  "12345678901234567890123456789012+12345+1")
-       expectLocatorPatternFail(t,  "12345678901234567890123456789012+12345+1A")
-       expectLocatorPatternFail(t,  "12345678901234567890123456789012+12345+A")
-       expectLocatorPatternFail(t,  "12345678901234567890123456789012+12345+a1")
-       expectLocatorPatternFail(t,  "12345678901234567890123456789012+12345+A1+")
-       expectLocatorPatternFail(t,  "12345678901234567890123456789012+12345+A1+B")
-       expectLocatorPatternFail(t,  "12345678901234567890123456789012+12345+A+B2")
-}
-
 func TestParseManifestStreamSimple(t *testing.T) {
        m := parseManifestStream(". 365f83f5f808896ec834c8b595288735+2310+K@qr1hi+Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf 0:2310:qr1hi-8i9sb-ienvmpve1a0vpoi.log.txt")
        expectManifestStream(t, m, ManifestStream{StreamName: ".",
                Blocks: []string{"365f83f5f808896ec834c8b595288735+2310+K@qr1hi+Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf"},
-               Files: []string{"0:2310:qr1hi-8i9sb-ienvmpve1a0vpoi.log.txt"}})
-}
-
-func TestParseBlockLocatorSimple(t *testing.T) {
-       b, err := ParseBlockLocator("365f83f5f808896ec834c8b595288735+2310+K@qr1hi+Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf")
-       if err != nil {
-               t.Fatalf("Unexpected error parsing block locator: %v", err)
-       }
-       expectBlockLocator(t, b, BlockLocator{Digest: blockdigest.AssertFromString("365f83f5f808896ec834c8b595288735"),
-               Size: 2310,
-               Hints: []string{"K@qr1hi",
-                       "Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf"}})
+               Files:  []string{"0:2310:qr1hi-8i9sb-ienvmpve1a0vpoi.log.txt"}})
 }
 
 func TestStreamIterShortManifestWithBlankStreams(t *testing.T) {
@@ -139,9 +89,9 @@ func TestStreamIterShortManifestWithBlankStreams(t *testing.T) {
                firstStream,
                ManifestStream{StreamName: ".",
                        Blocks: []string{"b746e3d2104645f2f64cd3cc69dd895d+15693477+E2866e643690156651c03d876e638e674dcd79475@5441920c"},
-                       Files: []string{"0:15893477:chr10_band0_s0_e3000000.fj"}})
+                       Files:  []string{"0:15893477:chr10_band0_s0_e3000000.fj"}})
 
-       received, ok := <- streamIter
+       received, ok := <-streamIter
        if ok {
                t.Fatalf("Expected streamIter to be closed, but received %v instead.",
                        received)
@@ -159,20 +109,20 @@ func TestBlockIterLongManifest(t *testing.T) {
        firstBlock := <-blockChannel
        expectBlockLocator(t,
                firstBlock,
-               BlockLocator{Digest: blockdigest.AssertFromString("b746e3d2104645f2f64cd3cc69dd895d"),
-                       Size: 15693477,
+               blockdigest.BlockLocator{Digest: blockdigest.AssertFromString("b746e3d2104645f2f64cd3cc69dd895d"),
+                       Size:  15693477,
                        Hints: []string{"E2866e643690156651c03d876e638e674dcd79475@5441920c"}})
        blocksRead := 1
-       var lastBlock BlockLocator
+       var lastBlock blockdigest.BlockLocator
        for lastBlock = range blockChannel {
                //log.Printf("Blocks Read: %d", blocksRead)
-               blocksRead++
+               blocksRead++
        }
        expectEqual(t, blocksRead, 853)
 
        expectBlockLocator(t,
                lastBlock,
-               BlockLocator{Digest: blockdigest.AssertFromString("f9ce82f59e5908d2d70e18df9679b469"),
-                       Size: 31367794,
+               blockdigest.BlockLocator{Digest: blockdigest.AssertFromString("f9ce82f59e5908d2d70e18df9679b469"),
+                       Size:  31367794,
                        Hints: []string{"E53f903684239bcc114f7bf8ff9bd6089f33058db@5441920c"}})
 }
services/datamanager/collection/collection.go
index ddc4f953d5cb9ae1865c32688ff6b9066c6c5f68..ed6df9dc5c8cdb0a49e68fcd0b4a516f766581ea 100644 (file)
@@ -42,10 +42,10 @@ type ReadCollections struct {
        ReadAllCollections       bool
        UuidToCollection         map[string]Collection
        OwnerToCollectionSize    map[string]int
-       BlockToReplication       map[blockdigest.BlockDigest]int
+       BlockToReplication       map[blockdigest.DigestWithSize]int
        CollectionUuidToIndex    map[string]int
        CollectionIndexToUuid    []string
-       BlockToCollectionIndices map[blockdigest.BlockDigest][]int
+       BlockToCollectionIndices map[blockdigest.DigestWithSize][]int
 }
 
 type GetCollectionsParams struct {
@@ -283,11 +283,11 @@ func ProcessCollections(arvLogger *logger.Logger,
 
 func (readCollections *ReadCollections) Summarize(arvLogger *logger.Logger) {
        readCollections.OwnerToCollectionSize = make(map[string]int)
-       readCollections.BlockToReplication = make(map[blockdigest.BlockDigest]int)
+       readCollections.BlockToReplication = make(map[blockdigest.DigestWithSize]int)
        numCollections := len(readCollections.UuidToCollection)
        readCollections.CollectionUuidToIndex = make(map[string]int, numCollections)
        readCollections.CollectionIndexToUuid = make([]string, 0, numCollections)
-       readCollections.BlockToCollectionIndices = make(map[blockdigest.BlockDigest][]int)
+       readCollections.BlockToCollectionIndices = make(map[blockdigest.DigestWithSize][]int)
 
        for _, coll := range readCollections.UuidToCollection {
                collectionIndex := len(readCollections.CollectionIndexToUuid)
@@ -298,12 +298,14 @@ func (readCollections *ReadCollections) Summarize(arvLogger *logger.Logger) {
                readCollections.OwnerToCollectionSize[coll.OwnerUuid] =
                        readCollections.OwnerToCollectionSize[coll.OwnerUuid] + coll.TotalSize
 
-               for block, _ := range coll.BlockDigestToSize {
-                       readCollections.BlockToCollectionIndices[block] =
-                               append(readCollections.BlockToCollectionIndices[block], collectionIndex)
-                       storedReplication := readCollections.BlockToReplication[block]
+               for block, size := range coll.BlockDigestToSize {
+                       locator := blockdigest.DigestWithSize{Digest: block, Size: uint32(size)}
+                       readCollections.BlockToCollectionIndices[locator] =
+                               append(readCollections.BlockToCollectionIndices[locator],
+                                       collectionIndex)
+                       storedReplication := readCollections.BlockToReplication[locator]
                        if coll.ReplicationLevel > storedReplication {
-                               readCollections.BlockToReplication[block] = coll.ReplicationLevel
+                               readCollections.BlockToReplication[locator] = coll.ReplicationLevel
                        }
                }
        }
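
A small sketch, not part of the committed diff, of what the new key buys Summarize; the digest seed and sizes are made up. The same digest stored at two different sizes now yields two distinct replication entries, where the old map[blockdigest.BlockDigest]int would have collapsed them into one.

package main

import (
        "fmt"

        "git.curoverse.com/arvados.git/sdk/go/blockdigest"
)

func main() {
        d := blockdigest.MakeTestBlockDigest(7)
        small := blockdigest.DigestWithSize{Digest: d, Size: 1024}
        large := blockdigest.DigestWithSize{Digest: d, Size: 4096}

        blockToReplication := map[blockdigest.DigestWithSize]int{
                small: 2,
                large: 3,
        }
        fmt.Println(len(blockToReplication)) // 2: size is part of the key
}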
services/datamanager/collection/collection_test.go
index 3dc8e37dffb60c65a38884a902a6b8d4766f99ba..4af5d4c8c4ae06896f639a6331e1bdfea347fa51 100644 (file)
@@ -21,8 +21,8 @@ var _ = Suite(&MySuite{})
 // BlockToCollectionUuids.
 type ExpectedSummary struct {
        OwnerToCollectionSize  map[string]int
-       BlockToReplication     map[blockdigest.BlockDigest]int
-       BlockToCollectionUuids map[blockdigest.BlockDigest][]string
+       BlockToReplication     map[blockdigest.DigestWithSize]int
+       BlockToCollectionUuids map[blockdigest.DigestWithSize][]string
 }
 
 func CompareSummarizedReadCollections(c *C,
@@ -36,7 +36,7 @@ func CompareSummarizedReadCollections(c *C,
                expected.BlockToReplication)
 
        summarizedBlockToCollectionUuids :=
-               make(map[blockdigest.BlockDigest]map[string]struct{})
+               make(map[blockdigest.DigestWithSize]map[string]struct{})
        for digest, indices := range summarized.BlockToCollectionIndices {
                uuidSet := make(map[string]struct{})
                summarizedBlockToCollectionUuids[digest] = uuidSet
@@ -46,7 +46,7 @@ func CompareSummarizedReadCollections(c *C,
        }
 
        expectedBlockToCollectionUuids :=
-               make(map[blockdigest.BlockDigest]map[string]struct{})
+               make(map[blockdigest.DigestWithSize]map[string]struct{})
        for digest, uuidSlice := range expected.BlockToCollectionUuids {
                uuidSet := make(map[string]struct{})
                expectedBlockToCollectionUuids[digest] = uuidSet
@@ -69,13 +69,13 @@ func (s *MySuite) TestSummarizeSimple(checker *C) {
 
        c := rc.UuidToCollection["col0"]
 
-       blockDigest1 := blockdigest.MakeTestBlockDigest(1)
-       blockDigest2 := blockdigest.MakeTestBlockDigest(2)
+       blockDigest1 := blockdigest.MakeTestDigestWithSize(1)
+       blockDigest2 := blockdigest.MakeTestDigestWithSize(2)
 
        expected := ExpectedSummary{
                OwnerToCollectionSize:  map[string]int{c.OwnerUuid: c.TotalSize},
-               BlockToReplication:     map[blockdigest.BlockDigest]int{blockDigest1: 5, blockDigest2: 5},
-               BlockToCollectionUuids: map[blockdigest.BlockDigest][]string{blockDigest1: []string{c.Uuid}, blockDigest2: []string{c.Uuid}},
+               BlockToReplication:     map[blockdigest.DigestWithSize]int{blockDigest1: 5, blockDigest2: 5},
+               BlockToCollectionUuids: map[blockdigest.DigestWithSize][]string{blockDigest1: []string{c.Uuid}, blockDigest2: []string{c.Uuid}},
        }
 
        CompareSummarizedReadCollections(checker, rc, expected)
@@ -98,21 +98,21 @@ func (s *MySuite) TestSummarizeOverlapping(checker *C) {
        c0 := rc.UuidToCollection["col0"]
        c1 := rc.UuidToCollection["col1"]
 
-       blockDigest1 := blockdigest.MakeTestBlockDigest(1)
-       blockDigest2 := blockdigest.MakeTestBlockDigest(2)
-       blockDigest3 := blockdigest.MakeTestBlockDigest(3)
+       blockDigest1 := blockdigest.MakeTestDigestWithSize(1)
+       blockDigest2 := blockdigest.MakeTestDigestWithSize(2)
+       blockDigest3 := blockdigest.MakeTestDigestWithSize(3)
 
        expected := ExpectedSummary{
                OwnerToCollectionSize: map[string]int{
                        c0.OwnerUuid: c0.TotalSize,
                        c1.OwnerUuid: c1.TotalSize,
                },
-               BlockToReplication: map[blockdigest.BlockDigest]int{
+               BlockToReplication: map[blockdigest.DigestWithSize]int{
                        blockDigest1: 5,
                        blockDigest2: 8,
                        blockDigest3: 8,
                },
-               BlockToCollectionUuids: map[blockdigest.BlockDigest][]string{
+               BlockToCollectionUuids: map[blockdigest.DigestWithSize][]string{
                        blockDigest1: []string{c0.Uuid},
                        blockDigest2: []string{c0.Uuid, c1.Uuid},
                        blockDigest3: []string{c1.Uuid},
services/datamanager/keep/keep.go
index e1c9c29214114c2a816c09d2f117139da07f01da..c666337d0dd98da68b1a21e24dd06304b80f4930 100644 (file)
@@ -10,7 +10,6 @@ import (
        "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
        "git.curoverse.com/arvados.git/sdk/go/blockdigest"
        "git.curoverse.com/arvados.git/sdk/go/logger"
-       "git.curoverse.com/arvados.git/sdk/go/manifest"
        "git.curoverse.com/arvados.git/services/datamanager/loggerutil"
        "io/ioutil"
        "log"
@@ -29,20 +28,18 @@ type ServerAddress struct {
 
 // Info about a particular block returned by the server
 type BlockInfo struct {
-       Digest blockdigest.BlockDigest
-       Size   int
+       Digest blockdigest.DigestWithSize
        Mtime  int64 // TODO(misha): Replace this with a timestamp.
 }
 
 // Info about a specified block given by a server
 type BlockServerInfo struct {
        ServerIndex int
-       Size        int
        Mtime       int64 // TODO(misha): Replace this with a timestamp.
 }
 
 type ServerContents struct {
-       BlockDigestToInfo map[blockdigest.BlockDigest]BlockInfo
+       BlockDigestToInfo map[blockdigest.DigestWithSize]BlockInfo
 }
 
 type ServerResponse struct {
@@ -55,7 +52,7 @@ type ReadServers struct {
        KeepServerIndexToAddress []ServerAddress
        KeepServerAddressToIndex map[ServerAddress]int
        ServerToContents         map[ServerAddress]ServerContents
-       BlockToServers           map[blockdigest.BlockDigest][]BlockServerInfo
+       BlockToServers           map[blockdigest.DigestWithSize][]BlockServerInfo
        BlockReplicationCounts   map[int]int
 }
 
@@ -192,7 +189,7 @@ func GetKeepServers(params GetKeepServersParams) (results ReadServers) {
        }
 
        results.ServerToContents = make(map[ServerAddress]ServerContents)
-       results.BlockToServers = make(map[blockdigest.BlockDigest][]BlockServerInfo)
+       results.BlockToServers = make(map[blockdigest.DigestWithSize][]BlockServerInfo)
 
        // Read all the responses
        for i := range sdkResponse.KeepServers {
@@ -207,7 +204,6 @@ func GetKeepServers(params GetKeepServersParams) (results ReadServers) {
                        results.BlockToServers[blockInfo.Digest] = append(
                                results.BlockToServers[blockInfo.Digest],
                                BlockServerInfo{ServerIndex: serverIndex,
-                                       Size:  blockInfo.Size,
                                        Mtime: blockInfo.Mtime})
                }
        }
@@ -331,7 +327,7 @@ func ReadServerResponse(arvLogger *logger.Logger,
 
        response.Address = keepServer
        response.Contents.BlockDigestToInfo =
-               make(map[blockdigest.BlockDigest]BlockInfo)
+               make(map[blockdigest.DigestWithSize]BlockInfo)
        scanner := bufio.NewScanner(resp.Body)
        numLines, numDuplicates, numSizeDisagreements := 0, 0, 0
        for scanner.Scan() {
@@ -348,33 +344,8 @@ func ReadServerResponse(arvLogger *logger.Logger,
                if storedBlock, ok := response.Contents.BlockDigestToInfo[blockInfo.Digest]; ok {
                        // This server returned multiple lines containing the same block digest.
                        numDuplicates += 1
-                       if storedBlock.Size != blockInfo.Size {
-                               numSizeDisagreements += 1
-                               // TODO(misha): Consider failing here.
-                               message := fmt.Sprintf("Saw different sizes for the same block "+
-                                       "on %s: %+v %+v",
-                                       keepServer.String(),
-                                       storedBlock,
-                                       blockInfo)
-                               log.Println(message)
-                               if arvLogger != nil {
-                                       arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
-                                               keepInfo := logger.GetOrCreateMap(p, "keep_info")
-                                               serverInfo := keepInfo[keepServer.Uuid].(map[string]interface{})
-                                               var error_list []string
-                                               read_error_list, has_list := serverInfo["error_list"]
-                                               if has_list {
-                                                       error_list = read_error_list.([]string)
-                                               } // If we didn't have the list, error_list is already an empty list
-                                               serverInfo["error_list"] = append(error_list, message)
-                                       })
-                               }
-                       }
-                       // Keep the block that is bigger, or the block that's newer in
-                       // the case of a size tie.
-                       if storedBlock.Size < blockInfo.Size ||
-                               (storedBlock.Size == blockInfo.Size &&
-                                       storedBlock.Mtime < blockInfo.Mtime) {
+                       // Keep the block that's newer.
+                       if storedBlock.Mtime < blockInfo.Mtime {
                                response.Contents.BlockDigestToInfo[blockInfo.Digest] = blockInfo
                        }
                } else {
@@ -419,8 +390,8 @@ func parseBlockInfoFromIndexLine(indexLine string) (blockInfo BlockInfo, err err
                        tokens)
        }
 
-       var locator manifest.BlockLocator
-       if locator, err = manifest.ParseBlockLocator(tokens[0]); err != nil {
+       var locator blockdigest.BlockLocator
+       if locator, err = blockdigest.ParseBlockLocator(tokens[0]); err != nil {
                err = fmt.Errorf("%v Received error while parsing line \"%s\"",
                        err, indexLine)
                return
@@ -436,8 +407,9 @@ func parseBlockInfoFromIndexLine(indexLine string) (blockInfo BlockInfo, err err
        if err != nil {
                return
        }
-       blockInfo.Digest = locator.Digest
-       blockInfo.Size = locator.Size
+       blockInfo.Digest =
+               blockdigest.DigestWithSize{Digest: locator.Digest,
+                       Size: uint32(locator.Size)}
        return
 }
 
services/datamanager/summary/pull_list.go
index 726f2c69a71a3ad9ecdd1564f5103d29412defa5..e9bd5d1877c4a931ba10841afddb726d2bdfc352 100644 (file)
@@ -14,13 +14,11 @@ import (
        "strings"
 )
 
-type Locator struct {
-       Digest blockdigest.BlockDigest
-       // TODO(misha): Add size field to the Locator (and MarshalJSON() below)
-}
+type Locator blockdigest.DigestWithSize
 
 func (l Locator) MarshalJSON() ([]byte, error) {
-       return []byte("\"" + l.Digest.String() + "\""), nil
+       //return []byte("\"" + l.Digest.String() + "\""), nil
+       return []byte("\"" + blockdigest.DigestWithSize(l).String() + "\""), nil
 }
 
 // One entry in the Pull List
@@ -32,15 +30,24 @@ type PullRequest struct {
 // The Pull List for a particular server
 type PullList []PullRequest
 
-// PullListByDigest implements sort.Interface for PullList based on
+// PullListByLocator implements sort.Interface for PullList based on
 // the Digest.
-type PullListByDigest PullList
+type PullListByLocator PullList
 
-func (a PullListByDigest) Len() int      { return len(a) }
-func (a PullListByDigest) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
-func (a PullListByDigest) Less(i, j int) bool {
+func (a PullListByLocator) Len() int      { return len(a) }
+func (a PullListByLocator) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
+func (a PullListByLocator) Less(i, j int) bool {
        di, dj := a[i].Locator.Digest, a[j].Locator.Digest
-       return di.H < dj.H || (di.H == dj.H && di.L < dj.L)
+       if di.H < dj.H {
+               return true
+       } else if di.H == dj.H {
+               if di.L < dj.L {
+                       return true
+               } else if di.L == dj.L {
+                       return a[i].Locator.Size < a[j].Locator.Size
+               }
+       }
+       return false
 }
 
 // For a given under-replicated block, this structure represents which
@@ -55,7 +62,7 @@ type PullServers struct {
 // each under-replicated block.
 func ComputePullServers(kc *keepclient.KeepClient,
        keepServerInfo *keep.ReadServers,
-       blockToDesiredReplication map[blockdigest.BlockDigest]int,
+       blockToDesiredReplication map[blockdigest.DigestWithSize]int,
        underReplicated BlockSet) (m map[Locator]PullServers) {
        m = map[Locator]PullServers{}
        // We use CanonicalString to avoid filling memory with dupicate
@@ -91,7 +98,7 @@ func ComputePullServers(kc *keepclient.KeepClient,
                                roots := keepclient.NewRootSorter(kc.LocalRoots(),
                                        block.String()).GetSortedRoots()
 
-                               l := Locator{Digest: block}
+                               l := Locator(block)
                                m[l] = CreatePullServers(cs, serverHasBlock, writableServers,
                                        roots, numCopiesMissing)
                        }
services/datamanager/summary/pull_list_test.go
index 692af5c4b900b9c5f04919063bf37f966399d1dd..9628a32018936bcc157e44b429b351bdaf218f56 100644 (file)
@@ -29,13 +29,13 @@ func stringSet(slice ...string) (m map[string]struct{}) {
 
 func (s *MySuite) TestPullListPrintsJSONCorrectly(c *C) {
        pl := PullList{PullRequest{
-               Locator: Locator{Digest: blockdigest.MakeTestBlockDigest(0xBadBeef)},
+               Locator: Locator(blockdigest.MakeTestDigestSpecifySize(0xBadBeef, 56789)),
                Servers: []string{"keep0.qr1hi.arvadosapi.com:25107",
                        "keep1.qr1hi.arvadosapi.com:25108"}}}
 
        b, err := json.Marshal(pl)
        c.Assert(err, IsNil)
-       expectedOutput := `[{"locator":"0000000000000000000000000badbeef",` +
+       expectedOutput := `[{"locator":"0000000000000000000000000badbeef+56789",` +
                `"servers":["keep0.qr1hi.arvadosapi.com:25107",` +
                `"keep1.qr1hi.arvadosapi.com:25108"]}]`
        c.Check(string(b), Equals, expectedOutput)
@@ -129,10 +129,10 @@ func (c *pullListMapEqualsChecker) Check(params []interface{}, names []string) (
        }
 
        for _, v := range obtained {
-               sort.Sort(PullListByDigest(v))
+               sort.Sort(PullListByLocator(v))
        }
        for _, v := range expected {
-               sort.Sort(PullListByDigest(v))
+               sort.Sort(PullListByLocator(v))
        }
 
        return DeepEquals.Check(params, names)
services/datamanager/summary/summary.go
index 8621d550dbda9c5e6dab5c4481f0a57484daa328..efb6061c9aef4ca8ebbda8e4bde0e74c8f8d92c5 100644 (file)
@@ -11,10 +11,10 @@ import (
        "sort"
 )
 
-type BlockSet map[blockdigest.BlockDigest]struct{}
+type BlockSet map[blockdigest.DigestWithSize]struct{}
 
 // Adds a single block to the set.
-func (bs BlockSet) Insert(digest blockdigest.BlockDigest) {
+func (bs BlockSet) Insert(digest blockdigest.DigestWithSize) {
        bs[digest] = struct{}{}
 }
 
@@ -112,7 +112,7 @@ func (rlbs ReplicationLevelBlockSetMap) GetOrCreate(
 // Adds a block to the set for a given replication level.
 func (rlbs ReplicationLevelBlockSetMap) Insert(
        repLevels ReplicationLevels,
-       block blockdigest.BlockDigest) {
+       block blockdigest.DigestWithSize) {
        rlbs.GetOrCreate(repLevels).Insert(block)
 }
 
services/datamanager/summary/summary_test.go
index 04ca5a5c599277d3b043feb1fe328b77cf987965..ea76df4d34043ea1f706ca89a82b699b52c806ca 100644 (file)
@@ -12,7 +12,7 @@ import (
 func BlockSetFromSlice(digests []int) (bs BlockSet) {
        bs = make(BlockSet)
        for _, digest := range digests {
-               bs.Insert(blockdigest.MakeTestBlockDigest(digest))
+               bs.Insert(blockdigest.MakeTestDigestWithSize(digest))
        }
        return
 }
@@ -46,9 +46,9 @@ func SummarizeReplication(readCollections collection.ReadCollections,
 // Takes a map from block digest to replication level and represents
 // it in a keep.ReadServers structure.
 func SpecifyReplication(digestToReplication map[int]int) (rs keep.ReadServers) {
-       rs.BlockToServers = make(map[blockdigest.BlockDigest][]keep.BlockServerInfo)
+       rs.BlockToServers = make(map[blockdigest.DigestWithSize][]keep.BlockServerInfo)
        for digest, replication := range digestToReplication {
-               rs.BlockToServers[blockdigest.MakeTestBlockDigest(digest)] =
+               rs.BlockToServers[blockdigest.MakeTestDigestWithSize(digest)] =
                        make([]keep.BlockServerInfo, replication)
        }
        return
@@ -66,10 +66,10 @@ func VerifyToCollectionIndexSet(
        expected := CollectionIndexSetFromSlice(expectedCollections)
 
        rc := collection.ReadCollections{
-               BlockToCollectionIndices: map[blockdigest.BlockDigest][]int{},
+               BlockToCollectionIndices: map[blockdigest.DigestWithSize][]int{},
        }
        for digest, indices := range blockToCollectionIndices {
-               rc.BlockToCollectionIndices[blockdigest.MakeTestBlockDigest(digest)] = indices
+               rc.BlockToCollectionIndices[blockdigest.MakeTestDigestWithSize(digest)] = indices
        }
 
        returned := make(CollectionIndexSet)