X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/a9034a1baae24357fa49e9dc4ef25922c1ec6a90..e1eabe76d75d783655c5e3513b9c6e0ffbfd9b52:/sdk/go/manifest/manifest.go diff --git a/sdk/go/manifest/manifest.go b/sdk/go/manifest/manifest.go index 882b4ff45d..f6698c67d2 100644 --- a/sdk/go/manifest/manifest.go +++ b/sdk/go/manifest/manifest.go @@ -5,8 +5,8 @@ package manifest import ( - "bufio" "fmt" + "git.curoverse.com/arvados.git/sdk/go/blockdigest" "log" "regexp" "strconv" @@ -21,39 +21,46 @@ type Manifest struct { } type BlockLocator struct { - Digest string - Size int - Hints []string + Digest blockdigest.BlockDigest + Size int + Hints []string } -type ManifestLine struct { - StreamName string - Blocks []string - Files []string +// Represents a single line from a manifest. +type ManifestStream struct { + StreamName string + Blocks []string + Files []string } -func parseBlockLocator(s string) (b BlockLocator, err error) { +func ParseBlockLocator(s string) (b BlockLocator, err error) { if !LocatorPattern.MatchString(s) { - err = fmt.Errorf("String \"%s\" does not match BlockLocator pattern " + + err = fmt.Errorf("String \"%s\" does not match BlockLocator pattern "+ "\"%s\".", s, LocatorPattern.String()) } else { tokens := strings.Split(s, "+") var blockSize int64 - // We expect ParseInt to succeed since LocatorPattern restricts - // tokens[1] to contain exclusively digits. + var blockDigest blockdigest.BlockDigest + // We expect both of the following to succeed since LocatorPattern + // restricts the strings appropriately. 
+ blockDigest, err = blockdigest.FromString(tokens[0]) + if err != nil { + return + } blockSize, err = strconv.ParseInt(tokens[1], 10, 0) - if err == nil { - b.Digest = tokens[0] - b.Size = int(blockSize) - b.Hints = tokens[2:] + if err != nil { + return } + b.Digest = blockDigest + b.Size = int(blockSize) + b.Hints = tokens[2:] } return } -func parseManifestLine(s string) (m ManifestLine) { +func parseManifestStream(s string) (m ManifestStream) { tokens := strings.Split(s, " ") m.StreamName = tokens[0] tokens = tokens[1:] @@ -68,30 +75,37 @@ func parseManifestLine(s string) (m ManifestLine) { return } -func (m *Manifest) LineIter() <-chan ManifestLine { - ch := make(chan ManifestLine) +func (m *Manifest) StreamIter() <-chan ManifestStream { + ch := make(chan ManifestStream) go func(input string) { - scanner := bufio.NewScanner(strings.NewReader(input)) - for scanner.Scan() { - // We parse one line at a time, to save effort if we only need - // the first few lines. - ch <- parseManifestLine(scanner.Text()) + // This slice holds the current line and the remainder of the + // manifest. We parse one line at a time, to save effort if we + // only need the first few lines. + lines := []string{"", input} + for { + lines = strings.SplitN(lines[1], "\n", 2) + if len(lines[0]) > 0 { + // Only parse non-blank lines + ch <- parseManifestStream(lines[0]) + } + if len(lines) == 1 { + break + } } close(ch) }(m.Text) return ch } - // Blocks may appear multiple times within the same manifest if they // are used by multiple files. In that case this Iterator will output // the same block multiple times. 
func (m *Manifest) BlockIterWithDuplicates() <-chan BlockLocator { blockChannel := make(chan BlockLocator) - go func(lineChannel <-chan ManifestLine) { - for m := range lineChannel { + go func(streamChannel <-chan ManifestStream) { + for m := range streamChannel { for _, block := range m.Blocks { - if b, err := parseBlockLocator(block); err == nil { + if b, err := ParseBlockLocator(block); err == nil { blockChannel <- b } else { log.Printf("ERROR: Failed to parse block: %v", err) @@ -99,6 +113,6 @@ func (m *Manifest) BlockIterWithDuplicates() <-chan BlockLocator { } } close(blockChannel) - }(m.LineIter()) + }(m.StreamIter()) return blockChannel }