X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/efaf24041034cbc5de09d80283bd98f3e6434854..5e27876fa4d3faf3b973282bfb4f152c02345bdc:/sdk/go/manifest/manifest.go diff --git a/sdk/go/manifest/manifest.go b/sdk/go/manifest/manifest.go index 404a1beda8..4e816cd73b 100644 --- a/sdk/go/manifest/manifest.go +++ b/sdk/go/manifest/manifest.go @@ -5,61 +5,29 @@ package manifest import ( - "bufio" - "fmt" + "git.curoverse.com/arvados.git/sdk/go/blockdigest" "log" - "regexp" - "strconv" "strings" ) -var LocatorPattern = regexp.MustCompile( - "^[0-9a-fA-F]{32}\\+[0-9]+(\\+[A-Z][A-Za-z0-9@_-]+)*$") - type Manifest struct { Text string } -type BlockLocator struct { - Digest string - Size int - Hints []string -} - -type ManifestLine struct { - StreamName string - Blocks []string - Files []string -} - -func parseBlockLocator(s string) (b BlockLocator, err error) { - if !LocatorPattern.MatchString(s) { - err = fmt.Errorf("String \"%s\" does not match BlockLocator pattern " + - "\"%s\".", - s, - LocatorPattern.String()) - } else { - tokens := strings.Split(s, "+") - var blockSize int64 - // We expect ParseInt to succeed since LocatorPattern restricts - // tokens[1] to contain exclusively digits. - blockSize, err = strconv.ParseInt(tokens[1], 10, 0) - if err == nil { - b.Digest = tokens[0] - b.Size = int(blockSize) - b.Hints = tokens[2:] - } - } - return +// Represents a single line from a manifest. +type ManifestStream struct { + StreamName string + Blocks []string + Files []string } -func parseManifestLine(s string) (m ManifestLine) { +func parseManifestStream(s string) (m ManifestStream) { tokens := strings.Split(s, " ") m.StreamName = tokens[0] tokens = tokens[1:] var i int for i = range tokens { - if !LocatorPattern.MatchString(tokens[i]) { + if !blockdigest.IsBlockLocator(tokens[i]) { break } } @@ -68,34 +36,37 @@ func parseManifestLine(s string) (m ManifestLine) { return } -func (m *Manifest) LineIter() <-chan ManifestLine { - ch := make(chan ManifestLine) +func (m *Manifest) StreamIter() <-chan ManifestStream { + ch := make(chan ManifestStream) go func(input string) { - scanner := bufio.NewScanner(strings.NewReader(input)) - for scanner.Scan() { - // We parse one line at a time, to save effort if we only need - // the first few lines. - ch <- parseManifestLine(scanner.Text()) - } - if err := scanner.Err(); err != nil { - log.Fatalf("Error encountered iterating through manifest lines: %v", - err) + // This slice holds the current line and the remainder of the + // manifest. We parse one line at a time, to save effort if we + // only need the first few lines. + lines := []string{"", input} + for { + lines = strings.SplitN(lines[1], "\n", 2) + if len(lines[0]) > 0 { + // Only parse non-blank lines + ch <- parseManifestStream(lines[0]) + } + if len(lines) == 1 { + break + } } close(ch) }(m.Text) return ch } - // Blocks may appear mulitple times within the same manifest if they // are used by multiple files. In that case this Iterator will output // the same block multiple times. -func (m *Manifest) BlockIterWithDuplicates() <-chan BlockLocator { - blockChannel := make(chan BlockLocator) - go func(lineChannel <-chan ManifestLine) { - for m := range lineChannel { +func (m *Manifest) BlockIterWithDuplicates() <-chan blockdigest.BlockLocator { + blockChannel := make(chan blockdigest.BlockLocator) + go func(streamChannel <-chan ManifestStream) { + for m := range streamChannel { for _, block := range m.Blocks { - if b, err := parseBlockLocator(block); err == nil { + if b, err := blockdigest.ParseBlockLocator(block); err == nil { blockChannel <- b } else { log.Printf("ERROR: Failed to parse block: %v", err) @@ -103,6 +74,6 @@ func (m *Manifest) BlockIterWithDuplicates() <-chan BlockLocator { } } close(blockChannel) - }(m.LineIter()) + }(m.StreamIter()) return blockChannel }