package manifest
import (
- "bufio"
"fmt"
+ "git.curoverse.com/arvados.git/sdk/go/blockdigest"
"log"
"regexp"
"strconv"
}
type BlockLocator struct {
- Digest string
- Size int
- Hints []string
+ Digest blockdigest.BlockDigest
+ Size int
+ Hints []string
}
-type ManifestLine struct {
- StreamName string
- Blocks []string
- Files []string
+// ManifestStream represents a single line from a manifest.
+type ManifestStream struct {
+ StreamName string
+ Blocks []string
+ Files []string
}
-func parseBlockLocator(s string) (b BlockLocator, err error) {
+func ParseBlockLocator(s string) (b BlockLocator, err error) {
if !LocatorPattern.MatchString(s) {
- err = fmt.Errorf("String \"%s\" does not match BlockLocator pattern " +
+ err = fmt.Errorf("String \"%s\" does not match BlockLocator pattern "+
"\"%s\".",
s,
LocatorPattern.String())
} else {
tokens := strings.Split(s, "+")
var blockSize int64
- // We expect ParseInt to succeed since LocatorPattern restricts
- // tokens[1] to contain exclusively digits.
+ var blockDigest blockdigest.BlockDigest
+ // We expect both of the following to succeed since LocatorPattern
+ // restricts the strings appropriately.
+ blockDigest, err = blockdigest.FromString(tokens[0])
+ if err != nil {
+ return
+ }
blockSize, err = strconv.ParseInt(tokens[1], 10, 0)
- if err == nil {
- b.Digest = tokens[0]
- b.Size = int(blockSize)
- b.Hints = tokens[2:]
+ if err != nil {
+ return
}
+ b.Digest = blockDigest
+ b.Size = int(blockSize)
+ b.Hints = tokens[2:]
}
return
}
-func parseManifestLine(s string) (m ManifestLine) {
+func parseManifestStream(s string) (m ManifestStream) {
tokens := strings.Split(s, " ")
m.StreamName = tokens[0]
tokens = tokens[1:]
return
}
-func (m *Manifest) LineIter() <-chan ManifestLine {
- ch := make(chan ManifestLine)
+func (m *Manifest) StreamIter() <-chan ManifestStream {
+ ch := make(chan ManifestStream)
go func(input string) {
- scanner := bufio.NewScanner(strings.NewReader(input))
- for scanner.Scan() {
- // We parse one line at a time, to save effort if we only need
- // the first few lines.
- ch <- parseManifestLine(scanner.Text())
+ // This slice holds the current line and the remainder of the
+ // manifest. We parse one line at a time, to save effort if we
+ // only need the first few lines.
+ lines := []string{"", input}
+ for {
+ lines = strings.SplitN(lines[1], "\n", 2)
+ if len(lines[0]) > 0 {
+ // Only parse non-blank lines
+ ch <- parseManifestStream(lines[0])
+ }
+ if len(lines) == 1 {
+ break
+ }
}
close(ch)
}(m.Text)
return ch
}
-
// Blocks may appear multiple times within the same manifest if they
// are used by multiple files. In that case this Iterator will output
// the same block multiple times.
func (m *Manifest) BlockIterWithDuplicates() <-chan BlockLocator {
blockChannel := make(chan BlockLocator)
- go func(lineChannel <-chan ManifestLine) {
- for m := range lineChannel {
+ go func(streamChannel <-chan ManifestStream) {
+ for m := range streamChannel {
for _, block := range m.Blocks {
- if b, err := parseBlockLocator(block); err == nil {
+ if b, err := ParseBlockLocator(block); err == nil {
blockChannel <- b
} else {
log.Printf("ERROR: Failed to parse block: %v", err)
}
}
close(blockChannel)
- }(m.LineIter())
+ }(m.StreamIter())
return blockChannel
}