+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: Apache-2.0
+
/* Deals with parsing Manifest Text. */
// Inspired by the Manifest class in arvados/sdk/ruby/lib/arvados/keep.rb
import (
"errors"
"fmt"
- "git.curoverse.com/arvados.git/sdk/go/blockdigest"
+ "git.arvados.org/arvados.git/sdk/go/blockdigest"
"path"
"regexp"
"sort"
// ManifestStream holds the parsed form of one stream of a manifest.
// If Err is non-nil, parsing of the stream stopped early and the other
// fields may be only partially filled in.
type ManifestStream struct {
// Name of the stream (presumably the first token of the manifest
// line -- confirm against the parser).
StreamName string
// Block locator strings; each one is run through ParseBlockLocator
// while the stream is parsed.
Blocks []string
// blockOffsets has len(Blocks)+1 entries: entry i is the offset within
// the stream at which Blocks[i] starts, and the final entry is the
// total size of all blocks. The field is unexported by this change.
- BlockOffsets []uint64
+ blockOffsets []uint64
// File segments belonging to this stream; iterated by Manifest.segment.
FileStreamSegments []FileStreamSegment
// Set when the stream could not be parsed (e.g. a bad block locator
// or no file tokens).
Err error
}
lo = i
} else {
hi = i
- i = ((hi + lo) / 2)
- block_start = offsets[i]
- block_end = offsets[i+1]
}
+ i = ((hi + lo) / 2)
+ block_start = offsets[i]
+ block_end = offsets[i+1]
}
return i
}
}
// Binary search to determine first block in the stream
- i := firstBlock(s.BlockOffsets, wantPos)
+ i := firstBlock(s.blockOffsets, wantPos)
if i == -1 {
// Shouldn't happen, file segments are checked in parseManifestStream
panic(fmt.Sprintf("File segment %v extends past end of stream", fTok))
}
- for i < len(s.Blocks) {
- blockPos := s.BlockOffsets[i]
- blockEnd := s.BlockOffsets[i+1]
+ for ; i < len(s.Blocks); i++ {
+ blockPos := s.blockOffsets[i]
+ blockEnd := s.blockOffsets[i+1]
if blockEnd <= wantPos {
// Shouldn't happen, FirstBlock() should start
// us on the right block, so if this triggers
fseg.Len = int(wantPos+wantLen-blockPos) - fseg.Offset
}
ch <- &fseg
- i += 1
}
}
}
return
}
- m.BlockOffsets = make([]uint64, len(m.Blocks)+1)
+ m.blockOffsets = make([]uint64, len(m.Blocks)+1)
var streamoffset uint64
for i, b := range m.Blocks {
bl, err := ParseBlockLocator(b)
m.Err = err
return
}
- m.BlockOffsets[i] = streamoffset
+ m.blockOffsets[i] = streamoffset
streamoffset += uint64(bl.Size)
}
- m.BlockOffsets[len(m.Blocks)] = streamoffset
+ m.blockOffsets[len(m.Blocks)] = streamoffset
if len(fileTokens) == 0 {
m.Err = fmt.Errorf("No file tokens found")
return
}
// segment iterates over the streams produced by m.StreamIter() and returns
// a pointer to the segmentedManifest it builds from them.
//
// With this change a stream whose Err is set is no longer skipped: the
// first such error is returned together with a nil manifest. On success the
// returned error is nil.
//
// NOTE(review): the body of the per-file-segment loop is not visible in this
// excerpt, so how `files` is populated is not documented here.
-func (m *Manifest) segment() *segmentedManifest {
+func (m *Manifest) segment() (*segmentedManifest, error) {
files := make(segmentedManifest)
for stream := range m.StreamIter() {
if stream.Err != nil {
- // Skip streams with errors
- continue
+ // Stream has an error
// NOTE(review): returning here stops draining the StreamIter channel;
// if its producer blocks on send it would never finish -- confirm.
+ return nil, stream.Err
}
// Fresh per-stream map keyed by string (presumably file names already
// seen in this stream -- confirm against the elided loop body).
currentStreamfiles := make(map[string]bool)
for _, f := range stream.FileStreamSegments {
}
}
- return &files
+ return &files, nil
}
func (stream segmentedStream) normalizedText(name string) string {
var sortedfiles []string
- for k, _ := range stream {
+ for k := range stream {
sortedfiles = append(sortedfiles, k)
}
sort.Strings(sortedfiles)
stream_tokens := []string{EscapeName(name)}
- blocks := make(map[string]int64)
+ blocks := make(map[blockdigest.BlockDigest]int64)
var streamoffset int64
// Go through each file and add each referenced block exactly once.
for _, streamfile := range sortedfiles {
for _, segment := range stream[streamfile] {
- if _, ok := blocks[segment.Locator]; !ok {
+ b, _ := ParseBlockLocator(segment.Locator)
+ if _, ok := blocks[b.Digest]; !ok {
stream_tokens = append(stream_tokens, segment.Locator)
- blocks[segment.Locator] = streamoffset
- b, _ := ParseBlockLocator(segment.Locator)
+ blocks[b.Digest] = streamoffset
streamoffset += int64(b.Size)
}
}
fout := EscapeName(streamfile)
for _, segment := range stream[streamfile] {
// Collapse adjacent segments
- streamoffset = blocks[segment.Locator] + int64(segment.Offset)
+ b, _ := ParseBlockLocator(segment.Locator)
+ streamoffset = blocks[b.Digest] + int64(segment.Offset)
if span_start == -1 {
span_start = streamoffset
span_end = streamoffset + int64(segment.Len)
}
var sortedstreams []string
- for k, _ := range m {
+ for k := range m {
sortedstreams = append(sortedstreams, k)
}
sort.Strings(sortedstreams)
return manifest
}
-// ManifestTextForPath extracts some or all of the manifest and returns
-// normalized manifest text. This is a swiss army knife function that can be
-// used a couple of different ways:
+// Extract extracts some or all of the manifest and returns the extracted
+// portion as a normalized manifest. This is a swiss army knife function that
+// can be used in several ways:
+//
+// If 'srcpath' and 'relocate' are '.' it simply returns an equivalent manifest
+// in normalized form.
+//
+// Extract(".", ".") // return entire normalized manifest text
//
// If 'srcpath' points to a single file, it will return manifest text for just that file.
// The value of "relocate" can be used to rename the file or set the file stream.
//
-// ManifestTextForPath("./foo", ".") (extract file "foo" and put it in stream ".")
-// ManifestTextForPath("./foo", "./bar") (extract file "foo", rename it to "bar" in stream ".")
-// ManifestTextForPath("./foo", "./bar/") (extract file "foo", rename it to "./bar/foo")
-// ManifestTextForPath("./foo", "./bar/baz") (extract file "foo", rename it to "./bar/baz")
+// Extract("./foo", ".") // extract file "foo" and put it in stream "."
+// Extract("./foo", "./bar") // extract file "foo", rename it to "bar" in stream "."
+// Extract("./foo", "./bar/") // extract file "foo", rename it to "./bar/foo"
+// Extract("./foo", "./bar/baz") // extract file "foo", rename it to "./bar/baz"
//
// Otherwise it will return the manifest text for all streams with the prefix in "srcpath" and place
// them under the path in "relocate".
//
-// ManifestTextForPath(".", ".") (return entire normalized manfest text)
-// ManifestTextForPath("./stream", ".") (extract "./stream" to "." and "./stream/subdir" to "./subdir")
-// ManifestTextForPath("./stream", "./bar") (extract "./stream" to "./bar" and "./stream/subdir" to "./bar/subdir")
-func (m *Manifest) ManifestTextForPath(srcpath, relocate string) string {
- return m.segment().manifestTextForPath(srcpath, relocate)
-}
-
-// NormalizedText returns the manifest text in normalized form.
-func (m *Manifest) NormalizedText() string {
- return m.ManifestTextForPath(".", ".")
+// Extract("./stream", ".") // extract "./stream" to "." and "./stream/subdir" to "./subdir"
+// Extract("./stream", "./bar") // extract "./stream" to "./bar" and "./stream/subdir" to "./bar/subdir"
+func (m Manifest) Extract(srcpath, relocate string) (ret Manifest) {
+ segmented, err := m.segment() // segment is declared on *Manifest; Go takes the address of the local copy m. Fails if any stream carries a parse error.
+ if err != nil {
+ ret.Err = err // report the failure through the returned Manifest; ret.Text keeps its zero value
+ return
+ }
+ ret.Text = segmented.manifestTextForPath(srcpath, relocate) // normalized text for the requested path, relocated as asked
+ return // naked return of the named result ret
}
func (m *Manifest) StreamIter() <-chan ManifestStream {