type collectionFileSystem struct {
fileSystem
uuid string
+ savedPDH atomic.Value
replicas int
storageClasses []string
+ // guessSignatureTTL tracks a lower bound for the server's
+ // configured BlobSigningTTL. The guess is initially zero, and
+ // increases when we come across a signature with an expiry
+ // time further in the future than the previous guess.
+ //
+ // When the guessed TTL is much smaller than the real TTL,
+ // preemptive signature refresh is delayed or missed entirely,
+ // which is OK.
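+ //
+ // For example, the first signature we come across that
+ // expires in 1 hour makes the guess ~1 hour; a later one that
+ // expires 2 weeks from now raises it to ~2 weeks. The guess
+ // never decreases.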
+ guessSignatureTTL time.Duration
+ holdCheckChanges time.Time
+ lockCheckChanges sync.Mutex
}
// FileSystem returns a CollectionFileSystem for the collection.
thr: newThrottle(concurrentWriters),
},
}
+ fs.savedPDH.Store(c.PortableDataHash)
if r := c.ReplicationDesired; r != nil {
fs.replicas = *r
}
return fs, nil
}
-func backdateTree(n inode, modTime time.Time) {
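+// eachNode calls ffunc on each filenode and dfunc on each dirnode in
+// the tree rooted at n, recursively. Either func may be nil.
+//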
+// caller must have lock (or guarantee no concurrent accesses somehow)
+func eachNode(n inode, ffunc func(*filenode), dfunc func(*dirnode)) {
switch n := n.(type) {
case *filenode:
- n.fileinfo.modTime = modTime
+ if ffunc != nil {
+ ffunc(n)
+ }
case *dirnode:
- n.fileinfo.modTime = modTime
+ if dfunc != nil {
+ dfunc(n)
+ }
for _, n := range n.inodes {
- backdateTree(n, modTime)
+ eachNode(n, ffunc, dfunc)
+ }
+ }
+}
+
+// caller must have lock (or guarantee no concurrent accesses somehow)
+func backdateTree(n inode, modTime time.Time) {
+ eachNode(n, func(fn *filenode) {
+ fn.fileinfo.modTime = modTime
+ }, func(dn *dirnode) {
+ dn.fileinfo.modTime = modTime
+ })
+}
+
+// Approximate portion of signature TTL remaining, usually between 0
+// and 1, or negative if some signatures have expired. Also returns
+// the current TTL estimate (guessSignatureTTL, updated if this scan
+// observed a longer remaining TTL).
+func (fs *collectionFileSystem) signatureTimeLeft() (float64, time.Duration) {
+ var (
+ now = time.Now()
+ earliest = now.Add(time.Hour * 24 * 7 * 365) // effectively "distant future"
+ latest time.Time
+ )
+ fs.fileSystem.root.RLock()
+ eachNode(fs.root, func(fn *filenode) {
+ fn.Lock()
+ defer fn.Unlock()
+ for _, seg := range fn.segments {
+ seg, ok := seg.(storedSegment)
+ if !ok {
+ continue
+ }
+ expiryTime, err := signatureExpiryTime(seg.locator)
+ if err != nil {
+ continue
+ }
+ if expiryTime.Before(earliest) {
+ earliest = expiryTime
+ }
+ if expiryTime.After(latest) {
+ latest = expiryTime
+ }
}
+ }, nil)
+ fs.fileSystem.root.RUnlock()
+
+ if latest.IsZero() {
+ // No signatures == 100% of TTL remaining.
+ return 1, 1
}
+
+ ttl := latest.Sub(now)
+ fs.fileSystem.root.Lock()
+ {
+ if ttl > fs.guessSignatureTTL {
+ // ttl is closer to the real TTL than
+ // guessSignatureTTL.
+ fs.guessSignatureTTL = ttl
+ } else {
+ // Use the previous best guess to compute the
+ // portion remaining (below, after unlocking
+ // mutex).
+ ttl = fs.guessSignatureTTL
+ }
+ }
+ fs.fileSystem.root.Unlock()
+
+ return earliest.Sub(now).Seconds() / ttl.Seconds(), ttl
+}
+
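+// updateSignatures replaces the locator of each storedSegment in the
+// tree with the matching (freshly signed) locator from newmanifest,
+// matching blocks by hash and size, ignoring hints/signatures.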
+func (fs *collectionFileSystem) updateSignatures(newmanifest string) {
+ newLoc := map[string]string{}
+ for _, tok := range regexp.MustCompile(`\S+`).FindAllString(newmanifest, -1) {
+ if mBlkRe.MatchString(tok) {
+ newLoc[stripAllHints(tok)] = tok
+ }
+ }
+ fs.fileSystem.root.Lock()
+ defer fs.fileSystem.root.Unlock()
+ eachNode(fs.root, func(fn *filenode) {
+ fn.Lock()
+ defer fn.Unlock()
+ for idx, seg := range fn.segments {
+ seg, ok := seg.(storedSegment)
+ if !ok {
+ continue
+ }
+ loc, ok := newLoc[stripAllHints(seg.locator)]
+ if !ok {
+ continue
+ }
+ seg.locator = loc
+ fn.segments[idx] = seg
+ }
+ }, nil)
}
func (fs *collectionFileSystem) newNode(name string, perm os.FileMode, modTime time.Time) (node inode, err error) {
return ErrInvalidOperation
}
+// Check for and incorporate upstream changes -- unless that has
+// already been done recently, in which case this func is a no-op.
+func (fs *collectionFileSystem) checkChangesOnServer() error {
+ if fs.uuid == "" && fs.savedPDH.Load() == "" {
+ return nil
+ }
+
+ // First try UUID if any, then last known PDH. Stop if all
+ // signatures are new enough.
+ checkingAll := false
+ for _, id := range []string{fs.uuid, fs.savedPDH.Load().(string)} {
+ if id == "" {
+ continue
+ }
+
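+ // Rate limit: if a recent check found signatures still
+ // fresh, skip until holdCheckChanges passes, unless we have
+ // already committed to checking all IDs.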
+ fs.lockCheckChanges.Lock()
+ if !checkingAll && fs.holdCheckChanges.After(time.Now()) {
+ fs.lockCheckChanges.Unlock()
+ return nil
+ }
+ remain, ttl := fs.signatureTimeLeft()
+ if remain > 0.01 && !checkingAll {
+ fs.holdCheckChanges = time.Now().Add(ttl / 100)
+ }
+ fs.lockCheckChanges.Unlock()
+
+ if remain >= 0.5 {
+ break
+ }
+ checkingAll = true
+ var coll Collection
+ err := fs.RequestAndDecode(&coll, "GET", "arvados/v1/collections/"+id, nil, map[string]interface{}{"select": []string{"portable_data_hash", "manifest_text"}})
+ if err != nil {
+ continue
+ }
+ fs.updateSignatures(coll.ManifestText)
+ }
+ return nil
+}
+
+// Refresh signature on a single locator, if necessary. Assume caller
+// has lock. If an update is needed, and there are any storedSegments
+// whose signatures can be updated, start a background task to update
+// them asynchronously when the caller releases locks.
+func (fs *collectionFileSystem) refreshSignature(locator string) string {
+ exp, err := signatureExpiryTime(locator)
+ if err != nil || exp.Sub(time.Now()) > time.Minute {
+ // Synchronous update is not needed. Start an
+ // asynchronous update if needed.
+ go fs.checkChangesOnServer()
+ return locator
+ }
+ var manifests string
+ for _, id := range []string{fs.uuid, fs.savedPDH.Load().(string)} {
+ if id == "" {
+ continue
+ }
+ var coll Collection
+ err := fs.RequestAndDecode(&coll, "GET", "arvados/v1/collections/"+id, nil, map[string]interface{}{"select": []string{"portable_data_hash", "manifest_text"}})
+ if err != nil {
+ continue
+ }
+ manifests += coll.ManifestText
+ }
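+ // Find a token in the fetched manifest(s) with the same
+ // hash+size as locator, and adopt its fresh signature.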
+ hash := stripAllHints(locator)
+ for _, tok := range regexp.MustCompile(`\S+`).FindAllString(manifests, -1) {
+ if mBlkRe.MatchString(tok) {
+ if stripAllHints(tok) == hash {
+ locator = tok
+ break
+ }
+ }
+ }
+ go fs.updateSignatures(manifests)
+ return locator
+}
+
func (fs *collectionFileSystem) Sync() error {
+ err := fs.checkChangesOnServer()
+ if err != nil {
+ return err
+ }
if fs.uuid == "" {
return nil
}
if err != nil {
return fmt.Errorf("sync failed: %s", err)
}
- coll := &Collection{
+ if PortableDataHash(txt) == fs.savedPDH.Load() {
+ // No local changes since last save or initial load.
+ return nil
+ }
+ coll := Collection{
UUID: fs.uuid,
ManifestText: txt,
}
- err = fs.RequestAndDecode(nil, "PUT", "arvados/v1/collections/"+fs.uuid, nil, map[string]interface{}{
+
+ selectFields := []string{"uuid", "portable_data_hash"}
+ fs.lockCheckChanges.Lock()
+ remain, _ := fs.signatureTimeLeft()
+ fs.lockCheckChanges.Unlock()
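+ // If our signatures are getting stale, also retrieve the
+ // server's signed manifest_text so updateSignatures (below)
+ // can refresh them.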
+ if remain < 0.5 {
+ selectFields = append(selectFields, "manifest_text")
+ }
+
+ err = fs.RequestAndDecode(&coll, "PUT", "arvados/v1/collections/"+fs.uuid, nil, map[string]interface{}{
"collection": map[string]string{
"manifest_text": coll.ManifestText,
},
- "select": []string{"uuid"},
+ "select": selectFields,
})
if err != nil {
return fmt.Errorf("sync failed: update %s: %s", fs.uuid, err)
}
+ fs.updateSignatures(coll.ManifestText)
+ fs.savedPDH.Store(coll.PortableDataHash)
return nil
}
return fs.fileSystem.root.(*dirnode).TreeSize()
}
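+// Snapshot returns a copy of the collection's entire tree of nodes,
+// suitable for attaching elsewhere with Splice.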
+func (fs *collectionFileSystem) Snapshot() (inode, error) {
+ return fs.fileSystem.root.Snapshot()
+}
+
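+// Splice replaces the contents of the collection's root directory
+// with a snapshot of r.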
+func (fs *collectionFileSystem) Splice(r inode) error {
+ return fs.fileSystem.root.Splice(r)
+}
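+
+// For illustration (hypothetical caller code; srcfs and dstfs are
+// assumed to be collection filesystems): the entire tree of one
+// collection can be copied into another via
+//
+//	snap, err := srcfs.Snapshot()
+//	if err == nil {
+//		err = dstfs.Splice(snap)
+//	}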
+
// filenodePtr is an offset into a file that is (usually) efficient to
// seek to. Specifically, if filenode.repacked==filenodePtr.repacked
// then
err = io.EOF
return
}
+ if ss, ok := fn.segments[ptr.segmentIdx].(storedSegment); ok {
+ ss.locator = fn.fs.refreshSignature(ss.locator)
+ fn.segments[ptr.segmentIdx] = ss
+ }
n, err = fn.segments[ptr.segmentIdx].ReadAt(p, int64(ptr.segmentOff))
if n > 0 {
ptr.off += int64(n)
}
}
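+// Snapshot returns a copy of fn, with copies of its segments, that
+// can be attached to a different parent via Splice.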
+func (fn *filenode) Snapshot() (inode, error) {
+ fn.RLock()
+ defer fn.RUnlock()
+ segments := make([]segment, 0, len(fn.segments))
+ for _, seg := range fn.segments {
+ segments = append(segments, seg.Slice(0, seg.Len()))
+ }
+ return &filenode{
+ fileinfo: fn.fileinfo,
+ segments: segments,
+ }, nil
+}
+
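+// Splice replaces fn with a snapshot of repl, attaching the snapshot
+// under fn's parent directory with fn's name.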
+func (fn *filenode) Splice(repl inode) error {
+ repl, err := repl.Snapshot()
+ if err != nil {
+ return err
+ }
+ fn.parent.Lock()
+ defer fn.parent.Unlock()
+ fn.Lock()
+ defer fn.Unlock()
+ _, err = fn.parent.Child(fn.fileinfo.name, func(inode) (inode, error) { return repl, nil })
+ if err != nil {
+ return err
+ }
+ switch repl := repl.(type) {
+ case *dirnode:
+ repl.parent = fn.parent
+ repl.fileinfo.name = fn.fileinfo.name
+ repl.setTreeFS(fn.fs)
+ case *filenode:
+ repl.parent = fn.parent
+ repl.fileinfo.name = fn.fileinfo.name
+ repl.fs = fn.fs
+ default:
+ return fmt.Errorf("cannot splice snapshot containing %T: %w", repl, ErrInvalidArgument)
+ }
+ return nil
+}
+
type dirnode struct {
fs *collectionFileSystem
treenode
case *dirnode:
size += node.MemorySize()
case *filenode:
+ size += 64 // rough allowance for the filenode struct itself
for _, seg := range node.segments {
switch seg := seg.(type) {
case *memSegment:
size += int64(seg.Len())
}
+ size += 64 // rough allowance for each segment's bookkeeping
}
}
}
- return
+ return 64 + size // plus a rough allowance for this dirnode itself
}
// caller must have write lock.
return
}
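+// Snapshot returns a deep copy of dn and all of its descendants.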
+func (dn *dirnode) Snapshot() (inode, error) {
+ return dn.snapshot()
+}
+
+func (dn *dirnode) snapshot() (*dirnode, error) {
+ dn.RLock()
+ defer dn.RUnlock()
+ snap := &dirnode{
+ treenode: treenode{
+ inodes: make(map[string]inode, len(dn.inodes)),
+ fileinfo: dn.fileinfo,
+ },
+ }
+ for name, child := range dn.inodes {
+ dupchild, err := child.Snapshot()
+ if err != nil {
+ return nil, err
+ }
+ snap.inodes[name] = dupchild
+ dupchild.SetParent(snap, name)
+ }
+ return snap, nil
+}
+
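+// Splice replaces dn's contents with a snapshot of repl. If repl is
+// a file, dn itself is replaced by the file under dn's parent; this
+// is not supported when dn is the top level of a collection.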
+func (dn *dirnode) Splice(repl inode) error {
+ repl, err := repl.Snapshot()
+ if err != nil {
+ return fmt.Errorf("cannot copy snapshot: %w", err)
+ }
+ switch repl := repl.(type) {
+ default:
+ return fmt.Errorf("cannot splice snapshot containing %T: %w", repl, ErrInvalidArgument)
+ case *dirnode:
+ dn.Lock()
+ defer dn.Unlock()
+ dn.inodes = repl.inodes
+ dn.setTreeFS(dn.fs)
+ case *filenode:
+ dn.parent.Lock()
+ defer dn.parent.Unlock()
+ removing, err := dn.parent.Child(dn.fileinfo.name, nil)
+ if err != nil {
+ return fmt.Errorf("cannot use Splice to replace a top-level directory with a file: %w", ErrInvalidOperation)
+ } else if removing != dn {
+ // If ../thisdirname is not this dirnode, it
+ // must be an inode that wraps a dirnode, like
+ // a collectionFileSystem or deferrednode.
+ if deferred, ok := removing.(*deferrednode); ok {
+ // More useful to report the type of
+ // the wrapped node rather than just
+ // *deferrednode. (We know the real
+ // inode is already loaded because dn
+ // is inside it.)
+ removing = deferred.realinode()
+ }
+ return fmt.Errorf("cannot use Splice to attach a file at top level of %T: %w", removing, ErrInvalidOperation)
+ }
+ dn.Lock()
+ defer dn.Unlock()
+ _, err = dn.parent.Child(dn.fileinfo.name, func(inode) (inode, error) { return repl, nil })
+ if err != nil {
+ return fmt.Errorf("error replacing filenode: dn.parent.Child(): %w", err)
+ }
+ repl.fs = dn.fs
+ }
+ return nil
+}
+
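+// setTreeFS sets fs on dn and on every node in the tree below it.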
+func (dn *dirnode) setTreeFS(fs *collectionFileSystem) {
+ dn.fs = fs
+ for _, child := range dn.inodes {
+ switch child := child.(type) {
+ case *dirnode:
+ child.setTreeFS(fs)
+ case *filenode:
+ child.fs = fs
+ }
+ }
+}
+
type segment interface {
io.ReaderAt
Len() int