"git.arvados.org/arvados.git/sdk/go/arvados"
"git.arvados.org/arvados.git/sdk/go/keepclient"
"git.arvados.org/arvados.git/sdk/go/manifest"
- "github.com/bmatcuk/doublestar"
+ "github.com/bmatcuk/doublestar/v4"
)
type printfer interface {
return collfs.MarshalManifest(".")
}
-func (cp *copier) matchGlobs(path string) bool {
+func (cp *copier) matchGlobs(path string, isDir bool) bool { // isDir: whether path names a directory (callers pass true for cp.dirs entries, false for cp.files entries)
// Report whether path matches any glob in cp.globs. An entry in
// the top level of the output directory looks like "/foo", but
// globs look like "foo", so we strip leading "/" before matching.
path = strings.TrimLeft(path, "/")
for _, glob := range cp.globs {
- if match, _ := doublestar.Match(glob, path); match {
+ if !isDir && strings.HasSuffix(glob, "/**") {
+ // doublestar.Match("f*/**", "ff") and
+ // doublestar.Match("f*/**", "ff/gg") both
+ // return true, but (to be compatible with
+ // bash shopt) "ff" should match only if it is
+ // a directory.
+ //
+ // To avoid errant matches, we add the file's
+ // basename to the end of the pattern:
+ //
+ // Match("f*/**/ff", "ff") => false
+ // Match("f*/**/gg", "ff/gg") => true
+ //
+ // Of course, we need to escape basename in
+ // case it contains *, ?, \, etc.
+ _, name := filepath.Split(path) // name is the final component of path
+ escapedName := strings.TrimSuffix(strings.Replace(name, "", "\\", -1), "\\") // Replace with an empty "old" inserts "\" before every rune and once at the end; TrimSuffix drops that trailing one
+ if match, _ := doublestar.Match(glob+"/"+escapedName, path); match { // the error result of Match is discarded
+ return true
+ }
+ } else if match, _ := doublestar.Match(glob, path); match { // ordinary case: match the glob as written (error result discarded)
return true
+ } else if isDir {
+ // Work around a doublestar bug (v4.6.1):
+ // "foo*/**" should match "foo", but does not,
+ // because isZeroLengthPattern does not accept
+ // "*/**" as a zero length pattern.
+ if trunc := strings.TrimSuffix(glob, "*/**"); trunc != glob { // true only when glob actually ended with "*/**"
+ if match, _ := doublestar.Match(trunc, path); match { // retry with the "*/**" tail removed
+ return true
+ }
+ }
}
}
return false // no glob in cp.globs matched
}
keepdirs := make(map[string]bool)
for _, path := range cp.dirs {
- if cp.matchGlobs(path) {
+ if cp.matchGlobs(path, true) {
keepdirs[path] = true
}
}
}
var keepfiles []filetodo
for _, file := range cp.files {
- if cp.matchGlobs(file.dst) {
+ if cp.matchGlobs(file.dst, false) {
keepfiles = append(keepfiles, file)
}
}
}
include := make(map[string]bool)
err := fs.WalkDir(arvados.FS(collfs), "", func(path string, ent fs.DirEntry, err error) error {
- if cp.matchGlobs(path) {
+ if cp.matchGlobs(path, ent.IsDir()) {
for i, c := range path {
if i > 0 && c == '/' {
include[path[:i]] = true
return err
}
+// subtreeCouldMatch returns true if it's possible for any descendant
+// of the given path to match anything in cp.globs. Used by walkMount
+// to avoid loading collections that are mounted underneath
+// ctrOutputPath but excluded by globs.
+func (cp *copier) subtreeCouldMatch(path string) bool {
+ if len(cp.globs) == 0 { // no globs configured, so no subtree is ever ruled out
+ return true
+ }
+ pathdepth := 1 + strings.Count(path, "/") // number of "/"-separated components in path
+ for _, glob := range cp.globs {
+ globdepth := 0 // number of real path separators consumed from glob so far
+ lastsep := 0 // index just past the most recent separator, i.e. start of the current glob component
+ for i, c := range glob {
+ if c != '/' || !doublestar.ValidatePattern(glob[:i]) {
+ // An escaped "/", or a "/" inside a character
+ // class, is not a path separator.
+ continue
+ }
+ if glob[lastsep:i] == "**" { // the component that just ended is exactly "**"
+ return true // reported as a possible match without testing the earlier components against path
+ }
+ lastsep = i + 1
+ if globdepth++; globdepth == pathdepth { // glob[:i] now has as many components as path
+ if match, _ := doublestar.Match(glob[:i]+"/*", path+"/z"); match { // "/*" and "/z" stand in for an arbitrary child entry; the error result is discarded
+ return true
+ }
+ break // this glob's prefix at path's depth did not match; move on to the next glob
+ }
+ }
+ if globdepth < pathdepth && glob[lastsep:] == "**" { // glob has fewer components than path and its last component is "**"
+ return true
+ }
+ }
+ return false // no glob can match anything below path
+}
+
func (cp *copier) copyFile(fs arvados.CollectionFileSystem, f filetodo) (int64, error) {
cp.logger.Printf("copying %q (%d bytes)", strings.TrimLeft(f.dst, "/"), f.size)
dst, err := fs.OpenFile(f.dst, os.O_CREATE|os.O_WRONLY, 0666)
// copy, relative to its mount point -- ".", "./foo.txt", ...
srcRelPath := filepath.Join(".", srcMount.Path, src[len(srcRoot):])
- // outputRelPath is the path relative in the output directory
- // that corresponds to the path in the output collection where
- // the file will go, for logging
+ // outputRelPath is the destination path relative to the
+ // output directory. Used for logging and glob matching.
var outputRelPath = ""
if strings.HasPrefix(src, cp.ctrOutputDir) {
outputRelPath = strings.TrimPrefix(src[len(cp.ctrOutputDir):], "/")
switch {
case srcMount.ExcludeFromOutput:
+ case outputRelPath != "*" && !cp.subtreeCouldMatch(outputRelPath):
+ cp.logger.Printf("not copying %q because contents cannot match output globs", outputRelPath)
+ return nil
case srcMount.Kind == "tmp":
// Handle by walking the host filesystem.
return cp.walkHostFS(dest, src, maxSymlinks, walkMountsBelow)
// (...except mount types that are
// handled as regular files.)
continue
+ } else if isMount && !cp.subtreeCouldMatch(src[len(cp.ctrOutputDir)+1:]) {
+ continue
}
err = cp.walkHostFS(dest, src, maxSymlinks, false)
if err != nil {