12183: Evaluate symlinks to handle chained links and absolute paths.
authorPeter Amstutz <peter.amstutz@curoverse.com>
Fri, 29 Sep 2017 19:11:46 +0000 (15:11 -0400)
committerPeter Amstutz <pamstutz@veritasgenetics.com>
Wed, 1 Nov 2017 20:50:37 +0000 (16:50 -0400)
Ensure that all symlinks either resolve within the output directory or to a
keep mount.

Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz@veritasgenetics.com>

services/crunch-run/crunchrun.go
services/crunch-run/crunchrun_test.go

index 1665742c5fb02cc20f78cd321d6ad387b069792a..61179975b0e2b5c9fe47125c8d566ba15d6ae290 100644 (file)
@@ -920,6 +920,102 @@ func (runner *ContainerRunner) WaitFinish() (err error) {
        return nil
 }
 
+// EvalSymlinks follows symlinks within the output directory.  If the symlink
+// leads to a keep mount, copy the manifest text from the keep mount into the
+// output manifestText.  Ensure that whether symlinks are relative or absolute,
+// they must remain within the output directory.
+//
+// Assumes initial value of "path" is absolute, and located within runner.HostOutputDir.
+func (runner *ContainerRunner) EvalSymlinks(path string, binds []string, symlinksToRemove *[]string) (manifestText string, err error) {
+       var links []string
+
+       defer func() {
+               if err != nil {
+                       *symlinksToRemove = append(*symlinksToRemove, links...)
+               }
+       }()
+
+       for n := 0; n < 32; n++ {
+               var info os.FileInfo
+               info, err = os.Lstat(path)
+               if err != nil {
+                       return
+               }
+
+               var tgt string
+               if info.Mode()&os.ModeSymlink == 0 {
+                       // Not a symlink, nothing to do.
+                       return
+               }
+
+               // Remember symlink for cleanup later
+               links = append(links, path)
+
+               tgt, err = os.Readlink(path)
+               if err != nil {
+                       return
+               }
+               if !strings.HasPrefix(tgt, "/") {
+                       // Relative symlink, resolve it to host path
+                       tgt = filepath.Join(filepath.Dir(path), tgt)
+               }
+               if strings.HasPrefix(tgt, runner.Container.OutputPath+"/") && !strings.HasPrefix(tgt, runner.HostOutputDir+"/") {
+                       // Absolute symlink to container output path, adjust it to host output path.
+                       tgt = filepath.Join(runner.HostOutputDir, tgt[len(runner.Container.OutputPath):])
+               }
+
+               runner.CrunchLog.Printf("Resolve %q to %q", path, tgt)
+
+               // go through mounts and try reverse map to collection reference
+               for _, bind := range binds {
+                       mnt := runner.Container.Mounts[bind]
+                       if tgt == bind || strings.HasPrefix(tgt, bind+"/") {
+                               // get path relative to bind
+                               targetSuffix := tgt[len(bind):]
+
+                               // Copy mount and adjust the path to add path relative to the bind
+                               adjustedMount := mnt
+                               adjustedMount.Path = filepath.Join(adjustedMount.Path, targetSuffix)
+
+                               for _, l := range links {
+                                       // The chain of one or more symlinks
+                                       // terminates in this keep mount, so
+                                       // add them all to the manifest text at
+                                       // appropriate locations.
+                                       var m string
+                                       outputSuffix := l[len(runner.HostOutputDir):]
+                                       m, err = runner.getCollectionManifestForPath(adjustedMount, outputSuffix)
+                                       if err != nil {
+                                               return
+                                       }
+                                       manifestText = manifestText + m
+                                       *symlinksToRemove = append(*symlinksToRemove, l)
+                               }
+                               return
+                       }
+               }
+
+               // If target is not a mount, it must be within the output
+               // directory, otherwise it is an error.
+               if !strings.HasPrefix(tgt, runner.HostOutputDir+"/") {
+                       err = fmt.Errorf("Output directory symlink %q points to invalid location %q, must point to mount or output directory.",
+                               path[len(runner.HostOutputDir):], tgt)
+                       return
+               }
+
+               // Update symlink to host FS
+               os.Remove(path)
+               os.Symlink(tgt, path)
+
+               // Target is within the output directory, so loop and check if
+               // it is also a symlink.
+               path = tgt
+       }
+       // Got stuck in a loop or just a pathological number of links, give up.
+       err = fmt.Errorf("Too many symlinks.")
+       return
+}
+
 // HandleOutput sets the output, unmounts the FUSE mount, and deletes temporary directories
 func (runner *ContainerRunner) CaptureOutput() error {
        if runner.finalState != "Complete" {
@@ -966,73 +1062,28 @@ func (runner *ContainerRunner) CaptureOutput() error {
        if err != nil {
                // Regular directory
 
+               var symlinksToRemove []string
+               var m string
                // Find symlinks to arv-mounted files & dirs.
                err = filepath.Walk(runner.HostOutputDir, func(path string, info os.FileInfo, err error) error {
                        if err != nil {
                                return err
                        }
-                       if info.Mode()&os.ModeSymlink == 0 {
-                               return nil
-                       }
-                       // read link to get container internal path
-                       // only support 1 level of symlinking here.
-                       var tgt string
-                       tgt, err = os.Readlink(path)
-                       if err != nil {
-                               return err
-                       }
-
-                       // get path relative to output dir
-                       outputSuffix := path[len(runner.HostOutputDir):]
-
-                       if strings.HasPrefix(tgt, "/") {
-                               // go through mounts and try reverse map to collection reference
-                               for _, bind := range binds {
-                                       mnt := runner.Container.Mounts[bind]
-                                       if tgt == bind || strings.HasPrefix(tgt, bind+"/") {
-                                               // get path relative to bind
-                                               targetSuffix := tgt[len(bind):]
-
-                                               // Copy mount and adjust the path to add path relative to the bind
-                                               adjustedMount := mnt
-                                               adjustedMount.Path = filepath.Join(adjustedMount.Path, targetSuffix)
-
-                                               // get manifest text
-                                               var m string
-                                               m, err = runner.getCollectionManifestForPath(adjustedMount, outputSuffix)
-                                               if err != nil {
-                                                       return err
-                                               }
-                                               manifestText = manifestText + m
-                                               // delete symlink so WriteTree won't try to to dereference it.
-                                               os.Remove(path)
-                                               return nil
-                                       }
-                               }
-                       }
-
-                       // Not a link to a mount.  Must be dereferencible and
-                       // point into the output directory.
-                       tgt, err = filepath.EvalSymlinks(path)
-                       if err != nil {
-                               os.Remove(path)
-                               return err
-                       }
-
-                       // Symlink target must be within the output directory otherwise it's an error.
-                       if !strings.HasPrefix(tgt, runner.HostOutputDir+"/") {
-                               os.Remove(path)
-                               return fmt.Errorf("Output directory symlink %q points to invalid location %q, must point to mount or output directory.",
-                                       outputSuffix, tgt)
+                       m, err = runner.EvalSymlinks(path, binds, &symlinksToRemove)
+                       if err == nil {
+                               manifestText = manifestText + m
                        }
-                       return nil
+                       return err
                })
+               runner.CrunchLog.Printf("sm %q", symlinksToRemove)
+               for _, l := range symlinksToRemove {
+                       os.Remove(l)
+               }
                if err != nil {
                        return fmt.Errorf("While checking output symlinks: %v", err)
                }
 
                cw := CollectionWriter{0, runner.Kc, nil, nil, sync.Mutex{}}
-               var m string
                m, err = cw.WriteTree(runner.HostOutputDir, runner.CrunchLog.Logger)
                manifestText = manifestText + m
                if err != nil {
index 5e77d7bd7bcd41fb19c80ede9b450a542cbe0635..dbf9cc7b989521ac380fa313cb869e187fd7a7dc 100644 (file)
@@ -1523,6 +1523,46 @@ func (s *TestSuite) TestOutputError(c *C) {
        c.Check(api.CalledWith("container.state", "Cancelled"), NotNil)
 }
 
+func (s *TestSuite) TestOutputSymlinkToOutput(c *C) {
+       helperRecord := `{
+               "command": ["/bin/sh", "-c", "echo $FROBIZ"],
+               "container_image": "d4ab34d3d4f8a72f5c4973051ae69fab+122",
+               "cwd": "/bin",
+               "environment": {"FROBIZ": "bilbo"},
+               "mounts": {
+        "/tmp": {"kind": "tmp"}
+    },
+               "output_path": "/tmp",
+               "priority": 1,
+               "runtime_constraints": {}
+       }`
+
+       extraMounts := []string{}
+
+       api, _, _ := FullRunHelper(c, helperRecord, extraMounts, 0, func(t *TestDockerClient) {
+               rf, _ := os.Create(t.realTemp + "/2/realfile")
+               rf.Write([]byte("foo"))
+               rf.Close()
+               os.Symlink("/tmp/realfile", t.realTemp+"/2/file1")
+               os.Symlink("realfile", t.realTemp+"/2/file2")
+               os.Symlink("/tmp/file1", t.realTemp+"/2/file3")
+               os.Symlink("file2", t.realTemp+"/2/file4")
+               t.logWriter.Close()
+       })
+
+       c.Check(api.CalledWith("container.exit_code", 0), NotNil)
+       c.Check(api.CalledWith("container.state", "Complete"), NotNil)
+       for _, v := range api.Content {
+               if v["collection"] != nil {
+                       collection := v["collection"].(arvadosclient.Dict)
+                       if strings.Index(collection["name"].(string), "output") == 0 {
+                               manifest := collection["manifest_text"].(string)
+                               c.Check(manifest, Equals, ". 7a2c86e102dcc231bd232aad99686dfa+15 0:3:file1 3:3:file2 6:3:file3 9:3:file4 12:3:realfile\n")
+                       }
+               }
+       }
+}
+
 func (s *TestSuite) TestStdinCollectionMountPoint(c *C) {
        helperRecord := `{
                "command": ["/bin/sh", "-c", "echo $FROBIZ"],