Merge branch '17816-singularity-cwd' into main refs #17816
authorPeter Amstutz <peter.amstutz@curii.com>
Fri, 16 Jul 2021 15:27:06 +0000 (11:27 -0400)
committerPeter Amstutz <peter.amstutz@curii.com>
Fri, 16 Jul 2021 15:27:06 +0000 (11:27 -0400)
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz@curii.com>

13 files changed:
doc/admin/upgrading.html.textile.liquid
lib/crunchrun/crunchrun.go
lib/crunchrun/crunchrun_test.go
lib/crunchrun/docker.go
lib/crunchrun/executor.go
lib/crunchrun/executor_test.go
lib/crunchrun/singularity.go
services/crunch-dispatch-local/crunch-dispatch-local.go
services/crunch-dispatch-local/crunch-dispatch-local_test.go
services/crunch-dispatch-slurm/crunch-dispatch-slurm.go
tools/arvbox/lib/arvbox/docker/service/crunch-dispatch-local/run [changed from symlink to file mode: 0755]
tools/arvbox/lib/arvbox/docker/service/crunch-dispatch-local/run-service [deleted file]
tools/arvbox/lib/arvbox/docker/service/ready/run-service

index 13f093394ba481d92d25a56a08fa0f3d04285d30..3c283c354cf2f6e48dd4f3f2dfb2e732276f169e 100644 (file)
@@ -35,10 +35,14 @@ TODO: extract this information based on git commit messages and generate changel
 <div class="releasenotes">
 </notextile>
 
-h2(#main). development main (as of 2021-06-03)
+h2(#main). development main (as of 2021-07-15)
 
 "Upgrading from 2.2.0":#v2_2_0
 
+h3. crunch-dispatch-local now requires config.yml
+
+The @crunch-dispatch-local@ dispatcher now reads the API host and token from the system wide @/etc/arvados/config.yml@ .  It will fail to start that file is not found or not readable.
+
 h2(#v2_2_0). v2.2.0 (2021-06-03)
 
 "Upgrading from 2.1.0":#v2_1_0
index 23fbc430b42611eda108563e91f5387d766208ce..412f1bbfbfa95027eb5c043c5e1fcf07449139b0 100644 (file)
@@ -77,7 +77,10 @@ type PsProcess interface {
 // ContainerRunner is the main stateful struct used for a single execution of a
 // container.
 type ContainerRunner struct {
-       executor containerExecutor
+       executor       containerExecutor
+       executorStdin  io.Closer
+       executorStdout io.Closer
+       executorStderr io.Closer
 
        // Dispatcher client is initialized with the Dispatcher token.
        // This is a privileged token used to manage container status
@@ -106,8 +109,6 @@ type ContainerRunner struct {
        ExitCode      *int
        NewLogWriter  NewLogWriter
        CrunchLog     *ThrottledLogger
-       Stdout        io.WriteCloser
-       Stderr        io.WriteCloser
        logUUID       string
        logMtx        sync.Mutex
        LogCollection arvados.CollectionFileSystem
@@ -877,7 +878,7 @@ func (runner *ContainerRunner) getStdoutFile(mntPath string) (*os.File, error) {
 
 // CreateContainer creates the docker container.
 func (runner *ContainerRunner) CreateContainer(imageID string, bindmounts map[string]bindmount) error {
-       var stdin io.ReadCloser
+       var stdin io.ReadCloser = ioutil.NopCloser(bytes.NewReader(nil))
        if mnt, ok := runner.Container.Mounts["stdin"]; ok {
                switch mnt.Kind {
                case "collection":
@@ -954,6 +955,9 @@ func (runner *ContainerRunner) CreateContainer(imageID string, bindmounts map[st
        if !runner.enableMemoryLimit {
                ram = 0
        }
+       runner.executorStdin = stdin
+       runner.executorStdout = stdout
+       runner.executorStderr = stderr
        return runner.executor.Create(containerSpec{
                Image:         imageID,
                VCPUs:         runner.Container.RuntimeConstraints.VCPUs,
@@ -1018,6 +1022,27 @@ func (runner *ContainerRunner) WaitFinish() error {
        }
        runner.ExitCode = &exitcode
 
+       var returnErr error
+       if err = runner.executorStdin.Close(); err != nil {
+               err = fmt.Errorf("error closing container stdin: %s", err)
+               runner.CrunchLog.Printf("%s", err)
+               returnErr = err
+       }
+       if err = runner.executorStdout.Close(); err != nil {
+               err = fmt.Errorf("error closing container stdout: %s", err)
+               runner.CrunchLog.Printf("%s", err)
+               if returnErr == nil {
+                       returnErr = err
+               }
+       }
+       if err = runner.executorStderr.Close(); err != nil {
+               err = fmt.Errorf("error closing container stderr: %s", err)
+               runner.CrunchLog.Printf("%s", err)
+               if returnErr == nil {
+                       returnErr = err
+               }
+       }
+
        if runner.statReporter != nil {
                runner.statReporter.Stop()
                err = runner.statLogger.Close()
@@ -1025,7 +1050,7 @@ func (runner *ContainerRunner) WaitFinish() error {
                        runner.CrunchLog.Printf("error closing crunchstat logs: %v", err)
                }
        }
-       return nil
+       return returnErr
 }
 
 func (runner *ContainerRunner) updateLogs() {
index 42a2cf3ad84de8e214a7efdaa1216e01258625c2..bb7ffdf0306b26b2f5c56062aaaaaf7b256e5447 100644 (file)
@@ -120,8 +120,6 @@ func (e *stubExecutor) CgroupID() string                { return "cgroupid" }
 func (e *stubExecutor) Stop() error                     { e.stopped = true; go func() { e.exit <- -1 }(); return e.stopErr }
 func (e *stubExecutor) Close()                          { e.closed = true }
 func (e *stubExecutor) Wait(context.Context) (int, error) {
-       defer e.created.Stdout.Close()
-       defer e.created.Stderr.Close()
        return <-e.exit, e.waitErr
 }
 
@@ -524,8 +522,6 @@ func dockerLog(fd byte, msg string) []byte {
 func (s *TestSuite) TestRunContainer(c *C) {
        s.executor.runFunc = func() {
                fmt.Fprintf(s.executor.created.Stdout, "Hello world\n")
-               s.executor.created.Stdout.Close()
-               s.executor.created.Stderr.Close()
                s.executor.exit <- 0
        }
 
index a39b754b3d396e4c02c75a518d47940dab9212f7..861f8c8c1913f07bab8d7ea722dfa3c643678059 100644 (file)
@@ -186,7 +186,7 @@ func (e *dockerExecutor) Wait(ctx context.Context) (int, error) {
        }
 }
 
-func (e *dockerExecutor) startIO(stdin io.ReadCloser, stdout, stderr io.WriteCloser) error {
+func (e *dockerExecutor) startIO(stdin io.Reader, stdout, stderr io.Writer) error {
        resp, err := e.dockerclient.ContainerAttach(context.TODO(), e.containerID, dockertypes.ContainerAttachOptions{
                Stream: true,
                Stdin:  stdin != nil,
@@ -213,8 +213,7 @@ func (e *dockerExecutor) startIO(stdin io.ReadCloser, stdout, stderr io.WriteClo
        return nil
 }
 
-func (e *dockerExecutor) handleStdin(stdin io.ReadCloser, conn io.Writer, closeConn func() error) error {
-       defer stdin.Close()
+func (e *dockerExecutor) handleStdin(stdin io.Reader, conn io.Writer, closeConn func() error) error {
        defer closeConn()
        _, err := io.Copy(conn, stdin)
        if err != nil {
@@ -225,7 +224,7 @@ func (e *dockerExecutor) handleStdin(stdin io.ReadCloser, conn io.Writer, closeC
 
 // Handle docker log protocol; see
 // https://docs.docker.com/engine/reference/api/docker_remote_api_v1.15/#attach-to-a-container
-func (e *dockerExecutor) handleStdoutStderr(stdout, stderr io.WriteCloser, reader io.Reader) error {
+func (e *dockerExecutor) handleStdoutStderr(stdout, stderr io.Writer, reader io.Reader) error {
        header := make([]byte, 8)
        var err error
        for err == nil {
@@ -247,14 +246,6 @@ func (e *dockerExecutor) handleStdoutStderr(stdout, stderr io.WriteCloser, reade
        if err != nil {
                return fmt.Errorf("error copying stdout/stderr from docker: %v", err)
        }
-       err = stdout.Close()
-       if err != nil {
-               return fmt.Errorf("error writing stdout: close: %v", err)
-       }
-       err = stderr.Close()
-       if err != nil {
-               return fmt.Errorf("error writing stderr: close: %v", err)
-       }
        return nil
 }
 
index c773febe94dda6b1866cc7e5fb26ce2726750328..f4feaa06c21447cc66b2e57a962e2d2c306e6de7 100644 (file)
@@ -25,9 +25,9 @@ type containerSpec struct {
        EnableNetwork bool
        NetworkMode   string // docker network mode, normally "default"
        CgroupParent  string
-       Stdin         io.ReadCloser
-       Stdout        io.WriteCloser
-       Stderr        io.WriteCloser
+       Stdin         io.Reader
+       Stdout        io.Writer
+       Stderr        io.Writer
 }
 
 // containerExecutor is an interface to a container runtime
index 4b6a4b1b2da89602c1601058a1a73b406f85e0ad..5934c57b6c5f90bf971664c614a8348fb18b9e50 100644 (file)
@@ -141,6 +141,13 @@ func (s *executorSuite) TestExecEnableNetwork(c *C) {
        }
 }
 
+func (s *executorSuite) TestExecWorkingDir(c *C) {
+       s.spec.WorkingDir = "/tmp"
+       s.spec.Command = []string{"sh", "-c", "pwd"}
+       s.checkRun(c, 0)
+       c.Check(s.stdout.String(), Equals, "/tmp\n")
+}
+
 func (s *executorSuite) TestExecStdoutStderr(c *C) {
        s.spec.Command = []string{"sh", "-c", "echo foo; echo -n bar >&2; echo baz; echo waz >&2"}
        s.checkRun(c, 0)
index 4bec8c3ebed11970c9f0c0734e625c5c32df2523..bcaff3bcc88300e51015f16a94751d20a39d5efe 100644 (file)
@@ -8,6 +8,7 @@ import (
        "io/ioutil"
        "os"
        "os/exec"
+       "sort"
        "syscall"
 
        "golang.org/x/net/context"
@@ -74,7 +75,7 @@ func (e *singularityExecutor) Create(spec containerSpec) error {
 }
 
 func (e *singularityExecutor) Start() error {
-       args := []string{"singularity", "exec", "--containall", "--no-home", "--cleanenv"}
+       args := []string{"singularity", "exec", "--containall", "--no-home", "--cleanenv", "--pwd", e.spec.WorkingDir}
        if !e.spec.EnableNetwork {
                args = append(args, "--net", "--network=none")
        }
@@ -82,7 +83,13 @@ func (e *singularityExecutor) Start() error {
                false: "rw",
                true:  "ro",
        }
-       for path, mount := range e.spec.BindMounts {
+       var binds []string
+       for path, _ := range e.spec.BindMounts {
+               binds = append(binds, path)
+       }
+       sort.Strings(binds)
+       for _, path := range binds {
+               mount := e.spec.BindMounts[path]
                args = append(args, "--bind", mount.HostPath+":"+path+":"+readonlyflag[mount.ReadOnly])
        }
        args = append(args, e.imageFilename)
index 2922817b557ccef70fa25f32c66b8447575abb5d..c202e683f2810e85ab6fddda40793f021b3e0eff 100644 (file)
@@ -17,6 +17,7 @@ import (
        "syscall"
        "time"
 
+       "git.arvados.org/arvados.git/lib/config"
        "git.arvados.org/arvados.git/sdk/go/arvados"
        "git.arvados.org/arvados.git/sdk/go/arvadosclient"
        "git.arvados.org/arvados.git/sdk/go/dispatch"
@@ -74,10 +75,37 @@ func doMain() error {
                return nil
        }
 
+       loader := config.NewLoader(nil, logger)
+       cfg, err := loader.Load()
+       cluster, err := cfg.GetCluster("")
+       if err != nil {
+               return fmt.Errorf("config error: %s", err)
+       }
+
        logger.Printf("crunch-dispatch-local %s started", version)
 
        runningCmds = make(map[string]*exec.Cmd)
 
+       var client arvados.Client
+       client.APIHost = cluster.Services.Controller.ExternalURL.Host
+       client.AuthToken = cluster.SystemRootToken
+       client.Insecure = cluster.TLS.Insecure
+
+       if client.APIHost != "" || client.AuthToken != "" {
+               // Copy real configs into env vars so [a]
+               // MakeArvadosClient() uses them, and [b] they get
+               // propagated to crunch-run via SLURM.
+               os.Setenv("ARVADOS_API_HOST", client.APIHost)
+               os.Setenv("ARVADOS_API_TOKEN", client.AuthToken)
+               os.Setenv("ARVADOS_API_HOST_INSECURE", "")
+               if client.Insecure {
+                       os.Setenv("ARVADOS_API_HOST_INSECURE", "1")
+               }
+               os.Setenv("ARVADOS_EXTERNAL_CLIENT", "")
+       } else {
+               logger.Warnf("Client credentials missing from config, so falling back on environment variables (deprecated).")
+       }
+
        arv, err := arvadosclient.MakeArvadosClient()
        if err != nil {
                logger.Errorf("error making Arvados client: %v", err)
@@ -90,7 +118,7 @@ func doMain() error {
        dispatcher := dispatch.Dispatcher{
                Logger:       logger,
                Arv:          arv,
-               RunContainer: (&LocalRun{startFunc, make(chan bool, 8), ctx}).run,
+               RunContainer: (&LocalRun{startFunc, make(chan bool, 8), ctx, cluster}).run,
                PollPeriod:   time.Duration(*pollInterval) * time.Second,
        }
 
@@ -128,6 +156,7 @@ type LocalRun struct {
        startCmd         func(container arvados.Container, cmd *exec.Cmd) error
        concurrencyLimit chan bool
        ctx              context.Context
+       cluster          *arvados.Cluster
 }
 
 // Run a container.
@@ -169,7 +198,7 @@ func (lr *LocalRun) run(dispatcher *dispatch.Dispatcher,
                waitGroup.Add(1)
                defer waitGroup.Done()
 
-               cmd := exec.Command(*crunchRunCommand, uuid)
+               cmd := exec.Command(*crunchRunCommand, "--runtime-engine="+lr.cluster.Containers.RuntimeEngine, uuid)
                cmd.Stdin = nil
                cmd.Stderr = os.Stderr
                cmd.Stdout = os.Stderr
index 5f51134df8a9866a126fc32922787fd2be7c8957..d976bf0812950488b796cb063def8c960d128849 100644 (file)
@@ -81,8 +81,10 @@ func (s *TestSuite) TestIntegration(c *C) {
                return cmd.Start()
        }
 
+       cl := arvados.Cluster{Containers: arvados.ContainersConfig{RuntimeEngine: "docker"}}
+
        dispatcher.RunContainer = func(d *dispatch.Dispatcher, c arvados.Container, s <-chan arvados.Container) {
-               (&LocalRun{startCmd, make(chan bool, 8), ctx}).run(d, c, s)
+               (&LocalRun{startCmd, make(chan bool, 8), ctx, &cl}).run(d, c, s)
                cancel()
        }
 
@@ -184,8 +186,10 @@ func testWithServerStub(c *C, apiStubResponses map[string]arvadostest.StubRespon
                return cmd.Start()
        }
 
+       cl := arvados.Cluster{Containers: arvados.ContainersConfig{RuntimeEngine: "docker"}}
+
        dispatcher.RunContainer = func(d *dispatch.Dispatcher, c arvados.Container, s <-chan arvados.Container) {
-               (&LocalRun{startCmd, make(chan bool, 8), ctx}).run(d, c, s)
+               (&LocalRun{startCmd, make(chan bool, 8), ctx, &cl}).run(d, c, s)
                cancel()
        }
 
index a5899ce8a7cc0809a57b64a9588d8e227846c274..2f2f013c714a0be6bf863cbf8329efae62e616b6 100644 (file)
@@ -255,6 +255,7 @@ func (disp *Dispatcher) submit(container arvados.Container, crunchRunCommand []s
        // append() here avoids modifying crunchRunCommand's
        // underlying array, which is shared with other goroutines.
        crArgs := append([]string(nil), crunchRunCommand...)
+       crArgs = append(crArgs, "--runtime-engine="+disp.cluster.Containers.RuntimeEngine)
        crArgs = append(crArgs, container.UUID)
        crScript := strings.NewReader(execScript(crArgs))
 
deleted file mode 120000 (symlink)
index a388c8b67bf16bbb16601007540e58f1372ebc85..0000000000000000000000000000000000000000
+++ /dev/null
@@ -1 +0,0 @@
-/usr/local/lib/arvbox/runsu.sh
\ No newline at end of file
new file mode 100755 (executable)
index 0000000000000000000000000000000000000000..821afdce50b1fac246071dabc7e5fd507b201c84
--- /dev/null
@@ -0,0 +1,30 @@
+#!/bin/bash
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+exec 2>&1
+set -ex -o pipefail
+
+. /usr/local/lib/arvbox/common.sh
+. /usr/local/lib/arvbox/go-setup.sh
+
+flock /var/lib/gopath/gopath.lock go install "git.arvados.org/arvados.git/services/crunch-dispatch-local"
+install $GOPATH/bin/crunch-dispatch-local /usr/local/bin
+ln -sf arvados-server /usr/local/bin/crunch-run
+
+if test "$1" = "--only-deps" ; then
+    exit
+fi
+
+cat > /usr/local/bin/crunch-run.sh <<EOF
+#!/bin/sh
+exec /usr/local/bin/crunch-run -container-enable-networking=default -container-network-mode=host \$@
+EOF
+chmod +x /usr/local/bin/crunch-run.sh
+
+export ARVADOS_API_HOST=$localip:${services[controller-ssl]}
+export ARVADOS_API_HOST_INSECURE=1
+export ARVADOS_API_TOKEN=$(cat $ARVADOS_CONTAINER_PATH/superuser_token)
+
+exec /usr/local/bin/crunch-dispatch-local -crunch-run-command=/usr/local/bin/crunch-run.sh -poll-interval=1
diff --git a/tools/arvbox/lib/arvbox/docker/service/crunch-dispatch-local/run-service b/tools/arvbox/lib/arvbox/docker/service/crunch-dispatch-local/run-service
deleted file mode 100755 (executable)
index 821afdc..0000000
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/bin/bash
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-exec 2>&1
-set -ex -o pipefail
-
-. /usr/local/lib/arvbox/common.sh
-. /usr/local/lib/arvbox/go-setup.sh
-
-flock /var/lib/gopath/gopath.lock go install "git.arvados.org/arvados.git/services/crunch-dispatch-local"
-install $GOPATH/bin/crunch-dispatch-local /usr/local/bin
-ln -sf arvados-server /usr/local/bin/crunch-run
-
-if test "$1" = "--only-deps" ; then
-    exit
-fi
-
-cat > /usr/local/bin/crunch-run.sh <<EOF
-#!/bin/sh
-exec /usr/local/bin/crunch-run -container-enable-networking=default -container-network-mode=host \$@
-EOF
-chmod +x /usr/local/bin/crunch-run.sh
-
-export ARVADOS_API_HOST=$localip:${services[controller-ssl]}
-export ARVADOS_API_HOST_INSECURE=1
-export ARVADOS_API_TOKEN=$(cat $ARVADOS_CONTAINER_PATH/superuser_token)
-
-exec /usr/local/bin/crunch-dispatch-local -crunch-run-command=/usr/local/bin/crunch-run.sh -poll-interval=1
index b29dafed70b7696aeaee8657e4f4206c8a7b3d79..f49e9ea26fdc3e2294217d58d3d614b6784d1bb9 100755 (executable)
@@ -41,7 +41,7 @@ for sdk_app in arv arv-get cwl-runner arv-mount ; do
     fi
 done
 
-if ! (ps x | grep -v grep | grep "crunch-dispatch") > /dev/null ; then
+if ! (ps ax | grep -v grep | grep "crunch-dispatch") > /dev/null ; then
     waiting="$waiting crunch-dispatch"
 fi