17170: Add "arvados-client shell" subcommand and backend support.
authorTom Clegg <tom@curii.com>
Mon, 11 Jan 2021 20:53:28 +0000 (15:53 -0500)
committerTom Clegg <tom@curii.com>
Tue, 12 Jan 2021 05:32:03 +0000 (00:32 -0500)
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@curii.com>

20 files changed:
cmd/arvados-client/cmd.go
cmd/arvados-client/container_gateway.go [new file with mode: 0644]
doc/install/install-api-server.html.textile.liquid
go.mod
go.sum
lib/controller/federation/conn.go
lib/controller/handler.go
lib/controller/localdb/container_gateway.go [new file with mode: 0644]
lib/controller/router/router.go
lib/controller/rpc/conn.go
lib/crunchrun/container_gateway.go [new file with mode: 0644]
lib/crunchrun/crunchrun.go
lib/dispatchcloud/worker/pool.go
lib/dispatchcloud/worker/runner.go
sdk/go/arvados/api.go
sdk/go/arvados/container.go
sdk/go/arvados/container_gateway.go [new file with mode: 0644]
services/api/app/models/container.rb
services/api/db/migrate/20210108033940_add_gateway_address_to_containers.rb [new file with mode: 0644]
services/api/db/structure.sql

index 47fcd5ad7dc88275c5c9ce47369f3432ac861632..aefcce79a45a4eb65fbbed209801d684da6b4923 100644 (file)
@@ -57,6 +57,8 @@ var (
                "mount":                mount.Command,
                "deduplication-report": deduplicationreport.Command,
                "costanalyzer":         costanalyzer.Command,
+               "shell":                shellCommand{},
+               "connect-ssh":          connectSSHCommand{},
        })
 )
 
diff --git a/cmd/arvados-client/container_gateway.go b/cmd/arvados-client/container_gateway.go
new file mode 100644 (file)
index 0000000..46fa693
--- /dev/null
@@ -0,0 +1,159 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+package main
+
+import (
+       "context"
+       "flag"
+       "fmt"
+       "io"
+       "net/url"
+       "os"
+       "os/exec"
+       "strings"
+       "syscall"
+
+       "git.arvados.org/arvados.git/lib/controller/rpc"
+       "git.arvados.org/arvados.git/sdk/go/arvados"
+)
+
+// shellCommand connects the terminal to an interactive shell on a
+// running container.
+type shellCommand struct{}
+
+// RunCommand runs the OpenSSH client with a ProxyCommand option that
+// re-invokes this executable's "connect-ssh" subcommand for the
+// target container. Any arguments after the container UUID are passed
+// through to ssh.
+//
+// It returns 2 on a usage error or if /proc/self/exe cannot be
+// resolved, 1 if ssh could not be run or its exit status could not be
+// determined, and otherwise ssh's own exit status.
+func (shellCommand) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
+       f := flag.NewFlagSet(prog, flag.ContinueOnError)
+       f.SetOutput(stderr)
+       f.Usage = func() {
+               // Usage text goes to the caller-supplied stderr, not
+               // the process's stdout.
+               fmt.Fprint(stderr, prog+`: open an interactive shell on a running container.
+
+Usage: `+prog+` [options] container-uuid [ssh-options] [remote-command [args...]]
+
+Options:
+`)
+               f.PrintDefaults()
+       }
+       detachKeys := f.String("detach-keys", "ctrl-],ctrl-]", "set detach key sequence, as in docker-attach(1)")
+       err := f.Parse(args)
+       if err != nil {
+               fmt.Fprintln(stderr, err)
+               f.Usage()
+               return 2
+       }
+
+       if f.NArg() < 1 {
+               f.Usage()
+               return 2
+       }
+       targetUUID := f.Args()[0]
+       sshargs := f.Args()[1:]
+
+       selfbin, err := os.Readlink("/proc/self/exe")
+       if err != nil {
+               fmt.Fprintln(stderr, err)
+               return 2
+       }
+       // The detach-keys value is wrapped in single quotes for the
+       // ProxyCommand string, so embedded single quotes are escaped
+       // as '\''.
+       sshargs = append([]string{
+               "-o", "ProxyCommand " + selfbin + " connect-ssh -detach-keys='" + strings.Replace(*detachKeys, "'", "'\\''", -1) + "' " + targetUUID,
+               "-o", "StrictHostKeyChecking no",
+               "root@" + targetUUID},
+               sshargs...)
+       cmd := exec.Command("ssh", sshargs...)
+       cmd.Stdin = stdin
+       cmd.Stdout = stdout
+       cmd.Stderr = stderr
+       err = cmd.Run()
+       if err == nil {
+               return 0
+       }
+       exiterr, ok := err.(*exec.ExitError)
+       if !ok {
+               // ssh could not be started, or failed for a reason
+               // other than a non-zero exit.
+               fmt.Fprintln(stderr, err)
+               return 1
+       }
+       status, ok := exiterr.Sys().(syscall.WaitStatus)
+       if !ok {
+               fmt.Fprintln(stderr, err)
+               return 1
+       }
+       // Propagate ssh's exit status to our caller.
+       return status.ExitStatus()
+}
+
+// connectSSHCommand connects stdin/stdout to a container's gateway
+// server (see lib/crunchrun/container_gateway.go).
+//
+// It is intended to be invoked with OpenSSH client's ProxyCommand
+// config.
+type connectSSHCommand struct{}
+
+// RunCommand opens a ContainerSSH connection for the given container
+// UUID, using ARVADOS_API_HOST, ARVADOS_API_TOKEN and
+// ARVADOS_API_HOST_INSECURE from the environment, then copies data in
+// both directions between the connection and stdin/stdout until
+// either direction ends.
+//
+// It returns 2 on a usage error, 1 if the connection cannot be
+// established, and 0 otherwise.
+func (connectSSHCommand) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
+       flags := flag.NewFlagSet(prog, flag.ContinueOnError)
+       flags.SetOutput(stderr)
+       flags.Usage = func() {
+               fmt.Fprint(stderr, prog+`: connect to the gateway service for a running container.
+
+Usage: `+prog+` [options] container-uuid
+
+Options:
+`)
+               flags.PrintDefaults()
+       }
+       detachKeys := flags.String("detach-keys", "", "set detach key sequence, as in docker-attach(1)")
+       if err := flags.Parse(args); err != nil {
+               fmt.Fprintln(stderr, err)
+               flags.Usage()
+               return 2
+       }
+       if flags.NArg() != 1 {
+               flags.Usage()
+               return 2
+       }
+       targetUUID := flags.Arg(0)
+
+       // Certificate verification is relaxed only for these exact
+       // values of ARVADOS_API_HOST_INSECURE.
+       skipVerify := false
+       switch os.Getenv("ARVADOS_API_HOST_INSECURE") {
+       case "1", "yes", "true":
+               skipVerify = true
+       }
+       apiURL := &url.URL{
+               Scheme: "https",
+               Host:   os.Getenv("ARVADOS_API_HOST"),
+       }
+       // The token is read from the environment each time the
+       // connection asks for it.
+       tokenFromEnv := func(context.Context) ([]string, error) {
+               return []string{os.Getenv("ARVADOS_API_TOKEN")}, nil
+       }
+       rpcconn := rpc.NewConn("", apiURL, skipVerify, tokenFromEnv)
+       // if strings.Contains(targetUUID, "-xvhdp-") {
+       //      cr, err := rpcconn.ContainerRequestGet(context.TODO(), arvados.GetOptions{UUID: targetUUID})
+       //      if err != nil {
+       //              fmt.Fprintln(stderr, err)
+       //              return 1
+       //      }
+       //      if cr.ContainerUUID == "" {
+       //              fmt.Fprintf(stderr, "no container assigned, container request state is %s\n", strings.ToLower(cr.State))
+       //              return 1
+       //      }
+       //      targetUUID = cr.ContainerUUID
+       // }
+       sshconn, err := rpcconn.ContainerSSH(context.TODO(), arvados.ContainerSSHOptions{
+               UUID:       targetUUID,
+               DetachKeys: *detachKeys,
+       })
+       if err != nil {
+               fmt.Fprintln(stderr, err)
+               return 1
+       }
+       defer sshconn.Conn.Close()
+
+       ctx, cancel := context.WithCancel(context.Background())
+       // relay copies src to dst and cancels ctx when the copy ends.
+       // A copy error is reported only if the other direction has
+       // not already ended (i.e., ctx is not yet cancelled).
+       relay := func(errfmt string, dst io.Writer, src io.Reader) {
+               defer cancel()
+               if _, err := io.Copy(dst, src); err != nil && ctx.Err() == nil {
+                       fmt.Fprintf(stderr, errfmt, err)
+               }
+       }
+       go relay("receive: %v\n", stdout, sshconn.Conn)
+       go relay("send: %v\n", sshconn.Conn, stdin)
+       // Return as soon as either direction finishes.
+       <-ctx.Done()
+       return 0
+}
index ca55be53e332f12196b359b17a711e5b13c495fc..7d0353c9e71283617e0bb6c5e53c1de89204663b 100644 (file)
@@ -153,11 +153,13 @@ server {
     proxy_connect_timeout 90s;
     proxy_read_timeout    300s;
 
-    proxy_set_header      X-Forwarded-Proto https;
-    proxy_set_header      Host $http_host;
+    proxy_set_header      Host              $http_host;
+    proxy_set_header      Upgrade           $http_upgrade;
+    proxy_set_header      Connection        "upgrade";
     proxy_set_header      X-External-Client $external_client;
-    proxy_set_header      X-Real-IP $remote_addr;
-    proxy_set_header      X-Forwarded-For $proxy_add_x_forwarded_for;
+    proxy_set_header      X-Forwarded-For   $proxy_add_x_forwarded_for;
+    proxy_set_header      X-Forwarded-Proto https;
+    proxy_set_header      X-Real-IP         $remote_addr;
   }
 }
 
diff --git a/go.mod b/go.mod
index 262978d9125d412b32bfee22508bcfe517de8ec6..88dcb86c72e35cafcf61905f7d67ebb554026b0d 100644 (file)
--- a/go.mod
+++ b/go.mod
@@ -20,6 +20,7 @@ require (
        github.com/bradleypeabody/godap v0.0.0-20170216002349-c249933bc092
        github.com/coreos/go-oidc v2.1.0+incompatible
        github.com/coreos/go-systemd v0.0.0-20180108085132-cc4f39464dc7
+       github.com/creack/pty v1.1.7
        github.com/dnaeon/go-vcr v1.0.1 // indirect
        github.com/docker/distribution v2.6.0-rc.1.0.20180105232752-277ed486c948+incompatible // indirect
        github.com/docker/docker v1.4.2-0.20180109013817-94b8a116fbf1
@@ -33,6 +34,7 @@ require (
        github.com/go-asn1-ber/asn1-ber v1.4.1 // indirect
        github.com/go-ldap/ldap v3.0.3+incompatible
        github.com/gogo/protobuf v1.1.1
+       github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510
        github.com/gorilla/context v1.1.1 // indirect
        github.com/gorilla/mux v1.6.1-0.20180107155708-5bbbb5b2b572
        github.com/hashicorp/golang-lru v0.5.1
diff --git a/go.sum b/go.sum
index 85d205112fb95ecf1895d96122686e0e2e2a849b..91b5689eb33424ce130f8c06400c74da002e585e 100644 (file)
--- a/go.sum
+++ b/go.sum
@@ -72,6 +72,8 @@ github.com/coreos/go-oidc v2.1.0+incompatible h1:sdJrfw8akMnCuUlaZU3tE/uYXFgfqom
 github.com/coreos/go-oidc v2.1.0+incompatible/go.mod h1:CgnwVTmzoESiwO9qyAFEMiHoZ1nMCKZlZ9V6mm3/LKc=
 github.com/coreos/go-systemd v0.0.0-20180108085132-cc4f39464dc7 h1:e3u8KWFMR3irlDo1Z/tL8Hsz1MJmCLkSoX5AZRMKZkg=
 github.com/coreos/go-systemd v0.0.0-20180108085132-cc4f39464dc7/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4=
+github.com/creack/pty v1.1.7 h1:6pwm8kMQKCmgUg0ZHTm5+/YvRK0s3THD/28+T6/kk4A=
+github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY=
 github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
@@ -134,6 +136,8 @@ github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/
 github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
 github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs=
 github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc=
+github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4=
+github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ=
 github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg=
 github.com/googleapis/gax-go/v2 v2.0.5 h1:sjZBwGj9Jlw33ImPtvFviGYvseOtDM7hkSKB7+Tv3SM=
 github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk=
index 130368124cdd904a40ceb3938122181594c26804..a32382ce254afa9a8018792fa9fbee932f73476b 100644 (file)
@@ -336,6 +336,10 @@ func (conn *Conn) ContainerUnlock(ctx context.Context, options arvados.GetOption
        return conn.chooseBackend(options.UUID).ContainerUnlock(ctx, options)
 }
 
+// ContainerSSH forwards the request to the backend chosen for
+// options.UUID and returns that backend's connection and error
+// unchanged.
+func (conn *Conn) ContainerSSH(ctx context.Context, options arvados.ContainerSSHOptions) (arvados.ContainerSSHConnection, error) {
+       return conn.chooseBackend(options.UUID).ContainerSSH(ctx, options)
+}
+
 func (conn *Conn) SpecimenList(ctx context.Context, options arvados.ListOptions) (arvados.SpecimenList, error) {
        return conn.generated_SpecimenList(ctx, options)
 }
index 6669e020fdb9046abdceca72709348526af663c5..7847be0a4938cd03b316ba80a12f89ae80962347 100644 (file)
@@ -25,6 +25,7 @@ import (
        "git.arvados.org/arvados.git/sdk/go/health"
        "git.arvados.org/arvados.git/sdk/go/httpserver"
        "github.com/jmoiron/sqlx"
+
        // sqlx needs lib/pq to talk to PostgreSQL
        _ "github.com/lib/pq"
 )
@@ -100,6 +101,7 @@ func (h *Handler) setup() {
                mux.Handle("/arvados/v1/collections/", rtr)
                mux.Handle("/arvados/v1/users", rtr)
                mux.Handle("/arvados/v1/users/", rtr)
+               mux.Handle("/arvados/v1/connect/", rtr)
                mux.Handle("/login", rtr)
                mux.Handle("/logout", rtr)
        }
diff --git a/lib/controller/localdb/container_gateway.go b/lib/controller/localdb/container_gateway.go
new file mode 100644 (file)
index 0000000..01b4065
--- /dev/null
@@ -0,0 +1,80 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package localdb
+
+import (
+       "bufio"
+       "context"
+       "crypto/hmac"
+       "crypto/sha256"
+       "errors"
+       "fmt"
+       "net"
+       "net/http"
+       "net/url"
+       "strings"
+
+       "git.arvados.org/arvados.git/sdk/go/arvados"
+       "git.arvados.org/arvados.git/sdk/go/httpserver"
+)
+
+// ContainerSSH returns a connection to the SSH server in the
+// appropriate crunch-run process on the worker node where the
+// specified container is running.
+//
+// If the returned error is nil, the caller is responsible for closing
+// sshconn.Conn.
+func (conn *Conn) ContainerSSH(ctx context.Context, opts arvados.ContainerSSHOptions) (sshconn arvados.ContainerSSHConnection, err error) {
+       ctr, err := conn.railsProxy.ContainerGet(ctx, arvados.GetOptions{UUID: opts.UUID})
+       if err != nil {
+               return
+       }
+       if ctr.GatewayAddress == "" || ctr.State != arvados.ContainerStateRunning {
+               err = httpserver.ErrorWithStatus(fmt.Errorf("gateway is not available, container is %s", strings.ToLower(string(ctr.State))), http.StatusBadGateway)
+               return
+       }
+       netconn, err := net.Dial("tcp", ctr.GatewayAddress)
+       if err != nil {
+               return
+       }
+       bufr := bufio.NewReader(netconn)
+       bufw := bufio.NewWriter(netconn)
+
+       // Note this auth header does not protect from replay/mitm
+       // attacks (TODO: use TLS for that). It only authenticates us
+       // to crunch-run.
+       h := hmac.New(sha256.New, []byte(conn.cluster.SystemRootToken))
+       fmt.Fprint(h, "%s", opts.UUID)
+       auth := fmt.Sprintf("%x", h.Sum(nil))
+
+       u := url.URL{
+               Scheme: "http",
+               Host:   ctr.GatewayAddress,
+               Path:   "/ssh",
+       }
+       bufw.WriteString("GET " + u.String() + " HTTP/1.1\r\n")
+       bufw.WriteString("Host: " + u.Host + "\r\n")
+       bufw.WriteString("Upgrade: ssh\r\n")
+       bufw.WriteString("X-Arvados-Target-Uuid: " + opts.UUID + "\r\n")
+       bufw.WriteString("X-Arvados-Authorization: " + auth + "\r\n")
+       bufw.WriteString("X-Arvados-Detach-Keys: " + opts.DetachKeys + "\r\n")
+       bufw.WriteString("\r\n")
+       bufw.Flush()
+       resp, err := http.ReadResponse(bufr, &http.Request{Method: "GET"})
+       if err != nil {
+               err = fmt.Errorf("error reading http response from gateway: %w", err)
+               netconn.Close()
+               return
+       }
+       if strings.ToLower(resp.Header.Get("Upgrade")) != "ssh" ||
+               strings.ToLower(resp.Header.Get("Connection")) != "upgrade" {
+               err = httpserver.ErrorWithStatus(errors.New("bad upgrade"), http.StatusBadGateway)
+               netconn.Close()
+               return
+       }
+       sshconn.Conn = netconn
+       sshconn.Bufrw = &bufio.ReadWriter{Reader: bufr, Writer: bufw}
+       return
+}
index 2944524344e9028fa22cf0c9d18327cb39193733..a09b66cedf7213d901469d2512ebe57ae2c0d82c 100644 (file)
@@ -186,6 +186,13 @@ func (rtr *router) addRoutes() {
                                return rtr.backend.ContainerUnlock(ctx, *opts.(*arvados.GetOptions))
                        },
                },
+               {
+                       arvados.EndpointContainerSSH,
+                       func() interface{} { return &arvados.ContainerSSHOptions{} },
+                       func(ctx context.Context, opts interface{}) (interface{}, error) {
+                               return rtr.backend.ContainerSSH(ctx, *opts.(*arvados.ContainerSSHOptions))
+                       },
+               },
                {
                        arvados.EndpointSpecimenCreate,
                        func() interface{} { return &arvados.CreateOptions{} },
index cd98b64718a0b1f52702f4997f432d3d5210f353..e80542e3e1b43598d61aa06132ebaca2be6f44ba 100644 (file)
@@ -5,6 +5,7 @@
 package rpc
 
 import (
+       "bufio"
        "bytes"
        "context"
        "crypto/tls"
@@ -12,6 +13,7 @@ import (
        "errors"
        "fmt"
        "io"
+       "io/ioutil"
        "net"
        "net/http"
        "net/url"
@@ -21,6 +23,7 @@ import (
 
        "git.arvados.org/arvados.git/sdk/go/arvados"
        "git.arvados.org/arvados.git/sdk/go/auth"
+       "git.arvados.org/arvados.git/sdk/go/httpserver"
 )
 
 type TokenProvider func(context.Context) ([]string, error)
@@ -286,6 +289,61 @@ func (conn *Conn) ContainerUnlock(ctx context.Context, options arvados.GetOption
        return resp, err
 }
 
+// ContainerSSH returns a connection to the out-of-band SSH server for
+// a running container. If the returned error is nil, the caller is
+// responsible for closing sshconn.Conn.
+func (conn *Conn) ContainerSSH(ctx context.Context, options arvados.ContainerSSHOptions) (sshconn arvados.ContainerSSHConnection, err error) {
+       netconn, err := tls.Dial("tcp", net.JoinHostPort(conn.baseURL.Host, "https"), nil)
+       if err != nil {
+               return
+       }
+       bufr := bufio.NewReader(netconn)
+       bufw := bufio.NewWriter(netconn)
+
+       u, err := conn.baseURL.Parse("/" + strings.Replace(arvados.EndpointContainerSSH.Path, "{uuid}", options.UUID, -1))
+       if err != nil {
+               netconn.Close()
+               return
+       }
+       u.RawQuery = url.Values{"detach_keys": {options.DetachKeys}}.Encode()
+       tokens, err := conn.tokenProvider(ctx)
+       if err != nil {
+               netconn.Close()
+               return
+       } else if len(tokens) < 1 {
+               err = httpserver.ErrorWithStatus(errors.New("unauthorized"), http.StatusUnauthorized)
+               netconn.Close()
+               return
+       }
+       bufw.WriteString("GET " + u.String() + " HTTP/1.1\r\n")
+       bufw.WriteString("Authorization: Bearer " + tokens[0] + "\r\n")
+       bufw.WriteString("Host: " + u.Host + "\r\n")
+       bufw.WriteString("Upgrade: ssh\r\n")
+       bufw.WriteString("\r\n")
+       bufw.Flush()
+       resp, err := http.ReadResponse(bufr, &http.Request{Method: "GET"})
+       if err != nil {
+               netconn.Close()
+               return
+       }
+       if resp.StatusCode != http.StatusSwitchingProtocols {
+               defer resp.Body.Close()
+               body, _ := ioutil.ReadAll(resp.Body)
+               err = fmt.Errorf("tunnel connection failed: %d %q", resp.StatusCode, body)
+               netconn.Close()
+               return
+       }
+       if strings.ToLower(resp.Header.Get("Upgrade")) != "ssh" ||
+               strings.ToLower(resp.Header.Get("Connection")) != "upgrade" {
+               err = fmt.Errorf("bad response: Upgrade %q Connection %q", resp.Header.Get("Upgrade"), resp.Header.Get("Connection"))
+               netconn.Close()
+               return
+       }
+       sshconn.Conn = netconn
+       sshconn.Bufrw = &bufio.ReadWriter{Reader: bufr, Writer: bufw}
+       return
+}
+
 func (conn *Conn) SpecimenCreate(ctx context.Context, options arvados.CreateOptions) (arvados.Specimen, error) {
        ep := arvados.EndpointSpecimenCreate
        var resp arvados.Specimen
diff --git a/lib/crunchrun/container_gateway.go b/lib/crunchrun/container_gateway.go
new file mode 100644 (file)
index 0000000..e55868a
--- /dev/null
@@ -0,0 +1,292 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package crunchrun
+
+import (
+       "crypto/rand"
+       "crypto/rsa"
+       "fmt"
+       "io"
+       "net"
+       "net/http"
+       "os"
+       "os/exec"
+       "sync"
+       "syscall"
+
+       "git.arvados.org/arvados.git/sdk/go/httpserver"
+       "github.com/creack/pty"
+       "github.com/google/shlex"
+       "golang.org/x/crypto/ssh"
+)
+
+// startGatewayServer starts an http server that allows authenticated
+// clients to open an interactive "docker exec" session and (in
+// future) connect to tcp ports inside the docker container.
+func (runner *ContainerRunner) startGatewayServer() error {
+       runner.gatewaySSHConfig = &ssh.ServerConfig{
+               NoClientAuth: true,
+               PasswordCallback: func(c ssh.ConnMetadata, pass []byte) (*ssh.Permissions, error) {
+                       if c.User() == "root" {
+                               return nil, nil
+                       } else {
+                               return nil, fmt.Errorf("unimplemented: cannot log in as non-root user %q", c.User())
+                       }
+               },
+               PublicKeyCallback: func(c ssh.ConnMetadata, pubKey ssh.PublicKey) (*ssh.Permissions, error) {
+                       if c.User() == "root" {
+                               return &ssh.Permissions{
+                                       Extensions: map[string]string{
+                                               "pubkey-fp": ssh.FingerprintSHA256(pubKey),
+                                       },
+                               }, nil
+                       } else {
+                               return nil, fmt.Errorf("unimplemented: cannot log in as non-root user %q", c.User())
+                       }
+               },
+       }
+       pvt, err := rsa.GenerateKey(rand.Reader, 4096)
+       if err != nil {
+               return err
+       }
+       err = pvt.Validate()
+       if err != nil {
+               return err
+       }
+       signer, err := ssh.NewSignerFromKey(pvt)
+       if err != nil {
+               return err
+       }
+       runner.gatewaySSHConfig.AddHostKey(signer)
+
+       // GatewayAddress (provided by arvados-dispatch-cloud) is
+       // HOST:PORT where HOST is our IP address or hostname as seen
+       // from arvados-controller, and PORT is either the desired
+       // port where we should run our gateway server, or "0" if
+       // we should choose an available port.
+       host, port, err := net.SplitHostPort(os.Getenv("GatewayAddress"))
+       if err != nil {
+               return err
+       }
+       srv := &httpserver.Server{
+               Server: http.Server{
+                       Handler: http.HandlerFunc(runner.handleSSH),
+               },
+               Addr: ":" + port,
+       }
+       err = srv.Start()
+       if err != nil {
+               return err
+       }
+       // Get the port number we are listening on (the port might be
+       // "0" or a port name, in which case this will be different).
+       _, port, err = net.SplitHostPort(srv.Addr)
+       if err != nil {
+               return err
+       }
+       // When changing state to Running, we will set
+       // gateway_address to "HOST:PORT" where HOST is our
+       // external hostname/IP as provided by arvados-dispatch-cloud,
+       // and PORT is the port number we ended up listening on.
+       runner.gatewayAddress = net.JoinHostPort(host, port)
+       return nil
+}
+
+// handleSSH connects to an SSH server that runs commands as root in
+// the container. The tunnel itself can only be created by an
+// authenticated caller, so the SSH server itself is wide open (any
+// password or key will be accepted).
+//
+// Requests must have path "/ssh" and the following headers:
+//
+// Connection: upgrade
+// Upgrade: ssh
+// X-Arvados-Target-Uuid: uuid of container
+// X-Arvados-Authorization: must match GatewayAuthSecret provided by
+// a-d-c (this prevents other containers and shell nodes from
+// connecting directly)
+//
+// Optional header:
+//
+// X-Arvados-Detach-Keys: argument to "docker attach --detach-keys",
+// e.g., "ctrl-p,ctrl-q"
+//
+// After a successful upgrade, the handler serves SSH "session"
+// channels on the hijacked connection. Each "shell"/"exec" request
+// runs "docker exec" against the container, optionally attached to a
+// pty allocated by a preceding "pty-req" request, and reports the
+// command's exit status back to the client.
+func (runner *ContainerRunner) handleSSH(w http.ResponseWriter, req *http.Request) {
+       // In future we'll handle browser traffic too, but for now the
+       // only traffic we expect is an SSH tunnel from
+       // (*lib/controller/localdb.Conn)ContainerSSH()
+       if req.Method != "GET" || req.Header.Get("Upgrade") != "ssh" {
+               http.Error(w, "path not found", http.StatusNotFound)
+               return
+       }
+       if want := req.Header.Get("X-Arvados-Target-Uuid"); want != runner.Container.UUID {
+               http.Error(w, fmt.Sprintf("misdirected request: meant for %q but received by crunch-run %q", want, runner.Container.UUID), http.StatusBadGateway)
+               return
+       }
+       // NOTE(review): this is an ordinary string comparison;
+       // consider a constant-time comparison for the secret.
+       if req.Header.Get("X-Arvados-Authorization") != runner.gatewayAuthSecret {
+               http.Error(w, "bad X-Arvados-Authorization header", http.StatusUnauthorized)
+               return
+       }
+       detachKeys := req.Header.Get("X-Arvados-Detach-Keys")
+       hj, ok := w.(http.Hijacker)
+       if !ok {
+               http.Error(w, "ResponseWriter does not support connection upgrade", http.StatusInternalServerError)
+               return
+       }
+       netconn, _, err := hj.Hijack()
+       // Hijack reports failure through err. (ok only says whether
+       // w implements http.Hijacker, which was already checked.)
+       if err != nil {
+               http.Error(w, err.Error(), http.StatusInternalServerError)
+               return
+       }
+       defer netconn.Close()
+       // The connection is hijacked, so the response status line and
+       // headers are written to it by hand.
+       w.Header().Set("Connection", "upgrade")
+       w.Header().Set("Upgrade", "ssh")
+       netconn.Write([]byte("HTTP/1.1 101 Switching Protocols\r\n"))
+       w.Header().Write(netconn)
+       netconn.Write([]byte("\r\n"))
+
+       ctx := req.Context()
+
+       conn, newchans, reqs, err := ssh.NewServerConn(netconn, runner.gatewaySSHConfig)
+       if err != nil {
+               runner.CrunchLog.Printf("ssh.NewServerConn: %s", err)
+               return
+       }
+       defer conn.Close()
+       go ssh.DiscardRequests(reqs)
+       for newch := range newchans {
+               if newch.ChannelType() != "session" {
+                       newch.Reject(ssh.UnknownChannelType, "unknown channel type")
+                       continue
+               }
+               ch, reqs, err := newch.Accept()
+               if err != nil {
+                       runner.CrunchLog.Printf("accept channel: %s", err)
+                       return
+               }
+               // pty0/tty0 stay nil unless the client sends a
+               // successful "pty-req".
+               var pty0, tty0 *os.File
+               go func() {
+                       // Where to send errors/messages for the
+                       // client to see
+                       logw := io.Writer(ch.Stderr())
+                       // How to end lines when sending
+                       // errors/messages to the client (changes to
+                       // \r\n when using a pty)
+                       eol := "\n"
+                       // Env vars to add to child process
+                       termEnv := []string(nil)
+                       for req := range reqs {
+                               ok := false
+                               switch req.Type {
+                               case "shell", "exec":
+                                       ok = true
+                                       var payload struct {
+                                               Command string
+                                       }
+                                       ssh.Unmarshal(req.Payload, &payload)
+                                       execargs, err := shlex.Split(payload.Command)
+                                       if err != nil {
+                                               fmt.Fprintf(logw, "error parsing supplied command: %s"+eol, err)
+                                               return
+                                       }
+                                       // No command given: start a
+                                       // login shell.
+                                       if len(execargs) == 0 {
+                                               execargs = []string{"/bin/bash", "-login"}
+                                       }
+                                       go func() {
+                                               cmd := exec.CommandContext(ctx, "docker", "exec", "-i", "--detach-keys="+detachKeys)
+                                               cmd.Stdin = ch
+                                               cmd.Stdout = ch
+                                               cmd.Stderr = ch.Stderr()
+                                               if tty0 != nil {
+                                                       cmd.Args = append(cmd.Args, "-t")
+                                                       cmd.Stdin = tty0
+                                                       cmd.Stdout = tty0
+                                                       cmd.Stderr = tty0
+                                                       var wg sync.WaitGroup
+                                                       defer wg.Wait()
+                                                       wg.Add(2)
+                                                       go func() { io.Copy(ch, pty0); wg.Done() }()
+                                                       go func() { io.Copy(pty0, ch); wg.Done() }()
+                                                       // Send our own debug messages to tty as well.
+                                                       logw = tty0
+                                               }
+                                               cmd.Args = append(cmd.Args, runner.ContainerID)
+                                               cmd.Args = append(cmd.Args, execargs...)
+                                               cmd.SysProcAttr = &syscall.SysProcAttr{
+                                                       Setctty: tty0 != nil,
+                                                       Setsid:  true,
+                                               }
+                                               cmd.Env = append(os.Environ(), termEnv...)
+                                               err := cmd.Run()
+                                               errClose := ch.CloseWrite()
+                                               var resp struct {
+                                                       Status uint32
+                                               }
+                                               if err, ok := err.(*exec.ExitError); ok {
+                                                       if status, ok := err.Sys().(syscall.WaitStatus); ok {
+                                                               resp.Status = uint32(status.ExitStatus())
+                                                       }
+                                               }
+                                               // Never report success
+                                               // if running the command
+                                               // or closing the channel
+                                               // failed.
+                                               if resp.Status == 0 && (err != nil || errClose != nil) {
+                                                       resp.Status = 1
+                                               }
+                                               ch.SendRequest("exit-status", false, ssh.Marshal(&resp))
+                                               ch.Close()
+                                       }()
+                               case "pty-req":
+                                       eol = "\r\n"
+                                       p, t, err := pty.Open()
+                                       if err != nil {
+                                               fmt.Fprintf(ch.Stderr(), "pty failed: %s"+eol, err)
+                                               break
+                                       }
+                                       // Close the pty pair when
+                                       // this channel's request
+                                       // loop ends. The defers are
+                                       // registered here, on the
+                                       // files just opened, because
+                                       // defer binds its receiver
+                                       // immediately (deferring
+                                       // pty0.Close() before this
+                                       // point would close nil).
+                                       defer p.Close()
+                                       defer t.Close()
+                                       pty0, tty0 = p, t
+                                       ok = true
+                                       var payload struct {
+                                               Term string
+                                               Cols uint32
+                                               Rows uint32
+                                               X    uint32
+                                               Y    uint32
+                                       }
+                                       ssh.Unmarshal(req.Payload, &payload)
+                                       termEnv = []string{"TERM=" + payload.Term, "USE_TTY=1"}
+                                       err = pty.Setsize(pty0, &pty.Winsize{Rows: uint16(payload.Rows), Cols: uint16(payload.Cols), X: uint16(payload.X), Y: uint16(payload.Y)})
+                                       if err != nil {
+                                               fmt.Fprintf(logw, "pty-req: setsize failed: %s"+eol, err)
+                                       }
+                               case "window-change":
+                                       var payload struct {
+                                               Cols uint32
+                                               Rows uint32
+                                               X    uint32
+                                               Y    uint32
+                                       }
+                                       ssh.Unmarshal(req.Payload, &payload)
+                                       err := pty.Setsize(pty0, &pty.Winsize{Rows: uint16(payload.Rows), Cols: uint16(payload.Cols), X: uint16(payload.X), Y: uint16(payload.Y)})
+                                       if err != nil {
+                                               fmt.Fprintf(logw, "window-change: setsize failed: %s"+eol, err)
+                                               break
+                                       }
+                                       ok = true
+                               case "env":
+                                       // TODO: implement "env"
+                                       // requests by setting env
+                                       // vars in the docker-exec
+                                       // command (not docker-exec's
+                                       // own environment, which
+                                       // would be a gaping security
+                                       // hole).
+                               default:
+                                       // fmt.Fprintf(logw, "declining %q req"+eol, req.Type)
+                               }
+                               if req.WantReply {
+                                       req.Reply(ok, nil)
+                               }
+                       }
+               }()
+       }
+}
index 341938354cd963997730eddecd35946f1cb44af9..b252e0dce162fca3473dc0a0c64a9f846295e4b9 100644 (file)
@@ -33,6 +33,7 @@ import (
        "git.arvados.org/arvados.git/sdk/go/arvadosclient"
        "git.arvados.org/arvados.git/sdk/go/keepclient"
        "git.arvados.org/arvados.git/sdk/go/manifest"
+       "golang.org/x/crypto/ssh"
        "golang.org/x/net/context"
 
        dockertypes "github.com/docker/docker/api/types"
@@ -178,6 +179,10 @@ type ContainerRunner struct {
        arvMountLog   *ThrottledLogger
 
        containerWatchdogInterval time.Duration
+
+       gatewayAddress    string
+       gatewaySSHConfig  *ssh.ServerConfig
+       gatewayAuthSecret string
 }
 
 // setupSignals sets up signal handling to gracefully terminate the underlying
@@ -1469,7 +1474,7 @@ func (runner *ContainerRunner) UpdateContainerRunning() error {
                return ErrCancelled
        }
        return runner.DispatcherArvClient.Update("containers", runner.Container.UUID,
-               arvadosclient.Dict{"container": arvadosclient.Dict{"state": "Running"}}, nil)
+               arvadosclient.Dict{"container": arvadosclient.Dict{"state": "Running", "gateway_address": runner.gatewayAddress}}, nil)
 }
 
 // ContainerToken returns the api_token the container (and any
@@ -1868,6 +1873,14 @@ func (command) RunCommand(prog string, args []string, stdin io.Reader, stdout, s
                return 1
        }
 
+       cr.gatewayAuthSecret = os.Getenv("GatewayAuthSecret")
+       os.Unsetenv("GatewayAuthSecret")
+       err = cr.startGatewayServer()
+       if err != nil {
+               log.Printf("error starting gateway server: %s", err)
+               return 1
+       }
+
        parentTemp, tmperr := cr.MkTempDir("", "crunch-run."+containerID+".")
        if tmperr != nil {
                log.Printf("%s: %v", containerID, tmperr)
index a25ed60150718f83829a003d6b0e8267a382a430..e092e7adab5bfe0e3d9b5ff354ea4ff7ff371010 100644 (file)
@@ -5,8 +5,10 @@
 package worker
 
 import (
+       "crypto/hmac"
        "crypto/md5"
        "crypto/rand"
+       "crypto/sha256"
        "errors"
        "fmt"
        "io"
@@ -116,6 +118,7 @@ func NewPool(logger logrus.FieldLogger, arvClient *arvados.Client, reg *promethe
                timeoutTERM:                    duration(cluster.Containers.CloudVMs.TimeoutTERM, defaultTimeoutTERM),
                timeoutSignal:                  duration(cluster.Containers.CloudVMs.TimeoutSignal, defaultTimeoutSignal),
                timeoutStaleRunLock:            duration(cluster.Containers.CloudVMs.TimeoutStaleRunLock, defaultTimeoutStaleRunLock),
+               systemRootToken:                cluster.SystemRootToken,
                installPublicKey:               installPublicKey,
                tagKeyPrefix:                   cluster.Containers.CloudVMs.TagKeyPrefix,
                stop:                           make(chan bool),
@@ -154,6 +157,7 @@ type Pool struct {
        timeoutTERM                    time.Duration
        timeoutSignal                  time.Duration
        timeoutStaleRunLock            time.Duration
+       systemRootToken                string
        installPublicKey               ssh.PublicKey
        tagKeyPrefix                   string
 
@@ -990,6 +994,12 @@ func (wp *Pool) waitUntilLoaded() {
        }
 }
 
+func (wp *Pool) gatewayAuthSecret(uuid string) string {
+       h := hmac.New(sha256.New, []byte(wp.systemRootToken))
+       fmt.Fprint(h, "%s", uuid)
+       return fmt.Sprintf("%x", h.Sum(nil))
+}
+
 // Return a random string of n hexadecimal digits (n*4 random bits). n
 // must be even.
 func randomHex(n int) string {
index 47521213427610a531fa269df32b046b17ba72aa..0fd99aeeef136cdc1113f55466b1342b7c975cc1 100644 (file)
@@ -8,6 +8,7 @@ import (
        "bytes"
        "encoding/json"
        "fmt"
+       "net"
        "syscall"
        "time"
 
@@ -48,6 +49,8 @@ func newRemoteRunner(uuid string, wkr *worker) *remoteRunner {
                "ARVADOS_API_HOST":  wkr.wp.arvClient.APIHost,
                "ARVADOS_API_TOKEN": wkr.wp.arvClient.AuthToken,
                "InstanceType":      instJSON.String(),
+               "GatewayAddress":    net.JoinHostPort(wkr.instance.Address(), "0"),
+               "GatewayAuthSecret": wkr.wp.gatewayAuthSecret(uuid),
        }
        if wkr.wp.arvClient.Insecure {
                env["ARVADOS_API_HOST_INSECURE"] = "1"
index 5a2cfb8800402496f3f8fe400cf38c786e57d6eb..fa4471804809fc60b2c233b8f8175d308a6ae369 100644 (file)
@@ -5,8 +5,10 @@
 package arvados
 
 import (
+       "bufio"
        "context"
        "encoding/json"
+       "net"
 )
 
 type APIEndpoint struct {
@@ -41,6 +43,7 @@ var (
        EndpointContainerDelete               = APIEndpoint{"DELETE", "arvados/v1/containers/{uuid}", ""}
        EndpointContainerLock                 = APIEndpoint{"POST", "arvados/v1/containers/{uuid}/lock", ""}
        EndpointContainerUnlock               = APIEndpoint{"POST", "arvados/v1/containers/{uuid}/unlock", ""}
+       EndpointContainerSSH                  = APIEndpoint{"GET", "arvados/v1/connect/{uuid}/ssh", ""} // move to /containers after #17014 fixes routing
        EndpointUserActivate                  = APIEndpoint{"POST", "arvados/v1/users/{uuid}/activate", ""}
        EndpointUserCreate                    = APIEndpoint{"POST", "arvados/v1/users", "user"}
        EndpointUserCurrent                   = APIEndpoint{"GET", "arvados/v1/users/current", ""}
@@ -60,6 +63,16 @@ var (
        EndpointAPIClientAuthorizationCurrent = APIEndpoint{"GET", "arvados/v1/api_client_authorizations/current", ""}
 )
 
+// ContainerSSHOptions holds the parameters of a ContainerSSH call.
+//
+// NOTE(review): DetachKeys is presumably the key sequence used to
+// detach from the session -- confirm against the gateway
+// implementation.
+type ContainerSSHOptions struct {
+       UUID       string `json:"uuid"`
+       DetachKeys string `json:"detach_keys"`
+}
+
+// ContainerSSHConnection is the value returned by a ContainerSSH
+// call. Neither field is included in JSON output. Its ServeHTTP
+// method forwards any data still buffered in Bufrw before reading
+// from Conn directly.
+type ContainerSSHConnection struct {
+       Conn  net.Conn          `json:"-"`
+       Bufrw *bufio.ReadWriter `json:"-"`
+}
+
 type GetOptions struct {
        UUID         string   `json:"uuid,omitempty"`
        Select       []string `json:"select"`
@@ -175,6 +188,7 @@ type API interface {
        ContainerDelete(ctx context.Context, options DeleteOptions) (Container, error)
        ContainerLock(ctx context.Context, options GetOptions) (Container, error)
        ContainerUnlock(ctx context.Context, options GetOptions) (Container, error)
+       ContainerSSH(ctx context.Context, options ContainerSSHOptions) (ContainerSSHConnection, error)
        SpecimenCreate(ctx context.Context, options CreateOptions) (Specimen, error)
        SpecimenUpdate(ctx context.Context, options UpdateOptions) (Specimen, error)
        SpecimenGet(ctx context.Context, options GetOptions) (Specimen, error)
index 265944e81d52fdab08d55e767b9626a52f40c3c2..4d1511b412dd315f134713c60ac1eaad337444dc 100644 (file)
@@ -30,6 +30,7 @@ type Container struct {
        RuntimeStatus        map[string]interface{} `json:"runtime_status"`
        StartedAt            *time.Time             `json:"started_at"`  // nil if not yet started
        FinishedAt           *time.Time             `json:"finished_at"` // nil if not yet finished
+       GatewayAddress       string                 `json:"gateway_address"`
 }
 
 // ContainerRequest is an arvados#container_request resource.
diff --git a/sdk/go/arvados/container_gateway.go b/sdk/go/arvados/container_gateway.go
new file mode 100644 (file)
index 0000000..07f8c07
--- /dev/null
@@ -0,0 +1,53 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+package arvados
+
+import (
+       "context"
+       "io"
+       "net/http"
+
+       "git.arvados.org/arvados.git/sdk/go/ctxlog"
+)
+
+func (sshconn ContainerSSHConnection) ServeHTTP(w http.ResponseWriter, req *http.Request) {
+       hj, ok := w.(http.Hijacker)
+       if !ok {
+               http.Error(w, "ResponseWriter does not support connection upgrade", http.StatusInternalServerError)
+               return
+       }
+       w.Header().Set("Connection", "upgrade")
+       w.Header().Set("Upgrade", "ssh")
+       w.WriteHeader(http.StatusSwitchingProtocols)
+       conn, bufrw, err := hj.Hijack()
+       if err != nil {
+               ctxlog.FromContext(req.Context()).WithError(err).Error("error hijacking ResponseWriter")
+               return
+       }
+       defer conn.Close()
+
+       ctx, cancel := context.WithCancel(context.Background())
+       go func() {
+               defer cancel()
+               _, err := io.CopyN(conn, sshconn.Bufrw, int64(sshconn.Bufrw.Reader.Buffered()))
+               if err == nil {
+                       _, err = io.Copy(conn, sshconn.Conn)
+               }
+               if err != nil {
+                       ctxlog.FromContext(req.Context()).WithError(err).Error("error copying downstream")
+               }
+       }()
+       go func() {
+               defer cancel()
+               _, err := io.CopyN(sshconn.Conn, bufrw, int64(bufrw.Reader.Buffered()))
+               if err == nil {
+                       _, err = io.Copy(sshconn.Conn, conn)
+               }
+               if err != nil {
+                       ctxlog.FromContext(req.Context()).WithError(err).Error("error copying upstream")
+               }
+       }()
+       <-ctx.Done()
+}
index 5833c2251f9b8db26a5ebf5834130d96fc4690d0..a4da593ecdf70212486361e7d78a458b9aae0afd 100644 (file)
@@ -76,6 +76,7 @@ class Container < ArvadosModel
     t.add :runtime_user_uuid
     t.add :runtime_auth_scopes
     t.add :lock_count
+    t.add :gateway_address
   end
 
   # Supported states for a container
@@ -478,7 +479,7 @@ class Container < ArvadosModel
     when Running
       permitted.push :priority, *progress_attrs
       if self.state_changed?
-        permitted.push :started_at
+        permitted.push :started_at, :gateway_address
       end
 
     when Complete
diff --git a/services/api/db/migrate/20210108033940_add_gateway_address_to_containers.rb b/services/api/db/migrate/20210108033940_add_gateway_address_to_containers.rb
new file mode 100644 (file)
index 0000000..8683b51
--- /dev/null
@@ -0,0 +1,9 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Adds a gateway_address string column to the containers table.
+class AddGatewayAddressToContainers < ActiveRecord::Migration[5.2]
+  def change
+    add_column :containers, :gateway_address, :string
+  end
+end
index 12a28c6c723609bd14b2c20129b8c749695bdc28..249ec67ac315f17b571f88c14928a6b663ea0390 100644 (file)
@@ -521,7 +521,8 @@ CREATE TABLE public.containers (
     runtime_user_uuid text,
     runtime_auth_scopes jsonb,
     runtime_token text,
-    lock_count integer DEFAULT 0 NOT NULL
+    lock_count integer DEFAULT 0 NOT NULL,
+    gateway_address character varying
 );
 
 
@@ -3187,6 +3188,7 @@ INSERT INTO "schema_migrations" (version) VALUES
 ('20200914203202'),
 ('20201103170213'),
 ('20201105190435'),
-('20201202174753');
+('20201202174753'),
+('20210108033940');