From: Tom Clegg Date: Mon, 11 Jan 2021 20:53:28 +0000 (-0500) Subject: 17170: Add "arvados-client shell" subcommand and backend support. X-Git-Tag: 2.2.0~135^2~36 X-Git-Url: https://git.arvados.org/arvados.git/commitdiff_plain/56e130608f8977d20b21c54f6ab8973d71e045a0 17170: Add "arvados-client shell" subcommand and backend support. Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- diff --git a/cmd/arvados-client/cmd.go b/cmd/arvados-client/cmd.go index 47fcd5ad7d..aefcce79a4 100644 --- a/cmd/arvados-client/cmd.go +++ b/cmd/arvados-client/cmd.go @@ -57,6 +57,8 @@ var ( "mount": mount.Command, "deduplication-report": deduplicationreport.Command, "costanalyzer": costanalyzer.Command, + "shell": shellCommand{}, + "connect-ssh": connectSSHCommand{}, }) ) diff --git a/cmd/arvados-client/container_gateway.go b/cmd/arvados-client/container_gateway.go new file mode 100644 index 0000000000..46fa6932b3 --- /dev/null +++ b/cmd/arvados-client/container_gateway.go @@ -0,0 +1,159 @@ +// Copyright (C) The Arvados Authors. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +package main + +import ( + "context" + "flag" + "fmt" + "io" + "net/url" + "os" + "os/exec" + "strings" + "syscall" + + "git.arvados.org/arvados.git/lib/controller/rpc" + "git.arvados.org/arvados.git/sdk/go/arvados" +) + +// shellCommand connects the terminal to an interactive shell on a +// running container. +type shellCommand struct{} + +func (shellCommand) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int { + f := flag.NewFlagSet(prog, flag.ContinueOnError) + f.SetOutput(stderr) + f.Usage = func() { + fmt.Fprint(stderr, prog+`: open an interactive shell on a running container. 
+ +Usage: `+prog+` [options] container-uuid [ssh-options] [remote-command [args...]] + +Options: +`) + f.PrintDefaults() + } + detachKeys := f.String("detach-keys", "ctrl-],ctrl-]", "set detach key sequence, as in docker-attach(1)") + err := f.Parse(args) + if err != nil { + fmt.Fprintln(stderr, err) + f.Usage() + return 2 + } + + if f.NArg() < 1 { + f.Usage() + return 2 + } + targetUUID := f.Args()[0] + sshargs := f.Args()[1:] + + selfbin, err := os.Readlink("/proc/self/exe") + if err != nil { + fmt.Fprintln(stderr, err) + return 2 + } + sshargs = append([]string{ + "-o", "ProxyCommand " + selfbin + " connect-ssh -detach-keys='" + strings.Replace(*detachKeys, "'", "'\\''", -1) + "' " + targetUUID, + "-o", "StrictHostKeyChecking no", + "root@" + targetUUID}, + sshargs...) + cmd := exec.Command("ssh", sshargs...) + cmd.Stdin = stdin + cmd.Stdout = stdout + cmd.Stderr = stderr + err = cmd.Run() + if err == nil { + return 0 + } else if exiterr, ok := err.(*exec.ExitError); !ok { + fmt.Fprintln(stderr, err) + return 1 + } else if status, ok := exiterr.Sys().(syscall.WaitStatus); !ok { + fmt.Fprintln(stderr, err) + return 1 + } else { + return status.ExitStatus() + } +} + +// connectSSHCommand connects stdin/stdout to a container's gateway +// server (see lib/crunchrun/container_gateway.go). +// +// It is intended to be invoked with OpenSSH client's ProxyCommand +// config. +type connectSSHCommand struct{} + +func (connectSSHCommand) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int { + f := flag.NewFlagSet(prog, flag.ContinueOnError) + f.SetOutput(stderr) + f.Usage = func() { + fmt.Fprint(stderr, prog+`: connect to the gateway service for a running container. 
+ +Usage: `+prog+` [options] container-uuid + +Options: +`) + f.PrintDefaults() + } + detachKeys := f.String("detach-keys", "", "set detach key sequence, as in docker-attach(1)") + if err := f.Parse(args); err != nil { + fmt.Fprintln(stderr, err) + f.Usage() + return 2 + } else if f.NArg() != 1 { + f.Usage() + return 2 + } + targetUUID := f.Args()[0] + insecure := os.Getenv("ARVADOS_API_HOST_INSECURE") + rpcconn := rpc.NewConn("", + &url.URL{ + Scheme: "https", + Host: os.Getenv("ARVADOS_API_HOST"), + }, + insecure == "1" || insecure == "yes" || insecure == "true", + func(context.Context) ([]string, error) { + return []string{os.Getenv("ARVADOS_API_TOKEN")}, nil + }) + // if strings.Contains(targetUUID, "-xvhdp-") { + // cr, err := rpcconn.ContainerRequestGet(context.TODO(), arvados.GetOptions{UUID: targetUUID}) + // if err != nil { + // fmt.Fprintln(stderr, err) + // return 1 + // } + // if cr.ContainerUUID == "" { + // fmt.Fprintf(stderr, "no container assigned, container request state is %s\n", strings.ToLower(cr.State)) + // return 1 + // } + // targetUUID = cr.ContainerUUID + // } + sshconn, err := rpcconn.ContainerSSH(context.TODO(), arvados.ContainerSSHOptions{ + UUID: targetUUID, + DetachKeys: *detachKeys, + }) + if err != nil { + fmt.Fprintln(stderr, err) + return 1 + } + defer sshconn.Conn.Close() + + ctx, cancel := context.WithCancel(context.Background()) + go func() { + defer cancel() + _, err := io.Copy(stdout, sshconn.Conn) + if err != nil && ctx.Err() == nil { + fmt.Fprintf(stderr, "receive: %v\n", err) + } + }() + go func() { + defer cancel() + _, err := io.Copy(sshconn.Conn, stdin) + if err != nil && ctx.Err() == nil { + fmt.Fprintf(stderr, "send: %v\n", err) + } + }() + <-ctx.Done() + return 0 +} diff --git a/doc/install/install-api-server.html.textile.liquid b/doc/install/install-api-server.html.textile.liquid index ca55be53e3..7d0353c9e7 100644 --- a/doc/install/install-api-server.html.textile.liquid +++ 
b/doc/install/install-api-server.html.textile.liquid @@ -153,11 +153,13 @@ server { proxy_connect_timeout 90s; proxy_read_timeout 300s; - proxy_set_header X-Forwarded-Proto https; - proxy_set_header Host $http_host; + proxy_set_header Host $http_host; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; proxy_set_header X-External-Client $external_client; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto https; + proxy_set_header X-Real-IP $remote_addr; } } diff --git a/go.mod b/go.mod index 262978d912..88dcb86c72 100644 --- a/go.mod +++ b/go.mod @@ -20,6 +20,7 @@ require ( github.com/bradleypeabody/godap v0.0.0-20170216002349-c249933bc092 github.com/coreos/go-oidc v2.1.0+incompatible github.com/coreos/go-systemd v0.0.0-20180108085132-cc4f39464dc7 + github.com/creack/pty v1.1.7 github.com/dnaeon/go-vcr v1.0.1 // indirect github.com/docker/distribution v2.6.0-rc.1.0.20180105232752-277ed486c948+incompatible // indirect github.com/docker/docker v1.4.2-0.20180109013817-94b8a116fbf1 @@ -33,6 +34,7 @@ require ( github.com/go-asn1-ber/asn1-ber v1.4.1 // indirect github.com/go-ldap/ldap v3.0.3+incompatible github.com/gogo/protobuf v1.1.1 + github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 github.com/gorilla/context v1.1.1 // indirect github.com/gorilla/mux v1.6.1-0.20180107155708-5bbbb5b2b572 github.com/hashicorp/golang-lru v0.5.1 diff --git a/go.sum b/go.sum index 85d205112f..91b5689eb3 100644 --- a/go.sum +++ b/go.sum @@ -72,6 +72,8 @@ github.com/coreos/go-oidc v2.1.0+incompatible h1:sdJrfw8akMnCuUlaZU3tE/uYXFgfqom github.com/coreos/go-oidc v2.1.0+incompatible/go.mod h1:CgnwVTmzoESiwO9qyAFEMiHoZ1nMCKZlZ9V6mm3/LKc= github.com/coreos/go-systemd v0.0.0-20180108085132-cc4f39464dc7 h1:e3u8KWFMR3irlDo1Z/tL8Hsz1MJmCLkSoX5AZRMKZkg= github.com/coreos/go-systemd 
v0.0.0-20180108085132-cc4f39464dc7/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= +github.com/creack/pty v1.1.7 h1:6pwm8kMQKCmgUg0ZHTm5+/YvRK0s3THD/28+T6/kk4A= +github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY= github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= @@ -134,6 +136,8 @@ github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= +github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4= +github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ= github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= github.com/googleapis/gax-go/v2 v2.0.5 h1:sjZBwGj9Jlw33ImPtvFviGYvseOtDM7hkSKB7+Tv3SM= github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= diff --git a/lib/controller/federation/conn.go b/lib/controller/federation/conn.go index 130368124c..a32382ce25 100644 --- a/lib/controller/federation/conn.go +++ b/lib/controller/federation/conn.go @@ -336,6 +336,10 @@ func (conn *Conn) ContainerUnlock(ctx context.Context, options arvados.GetOption return conn.chooseBackend(options.UUID).ContainerUnlock(ctx, options) } +func (conn *Conn) ContainerSSH(ctx context.Context, options arvados.ContainerSSHOptions) (arvados.ContainerSSHConnection, error) { + return conn.chooseBackend(options.UUID).ContainerSSH(ctx, options) +} + func (conn 
*Conn) SpecimenList(ctx context.Context, options arvados.ListOptions) (arvados.SpecimenList, error) { return conn.generated_SpecimenList(ctx, options) } diff --git a/lib/controller/handler.go b/lib/controller/handler.go index 6669e020fd..7847be0a49 100644 --- a/lib/controller/handler.go +++ b/lib/controller/handler.go @@ -25,6 +25,7 @@ import ( "git.arvados.org/arvados.git/sdk/go/health" "git.arvados.org/arvados.git/sdk/go/httpserver" "github.com/jmoiron/sqlx" + // sqlx needs lib/pq to talk to PostgreSQL _ "github.com/lib/pq" ) @@ -100,6 +101,7 @@ func (h *Handler) setup() { mux.Handle("/arvados/v1/collections/", rtr) mux.Handle("/arvados/v1/users", rtr) mux.Handle("/arvados/v1/users/", rtr) + mux.Handle("/arvados/v1/connect/", rtr) mux.Handle("/login", rtr) mux.Handle("/logout", rtr) } diff --git a/lib/controller/localdb/container_gateway.go b/lib/controller/localdb/container_gateway.go new file mode 100644 index 0000000000..01b4065f9b --- /dev/null +++ b/lib/controller/localdb/container_gateway.go @@ -0,0 +1,80 @@ +// Copyright (C) The Arvados Authors. All rights reserved. +// +// SPDX-License-Identifier: AGPL-3.0 + +package localdb + +import ( + "bufio" + "context" + "crypto/hmac" + "crypto/sha256" + "errors" + "fmt" + "net" + "net/http" + "net/url" + "strings" + + "git.arvados.org/arvados.git/sdk/go/arvados" + "git.arvados.org/arvados.git/sdk/go/httpserver" +) + +// ContainerSSH returns a connection to the SSH server in the +// appropriate crunch-run process on the worker node where the +// specified container is running. +// +// If the returned error is nil, the caller is responsible for closing +// sshconn.Conn. 
+func (conn *Conn) ContainerSSH(ctx context.Context, opts arvados.ContainerSSHOptions) (sshconn arvados.ContainerSSHConnection, err error) { + ctr, err := conn.railsProxy.ContainerGet(ctx, arvados.GetOptions{UUID: opts.UUID}) + if err != nil { + return + } + if ctr.GatewayAddress == "" || ctr.State != arvados.ContainerStateRunning { + err = httpserver.ErrorWithStatus(fmt.Errorf("gateway is not available, container is %s", strings.ToLower(string(ctr.State))), http.StatusBadGateway) + return + } + netconn, err := net.Dial("tcp", ctr.GatewayAddress) + if err != nil { + return + } + bufr := bufio.NewReader(netconn) + bufw := bufio.NewWriter(netconn) + + // Note this auth header does not protect from replay/mitm + // attacks (TODO: use TLS for that). It only authenticates us + // to crunch-run. + h := hmac.New(sha256.New, []byte(conn.cluster.SystemRootToken)) + fmt.Fprint(h, "%s", opts.UUID) + auth := fmt.Sprintf("%x", h.Sum(nil)) + + u := url.URL{ + Scheme: "http", + Host: ctr.GatewayAddress, + Path: "/ssh", + } + bufw.WriteString("GET " + u.String() + " HTTP/1.1\r\n") + bufw.WriteString("Host: " + u.Host + "\r\n") + bufw.WriteString("Upgrade: ssh\r\n") + bufw.WriteString("X-Arvados-Target-Uuid: " + opts.UUID + "\r\n") + bufw.WriteString("X-Arvados-Authorization: " + auth + "\r\n") + bufw.WriteString("X-Arvados-Detach-Keys: " + opts.DetachKeys + "\r\n") + bufw.WriteString("\r\n") + bufw.Flush() + resp, err := http.ReadResponse(bufr, &http.Request{Method: "GET"}) + if err != nil { + err = fmt.Errorf("error reading http response from gateway: %w", err) + netconn.Close() + return + } + if strings.ToLower(resp.Header.Get("Upgrade")) != "ssh" || + strings.ToLower(resp.Header.Get("Connection")) != "upgrade" { + err = httpserver.ErrorWithStatus(errors.New("bad upgrade"), http.StatusBadGateway) + netconn.Close() + return + } + sshconn.Conn = netconn + sshconn.Bufrw = &bufio.ReadWriter{Reader: bufr, Writer: bufw} + return +} diff --git a/lib/controller/router/router.go 
b/lib/controller/router/router.go index 2944524344..a09b66cedf 100644 --- a/lib/controller/router/router.go +++ b/lib/controller/router/router.go @@ -186,6 +186,13 @@ func (rtr *router) addRoutes() { return rtr.backend.ContainerUnlock(ctx, *opts.(*arvados.GetOptions)) }, }, + { + arvados.EndpointContainerSSH, + func() interface{} { return &arvados.ContainerSSHOptions{} }, + func(ctx context.Context, opts interface{}) (interface{}, error) { + return rtr.backend.ContainerSSH(ctx, *opts.(*arvados.ContainerSSHOptions)) + }, + }, { arvados.EndpointSpecimenCreate, func() interface{} { return &arvados.CreateOptions{} }, diff --git a/lib/controller/rpc/conn.go b/lib/controller/rpc/conn.go index cd98b64718..e80542e3e1 100644 --- a/lib/controller/rpc/conn.go +++ b/lib/controller/rpc/conn.go @@ -5,6 +5,7 @@ package rpc import ( + "bufio" "bytes" "context" "crypto/tls" @@ -12,6 +13,7 @@ import ( "errors" "fmt" "io" + "io/ioutil" "net" "net/http" "net/url" @@ -21,6 +23,7 @@ import ( "git.arvados.org/arvados.git/sdk/go/arvados" "git.arvados.org/arvados.git/sdk/go/auth" + "git.arvados.org/arvados.git/sdk/go/httpserver" ) type TokenProvider func(context.Context) ([]string, error) @@ -286,6 +289,61 @@ func (conn *Conn) ContainerUnlock(ctx context.Context, options arvados.GetOption return resp, err } +// ContainerSSH returns a connection to the out-of-band SSH server for +// a running container. If the returned error is nil, the caller is +// responsible for closing sshconn.Conn. 
+func (conn *Conn) ContainerSSH(ctx context.Context, options arvados.ContainerSSHOptions) (sshconn arvados.ContainerSSHConnection, err error) { + netconn, err := tls.Dial("tcp", net.JoinHostPort(conn.baseURL.Host, "https"), nil) + if err != nil { + return + } + bufr := bufio.NewReader(netconn) + bufw := bufio.NewWriter(netconn) + + u, err := conn.baseURL.Parse("/" + strings.Replace(arvados.EndpointContainerSSH.Path, "{uuid}", options.UUID, -1)) + if err != nil { + netconn.Close() + return + } + u.RawQuery = url.Values{"detach_keys": {options.DetachKeys}}.Encode() + tokens, err := conn.tokenProvider(ctx) + if err != nil { + netconn.Close() + return + } else if len(tokens) < 1 { + err = httpserver.ErrorWithStatus(errors.New("unauthorized"), http.StatusUnauthorized) + netconn.Close() + return + } + bufw.WriteString("GET " + u.String() + " HTTP/1.1\r\n") + bufw.WriteString("Authorization: Bearer " + tokens[0] + "\r\n") + bufw.WriteString("Host: " + u.Host + "\r\n") + bufw.WriteString("Upgrade: ssh\r\n") + bufw.WriteString("\r\n") + bufw.Flush() + resp, err := http.ReadResponse(bufr, &http.Request{Method: "GET"}) + if err != nil { + netconn.Close() + return + } + if resp.StatusCode != http.StatusSwitchingProtocols { + defer resp.Body.Close() + body, _ := ioutil.ReadAll(resp.Body) + err = fmt.Errorf("tunnel connection failed: %d %q", resp.StatusCode, body) + netconn.Close() + return + } + if strings.ToLower(resp.Header.Get("Upgrade")) != "ssh" || + strings.ToLower(resp.Header.Get("Connection")) != "upgrade" { + err = fmt.Errorf("bad response: Upgrade %q Connection %q", resp.Header.Get("Upgrade"), resp.Header.Get("Connection")) + netconn.Close() + return + } + sshconn.Conn = netconn + sshconn.Bufrw = &bufio.ReadWriter{Reader: bufr, Writer: bufw} + return +} + func (conn *Conn) SpecimenCreate(ctx context.Context, options arvados.CreateOptions) (arvados.Specimen, error) { ep := arvados.EndpointSpecimenCreate var resp arvados.Specimen diff --git 
a/lib/crunchrun/container_gateway.go b/lib/crunchrun/container_gateway.go new file mode 100644 index 0000000000..e55868ac33 --- /dev/null +++ b/lib/crunchrun/container_gateway.go @@ -0,0 +1,292 @@ +// Copyright (C) The Arvados Authors. All rights reserved. +// +// SPDX-License-Identifier: AGPL-3.0 + +package crunchrun + +import ( + "crypto/rand" + "crypto/rsa" + "fmt" + "io" + "net" + "net/http" + "os" + "os/exec" + "sync" + "syscall" + + "git.arvados.org/arvados.git/sdk/go/httpserver" + "github.com/creack/pty" + "github.com/google/shlex" + "golang.org/x/crypto/ssh" +) + +// startGatewayServer starts an http server that allows authenticated +// clients to open an interactive "docker exec" session and (in +// future) connect to tcp ports inside the docker container. +func (runner *ContainerRunner) startGatewayServer() error { + runner.gatewaySSHConfig = &ssh.ServerConfig{ + NoClientAuth: true, + PasswordCallback: func(c ssh.ConnMetadata, pass []byte) (*ssh.Permissions, error) { + if c.User() == "root" { + return nil, nil + } else { + return nil, fmt.Errorf("unimplemented: cannot log in as non-root user %q", c.User()) + } + }, + PublicKeyCallback: func(c ssh.ConnMetadata, pubKey ssh.PublicKey) (*ssh.Permissions, error) { + if c.User() == "root" { + return &ssh.Permissions{ + Extensions: map[string]string{ + "pubkey-fp": ssh.FingerprintSHA256(pubKey), + }, + }, nil + } else { + return nil, fmt.Errorf("unimplemented: cannot log in as non-root user %q", c.User()) + } + }, + } + pvt, err := rsa.GenerateKey(rand.Reader, 4096) + if err != nil { + return err + } + err = pvt.Validate() + if err != nil { + return err + } + signer, err := ssh.NewSignerFromKey(pvt) + if err != nil { + return err + } + runner.gatewaySSHConfig.AddHostKey(signer) + + // GatewayAddress (provided by arvados-dispatch-cloud) is + // HOST:PORT where HOST is our IP address or hostname as seen + // from arvados-controller, and PORT is either the desired + // port where we should run our gateway server, 
or "0" if + // we should choose an available port. + host, port, err := net.SplitHostPort(os.Getenv("GatewayAddress")) + if err != nil { + return err + } + srv := &httpserver.Server{ + Server: http.Server{ + Handler: http.HandlerFunc(runner.handleSSH), + }, + Addr: ":" + port, + } + err = srv.Start() + if err != nil { + return err + } + // Get the port number we are listening on (the port might be + // "0" or a port name, in which case this will be different). + _, port, err = net.SplitHostPort(srv.Addr) + if err != nil { + return err + } + // When changing state to Running, we will set + // gateway_address to "HOST:PORT" where HOST is our + // external hostname/IP as provided by arvados-dispatch-cloud, + // and PORT is the port number we ended up listening on. + runner.gatewayAddress = net.JoinHostPort(host, port) + return nil +} + +// handleSSH connects to an SSH server that runs commands as root in +// the container. The tunnel itself can only be created by an +// authenticated caller, so the SSH server itself is wide open (any +// password or key will be accepted). 
+// +// Requests must have path "/ssh" and the following headers: +// +// Connection: upgrade +// Upgrade: ssh +// X-Arvados-Target-Uuid: uuid of container +// X-Arvados-Authorization: must match GatewayAuthSecret provided by +// a-d-c (this prevents other containers and shell nodes from +// connecting directly) +// +// Optional header: +// +// X-Arvados-Detach-Keys: argument to "docker attach --detach-keys", +// e.g., "ctrl-p,ctrl-q" +func (runner *ContainerRunner) handleSSH(w http.ResponseWriter, req *http.Request) { + // In future we'll handle browser traffic too, but for now the + // only traffic we expect is an SSH tunnel from + // (*lib/controller/localdb.Conn)ContainerSSH() + if req.Method != "GET" || req.Header.Get("Upgrade") != "ssh" { + http.Error(w, "path not found", http.StatusNotFound) + return + } + if want := req.Header.Get("X-Arvados-Target-Uuid"); want != runner.Container.UUID { + http.Error(w, fmt.Sprintf("misdirected request: meant for %q but received by crunch-run %q", want, runner.Container.UUID), http.StatusBadGateway) + return + } + if req.Header.Get("X-Arvados-Authorization") != runner.gatewayAuthSecret { + http.Error(w, "bad X-Arvados-Authorization header", http.StatusUnauthorized) + return + } + detachKeys := req.Header.Get("X-Arvados-Detach-Keys") + hj, ok := w.(http.Hijacker) + if !ok { + http.Error(w, "ResponseWriter does not support connection upgrade", http.StatusInternalServerError) + return + } + netconn, _, err := hj.Hijack() + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + defer netconn.Close() + w.Header().Set("Connection", "upgrade") + w.Header().Set("Upgrade", "ssh") + netconn.Write([]byte("HTTP/1.1 101 Switching Protocols\r\n")) + w.Header().Write(netconn) + netconn.Write([]byte("\r\n")) + + ctx := req.Context() + + conn, newchans, reqs, err := ssh.NewServerConn(netconn, runner.gatewaySSHConfig) + if err != nil { + runner.CrunchLog.Printf("ssh.NewServerConn: %s", err) + return + } + defer 
conn.Close() + go ssh.DiscardRequests(reqs) + for newch := range newchans { + if newch.ChannelType() != "session" { + newch.Reject(ssh.UnknownChannelType, "unknown channel type") + continue + } + ch, reqs, err := newch.Accept() + if err != nil { + runner.CrunchLog.Printf("accept channel: %s", err) + return + } + var pty0, tty0 *os.File + go func() { + defer func() { pty0.Close() }() // closure: pty0/tty0 are assigned later by "pty-req"; Close is a no-op error on nil + defer func() { tty0.Close() }() + // Where to send errors/messages for the + // client to see + logw := io.Writer(ch.Stderr()) + // How to end lines when sending + // errors/messages to the client (changes to + // \r\n when using a pty) + eol := "\n" + // Env vars to add to child process + termEnv := []string(nil) + for req := range reqs { + ok := false + switch req.Type { + case "shell", "exec": + ok = true + var payload struct { + Command string + } + ssh.Unmarshal(req.Payload, &payload) + execargs, err := shlex.Split(payload.Command) + if err != nil { + fmt.Fprintf(logw, "error parsing supplied command: %s"+eol, err) + return + } + if len(execargs) == 0 { + execargs = []string{"/bin/bash", "-login"} + } + go func() { + cmd := exec.CommandContext(ctx, "docker", "exec", "-i", "--detach-keys="+detachKeys) + cmd.Stdin = ch + cmd.Stdout = ch + cmd.Stderr = ch.Stderr() + if tty0 != nil { + cmd.Args = append(cmd.Args, "-t") + cmd.Stdin = tty0 + cmd.Stdout = tty0 + cmd.Stderr = tty0 + var wg sync.WaitGroup + defer wg.Wait() + wg.Add(2) + go func() { io.Copy(ch, pty0); wg.Done() }() + go func() { io.Copy(pty0, ch); wg.Done() }() + // Send our own debug messages to tty as well. + logw = tty0 + } + cmd.Args = append(cmd.Args, runner.ContainerID) + cmd.Args = append(cmd.Args, execargs...) + cmd.SysProcAttr = &syscall.SysProcAttr{ + Setctty: tty0 != nil, + Setsid: true, + } + cmd.Env = append(os.Environ(), termEnv...) 
+ err := cmd.Run() + errClose := ch.CloseWrite() + var resp struct { + Status uint32 + } + if err, ok := err.(*exec.ExitError); ok { + if status, ok := err.Sys().(syscall.WaitStatus); ok { + resp.Status = uint32(status.ExitStatus()) + } + } + if resp.Status == 0 && (err != nil || errClose != nil) { + resp.Status = 1 + } + ch.SendRequest("exit-status", false, ssh.Marshal(&resp)) + ch.Close() + }() + case "pty-req": + eol = "\r\n" + p, t, err := pty.Open() + if err != nil { + fmt.Fprintf(ch.Stderr(), "pty failed: %s"+eol, err) + break + } + pty0, tty0 = p, t + ok = true + var payload struct { + Term string + Cols uint32 + Rows uint32 + X uint32 + Y uint32 + } + ssh.Unmarshal(req.Payload, &payload) + termEnv = []string{"TERM=" + payload.Term, "USE_TTY=1"} + err = pty.Setsize(pty0, &pty.Winsize{Rows: uint16(payload.Rows), Cols: uint16(payload.Cols), X: uint16(payload.X), Y: uint16(payload.Y)}) + if err != nil { + fmt.Fprintf(logw, "pty-req: setsize failed: %s"+eol, err) + } + case "window-change": + var payload struct { + Cols uint32 + Rows uint32 + X uint32 + Y uint32 + } + ssh.Unmarshal(req.Payload, &payload) + err := pty.Setsize(pty0, &pty.Winsize{Rows: uint16(payload.Rows), Cols: uint16(payload.Cols), X: uint16(payload.X), Y: uint16(payload.Y)}) + if err != nil { + fmt.Fprintf(logw, "window-change: setsize failed: %s"+eol, err) + break + } + ok = true + case "env": + // TODO: implement "env" + // requests by setting env + // vars in the docker-exec + // command (not docker-exec's + // own environment, which + // would be a gaping security + // hole). 
+ default: + // fmt.Fprintf(logw, "declining %q req"+eol, req.Type) + } + if req.WantReply { + req.Reply(ok, nil) + } + } + }() + } +} diff --git a/lib/crunchrun/crunchrun.go b/lib/crunchrun/crunchrun.go index 341938354c..b252e0dce1 100644 --- a/lib/crunchrun/crunchrun.go +++ b/lib/crunchrun/crunchrun.go @@ -33,6 +33,7 @@ import ( "git.arvados.org/arvados.git/sdk/go/arvadosclient" "git.arvados.org/arvados.git/sdk/go/keepclient" "git.arvados.org/arvados.git/sdk/go/manifest" + "golang.org/x/crypto/ssh" "golang.org/x/net/context" dockertypes "github.com/docker/docker/api/types" @@ -178,6 +179,10 @@ type ContainerRunner struct { arvMountLog *ThrottledLogger containerWatchdogInterval time.Duration + + gatewayAddress string + gatewaySSHConfig *ssh.ServerConfig + gatewayAuthSecret string } // setupSignals sets up signal handling to gracefully terminate the underlying @@ -1469,7 +1474,7 @@ func (runner *ContainerRunner) UpdateContainerRunning() error { return ErrCancelled } return runner.DispatcherArvClient.Update("containers", runner.Container.UUID, - arvadosclient.Dict{"container": arvadosclient.Dict{"state": "Running"}}, nil) + arvadosclient.Dict{"container": arvadosclient.Dict{"state": "Running", "gateway_address": runner.gatewayAddress}}, nil) } // ContainerToken returns the api_token the container (and any @@ -1868,6 +1873,14 @@ func (command) RunCommand(prog string, args []string, stdin io.Reader, stdout, s return 1 } + cr.gatewayAuthSecret = os.Getenv("GatewayAuthSecret") + os.Unsetenv("GatewayAuthSecret") + err = cr.startGatewayServer() + if err != nil { + log.Printf("error starting gateway server: %s", err) + return 1 + } + parentTemp, tmperr := cr.MkTempDir("", "crunch-run."+containerID+".") if tmperr != nil { log.Printf("%s: %v", containerID, tmperr) diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go index a25ed60150..e092e7adab 100644 --- a/lib/dispatchcloud/worker/pool.go +++ b/lib/dispatchcloud/worker/pool.go @@ -5,8 +5,10 @@ 
package worker import ( + "crypto/hmac" "crypto/md5" "crypto/rand" + "crypto/sha256" "errors" "fmt" "io" @@ -116,6 +118,7 @@ func NewPool(logger logrus.FieldLogger, arvClient *arvados.Client, reg *promethe timeoutTERM: duration(cluster.Containers.CloudVMs.TimeoutTERM, defaultTimeoutTERM), timeoutSignal: duration(cluster.Containers.CloudVMs.TimeoutSignal, defaultTimeoutSignal), timeoutStaleRunLock: duration(cluster.Containers.CloudVMs.TimeoutStaleRunLock, defaultTimeoutStaleRunLock), + systemRootToken: cluster.SystemRootToken, installPublicKey: installPublicKey, tagKeyPrefix: cluster.Containers.CloudVMs.TagKeyPrefix, stop: make(chan bool), @@ -154,6 +157,7 @@ type Pool struct { timeoutTERM time.Duration timeoutSignal time.Duration timeoutStaleRunLock time.Duration + systemRootToken string installPublicKey ssh.PublicKey tagKeyPrefix string @@ -990,6 +994,12 @@ func (wp *Pool) waitUntilLoaded() { } } +func (wp *Pool) gatewayAuthSecret(uuid string) string { + h := hmac.New(sha256.New, []byte(wp.systemRootToken)) + fmt.Fprint(h, "%s", uuid) + return fmt.Sprintf("%x", h.Sum(nil)) +} + // Return a random string of n hexadecimal digits (n*4 random bits). n // must be even. 
func randomHex(n int) string { diff --git a/lib/dispatchcloud/worker/runner.go b/lib/dispatchcloud/worker/runner.go index 4752121342..0fd99aeeef 100644 --- a/lib/dispatchcloud/worker/runner.go +++ b/lib/dispatchcloud/worker/runner.go @@ -8,6 +8,7 @@ import ( "bytes" "encoding/json" "fmt" + "net" "syscall" "time" @@ -48,6 +49,8 @@ func newRemoteRunner(uuid string, wkr *worker) *remoteRunner { "ARVADOS_API_HOST": wkr.wp.arvClient.APIHost, "ARVADOS_API_TOKEN": wkr.wp.arvClient.AuthToken, "InstanceType": instJSON.String(), + "GatewayAddress": net.JoinHostPort(wkr.instance.Address(), "0"), + "GatewayAuthSecret": wkr.wp.gatewayAuthSecret(uuid), } if wkr.wp.arvClient.Insecure { env["ARVADOS_API_HOST_INSECURE"] = "1" diff --git a/sdk/go/arvados/api.go b/sdk/go/arvados/api.go index 5a2cfb8800..fa44718048 100644 --- a/sdk/go/arvados/api.go +++ b/sdk/go/arvados/api.go @@ -5,8 +5,10 @@ package arvados import ( + "bufio" "context" "encoding/json" + "net" ) type APIEndpoint struct { @@ -41,6 +43,7 @@ var ( EndpointContainerDelete = APIEndpoint{"DELETE", "arvados/v1/containers/{uuid}", ""} EndpointContainerLock = APIEndpoint{"POST", "arvados/v1/containers/{uuid}/lock", ""} EndpointContainerUnlock = APIEndpoint{"POST", "arvados/v1/containers/{uuid}/unlock", ""} + EndpointContainerSSH = APIEndpoint{"GET", "arvados/v1/connect/{uuid}/ssh", ""} // move to /containers after #17014 fixes routing EndpointUserActivate = APIEndpoint{"POST", "arvados/v1/users/{uuid}/activate", ""} EndpointUserCreate = APIEndpoint{"POST", "arvados/v1/users", "user"} EndpointUserCurrent = APIEndpoint{"GET", "arvados/v1/users/current", ""} @@ -60,6 +63,16 @@ var ( EndpointAPIClientAuthorizationCurrent = APIEndpoint{"GET", "arvados/v1/api_client_authorizations/current", ""} ) +type ContainerSSHOptions struct { + UUID string `json:"uuid"` + DetachKeys string `json:"detach_keys"` +} + +type ContainerSSHConnection struct { + Conn net.Conn `json:"-"` + Bufrw *bufio.ReadWriter `json:"-"` +} + type GetOptions struct 
{ UUID string `json:"uuid,omitempty"` Select []string `json:"select"` @@ -175,6 +188,7 @@ type API interface { ContainerDelete(ctx context.Context, options DeleteOptions) (Container, error) ContainerLock(ctx context.Context, options GetOptions) (Container, error) ContainerUnlock(ctx context.Context, options GetOptions) (Container, error) + ContainerSSH(ctx context.Context, options ContainerSSHOptions) (ContainerSSHConnection, error) SpecimenCreate(ctx context.Context, options CreateOptions) (Specimen, error) SpecimenUpdate(ctx context.Context, options UpdateOptions) (Specimen, error) SpecimenGet(ctx context.Context, options GetOptions) (Specimen, error) diff --git a/sdk/go/arvados/container.go b/sdk/go/arvados/container.go index 265944e81d..4d1511b412 100644 --- a/sdk/go/arvados/container.go +++ b/sdk/go/arvados/container.go @@ -30,6 +30,7 @@ type Container struct { RuntimeStatus map[string]interface{} `json:"runtime_status"` StartedAt *time.Time `json:"started_at"` // nil if not yet started FinishedAt *time.Time `json:"finished_at"` // nil if not yet finished + GatewayAddress string `json:"gateway_address"` } // ContainerRequest is an arvados#container_request resource. diff --git a/sdk/go/arvados/container_gateway.go b/sdk/go/arvados/container_gateway.go new file mode 100644 index 0000000000..07f8c0793c --- /dev/null +++ b/sdk/go/arvados/container_gateway.go @@ -0,0 +1,53 @@ +// Copyright (C) The Arvados Authors. All rights reserved. 
+// +// SPDX-License-Identifier: Apache-2.0 + +package arvados + +import ( + "context" + "io" + "net/http" + + "git.arvados.org/arvados.git/sdk/go/ctxlog" +) + +func (sshconn ContainerSSHConnection) ServeHTTP(w http.ResponseWriter, req *http.Request) { + hj, ok := w.(http.Hijacker) + if !ok { + http.Error(w, "ResponseWriter does not support connection upgrade", http.StatusInternalServerError) + return + } + w.Header().Set("Connection", "upgrade") + w.Header().Set("Upgrade", "ssh") + w.WriteHeader(http.StatusSwitchingProtocols) + conn, bufrw, err := hj.Hijack() + if err != nil { + ctxlog.FromContext(req.Context()).WithError(err).Error("error hijacking ResponseWriter") + return + } + defer conn.Close() + + ctx, cancel := context.WithCancel(context.Background()) + go func() { + defer cancel() + _, err := io.CopyN(conn, sshconn.Bufrw, int64(sshconn.Bufrw.Reader.Buffered())) + if err == nil { + _, err = io.Copy(conn, sshconn.Conn) + } + if err != nil { + ctxlog.FromContext(req.Context()).WithError(err).Error("error copying downstream") + } + }() + go func() { + defer cancel() + _, err := io.CopyN(sshconn.Conn, bufrw, int64(bufrw.Reader.Buffered())) + if err == nil { + _, err = io.Copy(sshconn.Conn, conn) + } + if err != nil { + ctxlog.FromContext(req.Context()).WithError(err).Error("error copying upstream") + } + }() + <-ctx.Done() +} diff --git a/services/api/app/models/container.rb b/services/api/app/models/container.rb index 5833c2251f..a4da593ecd 100644 --- a/services/api/app/models/container.rb +++ b/services/api/app/models/container.rb @@ -76,6 +76,7 @@ class Container < ArvadosModel t.add :runtime_user_uuid t.add :runtime_auth_scopes t.add :lock_count + t.add :gateway_address end # Supported states for a container @@ -478,7 +479,7 @@ class Container < ArvadosModel when Running permitted.push :priority, *progress_attrs if self.state_changed? 
- permitted.push :started_at + permitted.push :started_at, :gateway_address end when Complete diff --git a/services/api/db/migrate/20210108033940_add_gateway_address_to_containers.rb b/services/api/db/migrate/20210108033940_add_gateway_address_to_containers.rb new file mode 100644 index 0000000000..8683b51901 --- /dev/null +++ b/services/api/db/migrate/20210108033940_add_gateway_address_to_containers.rb @@ -0,0 +1,9 @@ +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +class AddGatewayAddressToContainers < ActiveRecord::Migration[5.2] + def change + add_column :containers, :gateway_address, :string + end +end diff --git a/services/api/db/structure.sql b/services/api/db/structure.sql index 12a28c6c72..249ec67ac3 100644 --- a/services/api/db/structure.sql +++ b/services/api/db/structure.sql @@ -521,7 +521,8 @@ CREATE TABLE public.containers ( runtime_user_uuid text, runtime_auth_scopes jsonb, runtime_token text, - lock_count integer DEFAULT 0 NOT NULL + lock_count integer DEFAULT 0 NOT NULL, + gateway_address character varying ); @@ -3187,6 +3188,7 @@ INSERT INTO "schema_migrations" (version) VALUES ('20200914203202'), ('20201103170213'), ('20201105190435'), -('20201202174753'); +('20201202174753'), +('20210108033940');