17170: Use TLS for controller->crunch-run traffic.
authorTom Clegg <tom@curii.com>
Wed, 13 Jan 2021 02:03:40 +0000 (21:03 -0500)
committerTom Clegg <tom@curii.com>
Wed, 13 Jan 2021 02:03:40 +0000 (21:03 -0500)
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@curii.com>

lib/controller/localdb/container_gateway.go
lib/crunchrun/container_gateway.go
lib/crunchrun/crunchrun.go
lib/selfsigned/cert.go [new file with mode: 0644]
lib/selfsigned/cert_test.go [new file with mode: 0644]

index 31d44e5e0d88e4f1cab146f28df5384f5c0ff15c..dd296c569cb3db9e1edd3ce8039d5c5170593c8b 100644 (file)
@@ -9,9 +9,10 @@ import (
        "context"
        "crypto/hmac"
        "crypto/sha256"
+       "crypto/tls"
+       "crypto/x509"
        "errors"
        "fmt"
-       "net"
        "net/http"
        "net/url"
        "strings"
@@ -36,20 +37,53 @@ func (conn *Conn) ContainerSSH(ctx context.Context, opts arvados.ContainerSSHOpt
                err = httpserver.ErrorWithStatus(fmt.Errorf("gateway is not available, container is %s", strings.ToLower(string(ctr.State))), http.StatusBadGateway)
                return
        }
-       netconn, err := net.Dial("tcp", ctr.GatewayAddress)
+       // crunch-run uses a self-signed / unverifiable TLS
+       // certificate, so we use the following scheme to ensure we're
+       // not talking to a MITM.
+       //
+       // 1. Compute ctrKey = HMAC-SHA256(sysRootToken,ctrUUID) --
+       // this will be the same ctrKey that a-d-c supplied to
+       // crunch-run in the GatewayAuthSecret env var.
+       //
+       // 2. Compute requestAuth = HMAC-SHA256(ctrKey,serverCert) and
+       // send it to crunch-run as the X-Arvados-Authorization
+       // header, proving that we know ctrKey. (Note a MITM cannot
+       // replay the proof to a real crunch-run server, because the
+       // real crunch-run server would have a different cert.)
+       //
+       // 3. Compute respondAuth = HMAC-SHA256(ctrKey,requestAuth)
+       // and ensure the server returns it in the
+       // X-Arvados-Authorization-Response header, proving that the
+       // server knows ctrKey.
+       var requestAuth, respondAuth string
+       netconn, err := tls.Dial("tcp", ctr.GatewayAddress, &tls.Config{
+               InsecureSkipVerify: true,
+               VerifyPeerCertificate: func(rawCerts [][]byte, verifiedChains [][]*x509.Certificate) error {
+                       if len(rawCerts) == 0 {
+                               return errors.New("no certificate received, cannot compute authorization header")
+                       }
+                       h := hmac.New(sha256.New, []byte(conn.cluster.SystemRootToken))
+                       fmt.Fprint(h, "%s", opts.UUID)
+                       authKey := fmt.Sprintf("%x", h.Sum(nil))
+                       h = hmac.New(sha256.New, []byte(authKey))
+                       h.Write(rawCerts[0])
+                       requestAuth = fmt.Sprintf("%x", h.Sum(nil))
+                       h.Reset()
+                       h.Write([]byte(requestAuth))
+                       respondAuth = fmt.Sprintf("%x", h.Sum(nil))
+                       return nil
+               },
+       })
        if err != nil {
                return
        }
+       if respondAuth == "" {
+               err = httpserver.ErrorWithStatus(errors.New("BUG: no respondAuth"), http.StatusInternalServerError)
+               return
+       }
        bufr := bufio.NewReader(netconn)
        bufw := bufio.NewWriter(netconn)
 
-       // Note this auth header does not protect from replay/mitm
-       // attacks (TODO: use TLS for that). It only authenticates us
-       // to crunch-run.
-       h := hmac.New(sha256.New, []byte(conn.cluster.SystemRootToken))
-       fmt.Fprint(h, "%s", opts.UUID)
-       auth := fmt.Sprintf("%x", h.Sum(nil))
-
        u := url.URL{
                Scheme: "http",
                Host:   ctr.GatewayAddress,
@@ -59,14 +93,19 @@ func (conn *Conn) ContainerSSH(ctx context.Context, opts arvados.ContainerSSHOpt
        bufw.WriteString("Host: " + u.Host + "\r\n")
        bufw.WriteString("Upgrade: ssh\r\n")
        bufw.WriteString("X-Arvados-Target-Uuid: " + opts.UUID + "\r\n")
-       bufw.WriteString("X-Arvados-Authorization: " + auth + "\r\n")
+       bufw.WriteString("X-Arvados-Authorization: " + requestAuth + "\r\n")
        bufw.WriteString("X-Arvados-Detach-Keys: " + opts.DetachKeys + "\r\n")
        bufw.WriteString("X-Arvados-Login-Username: " + opts.LoginUsername + "\r\n")
        bufw.WriteString("\r\n")
        bufw.Flush()
        resp, err := http.ReadResponse(bufr, &http.Request{Method: "GET"})
        if err != nil {
-               err = fmt.Errorf("error reading http response from gateway: %w", err)
+               err = httpserver.ErrorWithStatus(fmt.Errorf("error reading http response from gateway: %w", err), http.StatusBadGateway)
+               netconn.Close()
+               return
+       }
+       if resp.Header.Get("X-Arvados-Authorization-Response") != respondAuth {
+               err = httpserver.ErrorWithStatus(errors.New("bad X-Arvados-Authorization-Response header"), http.StatusBadGateway)
                netconn.Close()
                return
        }
index 92c11383f42dbb732dacc740a68a55355556f799..8b2fe4144059d03293129631f02a3738d0eb83fc 100644 (file)
@@ -5,8 +5,11 @@
 package crunchrun
 
 import (
+       "crypto/hmac"
        "crypto/rand"
        "crypto/rsa"
+       "crypto/sha256"
+       "crypto/tls"
        "fmt"
        "io"
        "net"
@@ -16,17 +19,32 @@ import (
        "sync"
        "syscall"
 
+       "git.arvados.org/arvados.git/lib/selfsigned"
        "git.arvados.org/arvados.git/sdk/go/httpserver"
        "github.com/creack/pty"
        "github.com/google/shlex"
        "golang.org/x/crypto/ssh"
 )
 
+type Gateway struct {
+       DockerContainerID *string
+       ContainerUUID     string
+       Address           string // listen host:port; if port=0, Start() will change it to the selected port
+       AuthSecret        string
+       Log               interface {
+               Printf(fmt string, args ...interface{})
+       }
+
+       sshConfig   ssh.ServerConfig
+       requestAuth string
+       respondAuth string
+}
+
 // startGatewayServer starts an http server that allows authenticated
 // clients to open an interactive "docker exec" session and (in
 // future) connect to tcp ports inside the docker container.
-func (runner *ContainerRunner) startGatewayServer() error {
-       runner.gatewaySSHConfig = &ssh.ServerConfig{
+func (gw *Gateway) Start() error {
+       gw.sshConfig = ssh.ServerConfig{
                NoClientAuth: true,
                PasswordCallback: func(c ssh.ConnMetadata, pass []byte) (*ssh.Permissions, error) {
                        if c.User() == "_" {
@@ -47,7 +65,7 @@ func (runner *ContainerRunner) startGatewayServer() error {
                        }
                },
        }
-       pvt, err := rsa.GenerateKey(rand.Reader, 4096)
+       pvt, err := rsa.GenerateKey(rand.Reader, 2048)
        if err != nil {
                return err
        }
@@ -59,20 +77,34 @@ func (runner *ContainerRunner) startGatewayServer() error {
        if err != nil {
                return err
        }
-       runner.gatewaySSHConfig.AddHostKey(signer)
+       gw.sshConfig.AddHostKey(signer)
 
-       // GatewayAddress (provided by arvados-dispatch-cloud) is
+       // Address (typically provided by arvados-dispatch-cloud) is
        // HOST:PORT where HOST is our IP address or hostname as seen
        // from arvados-controller, and PORT is either the desired
-       // port where we should run our gateway server, or "0" if
-       // we should choose an available port.
-       host, port, err := net.SplitHostPort(os.Getenv("GatewayAddress"))
+       // port where we should run our gateway server, or "0" if we
+       // should choose an available port.
+       host, port, err := net.SplitHostPort(gw.Address)
        if err != nil {
                return err
        }
+       cert, err := selfsigned.CertGenerator{}.Generate()
+       if err != nil {
+               return err
+       }
+       h := hmac.New(sha256.New, []byte(gw.AuthSecret))
+       h.Write(cert.Certificate[0])
+       gw.requestAuth = fmt.Sprintf("%x", h.Sum(nil))
+       h.Reset()
+       h.Write([]byte(gw.requestAuth))
+       gw.respondAuth = fmt.Sprintf("%x", h.Sum(nil))
+
        srv := &httpserver.Server{
                Server: http.Server{
-                       Handler: http.HandlerFunc(runner.handleSSH),
+                       Handler: http.HandlerFunc(gw.handleSSH),
+                       TLSConfig: &tls.Config{
+                               Certificates: []tls.Certificate{cert},
+                       },
                },
                Addr: ":" + port,
        }
@@ -90,7 +122,7 @@ func (runner *ContainerRunner) startGatewayServer() error {
        // gateway_address to "HOST:PORT" where HOST is our
        // external hostname/IP as provided by arvados-dispatch-cloud,
        // and PORT is the port number we ended up listening on.
-       runner.gatewayAddress = net.JoinHostPort(host, port)
+       gw.Address = net.JoinHostPort(host, port)
        return nil
 }
 
@@ -104,15 +136,15 @@ func (runner *ContainerRunner) startGatewayServer() error {
 // Connection: upgrade
 // Upgrade: ssh
 // X-Arvados-Target-Uuid: uuid of container
-// X-Arvados-Authorization: must match GatewayAuthSecret provided by
-// a-d-c (this prevents other containers and shell nodes from
-// connecting directly)
+// X-Arvados-Authorization: must match
+// hmac(AuthSecret,certfingerprint) (this prevents other containers
+// and shell nodes from connecting directly)
 //
 // Optional header:
 //
 // X-Arvados-Detach-Keys: argument to "docker attach --detach-keys",
 // e.g., "ctrl-p,ctrl-q"
-func (runner *ContainerRunner) handleSSH(w http.ResponseWriter, req *http.Request) {
+func (gw *Gateway) handleSSH(w http.ResponseWriter, req *http.Request) {
        // In future we'll handle browser traffic too, but for now the
        // only traffic we expect is an SSH tunnel from
        // (*lib/controller/localdb.Conn)ContainerSSH()
@@ -120,11 +152,11 @@ func (runner *ContainerRunner) handleSSH(w http.ResponseWriter, req *http.Reques
                http.Error(w, "path not found", http.StatusNotFound)
                return
        }
-       if want := req.Header.Get("X-Arvados-Target-Uuid"); want != runner.Container.UUID {
-               http.Error(w, fmt.Sprintf("misdirected request: meant for %q but received by crunch-run %q", want, runner.Container.UUID), http.StatusBadGateway)
+       if want := req.Header.Get("X-Arvados-Target-Uuid"); want != gw.ContainerUUID {
+               http.Error(w, fmt.Sprintf("misdirected request: meant for %q but received by crunch-run %q", want, gw.ContainerUUID), http.StatusBadGateway)
                return
        }
-       if req.Header.Get("X-Arvados-Authorization") != runner.gatewayAuthSecret {
+       if req.Header.Get("X-Arvados-Authorization") != gw.requestAuth {
                http.Error(w, "bad X-Arvados-Authorization header", http.StatusUnauthorized)
                return
        }
@@ -146,15 +178,16 @@ func (runner *ContainerRunner) handleSSH(w http.ResponseWriter, req *http.Reques
        defer netconn.Close()
        w.Header().Set("Connection", "upgrade")
        w.Header().Set("Upgrade", "ssh")
+       w.Header().Set("X-Arvados-Authorization-Response", gw.respondAuth)
        netconn.Write([]byte("HTTP/1.1 101 Switching Protocols\r\n"))
        w.Header().Write(netconn)
        netconn.Write([]byte("\r\n"))
 
        ctx := req.Context()
 
-       conn, newchans, reqs, err := ssh.NewServerConn(netconn, runner.gatewaySSHConfig)
+       conn, newchans, reqs, err := ssh.NewServerConn(netconn, &gw.sshConfig)
        if err != nil {
-               runner.CrunchLog.Printf("ssh.NewServerConn: %s", err)
+               gw.Log.Printf("ssh.NewServerConn: %s", err)
                return
        }
        defer conn.Close()
@@ -166,7 +199,7 @@ func (runner *ContainerRunner) handleSSH(w http.ResponseWriter, req *http.Reques
                }
                ch, reqs, err := newch.Accept()
                if err != nil {
-                       runner.CrunchLog.Printf("accept channel: %s", err)
+                       gw.Log.Printf("accept channel: %s", err)
                        return
                }
                var pty0, tty0 *os.File
@@ -217,7 +250,7 @@ func (runner *ContainerRunner) handleSSH(w http.ResponseWriter, req *http.Reques
                                                        // Send our own debug messages to tty as well.
                                                        logw = tty0
                                                }
-                                               cmd.Args = append(cmd.Args, runner.ContainerID)
+                                               cmd.Args = append(cmd.Args, *gw.DockerContainerID)
                                                cmd.Args = append(cmd.Args, execargs...)
                                                cmd.SysProcAttr = &syscall.SysProcAttr{
                                                        Setctty: tty0 != nil,
index b252e0dce162fca3473dc0a0c64a9f846295e4b9..f28593fe0cf3232c0da5ad4b45f6cfb682934e72 100644 (file)
@@ -33,7 +33,6 @@ import (
        "git.arvados.org/arvados.git/sdk/go/arvadosclient"
        "git.arvados.org/arvados.git/sdk/go/keepclient"
        "git.arvados.org/arvados.git/sdk/go/manifest"
-       "golang.org/x/crypto/ssh"
        "golang.org/x/net/context"
 
        dockertypes "github.com/docker/docker/api/types"
@@ -180,9 +179,7 @@ type ContainerRunner struct {
 
        containerWatchdogInterval time.Duration
 
-       gatewayAddress    string
-       gatewaySSHConfig  *ssh.ServerConfig
-       gatewayAuthSecret string
+       gateway Gateway
 }
 
 // setupSignals sets up signal handling to gracefully terminate the underlying
@@ -1474,7 +1471,7 @@ func (runner *ContainerRunner) UpdateContainerRunning() error {
                return ErrCancelled
        }
        return runner.DispatcherArvClient.Update("containers", runner.Container.UUID,
-               arvadosclient.Dict{"container": arvadosclient.Dict{"state": "Running", "gateway_address": runner.gatewayAddress}}, nil)
+               arvadosclient.Dict{"container": arvadosclient.Dict{"state": "Running", "gateway_address": runner.gateway.Address}}, nil)
 }
 
 // ContainerToken returns the api_token the container (and any
@@ -1873,9 +1870,15 @@ func (command) RunCommand(prog string, args []string, stdin io.Reader, stdout, s
                return 1
        }
 
-       cr.gatewayAuthSecret = os.Getenv("GatewayAuthSecret")
+       cr.gateway = Gateway{
+               Address:           os.Getenv("GatewayAddress"),
+               AuthSecret:        os.Getenv("GatewayAuthSecret"),
+               ContainerUUID:     containerID,
+               DockerContainerID: &cr.ContainerID,
+               Log:               cr.CrunchLog,
+       }
        os.Unsetenv("GatewayAuthSecret")
-       err = cr.startGatewayServer()
+       err = cr.gateway.Start()
        if err != nil {
                log.Printf("error starting gateway server: %s", err)
                return 1
diff --git a/lib/selfsigned/cert.go b/lib/selfsigned/cert.go
new file mode 100644 (file)
index 0000000..ae521dd
--- /dev/null
@@ -0,0 +1,76 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package selfsigned
+
+import (
+       "crypto/rand"
+       "crypto/rsa"
+       "crypto/tls"
+       "crypto/x509"
+       "crypto/x509/pkix"
+       "fmt"
+       "math/big"
+       "net"
+       "time"
+)
+
+type CertGenerator struct {
+       Bits  int
+       Hosts []string
+       IsCA  bool
+}
+
+func (gen CertGenerator) Generate() (cert tls.Certificate, err error) {
+       keyUsage := x509.KeyUsageDigitalSignature | x509.KeyUsageKeyEncipherment
+       if gen.IsCA {
+               keyUsage |= x509.KeyUsageCertSign
+       }
+       notBefore := time.Now()
+       notAfter := time.Now().Add(time.Hour * 24 * 365)
+       snMax := new(big.Int).Lsh(big.NewInt(1), 128)
+       sn, err := rand.Int(rand.Reader, snMax)
+       if err != nil {
+               err = fmt.Errorf("Failed to generate serial number: %w", err)
+               return
+       }
+       template := x509.Certificate{
+               SerialNumber: sn,
+               Subject: pkix.Name{
+                       Organization: []string{"N/A"},
+               },
+               NotBefore:             notBefore,
+               NotAfter:              notAfter,
+               KeyUsage:              keyUsage,
+               ExtKeyUsage:           []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth},
+               BasicConstraintsValid: true,
+               IsCA:                  gen.IsCA,
+       }
+       for _, h := range gen.Hosts {
+               if ip := net.ParseIP(h); ip != nil {
+                       template.IPAddresses = append(template.IPAddresses, ip)
+               } else {
+                       template.DNSNames = append(template.DNSNames, h)
+               }
+       }
+       bits := gen.Bits
+       if bits == 0 {
+               bits = 4096
+       }
+       priv, err := rsa.GenerateKey(rand.Reader, bits)
+       if err != nil {
+               err = fmt.Errorf("error generating key: %w", err)
+               return
+       }
+       certder, err := x509.CreateCertificate(rand.Reader, &template, &template, &priv.PublicKey, priv)
+       if err != nil {
+               err = fmt.Errorf("error creating certificate: %w", err)
+               return
+       }
+       cert = tls.Certificate{
+               Certificate: [][]byte{certder},
+               PrivateKey:  priv,
+       }
+       return
+}
diff --git a/lib/selfsigned/cert_test.go b/lib/selfsigned/cert_test.go
new file mode 100644 (file)
index 0000000..16ed8bd
--- /dev/null
@@ -0,0 +1,26 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package selfsigned
+
+import (
+       "testing"
+)
+
+func TestCert(t *testing.T) {
+       cert, err := CertGenerator{Bits: 1024, Hosts: []string{"localhost"}, IsCA: false}.Generate()
+       if err != nil {
+               t.Error(err)
+       }
+       if len(cert.Certificate) < 1 {
+               t.Error("no certificate!")
+       }
+       cert, err = CertGenerator{Bits: 2048, Hosts: []string{"localhost"}, IsCA: true}.Generate()
+       if err != nil {
+               t.Error(err)
+       }
+       if len(cert.Certificate) < 1 {
+               t.Error("no certificate!")
+       }
+}