17170: Use TLS for controller->crunch-run traffic.
[arvados.git] / lib / crunchrun / container_gateway.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package crunchrun
6
7 import (
8         "crypto/hmac"
9         "crypto/rand"
10         "crypto/rsa"
11         "crypto/sha256"
12         "crypto/tls"
13         "fmt"
14         "io"
15         "net"
16         "net/http"
17         "os"
18         "os/exec"
19         "sync"
20         "syscall"
21
22         "git.arvados.org/arvados.git/lib/selfsigned"
23         "git.arvados.org/arvados.git/sdk/go/httpserver"
24         "github.com/creack/pty"
25         "github.com/google/shlex"
26         "golang.org/x/crypto/ssh"
27 )
28
29 type Gateway struct {
30         DockerContainerID *string
31         ContainerUUID     string
32         Address           string // listen host:port; if port=0, Start() will change it to the selected port
33         AuthSecret        string
34         Log               interface {
35                 Printf(fmt string, args ...interface{})
36         }
37
38         sshConfig   ssh.ServerConfig
39         requestAuth string
40         respondAuth string
41 }
42
43 // startGatewayServer starts an http server that allows authenticated
44 // clients to open an interactive "docker exec" session and (in
45 // future) connect to tcp ports inside the docker container.
46 func (gw *Gateway) Start() error {
47         gw.sshConfig = ssh.ServerConfig{
48                 NoClientAuth: true,
49                 PasswordCallback: func(c ssh.ConnMetadata, pass []byte) (*ssh.Permissions, error) {
50                         if c.User() == "_" {
51                                 return nil, nil
52                         } else {
53                                 return nil, fmt.Errorf("cannot specify user %q via ssh client", c.User())
54                         }
55                 },
56                 PublicKeyCallback: func(c ssh.ConnMetadata, pubKey ssh.PublicKey) (*ssh.Permissions, error) {
57                         if c.User() == "_" {
58                                 return &ssh.Permissions{
59                                         Extensions: map[string]string{
60                                                 "pubkey-fp": ssh.FingerprintSHA256(pubKey),
61                                         },
62                                 }, nil
63                         } else {
64                                 return nil, fmt.Errorf("cannot specify user %q via ssh client", c.User())
65                         }
66                 },
67         }
68         pvt, err := rsa.GenerateKey(rand.Reader, 2048)
69         if err != nil {
70                 return err
71         }
72         err = pvt.Validate()
73         if err != nil {
74                 return err
75         }
76         signer, err := ssh.NewSignerFromKey(pvt)
77         if err != nil {
78                 return err
79         }
80         gw.sshConfig.AddHostKey(signer)
81
82         // Address (typically provided by arvados-dispatch-cloud) is
83         // HOST:PORT where HOST is our IP address or hostname as seen
84         // from arvados-controller, and PORT is either the desired
85         // port where we should run our gateway server, or "0" if we
86         // should choose an available port.
87         host, port, err := net.SplitHostPort(gw.Address)
88         if err != nil {
89                 return err
90         }
91         cert, err := selfsigned.CertGenerator{}.Generate()
92         if err != nil {
93                 return err
94         }
95         h := hmac.New(sha256.New, []byte(gw.AuthSecret))
96         h.Write(cert.Certificate[0])
97         gw.requestAuth = fmt.Sprintf("%x", h.Sum(nil))
98         h.Reset()
99         h.Write([]byte(gw.requestAuth))
100         gw.respondAuth = fmt.Sprintf("%x", h.Sum(nil))
101
102         srv := &httpserver.Server{
103                 Server: http.Server{
104                         Handler: http.HandlerFunc(gw.handleSSH),
105                         TLSConfig: &tls.Config{
106                                 Certificates: []tls.Certificate{cert},
107                         },
108                 },
109                 Addr: ":" + port,
110         }
111         err = srv.Start()
112         if err != nil {
113                 return err
114         }
115         // Get the port number we are listening on (the port might be
116         // "0" or a port name, in which case this will be different).
117         _, port, err = net.SplitHostPort(srv.Addr)
118         if err != nil {
119                 return err
120         }
121         // When changing state to Running, we will set
122         // gateway_address to "HOST:PORT" where HOST is our
123         // external hostname/IP as provided by arvados-dispatch-cloud,
124         // and PORT is the port number we ended up listening on.
125         gw.Address = net.JoinHostPort(host, port)
126         return nil
127 }
128
129 // handleSSH connects to an SSH server that runs commands as root in
130 // the container. The tunnel itself can only be created by an
131 // authenticated caller, so the SSH server itself is wide open (any
132 // password or key will be accepted).
133 //
134 // Requests must have path "/ssh" and the following headers:
135 //
136 // Connection: upgrade
137 // Upgrade: ssh
138 // X-Arvados-Target-Uuid: uuid of container
139 // X-Arvados-Authorization: must match
140 // hmac(AuthSecret,certfingerprint) (this prevents other containers
141 // and shell nodes from connecting directly)
142 //
143 // Optional header:
144 //
145 // X-Arvados-Detach-Keys: argument to "docker attach --detach-keys",
146 // e.g., "ctrl-p,ctrl-q"
147 func (gw *Gateway) handleSSH(w http.ResponseWriter, req *http.Request) {
148         // In future we'll handle browser traffic too, but for now the
149         // only traffic we expect is an SSH tunnel from
150         // (*lib/controller/localdb.Conn)ContainerSSH()
151         if req.Method != "GET" || req.Header.Get("Upgrade") != "ssh" {
152                 http.Error(w, "path not found", http.StatusNotFound)
153                 return
154         }
155         if want := req.Header.Get("X-Arvados-Target-Uuid"); want != gw.ContainerUUID {
156                 http.Error(w, fmt.Sprintf("misdirected request: meant for %q but received by crunch-run %q", want, gw.ContainerUUID), http.StatusBadGateway)
157                 return
158         }
159         if req.Header.Get("X-Arvados-Authorization") != gw.requestAuth {
160                 http.Error(w, "bad X-Arvados-Authorization header", http.StatusUnauthorized)
161                 return
162         }
163         detachKeys := req.Header.Get("X-Arvados-Detach-Keys")
164         username := req.Header.Get("X-Arvados-Login-Username")
165         if username == "" {
166                 username = "root"
167         }
168         hj, ok := w.(http.Hijacker)
169         if !ok {
170                 http.Error(w, "ResponseWriter does not support connection upgrade", http.StatusInternalServerError)
171                 return
172         }
173         netconn, _, err := hj.Hijack()
174         if !ok {
175                 http.Error(w, err.Error(), http.StatusInternalServerError)
176                 return
177         }
178         defer netconn.Close()
179         w.Header().Set("Connection", "upgrade")
180         w.Header().Set("Upgrade", "ssh")
181         w.Header().Set("X-Arvados-Authorization-Response", gw.respondAuth)
182         netconn.Write([]byte("HTTP/1.1 101 Switching Protocols\r\n"))
183         w.Header().Write(netconn)
184         netconn.Write([]byte("\r\n"))
185
186         ctx := req.Context()
187
188         conn, newchans, reqs, err := ssh.NewServerConn(netconn, &gw.sshConfig)
189         if err != nil {
190                 gw.Log.Printf("ssh.NewServerConn: %s", err)
191                 return
192         }
193         defer conn.Close()
194         go ssh.DiscardRequests(reqs)
195         for newch := range newchans {
196                 if newch.ChannelType() != "session" {
197                         newch.Reject(ssh.UnknownChannelType, "unknown channel type")
198                         continue
199                 }
200                 ch, reqs, err := newch.Accept()
201                 if err != nil {
202                         gw.Log.Printf("accept channel: %s", err)
203                         return
204                 }
205                 var pty0, tty0 *os.File
206                 go func() {
207                         defer pty0.Close()
208                         defer tty0.Close()
209                         // Where to send errors/messages for the
210                         // client to see
211                         logw := io.Writer(ch.Stderr())
212                         // How to end lines when sending
213                         // errors/messages to the client (changes to
214                         // \r\n when using a pty)
215                         eol := "\n"
216                         // Env vars to add to child process
217                         termEnv := []string(nil)
218                         for req := range reqs {
219                                 ok := false
220                                 switch req.Type {
221                                 case "shell", "exec":
222                                         ok = true
223                                         var payload struct {
224                                                 Command string
225                                         }
226                                         ssh.Unmarshal(req.Payload, &payload)
227                                         execargs, err := shlex.Split(payload.Command)
228                                         if err != nil {
229                                                 fmt.Fprintf(logw, "error parsing supplied command: %s"+eol, err)
230                                                 return
231                                         }
232                                         if len(execargs) == 0 {
233                                                 execargs = []string{"/bin/bash", "-login"}
234                                         }
235                                         go func() {
236                                                 cmd := exec.CommandContext(ctx, "docker", "exec", "-i", "--detach-keys="+detachKeys, "--user="+username)
237                                                 cmd.Stdin = ch
238                                                 cmd.Stdout = ch
239                                                 cmd.Stderr = ch.Stderr()
240                                                 if tty0 != nil {
241                                                         cmd.Args = append(cmd.Args, "-t")
242                                                         cmd.Stdin = tty0
243                                                         cmd.Stdout = tty0
244                                                         cmd.Stderr = tty0
245                                                         var wg sync.WaitGroup
246                                                         defer wg.Wait()
247                                                         wg.Add(2)
248                                                         go func() { io.Copy(ch, pty0); wg.Done() }()
249                                                         go func() { io.Copy(pty0, ch); wg.Done() }()
250                                                         // Send our own debug messages to tty as well.
251                                                         logw = tty0
252                                                 }
253                                                 cmd.Args = append(cmd.Args, *gw.DockerContainerID)
254                                                 cmd.Args = append(cmd.Args, execargs...)
255                                                 cmd.SysProcAttr = &syscall.SysProcAttr{
256                                                         Setctty: tty0 != nil,
257                                                         Setsid:  true,
258                                                 }
259                                                 cmd.Env = append(os.Environ(), termEnv...)
260                                                 err := cmd.Run()
261                                                 errClose := ch.CloseWrite()
262                                                 var resp struct {
263                                                         Status uint32
264                                                 }
265                                                 if err, ok := err.(*exec.ExitError); ok {
266                                                         if status, ok := err.Sys().(syscall.WaitStatus); ok {
267                                                                 resp.Status = uint32(status.ExitStatus())
268                                                         }
269                                                 }
270                                                 if resp.Status == 0 && (err != nil || errClose != nil) {
271                                                         resp.Status = 1
272                                                 }
273                                                 ch.SendRequest("exit-status", false, ssh.Marshal(&resp))
274                                                 ch.Close()
275                                         }()
276                                 case "pty-req":
277                                         eol = "\r\n"
278                                         p, t, err := pty.Open()
279                                         if err != nil {
280                                                 fmt.Fprintf(ch.Stderr(), "pty failed: %s"+eol, err)
281                                                 break
282                                         }
283                                         pty0, tty0 = p, t
284                                         ok = true
285                                         var payload struct {
286                                                 Term string
287                                                 Cols uint32
288                                                 Rows uint32
289                                                 X    uint32
290                                                 Y    uint32
291                                         }
292                                         ssh.Unmarshal(req.Payload, &payload)
293                                         termEnv = []string{"TERM=" + payload.Term, "USE_TTY=1"}
294                                         err = pty.Setsize(pty0, &pty.Winsize{Rows: uint16(payload.Rows), Cols: uint16(payload.Cols), X: uint16(payload.X), Y: uint16(payload.Y)})
295                                         if err != nil {
296                                                 fmt.Fprintf(logw, "pty-req: setsize failed: %s"+eol, err)
297                                         }
298                                 case "window-change":
299                                         var payload struct {
300                                                 Cols uint32
301                                                 Rows uint32
302                                                 X    uint32
303                                                 Y    uint32
304                                         }
305                                         ssh.Unmarshal(req.Payload, &payload)
306                                         err := pty.Setsize(pty0, &pty.Winsize{Rows: uint16(payload.Rows), Cols: uint16(payload.Cols), X: uint16(payload.X), Y: uint16(payload.Y)})
307                                         if err != nil {
308                                                 fmt.Fprintf(logw, "window-change: setsize failed: %s"+eol, err)
309                                                 break
310                                         }
311                                         ok = true
312                                 case "env":
313                                         // TODO: implement "env"
314                                         // requests by setting env
315                                         // vars in the docker-exec
316                                         // command (not docker-exec's
317                                         // own environment, which
318                                         // would be a gaping security
319                                         // hole).
320                                 default:
321                                         // fmt.Fprintf(logw, "declining %q req"+eol, req.Type)
322                                 }
323                                 if req.WantReply {
324                                         req.Reply(ok, nil)
325                                 }
326                         }
327                 }()
328         }
329 }