ba52f8ab43cd6f49a107777b0d28f9c14fec92ab
[arvados.git] / lib / crunchrun / container_gateway.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package crunchrun
6
7 import (
8         "crypto/hmac"
9         "crypto/rand"
10         "crypto/rsa"
11         "crypto/sha256"
12         "crypto/tls"
13         "fmt"
14         "io"
15         "net"
16         "net/http"
17         "net/url"
18         "os"
19         "os/exec"
20         "sync"
21         "syscall"
22         "time"
23
24         "git.arvados.org/arvados.git/lib/controller/rpc"
25         "git.arvados.org/arvados.git/lib/selfsigned"
26         "git.arvados.org/arvados.git/sdk/go/arvados"
27         "git.arvados.org/arvados.git/sdk/go/auth"
28         "git.arvados.org/arvados.git/sdk/go/ctxlog"
29         "git.arvados.org/arvados.git/sdk/go/httpserver"
30         "github.com/creack/pty"
31         "github.com/google/shlex"
32         "github.com/hashicorp/yamux"
33         "golang.org/x/crypto/ssh"
34         "golang.org/x/net/context"
35 )
36
// GatewayTarget is the set of container operations the gateway
// server relies on: building a command that runs inside the
// container, and reporting the container's IP address.
type GatewayTarget interface {
	// InjectCommand returns a command that will execute cmd
	// inside the container. detachKeys, username, and usingTTY
	// are passed through from the session request; how they are
	// applied is up to the implementation.
	InjectCommand(ctx context.Context, detachKeys, username string, usingTTY bool, cmd []string) (*exec.Cmd, error)

	// IPAddress returns the IP address inside the container.
	IPAddress() (string, error)
}
44
// GatewayTargetStub is a GatewayTarget that runs commands directly
// as local processes instead of inside a container.
type GatewayTargetStub struct{}

// IPAddress always reports the loopback address.
func (GatewayTargetStub) IPAddress() (string, error) { return "127.0.0.1", nil }

// InjectCommand returns a command that runs cmd as a local process
// bound to ctx. The detachKeys, username, and usingTTY arguments are
// ignored. An empty cmd yields an error instead of an index-out-of-
// range panic.
func (GatewayTargetStub) InjectCommand(ctx context.Context, detachKeys, username string, usingTTY bool, cmd []string) (*exec.Cmd, error) {
	if len(cmd) == 0 {
		return nil, fmt.Errorf("no command specified")
	}
	return exec.CommandContext(ctx, cmd[0], cmd[1:]...), nil
}
51
// Gateway is an https server through which authenticated clients can
// open SSH sessions (tunneled over an upgraded HTTP connection) to a
// single container.
type Gateway struct {
	// UUID of the container served by this gateway. Incoming
	// requests that name a different uuid are rejected, and it
	// identifies this container when requesting a controller
	// tunnel.
	ContainerUUID string
	// Caller should set Address to "", or "host:0" or "host:port"
	// where host is a known external IP address; port is a
	// desired port number to listen on; and ":0" chooses an
	// available dynamic port.
	//
	// If Address is "", Start() listens only on the loopback
	// interface (and changes Address to "127.0.0.1:port").
	// Otherwise it listens on all interfaces.
	//
	// If Address is "host:0", Start() updates Address to
	// "host:port".
	Address    string
	// Key for the HMACs computed in Start() (requestAuth and
	// respondAuth); also passed to the controller when requesting
	// a tunnel.
	AuthSecret string
	// Provides the in-container command wrapper and the
	// container's IP address.
	Target     GatewayTarget
	// Destination for diagnostic messages.
	Log        interface {
		Printf(fmt string, args ...interface{})
	}
	// If non-nil, set up a ContainerGatewayTunnel, so that the
	// controller can connect to us even if our external IP
	// address is unknown or not routable from controller.
	ArvadosClient *arvados.Client

	// SSH server configuration (callbacks and host key), filled
	// in by Start().
	sshConfig   ssh.ServerConfig
	// Hex HMAC of our TLS certificate, keyed with AuthSecret.
	// Clients must present it in X-Arvados-Authorization.
	requestAuth string
	// Hex HMAC of requestAuth, keyed with AuthSecret. Sent back
	// to clients in X-Arvados-Authorization-Response.
	respondAuth string
}
80
81 // Start starts an http server that allows authenticated clients to open an
82 // interactive "docker exec" session and (in future) connect to tcp ports
83 // inside the docker container.
84 func (gw *Gateway) Start() error {
85         gw.sshConfig = ssh.ServerConfig{
86                 NoClientAuth: true,
87                 PasswordCallback: func(c ssh.ConnMetadata, pass []byte) (*ssh.Permissions, error) {
88                         if c.User() == "_" {
89                                 return nil, nil
90                         }
91                         return nil, fmt.Errorf("cannot specify user %q via ssh client", c.User())
92                 },
93                 PublicKeyCallback: func(c ssh.ConnMetadata, pubKey ssh.PublicKey) (*ssh.Permissions, error) {
94                         if c.User() == "_" {
95                                 return &ssh.Permissions{
96                                         Extensions: map[string]string{
97                                                 "pubkey-fp": ssh.FingerprintSHA256(pubKey),
98                                         },
99                                 }, nil
100                         }
101                         return nil, fmt.Errorf("cannot specify user %q via ssh client", c.User())
102                 },
103         }
104         pvt, err := rsa.GenerateKey(rand.Reader, 2048)
105         if err != nil {
106                 return err
107         }
108         err = pvt.Validate()
109         if err != nil {
110                 return err
111         }
112         signer, err := ssh.NewSignerFromKey(pvt)
113         if err != nil {
114                 return err
115         }
116         gw.sshConfig.AddHostKey(signer)
117
118         // Address (typically provided by arvados-dispatch-cloud) is
119         // HOST:PORT where HOST is our IP address or hostname as seen
120         // from arvados-controller, and PORT is either the desired
121         // port where we should run our gateway server, or "0" if we
122         // should choose an available port.
123         extAddr := gw.Address
124         // Generally we can't know which local interface corresponds
125         // to an externally reachable IP address, so if we expect to
126         // be reachable by external hosts, we listen on all
127         // interfaces.
128         listenHost := ""
129         if extAddr == "" {
130                 // If the dispatcher doesn't tell us our external IP
131                 // address, controller will only be able to connect
132                 // through the tunnel (see runTunnel), so our gateway
133                 // server only needs to listen on the loopback
134                 // interface.
135                 extAddr = "127.0.0.1:0"
136                 listenHost = "127.0.0.1"
137         }
138         extHost, extPort, err := net.SplitHostPort(extAddr)
139         if err != nil {
140                 return err
141         }
142         cert, err := selfsigned.CertGenerator{}.Generate()
143         if err != nil {
144                 return err
145         }
146         h := hmac.New(sha256.New, []byte(gw.AuthSecret))
147         h.Write(cert.Certificate[0])
148         gw.requestAuth = fmt.Sprintf("%x", h.Sum(nil))
149         h.Reset()
150         h.Write([]byte(gw.requestAuth))
151         gw.respondAuth = fmt.Sprintf("%x", h.Sum(nil))
152
153         srv := &httpserver.Server{
154                 Server: http.Server{
155                         Handler: http.HandlerFunc(gw.handleSSH),
156                         TLSConfig: &tls.Config{
157                                 Certificates: []tls.Certificate{cert},
158                         },
159                 },
160                 Addr: net.JoinHostPort(listenHost, extPort),
161         }
162         err = srv.Start()
163         if err != nil {
164                 return err
165         }
166         // Get the port number we are listening on (extPort might be
167         // "0" or a port name, in which case this will be different).
168         _, listenPort, err := net.SplitHostPort(srv.Addr)
169         if err != nil {
170                 return err
171         }
172         // When changing state to Running, the caller will want to set
173         // gateway_address to a "HOST:PORT" that, if controller
174         // connects to it, will reach this gateway server.
175         //
176         // The most likely thing to work is: HOST is our external
177         // hostname/IP as provided by the caller
178         // (arvados-dispatch-cloud) or 127.0.0.1 to indicate
179         // non-tunnel connections aren't available; and PORT is the
180         // port number we are listening on.
181         gw.Address = net.JoinHostPort(extHost, listenPort)
182         if gw.ArvadosClient != nil {
183                 go gw.maintainTunnel(gw.Address)
184         }
185         return nil
186 }
187
188 func (gw *Gateway) maintainTunnel(addr string) {
189         for ; ; time.Sleep(5 * time.Second) {
190                 err := gw.runTunnel(addr)
191                 gw.Log.Printf("runTunnel: %s", err)
192         }
193 }
194
195 // runTunnel connects to controller and sets up a tunnel through
196 // which controller can connect to the gateway server at the given
197 // addr.
198 func (gw *Gateway) runTunnel(addr string) error {
199         ctx := auth.NewContext(context.Background(), auth.NewCredentials(gw.ArvadosClient.AuthToken))
200         arpc := rpc.NewConn("", &url.URL{Scheme: "https", Host: gw.ArvadosClient.APIHost}, gw.ArvadosClient.Insecure, rpc.PassthroughTokenProvider)
201         tun, err := arpc.ContainerGatewayTunnel(ctx, arvados.ContainerGatewayTunnelOptions{
202                 UUID:       gw.ContainerUUID,
203                 AuthSecret: gw.AuthSecret,
204         })
205         if err != nil {
206                 return fmt.Errorf("error creating gateway tunnel: %s", err)
207         }
208         mux, err := yamux.Client(tun.Conn, nil)
209         if err != nil {
210                 return fmt.Errorf("error setting up mux client end: %s", err)
211         }
212         for {
213                 muxconn, err := mux.Accept()
214                 if err != nil {
215                         return err
216                 }
217                 gw.Log.Printf("receiving connection from tunnel, remoteAddr %s", muxconn.RemoteAddr().String())
218                 go func() {
219                         defer muxconn.Close()
220                         gwconn, err := net.Dial("tcp", addr)
221                         if err != nil {
222                                 gw.Log.Printf("error connecting to %s on behalf of tunnel connection: %s", addr, err)
223                                 return
224                         }
225                         defer gwconn.Close()
226                         var wg sync.WaitGroup
227                         wg.Add(2)
228                         go func() {
229                                 defer wg.Done()
230                                 io.Copy(gwconn, muxconn)
231                         }()
232                         go func() {
233                                 defer wg.Done()
234                                 io.Copy(muxconn, gwconn)
235                         }()
236                         wg.Wait()
237                 }()
238         }
239 }
240
241 // handleSSH connects to an SSH server that allows the caller to run
242 // interactive commands as root (or any other desired user) inside the
243 // container. The tunnel itself can only be created by an
244 // authenticated caller, so the SSH server itself is wide open (any
245 // password or key will be accepted).
246 //
247 // Requests must have path "/ssh" and the following headers:
248 //
249 // Connection: upgrade
250 // Upgrade: ssh
251 // X-Arvados-Target-Uuid: uuid of container
252 // X-Arvados-Authorization: must match
253 // hmac(AuthSecret,certfingerprint) (this prevents other containers
254 // and shell nodes from connecting directly)
255 //
256 // Optional headers:
257 //
258 // X-Arvados-Detach-Keys: argument to "docker exec --detach-keys",
259 // e.g., "ctrl-p,ctrl-q"
260 // X-Arvados-Login-Username: argument to "docker exec --user": account
261 // used to run command(s) inside the container.
262 func (gw *Gateway) handleSSH(w http.ResponseWriter, req *http.Request) {
263         // In future we'll handle browser traffic too, but for now the
264         // only traffic we expect is an SSH tunnel from
265         // (*lib/controller/localdb.Conn)ContainerSSH()
266         if req.Method != "POST" || req.Header.Get("Upgrade") != "ssh" {
267                 http.Error(w, "path not found", http.StatusNotFound)
268                 return
269         }
270         req.ParseForm()
271         if want := req.Form.Get("uuid"); want != gw.ContainerUUID {
272                 http.Error(w, fmt.Sprintf("misdirected request: meant for %q but received by crunch-run %q", want, gw.ContainerUUID), http.StatusBadGateway)
273                 return
274         }
275         if req.Header.Get("X-Arvados-Authorization") != gw.requestAuth {
276                 http.Error(w, "bad X-Arvados-Authorization header", http.StatusUnauthorized)
277                 return
278         }
279         detachKeys := req.Form.Get("detach_keys")
280         username := req.Form.Get("login_username")
281         if username == "" {
282                 username = "root"
283         }
284         hj, ok := w.(http.Hijacker)
285         if !ok {
286                 http.Error(w, "ResponseWriter does not support connection upgrade", http.StatusInternalServerError)
287                 return
288         }
289         netconn, _, err := hj.Hijack()
290         if !ok {
291                 http.Error(w, err.Error(), http.StatusInternalServerError)
292                 return
293         }
294         defer netconn.Close()
295         w.Header().Set("Connection", "upgrade")
296         w.Header().Set("Upgrade", "ssh")
297         w.Header().Set("X-Arvados-Authorization-Response", gw.respondAuth)
298         netconn.Write([]byte("HTTP/1.1 101 Switching Protocols\r\n"))
299         w.Header().Write(netconn)
300         netconn.Write([]byte("\r\n"))
301
302         ctx := req.Context()
303
304         conn, newchans, reqs, err := ssh.NewServerConn(netconn, &gw.sshConfig)
305         if err == io.EOF {
306                 return
307         } else if err != nil {
308                 gw.Log.Printf("ssh.NewServerConn: %s", err)
309                 return
310         }
311         defer conn.Close()
312         go ssh.DiscardRequests(reqs)
313         for newch := range newchans {
314                 switch newch.ChannelType() {
315                 case "direct-tcpip":
316                         go gw.handleDirectTCPIP(ctx, newch)
317                 case "session":
318                         go gw.handleSession(ctx, newch, detachKeys, username)
319                 default:
320                         go newch.Reject(ssh.UnknownChannelType, fmt.Sprintf("unsupported channel type %q", newch.ChannelType()))
321                 }
322         }
323 }
324
325 func (gw *Gateway) handleDirectTCPIP(ctx context.Context, newch ssh.NewChannel) {
326         ch, reqs, err := newch.Accept()
327         if err != nil {
328                 gw.Log.Printf("accept direct-tcpip channel: %s", err)
329                 return
330         }
331         defer ch.Close()
332         go ssh.DiscardRequests(reqs)
333
334         // RFC 4254 7.2 (copy of channelOpenDirectMsg in
335         // golang.org/x/crypto/ssh)
336         var msg struct {
337                 Raddr string
338                 Rport uint32
339                 Laddr string
340                 Lport uint32
341         }
342         err = ssh.Unmarshal(newch.ExtraData(), &msg)
343         if err != nil {
344                 fmt.Fprintf(ch.Stderr(), "unmarshal direct-tcpip extradata: %s\n", err)
345                 return
346         }
347         switch msg.Raddr {
348         case "localhost", "0.0.0.0", "127.0.0.1", "::1", "::":
349         default:
350                 fmt.Fprintf(ch.Stderr(), "cannot forward to ports on %q, only localhost\n", msg.Raddr)
351                 return
352         }
353
354         dstaddr, err := gw.Target.IPAddress()
355         if err != nil {
356                 fmt.Fprintf(ch.Stderr(), "container has no IP address: %s\n", err)
357                 return
358         } else if dstaddr == "" {
359                 fmt.Fprintf(ch.Stderr(), "container has no IP address\n")
360                 return
361         }
362
363         dst := net.JoinHostPort(dstaddr, fmt.Sprintf("%d", msg.Rport))
364         tcpconn, err := net.Dial("tcp", dst)
365         if err != nil {
366                 fmt.Fprintf(ch.Stderr(), "%s: %s\n", dst, err)
367                 return
368         }
369         go func() {
370                 n, _ := io.Copy(ch, tcpconn)
371                 ctxlog.FromContext(ctx).Debugf("tcpip: sent %d bytes\n", n)
372                 ch.CloseWrite()
373         }()
374         n, _ := io.Copy(tcpconn, ch)
375         ctxlog.FromContext(ctx).Debugf("tcpip: received %d bytes\n", n)
376 }
377
// handleSession accepts an SSH "session" channel and services the
// requests arriving on it until the request stream ends.
//
// Request types handled:
//
//   - "shell", "exec": split the supplied command with shlex
//     (defaulting to "/bin/bash -login" when empty), wrap it with
//     gw.Target.InjectCommand, and run it in a goroutine. When it
//     finishes, an "exit-status" request is sent and the channel is
//     closed.
//   - "pty-req": open a pty/tty pair to be used by a subsequent
//     command, and record TERM and USE_TTY=1 for its environment.
//   - "window-change": resize the pty.
//   - "env" and anything else: declined.
//
// detachKeys and username are passed through to InjectCommand.
func (gw *Gateway) handleSession(ctx context.Context, newch ssh.NewChannel, detachKeys, username string) {
	ch, reqs, err := newch.Accept()
	if err != nil {
		gw.Log.Printf("accept session channel: %s", err)
		return
	}
	// Both ends of the pty; nil until a "pty-req" succeeds.
	var pty0, tty0 *os.File
	// Where to send errors/messages for the client to see
	logw := io.Writer(ch.Stderr())
	// How to end lines when sending errors/messages to the client
	// (changes to \r\n when using a pty)
	eol := "\n"
	// Env vars to add to child process
	termEnv := []string(nil)
	for req := range reqs {
		// Whether to report success if the client asked for a
		// reply to this request.
		ok := false
		switch req.Type {
		case "shell", "exec":
			ok = true
			var payload struct {
				Command string
			}
			// The decode error is not checked here.
			ssh.Unmarshal(req.Payload, &payload)
			execargs, err := shlex.Split(payload.Command)
			if err != nil {
				fmt.Fprintf(logw, "error parsing supplied command: %s"+eol, err)
				// NOTE(review): this returns without
				// replying to the request or closing
				// ch -- confirm that is intended.
				return
			}
			if len(execargs) == 0 {
				execargs = []string{"/bin/bash", "-login"}
			}
			// Run the command in the background so the loop
			// can keep servicing requests (e.g.,
			// "window-change") while it runs.
			go func() {
				var resp struct {
					Status uint32
				}
				// Whatever happens, report an exit status
				// and close the channel when done.
				defer func() {
					ch.SendRequest("exit-status", false, ssh.Marshal(&resp))
					ch.Close()
				}()

				cmd, err := gw.Target.InjectCommand(ctx, detachKeys, username, tty0 != nil, execargs)
				if err != nil {
					fmt.Fprintln(ch.Stderr(), err)
					ch.CloseWrite()
					resp.Status = 1
					return
				}
				// Without a pty, the command talks to
				// the channel directly.
				cmd.Stdin = ch
				cmd.Stdout = ch
				cmd.Stderr = ch.Stderr()
				if tty0 != nil {
					// With a pty, the command gets the
					// tty end, and the pty end is
					// relayed to/from the channel.
					cmd.Stdin = tty0
					cmd.Stdout = tty0
					cmd.Stderr = tty0
					var wg sync.WaitGroup
					// Deferred calls run last-in
					// first-out, so this wait happens
					// before the exit-status request
					// above is sent.
					defer wg.Wait()
					wg.Add(2)
					go func() { io.Copy(ch, pty0); wg.Done() }()
					go func() { io.Copy(pty0, ch); wg.Done() }()
					// Send our own debug messages to tty as well.
					// NOTE(review): logw is also read by
					// the request loop in the enclosing
					// function without synchronization
					// -- confirm this is acceptable.
					logw = tty0
				}
				cmd.SysProcAttr = &syscall.SysProcAttr{
					Setctty: tty0 != nil,
					Setsid:  true,
				}
				cmd.Env = append(os.Environ(), termEnv...)
				err = cmd.Run()
				if exiterr, ok := err.(*exec.ExitError); ok {
					if status, ok := exiterr.Sys().(syscall.WaitStatus); ok {
						resp.Status = uint32(status.ExitStatus())
					}
				} else if err != nil {
					// Propagate errors like `exec: "docker": executable file not found in $PATH`
					fmt.Fprintln(ch.Stderr(), err)
				}
				errClose := ch.CloseWrite()
				// Never report success if the command
				// or the channel close failed.
				if resp.Status == 0 && (err != nil || errClose != nil) {
					resp.Status = 1
				}
			}()
		case "pty-req":
			eol = "\r\n"
			p, t, err := pty.Open()
			if err != nil {
				fmt.Fprintf(ch.Stderr(), "pty failed: %s"+eol, err)
				break
			}
			// Deferred until handleSession returns (not the
			// end of this loop iteration), so the pty stays
			// open while later requests are handled.
			defer p.Close()
			defer t.Close()
			pty0, tty0 = p, t
			ok = true
			var payload struct {
				Term string
				Cols uint32
				Rows uint32
				X    uint32
				Y    uint32
			}
			// The decode error is not checked here.
			ssh.Unmarshal(req.Payload, &payload)
			termEnv = []string{"TERM=" + payload.Term, "USE_TTY=1"}
			err = pty.Setsize(pty0, &pty.Winsize{Rows: uint16(payload.Rows), Cols: uint16(payload.Cols), X: uint16(payload.X), Y: uint16(payload.Y)})
			if err != nil {
				// A failed resize is reported to the
				// client, but the pty request still
				// succeeds.
				fmt.Fprintf(logw, "pty-req: setsize failed: %s"+eol, err)
			}
		case "window-change":
			var payload struct {
				Cols uint32
				Rows uint32
				X    uint32
				Y    uint32
			}
			// The decode error is not checked here.
			ssh.Unmarshal(req.Payload, &payload)
			// NOTE(review): pty0 is nil here if no
			// "pty-req" has succeeded -- presumably
			// Setsize then fails and the request is
			// declined; confirm.
			err := pty.Setsize(pty0, &pty.Winsize{Rows: uint16(payload.Rows), Cols: uint16(payload.Cols), X: uint16(payload.X), Y: uint16(payload.Y)})
			if err != nil {
				fmt.Fprintf(logw, "window-change: setsize failed: %s"+eol, err)
				break
			}
			ok = true
		case "env":
			// TODO: implement "env"
			// requests by setting env
			// vars in the docker-exec
			// command (not docker-exec's
			// own environment, which
			// would be a gaping security
			// hole).
		default:
			// fmt.Fprintf(logw, "declining %q req"+eol, req.Type)
		}
		if req.WantReply {
			req.Reply(ok, nil)
		}
	}
}