15370: Re-enable docker tests.
[arvados.git] / lib / crunchrun / container_gateway.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package crunchrun
6
7 import (
8         "crypto/hmac"
9         "crypto/rand"
10         "crypto/rsa"
11         "crypto/sha256"
12         "crypto/tls"
13         "fmt"
14         "io"
15         "net"
16         "net/http"
17         "os"
18         "os/exec"
19         "sync"
20         "syscall"
21
22         "git.arvados.org/arvados.git/lib/selfsigned"
23         "git.arvados.org/arvados.git/sdk/go/ctxlog"
24         "git.arvados.org/arvados.git/sdk/go/httpserver"
25         "github.com/creack/pty"
26         "github.com/google/shlex"
27         "golang.org/x/crypto/ssh"
28         "golang.org/x/net/context"
29 )
30
// GatewayTarget is the container-side backend used by Gateway: it
// supplies the address used for port forwarding and builds the
// commands used for interactive sessions.
type GatewayTarget interface {
	// IPAddress returns the IP address inside the container.
	IPAddress() (string, error)

	// InjectCommand returns a command that will execute cmd
	// inside the container.
	InjectCommand(ctx context.Context, detachKeys, username string, usingTTY bool, cmd []string) (*exec.Cmd, error)
}
38
// GatewayTargetStub is a GatewayTarget that does not involve a
// container at all: commands are run directly and the reported
// address is the loopback address.
type GatewayTargetStub struct{}

// IPAddress always returns "127.0.0.1" and a nil error.
func (GatewayTargetStub) IPAddress() (string, error) { return "127.0.0.1", nil }

// InjectCommand returns a command that runs cmd[0] with arguments
// cmd[1:], bound to ctx. The detachKeys, username, and usingTTY
// arguments are ignored.
//
// An empty cmd yields an error rather than an index-out-of-range
// panic.
func (GatewayTargetStub) InjectCommand(ctx context.Context, detachKeys, username string, usingTTY bool, cmd []string) (*exec.Cmd, error) {
	if len(cmd) == 0 {
		return nil, fmt.Errorf("cannot inject an empty command")
	}
	return exec.CommandContext(ctx, cmd[0], cmd[1:]...), nil
}
45
46 type Gateway struct {
47         ContainerUUID string
48         Address       string // listen host:port; if port=0, Start() will change it to the selected port
49         AuthSecret    string
50         Target        GatewayTarget
51         Log           interface {
52                 Printf(fmt string, args ...interface{})
53         }
54
55         sshConfig   ssh.ServerConfig
56         requestAuth string
57         respondAuth string
58 }
59
60 // Start starts an http server that allows authenticated clients to open an
61 // interactive "docker exec" session and (in future) connect to tcp ports
62 // inside the docker container.
63 func (gw *Gateway) Start() error {
64         gw.sshConfig = ssh.ServerConfig{
65                 NoClientAuth: true,
66                 PasswordCallback: func(c ssh.ConnMetadata, pass []byte) (*ssh.Permissions, error) {
67                         if c.User() == "_" {
68                                 return nil, nil
69                         }
70                         return nil, fmt.Errorf("cannot specify user %q via ssh client", c.User())
71                 },
72                 PublicKeyCallback: func(c ssh.ConnMetadata, pubKey ssh.PublicKey) (*ssh.Permissions, error) {
73                         if c.User() == "_" {
74                                 return &ssh.Permissions{
75                                         Extensions: map[string]string{
76                                                 "pubkey-fp": ssh.FingerprintSHA256(pubKey),
77                                         },
78                                 }, nil
79                         }
80                         return nil, fmt.Errorf("cannot specify user %q via ssh client", c.User())
81                 },
82         }
83         pvt, err := rsa.GenerateKey(rand.Reader, 2048)
84         if err != nil {
85                 return err
86         }
87         err = pvt.Validate()
88         if err != nil {
89                 return err
90         }
91         signer, err := ssh.NewSignerFromKey(pvt)
92         if err != nil {
93                 return err
94         }
95         gw.sshConfig.AddHostKey(signer)
96
97         // Address (typically provided by arvados-dispatch-cloud) is
98         // HOST:PORT where HOST is our IP address or hostname as seen
99         // from arvados-controller, and PORT is either the desired
100         // port where we should run our gateway server, or "0" if we
101         // should choose an available port.
102         host, port, err := net.SplitHostPort(gw.Address)
103         if err != nil {
104                 return err
105         }
106         cert, err := selfsigned.CertGenerator{}.Generate()
107         if err != nil {
108                 return err
109         }
110         h := hmac.New(sha256.New, []byte(gw.AuthSecret))
111         h.Write(cert.Certificate[0])
112         gw.requestAuth = fmt.Sprintf("%x", h.Sum(nil))
113         h.Reset()
114         h.Write([]byte(gw.requestAuth))
115         gw.respondAuth = fmt.Sprintf("%x", h.Sum(nil))
116
117         srv := &httpserver.Server{
118                 Server: http.Server{
119                         Handler: http.HandlerFunc(gw.handleSSH),
120                         TLSConfig: &tls.Config{
121                                 Certificates: []tls.Certificate{cert},
122                         },
123                 },
124                 Addr: ":" + port,
125         }
126         err = srv.Start()
127         if err != nil {
128                 return err
129         }
130         // Get the port number we are listening on (the port might be
131         // "0" or a port name, in which case this will be different).
132         _, port, err = net.SplitHostPort(srv.Addr)
133         if err != nil {
134                 return err
135         }
136         // When changing state to Running, we will set
137         // gateway_address to "HOST:PORT" where HOST is our
138         // external hostname/IP as provided by arvados-dispatch-cloud,
139         // and PORT is the port number we ended up listening on.
140         gw.Address = net.JoinHostPort(host, port)
141         return nil
142 }
143
144 // handleSSH connects to an SSH server that allows the caller to run
145 // interactive commands as root (or any other desired user) inside the
146 // container. The tunnel itself can only be created by an
147 // authenticated caller, so the SSH server itself is wide open (any
148 // password or key will be accepted).
149 //
150 // Requests must have path "/ssh" and the following headers:
151 //
152 // Connection: upgrade
153 // Upgrade: ssh
154 // X-Arvados-Target-Uuid: uuid of container
155 // X-Arvados-Authorization: must match
156 // hmac(AuthSecret,certfingerprint) (this prevents other containers
157 // and shell nodes from connecting directly)
158 //
159 // Optional headers:
160 //
161 // X-Arvados-Detach-Keys: argument to "docker exec --detach-keys",
162 // e.g., "ctrl-p,ctrl-q"
163 // X-Arvados-Login-Username: argument to "docker exec --user": account
164 // used to run command(s) inside the container.
165 func (gw *Gateway) handleSSH(w http.ResponseWriter, req *http.Request) {
166         // In future we'll handle browser traffic too, but for now the
167         // only traffic we expect is an SSH tunnel from
168         // (*lib/controller/localdb.Conn)ContainerSSH()
169         if req.Method != "GET" || req.Header.Get("Upgrade") != "ssh" {
170                 http.Error(w, "path not found", http.StatusNotFound)
171                 return
172         }
173         if want := req.Header.Get("X-Arvados-Target-Uuid"); want != gw.ContainerUUID {
174                 http.Error(w, fmt.Sprintf("misdirected request: meant for %q but received by crunch-run %q", want, gw.ContainerUUID), http.StatusBadGateway)
175                 return
176         }
177         if req.Header.Get("X-Arvados-Authorization") != gw.requestAuth {
178                 http.Error(w, "bad X-Arvados-Authorization header", http.StatusUnauthorized)
179                 return
180         }
181         detachKeys := req.Header.Get("X-Arvados-Detach-Keys")
182         username := req.Header.Get("X-Arvados-Login-Username")
183         if username == "" {
184                 username = "root"
185         }
186         hj, ok := w.(http.Hijacker)
187         if !ok {
188                 http.Error(w, "ResponseWriter does not support connection upgrade", http.StatusInternalServerError)
189                 return
190         }
191         netconn, _, err := hj.Hijack()
192         if !ok {
193                 http.Error(w, err.Error(), http.StatusInternalServerError)
194                 return
195         }
196         defer netconn.Close()
197         w.Header().Set("Connection", "upgrade")
198         w.Header().Set("Upgrade", "ssh")
199         w.Header().Set("X-Arvados-Authorization-Response", gw.respondAuth)
200         netconn.Write([]byte("HTTP/1.1 101 Switching Protocols\r\n"))
201         w.Header().Write(netconn)
202         netconn.Write([]byte("\r\n"))
203
204         ctx := req.Context()
205
206         conn, newchans, reqs, err := ssh.NewServerConn(netconn, &gw.sshConfig)
207         if err != nil {
208                 gw.Log.Printf("ssh.NewServerConn: %s", err)
209                 return
210         }
211         defer conn.Close()
212         go ssh.DiscardRequests(reqs)
213         for newch := range newchans {
214                 switch newch.ChannelType() {
215                 case "direct-tcpip":
216                         go gw.handleDirectTCPIP(ctx, newch)
217                 case "session":
218                         go gw.handleSession(ctx, newch, detachKeys, username)
219                 default:
220                         go newch.Reject(ssh.UnknownChannelType, fmt.Sprintf("unsupported channel type %q", newch.ChannelType()))
221                 }
222         }
223 }
224
225 func (gw *Gateway) handleDirectTCPIP(ctx context.Context, newch ssh.NewChannel) {
226         ch, reqs, err := newch.Accept()
227         if err != nil {
228                 gw.Log.Printf("accept direct-tcpip channel: %s", err)
229                 return
230         }
231         defer ch.Close()
232         go ssh.DiscardRequests(reqs)
233
234         // RFC 4254 7.2 (copy of channelOpenDirectMsg in
235         // golang.org/x/crypto/ssh)
236         var msg struct {
237                 Raddr string
238                 Rport uint32
239                 Laddr string
240                 Lport uint32
241         }
242         err = ssh.Unmarshal(newch.ExtraData(), &msg)
243         if err != nil {
244                 fmt.Fprintf(ch.Stderr(), "unmarshal direct-tcpip extradata: %s\n", err)
245                 return
246         }
247         switch msg.Raddr {
248         case "localhost", "0.0.0.0", "127.0.0.1", "::1", "::":
249         default:
250                 fmt.Fprintf(ch.Stderr(), "cannot forward to ports on %q, only localhost\n", msg.Raddr)
251                 return
252         }
253
254         dstaddr, err := gw.Target.IPAddress()
255         if err != nil {
256                 fmt.Fprintf(ch.Stderr(), "container has no IP address: %s\n", err)
257                 return
258         } else if dstaddr == "" {
259                 fmt.Fprintf(ch.Stderr(), "container has no IP address\n")
260                 return
261         }
262
263         dst := net.JoinHostPort(dstaddr, fmt.Sprintf("%d", msg.Rport))
264         tcpconn, err := net.Dial("tcp", dst)
265         if err != nil {
266                 fmt.Fprintf(ch.Stderr(), "%s: %s\n", dst, err)
267                 return
268         }
269         go func() {
270                 n, _ := io.Copy(ch, tcpconn)
271                 ctxlog.FromContext(ctx).Debugf("tcpip: sent %d bytes\n", n)
272                 ch.CloseWrite()
273         }()
274         n, _ := io.Copy(tcpconn, ch)
275         ctxlog.FromContext(ctx).Debugf("tcpip: received %d bytes\n", n)
276 }
277
278 func (gw *Gateway) handleSession(ctx context.Context, newch ssh.NewChannel, detachKeys, username string) {
279         ch, reqs, err := newch.Accept()
280         if err != nil {
281                 gw.Log.Printf("accept session channel: %s", err)
282                 return
283         }
284         var pty0, tty0 *os.File
285         // Where to send errors/messages for the client to see
286         logw := io.Writer(ch.Stderr())
287         // How to end lines when sending errors/messages to the client
288         // (changes to \r\n when using a pty)
289         eol := "\n"
290         // Env vars to add to child process
291         termEnv := []string(nil)
292         for req := range reqs {
293                 ok := false
294                 switch req.Type {
295                 case "shell", "exec":
296                         ok = true
297                         var payload struct {
298                                 Command string
299                         }
300                         ssh.Unmarshal(req.Payload, &payload)
301                         execargs, err := shlex.Split(payload.Command)
302                         if err != nil {
303                                 fmt.Fprintf(logw, "error parsing supplied command: %s"+eol, err)
304                                 return
305                         }
306                         if len(execargs) == 0 {
307                                 execargs = []string{"/bin/bash", "-login"}
308                         }
309                         go func() {
310                                 var resp struct {
311                                         Status uint32
312                                 }
313                                 defer func() {
314                                         ch.SendRequest("exit-status", false, ssh.Marshal(&resp))
315                                         ch.Close()
316                                 }()
317
318                                 cmd, err := gw.Target.InjectCommand(ctx, detachKeys, username, tty0 != nil, execargs)
319                                 if err != nil {
320                                         fmt.Fprintln(ch.Stderr(), err)
321                                         ch.CloseWrite()
322                                         resp.Status = 1
323                                         return
324                                 }
325                                 cmd.Stdin = ch
326                                 cmd.Stdout = ch
327                                 cmd.Stderr = ch.Stderr()
328                                 if tty0 != nil {
329                                         cmd.Stdin = tty0
330                                         cmd.Stdout = tty0
331                                         cmd.Stderr = tty0
332                                         var wg sync.WaitGroup
333                                         defer wg.Wait()
334                                         wg.Add(2)
335                                         go func() { io.Copy(ch, pty0); wg.Done() }()
336                                         go func() { io.Copy(pty0, ch); wg.Done() }()
337                                         // Send our own debug messages to tty as well.
338                                         logw = tty0
339                                 }
340                                 cmd.SysProcAttr = &syscall.SysProcAttr{
341                                         Setctty: tty0 != nil,
342                                         Setsid:  true,
343                                 }
344                                 cmd.Env = append(os.Environ(), termEnv...)
345                                 err = cmd.Run()
346                                 if exiterr, ok := err.(*exec.ExitError); ok {
347                                         if status, ok := exiterr.Sys().(syscall.WaitStatus); ok {
348                                                 resp.Status = uint32(status.ExitStatus())
349                                         }
350                                 } else if err != nil {
351                                         // Propagate errors like `exec: "docker": executable file not found in $PATH`
352                                         fmt.Fprintln(ch.Stderr(), err)
353                                 }
354                                 errClose := ch.CloseWrite()
355                                 if resp.Status == 0 && (err != nil || errClose != nil) {
356                                         resp.Status = 1
357                                 }
358                         }()
359                 case "pty-req":
360                         eol = "\r\n"
361                         p, t, err := pty.Open()
362                         if err != nil {
363                                 fmt.Fprintf(ch.Stderr(), "pty failed: %s"+eol, err)
364                                 break
365                         }
366                         defer p.Close()
367                         defer t.Close()
368                         pty0, tty0 = p, t
369                         ok = true
370                         var payload struct {
371                                 Term string
372                                 Cols uint32
373                                 Rows uint32
374                                 X    uint32
375                                 Y    uint32
376                         }
377                         ssh.Unmarshal(req.Payload, &payload)
378                         termEnv = []string{"TERM=" + payload.Term, "USE_TTY=1"}
379                         err = pty.Setsize(pty0, &pty.Winsize{Rows: uint16(payload.Rows), Cols: uint16(payload.Cols), X: uint16(payload.X), Y: uint16(payload.Y)})
380                         if err != nil {
381                                 fmt.Fprintf(logw, "pty-req: setsize failed: %s"+eol, err)
382                         }
383                 case "window-change":
384                         var payload struct {
385                                 Cols uint32
386                                 Rows uint32
387                                 X    uint32
388                                 Y    uint32
389                         }
390                         ssh.Unmarshal(req.Payload, &payload)
391                         err := pty.Setsize(pty0, &pty.Winsize{Rows: uint16(payload.Rows), Cols: uint16(payload.Cols), X: uint16(payload.X), Y: uint16(payload.Y)})
392                         if err != nil {
393                                 fmt.Fprintf(logw, "window-change: setsize failed: %s"+eol, err)
394                                 break
395                         }
396                         ok = true
397                 case "env":
398                         // TODO: implement "env"
399                         // requests by setting env
400                         // vars in the docker-exec
401                         // command (not docker-exec's
402                         // own environment, which
403                         // would be a gaping security
404                         // hole).
405                 default:
406                         // fmt.Fprintf(logw, "declining %q req"+eol, req.Type)
407                 }
408                 if req.WantReply {
409                         req.Reply(ok, nil)
410                 }
411         }
412 }