17119: Merge branch 'master' into 17119-add-filter-groups
[arvados.git] / lib / crunchrun / container_gateway.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package crunchrun
6
7 import (
8         "crypto/hmac"
9         "crypto/rand"
10         "crypto/rsa"
11         "crypto/sha256"
12         "crypto/tls"
13         "fmt"
14         "io"
15         "net"
16         "net/http"
17         "os"
18         "os/exec"
19         "sync"
20         "syscall"
21
22         "git.arvados.org/arvados.git/lib/selfsigned"
23         "git.arvados.org/arvados.git/sdk/go/httpserver"
24         "github.com/creack/pty"
25         "github.com/google/shlex"
26         "golang.org/x/crypto/ssh"
27 )
28
29 type Gateway struct {
30         DockerContainerID *string
31         ContainerUUID     string
32         Address           string // listen host:port; if port=0, Start() will change it to the selected port
33         AuthSecret        string
34         Log               interface {
35                 Printf(fmt string, args ...interface{})
36         }
37
38         sshConfig   ssh.ServerConfig
39         requestAuth string
40         respondAuth string
41 }
42
43 // Start starts an http server that allows authenticated clients to open an
44 // interactive "docker exec" session and (in future) connect to tcp ports
45 // inside the docker container.
46 func (gw *Gateway) Start() error {
47         gw.sshConfig = ssh.ServerConfig{
48                 NoClientAuth: true,
49                 PasswordCallback: func(c ssh.ConnMetadata, pass []byte) (*ssh.Permissions, error) {
50                         if c.User() == "_" {
51                                 return nil, nil
52                         }
53                         return nil, fmt.Errorf("cannot specify user %q via ssh client", c.User())
54                 },
55                 PublicKeyCallback: func(c ssh.ConnMetadata, pubKey ssh.PublicKey) (*ssh.Permissions, error) {
56                         if c.User() == "_" {
57                                 return &ssh.Permissions{
58                                         Extensions: map[string]string{
59                                                 "pubkey-fp": ssh.FingerprintSHA256(pubKey),
60                                         },
61                                 }, nil
62                         }
63                         return nil, fmt.Errorf("cannot specify user %q via ssh client", c.User())
64                 },
65         }
66         pvt, err := rsa.GenerateKey(rand.Reader, 2048)
67         if err != nil {
68                 return err
69         }
70         err = pvt.Validate()
71         if err != nil {
72                 return err
73         }
74         signer, err := ssh.NewSignerFromKey(pvt)
75         if err != nil {
76                 return err
77         }
78         gw.sshConfig.AddHostKey(signer)
79
80         // Address (typically provided by arvados-dispatch-cloud) is
81         // HOST:PORT where HOST is our IP address or hostname as seen
82         // from arvados-controller, and PORT is either the desired
83         // port where we should run our gateway server, or "0" if we
84         // should choose an available port.
85         host, port, err := net.SplitHostPort(gw.Address)
86         if err != nil {
87                 return err
88         }
89         cert, err := selfsigned.CertGenerator{}.Generate()
90         if err != nil {
91                 return err
92         }
93         h := hmac.New(sha256.New, []byte(gw.AuthSecret))
94         h.Write(cert.Certificate[0])
95         gw.requestAuth = fmt.Sprintf("%x", h.Sum(nil))
96         h.Reset()
97         h.Write([]byte(gw.requestAuth))
98         gw.respondAuth = fmt.Sprintf("%x", h.Sum(nil))
99
100         srv := &httpserver.Server{
101                 Server: http.Server{
102                         Handler: http.HandlerFunc(gw.handleSSH),
103                         TLSConfig: &tls.Config{
104                                 Certificates: []tls.Certificate{cert},
105                         },
106                 },
107                 Addr: ":" + port,
108         }
109         err = srv.Start()
110         if err != nil {
111                 return err
112         }
113         // Get the port number we are listening on (the port might be
114         // "0" or a port name, in which case this will be different).
115         _, port, err = net.SplitHostPort(srv.Addr)
116         if err != nil {
117                 return err
118         }
119         // When changing state to Running, we will set
120         // gateway_address to "HOST:PORT" where HOST is our
121         // external hostname/IP as provided by arvados-dispatch-cloud,
122         // and PORT is the port number we ended up listening on.
123         gw.Address = net.JoinHostPort(host, port)
124         return nil
125 }
126
127 // handleSSH connects to an SSH server that allows the caller to run
128 // interactive commands as root (or any other desired user) inside the
129 // container. The tunnel itself can only be created by an
130 // authenticated caller, so the SSH server itself is wide open (any
131 // password or key will be accepted).
132 //
133 // Requests must have path "/ssh" and the following headers:
134 //
135 // Connection: upgrade
136 // Upgrade: ssh
137 // X-Arvados-Target-Uuid: uuid of container
138 // X-Arvados-Authorization: must match
139 // hmac(AuthSecret,certfingerprint) (this prevents other containers
140 // and shell nodes from connecting directly)
141 //
142 // Optional headers:
143 //
144 // X-Arvados-Detach-Keys: argument to "docker exec --detach-keys",
145 // e.g., "ctrl-p,ctrl-q"
146 // X-Arvados-Login-Username: argument to "docker exec --user": account
147 // used to run command(s) inside the container.
148 func (gw *Gateway) handleSSH(w http.ResponseWriter, req *http.Request) {
149         // In future we'll handle browser traffic too, but for now the
150         // only traffic we expect is an SSH tunnel from
151         // (*lib/controller/localdb.Conn)ContainerSSH()
152         if req.Method != "GET" || req.Header.Get("Upgrade") != "ssh" {
153                 http.Error(w, "path not found", http.StatusNotFound)
154                 return
155         }
156         if want := req.Header.Get("X-Arvados-Target-Uuid"); want != gw.ContainerUUID {
157                 http.Error(w, fmt.Sprintf("misdirected request: meant for %q but received by crunch-run %q", want, gw.ContainerUUID), http.StatusBadGateway)
158                 return
159         }
160         if req.Header.Get("X-Arvados-Authorization") != gw.requestAuth {
161                 http.Error(w, "bad X-Arvados-Authorization header", http.StatusUnauthorized)
162                 return
163         }
164         detachKeys := req.Header.Get("X-Arvados-Detach-Keys")
165         username := req.Header.Get("X-Arvados-Login-Username")
166         if username == "" {
167                 username = "root"
168         }
169         hj, ok := w.(http.Hijacker)
170         if !ok {
171                 http.Error(w, "ResponseWriter does not support connection upgrade", http.StatusInternalServerError)
172                 return
173         }
174         netconn, _, err := hj.Hijack()
175         if !ok {
176                 http.Error(w, err.Error(), http.StatusInternalServerError)
177                 return
178         }
179         defer netconn.Close()
180         w.Header().Set("Connection", "upgrade")
181         w.Header().Set("Upgrade", "ssh")
182         w.Header().Set("X-Arvados-Authorization-Response", gw.respondAuth)
183         netconn.Write([]byte("HTTP/1.1 101 Switching Protocols\r\n"))
184         w.Header().Write(netconn)
185         netconn.Write([]byte("\r\n"))
186
187         ctx := req.Context()
188
189         conn, newchans, reqs, err := ssh.NewServerConn(netconn, &gw.sshConfig)
190         if err != nil {
191                 gw.Log.Printf("ssh.NewServerConn: %s", err)
192                 return
193         }
194         defer conn.Close()
195         go ssh.DiscardRequests(reqs)
196         for newch := range newchans {
197                 if newch.ChannelType() != "session" {
198                         newch.Reject(ssh.UnknownChannelType, fmt.Sprintf("unsupported channel type %q", newch.ChannelType()))
199                         continue
200                 }
201                 ch, reqs, err := newch.Accept()
202                 if err != nil {
203                         gw.Log.Printf("accept channel: %s", err)
204                         return
205                 }
206                 var pty0, tty0 *os.File
207                 go func() {
208                         // Where to send errors/messages for the
209                         // client to see
210                         logw := io.Writer(ch.Stderr())
211                         // How to end lines when sending
212                         // errors/messages to the client (changes to
213                         // \r\n when using a pty)
214                         eol := "\n"
215                         // Env vars to add to child process
216                         termEnv := []string(nil)
217                         for req := range reqs {
218                                 ok := false
219                                 switch req.Type {
220                                 case "shell", "exec":
221                                         ok = true
222                                         var payload struct {
223                                                 Command string
224                                         }
225                                         ssh.Unmarshal(req.Payload, &payload)
226                                         execargs, err := shlex.Split(payload.Command)
227                                         if err != nil {
228                                                 fmt.Fprintf(logw, "error parsing supplied command: %s"+eol, err)
229                                                 return
230                                         }
231                                         if len(execargs) == 0 {
232                                                 execargs = []string{"/bin/bash", "-login"}
233                                         }
234                                         go func() {
235                                                 cmd := exec.CommandContext(ctx, "docker", "exec", "-i", "--detach-keys="+detachKeys, "--user="+username)
236                                                 cmd.Stdin = ch
237                                                 cmd.Stdout = ch
238                                                 cmd.Stderr = ch.Stderr()
239                                                 if tty0 != nil {
240                                                         cmd.Args = append(cmd.Args, "-t")
241                                                         cmd.Stdin = tty0
242                                                         cmd.Stdout = tty0
243                                                         cmd.Stderr = tty0
244                                                         var wg sync.WaitGroup
245                                                         defer wg.Wait()
246                                                         wg.Add(2)
247                                                         go func() { io.Copy(ch, pty0); wg.Done() }()
248                                                         go func() { io.Copy(pty0, ch); wg.Done() }()
249                                                         // Send our own debug messages to tty as well.
250                                                         logw = tty0
251                                                 }
252                                                 cmd.Args = append(cmd.Args, *gw.DockerContainerID)
253                                                 cmd.Args = append(cmd.Args, execargs...)
254                                                 cmd.SysProcAttr = &syscall.SysProcAttr{
255                                                         Setctty: tty0 != nil,
256                                                         Setsid:  true,
257                                                 }
258                                                 cmd.Env = append(os.Environ(), termEnv...)
259                                                 err := cmd.Run()
260                                                 var resp struct {
261                                                         Status uint32
262                                                 }
263                                                 if exiterr, ok := err.(*exec.ExitError); ok {
264                                                         if status, ok := exiterr.Sys().(syscall.WaitStatus); ok {
265                                                                 resp.Status = uint32(status.ExitStatus())
266                                                         }
267                                                 } else if err != nil {
268                                                         // Propagate errors like `exec: "docker": executable file not found in $PATH`
269                                                         fmt.Fprintln(ch.Stderr(), err)
270                                                 }
271                                                 errClose := ch.CloseWrite()
272                                                 if resp.Status == 0 && (err != nil || errClose != nil) {
273                                                         resp.Status = 1
274                                                 }
275                                                 ch.SendRequest("exit-status", false, ssh.Marshal(&resp))
276                                                 ch.Close()
277                                         }()
278                                 case "pty-req":
279                                         eol = "\r\n"
280                                         p, t, err := pty.Open()
281                                         if err != nil {
282                                                 fmt.Fprintf(ch.Stderr(), "pty failed: %s"+eol, err)
283                                                 break
284                                         }
285                                         defer p.Close()
286                                         defer t.Close()
287                                         pty0, tty0 = p, t
288                                         ok = true
289                                         var payload struct {
290                                                 Term string
291                                                 Cols uint32
292                                                 Rows uint32
293                                                 X    uint32
294                                                 Y    uint32
295                                         }
296                                         ssh.Unmarshal(req.Payload, &payload)
297                                         termEnv = []string{"TERM=" + payload.Term, "USE_TTY=1"}
298                                         err = pty.Setsize(pty0, &pty.Winsize{Rows: uint16(payload.Rows), Cols: uint16(payload.Cols), X: uint16(payload.X), Y: uint16(payload.Y)})
299                                         if err != nil {
300                                                 fmt.Fprintf(logw, "pty-req: setsize failed: %s"+eol, err)
301                                         }
302                                 case "window-change":
303                                         var payload struct {
304                                                 Cols uint32
305                                                 Rows uint32
306                                                 X    uint32
307                                                 Y    uint32
308                                         }
309                                         ssh.Unmarshal(req.Payload, &payload)
310                                         err := pty.Setsize(pty0, &pty.Winsize{Rows: uint16(payload.Rows), Cols: uint16(payload.Cols), X: uint16(payload.X), Y: uint16(payload.Y)})
311                                         if err != nil {
312                                                 fmt.Fprintf(logw, "window-change: setsize failed: %s"+eol, err)
313                                                 break
314                                         }
315                                         ok = true
316                                 case "env":
317                                         // TODO: implement "env"
318                                         // requests by setting env
319                                         // vars in the docker-exec
320                                         // command (not docker-exec's
321                                         // own environment, which
322                                         // would be a gaping security
323                                         // hole).
324                                 default:
325                                         // fmt.Fprintf(logw, "declining %q req"+eol, req.Type)
326                                 }
327                                 if req.WantReply {
328                                         req.Reply(ok, nil)
329                                 }
330                         }
331                 }()
332         }
333 }