1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
26 "git.arvados.org/arvados.git/lib/controller/rpc"
27 "git.arvados.org/arvados.git/lib/selfsigned"
28 "git.arvados.org/arvados.git/lib/webdavfs"
29 "git.arvados.org/arvados.git/sdk/go/arvados"
30 "git.arvados.org/arvados.git/sdk/go/auth"
31 "git.arvados.org/arvados.git/sdk/go/ctxlog"
32 "git.arvados.org/arvados.git/sdk/go/httpserver"
33 "github.com/creack/pty"
34 "github.com/google/shlex"
35 "github.com/hashicorp/yamux"
36 "golang.org/x/crypto/ssh"
37 "golang.org/x/net/webdav"
// GatewayTarget is what the gateway server needs from a container
// backend: the container's address, and a way to run a command
// inside the container.
type GatewayTarget interface {
	// IPAddress returns the IP address inside the container.
	IPAddress() (string, error)

	// InjectCommand returns a command that will execute cmd
	// inside the container.
	InjectCommand(ctx context.Context, detachKeys string, username string, usingTTY bool, cmd []string) (*exec.Cmd, error)
}
// GatewayTargetStub is a trivial GatewayTarget: it reports the
// loopback address as the container address, and runs injected
// commands directly via os/exec rather than inside a container.
type GatewayTargetStub struct{}

// IPAddress always returns "127.0.0.1" and a nil error.
func (GatewayTargetStub) IPAddress() (string, error) { return "127.0.0.1", nil }

// InjectCommand returns an *exec.Cmd, bound to ctx, that runs cmd[0]
// with arguments cmd[1:]. The detachKeys, username, and usingTTY
// arguments are ignored.
//
// An empty cmd yields a non-nil error (and a nil *exec.Cmd) instead
// of panicking on the cmd[0] index.
func (GatewayTargetStub) InjectCommand(ctx context.Context, detachKeys, username string, usingTTY bool, cmd []string) (*exec.Cmd, error) {
	if len(cmd) == 0 {
		return nil, fmt.Errorf("cannot inject empty command")
	}
	return exec.CommandContext(ctx, cmd[0], cmd[1:]...), nil
}
57 // Caller should set Address to "", or "host:0" or "host:port"
58 // where host is a known external IP address; port is a
59 // desired port number to listen on; and ":0" chooses an
60 // available dynamic port.
62 // If Address is "", Start() listens only on the loopback
63 // interface (and changes Address to "127.0.0.1:port").
64 // Otherwise it listens on all interfaces.
66 // If Address is "host:0", Start() updates Address to
72 Printf(fmt string, args ...interface{})
74 // If non-nil, set up a ContainerGatewayTunnel, so that the
75 // controller can connect to us even if our external IP
76 // address is unknown or not routable from controller.
77 ArvadosClient *arvados.Client
79 // When a tunnel is connected or reconnected, this func (if
80 // not nil) will be called with the InternalURL of the
81 // controller process at the other end of the tunnel.
82 UpdateTunnelURL func(url string)
84 // Source for serving WebDAV requests with
85 // X-Webdav-Source: /log
86 LogCollection arvados.CollectionFileSystem
88 sshConfig ssh.ServerConfig
// Start starts an http server that allows authenticated clients to open an
// interactive "docker exec" session and connect to tcp ports
// inside the docker container.
96 func (gw *Gateway) Start() error {
97 gw.sshConfig = ssh.ServerConfig{
99 PasswordCallback: func(c ssh.ConnMetadata, pass []byte) (*ssh.Permissions, error) {
103 return nil, fmt.Errorf("cannot specify user %q via ssh client", c.User())
105 PublicKeyCallback: func(c ssh.ConnMetadata, pubKey ssh.PublicKey) (*ssh.Permissions, error) {
107 return &ssh.Permissions{
108 Extensions: map[string]string{
109 "pubkey-fp": ssh.FingerprintSHA256(pubKey),
113 return nil, fmt.Errorf("cannot specify user %q via ssh client", c.User())
116 pvt, err := rsa.GenerateKey(rand.Reader, 2048)
124 signer, err := ssh.NewSignerFromKey(pvt)
128 gw.sshConfig.AddHostKey(signer)
130 // Address (typically provided by arvados-dispatch-cloud) is
131 // HOST:PORT where HOST is our IP address or hostname as seen
132 // from arvados-controller, and PORT is either the desired
133 // port where we should run our gateway server, or "0" if we
134 // should choose an available port.
135 extAddr := gw.Address
136 // Generally we can't know which local interface corresponds
137 // to an externally reachable IP address, so if we expect to
138 // be reachable by external hosts, we listen on all
142 // If the dispatcher doesn't tell us our external IP
143 // address, controller will only be able to connect
144 // through the tunnel (see runTunnel), so our gateway
145 // server only needs to listen on the loopback
147 extAddr = "127.0.0.1:0"
148 listenHost = "127.0.0.1"
150 extHost, extPort, err := net.SplitHostPort(extAddr)
154 cert, err := selfsigned.CertGenerator{}.Generate()
158 h := hmac.New(sha256.New, []byte(gw.AuthSecret))
159 h.Write(cert.Certificate[0])
160 gw.requestAuth = fmt.Sprintf("%x", h.Sum(nil))
162 h.Write([]byte(gw.requestAuth))
163 gw.respondAuth = fmt.Sprintf("%x", h.Sum(nil))
165 srv := &httpserver.Server{
168 TLSConfig: &tls.Config{
169 Certificates: []tls.Certificate{cert},
172 Addr: net.JoinHostPort(listenHost, extPort),
180 gw.Log.Printf("gateway server stopped: %s", err)
182 // Get the port number we are listening on (extPort might be
183 // "0" or a port name, in which case this will be different).
184 _, listenPort, err := net.SplitHostPort(srv.Addr)
188 // When changing state to Running, the caller will want to set
189 // gateway_address to a "HOST:PORT" that, if controller
190 // connects to it, will reach this gateway server.
192 // The most likely thing to work is: HOST is our external
193 // hostname/IP as provided by the caller
194 // (arvados-dispatch-cloud) or 127.0.0.1 to indicate
195 // non-tunnel connections aren't available; and PORT is the
196 // port number we are listening on.
197 gw.Address = net.JoinHostPort(extHost, listenPort)
198 gw.Log.Printf("gateway server listening at %s", gw.Address)
199 if gw.ArvadosClient != nil {
200 go gw.maintainTunnel(gw.Address)
205 func (gw *Gateway) maintainTunnel(addr string) {
206 for ; ; time.Sleep(5 * time.Second) {
207 err := gw.runTunnel(addr)
208 // Note: err is never nil here, see runTunnel comment.
209 gw.Log.Printf("runTunnel: %s", err)
213 // runTunnel connects to controller and sets up a tunnel through
214 // which controller can connect to the gateway server at the given
217 // runTunnel aims to run forever (i.e., until the current process
218 // exits). If it returns at all, it returns a non-nil error indicating
219 // why the tunnel was shut down.
220 func (gw *Gateway) runTunnel(addr string) error {
221 ctx := auth.NewContext(context.Background(), auth.NewCredentials(gw.ArvadosClient.AuthToken))
222 arpc := rpc.NewConn("", &url.URL{Scheme: "https", Host: gw.ArvadosClient.APIHost}, gw.ArvadosClient.Insecure, rpc.PassthroughTokenProvider)
223 tun, err := arpc.ContainerGatewayTunnel(ctx, arvados.ContainerGatewayTunnelOptions{
224 UUID: gw.ContainerUUID,
225 AuthSecret: gw.AuthSecret,
228 return fmt.Errorf("error creating gateway tunnel: %w", err)
230 mux, err := yamux.Client(tun.Conn, nil)
232 return fmt.Errorf("error setting up mux client end: %s", err)
234 if url := tun.Header.Get("X-Arvados-Internal-Url"); url != "" && gw.UpdateTunnelURL != nil {
235 gw.UpdateTunnelURL(url)
238 muxconn, err := mux.AcceptStream()
243 defer muxconn.Close()
244 gwconn, err := net.Dial("tcp", addr)
246 gw.Log.Printf("tunnel connection %d: error connecting to %s: %s", muxconn.StreamID(), addr, err)
250 var wg sync.WaitGroup
254 _, err := io.Copy(gwconn, muxconn)
256 gw.Log.Printf("tunnel connection %d: mux end: %s", muxconn.StreamID(), err)
262 _, err := io.Copy(muxconn, gwconn)
264 gw.Log.Printf("tunnel connection %d: gateway end: %s", muxconn.StreamID(), err)
273 var webdavMethod = map[string]bool{
279 func (gw *Gateway) ServeHTTP(w http.ResponseWriter, req *http.Request) {
280 w.Header().Set("Vary", "X-Arvados-Authorization, X-Arvados-Container-Gateway-Uuid, X-Arvados-Container-Target-Port, X-Webdav-Prefix, X-Webdav-Source")
281 reqUUID := req.Header.Get("X-Arvados-Container-Gateway-Uuid")
283 // older controller versions only send UUID as query param
285 reqUUID = req.Form.Get("uuid")
287 if reqUUID != gw.ContainerUUID {
288 http.Error(w, fmt.Sprintf("misdirected request: meant for %q but received by crunch-run %q", reqUUID, gw.ContainerUUID), http.StatusBadGateway)
291 if req.Header.Get("X-Arvados-Authorization") != gw.requestAuth {
292 http.Error(w, "bad X-Arvados-Authorization header", http.StatusUnauthorized)
295 w.Header().Set("X-Arvados-Authorization-Response", gw.respondAuth)
297 case req.Method == "POST" && req.Header.Get("Upgrade") == "ssh":
299 // (*lib/controller/localdb.Conn)ContainerSSH()
301 case req.Header.Get("X-Webdav-Source") == "/log":
302 // WebDAV request for container log data
303 if !webdavMethod[req.Method] {
304 http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
307 gw.handleLogsWebDAV(w, req)
308 case req.Header.Get("X-Arvados-Container-Target-Port") != "":
309 // HTTP forwarded through
310 // (*lib/controller/localdb.Conn)ContainerHTTPProxy()
311 gw.handleForwardedHTTP(w, req)
313 http.Error(w, "path not found", http.StatusNotFound)
317 func (gw *Gateway) handleLogsWebDAV(w http.ResponseWriter, r *http.Request) {
318 prefix := r.Header.Get("X-Webdav-Prefix")
319 if !strings.HasPrefix(r.URL.Path, prefix) {
320 http.Error(w, "X-Webdav-Prefix header is not a prefix of the requested path", http.StatusBadRequest)
323 if gw.LogCollection == nil {
324 http.Error(w, "Not found", http.StatusNotFound)
327 wh := webdav.Handler{
329 FileSystem: &webdavfs.FS{
330 FileSystem: gw.LogCollection,
333 AlwaysReadEOF: r.Method == "PROPFIND",
335 LockSystem: webdavfs.NoLockSystem,
336 Logger: gw.webdavLogger,
341 func (gw *Gateway) webdavLogger(r *http.Request, err error) {
342 if err != nil && !os.IsNotExist(err) {
343 ctxlog.FromContext(r.Context()).WithError(err).Info("error reported by webdav handler")
345 ctxlog.FromContext(r.Context()).WithError(err).Debug("webdav request log")
349 func (gw *Gateway) handleForwardedHTTP(w http.ResponseWriter, reqIn *http.Request) {
350 port := reqIn.Header.Get("X-Arvados-Container-Target-Port")
353 if gw.Target != nil {
354 host, err = gw.Target.IPAddress()
356 http.Error(w, "container has no IP address: "+err.Error(), http.StatusServiceUnavailable)
361 http.Error(w, "container has no IP address", http.StatusServiceUnavailable)
364 client := http.Client{
365 CheckRedirect: func(*http.Request, []*http.Request) error { return http.ErrUseLastResponse },
366 // Transport: &http.Transport{
367 // DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
368 // return (&net.Dialer{}).DialContext(ctx, "tcp", net.JoinHostPort(host, port))
374 url.Host = net.JoinHostPort(host, port)
375 req, err := http.NewRequestWithContext(reqIn.Context(), reqIn.Method, url.String(), reqIn.Body)
376 req.Host = reqIn.Host
377 req.Header = reqIn.Header
378 req.Header.Del("X-Arvados-Container-Gateway-Uuid")
379 req.Header.Del("X-Arvados-Container-Target-Port")
380 req.Header.Del("X-Arvados-Authorization")
381 req.Header.Add("Via", "HTTP/1.1 arvados-crunch-run")
382 resp, err := client.Do(req)
384 http.Error(w, err.Error(), http.StatusBadGateway)
387 defer resp.Body.Close()
388 for k, v := range resp.Header {
391 w.WriteHeader(resp.StatusCode)
392 io.Copy(w, resp.Body)
395 // handleSSH connects to an SSH server that allows the caller to run
396 // interactive commands as root (or any other desired user) inside the
397 // container. The tunnel itself can only be created by an
398 // authenticated caller, so the SSH server itself is wide open (any
399 // password or key will be accepted).
401 // Requests must have path "/ssh" and the following headers:
403 // Connection: upgrade
405 // X-Arvados-Container-Gateway-Uuid: uuid of container
406 // X-Arvados-Authorization: must match
407 // hmac(AuthSecret,certfingerprint) (this prevents other containers
408 // and shell nodes from connecting directly)
// detach_keys (form parameter): argument to "docker exec --detach-keys",
// e.g., "ctrl-p,ctrl-q"
// login_username (form parameter): argument to "docker exec --user": account
// used to run command(s) inside the container.
416 func (gw *Gateway) handleSSH(w http.ResponseWriter, req *http.Request) {
418 detachKeys := req.Form.Get("detach_keys")
419 username := req.Form.Get("login_username")
423 netconn, _, err := http.NewResponseController(w).Hijack()
425 http.Error(w, "connection upgrade failed: "+err.Error(), http.StatusInternalServerError)
428 defer netconn.Close()
429 w.Header().Set("Connection", "upgrade")
430 w.Header().Set("Upgrade", "ssh")
431 netconn.Write([]byte("HTTP/1.1 101 Switching Protocols\r\n"))
432 w.Header().Write(netconn)
433 netconn.Write([]byte("\r\n"))
434 httpserver.ExemptFromDeadline(req)
438 conn, newchans, reqs, err := ssh.NewServerConn(netconn, &gw.sshConfig)
441 } else if err != nil {
442 gw.Log.Printf("ssh.NewServerConn: %s", err)
446 go ssh.DiscardRequests(reqs)
447 for newch := range newchans {
448 switch newch.ChannelType() {
450 go gw.handleDirectTCPIP(ctx, newch)
452 go gw.handleSession(ctx, newch, detachKeys, username)
454 go newch.Reject(ssh.UnknownChannelType, fmt.Sprintf("unsupported channel type %q", newch.ChannelType()))
459 func (gw *Gateway) handleDirectTCPIP(ctx context.Context, newch ssh.NewChannel) {
460 ch, reqs, err := newch.Accept()
462 gw.Log.Printf("accept direct-tcpip channel: %s", err)
466 go ssh.DiscardRequests(reqs)
468 // RFC 4254 7.2 (copy of channelOpenDirectMsg in
469 // golang.org/x/crypto/ssh)
476 err = ssh.Unmarshal(newch.ExtraData(), &msg)
478 fmt.Fprintf(ch.Stderr(), "unmarshal direct-tcpip extradata: %s\n", err)
482 case "localhost", "0.0.0.0", "127.0.0.1", "::1", "::":
484 fmt.Fprintf(ch.Stderr(), "cannot forward to ports on %q, only localhost\n", msg.Raddr)
488 dstaddr, err := gw.Target.IPAddress()
490 fmt.Fprintf(ch.Stderr(), "container has no IP address: %s\n", err)
492 } else if dstaddr == "" {
493 fmt.Fprintf(ch.Stderr(), "container has no IP address\n")
497 dst := net.JoinHostPort(dstaddr, fmt.Sprintf("%d", msg.Rport))
498 tcpconn, err := net.Dial("tcp", dst)
500 fmt.Fprintf(ch.Stderr(), "%s: %s\n", dst, err)
504 n, _ := io.Copy(ch, tcpconn)
505 ctxlog.FromContext(ctx).Debugf("tcpip: sent %d bytes\n", n)
508 n, _ := io.Copy(tcpconn, ch)
509 ctxlog.FromContext(ctx).Debugf("tcpip: received %d bytes\n", n)
512 func (gw *Gateway) handleSession(ctx context.Context, newch ssh.NewChannel, detachKeys, username string) {
513 ch, reqs, err := newch.Accept()
515 gw.Log.Printf("error accepting session channel: %s", err)
520 var pty0, tty0 *os.File
521 // Where to send errors/messages for the client to see
522 logw := io.Writer(ch.Stderr())
523 // How to end lines when sending errors/messages to the client
524 // (changes to \r\n when using a pty)
526 // Env vars to add to child process
527 termEnv := []string(nil)
530 wantClose := make(chan struct{})
534 case r, ok := <-reqs:
544 case "shell", "exec":
545 if started++; started != 1 {
546 // RFC 4254 6.5: "Only one of these
547 // requests can succeed per channel."
554 ssh.Unmarshal(req.Payload, &payload)
555 execargs, err := shlex.Split(payload.Command)
557 fmt.Fprintf(logw, "error parsing supplied command: %s"+eol, err)
560 if len(execargs) == 0 {
561 execargs = []string{"/bin/bash", "-login"}
568 ch.SendRequest("exit-status", false, ssh.Marshal(&resp))
572 cmd, err := gw.Target.InjectCommand(ctx, detachKeys, username, tty0 != nil, execargs)
574 fmt.Fprintln(ch.Stderr(), err)
585 // Send our own debug messages to tty as well.
588 // StdinPipe may seem
589 // superfluous here, but it's
590 // not: it causes cmd.Run() to
591 // return when the subprocess
592 // exits. Without it, Run()
593 // waits for stdin to close,
594 // which causes "ssh ... echo
595 // ok" (with the client's
596 // stdin connected to a
597 // terminal or something) to
599 stdin, err := cmd.StdinPipe()
601 fmt.Fprintln(ch.Stderr(), err)
611 cmd.Stderr = ch.Stderr()
613 cmd.SysProcAttr = &syscall.SysProcAttr{
614 Setctty: tty0 != nil,
617 cmd.Env = append(os.Environ(), termEnv...)
619 if exiterr, ok := err.(*exec.ExitError); ok {
620 if status, ok := exiterr.Sys().(syscall.WaitStatus); ok {
621 resp.Status = uint32(status.ExitStatus())
623 } else if err != nil {
624 // Propagate errors like `exec: "docker": executable file not found in $PATH`
625 fmt.Fprintln(ch.Stderr(), err)
627 errClose := ch.CloseWrite()
628 if resp.Status == 0 && (err != nil || errClose != nil) {
634 p, t, err := pty.Open()
636 fmt.Fprintf(ch.Stderr(), "pty failed: %s"+eol, err)
650 ssh.Unmarshal(req.Payload, &payload)
651 termEnv = []string{"TERM=" + payload.Term, "USE_TTY=1"}
652 err = pty.Setsize(pty0, &pty.Winsize{Rows: uint16(payload.Rows), Cols: uint16(payload.Cols), X: uint16(payload.X), Y: uint16(payload.Y)})
654 fmt.Fprintf(logw, "pty-req: setsize failed: %s"+eol, err)
656 case "window-change":
663 ssh.Unmarshal(req.Payload, &payload)
664 err := pty.Setsize(pty0, &pty.Winsize{Rows: uint16(payload.Rows), Cols: uint16(payload.Cols), X: uint16(payload.X), Y: uint16(payload.Y)})
666 fmt.Fprintf(logw, "window-change: setsize failed: %s"+eol, err)
671 // TODO: implement "env"
672 // requests by setting env
673 // vars in the docker-exec
674 // command (not docker-exec's
675 // own environment, which
676 // would be a gaping security
679 // fmt.Fprintf(logw, "declined request %q on ssh channel"+eol, req.Type)