19166: Set up tunnel for container gateway requests
[arvados.git] / lib / controller / localdb / container_gateway.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package localdb
6
7 import (
8         "bufio"
9         "bytes"
10         "context"
11         "crypto/hmac"
12         "crypto/sha256"
13         "crypto/subtle"
14         "crypto/tls"
15         "crypto/x509"
16         "errors"
17         "fmt"
18         "net"
19         "net/http"
20         "net/url"
21         "strings"
22
23         "git.arvados.org/arvados.git/sdk/go/arvados"
24         "git.arvados.org/arvados.git/sdk/go/auth"
25         "git.arvados.org/arvados.git/sdk/go/ctxlog"
26         "git.arvados.org/arvados.git/sdk/go/httpserver"
27         "github.com/hashicorp/yamux"
28 )
29
30 // ContainerSSH returns a connection to the SSH server in the
31 // appropriate crunch-run process on the worker node where the
32 // specified container is running.
33 //
34 // If the returned error is nil, the caller is responsible for closing
35 // sshconn.Conn.
36 func (conn *Conn) ContainerSSH(ctx context.Context, opts arvados.ContainerSSHOptions) (sshconn arvados.ContainerSSHConnection, err error) {
37         user, err := conn.railsProxy.UserGetCurrent(ctx, arvados.GetOptions{})
38         if err != nil {
39                 return
40         }
41         ctr, err := conn.railsProxy.ContainerGet(ctx, arvados.GetOptions{UUID: opts.UUID})
42         if err != nil {
43                 return
44         }
45         ctxRoot := auth.NewContext(ctx, &auth.Credentials{Tokens: []string{conn.cluster.SystemRootToken}})
46         if !user.IsAdmin || !conn.cluster.Containers.ShellAccess.Admin {
47                 if !conn.cluster.Containers.ShellAccess.User {
48                         err = httpserver.ErrorWithStatus(errors.New("shell access is disabled in config"), http.StatusServiceUnavailable)
49                         return
50                 }
51                 var crs arvados.ContainerRequestList
52                 crs, err = conn.railsProxy.ContainerRequestList(ctxRoot, arvados.ListOptions{Limit: -1, Filters: []arvados.Filter{{"container_uuid", "=", opts.UUID}}})
53                 if err != nil {
54                         return
55                 }
56                 for _, cr := range crs.Items {
57                         if cr.ModifiedByUserUUID != user.UUID {
58                                 err = httpserver.ErrorWithStatus(errors.New("permission denied: container is associated with requests submitted by other users"), http.StatusForbidden)
59                                 return
60                         }
61                 }
62                 if crs.ItemsAvailable != len(crs.Items) {
63                         err = httpserver.ErrorWithStatus(errors.New("incomplete response while checking permission"), http.StatusInternalServerError)
64                         return
65                 }
66         }
67
68         conn.gwTunnelsLock.Lock()
69         tunnel := conn.gwTunnels[opts.UUID]
70         conn.gwTunnelsLock.Unlock()
71
72         if ctr.State == arvados.ContainerStateQueued || ctr.State == arvados.ContainerStateLocked {
73                 err = httpserver.ErrorWithStatus(fmt.Errorf("container is not running yet (state is %q)", ctr.State), http.StatusServiceUnavailable)
74                 return
75         } else if ctr.State != arvados.ContainerStateRunning {
76                 err = httpserver.ErrorWithStatus(fmt.Errorf("container has ended (state is %q)", ctr.State), http.StatusGone)
77                 return
78         }
79
80         var rawconn net.Conn
81         if ctr.GatewayAddress != "" && !strings.HasPrefix(ctr.GatewayAddress, "127.0.0.1:") {
82                 rawconn, err = net.Dial("tcp", ctr.GatewayAddress)
83         } else if tunnel != nil {
84                 rawconn, err = tunnel.Open()
85         } else if ctr.GatewayAddress == "" {
86                 err = errors.New("container is running but gateway is not available")
87         } else {
88                 err = errors.New("container gateway is running but tunnel is down")
89         }
90         if err != nil {
91                 err = httpserver.ErrorWithStatus(err, http.StatusServiceUnavailable)
92                 return
93         }
94
95         // crunch-run uses a self-signed / unverifiable TLS
96         // certificate, so we use the following scheme to ensure we're
97         // not talking to a MITM.
98         //
99         // 1. Compute ctrKey = HMAC-SHA256(sysRootToken,ctrUUID) --
100         // this will be the same ctrKey that a-d-c supplied to
101         // crunch-run in the GatewayAuthSecret env var.
102         //
103         // 2. Compute requestAuth = HMAC-SHA256(ctrKey,serverCert) and
104         // send it to crunch-run as the X-Arvados-Authorization
105         // header, proving that we know ctrKey. (Note a MITM cannot
106         // replay the proof to a real crunch-run server, because the
107         // real crunch-run server would have a different cert.)
108         //
109         // 3. Compute respondAuth = HMAC-SHA256(ctrKey,requestAuth)
110         // and ensure the server returns it in the
111         // X-Arvados-Authorization-Response header, proving that the
112         // server knows ctrKey.
113         var requestAuth, respondAuth string
114         tlsconn := tls.Client(rawconn, &tls.Config{
115                 InsecureSkipVerify: true,
116                 VerifyPeerCertificate: func(rawCerts [][]byte, verifiedChains [][]*x509.Certificate) error {
117                         if len(rawCerts) == 0 {
118                                 return errors.New("no certificate received, cannot compute authorization header")
119                         }
120                         h := hmac.New(sha256.New, []byte(conn.cluster.SystemRootToken))
121                         fmt.Fprint(h, opts.UUID)
122                         authKey := fmt.Sprintf("%x", h.Sum(nil))
123                         h = hmac.New(sha256.New, []byte(authKey))
124                         h.Write(rawCerts[0])
125                         requestAuth = fmt.Sprintf("%x", h.Sum(nil))
126                         h.Reset()
127                         h.Write([]byte(requestAuth))
128                         respondAuth = fmt.Sprintf("%x", h.Sum(nil))
129                         return nil
130                 },
131         })
132         err = tlsconn.HandshakeContext(ctx)
133         if err != nil {
134                 err = httpserver.ErrorWithStatus(err, http.StatusBadGateway)
135                 return
136         }
137         if respondAuth == "" {
138                 tlsconn.Close()
139                 err = httpserver.ErrorWithStatus(errors.New("BUG: no respondAuth"), http.StatusInternalServerError)
140                 return
141         }
142         bufr := bufio.NewReader(tlsconn)
143         bufw := bufio.NewWriter(tlsconn)
144
145         u := url.URL{
146                 Scheme: "http",
147                 Host:   ctr.GatewayAddress,
148                 Path:   "/ssh",
149         }
150         bufw.WriteString("POST " + u.String() + " HTTP/1.1\r\n")
151         bufw.WriteString("Host: " + u.Host + "\r\n")
152         bufw.WriteString("Upgrade: ssh\r\n")
153         bufw.WriteString("X-Arvados-Target-Uuid: " + opts.UUID + "\r\n")
154         bufw.WriteString("X-Arvados-Authorization: " + requestAuth + "\r\n")
155         bufw.WriteString("X-Arvados-Detach-Keys: " + opts.DetachKeys + "\r\n")
156         bufw.WriteString("X-Arvados-Login-Username: " + opts.LoginUsername + "\r\n")
157         bufw.WriteString("\r\n")
158         bufw.Flush()
159         resp, err := http.ReadResponse(bufr, &http.Request{Method: "GET"})
160         if err != nil {
161                 err = httpserver.ErrorWithStatus(fmt.Errorf("error reading http response from gateway: %w", err), http.StatusBadGateway)
162                 tlsconn.Close()
163                 return
164         }
165         if resp.Header.Get("X-Arvados-Authorization-Response") != respondAuth {
166                 err = httpserver.ErrorWithStatus(errors.New("bad X-Arvados-Authorization-Response header"), http.StatusBadGateway)
167                 tlsconn.Close()
168                 return
169         }
170         if strings.ToLower(resp.Header.Get("Upgrade")) != "ssh" ||
171                 strings.ToLower(resp.Header.Get("Connection")) != "upgrade" {
172                 err = httpserver.ErrorWithStatus(errors.New("bad upgrade"), http.StatusBadGateway)
173                 tlsconn.Close()
174                 return
175         }
176
177         if !ctr.InteractiveSessionStarted {
178                 _, err = conn.railsProxy.ContainerUpdate(ctxRoot, arvados.UpdateOptions{
179                         UUID: opts.UUID,
180                         Attrs: map[string]interface{}{
181                                 "interactive_session_started": true,
182                         },
183                 })
184                 if err != nil {
185                         tlsconn.Close()
186                         return
187                 }
188         }
189
190         sshconn.Conn = tlsconn
191         sshconn.Bufrw = &bufio.ReadWriter{Reader: bufr, Writer: bufw}
192         sshconn.Logger = ctxlog.FromContext(ctx)
193         sshconn.UpgradeHeader = "ssh"
194         return
195 }
196
197 // ContainerGatewayTunnel sets up a tunnel enabling us (controller) to
198 // connect to the caller's (crunch-run's) gateway server.
199 func (conn *Conn) ContainerGatewayTunnel(ctx context.Context, opts arvados.ContainerGatewayTunnelOptions) (resp arvados.ConnectionResponse, err error) {
200         h := hmac.New(sha256.New, []byte(conn.cluster.SystemRootToken))
201         fmt.Fprint(h, opts.UUID)
202         authSecret := fmt.Sprintf("%x", h.Sum(nil))
203         if subtle.ConstantTimeCompare([]byte(authSecret), []byte(opts.AuthSecret)) != 1 {
204                 ctxlog.FromContext(ctx).Info("received incorrect auth_secret")
205                 return resp, httpserver.ErrorWithStatus(errors.New("authentication error"), http.StatusUnauthorized)
206         }
207
208         muxconn, clientconn := net.Pipe()
209         tunnel, err := yamux.Server(muxconn, nil)
210         if err != nil {
211                 clientconn.Close()
212                 return resp, httpserver.ErrorWithStatus(err, http.StatusInternalServerError)
213         }
214
215         conn.gwTunnelsLock.Lock()
216         if conn.gwTunnels == nil {
217                 conn.gwTunnels = map[string]*yamux.Session{opts.UUID: tunnel}
218         } else {
219                 conn.gwTunnels[opts.UUID] = tunnel
220         }
221         conn.gwTunnelsLock.Unlock()
222
223         go func() {
224                 <-tunnel.CloseChan()
225                 conn.gwTunnelsLock.Lock()
226                 if conn.gwTunnels[opts.UUID] == tunnel {
227                         delete(conn.gwTunnels, opts.UUID)
228                 }
229                 conn.gwTunnelsLock.Unlock()
230         }()
231
232         // Assuming we're acting as the backend of an http server,
233         // lib/controller/router will call resp's ServeHTTP handler,
234         // which upgrades the incoming http connection to a raw socket
235         // and connects it to our yamux.Server through our net.Pipe().
236         resp.Conn = clientconn
237         resp.Bufrw = &bufio.ReadWriter{Reader: bufio.NewReader(&bytes.Buffer{}), Writer: bufio.NewWriter(&bytes.Buffer{})}
238         resp.Logger = ctxlog.FromContext(ctx)
239         resp.UpgradeHeader = "tunnel"
240         return
241 }