1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
25 "git.arvados.org/arvados.git/lib/controller/rpc"
26 "git.arvados.org/arvados.git/lib/service"
27 "git.arvados.org/arvados.git/sdk/go/arvados"
28 "git.arvados.org/arvados.git/sdk/go/auth"
29 "git.arvados.org/arvados.git/sdk/go/ctxlog"
30 "git.arvados.org/arvados.git/sdk/go/httpserver"
31 "github.com/hashicorp/yamux"
35 forceProxyForTest = false
36 forceInternalURLForTest *arvados.URL
39 // ContainerSSH returns a connection to the SSH server in the
40 // appropriate crunch-run process on the worker node where the
41 // specified container is running.
43 // If the returned error is nil, the caller is responsible for closing sshconn.Conn.
45 func (conn *Conn) ContainerSSH(ctx context.Context, opts arvados.ContainerSSHOptions) (sshconn arvados.ConnectionResponse, err error) {
46 user, err := conn.railsProxy.UserGetCurrent(ctx, arvados.GetOptions{})
50 ctr, err := conn.railsProxy.ContainerGet(ctx, arvados.GetOptions{UUID: opts.UUID})
54 ctxRoot := auth.NewContext(ctx, &auth.Credentials{Tokens: []string{conn.cluster.SystemRootToken}})
55 if !user.IsAdmin || !conn.cluster.Containers.ShellAccess.Admin {
56 if !conn.cluster.Containers.ShellAccess.User {
57 return sshconn, httpserver.ErrorWithStatus(errors.New("shell access is disabled in config"), http.StatusServiceUnavailable)
59 crs, err := conn.railsProxy.ContainerRequestList(ctxRoot, arvados.ListOptions{Limit: -1, Filters: []arvados.Filter{{"container_uuid", "=", opts.UUID}}})
63 for _, cr := range crs.Items {
64 if cr.ModifiedByUserUUID != user.UUID {
65 return sshconn, httpserver.ErrorWithStatus(errors.New("permission denied: container is associated with requests submitted by other users"), http.StatusForbidden)
68 if crs.ItemsAvailable != len(crs.Items) {
69 return sshconn, httpserver.ErrorWithStatus(errors.New("incomplete response while checking permission"), http.StatusInternalServerError)
73 conn.gwTunnelsLock.Lock()
74 tunnel := conn.gwTunnels[opts.UUID]
75 conn.gwTunnelsLock.Unlock()
77 if ctr.State == arvados.ContainerStateQueued || ctr.State == arvados.ContainerStateLocked {
78 return sshconn, httpserver.ErrorWithStatus(fmt.Errorf("container is not running yet (state is %q)", ctr.State), http.StatusServiceUnavailable)
79 } else if ctr.State != arvados.ContainerStateRunning {
80 return sshconn, httpserver.ErrorWithStatus(fmt.Errorf("container has ended (state is %q)", ctr.State), http.StatusGone)
83 // targetHost is the value we'll use in the Host header in our
84 // "Upgrade: ssh" http request. It's just a placeholder
85 // "localhost", unless we decide to connect directly, in which
86 // case we'll set it to the gateway's external ip:port. (The
87 // gateway doesn't even look at it, but we might as well.)
88 targetHost := "localhost"
89 myURL, _ := service.URLFromContext(ctx)
92 if host, _, splitErr := net.SplitHostPort(ctr.GatewayAddress); splitErr == nil && host != "" && host != "127.0.0.1" {
93 // If crunch-run provided a GatewayAddress like
94 // "ipaddr:port", that means "ipaddr" is one of the
95 // external interfaces where the gateway is
96 // listening. In that case, it's the most
97 // reliable/direct option, so we use it even if a
98 // tunnel might also be available.
99 targetHost = ctr.GatewayAddress
100 rawconn, err = net.Dial("tcp", ctr.GatewayAddress)
102 return sshconn, httpserver.ErrorWithStatus(err, http.StatusServiceUnavailable)
104 } else if tunnel != nil && !(forceProxyForTest && !opts.NoForward) {
105 // If we can't connect directly, and the gateway has
106 // established a yamux tunnel with us, connect through it.
109 // ...except: forceProxyForTest means we are emulating
110 // a situation where the gateway has established a
111 // yamux tunnel with controller B, and the
112 // ContainerSSH request arrives at controller A. If
113 // opts.NoForward==false then we are acting as A, so
114 // we pretend not to have a tunnel, and fall through
115 // to the "tunurl" case below. If opts.NoForward==true
116 // then the client is A and we are acting as B, so we
117 // connect to our tunnel.
118 rawconn, err = tunnel.Open()
120 return sshconn, httpserver.ErrorWithStatus(err, http.StatusServiceUnavailable)
122 } else if ctr.GatewayAddress == "" {
123 return sshconn, httpserver.ErrorWithStatus(errors.New("container is running but gateway is not available"), http.StatusServiceUnavailable)
124 } else if tunurl := strings.TrimPrefix(ctr.GatewayAddress, "tunnel "); tunurl != ctr.GatewayAddress &&
126 tunurl != myURL.String() &&
128 // If crunch-run provided a GatewayAddress like
129 // "tunnel https://10.0.0.10:1010/", that means the
130 // gateway has established a yamux tunnel with the
131 // controller process at the indicated InternalURL
132 // (which isn't us, otherwise we would have had
133 // "tunnel != nil" above). We need to proxy through to
134 // the other controller process in order to use the tunnel.
136 for u := range conn.cluster.Services.Controller.InternalURLs {
137 if u.String() == tunurl {
138 ctxlog.FromContext(ctx).Debugf("proxying ContainerSSH request to other controller at %s", u)
140 arpc := rpc.NewConn(conn.cluster.ClusterID, &u, conn.cluster.TLS.Insecure, rpc.PassthroughTokenProvider)
141 opts.NoForward = true
142 return arpc.ContainerSSH(ctx, opts)
145 ctxlog.FromContext(ctx).Warnf("container gateway provided a tunnel endpoint %s that is not one of Services.Controller.InternalURLs", tunurl)
146 return sshconn, httpserver.ErrorWithStatus(errors.New("container gateway is running but tunnel endpoint is invalid"), http.StatusServiceUnavailable)
148 return sshconn, httpserver.ErrorWithStatus(errors.New("container gateway is running but tunnel is down"), http.StatusServiceUnavailable)
151 // crunch-run uses a self-signed / unverifiable TLS
152 // certificate, so we use the following scheme to ensure we're
153 // not talking to a MITM.
155 // 1. Compute ctrKey = HMAC-SHA256(sysRootToken,ctrUUID) --
156 // this will be the same ctrKey that a-d-c supplied to
157 // crunch-run in the GatewayAuthSecret env var.
159 // 2. Compute requestAuth = HMAC-SHA256(ctrKey,serverCert) and
160 // send it to crunch-run as the X-Arvados-Authorization
161 // header, proving that we know ctrKey. (Note a MITM cannot
162 // replay the proof to a real crunch-run server, because the
163 // real crunch-run server would have a different cert.)
165 // 3. Compute respondAuth = HMAC-SHA256(ctrKey,requestAuth)
166 // and ensure the server returns it in the
167 // X-Arvados-Authorization-Response header, proving that the
168 // server knows ctrKey.
169 var requestAuth, respondAuth string
170 tlsconn := tls.Client(rawconn, &tls.Config{
171 InsecureSkipVerify: true,
172 VerifyPeerCertificate: func(rawCerts [][]byte, verifiedChains [][]*x509.Certificate) error {
173 if len(rawCerts) == 0 {
174 return errors.New("no certificate received, cannot compute authorization header")
176 h := hmac.New(sha256.New, []byte(conn.cluster.SystemRootToken))
177 fmt.Fprint(h, opts.UUID)
178 authKey := fmt.Sprintf("%x", h.Sum(nil))
179 h = hmac.New(sha256.New, []byte(authKey))
181 requestAuth = fmt.Sprintf("%x", h.Sum(nil))
183 h.Write([]byte(requestAuth))
184 respondAuth = fmt.Sprintf("%x", h.Sum(nil))
188 err = tlsconn.HandshakeContext(ctx)
190 return sshconn, httpserver.ErrorWithStatus(fmt.Errorf("TLS handshake failed: %w", err), http.StatusBadGateway)
192 if respondAuth == "" {
194 return sshconn, httpserver.ErrorWithStatus(errors.New("BUG: no respondAuth"), http.StatusInternalServerError)
196 bufr := bufio.NewReader(tlsconn)
197 bufw := bufio.NewWriter(tlsconn)
204 postform := url.Values{
206 "detach_keys": {opts.DetachKeys},
207 "login_username": {opts.LoginUsername},
208 "no_forward": {fmt.Sprintf("%v", opts.NoForward)},
210 postdata := postform.Encode()
211 bufw.WriteString("POST " + u.String() + " HTTP/1.1\r\n")
212 bufw.WriteString("Host: " + u.Host + "\r\n")
213 bufw.WriteString("Upgrade: ssh\r\n")
214 bufw.WriteString("X-Arvados-Authorization: " + requestAuth + "\r\n")
215 bufw.WriteString("Content-Type: application/x-www-form-urlencoded\r\n")
216 fmt.Fprintf(bufw, "Content-Length: %d\r\n", len(postdata))
217 bufw.WriteString("\r\n")
218 bufw.WriteString(postdata)
220 resp, err := http.ReadResponse(bufr, &http.Request{Method: "POST"})
223 return sshconn, httpserver.ErrorWithStatus(fmt.Errorf("error reading http response from gateway: %w", err), http.StatusBadGateway)
225 defer resp.Body.Close()
226 if resp.StatusCode != http.StatusSwitchingProtocols {
227 body, _ := ioutil.ReadAll(io.LimitReader(resp.Body, 1000))
229 return sshconn, httpserver.ErrorWithStatus(fmt.Errorf("unexpected status %s %q", resp.Status, body), http.StatusBadGateway)
231 if strings.ToLower(resp.Header.Get("Upgrade")) != "ssh" ||
232 strings.ToLower(resp.Header.Get("Connection")) != "upgrade" {
234 return sshconn, httpserver.ErrorWithStatus(errors.New("bad upgrade"), http.StatusBadGateway)
236 if resp.Header.Get("X-Arvados-Authorization-Response") != respondAuth {
238 return sshconn, httpserver.ErrorWithStatus(errors.New("bad X-Arvados-Authorization-Response header"), http.StatusBadGateway)
241 if !ctr.InteractiveSessionStarted {
242 _, err = conn.railsProxy.ContainerUpdate(ctxRoot, arvados.UpdateOptions{
244 Attrs: map[string]interface{}{
245 "interactive_session_started": true,
250 return sshconn, httpserver.ErrorWithStatus(err, http.StatusInternalServerError)
254 sshconn.Conn = tlsconn
255 sshconn.Bufrw = &bufio.ReadWriter{Reader: bufr, Writer: bufw}
256 sshconn.Logger = ctxlog.FromContext(ctx)
257 sshconn.Header = http.Header{"Upgrade": {"ssh"}}
261 // ContainerGatewayTunnel sets up a tunnel enabling us (controller) to
262 // connect to the caller's (crunch-run's) gateway server.
263 func (conn *Conn) ContainerGatewayTunnel(ctx context.Context, opts arvados.ContainerGatewayTunnelOptions) (resp arvados.ConnectionResponse, err error) {
264 h := hmac.New(sha256.New, []byte(conn.cluster.SystemRootToken))
265 fmt.Fprint(h, opts.UUID)
266 authSecret := fmt.Sprintf("%x", h.Sum(nil))
267 if subtle.ConstantTimeCompare([]byte(authSecret), []byte(opts.AuthSecret)) != 1 {
268 ctxlog.FromContext(ctx).Info("received incorrect auth_secret")
269 return resp, httpserver.ErrorWithStatus(errors.New("authentication error"), http.StatusUnauthorized)
272 muxconn, clientconn := net.Pipe()
273 tunnel, err := yamux.Server(muxconn, nil)
276 return resp, httpserver.ErrorWithStatus(err, http.StatusInternalServerError)
279 conn.gwTunnelsLock.Lock()
280 if conn.gwTunnels == nil {
281 conn.gwTunnels = map[string]*yamux.Session{opts.UUID: tunnel}
283 conn.gwTunnels[opts.UUID] = tunnel
285 conn.gwTunnelsLock.Unlock()
289 conn.gwTunnelsLock.Lock()
290 if conn.gwTunnels[opts.UUID] == tunnel {
291 delete(conn.gwTunnels, opts.UUID)
293 conn.gwTunnelsLock.Unlock()
296 // Assuming we're acting as the backend of an http server,
297 // lib/controller/router will call resp's ServeHTTP handler,
298 // which upgrades the incoming http connection to a raw socket
299 // and connects it to our yamux.Server through our net.Pipe().
300 resp.Conn = clientconn
301 resp.Bufrw = &bufio.ReadWriter{Reader: bufio.NewReader(&bytes.Buffer{}), Writer: bufio.NewWriter(&bytes.Buffer{})}
302 resp.Logger = ctxlog.FromContext(ctx)
303 resp.Header = http.Header{"Upgrade": {"tunnel"}}
304 if u, ok := service.URLFromContext(ctx); ok {
305 resp.Header.Set("X-Arvados-Internal-Url", u.String())
306 } else if forceInternalURLForTest != nil {
307 resp.Header.Set("X-Arvados-Internal-Url", forceInternalURLForTest.String())