import (
"bufio"
+ "bytes"
"context"
"crypto/hmac"
"crypto/sha256"
+ "crypto/subtle"
"crypto/tls"
"crypto/x509"
"errors"
"fmt"
+ "io"
+ "io/ioutil"
+ "net"
"net/http"
"net/url"
"strings"
+ "git.arvados.org/arvados.git/lib/controller/rpc"
+ "git.arvados.org/arvados.git/lib/service"
"git.arvados.org/arvados.git/sdk/go/arvados"
"git.arvados.org/arvados.git/sdk/go/auth"
"git.arvados.org/arvados.git/sdk/go/ctxlog"
"git.arvados.org/arvados.git/sdk/go/httpserver"
+ "github.com/hashicorp/yamux"
+)
+
+var (
+ forceProxyForTest = false
+ forceInternalURLForTest *arvados.URL
)
// ContainerSSH returns a connection to the SSH server in the
//
// If the returned error is nil, the caller is responsible for closing
// sshconn.Conn.
-func (conn *Conn) ContainerSSH(ctx context.Context, opts arvados.ContainerSSHOptions) (sshconn arvados.ContainerSSHConnection, err error) {
+func (conn *Conn) ContainerSSH(ctx context.Context, opts arvados.ContainerSSHOptions) (sshconn arvados.ConnectionResponse, err error) {
user, err := conn.railsProxy.UserGetCurrent(ctx, arvados.GetOptions{})
if err != nil {
- return
+ return sshconn, err
}
ctr, err := conn.railsProxy.ContainerGet(ctx, arvados.GetOptions{UUID: opts.UUID})
if err != nil {
- return
+ return sshconn, err
}
ctxRoot := auth.NewContext(ctx, &auth.Credentials{Tokens: []string{conn.cluster.SystemRootToken}})
if !user.IsAdmin || !conn.cluster.Containers.ShellAccess.Admin {
if !conn.cluster.Containers.ShellAccess.User {
- err = httpserver.ErrorWithStatus(errors.New("shell access is disabled in config"), http.StatusServiceUnavailable)
- return
+ return sshconn, httpserver.ErrorWithStatus(errors.New("shell access is disabled in config"), http.StatusServiceUnavailable)
}
- var crs arvados.ContainerRequestList
- crs, err = conn.railsProxy.ContainerRequestList(ctxRoot, arvados.ListOptions{Limit: -1, Filters: []arvados.Filter{{"container_uuid", "=", opts.UUID}}})
+ crs, err := conn.railsProxy.ContainerRequestList(ctxRoot, arvados.ListOptions{Limit: -1, Filters: []arvados.Filter{{"container_uuid", "=", opts.UUID}}})
if err != nil {
- return
+ return sshconn, err
}
for _, cr := range crs.Items {
if cr.ModifiedByUserUUID != user.UUID {
- err = httpserver.ErrorWithStatus(errors.New("permission denied: container is associated with requests submitted by other users"), http.StatusForbidden)
- return
+ return sshconn, httpserver.ErrorWithStatus(errors.New("permission denied: container is associated with requests submitted by other users"), http.StatusForbidden)
}
}
if crs.ItemsAvailable != len(crs.Items) {
- err = httpserver.ErrorWithStatus(errors.New("incomplete response while checking permission"), http.StatusInternalServerError)
- return
+ return sshconn, httpserver.ErrorWithStatus(errors.New("incomplete response while checking permission"), http.StatusInternalServerError)
}
}
- switch ctr.State {
- case arvados.ContainerStateQueued, arvados.ContainerStateLocked:
- err = httpserver.ErrorWithStatus(fmt.Errorf("container is not running yet (state is %q)", ctr.State), http.StatusServiceUnavailable)
- return
- case arvados.ContainerStateRunning:
- if ctr.GatewayAddress == "" {
- err = httpserver.ErrorWithStatus(errors.New("container is running but gateway is not available -- installation problem or feature not supported"), http.StatusServiceUnavailable)
- return
+ conn.gwTunnelsLock.Lock()
+ tunnel := conn.gwTunnels[opts.UUID]
+ conn.gwTunnelsLock.Unlock()
+
+ if ctr.State == arvados.ContainerStateQueued || ctr.State == arvados.ContainerStateLocked {
+ return sshconn, httpserver.ErrorWithStatus(fmt.Errorf("container is not running yet (state is %q)", ctr.State), http.StatusServiceUnavailable)
+ } else if ctr.State != arvados.ContainerStateRunning {
+ return sshconn, httpserver.ErrorWithStatus(fmt.Errorf("container has ended (state is %q)", ctr.State), http.StatusGone)
+ }
+
+ // targetHost is the value we'll use in the Host header in our
+ // "Upgrade: ssh" http request. It's just a placeholder
+ // "localhost", unless we decide to connect directly, in which
+ // case we'll set it to the gateway's external ip:host. (The
+ // gateway doesn't even look at it, but we might as well.)
+ targetHost := "localhost"
+ myURL, _ := service.URLFromContext(ctx)
+
+ var rawconn net.Conn
+ if host, _, splitErr := net.SplitHostPort(ctr.GatewayAddress); splitErr == nil && host != "" && host != "127.0.0.1" {
+ // If crunch-run provided a GatewayAddress like
+ // "ipaddr:port", that means "ipaddr" is one of the
+ // external interfaces where the gateway is
+ // listening. In that case, it's the most
+ // reliable/direct option, so we use it even if a
+ // tunnel might also be available.
+ targetHost = ctr.GatewayAddress
+ rawconn, err = net.Dial("tcp", ctr.GatewayAddress)
+ if err != nil {
+ return sshconn, httpserver.ErrorWithStatus(err, http.StatusServiceUnavailable)
}
- default:
- err = httpserver.ErrorWithStatus(fmt.Errorf("container has ended (state is %q)", ctr.State), http.StatusGone)
- return
+ } else if tunnel != nil && !(forceProxyForTest && !opts.NoForward) {
+ // If we can't connect directly, and the gateway has
+ // established a yamux tunnel with us, connect through
+ // the tunnel.
+ //
+ // ...except: forceProxyForTest means we are emulating
+ // a situation where the gateway has established a
+ // yamux tunnel with controller B, and the
+ // ContainerSSH request arrives at controller A. If
+ // opts.NoForward==false then we are acting as A, so
+ // we pretend not to have a tunnel, and fall through
+ // to the "tunurl" case below. If opts.NoForward==true
+ // then the client is A and we are acting as B, so we
+ // connect to our tunnel.
+ rawconn, err = tunnel.Open()
+ if err != nil {
+ return sshconn, httpserver.ErrorWithStatus(err, http.StatusServiceUnavailable)
+ }
+ } else if ctr.GatewayAddress == "" {
+ return sshconn, httpserver.ErrorWithStatus(errors.New("container is running but gateway is not available"), http.StatusServiceUnavailable)
+ } else if tunurl := strings.TrimPrefix(ctr.GatewayAddress, "tunnel "); tunurl != ctr.GatewayAddress &&
+ tunurl != "" &&
+ tunurl != myURL.String() &&
+ !opts.NoForward {
+ // If crunch-run provided a GatewayAddress like
+ // "tunnel https://10.0.0.10:1010/", that means the
+ // gateway has established a yamux tunnel with the
+ // controller process at the indicated InternalURL
+ // (which isn't us, otherwise we would have had
+ // "tunnel != nil" above). We need to proxy through to
+ // the other controller process in order to use the
+ // tunnel.
+ for u := range conn.cluster.Services.Controller.InternalURLs {
+ if u.String() == tunurl {
+ ctxlog.FromContext(ctx).Debugf("proxying ContainerSSH request to other controller at %s", u)
+ u := url.URL(u)
+ arpc := rpc.NewConn(conn.cluster.ClusterID, &u, conn.cluster.TLS.Insecure, rpc.PassthroughTokenProvider)
+ opts.NoForward = true
+ return arpc.ContainerSSH(ctx, opts)
+ }
+ }
+ ctxlog.FromContext(ctx).Warnf("container gateway provided a tunnel endpoint %s that is not one of Services.Controller.InternalURLs", tunurl)
+ return sshconn, httpserver.ErrorWithStatus(errors.New("container gateway is running but tunnel endpoint is invalid"), http.StatusServiceUnavailable)
+ } else {
+ return sshconn, httpserver.ErrorWithStatus(errors.New("container gateway is running but tunnel is down"), http.StatusServiceUnavailable)
}
+
// crunch-run uses a self-signed / unverifiable TLS
// certificate, so we use the following scheme to ensure we're
// not talking to a MITM.
// X-Arvados-Authorization-Response header, proving that the
// server knows ctrKey.
var requestAuth, respondAuth string
- netconn, err := tls.Dial("tcp", ctr.GatewayAddress, &tls.Config{
+ tlsconn := tls.Client(rawconn, &tls.Config{
InsecureSkipVerify: true,
VerifyPeerCertificate: func(rawCerts [][]byte, verifiedChains [][]*x509.Certificate) error {
if len(rawCerts) == 0 {
return nil
},
})
+ err = tlsconn.HandshakeContext(ctx)
if err != nil {
- err = httpserver.ErrorWithStatus(err, http.StatusBadGateway)
- return
+ return sshconn, httpserver.ErrorWithStatus(fmt.Errorf("TLS handshake failed: %w", err), http.StatusBadGateway)
}
if respondAuth == "" {
- err = httpserver.ErrorWithStatus(errors.New("BUG: no respondAuth"), http.StatusInternalServerError)
- return
+ tlsconn.Close()
+ return sshconn, httpserver.ErrorWithStatus(errors.New("BUG: no respondAuth"), http.StatusInternalServerError)
}
- bufr := bufio.NewReader(netconn)
- bufw := bufio.NewWriter(netconn)
+ bufr := bufio.NewReader(tlsconn)
+ bufw := bufio.NewWriter(tlsconn)
u := url.URL{
Scheme: "http",
- Host: ctr.GatewayAddress,
+ Host: targetHost,
Path: "/ssh",
}
- bufw.WriteString("GET " + u.String() + " HTTP/1.1\r\n")
+ postform := url.Values{
+ "uuid": {opts.UUID},
+ "detach_keys": {opts.DetachKeys},
+ "login_username": {opts.LoginUsername},
+ "no_forward": {fmt.Sprintf("%v", opts.NoForward)},
+ }
+ postdata := postform.Encode()
+ bufw.WriteString("POST " + u.String() + " HTTP/1.1\r\n")
bufw.WriteString("Host: " + u.Host + "\r\n")
bufw.WriteString("Upgrade: ssh\r\n")
- bufw.WriteString("X-Arvados-Target-Uuid: " + opts.UUID + "\r\n")
bufw.WriteString("X-Arvados-Authorization: " + requestAuth + "\r\n")
- bufw.WriteString("X-Arvados-Detach-Keys: " + opts.DetachKeys + "\r\n")
- bufw.WriteString("X-Arvados-Login-Username: " + opts.LoginUsername + "\r\n")
+ bufw.WriteString("Content-Type: application/x-www-form-urlencoded\r\n")
+ fmt.Fprintf(bufw, "Content-Length: %d\r\n", len(postdata))
bufw.WriteString("\r\n")
+ bufw.WriteString(postdata)
bufw.Flush()
- resp, err := http.ReadResponse(bufr, &http.Request{Method: "GET"})
+ resp, err := http.ReadResponse(bufr, &http.Request{Method: "POST"})
if err != nil {
- err = httpserver.ErrorWithStatus(fmt.Errorf("error reading http response from gateway: %w", err), http.StatusBadGateway)
- netconn.Close()
- return
+ tlsconn.Close()
+ return sshconn, httpserver.ErrorWithStatus(fmt.Errorf("error reading http response from gateway: %w", err), http.StatusBadGateway)
}
- if resp.Header.Get("X-Arvados-Authorization-Response") != respondAuth {
- err = httpserver.ErrorWithStatus(errors.New("bad X-Arvados-Authorization-Response header"), http.StatusBadGateway)
- netconn.Close()
- return
+ defer resp.Body.Close()
+ if resp.StatusCode != http.StatusSwitchingProtocols {
+ body, _ := ioutil.ReadAll(io.LimitReader(resp.Body, 1000))
+ tlsconn.Close()
+ return sshconn, httpserver.ErrorWithStatus(fmt.Errorf("unexpected status %s %q", resp.Status, body), http.StatusBadGateway)
}
if strings.ToLower(resp.Header.Get("Upgrade")) != "ssh" ||
strings.ToLower(resp.Header.Get("Connection")) != "upgrade" {
- err = httpserver.ErrorWithStatus(errors.New("bad upgrade"), http.StatusBadGateway)
- netconn.Close()
- return
+ tlsconn.Close()
+ return sshconn, httpserver.ErrorWithStatus(errors.New("bad upgrade"), http.StatusBadGateway)
+ }
+ if resp.Header.Get("X-Arvados-Authorization-Response") != respondAuth {
+ tlsconn.Close()
+ return sshconn, httpserver.ErrorWithStatus(errors.New("bad X-Arvados-Authorization-Response header"), http.StatusBadGateway)
}
if !ctr.InteractiveSessionStarted {
},
})
if err != nil {
- netconn.Close()
- return
+ tlsconn.Close()
+ return sshconn, httpserver.ErrorWithStatus(err, http.StatusInternalServerError)
}
}
- sshconn.Conn = netconn
+ sshconn.Conn = tlsconn
sshconn.Bufrw = &bufio.ReadWriter{Reader: bufr, Writer: bufw}
sshconn.Logger = ctxlog.FromContext(ctx)
+ sshconn.Header = http.Header{"Upgrade": {"ssh"}}
+ return sshconn, nil
+}
+
+// ContainerGatewayTunnel sets up a tunnel enabling us (controller) to
+// connect to the caller's (crunch-run's) gateway server.
+func (conn *Conn) ContainerGatewayTunnel(ctx context.Context, opts arvados.ContainerGatewayTunnelOptions) (resp arvados.ConnectionResponse, err error) {
+ h := hmac.New(sha256.New, []byte(conn.cluster.SystemRootToken))
+ fmt.Fprint(h, opts.UUID)
+ authSecret := fmt.Sprintf("%x", h.Sum(nil))
+ if subtle.ConstantTimeCompare([]byte(authSecret), []byte(opts.AuthSecret)) != 1 {
+ ctxlog.FromContext(ctx).Info("received incorrect auth_secret")
+ return resp, httpserver.ErrorWithStatus(errors.New("authentication error"), http.StatusUnauthorized)
+ }
+
+ muxconn, clientconn := net.Pipe()
+ tunnel, err := yamux.Server(muxconn, nil)
+ if err != nil {
+ clientconn.Close()
+ return resp, httpserver.ErrorWithStatus(err, http.StatusInternalServerError)
+ }
+
+ conn.gwTunnelsLock.Lock()
+ if conn.gwTunnels == nil {
+ conn.gwTunnels = map[string]*yamux.Session{opts.UUID: tunnel}
+ } else {
+ conn.gwTunnels[opts.UUID] = tunnel
+ }
+ conn.gwTunnelsLock.Unlock()
+
+ go func() {
+ <-tunnel.CloseChan()
+ conn.gwTunnelsLock.Lock()
+ if conn.gwTunnels[opts.UUID] == tunnel {
+ delete(conn.gwTunnels, opts.UUID)
+ }
+ conn.gwTunnelsLock.Unlock()
+ }()
+
+ // Assuming we're acting as the backend of an http server,
+ // lib/controller/router will call resp's ServeHTTP handler,
+ // which upgrades the incoming http connection to a raw socket
+ // and connects it to our yamux.Server through our net.Pipe().
+ resp.Conn = clientconn
+ resp.Bufrw = &bufio.ReadWriter{Reader: bufio.NewReader(&bytes.Buffer{}), Writer: bufio.NewWriter(&bytes.Buffer{})}
+ resp.Logger = ctxlog.FromContext(ctx)
+ resp.Header = http.Header{"Upgrade": {"tunnel"}}
+ if u, ok := service.URLFromContext(ctx); ok {
+ resp.Header.Set("X-Arvados-Internal-Url", u.String())
+ } else if forceInternalURLForTest != nil {
+ resp.Header.Set("X-Arvados-Internal-Url", forceInternalURLForTest.String())
+ }
return
}