17170: Improve error messages.
[arvados.git] / lib / controller / localdb / container_gateway.go
index 31d44e5e0d88e4f1cab146f28df5384f5c0ff15c..807995b3c5a2dc0f9f2eb4797f4ee1df2d071870 100644 (file)
@@ -9,14 +9,16 @@ import (
        "context"
        "crypto/hmac"
        "crypto/sha256"
+       "crypto/tls"
+       "crypto/x509"
        "errors"
        "fmt"
-       "net"
        "net/http"
        "net/url"
        "strings"
 
        "git.arvados.org/arvados.git/sdk/go/arvados"
+       "git.arvados.org/arvados.git/sdk/go/auth"
        "git.arvados.org/arvados.git/sdk/go/ctxlog"
        "git.arvados.org/arvados.git/sdk/go/httpserver"
 )
@@ -28,28 +30,92 @@ import (
 // If the returned error is nil, the caller is responsible for closing
 // sshconn.Conn.
 func (conn *Conn) ContainerSSH(ctx context.Context, opts arvados.ContainerSSHOptions) (sshconn arvados.ContainerSSHConnection, err error) {
+       user, err := conn.railsProxy.UserGetCurrent(ctx, arvados.GetOptions{})
+       if err != nil {
+               return
+       }
        ctr, err := conn.railsProxy.ContainerGet(ctx, arvados.GetOptions{UUID: opts.UUID})
        if err != nil {
                return
        }
-       if ctr.GatewayAddress == "" || ctr.State != arvados.ContainerStateRunning {
-               err = httpserver.ErrorWithStatus(fmt.Errorf("gateway is not available, container is %s", strings.ToLower(string(ctr.State))), http.StatusBadGateway)
+
+       ctxRoot := auth.NewContext(ctx, &auth.Credentials{Tokens: []string{conn.cluster.SystemRootToken}})
+       crs, err := conn.railsProxy.ContainerRequestList(ctxRoot, arvados.ListOptions{Limit: -1, Filters: []arvados.Filter{{"container_uuid", "=", opts.UUID}}})
+       if err != nil {
+               return
+       }
+       for _, cr := range crs.Items {
+               if cr.ModifiedByUserUUID != user.UUID {
+                       err = httpserver.ErrorWithStatus(errors.New("permission denied: container is associated with requests submitted by other users"), http.StatusForbidden)
+                       return
+               }
+       }
+       if crs.ItemsAvailable != len(crs.Items) {
+               err = httpserver.ErrorWithStatus(errors.New("incomplete response while checking permission"), http.StatusInternalServerError)
+               return
+       }
+
+       switch ctr.State {
+       case arvados.ContainerStateQueued, arvados.ContainerStateLocked:
+               err = httpserver.ErrorWithStatus(fmt.Errorf("gateway is not available, container is %s", strings.ToLower(string(ctr.State))), http.StatusServiceUnavailable)
+               return
+       case arvados.ContainerStateRunning:
+               if ctr.GatewayAddress == "" {
+                       err = httpserver.ErrorWithStatus(errors.New("container is running but gateway is not available"), http.StatusServiceUnavailable)
+                       return
+               }
+       default:
+               err = httpserver.ErrorWithStatus(fmt.Errorf("gateway is not available, container is %s", strings.ToLower(string(ctr.State))), http.StatusGone)
                return
        }
-       netconn, err := net.Dial("tcp", ctr.GatewayAddress)
+       // crunch-run uses a self-signed / unverifiable TLS
+       // certificate, so we use the following scheme to ensure we're
+       // not talking to a MITM.
+       //
+       // 1. Compute ctrKey = HMAC-SHA256(sysRootToken,ctrUUID) --
+       // this will be the same ctrKey that a-d-c supplied to
+       // crunch-run in the GatewayAuthSecret env var.
+       //
+       // 2. Compute requestAuth = HMAC-SHA256(ctrKey,serverCert) and
+       // send it to crunch-run as the X-Arvados-Authorization
+       // header, proving that we know ctrKey. (Note a MITM cannot
+       // replay the proof to a real crunch-run server, because the
+       // real crunch-run server would have a different cert.)
+       //
+       // 3. Compute respondAuth = HMAC-SHA256(ctrKey,requestAuth)
+       // and ensure the server returns it in the
+       // X-Arvados-Authorization-Response header, proving that the
+       // server knows ctrKey.
+       var requestAuth, respondAuth string
+       netconn, err := tls.Dial("tcp", ctr.GatewayAddress, &tls.Config{
+               InsecureSkipVerify: true,
+               VerifyPeerCertificate: func(rawCerts [][]byte, verifiedChains [][]*x509.Certificate) error {
+                       if len(rawCerts) == 0 {
+                               return errors.New("no certificate received, cannot compute authorization header")
+                       }
+                       h := hmac.New(sha256.New, []byte(conn.cluster.SystemRootToken))
+                       fmt.Fprint(h, opts.UUID)
+                       authKey := fmt.Sprintf("%x", h.Sum(nil))
+                       h = hmac.New(sha256.New, []byte(authKey))
+                       h.Write(rawCerts[0])
+                       requestAuth = fmt.Sprintf("%x", h.Sum(nil))
+                       h.Reset()
+                       h.Write([]byte(requestAuth))
+                       respondAuth = fmt.Sprintf("%x", h.Sum(nil))
+                       return nil
+               },
+       })
        if err != nil {
+               err = httpserver.ErrorWithStatus(err, http.StatusBadGateway)
+               return
+       }
+       if respondAuth == "" {
+               err = httpserver.ErrorWithStatus(errors.New("BUG: no respondAuth"), http.StatusInternalServerError)
                return
        }
        bufr := bufio.NewReader(netconn)
        bufw := bufio.NewWriter(netconn)
 
-       // Note this auth header does not protect from replay/mitm
-       // attacks (TODO: use TLS for that). It only authenticates us
-       // to crunch-run.
-       h := hmac.New(sha256.New, []byte(conn.cluster.SystemRootToken))
-       fmt.Fprint(h, "%s", opts.UUID)
-       auth := fmt.Sprintf("%x", h.Sum(nil))
-
        u := url.URL{
                Scheme: "http",
                Host:   ctr.GatewayAddress,
@@ -59,14 +125,19 @@ func (conn *Conn) ContainerSSH(ctx context.Context, opts arvados.ContainerSSHOpt
        bufw.WriteString("Host: " + u.Host + "\r\n")
        bufw.WriteString("Upgrade: ssh\r\n")
        bufw.WriteString("X-Arvados-Target-Uuid: " + opts.UUID + "\r\n")
-       bufw.WriteString("X-Arvados-Authorization: " + auth + "\r\n")
+       bufw.WriteString("X-Arvados-Authorization: " + requestAuth + "\r\n")
        bufw.WriteString("X-Arvados-Detach-Keys: " + opts.DetachKeys + "\r\n")
        bufw.WriteString("X-Arvados-Login-Username: " + opts.LoginUsername + "\r\n")
        bufw.WriteString("\r\n")
        bufw.Flush()
        resp, err := http.ReadResponse(bufr, &http.Request{Method: "GET"})
        if err != nil {
-               err = fmt.Errorf("error reading http response from gateway: %w", err)
+               err = httpserver.ErrorWithStatus(fmt.Errorf("error reading http response from gateway: %w", err), http.StatusBadGateway)
+               netconn.Close()
+               return
+       }
+       if resp.Header.Get("X-Arvados-Authorization-Response") != respondAuth {
+               err = httpserver.ErrorWithStatus(errors.New("bad X-Arvados-Authorization-Response header"), http.StatusBadGateway)
                netconn.Close()
                return
        }