From: Tom Clegg Date: Thu, 17 Mar 2022 05:03:28 +0000 (-0400) Subject: 16552: Option to get TLS certificates automatically from LE. X-Git-Tag: 2.5.0~116^2~28 X-Git-Url: https://git.arvados.org/arvados.git/commitdiff_plain/5722e7f91d3ab4df898dec0d301c0653ac7995b3 16552: Option to get TLS certificates automatically from LE. Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- diff --git a/cmd/arvados-package/install.go b/cmd/arvados-package/install.go index d8dbdcc4a0..f923d5ef8e 100644 --- a/cmd/arvados-package/install.go +++ b/cmd/arvados-package/install.go @@ -92,6 +92,7 @@ rm /etc/apt/sources.list.d/arvados-local.list if opts.Live != "" { cmd.Args = append(cmd.Args, "--env=domain="+opts.Live, + "--env=initargs=-tls=acme", "--env=bootargs=", "--publish=:443:443", "--publish=:4440-4460:4440-4460", @@ -101,6 +102,7 @@ rm /etc/apt/sources.list.d/arvados-local.list } else { cmd.Args = append(cmd.Args, "--env=domain=localhost", + "--env=initargs=-tls=insecure", "--env=bootargs=-shutdown") } cmd.Args = append(cmd.Args, @@ -122,7 +124,7 @@ eatmydata apt-get install --reinstall -y --no-install-recommends arvados-server- SUDO_FORCE_REMOVE=yes apt-get autoremove -y /etc/init.d/postgresql start -arvados-server init -cluster-id x1234 -domain=$domain -login=test -insecure +arvados-server init -cluster-id x1234 -domain=$domain -login=test $initargs exec arvados-server boot -listen-host=0.0.0.0 -no-workbench2=false $bootargs `) cmd.Stdout = stdout diff --git a/lib/boot/cert.go b/lib/boot/cert.go index 916f9f53b2..10fd0aa9f6 100644 --- a/lib/boot/cert.go +++ b/lib/boot/cert.go @@ -6,19 +6,29 @@ package boot import ( "context" + "crypto/rsa" + "crypto/tls" + "crypto/x509" + "encoding/pem" + "errors" "fmt" "io/ioutil" "net" + "net/http" + "net/url" "os" "path/filepath" + "strings" + "time" + + "golang.org/x/crypto/acme" + "golang.org/x/crypto/acme/autocert" ) -// Create a root CA key and use it to make a new server -// certificate+key pair. 
-// -// In future we'll make one root CA key per host instead of one per -// cluster, so it only needs to be imported to a browser once for -// ongoing dev/test usage. +const stagingDirectoryURL = "https://acme-staging-v02.api.letsencrypt.org/directory" + +var errInvalidHost = errors.New("unrecognized target host in incoming TLS request") + type createCertificates struct{} func (createCertificates) String() string { @@ -26,8 +36,180 @@ func (createCertificates) String() string { } func (createCertificates) Run(ctx context.Context, fail func(error), super *Supervisor) error { + if super.cluster.TLS.Automatic { + return bootAutoCert(ctx, fail, super) + } else if super.cluster.TLS.Key == "" && super.cluster.TLS.Certificate == "" { + return createSelfSignedCert(ctx, fail, super) + } else { + return nil + } +} + +// bootAutoCert uses Let's Encrypt to get certificates for all the +// domains appearing in ExternalURLs, writes them to files where Nginx +// can load them, and updates super.cluster.TLS fields (Key and +// Certificate) to point to those files. +// +// It also runs a background task to keep the files up to date. +// +// After bootAutoCert returns, other service components will get the +// certificates they need by reading these files or by using a +// read-only autocert cache. +// +// Currently this only works when port 80 of every ExternalURL domain +// is routed to this host, i.e., on a single-node cluster. Wildcard +// domains [for WebDAV] are not supported. 
+func bootAutoCert(ctx context.Context, fail func(error), super *Supervisor) error { + hosts := map[string]bool{} + for _, svc := range super.cluster.Services.Map() { + u := url.URL(svc.ExternalURL) + if u.Scheme == "https" || u.Scheme == "wss" { + hosts[strings.ToLower(u.Hostname())] = true + } + } + mgr := &autocert.Manager{ + Cache: autocert.DirCache(super.tempdir + "/autocert"), + Prompt: autocert.AcceptTOS, + HostPolicy: func(ctx context.Context, host string) error { + if hosts[strings.ToLower(host)] { + return nil + } else { + return errInvalidHost + } + }, + } + if super.cluster.TLS.Staging { + mgr.Client = &acme.Client{DirectoryURL: stagingDirectoryURL} + } + go func() { + err := http.ListenAndServe(":80", mgr.HTTPHandler(nil)) + fail(fmt.Errorf("autocert http-01 challenge handler stopped: %w", err)) + }() + u := url.URL(super.cluster.Services.Controller.ExternalURL) + extHost := u.Hostname() + update := func() error { + for h := range hosts { + cert, err := mgr.GetCertificate(&tls.ClientHelloInfo{ServerName: h}) + if err != nil { + return err + } + if h == extHost { + err = writeCert(super.tempdir, "server.key", "server.crt", cert) + if err != nil { + return err + } + } + } + return nil + } + err := update() + if err != nil { + return err + } + go func() { + for range time.NewTicker(time.Hour).C { + err := update() + if err != nil { + super.logger.WithError(err).Error("error getting certificate from autocert") + } + } + }() + super.cluster.TLS.Key = "file://" + super.tempdir + "/server.key" + super.cluster.TLS.Certificate = "file://" + super.tempdir + "/server.crt" + return nil +} + +// Save cert chain and key in a format Nginx can read. 
//
// The key and the certificate chain are each PEM-encoded into a
// temporary file in outdir and then renamed to keyfile and certfile
// respectively, so a reader never sees a partially written file. The
// key is renamed into place before the certificate; the two renames
// are not atomic as a pair.
//
// RSA keys are written in PKCS #1 form ("RSA PRIVATE KEY"); any other
// key type is written in PKCS #8 form ("PRIVATE KEY").
//
// If cert is nil, has an empty chain, or has a key that cannot be
// marshaled, an error is returned before anything is written, so
// existing files are left untouched.
func writeCert(outdir, keyfile, certfile string, cert *tls.Certificate) error {
	// An empty certificate file would replace a working one with
	// something Nginx cannot load, so refuse up front.
	if cert == nil || len(cert.Certificate) == 0 {
		return errors.New("cannot write certificate files: certificate chain is empty")
	}

	// Encode the key before touching the filesystem, so an
	// unsupported key type leaves no temp files behind.
	var keyblock *pem.Block
	switch privkey := cert.PrivateKey.(type) {
	case *rsa.PrivateKey:
		keyblock = &pem.Block{
			Type:  "RSA PRIVATE KEY",
			Bytes: x509.MarshalPKCS1PrivateKey(privkey),
		}
	default:
		buf, err := x509.MarshalPKCS8PrivateKey(privkey)
		if err != nil {
			return fmt.Errorf("marshaling private key: %w", err)
		}
		keyblock = &pem.Block{
			Type:  "PRIVATE KEY",
			Bytes: buf,
		}
	}
	certblocks := make([]*pem.Block, 0, len(cert.Certificate))
	for _, der := range cert.Certificate {
		certblocks = append(certblocks, &pem.Block{
			Type:  "CERTIFICATE",
			Bytes: der,
		})
	}

	keytmp, err := writePEMTempFile(outdir, keyfile+".tmp.*", keyblock)
	if err != nil {
		return err
	}
	// After a successful rename this removal fails harmlessly;
	// on any error path it cleans up the temp file.
	defer os.Remove(keytmp)

	certtmp, err := writePEMTempFile(outdir, certfile+".tmp.*", certblocks...)
	if err != nil {
		return err
	}
	defer os.Remove(certtmp)

	err = os.Rename(keytmp, filepath.Join(outdir, keyfile))
	if err != nil {
		return err
	}
	return os.Rename(certtmp, filepath.Join(outdir, certfile))
}

// writePEMTempFile creates a temporary file in dir (pattern is
// interpreted as in os.CreateTemp), writes the PEM encoding of each
// block to it, closes it, and returns its name. On error, the
// temporary file is removed.
func writePEMTempFile(dir, pattern string, blocks ...*pem.Block) (name string, err error) {
	f, err := os.CreateTemp(dir, pattern)
	if err != nil {
		return "", err
	}
	defer func() {
		if err != nil {
			f.Close()
			os.Remove(f.Name())
		}
	}()
	for _, block := range blocks {
		if err = pem.Encode(f, block); err != nil {
			return "", err
		}
	}
	if err = f.Close(); err != nil {
		return "", err
	}
	return f.Name(), nil
}

// Create a root CA key and use it to make a new server
// certificate+key pair.
//
// In future we'll make one root CA key per host instead of one per
// cluster, so it only needs to be imported to a browser once for
// ongoing dev/test usage.
+func createSelfSignedCert(ctx context.Context, fail func(error), super *Supervisor) error { + san := "DNS:localhost,DNS:localhost.localdomain" + if net.ParseIP(super.ListenHost) != nil { + san += fmt.Sprintf(",IP:%s", super.ListenHost) + } else { + san += fmt.Sprintf(",DNS:%s", super.ListenHost) + } + hostname, err := os.Hostname() + if err != nil { + return fmt.Errorf("hostname: %w", err) + } + if hostname != super.ListenHost { + san += ",DNS:" + hostname + } + // Generate root key - err := super.RunProgram(ctx, super.tempdir, runOptions{}, "openssl", "genrsa", "-out", "rootCA.key", "4096") + err = super.RunProgram(ctx, super.tempdir, runOptions{}, "openssl", "genrsa", "-out", "rootCA.key", "4096") if err != nil { return err } @@ -46,18 +228,6 @@ func (createCertificates) Run(ctx context.Context, fail func(error), super *Supe if err != nil { return err } - hostname, err := os.Hostname() - if err != nil { - return fmt.Errorf("hostname: %w", err) - } - san := "DNS:localhost,DNS:localhost.localdomain,DNS:" + hostname - if super.ListenHost == hostname || super.ListenHost == "localhost" { - // already have it - } else if net.ParseIP(super.ListenHost) != nil { - san += fmt.Sprintf(",IP:%s", super.ListenHost) - } else { - san += fmt.Sprintf(",DNS:%s", super.ListenHost) - } conf := append(defaultconf, []byte(fmt.Sprintf("\n[SAN]\nsubjectAltName=%s\n", san))...) 
err = ioutil.WriteFile(filepath.Join(super.tempdir, "server.cfg"), conf, 0644) if err != nil { @@ -73,5 +243,7 @@ func (createCertificates) Run(ctx context.Context, fail func(error), super *Supe if err != nil { return err } + super.cluster.TLS.Key = "file://" + super.tempdir + "/server.key" + super.cluster.TLS.Certificate = "file://" + super.tempdir + "/server.crt" return nil } diff --git a/lib/boot/nginx.go b/lib/boot/nginx.go index e67bc1d900..48d3bba474 100644 --- a/lib/boot/nginx.go +++ b/lib/boot/nginx.go @@ -14,6 +14,7 @@ import ( "os/exec" "path/filepath" "regexp" + "strings" "git.arvados.org/arvados.git/sdk/go/arvados" ) @@ -42,7 +43,10 @@ func (runNginx) Run(ctx context.Context, fail func(error), super *Supervisor) er } u := url.URL(super.cluster.Services.Controller.ExternalURL) ctrlHost := u.Hostname() - if f, err := os.Open("/var/lib/acme/live/" + ctrlHost + "/privkey"); err == nil { + if strings.HasPrefix(super.cluster.TLS.Certificate, "file:/") && strings.HasPrefix(super.cluster.TLS.Key, "file:/") { + vars["SSLCERT"] = filepath.Clean(super.cluster.TLS.Certificate[5:]) + vars["SSLKEY"] = filepath.Clean(super.cluster.TLS.Key[5:]) + } else if f, err := os.Open("/var/lib/acme/live/" + ctrlHost + "/privkey"); err == nil { f.Close() vars["SSLCERT"] = "/var/lib/acme/live/" + ctrlHost + "/cert" vars["SSLKEY"] = "/var/lib/acme/live/" + ctrlHost + "/privkey" diff --git a/lib/boot/service.go b/lib/boot/service.go index 090e852446..b27a746228 100644 --- a/lib/boot/service.go +++ b/lib/boot/service.go @@ -35,6 +35,7 @@ func (runner runServiceCommand) Run(ctx context.Context, fail func(error), super if err != nil { return err } + super.wait(ctx, createCertificates{}) super.wait(ctx, runner.depends...) for u := range runner.svc.InternalURLs { u := u @@ -82,6 +83,7 @@ func (runner runGoProgram) Run(ctx context.Context, fail func(error), super *Sup return err } + super.wait(ctx, createCertificates{}) super.wait(ctx, runner.depends...) 
 for u := range runner.svc.InternalURLs { u := u diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml index 472a22c6b2..29d9d9cc41 100644 --- a/lib/config/config.default.yml +++ b/lib/config/config.default.yml @@ -900,10 +900,23 @@ Clusters: Repositories: /var/lib/arvados/git/repositories TLS: + # Use "file:///var/lib/acme/live/example.com/cert" and ".../privkey" + # to load externally managed certificates. Certificate: "" Key: "" + + # Accept invalid certificates when connecting to servers. Never + # use this in production. Insecure: false + # Agree to Let's Encrypt terms of service and obtain + # certificates automatically for ExternalURL domains. + Automatic: false + + # Use Let's Encrypt staging environment instead of production + # environment. + Staging: false + Containers: # List of supported Docker Registry image formats that compute nodes # are able to use. `arv keep docker` will error out if a user tries diff --git a/lib/install/init.go b/lib/install/init.go index d2fed1dd7a..6954a60d87 100644 --- a/lib/install/init.go +++ b/lib/install/init.go @@ -34,7 +34,7 @@ type initCommand struct { Domain string PostgreSQLPassword string Login string - Insecure bool + TLS string } func (initcmd *initCommand) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int { @@ -62,7 +62,7 @@ func (initcmd *initCommand) RunCommand(prog string, args []string, stdin io.Read flags.StringVar(&initcmd.ClusterID, "cluster-id", "", "cluster `id`, like x1234 for a dev cluster") flags.StringVar(&initcmd.Domain, "domain", hostname, "cluster public DNS `name`, like x1234.arvadosapi.com") flags.StringVar(&initcmd.Login, "login", "", "login `backend`: test, pam, or ''") - flags.BoolVar(&initcmd.Insecure, "insecure", false, "accept invalid TLS certificates and configure TrustAllContent (do not use in production!)") + flags.StringVar(&initcmd.TLS, "tls", "none", "tls certificate `source`: acme, auto, insecure, or none") if ok, code := 
cmd.ParseFlags(flags, prog, args, "", stderr); !ok { return code } else if *versionFlag { @@ -113,8 +113,8 @@ func (initcmd *initCommand) RunCommand(prog string, args []string, stdin io.Read "http://0.0.0.0:9001/": {} Websocket: InternalURLs: - "http://0.0.0.0:9004/": {} - ExternalURL: {{printf "%q" ( print "wss://" .Domain ":4444/websocket" ) }} + "http://0.0.0.0:8005/": {} + ExternalURL: {{printf "%q" ( print "wss://" .Domain ":4436/" ) }} Keepbalance: InternalURLs: "http://0.0.0.0:9019/": {} @@ -155,7 +155,7 @@ func (initcmd *initCommand) RunCommand(prog string, args []string, stdin io.Read "http://0.0.0.0:9011/": {} Collections: BlobSigningKey: {{printf "%q" ( .RandomHex 50 )}} - {{if .Insecure}} + {{if eq .TLS "insecure"}} TrustAllContent: true {{end}} Containers: @@ -171,10 +171,17 @@ func (initcmd *initCommand) RunCommand(prog string, args []string, stdin io.Read user: arvados password: {{printf "%q" .PostgreSQLPassword}} SystemRootToken: {{printf "%q" ( .RandomHex 50 )}} - {{if .Insecure}} TLS: + {{if eq .TLS "insecure"}} Insecure: true - {{end}} + {{else if eq .TLS "auto"}} + Automatic: true + {{else if eq .TLS "acme"}} + Certificate: {{printf "%q" (print "/var/lib/acme/live/" .Domain "/cert")}} + Key: {{printf "%q" (print "/var/lib/acme/live/" .Domain "/privkey")}} + {{else}} + {} + {{end}} Volumes: {{.ClusterID}}-nyw5e-000000000000000: Driver: Directory diff --git a/lib/service/cmd.go b/lib/service/cmd.go index 4b640c4e47..20441c2a6c 100644 --- a/lib/service/cmd.go +++ b/lib/service/cmd.go @@ -159,7 +159,7 @@ func (c *command) RunCommand(prog string, args []string, stdin io.Reader, stdout Addr: listenURL.Host, } if listenURL.Scheme == "https" || listenURL.Scheme == "wss" { - tlsconfig, err := tlsConfigWithCertUpdater(cluster, logger) + tlsconfig, err := makeTLSConfig(cluster, logger) if err != nil { logger.WithError(err).Errorf("cannot start %s service on %s", c.svcName, listenURL.String()) return 1 diff --git a/lib/service/tls.go b/lib/service/tls.go 
index c6307b76ab..21cd3e2ac2 100644 --- a/lib/service/tls.go +++ b/lib/service/tls.go @@ -5,6 +5,7 @@ package service import ( + "context" "crypto/tls" "errors" "fmt" @@ -12,18 +13,69 @@ import ( "os/signal" "strings" "syscall" + "time" "git.arvados.org/arvados.git/sdk/go/arvados" "github.com/sirupsen/logrus" + "golang.org/x/crypto/acme/autocert" ) -func tlsConfigWithCertUpdater(cluster *arvados.Cluster, logger logrus.FieldLogger) (*tls.Config, error) { +func makeTLSConfig(cluster *arvados.Cluster, logger logrus.FieldLogger) (*tls.Config, error) { + if cluster.TLS.Automatic { + return makeAutocertConfig(cluster, logger) + } else { + return makeFileLoaderConfig(cluster, logger) + } +} + +var errCertUnavailable = errors.New("certificate unavailable, waiting for supervisor to update cache") + +type readonlyDirCache autocert.DirCache + +func (c readonlyDirCache) Get(ctx context.Context, name string) ([]byte, error) { + data, err := autocert.DirCache(c).Get(ctx, name) + if err != nil { + // Returning an error other than autocert.ErrCacheMiss + // causes GetCertificate() to fail early instead of + // trying to obtain a certificate itself (which + // wouldn't work because we're not in a position to + // answer challenges). + return nil, errCertUnavailable + } + return data, nil +} + +func (c readonlyDirCache) Put(ctx context.Context, name string, data []byte) error { + return fmt.Errorf("(bug?) (readonlyDirCache)Put(%s) called", name) +} + +func (c readonlyDirCache) Delete(ctx context.Context, name string) error { + return nil +} + +func makeAutocertConfig(cluster *arvados.Cluster, logger logrus.FieldLogger) (*tls.Config, error) { + mgr := &autocert.Manager{ + Cache: readonlyDirCache("/var/lib/arvados/tmp/autocert"), + Prompt: autocert.AcceptTOS, + // HostPolicy accepts all names because this Manager + // doesn't request certs. Whoever writes certs to our + // cache is effectively responsible for HostPolicy. 
+ HostPolicy: func(ctx context.Context, host string) error { return nil }, + // Keep using whatever's in the cache as long as + // possible. Assume some other process (see lib/boot) + // handles renewals. + RenewBefore: time.Second, + } + return mgr.TLSConfig(), nil +} + +func makeFileLoaderConfig(cluster *arvados.Cluster, logger logrus.FieldLogger) (*tls.Config, error) { currentCert := make(chan *tls.Certificate, 1) loaded := false - key, cert := cluster.TLS.Key, cluster.TLS.Certificate + key := strings.TrimPrefix(cluster.TLS.Key, "file://") + cert := strings.TrimPrefix(cluster.TLS.Certificate, "file://") if !strings.HasPrefix(key, "file://") || !strings.HasPrefix(cert, "file://") { - return nil, errors.New("cannot use TLS certificate: TLS.Key and TLS.Certificate must be specified with a 'file://' prefix") } key, cert = key[7:], cert[7:] @@ -45,9 +97,14 @@ func tlsConfigWithCertUpdater(cluster *arvados.Cluster, logger logrus.FieldLogge return nil, err } + reload := make(chan os.Signal, 1) + signal.Notify(reload, syscall.SIGHUP) + go func() { + for range time.NewTicker(time.Hour).C { + reload <- nil + } + }() go func() { - reload := make(chan os.Signal, 1) - signal.Notify(reload, syscall.SIGHUP) for range reload { err := update() if err != nil { diff --git a/sdk/go/arvados/config.go b/sdk/go/arvados/config.go index c90551a610..d9aa92b65d 100644 --- a/sdk/go/arvados/config.go +++ b/sdk/go/arvados/config.go @@ -227,6 +227,8 @@ type Cluster struct { Certificate string Key string Insecure bool + Automatic bool + Staging bool } Users struct { ActivatedUsersAreVisibleToOthers bool