From 30ca2a11cbe11e054ea60ed01c3e94d422dddac6 Mon Sep 17 00:00:00 2001 From: Tom Clegg Date: Fri, 15 Feb 2019 16:15:27 -0500 Subject: [PATCH] 14807: Move secret-tag host key verify mechanism out of Azure driver. Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- lib/cloud/azure/azure.go | 73 ++----------------------- lib/cloud/azure/azure_test.go | 28 +++++----- lib/cloud/interfaces.go | 18 +++++- lib/dispatchcloud/test/stub_driver.go | 4 +- lib/dispatchcloud/worker/pool.go | 27 +++++---- lib/dispatchcloud/worker/verify.go | 56 +++++++++++++++++++ lib/dispatchcloud/worker/worker_test.go | 2 +- 7 files changed, 112 insertions(+), 96 deletions(-) create mode 100644 lib/dispatchcloud/worker/verify.go diff --git a/lib/cloud/azure/azure.go b/lib/cloud/azure/azure.go index d8d23d1bc3..8bf5c34783 100644 --- a/lib/cloud/azure/azure.go +++ b/lib/cloud/azure/azure.go @@ -50,6 +50,8 @@ type azureInstanceSetConfig struct { AdminUsername string } +const tagKeyInstanceSecret = "InstanceSecret" + type virtualMachinesClientWrapper interface { createOrUpdate(ctx context.Context, resourceGroupName string, @@ -312,15 +314,12 @@ func (az *azureInstanceSet) Create( instanceType arvados.InstanceType, imageID cloud.ImageID, newTags cloud.InstanceTags, + initCommand cloud.InitCommand, publicKey ssh.PublicKey) (cloud.Instance, error) { az.stopWg.Add(1) defer az.stopWg.Done() - if len(newTags["node-token"]) == 0 { - return nil, fmt.Errorf("Must provide tag 'node-token'") - } - name, err := randutil.String(15, "abcdefghijklmnopqrstuvwxyz0123456789") if err != nil { return nil, err @@ -337,8 +336,6 @@ func (az *azureInstanceSet) Create( tags["dispatch-"+k] = &newstr } - tags["dispatch-instance-type"] = &instanceType.Name - nicParameters := network.Interface{ Location: &az.azconfig.Location, Tags: tags, @@ -372,8 +369,7 @@ func (az *azureInstanceSet) Create( az.azconfig.BlobContainer, name) - customData := base64.StdEncoding.EncodeToString([]byte(fmt.Sprintf(`#!/bin/sh -echo '%s-%s' > '/home/%s/node-token'`, name, newTags["node-token"], az.azconfig.AdminUsername))) + customData := base64.StdEncoding.EncodeToString([]byte("#!/bin/sh\n" + initCommand + "\n")) vmParameters := compute.VirtualMachine{ Location: &az.azconfig.Location, @@ -639,63 +635,6 @@ func (ai *azureInstance) RemoteUser() string { return ai.provider.azconfig.AdminUsername } -func (ai *azureInstance) VerifyHostKey(receivedKey ssh.PublicKey, client *ssh.Client) error { - ai.provider.stopWg.Add(1) - defer ai.provider.stopWg.Done() - - remoteFingerprint := ssh.FingerprintSHA256(receivedKey) - - tags := ai.Tags() - - tg := tags["ssh-pubkey-fingerprint"] - if tg != "" { - if remoteFingerprint == tg { - return nil - } - return fmt.Errorf("Key fingerprint did not match, expected %q got %q", tg, remoteFingerprint) - } - - nodetokenTag := tags["node-token"] - if nodetokenTag == "" { - return fmt.Errorf("Missing node token tag") - } - - sess, err := client.NewSession() - if err != nil { - return err - } - - nodetokenbytes, err := sess.Output("cat /home/" + ai.provider.azconfig.AdminUsername + "/node-token") - if err != nil { - return err - } - - nodetoken := strings.TrimSpace(string(nodetokenbytes)) - - expectedToken := fmt.Sprintf("%s-%s", *ai.vm.Name, nodetokenTag) - - if strings.TrimSpace(nodetoken) != expectedToken { - return fmt.Errorf("Node token did not match, expected %q got %q", expectedToken, nodetoken) - } - - sess, err = client.NewSession() - if err != nil { - return err - } - - keyfingerprintbytes, err := sess.Output("ssh-keygen -E sha256 -l -f /etc/ssh/ssh_host_rsa_key.pub") - if err != nil { - return err - } - - sp := strings.Split(string(keyfingerprintbytes), " ") - - if remoteFingerprint != sp[1] { - return fmt.Errorf("Key fingerprint did not match, expected %q got %q", sp[1], remoteFingerprint) - } - - tags["ssh-pubkey-fingerprint"] = sp[1] - delete(tags, "node-token") - ai.SetTags(tags) - return nil +func (ai *azureInstance) VerifyHostKey(ssh.PublicKey, *ssh.Client) error { + return cloud.ErrNotImplemented } diff --git a/lib/cloud/azure/azure_test.go b/lib/cloud/azure/azure_test.go index 859765ccf0..61649c3980 100644 --- a/lib/cloud/azure/azure_test.go +++ b/lib/cloud/azure/azure_test.go @@ -51,7 +51,6 @@ import ( "github.com/Azure/go-autorest/autorest" "github.com/Azure/go-autorest/autorest/azure" "github.com/Azure/go-autorest/autorest/to" - "github.com/jmcvetta/randutil" "github.com/sirupsen/logrus" "golang.org/x/crypto/ssh" check "gopkg.in/check.v1" @@ -161,18 +160,16 @@ func (*AzureInstanceSetSuite) TestCreate(c *check.C) { pk, _, _, _, err := ssh.ParseAuthorizedKey(testKey) c.Assert(err, check.IsNil) - nodetoken, err := randutil.String(40, "abcdefghijklmnopqrstuvwxyz0123456789") - c.Assert(err, check.IsNil) - inst, err := ap.Create(cluster.InstanceTypes["tiny"], img, map[string]string{ - "node-token": nodetoken}, - pk) + "TestTagName": "test tag value", + }, "umask 0600; echo -n test-file-data >/var/run/test-file", pk) c.Assert(err, check.IsNil) - tg := inst.Tags() - log.Printf("Result %v %v %v", inst.String(), inst.Address(), tg) + tags := inst.Tags() + c.Check(tags["TestTagName"], check.Equals, "test tag value") + c.Logf("inst.String()=%v Address()=%v Tags()=%v", inst.String(), inst.Address(), tags) } @@ -307,19 +304,22 @@ func (*AzureInstanceSetSuite) TestSSH(c *check.C) { c.Assert(err, check.IsNil) if len(l) > 0 { - sshclient, err := SetupSSHClient(c, l[0]) c.Assert(err, check.IsNil) + defer sshclient.Conn.Close() sess, err := sshclient.NewSession() c.Assert(err, check.IsNil) - - out, err := sess.Output("cat /home/crunch/node-token") + defer sess.Close() + _, err = sess.Output("find /var/run/test-file -maxdepth 0 -user root -perm 0600") c.Assert(err, check.IsNil) - log.Printf("%v", string(out)) - - sshclient.Conn.Close() + sess, err = sshclient.NewSession() + c.Assert(err, check.IsNil) + defer sess.Close() + out, err := sess.Output("sudo cat /var/run/test-file") + c.Assert(err, check.IsNil) + c.Check(string(out), check.Equals, "test-file-data") } } diff --git a/lib/cloud/interfaces.go b/lib/cloud/interfaces.go index 3f03b650b4..792e737a91 100644 --- a/lib/cloud/interfaces.go +++ b/lib/cloud/interfaces.go @@ -6,6 +6,7 @@ package cloud import ( "encoding/json" + "errors" "io" "time" @@ -57,6 +58,8 @@ type Executor interface { Execute(cmd string, stdin io.Reader) (stdout, stderr []byte, err error) } +var ErrNotImplemented = errors.New("not implemented") + // An ExecutorTarget is a remote command execution service. type ExecutorTarget interface { // SSH server hostname or IP address, or empty string if @@ -71,6 +74,9 @@ type ExecutorTarget interface { // VerifyHostKey can use it to make outgoing network // connections from the instance -- e.g., to use the cloud's // "this instance's metadata" API. + // + // Return ErrNotImplemented if no verification mechanism is + // available. VerifyHostKey(ssh.PublicKey, *ssh.Client) error } @@ -105,12 +111,18 @@ type Instance interface { // All public methods of an InstanceSet, and all public methods of the // instances it returns, are goroutine safe. type InstanceSet interface { - // Create a new instance. If supported by the driver, add the + // Create a new instance with the given type, image, and + // initial set of tags. If supported by the driver, add the // provided public key to /root/.ssh/authorized_keys. // + // The given InitCommand should be executed on the newly + // created instance. This is optional for a driver whose + // instances' VerifyHostKey() method never returns + // ErrNotImplemented. InitCommand will be under 1 KiB. + // // The returned error should implement RateLimitError and // QuotaError where applicable. - Create(arvados.InstanceType, ImageID, InstanceTags, ssh.PublicKey) (Instance, error) + Create(arvados.InstanceType, ImageID, InstanceTags, InitCommand, ssh.PublicKey) (Instance, error) // Return all instances, including ones that are booting or // shutting down. Optionally, filter out nodes that don't have @@ -128,6 +140,8 @@ type InstanceSet interface { Stop() } +type InitCommand string + // A Driver returns an InstanceSet that uses the given InstanceSetID // and driver-dependent configuration parameters. // diff --git a/lib/dispatchcloud/test/stub_driver.go b/lib/dispatchcloud/test/stub_driver.go index d81382a301..c0d2e61fc2 100644 --- a/lib/dispatchcloud/test/stub_driver.go +++ b/lib/dispatchcloud/test/stub_driver.go @@ -99,7 +99,7 @@ type StubInstanceSet struct { allowInstancesCall time.Time } -func (sis *StubInstanceSet) Create(it arvados.InstanceType, image cloud.ImageID, tags cloud.InstanceTags, authKey ssh.PublicKey) (cloud.Instance, error) { +func (sis *StubInstanceSet) Create(it arvados.InstanceType, image cloud.ImageID, tags cloud.InstanceTags, cmd cloud.InitCommand, authKey ssh.PublicKey) (cloud.Instance, error) { if sis.driver.HoldCloudOps { sis.driver.holdCloudOps <- true } @@ -123,6 +123,7 @@ func (sis *StubInstanceSet) Create(it arvados.InstanceType, image cloud.ImageID, id: cloud.InstanceID(fmt.Sprintf("stub-%s-%x", it.ProviderType, math_rand.Int63())), tags: copyTags(tags), providerType: it.ProviderType, + initCommand: cmd, } svm.SSHService = SSHService{ HostKey: sis.driver.HostKey, @@ -184,6 +185,7 @@ type StubVM struct { sis *StubInstanceSet id cloud.InstanceID tags cloud.InstanceTags + initCommand cloud.InitCommand providerType string SSHService SSHService running map[string]bool diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go index 6f7163c8ca..ce66625a29 100644 --- a/lib/dispatchcloud/worker/pool.go +++ b/lib/dispatchcloud/worker/pool.go @@ -22,9 +22,9 @@ import ( ) const ( - tagKeyInstanceType = "InstanceType" - tagKeyIdleBehavior = "IdleBehavior" - tagKeyNodeToken = "node-token" // deprecated, but required by Azure driver + tagKeyInstanceType = "InstanceType" + tagKeyIdleBehavior = "IdleBehavior" + tagKeyInstanceSecret = "InstanceSecret" ) // An InstanceView shows a worker's current state and recent activity. @@ -261,16 +261,18 @@ func (wp *Pool) Create(it arvados.InstanceType) bool { if time.Now().Before(wp.atQuotaUntil) || wp.throttleCreate.Error() != nil { return false } - tags := cloud.InstanceTags{ - tagKeyInstanceType: it.Name, - tagKeyIdleBehavior: string(IdleBehaviorRun), - tagKeyNodeToken: randomToken(), - } now := time.Now() wp.creating[it] = append(wp.creating[it], now) go func() { defer wp.notify() - inst, err := wp.instanceSet.Create(it, wp.imageID, tags, wp.installPublicKey) + secret := randomHex(instanceSecretLength) + tags := cloud.InstanceTags{ + tagKeyInstanceType: it.Name, + tagKeyIdleBehavior: string(IdleBehaviorRun), + tagKeyInstanceSecret: secret, + } + initCmd := cloud.InitCommand(fmt.Sprintf("umask 0177 && echo -n %q >%s", secret, instanceSecretFilename)) + inst, err := wp.instanceSet.Create(it, wp.imageID, tags, initCmd, wp.installPublicKey) wp.mtx.Lock() defer wp.mtx.Unlock() // Remove our timestamp marker from wp.creating @@ -326,6 +328,7 @@ func (wp *Pool) SetIdleBehavior(id cloud.InstanceID, idleBehavior IdleBehavior) // // Caller must have lock. func (wp *Pool) updateWorker(inst cloud.Instance, it arvados.InstanceType, initialState State) (*worker, bool) { + inst = tagVerifier{inst} id := inst.ID() if wkr := wp.workers[id]; wkr != nil { wkr.executor.SetTarget(inst) @@ -786,8 +789,10 @@ func (wp *Pool) sync(threshold time.Time, instances []cloud.Instance) { } } -func randomToken() string { - buf := make([]byte, 32) +// Return a random string of n hexadecimal digits (n*4 random bits). n +// must be even. +func randomHex(n int) string { + buf := make([]byte, n/2) _, err := rand.Read(buf) if err != nil { panic(err) diff --git a/lib/dispatchcloud/worker/verify.go b/lib/dispatchcloud/worker/verify.go new file mode 100644 index 0000000000..e22c85d009 --- /dev/null +++ b/lib/dispatchcloud/worker/verify.go @@ -0,0 +1,56 @@ +// Copyright (C) The Arvados Authors. All rights reserved. +// +// SPDX-License-Identifier: AGPL-3.0 + +package worker + +import ( + "bytes" + "errors" + "fmt" + + "git.curoverse.com/arvados.git/lib/cloud" + "golang.org/x/crypto/ssh" +) + +var ( + errBadInstanceSecret = errors.New("bad instance secret") + + // filename on instance, as given to shell (quoted accordingly) + instanceSecretFilename = "/var/run/arvados-instance-secret" + instanceSecretLength = 40 // hex digits +) + +type tagVerifier struct { + cloud.Instance +} + +func (tv tagVerifier) VerifyHostKey(pubKey ssh.PublicKey, client *ssh.Client) error { + expectSecret := tv.Instance.Tags()[tagKeyInstanceSecret] + if err := tv.Instance.VerifyHostKey(pubKey, client); err != cloud.ErrNotImplemented || expectSecret == "" { + // If the wrapped instance indicates it has a way to + // verify the key, return that decision. + return err + } + session, err := client.NewSession() + if err != nil { + return err + } + defer session.Close() + var stdout, stderr bytes.Buffer + session.Stdin = bytes.NewBuffer(nil) + session.Stdout = &stdout + session.Stderr = &stderr + cmd := fmt.Sprintf("cat %s", instanceSecretFilename) + if u := tv.RemoteUser(); u != "root" { + cmd = "sudo " + cmd + } + err = session.Run(cmd) + if err != nil { + return err + } + if stdout.String() != expectSecret { + return errBadInstanceSecret + } + return nil +} diff --git a/lib/dispatchcloud/worker/worker_test.go b/lib/dispatchcloud/worker/worker_test.go index 2eb5255b87..c5b09359fb 100644 --- a/lib/dispatchcloud/worker/worker_test.go +++ b/lib/dispatchcloud/worker/worker_test.go @@ -26,7 +26,7 @@ func (suite *WorkerSuite) TestProbeAndUpdate(c *check.C) { is, err := (&test.StubDriver{}).InstanceSet(nil, "", logger) c.Assert(err, check.IsNil) - inst, err := is.Create(arvados.InstanceType{}, "", nil, nil) + inst, err := is.Create(arvados.InstanceType{}, "", nil, "echo InitCommand", nil) c.Assert(err, check.IsNil) type trialT struct { -- 2.39.5