X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/7109ea1b2a49bc7fdbdbfd2302eb2457750ce5cd..f30c8ed35e3e1ad7cb3cb51fc6d83f56a04ae8de:/lib/dispatchcloud/driver.go diff --git a/lib/dispatchcloud/driver.go b/lib/dispatchcloud/driver.go index 36b8e80082..fe498d0484 100644 --- a/lib/dispatchcloud/driver.go +++ b/lib/dispatchcloud/driver.go @@ -8,25 +8,31 @@ import ( "fmt" "time" - "git.curoverse.com/arvados.git/lib/cloud" - "git.curoverse.com/arvados.git/lib/cloud/azure" - "git.curoverse.com/arvados.git/lib/cloud/ec2" - "git.curoverse.com/arvados.git/sdk/go/arvados" + "git.arvados.org/arvados.git/lib/cloud" + "git.arvados.org/arvados.git/lib/cloud/azure" + "git.arvados.org/arvados.git/lib/cloud/ec2" + "git.arvados.org/arvados.git/sdk/go/arvados" + "github.com/prometheus/client_golang/prometheus" "github.com/sirupsen/logrus" "golang.org/x/crypto/ssh" ) -var drivers = map[string]cloud.Driver{ +// Drivers is a map of available cloud drivers. +// Clusters.*.Containers.CloudVMs.Driver configuration values +// correspond to keys in this map. +var Drivers = map[string]cloud.Driver{ "azure": azure.Driver, "ec2": ec2.Driver, } -func newInstanceSet(cluster *arvados.Cluster, setID cloud.InstanceSetID, logger logrus.FieldLogger) (cloud.InstanceSet, error) { - driver, ok := drivers[cluster.Containers.CloudVMs.Driver] +func newInstanceSet(cluster *arvados.Cluster, setID cloud.InstanceSetID, logger logrus.FieldLogger, reg *prometheus.Registry) (cloud.InstanceSet, error) { + driver, ok := Drivers[cluster.Containers.CloudVMs.Driver] if !ok { return nil, fmt.Errorf("unsupported cloud driver %q", cluster.Containers.CloudVMs.Driver) } - is, err := driver.InstanceSet(cluster.Containers.CloudVMs.DriverParameters, setID, logger) + sharedResourceTags := cloud.SharedResourceTags(cluster.Containers.CloudVMs.ResourceTags) + is, err := driver.InstanceSet(cluster.Containers.CloudVMs.DriverParameters, setID, sharedResourceTags, logger) + is = newInstrumentedInstanceSet(is, reg) if maxops := cluster.Containers.CloudVMs.MaxCloudOpsPerSecond; maxops > 0 { is = rateLimitedInstanceSet{ InstanceSet: is, @@ -82,7 +88,7 @@ func (is defaultTaggingInstanceSet) Create(it arvados.InstanceType, image cloud. return is.InstanceSet.Create(it, image, allTags, init, pk) } -// Filters the instances returned by the wrapped InstanceSet's +// Filter the instances returned by the wrapped InstanceSet's // Instances() method (in case the wrapped InstanceSet didn't do this // itself). type filteringInstanceSet struct { @@ -112,3 +118,68 @@ nextInstance: }).WithError(err).Debugf("filteringInstanceSet returning instances") return returning, err } + +func newInstrumentedInstanceSet(is cloud.InstanceSet, reg *prometheus.Registry) cloud.InstanceSet { + cv := prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: "arvados", + Subsystem: "dispatchcloud", + Name: "driver_operations", + Help: "Number of instance-create/destroy/list operations performed via cloud driver.", + }, []string{"operation", "error"}) + + // Create all counters, so they are reported with zero values + // (instead of being missing) until they are incremented. + for _, op := range []string{"Create", "List", "Destroy", "SetTags"} { + for _, error := range []string{"0", "1"} { + cv.WithLabelValues(op, error).Add(0) + } + } + + reg.MustRegister(cv) + return instrumentedInstanceSet{is, cv} +} + +type instrumentedInstanceSet struct { + cloud.InstanceSet + cv *prometheus.CounterVec +} + +func (is instrumentedInstanceSet) Create(it arvados.InstanceType, image cloud.ImageID, tags cloud.InstanceTags, init cloud.InitCommand, pk ssh.PublicKey) (cloud.Instance, error) { + inst, err := is.InstanceSet.Create(it, image, tags, init, pk) + is.cv.WithLabelValues("Create", boolLabelValue(err != nil)).Inc() + return instrumentedInstance{inst, is.cv}, err +} + +func (is instrumentedInstanceSet) Instances(tags cloud.InstanceTags) ([]cloud.Instance, error) { + instances, err := is.InstanceSet.Instances(tags) + is.cv.WithLabelValues("List", boolLabelValue(err != nil)).Inc() + var instrumented []cloud.Instance + for _, i := range instances { + instrumented = append(instrumented, instrumentedInstance{i, is.cv}) + } + return instrumented, err +} + +type instrumentedInstance struct { + cloud.Instance + cv *prometheus.CounterVec +} + +func (inst instrumentedInstance) Destroy() error { + err := inst.Instance.Destroy() + inst.cv.WithLabelValues("Destroy", boolLabelValue(err != nil)).Inc() + return err +} + +func (inst instrumentedInstance) SetTags(tags cloud.InstanceTags) error { + err := inst.Instance.SetTags(tags) + inst.cv.WithLabelValues("SetTags", boolLabelValue(err != nil)).Inc() + return err +} + +func boolLabelValue(v bool) string { + if v { + return "1" + } + return "0" +}