X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/fc0445570096282a9f10b2883a6bbd6a30c7aae1..baef845de7afea10a7884e7daa2798113ec8d477:/lib/dispatchcloud/worker/pool.go

diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index 3abcba6c73..15b0dbcde5 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -106,6 +106,7 @@ func NewPool(logger logrus.FieldLogger, arvClient *arvados.Client, reg *promethe
 		newExecutor:         newExecutor,
 		cluster:             cluster,
 		bootProbeCommand:    cluster.Containers.CloudVMs.BootProbeCommand,
+		instanceInitCommand: cloud.InitCommand(cluster.Containers.CloudVMs.InstanceInitCommand),
 		runnerSource:        cluster.Containers.CloudVMs.DeployRunnerBinary,
 		imageID:             cloud.ImageID(cluster.Containers.CloudVMs.ImageID),
 		instanceTypes:       cluster.InstanceTypes,
@@ -149,6 +150,7 @@ type Pool struct {
 	newExecutor         func(cloud.Instance) Executor
 	cluster             *arvados.Cluster
 	bootProbeCommand    string
+	instanceInitCommand cloud.InitCommand
 	runnerSource        string
 	imageID             cloud.ImageID
 	instanceTypes       map[string]arvados.InstanceType
@@ -171,19 +173,20 @@ type Pool struct {
 	runnerArgs []string // extra args passed to crunch-run
 
 	// private state
-	subscribers  map[<-chan struct{}]chan<- struct{}
-	creating     map[string]createCall // unfinished (cloud.InstanceSet)Create calls (key is instance secret)
-	workers      map[cloud.InstanceID]*worker
-	loaded       bool                 // loaded list of instances from InstanceSet at least once
-	exited       map[string]time.Time // containers whose crunch-run proc has exited, but ForgetContainer has not been called
-	atQuotaUntil time.Time
-	atQuotaErr   cloud.QuotaError
-	stop         chan bool
-	mtx          sync.RWMutex
-	setupOnce    sync.Once
-	runnerData   []byte
-	runnerMD5    [md5.Size]byte
-	runnerCmd    string
+	subscribers                map[<-chan struct{}]chan<- struct{}
+	creating                   map[string]createCall // unfinished (cloud.InstanceSet)Create calls (key is instance secret)
+	workers                    map[cloud.InstanceID]*worker
+	loaded                     bool                 // loaded list of instances from InstanceSet at least once
+	exited                     map[string]time.Time // containers whose crunch-run proc has exited, but ForgetContainer has not been called
+	atQuotaUntilFewerInstances int
+	atQuotaUntil               time.Time
+	atQuotaErr                 cloud.QuotaError
+	stop                       chan bool
+	mtx                        sync.RWMutex
+	setupOnce                  sync.Once
+	runnerData                 []byte
+	runnerMD5                  [md5.Size]byte
+	runnerCmd                  string
 
 	mContainersRunning prometheus.Gauge
 	mInstances         *prometheus.GaugeVec
@@ -197,6 +200,8 @@ type Pool struct {
 	mTimeFromShutdownToGone   prometheus.Summary
 	mTimeFromQueueToCrunchRun prometheus.Summary
 	mRunProbeDuration         *prometheus.SummaryVec
+	mProbeAgeMax              prometheus.Gauge
+	mProbeAgeMedian           prometheus.Gauge
 }
 
 type createCall struct {
@@ -318,6 +323,7 @@ func (wp *Pool) Create(it arvados.InstanceType) bool {
 	wp.mtx.Lock()
 	defer wp.mtx.Unlock()
 	if time.Now().Before(wp.atQuotaUntil) ||
+		wp.atQuotaUntilFewerInstances > 0 ||
 		wp.instanceSet.throttleCreate.Error() != nil ||
 		(wp.maxInstances > 0 && wp.maxInstances <= len(wp.workers)+len(wp.creating)) {
 		return false
@@ -345,7 +351,7 @@ func (wp *Pool) Create(it arvados.InstanceType) bool {
 			wp.tagKeyPrefix + tagKeyIdleBehavior:   string(IdleBehaviorRun),
 			wp.tagKeyPrefix + tagKeyInstanceSecret: secret,
 		}
-		initCmd := TagVerifier{nil, secret, nil}.InitCommand()
+		initCmd := TagVerifier{nil, secret, nil}.InitCommand() + "\n" + wp.instanceInitCommand
 		inst, err := wp.instanceSet.Create(it, wp.imageID, tags, initCmd, wp.installPublicKey)
 		wp.mtx.Lock()
 		defer wp.mtx.Unlock()
@@ -356,8 +362,24 @@ func (wp *Pool) Create(it arvados.InstanceType) bool {
 		if err != nil {
 			if err, ok := err.(cloud.QuotaError); ok && err.IsQuotaError() {
 				wp.atQuotaErr = err
-				wp.atQuotaUntil = time.Now().Add(quotaErrorTTL)
-				time.AfterFunc(quotaErrorTTL, wp.notify)
+				n := len(wp.workers) + len(wp.creating) - 1
+				if n < 1 {
+					// Quota error with no
+					// instances running --
+					// nothing to do but wait
+					wp.atQuotaUntilFewerInstances = 0
+					wp.atQuotaUntil = time.Now().Add(quotaErrorTTL)
+					time.AfterFunc(quotaErrorTTL, wp.notify)
+					logger.WithField("atQuotaUntil", wp.atQuotaUntil).Info("quota error with 0 running -- waiting for quotaErrorTTL")
+				} else if n < wp.atQuotaUntilFewerInstances || wp.atQuotaUntilFewerInstances == 0 {
+					// Quota error with N
+					// instances running -- report
+					// AtQuota until some
+					// instances shut down
+					wp.atQuotaUntilFewerInstances = n
+					wp.atQuotaUntil = time.Time{}
+					logger.WithField("atQuotaUntilFewerInstances", n).Info("quota error -- waiting for next instance shutdown")
+				}
 			}
 			logger.WithError(err).Error("create failed")
 			wp.instanceSet.throttleCreate.CheckRateLimitError(err, wp.logger, "create instance", wp.notify)
@@ -377,7 +399,9 @@ func (wp *Pool) Create(it arvados.InstanceType) bool {
 func (wp *Pool) AtQuota() bool {
 	wp.mtx.Lock()
 	defer wp.mtx.Unlock()
-	return time.Now().Before(wp.atQuotaUntil) || (wp.maxInstances > 0 && wp.maxInstances <= len(wp.workers)+len(wp.creating))
+	return wp.atQuotaUntilFewerInstances > 0 ||
+		time.Now().Before(wp.atQuotaUntil) ||
+		(wp.maxInstances > 0 && wp.maxInstances <= len(wp.workers)+len(wp.creating))
 }
 
 // SetIdleBehavior determines how the indicated instance will behave
@@ -397,10 +421,15 @@ func (wp *Pool) SetIdleBehavior(id cloud.InstanceID, idleBehavior IdleBehavior)
 func (wp *Pool) reportSSHConnected(inst cloud.Instance) {
 	wp.mtx.Lock()
 	defer wp.mtx.Unlock()
-	wkr := wp.workers[inst.ID()]
+	wkr, ok := wp.workers[inst.ID()]
+	if !ok {
+		// race: inst was removed from the pool
+		return
+	}
 	if wkr.state != StateBooting || !wkr.firstSSHConnection.IsZero() {
-		// the node is not in booting state (can happen if a-d-c is restarted) OR
-		// this is not the first SSH connection
+		// the node is not in booting state (can happen if
+		// a-d-c is restarted) OR this is not the first SSH
+		// connection
 		return
 	}
 
@@ -621,6 +650,20 @@ func (wp *Pool) registerMetrics(reg *prometheus.Registry) {
 		Help:      "Number of containers reported running by cloud VMs.",
 	})
 	reg.MustRegister(wp.mContainersRunning)
+	wp.mProbeAgeMax = prometheus.NewGauge(prometheus.GaugeOpts{
+		Namespace: "arvados",
+		Subsystem: "dispatchcloud",
+		Name:      "probe_age_seconds_max",
+		Help:      "Maximum number of seconds since an instance's most recent successful probe.",
+	})
+	reg.MustRegister(wp.mProbeAgeMax)
+	wp.mProbeAgeMedian = prometheus.NewGauge(prometheus.GaugeOpts{
+		Namespace: "arvados",
+		Subsystem: "dispatchcloud",
+		Name:      "probe_age_seconds_median",
+		Help:      "Median number of seconds since an instance's most recent successful probe.",
+	})
+	reg.MustRegister(wp.mProbeAgeMedian)
 	wp.mInstances = prometheus.NewGaugeVec(prometheus.GaugeOpts{
 		Namespace: "arvados",
 		Subsystem: "dispatchcloud",
@@ -733,6 +776,8 @@ func (wp *Pool) updateMetrics() {
 	cpu := map[string]int64{}
 	mem := map[string]int64{}
 	var running int64
+	now := time.Now()
+	var probed []time.Time
 	for _, wkr := range wp.workers {
 		var cat string
 		switch {
@@ -752,6 +797,7 @@ func (wp *Pool) updateMetrics() {
 		cpu[cat] += int64(wkr.instType.VCPUs)
 		mem[cat] += int64(wkr.instType.RAM)
 		running += int64(len(wkr.running) + len(wkr.starting))
+		probed = append(probed, wkr.probed)
 	}
 	for _, cat := range []string{"inuse", "hold", "booting", "unknown", "idle"} {
 		wp.mInstancesPrice.WithLabelValues(cat).Set(price[cat])
@@ -768,6 +814,15 @@ func (wp *Pool) updateMetrics() {
 		wp.mInstances.WithLabelValues(k.cat, k.instType).Set(float64(v))
 	}
 	wp.mContainersRunning.Set(float64(running))
+
+	if len(probed) == 0 {
+		wp.mProbeAgeMax.Set(0)
+		wp.mProbeAgeMedian.Set(0)
+	} else {
+		sort.Slice(probed, func(i, j int) bool { return probed[i].Before(probed[j]) })
+		wp.mProbeAgeMax.Set(now.Sub(probed[0]).Seconds())
+		wp.mProbeAgeMedian.Set(now.Sub(probed[len(probed)/2]).Seconds())
+	}
 }
 
 func (wp *Pool) runProbes() {
@@ -873,6 +928,9 @@ func (wp *Pool) Instances() []InstanceView {
 // KillInstance destroys a cloud VM instance. It returns an error if
 // the given instance does not exist.
 func (wp *Pool) KillInstance(id cloud.InstanceID, reason string) error {
+	wp.setupOnce.Do(wp.setup)
+	wp.mtx.Lock()
+	defer wp.mtx.Unlock()
 	wkr, ok := wp.workers[id]
 	if !ok {
 		return errors.New("instance not found")
@@ -994,6 +1052,14 @@ func (wp *Pool) sync(threshold time.Time, instances []cloud.Instance) {
 		notify = true
 	}
 
+	if wp.atQuotaUntilFewerInstances > len(wp.workers)+len(wp.creating) {
+		// After syncing, there are fewer instances (including
+		// pending creates) than there were last time we saw a
+		// quota error. This might mean it's now possible to
+		// create new instances. Reset our "at quota" state.
+		wp.atQuotaUntilFewerInstances = 0
+	}
+
 	if !wp.loaded {
 		notify = true
 		wp.loaded = true
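
The Create hunk above composes the instance boot script by joining the tag-verifier command with the new operator-configured Containers.CloudVMs.InstanceInitCommand, separated by a newline, so both run during instance init. A minimal sketch of that composition, assuming cloud.InitCommand is a string type as in lib/cloud; the literal commands below are invented for illustration:

package main

import "fmt"

// InitCommand stands in for the string type used for instance boot
// scripts in lib/cloud (assumption: type InitCommand string).
type InitCommand string

func main() {
	// First part: stands in for TagVerifier{...}.InitCommand(), which
	// emits a command that records the instance secret for later
	// verification. Second part: stands in for the operator-supplied
	// InstanceInitCommand. Both literals are hypothetical.
	verifierCmd := InitCommand(`echo "example-secret" >/var/run/instance-secret`)
	operatorCmd := InitCommand(`systemctl enable --now docker`)

	// As in the diff: join with "\n" so the two commands run as one
	// init script on the new instance.
	initCmd := verifierCmd + "\n" + operatorCmd
	fmt.Println(initCmd)
}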
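
The quota-error hunks replace the flat quotaErrorTTL backoff (when other instances exist) with a count-based rule: after a quota error while N other instances or pending creates exist, the pool reports AtQuota until sync observes fewer than N of them. A condensed sketch of that state machine, using an assumed standalone quotaState type; the real field lives on worker.Pool:

package main

import "fmt"

// quotaState condenses the at-quota bookkeeping added in this diff.
// Illustrative type; the real field is Pool.atQuotaUntilFewerInstances.
type quotaState struct {
	atQuotaUntilFewerInstances int
}

// onQuotaError records a cloud quota error seen while n other
// instances (workers + pending creates) exist.
func (qs *quotaState) onQuotaError(n int) {
	if n < 1 {
		// No other instances exist, so none can shut down; the
		// real code falls back to waiting out quotaErrorTTL here.
		qs.atQuotaUntilFewerInstances = 0
	} else if n < qs.atQuotaUntilFewerInstances || qs.atQuotaUntilFewerInstances == 0 {
		qs.atQuotaUntilFewerInstances = n
	}
}

// onSync clears the at-quota state once the observed instance count
// drops below the recorded threshold, as in the sync() hunk.
func (qs *quotaState) onSync(instances int) {
	if qs.atQuotaUntilFewerInstances > instances {
		qs.atQuotaUntilFewerInstances = 0
	}
}

func (qs *quotaState) atQuota() bool { return qs.atQuotaUntilFewerInstances > 0 }

func main() {
	var qs quotaState
	qs.onQuotaError(3)        // quota error while 3 instances exist
	fmt.Println(qs.atQuota()) // true: hold off on new Create calls
	qs.onSync(3)              // sync still sees 3 instances
	fmt.Println(qs.atQuota()) // true
	qs.onSync(2)              // an instance shut down
	fmt.Println(qs.atQuota()) // false: Create may be attempted again
}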
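
The updateMetrics hunk derives both new probe-age gauges from one pass: it collects each worker's most recent successful probe time, sorts ascending, then reads the maximum age from index 0 and the median from index len/2 (the upper of the two middle elements when the count is even). A self-contained sketch of the same computation; the probeAges helper and sample timestamps are illustrative, not part of pool.go:

package main

import (
	"fmt"
	"sort"
	"time"
)

// probeAges mirrors the gauge computation in updateMetrics: given each
// instance's most recent successful probe time, return the maximum and
// median probe age in seconds.
func probeAges(now time.Time, probed []time.Time) (maxAge, medianAge float64) {
	if len(probed) == 0 {
		return 0, 0
	}
	// Sort ascending, so probed[0] is the oldest probe and therefore
	// yields the largest age.
	sort.Slice(probed, func(i, j int) bool { return probed[i].Before(probed[j]) })
	maxAge = now.Sub(probed[0]).Seconds()
	medianAge = now.Sub(probed[len(probed)/2]).Seconds()
	return maxAge, medianAge
}

func main() {
	now := time.Now()
	probed := []time.Time{
		now.Add(-90 * time.Second),
		now.Add(-10 * time.Second),
		now.Add(-30 * time.Second),
	}
	maxAge, medianAge := probeAges(now, probed)
	fmt.Printf("max=%.0fs median=%.0fs\n", maxAge, medianAge) // max=90s median=30s
}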