mInstancesPrice *prometheus.GaugeVec
mVCPUs *prometheus.GaugeVec
mMemory *prometheus.GaugeVec
+ mDisappearances *prometheus.CounterVec
}
type createCall struct {
wp.tagKeyPrefix + tagKeyIdleBehavior: string(IdleBehaviorRun),
wp.tagKeyPrefix + tagKeyInstanceSecret: secret,
}
- initCmd := cloud.InitCommand(fmt.Sprintf("umask 0177 && echo -n %q >%s", secret, instanceSecretFilename))
+ initCmd := TagVerifier{nil, secret}.InitCommand()
inst, err := wp.instanceSet.Create(it, wp.imageID, tags, initCmd, wp.installPublicKey)
wp.mtx.Lock()
defer wp.mtx.Unlock()
// Caller must have lock.
func (wp *Pool) updateWorker(inst cloud.Instance, it arvados.InstanceType) (*worker, bool) {
secret := inst.Tags()[wp.tagKeyPrefix+tagKeyInstanceSecret]
- inst = tagVerifier{inst, secret}
+ inst = TagVerifier{inst, secret}
id := inst.ID()
if wkr := wp.workers[id]; wkr != nil {
wkr.executor.SetTarget(inst)
Help: "Total memory on all cloud VMs.",
}, []string{"category"})
reg.MustRegister(wp.mMemory)
+ wp.mDisappearances = prometheus.NewCounterVec(prometheus.CounterOpts{
+ Namespace: "arvados",
+ Subsystem: "dispatchcloud",
+ Name: "instances_disappeared",
+ Help: "Number of occurrences of an instance disappearing from the cloud provider's list of instances.",
+ }, []string{"state"})
+ for _, v := range stateString {
+ wp.mDisappearances.WithLabelValues(v).Add(0)
+ }
+ reg.MustRegister(wp.mDisappearances)
}
func (wp *Pool) runMetrics() {
"WorkerState": wkr.state,
})
logger.Info("instance disappeared in cloud")
+ if wp.mDisappearances != nil {
+ wp.mDisappearances.WithLabelValues(stateString[wkr.state]).Inc()
+ }
delete(wp.workers, id)
go wkr.Close()
notify = true