loaded bool // loaded list of instances from InstanceSet at least once
exited map[string]time.Time // containers whose crunch-run proc has exited, but KillContainer has not been called
atQuotaUntil time.Time
loaded bool // loaded list of instances from InstanceSet at least once
exited map[string]time.Time // containers whose crunch-run proc has exited, but KillContainer has not been called
atQuotaUntil time.Time
- state State
- instance cloud.Instance
- executor Executor
- instType arvados.InstanceType
- vcpus int64
- memory int64
- booted bool
- probed time.Time
- updated time.Time
- busy time.Time
- unallocated time.Time
- lastUUID string
- running map[string]struct{}
- starting map[string]struct{}
- probing chan struct{}
+ state State
+ instance cloud.Instance
+ executor Executor
+ instType arvados.InstanceType
+ vcpus int64
+ memory int64
+ probed time.Time
+ updated time.Time
+ busy time.Time
+ lastUUID string
+ running map[string]struct{} // remember to update state idle<->running when this changes
+ starting map[string]struct{} // remember to update state idle<->running when this changes
+ probing chan struct{}
- executor: wp.newExecutor(inst),
- state: initialState,
- instance: inst,
- instType: it,
- probed: now,
- busy: now,
- updated: now,
- unallocated: now,
- running: make(map[string]struct{}),
- starting: make(map[string]struct{}),
- probing: make(chan struct{}, 1),
+ executor: wp.newExecutor(inst),
+ state: initialState,
+ instance: inst,
+ instType: it,
+ probed: now,
+ busy: now,
+ updated: now,
+ running: make(map[string]struct{}),
+ starting: make(map[string]struct{}),
+ probing: make(chan struct{}, 1),
defer wp.mtx.Unlock()
logger := wp.logger.WithField("InstanceType", it.Name)
logger.Info("shutdown requested")
defer wp.mtx.Unlock()
logger := wp.logger.WithField("InstanceType", it.Name)
logger.Info("shutdown requested")
- if wkr.state != tryState || len(wkr.running)+len(wkr.starting) > 0 {
- continue
+ if wkr.state == tryState && wkr.instType == it {
+ logger = logger.WithField("Instance", wkr.instance)
+ logger.Info("shutting down")
+ wp.shutdown(wkr, logger)
+ return true
- if wkr.instType != it {
- continue
- }
- logger = logger.WithField("Instance", wkr.instance)
- logger.Info("shutting down")
- wp.shutdown(wkr, logger)
- return true
logger = logger.WithField("Instance", wkr.instance)
logger.Debug("starting container")
wkr.starting[ctr.UUID] = struct{}{}
logger = logger.WithField("Instance", wkr.instance)
logger.Debug("starting container")
wkr.starting[ctr.UUID] = struct{}{}
go func() {
stdout, stderr, err := wkr.executor.Execute("crunch-run --detach '"+ctr.UUID+"'", nil)
wp.mtx.Lock()
go func() {
stdout, stderr, err := wkr.executor.Execute("crunch-run --detach '"+ctr.UUID+"'", nil)
wp.mtx.Lock()
// KillContainer kills the crunch-run process for the given container
// UUID, if it's running on any worker.
// KillContainer kills the crunch-run process for the given container
// UUID, if it's running on any worker.
defer wp.mtx.Unlock()
if _, ok := wkr.running[uuid]; ok {
delete(wkr.running, uuid)
defer wp.mtx.Unlock()
if _, ok := wkr.running[uuid]; ok {
delete(wkr.running, uuid)
ArvadosInstanceType: w.instType.Name,
ProviderInstanceType: w.instType.ProviderType,
LastContainerUUID: w.lastUUID,
ArvadosInstanceType: w.instType.Name,
ProviderInstanceType: w.instType.ProviderType,
LastContainerUUID: w.lastUUID,
- if booted && !wkr.booted {
- wkr.booted = booted
- logger.Info("instance booted")
- } else {
- booted = wkr.booted
+ if ok || wkr.state == StateRunning || wkr.state == StateIdle {
+ logger.Info("instance booted; will try probeRunning")
+ needProbeRunning = true
ctrUUIDs, ok, stderr = wp.probeRunning(wkr)
}
logger = logger.WithField("stderr", string(stderr))
wp.mtx.Lock()
defer wp.mtx.Unlock()
if !ok {
ctrUUIDs, ok, stderr = wp.probeRunning(wkr)
}
logger = logger.WithField("stderr", string(stderr))
wp.mtx.Lock()
defer wp.mtx.Unlock()
if !ok {
if updated != wkr.updated {
// Worker was updated after the probe began, so
if updated != wkr.updated {
// Worker was updated after the probe began, so
- wkr.unallocated = updateTime
+ // Actual last-busy time was sometime between wkr.busy
+ // and now. Now is the earliest opportunity to take
+ // advantage of the non-busy state, though.
+ wkr.busy = updateTime