"net/url"
"os"
"sync"
+ "sync/atomic"
"time"
"git.arvados.org/arvados.git/lib/config"
StaleLockTimeout: arvados.Duration(5 * time.Millisecond),
RuntimeEngine: "stub",
MaxDispatchAttempts: 10,
+ MaximumPriceFactor: 1.5,
CloudVMs: arvados.CloudVMsConfig{
Driver: "test",
SyncInterval: arvados.Duration(10 * time.Millisecond),
s.disp.setupOnce.Do(s.disp.initialize)
queue := &test.Queue{
MaxDispatchAttempts: 5,
- ChooseType: func(ctr *arvados.Container) (arvados.InstanceType, error) {
+ ChooseType: func(ctr *arvados.Container) ([]arvados.InstanceType, error) {
return ChooseInstanceType(s.cluster, ctr)
},
Logger: ctxlog.TestLogger(c),
finishContainer(ctr)
return int(rand.Uint32() & 0x3)
}
- n := 0
+ var countCapacityErrors int64
+ vmCount := int32(0)
s.stubDriver.Queue = queue
- s.stubDriver.SetupVM = func(stubvm *test.StubVM) {
- n++
+ s.stubDriver.SetupVM = func(stubvm *test.StubVM) error { // per-VM setup hook: returns a capacity error for one instance type, otherwise configures stubvm's simulated behavior and returns nil
+ if pt := stubvm.Instance().ProviderType(); pt == test.InstanceType(6).ProviderType { // every VM of test instance type 6 is rejected
+ c.Logf("test: returning capacity error for instance type %s", pt)
+ atomic.AddInt64(&countCapacityErrors, 1) // atomic update; presumably this hook can run on several goroutines at once -- confirm against the stub driver
+ return test.CapacityError{InstanceTypeSpecific: true}
+ }
+ n := atomic.AddInt32(&vmCount, 1) // n counts only VMs that got past the capacity-error check above
+ c.Logf("SetupVM: instance %s n=%d", stubvm.Instance(), n)
stubvm.Boot = time.Now().Add(time.Duration(rand.Int63n(int64(5 * time.Millisecond)))) // a random instant less than 5ms from now
stubvm.CrunchRunDetachDelay = time.Duration(rand.Int63n(int64(10 * time.Millisecond))) // a random duration below 10ms
stubvm.ExecuteContainer = executeContainer
stubvm.CrashRunningContainer = finishContainer
stubvm.ExtraCrunchRunArgs = "'--runtime-engine=stub' '--foo' '--extra='\\''args'\\'''"
- switch n % 7 {
- case 0:
+ switch { // choose a failure mode from n; cases are evaluated top to bottom
+ case n%7 == 0:
+ // some instances start out OK but then stop
+ // running any commands
stubvm.Broken = time.Now().Add(time.Duration(rand.Int63n(90)) * time.Millisecond)
- case 1:
+ case n%7 == 1:
+ // some instances never pass a run-probe
stubvm.CrunchRunMissing = true
- case 2:
+ case n%7 == 2:
+ // some instances start out OK but then start
+ // reporting themselves as broken
stubvm.ReportBroken = time.Now().Add(time.Duration(rand.Int63n(200)) * time.Millisecond)
+ case n == 3: // reachable because 3%7 matches none of the cases above
+ // 1 instance is completely broken, ensuring
+ // the boot_outcomes{outcome="failure"} metric
+ // is not zero
+ stubvm.Broken = time.Now()
default: // every other VM gets a 0.1 crunch-run crash rate and a 0.1 arv-mount deadlock rate
stubvm.CrunchRunCrashRate = 0.1
stubvm.ArvMountDeadlockRate = 0.1
}
+ return nil
}
s.stubDriver.Bugf = c.Errorf
}
}
+ // Read the counter atomically: SetupVM updates it with
+ // atomic.AddInt64, possibly from other goroutines that are
+ // still running, so a plain read would be a data race.
+ c.Check(atomic.LoadInt64(&countCapacityErrors), check.Not(check.Equals), int64(0))
+
req := httptest.NewRequest("GET", "/metrics", nil)
req.Header.Set("Authorization", "Bearer "+s.cluster.ManagementToken)
resp := httptest.NewRecorder()