Merge branch '21146-pysdk-new-websockets'
[arvados.git] / lib / dispatchcloud / dispatcher_test.go
index 4583a596eebfe48a08fd862e6d840d8df401c047..51c2c3d6a35543cf586f30daac39a0822ad5a90e 100644 (file)
@@ -15,6 +15,7 @@ import (
        "net/url"
        "os"
        "sync"
+       "sync/atomic"
        "time"
 
        "git.arvados.org/arvados.git/lib/config"
@@ -52,6 +53,7 @@ func (s *DispatcherSuite) SetUpTest(c *check.C) {
                ErrorRateCreate:           0.1,
                ErrorRateDestroy:          0.1,
                MinTimeBetweenCreateCalls: time.Millisecond,
+               QuotaMaxInstances:         10,
        }
 
        // We need the postgresql connection info from the integration
@@ -71,6 +73,7 @@ func (s *DispatcherSuite) SetUpTest(c *check.C) {
                        StaleLockTimeout:       arvados.Duration(5 * time.Millisecond),
                        RuntimeEngine:          "stub",
                        MaxDispatchAttempts:    10,
+                       MaximumPriceFactor:     1.5,
                        CloudVMs: arvados.CloudVMsConfig{
                                Driver:               "test",
                                SyncInterval:         arvados.Duration(10 * time.Millisecond),
@@ -159,7 +162,7 @@ func (s *DispatcherSuite) TestDispatchToStubDriver(c *check.C) {
        s.disp.setupOnce.Do(s.disp.initialize)
        queue := &test.Queue{
                MaxDispatchAttempts: 5,
-               ChooseType: func(ctr *arvados.Container) (arvados.InstanceType, error) {
+               ChooseType: func(ctr *arvados.Container) ([]arvados.InstanceType, error) {
                        return ChooseInstanceType(s.cluster, ctr)
                },
                Logger: ctxlog.TestLogger(c),
@@ -204,26 +207,44 @@ func (s *DispatcherSuite) TestDispatchToStubDriver(c *check.C) {
                finishContainer(ctr)
                return int(rand.Uint32() & 0x3)
        }
-       n := 0
+       var countCapacityErrors int64
+       vmCount := int32(0)
        s.stubDriver.Queue = queue
-       s.stubDriver.SetupVM = func(stubvm *test.StubVM) {
-               n++
+       s.stubDriver.SetupVM = func(stubvm *test.StubVM) error {
+               if pt := stubvm.Instance().ProviderType(); pt == test.InstanceType(6).ProviderType {
+                       c.Logf("test: returning capacity error for instance type %s", pt)
+                       atomic.AddInt64(&countCapacityErrors, 1)
+                       return test.CapacityError{InstanceTypeSpecific: true}
+               }
+               n := atomic.AddInt32(&vmCount, 1)
+               c.Logf("SetupVM: instance %s n=%d", stubvm.Instance(), n)
                stubvm.Boot = time.Now().Add(time.Duration(rand.Int63n(int64(5 * time.Millisecond))))
                stubvm.CrunchRunDetachDelay = time.Duration(rand.Int63n(int64(10 * time.Millisecond)))
                stubvm.ExecuteContainer = executeContainer
                stubvm.CrashRunningContainer = finishContainer
                stubvm.ExtraCrunchRunArgs = "'--runtime-engine=stub' '--foo' '--extra='\\''args'\\'''"
-               switch n % 7 {
-               case 0:
+               switch {
+               case n%7 == 0:
+                       // some instances start out OK but then stop
+                       // running any commands
                        stubvm.Broken = time.Now().Add(time.Duration(rand.Int63n(90)) * time.Millisecond)
-               case 1:
+               case n%7 == 1:
+                       // some instances never pass a run-probe
                        stubvm.CrunchRunMissing = true
-               case 2:
+               case n%7 == 2:
+                       // some instances start out OK but then start
+                       // reporting themselves as broken
                        stubvm.ReportBroken = time.Now().Add(time.Duration(rand.Int63n(200)) * time.Millisecond)
+               case n == 3:
+                       // 1 instance is completely broken, ensuring
+                       // the boot_outcomes{outcome="failure"} metric
+                       // is not zero
+                       stubvm.Broken = time.Now()
                default:
                        stubvm.CrunchRunCrashRate = 0.1
                        stubvm.ArvMountDeadlockRate = 0.1
                }
+               return nil
        }
        s.stubDriver.Bugf = c.Errorf
 
@@ -259,6 +280,8 @@ func (s *DispatcherSuite) TestDispatchToStubDriver(c *check.C) {
                }
        }
 
+       c.Check(countCapacityErrors, check.Not(check.Equals), int64(0))
+
        req := httptest.NewRequest("GET", "/metrics", nil)
        req.Header.Set("Authorization", "Bearer "+s.cluster.ManagementToken)
        resp := httptest.NewRecorder()
@@ -367,6 +390,7 @@ func (s *DispatcherSuite) TestInstancesAPI(c *check.C) {
        sr := getInstances()
        c.Check(len(sr.Items), check.Equals, 0)
 
+       s.stubDriver.ErrorRateCreate = 0
        ch := s.disp.pool.Subscribe()
        defer s.disp.pool.Unsubscribe(ch)
        ok := s.disp.pool.Create(test.InstanceType(1))