X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/f9f0960543c846af8054832c22371c9bc6734615..d6446b03e2f5d5079a870bdd7b963456dc12b485:/lib/dispatchcloud/dispatcher_test.go diff --git a/lib/dispatchcloud/dispatcher_test.go b/lib/dispatchcloud/dispatcher_test.go index 17121ffeb6..33d7f4e9ac 100644 --- a/lib/dispatchcloud/dispatcher_test.go +++ b/lib/dispatchcloud/dispatcher_test.go @@ -15,6 +15,7 @@ import ( "net/url" "os" "sync" + "sync/atomic" "time" "git.arvados.org/arvados.git/lib/config" @@ -49,8 +50,10 @@ func (s *DispatcherSuite) SetUpTest(c *check.C) { s.stubDriver = &test.StubDriver{ HostKey: hostpriv, AuthorizedKeys: []ssh.PublicKey{dispatchpub}, + ErrorRateCreate: 0.1, ErrorRateDestroy: 0.1, MinTimeBetweenCreateCalls: time.Millisecond, + QuotaMaxInstances: 10, } // We need the postgresql connection info from the integration @@ -70,6 +73,7 @@ func (s *DispatcherSuite) SetUpTest(c *check.C) { StaleLockTimeout: arvados.Duration(5 * time.Millisecond), RuntimeEngine: "stub", MaxDispatchAttempts: 10, + MaximumPriceFactor: 1.5, CloudVMs: arvados.CloudVMsConfig{ Driver: "test", SyncInterval: arvados.Duration(10 * time.Millisecond), @@ -158,7 +162,7 @@ func (s *DispatcherSuite) TestDispatchToStubDriver(c *check.C) { s.disp.setupOnce.Do(s.disp.initialize) queue := &test.Queue{ MaxDispatchAttempts: 5, - ChooseType: func(ctr *arvados.Container) (arvados.InstanceType, error) { + ChooseType: func(ctr *arvados.Container) ([]arvados.InstanceType, error) { return ChooseInstanceType(s.cluster, ctr) }, Logger: ctxlog.TestLogger(c), @@ -203,26 +207,43 @@ func (s *DispatcherSuite) TestDispatchToStubDriver(c *check.C) { finishContainer(ctr) return int(rand.Uint32() & 0x3) } + var countCapacityErrors int64 n := 0 s.stubDriver.Queue = queue - s.stubDriver.SetupVM = func(stubvm *test.StubVM) { + s.stubDriver.SetupVM = func(stubvm *test.StubVM) error { + if pt := stubvm.Instance().ProviderType(); pt == test.InstanceType(6).ProviderType { + c.Logf("test: returning capacity error for instance type %s", pt) + atomic.AddInt64(&countCapacityErrors, 1) + return test.CapacityError{InstanceTypeSpecific: true} + } n++ stubvm.Boot = time.Now().Add(time.Duration(rand.Int63n(int64(5 * time.Millisecond)))) stubvm.CrunchRunDetachDelay = time.Duration(rand.Int63n(int64(10 * time.Millisecond))) stubvm.ExecuteContainer = executeContainer stubvm.CrashRunningContainer = finishContainer stubvm.ExtraCrunchRunArgs = "'--runtime-engine=stub' '--foo' '--extra='\\''args'\\'''" - switch n % 7 { - case 0: + switch { + case n%7 == 0: + // some instances start out OK but then stop + // running any commands stubvm.Broken = time.Now().Add(time.Duration(rand.Int63n(90)) * time.Millisecond) - case 1: + case n%7 == 1: + // some instances never pass a run-probe stubvm.CrunchRunMissing = true - case 2: + case n%7 == 2: + // some instances start out OK but then start + // reporting themselves as broken stubvm.ReportBroken = time.Now().Add(time.Duration(rand.Int63n(200)) * time.Millisecond) + case n == 3: + // 1 instance is completely broken, ensuring + // the boot_outcomes{outcome="failure"} metric + // is not zero + stubvm.CrunchRunCrashRate = 1 default: stubvm.CrunchRunCrashRate = 0.1 stubvm.ArvMountDeadlockRate = 0.1 } + return nil } s.stubDriver.Bugf = c.Errorf @@ -258,6 +279,8 @@ func (s *DispatcherSuite) TestDispatchToStubDriver(c *check.C) { } } + c.Check(countCapacityErrors, check.Not(check.Equals), int64(0)) + req := httptest.NewRequest("GET", "/metrics", nil) req.Header.Set("Authorization", "Bearer "+s.cluster.ManagementToken) resp := httptest.NewRecorder() @@ -366,6 +389,7 @@ func (s *DispatcherSuite) TestInstancesAPI(c *check.C) { sr := getInstances() c.Check(len(sr.Items), check.Equals, 0) + s.stubDriver.ErrorRateCreate = 0 ch := s.disp.pool.Subscribe() defer s.disp.pool.Unsubscribe(ch) ok := s.disp.pool.Create(test.InstanceType(1))