18670: Fix unreliable test.
author Tom Clegg <tom@curii.com>
Tue, 25 Jan 2022 04:48:17 +0000 (23:48 -0500)
committer Tom Clegg <tom@curii.com>
Tue, 25 Jan 2022 04:48:17 +0000 (23:48 -0500)
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@curii.com>

lib/lsf/dispatch.go
lib/lsf/dispatch_test.go
lib/lsf/lsfqueue.go

index c9ed5582bc0a006c00ab1d94b7161fd455b65382..0d9324784d503e1fb30789e45e2f65ae7b84fdd1 100644 (file)
@@ -119,7 +119,7 @@ func (disp *dispatcher) init() {
        disp.lsfcli.logger = disp.logger
        disp.lsfqueue = lsfqueue{
                logger: disp.logger,
-               period: time.Duration(disp.Cluster.Containers.CloudVMs.PollInterval),
+               period: disp.Cluster.Containers.CloudVMs.PollInterval.Duration(),
                lsfcli: &disp.lsfcli,
        }
        disp.ArvClient.AuthToken = disp.AuthToken
@@ -256,7 +256,7 @@ func (disp *dispatcher) runContainer(_ *dispatch.Dispatcher, ctr arvados.Contain
 
        // Try "bkill" every few seconds until the LSF job disappears
        // from the queue.
-       ticker := time.NewTicker(5 * time.Second)
+       ticker := time.NewTicker(disp.Cluster.Containers.CloudVMs.PollInterval.Duration() / 2)
        defer ticker.Stop()
        for qent, ok := disp.lsfqueue.Lookup(ctr.UUID); ok; _, ok = disp.lsfqueue.Lookup(ctr.UUID) {
                err := disp.lsfcli.Bkill(qent.ID)
index c678a9a4815f951f3cdf499c9c4a97c6c6deaa22..a99983f34a8ae4163f9a91ba59c43ab9e57c3e00 100644 (file)
@@ -44,7 +44,8 @@ func (s *suite) SetUpTest(c *check.C) {
        c.Assert(err, check.IsNil)
        cluster, err := cfg.GetCluster("")
        c.Assert(err, check.IsNil)
-       cluster.Containers.CloudVMs.PollInterval = arvados.Duration(time.Second)
+       cluster.Containers.CloudVMs.PollInterval = arvados.Duration(time.Second / 4)
+       cluster.Containers.MinRetryPeriod = arvados.Duration(time.Second / 4)
        s.disp = newHandler(context.Background(), cluster, arvadostest.Dispatch1Token, prometheus.NewRegistry()).(*dispatcher)
        s.disp.lsfcli.stubCommand = func(string, ...string) *exec.Cmd {
                return exec.Command("bash", "-c", "echo >&2 unimplemented stub; false")
@@ -243,16 +244,19 @@ func (s *suite) TestSubmit(c *check.C) {
                }
                // "queuedcontainer" should be running
                if _, ok := s.disp.lsfqueue.Lookup(arvadostest.QueuedContainerUUID); !ok {
+                       c.Log("Lookup(queuedcontainer) == false")
                        continue
                }
                // "lockedcontainer" should be cancelled because it
                // has priority 0 (no matching container requests)
-               if _, ok := s.disp.lsfqueue.Lookup(arvadostest.LockedContainerUUID); ok {
+               if ent, ok := s.disp.lsfqueue.Lookup(arvadostest.LockedContainerUUID); ok {
+                       c.Logf("Lookup(lockedcontainer) == true, ent = %#v", ent)
                        continue
                }
                // "crTooBig" should be cancelled because lsf stub
                // reports there is no suitable instance type
-               if _, ok := s.disp.lsfqueue.Lookup(s.crTooBig.ContainerUUID); ok {
+               if ent, ok := s.disp.lsfqueue.Lookup(s.crTooBig.ContainerUUID); ok {
+                       c.Logf("Lookup(crTooBig) == true, ent = %#v", ent)
                        continue
                }
                var ctr arvados.Container
index 3ed4d0c1820cfaad1340c1304902a7deabd0fcb7..60f01640a0fcab807f3f1a487558179f090de7e2 100644 (file)
@@ -58,7 +58,7 @@ func (q *lsfqueue) getNext() map[string]bjobsEntry {
 func (q *lsfqueue) init() {
        q.updated = sync.NewCond(&q.mutex)
        q.nextReady = make(chan (<-chan struct{}))
-       ticker := time.NewTicker(time.Second)
+       ticker := time.NewTicker(q.period)
        go func() {
                for range ticker.C {
                        // Send a new "next update ready" channel to