From 8b42f8d30a103b22b39c8cd2d407ecbbddd008b6 Mon Sep 17 00:00:00 2001 From: Tom Clegg Date: Thu, 30 Mar 2023 17:19:13 -0400 Subject: [PATCH] 20240: Expose deadlock in multi-level workflow test case. Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- lib/controller/localdb/container_test.go | 38 ++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/lib/controller/localdb/container_test.go b/lib/controller/localdb/container_test.go index ad77a1cd04..30469bcc65 100644 --- a/lib/controller/localdb/container_test.go +++ b/lib/controller/localdb/container_test.go @@ -10,6 +10,7 @@ import ( "errors" "fmt" "math/rand" + "strings" "sync" "time" @@ -100,7 +101,7 @@ func (s *containerSuite) TestUpdatePriorityShouldBeZero(c *C) { } func (s *containerSuite) TestUpdatePriorityMultiLevelWorkflow(c *C) { - testCtx, testCancel := context.WithDeadline(s.ctx, time.Now().Add(time.Second*20)) + testCtx, testCancel := context.WithDeadline(s.ctx, time.Now().Add(30*time.Second)) defer testCancel() adminCtx := ctrlctx.NewWithToken(testCtx, s.cluster, s.cluster.SystemRootToken) @@ -193,9 +194,42 @@ func (s *containerSuite) TestUpdatePriorityMultiLevelWorkflow(c *C) { c.Assert(err, IsNil) c.Check(priority, Not(Equals), 0) } - chaosCancel() + // Flood railsapi with priority updates. This can cause + // database deadlock: one call acquires row locks in the order + // {i0j0, i0, i0j1}, while another call acquires row locks in + // the order {i0j1, i0, i0j0}. 
+	deadlockCtx, deadlockCancel := context.WithDeadline(adminCtx, time.Now().Add(30*time.Second))
+	defer deadlockCancel()
+	for _, cr := range allcrs {
+		if strings.Contains(cr.Command[2], " j ") && !strings.Contains(cr.Command[2], " k ") {
+			wg.Add(1)
+			go func(uuid string) { // uuid is passed by value: before Go 1.22 all iterations share one cr variable
+				defer wg.Done()
+				for _, p := range []int{1, 2, 3, 4} {
+					var err error
+					for { // retry this priority until the update succeeds or fails with a non-deadlock error
+						_, err = s.localdb.ContainerRequestUpdate(deadlockCtx, arvados.UpdateOptions{
+							UUID: uuid,
+							Attrs: map[string]interface{}{
+								"priority": p,
+							},
+						})
+						if err != nil && strings.Contains(err.Error(), "TRDeadlockDetected") {
+							c.Logf("Deadlock detected (will retry): %q", err)
+							time.Sleep(time.Duration(rand.Intn(int(time.Second / 4)))) // random backoff in [0, 250ms)
+							continue
+						}
+						c.Check(err, IsNil)
+						break
+					}
+				}
+			}(cr.UUID)
+		}
+	}
+	wg.Wait()
+
 	// Simulate cascading cancellation of the entire tree. For
 	// this we need a goroutine to notice and cancel containers
 	// with state=Running and priority=0, and cancel them
-- 
2.30.2