From: Tom Clegg Date: Mon, 21 Nov 2022 15:26:34 +0000 (-0500) Subject: 19364: Cancel container request when timing out. X-Git-Tag: 2.5.0~30^2 X-Git-Url: https://git.arvados.org/arvados.git/commitdiff_plain/273d4dda75bad4b1ba18bc3616f16082b95c0467 19364: Cancel container request when timing out. Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- diff --git a/lib/diagnostics/cmd.go b/lib/diagnostics/cmd.go index 3e3ac86757..ed963e1ef7 100644 --- a/lib/diagnostics/cmd.go +++ b/lib/diagnostics/cmd.go @@ -703,12 +703,11 @@ func (diag *diagnoser) runtests() { timeout := 10 * time.Minute diag.infof("container request submitted, waiting up to %v for container to run", arvados.Duration(timeout)) - ctx, cancel = context.WithDeadline(context.Background(), time.Now().Add(timeout)) - defer cancel() + deadline := time.Now().Add(timeout) var c arvados.Container - for ; cr.State != arvados.ContainerRequestStateFinal; time.Sleep(2 * time.Second) { - ctx, cancel := context.WithDeadline(ctx, time.Now().Add(diag.timeout)) + for ; cr.State != arvados.ContainerRequestStateFinal && time.Now().Before(deadline); time.Sleep(2 * time.Second) { + ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout)) defer cancel() crStateWas := cr.State @@ -728,11 +727,26 @@ func (diag *diagnoser) runtests() { if c.State != cStateWas { diag.debugf("container state = %s", c.State) } + + cancel() } + if cr.State != arvados.ContainerRequestStateFinal { + err := client.RequestAndDecodeContext(context.Background(), &cr, "PATCH", "arvados/v1/container_requests/"+cr.UUID, nil, map[string]interface{}{ + "container_request": map[string]interface{}{ + "priority": 0, + }}) + if err != nil { + diag.infof("error canceling container request %s: %s", cr.UUID, err) + } else { + diag.debugf("canceled container request %s", cr.UUID) + } + return fmt.Errorf("timed out waiting for container to finish; container request %s state was %q, container %s state was %q", cr.UUID, cr.State, c.UUID, c.State) + } if c.State != arvados.ContainerStateComplete { return fmt.Errorf("container request %s is final but container %s did not complete: container state = %q", cr.UUID, cr.ContainerUUID, c.State) - } else if c.ExitCode != 0 { + } + if c.ExitCode != 0 { return fmt.Errorf("container exited %d", c.ExitCode) } return nil