X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/d5df19fdff62724f9faeb3bee17201363071bf9e..32c63d03d45d231768eb1497dfc5e9f4a0d23c16:/lib/diagnostics/cmd.go diff --git a/lib/diagnostics/cmd.go b/lib/diagnostics/cmd.go index 9c229c9b4e..8d89b84d37 100644 --- a/lib/diagnostics/cmd.go +++ b/lib/diagnostics/cmd.go @@ -131,6 +131,8 @@ func (diag *diagnoser) dotest(id int, title string, fn func() error) { func (diag *diagnoser) runtests() { client := arvados.NewClientFromEnv() + // Disable auto-retry, use context instead + client.Timeout = 0 if client.APIHost == "" || client.AuthToken == "" { diag.errorf("ARVADOS_API_HOST and ARVADOS_API_TOKEN environment variables are not set -- aborting without running any tests") @@ -318,9 +320,9 @@ func (diag *diagnoser) runtests() { isInternal := found["proxy"] == 0 && len(keeplist.Items) > 0 isExternal := found["proxy"] > 0 && found["proxy"] == len(keeplist.Items) if isExternal { - diag.verbosef("controller returned only proxy services, this host is treated as \"external\"") + diag.infof("controller returned only proxy services, this host is treated as \"external\"") } else if isInternal { - diag.verbosef("controller returned only non-proxy services, this host is treated as \"internal\"") + diag.infof("controller returned only non-proxy services, this host is treated as \"internal\"") } if (diag.checkInternal && !isInternal) || (diag.checkExternal && !isExternal) { return fmt.Errorf("expecting internal=%v external=%v, but found internal=%v external=%v", diag.checkInternal, diag.checkExternal, isInternal, isExternal) @@ -703,12 +705,11 @@ func (diag *diagnoser) runtests() { timeout := 10 * time.Minute diag.infof("container request submitted, waiting up to %v for container to run", arvados.Duration(timeout)) - ctx, cancel = context.WithDeadline(context.Background(), time.Now().Add(timeout)) - defer cancel() + deadline := time.Now().Add(timeout) var c arvados.Container - for ; cr.State != arvados.ContainerRequestStateFinal; time.Sleep(2 * time.Second) { - ctx, cancel := context.WithDeadline(ctx, time.Now().Add(diag.timeout)) + for ; cr.State != arvados.ContainerRequestStateFinal && time.Now().Before(deadline); time.Sleep(2 * time.Second) { + ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout)) defer cancel() crStateWas := cr.State @@ -728,11 +729,26 @@ func (diag *diagnoser) runtests() { if c.State != cStateWas { diag.debugf("container state = %s", c.State) } + + cancel() } + if cr.State != arvados.ContainerRequestStateFinal { + err := client.RequestAndDecodeContext(context.Background(), &cr, "PATCH", "arvados/v1/container_requests/"+cr.UUID, nil, map[string]interface{}{ + "container_request": map[string]interface{}{ + "priority": 0, + }}) + if err != nil { + diag.infof("error canceling container request %s: %s", cr.UUID, err) + } else { + diag.debugf("canceled container request %s", cr.UUID) + } + return fmt.Errorf("timed out waiting for container to finish; container request %s state was %q, container %s state was %q", cr.UUID, cr.State, c.UUID, c.State) + } if c.State != arvados.ContainerStateComplete { return fmt.Errorf("container request %s is final but container %s did not complete: container state = %q", cr.UUID, cr.ContainerUUID, c.State) - } else if c.ExitCode != 0 { + } + if c.ExitCode != 0 { return fmt.Errorf("container exited %d", c.ExitCode) } return nil