From: Tom Clegg Date: Wed, 6 Dec 2023 17:47:53 +0000 (-0500) Subject: 21258: Fix "container completed twice" testing bug. X-Git-Url: https://git.arvados.org/arvados.git/commitdiff_plain/95e0edffd5914e136fe9baac16c99858306aeeba 21258: Fix "container completed twice" testing bug. Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- diff --git a/lib/dispatchcloud/test/stub_driver.go b/lib/dispatchcloud/test/stub_driver.go index 0a74d97606..2265be6e16 100644 --- a/lib/dispatchcloud/test/stub_driver.go +++ b/lib/dispatchcloud/test/stub_driver.go @@ -239,6 +239,8 @@ type StubVM struct { killing map[string]bool lastPID int64 deadlocked string + stubprocs sync.WaitGroup + destroying bool sync.Mutex } @@ -267,6 +269,17 @@ func (svm *StubVM) Instance() stubInstance { } func (svm *StubVM) Exec(env map[string]string, command string, stdin io.Reader, stdout, stderr io.Writer) uint32 { + // Ensure we don't start any new stubprocs after Destroy() + // has started Wait()ing for stubprocs to end. + svm.Lock() + if svm.destroying { + svm.Unlock() + return 1 + } + svm.stubprocs.Add(1) + defer svm.stubprocs.Done() + svm.Unlock() + stdinData, err := ioutil.ReadAll(stdin) if err != nil { fmt.Fprintf(stderr, "error reading stdin: %s\n", err) @@ -304,7 +317,15 @@ func (svm *StubVM) Exec(env map[string]string, command string, stdin io.Reader, pid := svm.lastPID svm.running[uuid] = stubProcess{pid: pid} svm.Unlock() + time.Sleep(svm.CrunchRunDetachDelay) + + svm.Lock() + defer svm.Unlock() + if svm.destroying { + fmt.Fprint(stderr, "crunch-run: killed by system shutdown\n") + return 9 + } fmt.Fprintf(stderr, "starting %s\n", uuid) logger := svm.sis.logger.WithFields(logrus.Fields{ "Instance": svm.id, @@ -312,13 +333,18 @@ func (svm *StubVM) Exec(env map[string]string, command string, stdin io.Reader, "PID": pid, }) logger.Printf("[test] starting crunch-run stub") + svm.stubprocs.Add(1) go func() { + defer svm.stubprocs.Done() var ctr arvados.Container var started, completed bool defer func() { logger.Print("[test] exiting crunch-run stub") svm.Lock() defer svm.Unlock() + if svm.destroying { + return + } if svm.running[uuid].pid != pid { bugf := svm.sis.driver.Bugf if bugf == nil { @@ -358,8 +384,10 @@ func (svm *StubVM) Exec(env map[string]string, command string, stdin io.Reader, svm.Lock() killed := svm.killing[uuid] + delete(svm.killing, uuid) + destroying := svm.destroying svm.Unlock() - if killed || wantCrashEarly { + if killed || wantCrashEarly || destroying { return } @@ -451,6 +479,10 @@ func (si stubInstance) Destroy() error { if math_rand.Float64() < si.svm.sis.driver.ErrorRateDestroy { return errors.New("instance could not be destroyed") } + si.svm.Lock() + si.svm.destroying = true + si.svm.Unlock() + si.svm.stubprocs.Wait() si.svm.SSHService.Close() sis.mtx.Lock() defer sis.mtx.Unlock()