21258: Fix "container completed twice" testing bug.
author Tom Clegg <tom@curii.com>
Wed, 6 Dec 2023 17:47:53 +0000 (12:47 -0500)
committer Tom Clegg <tom@curii.com>
Wed, 6 Dec 2023 17:47:53 +0000 (12:47 -0500)
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@curii.com>

lib/dispatchcloud/test/stub_driver.go

index 0a74d976063351ba8a3bdff0b35c91d360028c73..2265be6e1610015358036f515c50acea5bad5c11 100644 (file)
@@ -239,6 +239,8 @@ type StubVM struct {
        killing      map[string]bool
        lastPID      int64
        deadlocked   string
+       stubprocs    sync.WaitGroup
+       destroying   bool
        sync.Mutex
 }
 
@@ -267,6 +269,17 @@ func (svm *StubVM) Instance() stubInstance {
 }
 
 func (svm *StubVM) Exec(env map[string]string, command string, stdin io.Reader, stdout, stderr io.Writer) uint32 {
+       // Ensure we don't start any new stubprocs after Destroy()
+       // has started Wait()ing for stubprocs to end.
+       svm.Lock()
+       if svm.destroying {
+               svm.Unlock()
+               return 1
+       }
+       svm.stubprocs.Add(1)
+       defer svm.stubprocs.Done()
+       svm.Unlock()
+
        stdinData, err := ioutil.ReadAll(stdin)
        if err != nil {
                fmt.Fprintf(stderr, "error reading stdin: %s\n", err)
@@ -304,7 +317,15 @@ func (svm *StubVM) Exec(env map[string]string, command string, stdin io.Reader,
                pid := svm.lastPID
                svm.running[uuid] = stubProcess{pid: pid}
                svm.Unlock()
+
                time.Sleep(svm.CrunchRunDetachDelay)
+
+               svm.Lock()
+               defer svm.Unlock()
+               if svm.destroying {
+                       fmt.Fprint(stderr, "crunch-run: killed by system shutdown\n")
+                       return 9
+               }
                fmt.Fprintf(stderr, "starting %s\n", uuid)
                logger := svm.sis.logger.WithFields(logrus.Fields{
                        "Instance":      svm.id,
@@ -312,13 +333,18 @@ func (svm *StubVM) Exec(env map[string]string, command string, stdin io.Reader,
                        "PID":           pid,
                })
                logger.Printf("[test] starting crunch-run stub")
+               svm.stubprocs.Add(1)
                go func() {
+                       defer svm.stubprocs.Done()
                        var ctr arvados.Container
                        var started, completed bool
                        defer func() {
                                logger.Print("[test] exiting crunch-run stub")
                                svm.Lock()
                                defer svm.Unlock()
+                               if svm.destroying {
+                                       return
+                               }
                                if svm.running[uuid].pid != pid {
                                        bugf := svm.sis.driver.Bugf
                                        if bugf == nil {
@@ -358,8 +384,10 @@ func (svm *StubVM) Exec(env map[string]string, command string, stdin io.Reader,
 
                        svm.Lock()
                        killed := svm.killing[uuid]
+                       delete(svm.killing, uuid)
+                       destroying := svm.destroying
                        svm.Unlock()
-                       if killed || wantCrashEarly {
+                       if killed || wantCrashEarly || destroying {
                                return
                        }
 
@@ -451,6 +479,10 @@ func (si stubInstance) Destroy() error {
        if math_rand.Float64() < si.svm.sis.driver.ErrorRateDestroy {
                return errors.New("instance could not be destroyed")
        }
+       si.svm.Lock()
+       si.svm.destroying = true
+       si.svm.Unlock()
+       si.svm.stubprocs.Wait()
        si.svm.SSHService.Close()
        sis.mtx.Lock()
        defer sis.mtx.Unlock()