17384: Respect CrunchRunCommand and CrunchRunArgumentsList in a-d-c.
[arvados.git] / lib / dispatchcloud / test / stub_driver.go
index 132bd4d695f0ef88095951b151be592029c31328..1b31a71a264fabf865f981f5f94eab1649847ac4 100644 (file)
@@ -131,7 +131,7 @@ func (sis *StubInstanceSet) Create(it arvados.InstanceType, image cloud.ImageID,
                tags:         copyTags(tags),
                providerType: it.ProviderType,
                initCommand:  cmd,
-               running:      map[string]int64{},
+               running:      map[string]stubProcess{},
                killing:      map[string]bool{},
        }
        svm.SSHService = SSHService{
@@ -189,8 +189,11 @@ type StubVM struct {
        CrunchRunMissing      bool
        CrunchRunCrashRate    float64
        CrunchRunDetachDelay  time.Duration
+       ArvMountMaxExitLag    time.Duration
+       ArvMountDeadlockRate  float64
        ExecuteContainer      func(arvados.Container) int
        CrashRunningContainer func(arvados.Container)
+       ExtraCrunchRunArgs    string // extra args expected after "crunch-run --detach --stdin-env "
 
        sis          *StubInstanceSet
        id           cloud.InstanceID
@@ -198,12 +201,21 @@ type StubVM struct {
        initCommand  cloud.InitCommand
        providerType string
        SSHService   SSHService
-       running      map[string]int64
+       running      map[string]stubProcess
        killing      map[string]bool
        lastPID      int64
+       deadlocked   string
        sync.Mutex
 }
 
+type stubProcess struct {
+       pid int64
+
+       // crunch-run has exited, but arv-mount process (or something)
+       // still holds lock in /var/run/
+       exited bool
+}
+
 func (svm *StubVM) Instance() stubInstance {
        svm.Lock()
        defer svm.Unlock()
@@ -240,7 +252,7 @@ func (svm *StubVM) Exec(env map[string]string, command string, stdin io.Reader,
                fmt.Fprint(stderr, "crunch-run: command not found\n")
                return 1
        }
-       if strings.HasPrefix(command, "crunch-run --detach --stdin-env ") {
+       if strings.HasPrefix(command, "crunch-run --detach --stdin-env "+svm.ExtraCrunchRunArgs) {
                var stdinKV map[string]string
                err := json.Unmarshal(stdinData, &stdinKV)
                if err != nil {
@@ -256,7 +268,7 @@ func (svm *StubVM) Exec(env map[string]string, command string, stdin io.Reader,
                svm.Lock()
                svm.lastPID++
                pid := svm.lastPID
-               svm.running[uuid] = pid
+               svm.running[uuid] = stubProcess{pid: pid}
                svm.Unlock()
                time.Sleep(svm.CrunchRunDetachDelay)
                fmt.Fprintf(stderr, "starting %s\n", uuid)
@@ -273,14 +285,13 @@ func (svm *StubVM) Exec(env map[string]string, command string, stdin io.Reader,
                                logger.Print("[test] exiting crunch-run stub")
                                svm.Lock()
                                defer svm.Unlock()
-                               if svm.running[uuid] != pid {
+                               if svm.running[uuid].pid != pid {
                                        bugf := svm.sis.driver.Bugf
                                        if bugf == nil {
                                                bugf = logger.Warnf
                                        }
-                                       bugf("[test] StubDriver bug or caller bug: pid %d exiting, running[%s]==%d", pid, uuid, svm.running[uuid])
-                               } else {
-                                       delete(svm.running, uuid)
+                                       bugf("[test] StubDriver bug or caller bug: pid %d exiting, running[%s].pid==%d", pid, uuid, svm.running[uuid].pid)
+                                       return
                                }
                                if !completed {
                                        logger.WithField("State", ctr.State).Print("[test] crashing crunch-run stub")
@@ -288,6 +299,15 @@ func (svm *StubVM) Exec(env map[string]string, command string, stdin io.Reader,
                                                svm.CrashRunningContainer(ctr)
                                        }
                                }
+                               sproc := svm.running[uuid]
+                               sproc.exited = true
+                               svm.running[uuid] = sproc
+                               svm.Unlock()
+                               time.Sleep(svm.ArvMountMaxExitLag * time.Duration(math_rand.Float64()))
+                               svm.Lock()
+                               if math_rand.Float64() >= svm.ArvMountDeadlockRate {
+                                       delete(svm.running, uuid)
+                               }
                        }()
 
                        crashluck := math_rand.Float64()
@@ -333,26 +353,31 @@ func (svm *StubVM) Exec(env map[string]string, command string, stdin io.Reader,
        if command == "crunch-run --list" {
                svm.Lock()
                defer svm.Unlock()
-               for uuid := range svm.running {
-                       fmt.Fprintf(stdout, "%s\n", uuid)
+               for uuid, sproc := range svm.running {
+                       if sproc.exited {
+                               fmt.Fprintf(stdout, "%s stale\n", uuid)
+                       } else {
+                               fmt.Fprintf(stdout, "%s\n", uuid)
+                       }
                }
                if !svm.ReportBroken.IsZero() && svm.ReportBroken.Before(time.Now()) {
                        fmt.Fprintln(stdout, "broken")
                }
+               fmt.Fprintln(stdout, svm.deadlocked)
                return 0
        }
        if strings.HasPrefix(command, "crunch-run --kill ") {
                svm.Lock()
-               _, running := svm.running[uuid]
-               if running {
+               sproc, running := svm.running[uuid]
+               if running && !sproc.exited {
                        svm.killing[uuid] = true
                        svm.Unlock()
                        time.Sleep(time.Duration(math_rand.Float64()*2) * time.Millisecond)
                        svm.Lock()
-                       _, running = svm.running[uuid]
+                       sproc, running = svm.running[uuid]
                }
                svm.Unlock()
-               if running {
+               if running && !sproc.exited {
                        fmt.Fprintf(stderr, "%s: container is running\n", uuid)
                        return 1
                }