Fix some golint warnings.
[arvados.git] / lib / dispatchcloud / test / stub_driver.go
index 28fee6d9278db08482c9daa2f575e217bc6c6bb3..41eb20763c75248c6cea81a2e9854ad2dfde42a8 100644 (file)
@@ -17,8 +17,8 @@ import (
        "sync"
        "time"
 
-       "git.curoverse.com/arvados.git/lib/cloud"
-       "git.curoverse.com/arvados.git/sdk/go/arvados"
+       "git.arvados.org/arvados.git/lib/cloud"
+       "git.arvados.org/arvados.git/sdk/go/arvados"
        "github.com/sirupsen/logrus"
        "golang.org/x/crypto/ssh"
 )
@@ -34,6 +34,11 @@ type StubDriver struct {
        // VM's error rate and other behaviors.
        SetupVM func(*StubVM)
 
+       // Bugf, if set, is called if a bug is detected in the caller
+       // or stub. Typically set to (*check.C)Errorf. If unset,
+       // logger.Warnf is called instead.
+       Bugf func(string, ...interface{})
+
        // StubVM's fake crunch-run uses this Queue to read and update
        // container state.
        Queue *Queue
@@ -56,7 +61,7 @@ type StubDriver struct {
 }
 
 // InstanceSet returns a new *StubInstanceSet.
-func (sd *StubDriver) InstanceSet(params json.RawMessage, id cloud.InstanceSetID, logger logrus.FieldLogger) (cloud.InstanceSet, error) {
+func (sd *StubDriver) InstanceSet(params json.RawMessage, id cloud.InstanceSetID, _ cloud.SharedResourceTags, logger logrus.FieldLogger) (cloud.InstanceSet, error) {
        if sd.holdCloudOps == nil {
                sd.holdCloudOps = make(chan bool)
        }
@@ -99,6 +104,7 @@ type StubInstanceSet struct {
 
        allowCreateCall    time.Time
        allowInstancesCall time.Time
+       lastInstanceID     int
 }
 
 func (sis *StubInstanceSet) Create(it arvados.InstanceType, image cloud.ImageID, tags cloud.InstanceTags, cmd cloud.InitCommand, authKey ssh.PublicKey) (cloud.Instance, error) {
@@ -112,20 +118,21 @@ func (sis *StubInstanceSet) Create(it arvados.InstanceType, image cloud.ImageID,
        }
        if sis.allowCreateCall.After(time.Now()) {
                return nil, RateLimitError{sis.allowCreateCall}
-       } else {
-               sis.allowCreateCall = time.Now().Add(sis.driver.MinTimeBetweenCreateCalls)
        }
-
+       sis.allowCreateCall = time.Now().Add(sis.driver.MinTimeBetweenCreateCalls)
        ak := sis.driver.AuthorizedKeys
        if authKey != nil {
                ak = append([]ssh.PublicKey{authKey}, ak...)
        }
+       sis.lastInstanceID++
        svm := &StubVM{
                sis:          sis,
-               id:           cloud.InstanceID(fmt.Sprintf("stub-%s-%x", it.ProviderType, math_rand.Int63())),
+               id:           cloud.InstanceID(fmt.Sprintf("inst%d,%s", sis.lastInstanceID, it.ProviderType)),
                tags:         copyTags(tags),
                providerType: it.ProviderType,
                initCommand:  cmd,
+               running:      map[string]int64{},
+               killing:      map[string]bool{},
        }
        svm.SSHService = SSHService{
                HostKey:        sis.driver.HostKey,
@@ -145,9 +152,8 @@ func (sis *StubInstanceSet) Instances(cloud.InstanceTags) ([]cloud.Instance, err
        defer sis.mtx.RUnlock()
        if sis.allowInstancesCall.After(time.Now()) {
                return nil, RateLimitError{sis.allowInstancesCall}
-       } else {
-               sis.allowInstancesCall = time.Now().Add(sis.driver.MinTimeBetweenInstancesCalls)
        }
+       sis.allowInstancesCall = time.Now().Add(sis.driver.MinTimeBetweenInstancesCalls)
        var r []cloud.Instance
        for _, ss := range sis.servers {
                r = append(r, ss.Instance())
@@ -179,6 +185,7 @@ func (e RateLimitError) EarliestRetry() time.Time { return e.Retry }
 type StubVM struct {
        Boot                  time.Time
        Broken                time.Time
+       ReportBroken          time.Time
        CrunchRunMissing      bool
        CrunchRunCrashRate    float64
        CrunchRunDetachDelay  time.Duration
@@ -192,6 +199,7 @@ type StubVM struct {
        providerType string
        SSHService   SSHService
        running      map[string]int64
+       killing      map[string]bool
        lastPID      int64
        sync.Mutex
 }
@@ -241,16 +249,13 @@ func (svm *StubVM) Exec(env map[string]string, command string, stdin io.Reader,
                }
                for _, name := range []string{"ARVADOS_API_HOST", "ARVADOS_API_TOKEN"} {
                        if stdinKV[name] == "" {
-                               fmt.Fprintf(stderr, "%s env var missing from stdin %q\n", name, stdin)
+                               fmt.Fprintf(stderr, "%s env var missing from stdin %q\n", name, stdinData)
                                return 1
                        }
                }
                svm.Lock()
                svm.lastPID++
                pid := svm.lastPID
-               if svm.running == nil {
-                       svm.running = map[string]int64{}
-               }
                svm.running[uuid] = pid
                svm.Unlock()
                time.Sleep(svm.CrunchRunDetachDelay)
@@ -262,49 +267,68 @@ func (svm *StubVM) Exec(env map[string]string, command string, stdin io.Reader,
                })
                logger.Printf("[test] starting crunch-run stub")
                go func() {
+                       var ctr arvados.Container
+                       var started, completed bool
+                       defer func() {
+                               logger.Print("[test] exiting crunch-run stub")
+                               svm.Lock()
+                               defer svm.Unlock()
+                               if svm.running[uuid] != pid {
+                                       if !completed {
+                                               bugf := svm.sis.driver.Bugf
+                                               if bugf == nil {
+                                                       bugf = logger.Warnf
+                                               }
+                                               bugf("[test] StubDriver bug or caller bug: pid %d exiting, running[%s]==%d", pid, uuid, svm.running[uuid])
+                                       }
+                               } else {
+                                       delete(svm.running, uuid)
+                               }
+                               if !completed {
+                                       logger.WithField("State", ctr.State).Print("[test] crashing crunch-run stub")
+                                       if started && svm.CrashRunningContainer != nil {
+                                               svm.CrashRunningContainer(ctr)
+                                       }
+                               }
+                       }()
+
                        crashluck := math_rand.Float64()
+                       wantCrash := crashluck < svm.CrunchRunCrashRate
+                       wantCrashEarly := crashluck < svm.CrunchRunCrashRate/2
+
                        ctr, ok := queue.Get(uuid)
                        if !ok {
                                logger.Print("[test] container not in queue")
                                return
                        }
 
-                       defer func() {
-                               if ctr.State == arvados.ContainerStateRunning && svm.CrashRunningContainer != nil {
-                                       svm.CrashRunningContainer(ctr)
-                               }
-                       }()
-
-                       if crashluck > svm.CrunchRunCrashRate/2 {
-                               time.Sleep(time.Duration(math_rand.Float64()*20) * time.Millisecond)
-                               ctr.State = arvados.ContainerStateRunning
-                               if !queue.Notify(ctr) {
-                                       ctr, _ = queue.Get(uuid)
-                                       logger.Print("[test] erroring out because state=Running update was rejected")
-                                       return
-                               }
-                       }
-
                        time.Sleep(time.Duration(math_rand.Float64()*20) * time.Millisecond)
 
                        svm.Lock()
-                       defer svm.Unlock()
-                       if svm.running[uuid] != pid {
-                               logger.Print("[test] container was killed")
+                       killed := svm.running[uuid] != pid
+                       svm.Unlock()
+                       if killed || wantCrashEarly {
                                return
                        }
-                       delete(svm.running, uuid)
 
-                       if crashluck < svm.CrunchRunCrashRate {
+                       ctr.State = arvados.ContainerStateRunning
+                       started = queue.Notify(ctr)
+                       if !started {
+                               ctr, _ = queue.Get(uuid)
+                               logger.Print("[test] erroring out because state=Running update was rejected")
+                               return
+                       }
+
+                       if wantCrash {
                                logger.WithField("State", ctr.State).Print("[test] crashing crunch-run stub")
-                       } else {
-                               if svm.ExecuteContainer != nil {
-                                       ctr.ExitCode = svm.ExecuteContainer(ctr)
-                               }
-                               logger.WithField("ExitCode", ctr.ExitCode).Print("[test] exiting crunch-run stub")
-                               ctr.State = arvados.ContainerStateComplete
-                               go queue.Notify(ctr)
+                               return
                        }
+                       if svm.ExecuteContainer != nil {
+                               ctr.ExitCode = svm.ExecuteContainer(ctr)
+                       }
+                       logger.WithField("ExitCode", ctr.ExitCode).Print("[test] completing container")
+                       ctr.State = arvados.ContainerStateComplete
+                       completed = queue.Notify(ctr)
                }()
                return 0
        }
@@ -314,34 +338,40 @@ func (svm *StubVM) Exec(env map[string]string, command string, stdin io.Reader,
                for uuid := range svm.running {
                        fmt.Fprintf(stdout, "%s\n", uuid)
                }
+               if !svm.ReportBroken.IsZero() && svm.ReportBroken.Before(time.Now()) {
+                       fmt.Fprintln(stdout, "broken")
+               }
                return 0
        }
        if strings.HasPrefix(command, "crunch-run --kill ") {
                svm.Lock()
                pid, running := svm.running[uuid]
-               svm.Unlock()
-               go func() {
-                       time.Sleep(time.Duration(math_rand.Float64()*20) * time.Millisecond)
+               if running && !svm.killing[uuid] {
+                       svm.killing[uuid] = true
+                       go func() {
+                               time.Sleep(time.Duration(math_rand.Float64()*30) * time.Millisecond)
+                               svm.Lock()
+                               defer svm.Unlock()
+                               if svm.running[uuid] == pid {
+                                       // Kill only if the running entry
+                                       // hasn't since been killed and
+                                       // replaced with a different one.
+                                       delete(svm.running, uuid)
+                               }
+                               delete(svm.killing, uuid)
+                       }()
+                       svm.Unlock()
+                       time.Sleep(time.Duration(math_rand.Float64()*2) * time.Millisecond)
                        svm.Lock()
-                       defer svm.Unlock()
-                       if svm.running[uuid] == pid {
-                               // Kill only if the running entry
-                               // hasn't since been killed and
-                               // replaced with a different one.
-                               delete(svm.running, uuid)
-                       }
-               }()
-               time.Sleep(time.Duration(math_rand.Float64()*2) * time.Millisecond)
-               svm.Lock()
-               _, running = svm.running[uuid]
+                       _, running = svm.running[uuid]
+               }
                svm.Unlock()
                if running {
                        fmt.Fprintf(stderr, "%s: container is running\n", uuid)
                        return 1
-               } else {
-                       fmt.Fprintf(stderr, "%s: container is not running\n", uuid)
-                       return 0
                }
+               fmt.Fprintf(stderr, "%s: container is not running\n", uuid)
+               return 0
        }
        if command == "true" {
                return 0