11002: Merge branch 'master' into 11002-arvput-crash-fix
[arvados.git] / services / crunchstat / crunchstat_test.go
1 package main
2
3 import (
4         "bufio"
5         "bytes"
6         "fmt"
7         "io"
8         "io/ioutil"
9         "log"
10         "math/rand"
11         "os"
12         "os/exec"
13         "sync"
14         "syscall"
15         "testing"
16         "time"
17 )
18
19 // Test that CopyPipeToChildLog works even on lines longer than
20 // bufio.MaxScanTokenSize.
21 func TestCopyPipeToChildLogLongLines(t *testing.T) {
22         logger, logBuf := bufLogger()
23
24         pipeIn, pipeOut := io.Pipe()
25         copied := make(chan bool)
26         go func() {
27                 copyPipeToChildLog(pipeIn, logger)
28                 close(copied)
29         }()
30
31         sentBytes := make([]byte, bufio.MaxScanTokenSize+MaxLogLine+(1<<22))
32         go func() {
33                 pipeOut.Write([]byte("before\n"))
34
35                 for i := range sentBytes {
36                         // Some bytes that aren't newlines:
37                         sentBytes[i] = byte((rand.Int() & 0xff) | 0x80)
38                 }
39                 sentBytes[len(sentBytes)-1] = '\n'
40                 pipeOut.Write(sentBytes)
41
42                 pipeOut.Write([]byte("after"))
43                 pipeOut.Close()
44         }()
45
46         if before, err := logBuf.ReadBytes('\n'); err != nil || string(before) != "before\n" {
47                 t.Fatalf("\"before\n\" not received (got \"%s\", %s)", before, err)
48         }
49
50         var receivedBytes []byte
51         done := false
52         for !done {
53                 line, err := logBuf.ReadBytes('\n')
54                 if err != nil {
55                         t.Fatal(err)
56                 }
57                 if len(line) >= 5 && string(line[0:5]) == "[...]" {
58                         if receivedBytes == nil {
59                                 t.Fatal("Beginning of line reported as continuation")
60                         }
61                         line = line[5:]
62                 }
63                 if len(line) >= 6 && string(line[len(line)-6:]) == "[...]\n" {
64                         line = line[:len(line)-6]
65                 } else {
66                         done = true
67                 }
68                 receivedBytes = append(receivedBytes, line...)
69         }
70         if bytes.Compare(receivedBytes, sentBytes) != 0 {
71                 t.Fatalf("sent %d bytes, got %d different bytes", len(sentBytes), len(receivedBytes))
72         }
73
74         if after, err := logBuf.ReadBytes('\n'); err != nil || string(after) != "after\n" {
75                 t.Fatalf("\"after\n\" not received (got \"%s\", %s)", after, err)
76         }
77
78         select {
79         case <-time.After(time.Second):
80                 t.Fatal("Timeout")
81         case <-copied:
82                 // Done.
83         }
84 }
85
86 func bufLogger() (*log.Logger, *bufio.Reader) {
87         r, w := io.Pipe()
88         logger := log.New(w, "", 0)
89         return logger, bufio.NewReader(r)
90 }
91
92 func TestSignalOnDeadPPID(t *testing.T) {
93         if !testDeadParent(t, 0) {
94                 t.Fatal("child should still be alive after parent dies")
95         }
96         if testDeadParent(t, 15) {
97                 t.Fatal("child should have been killed when parent died")
98         }
99 }
100
101 // testDeadParent returns true if crunchstat's child proc is still
102 // alive after its parent dies.
103 func testDeadParent(t *testing.T, signum int) bool {
104         var err error
105         var bin, childlockfile, parentlockfile *os.File
106         for _, f := range []**os.File{&bin, &childlockfile, &parentlockfile} {
107                 *f, err = ioutil.TempFile("", "crunchstat_")
108                 if err != nil {
109                         t.Fatal(err)
110                 }
111                 defer (*f).Close()
112                 defer os.Remove((*f).Name())
113         }
114
115         bin.Close()
116         err = exec.Command("go", "build", "-o", bin.Name()).Run()
117         if err != nil {
118                 t.Fatal(err)
119         }
120
121         err = syscall.Flock(int(parentlockfile.Fd()), syscall.LOCK_EX)
122         if err != nil {
123                 t.Fatal(err)
124         }
125
126         cmd := exec.Command("bash", "-c", `
127 set -e
128 "$BINFILE" -cgroup-root=/none -ppid-check-interval=10ms -signal-on-dead-ppid="$SIGNUM" bash -c '
129     set -e
130     unlock() {
131         flock --unlock "$CHILDLOCKFD"
132         kill %1
133     }
134     trap unlock TERM
135     flock --exclusive "$CHILDLOCKFD"
136     echo -n "$$" > "$CHILDLOCKFILE"
137     flock --unlock "$PARENTLOCKFD"
138     sleep 20 </dev/null >/dev/null 2>/dev/null &
139     wait %1
140     unlock
141 ' &
142
143 # wait for inner bash to start, to ensure $BINFILE has seen this bash proc as its initial PPID
144 flock --exclusive "$PARENTLOCKFILE" true
145 `)
146         cmd.Env = append(os.Environ(),
147                 "SIGNUM="+fmt.Sprintf("%d", signum),
148                 "PARENTLOCKFD=3",
149                 "PARENTLOCKFILE="+parentlockfile.Name(),
150                 "CHILDLOCKFD=4",
151                 "CHILDLOCKFILE="+childlockfile.Name(),
152                 "BINFILE="+bin.Name())
153         cmd.ExtraFiles = []*os.File{parentlockfile, childlockfile}
154         stderr, err := cmd.StderrPipe()
155         if err != nil {
156                 t.Fatal(err)
157         }
158         stdout, err := cmd.StdoutPipe()
159         if err != nil {
160                 t.Fatal(err)
161         }
162         cmd.Start()
163         defer cmd.Wait()
164
165         var wg sync.WaitGroup
166         wg.Add(2)
167         defer wg.Wait()
168         for _, rdr := range []io.ReadCloser{stderr, stdout} {
169                 go func(rdr io.ReadCloser) {
170                         defer wg.Done()
171                         buf := make([]byte, 1024)
172                         for {
173                                 n, err := rdr.Read(buf)
174                                 if n > 0 {
175                                         t.Logf("%s", buf[:n])
176                                 }
177                                 if err != nil {
178                                         return
179                                 }
180                         }
181                 }(rdr)
182         }
183
184         // Wait until inner bash process releases parentlockfile
185         // (which means it has locked childlockfile and written its
186         // PID)
187         err = exec.Command("flock", "--exclusive", parentlockfile.Name(), "true").Run()
188         if err != nil {
189                 t.Fatal(err)
190         }
191
192         childDone := make(chan bool)
193         go func() {
194                 // Notify the main thread when the inner bash process
195                 // releases its lock on childlockfile (which means
196                 // either its sleep process ended or it received a
197                 // TERM signal).
198                 t0 := time.Now()
199                 err = exec.Command("flock", "--exclusive", childlockfile.Name(), "true").Run()
200                 if err != nil {
201                         t.Fatal(err)
202                 }
203                 t.Logf("child done after %s", time.Since(t0))
204                 close(childDone)
205         }()
206
207         select {
208         case <-time.After(500 * time.Millisecond):
209                 // Inner bash process is still alive after the timeout
210                 // period. Kill it now, so our stdout and stderr pipes
211                 // can finish and we don't leave a mess of child procs
212                 // behind.
213                 buf, err := ioutil.ReadFile(childlockfile.Name())
214                 if err != nil {
215                         t.Fatal(err)
216                 }
217                 var childPID int
218                 _, err = fmt.Sscanf(string(buf), "%d", &childPID)
219                 if err != nil {
220                         t.Fatal(err)
221                 }
222                 child, err := os.FindProcess(childPID)
223                 if err != nil {
224                         t.Fatal(err)
225                 }
226                 child.Signal(syscall.Signal(15))
227                 return true
228
229         case <-childDone:
230                 // Inner bash process ended soon after its grandparent
231                 // ended.
232                 return false
233         }
234 }