13937: Export stats as prometheus metrics. (WIP)
[arvados.git] / services / crunchstat / crunchstat_test.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package main
6
7 import (
8         "bufio"
9         "bytes"
10         "fmt"
11         "io"
12         "io/ioutil"
13         "log"
14         "math/rand"
15         "os"
16         "os/exec"
17         "sync"
18         "syscall"
19         "testing"
20         "time"
21 )
22
23 // Test that CopyPipeToChildLog works even on lines longer than
24 // bufio.MaxScanTokenSize.
25 func TestCopyPipeToChildLogLongLines(t *testing.T) {
26         logger, logBuf := bufLogger()
27
28         pipeIn, pipeOut := io.Pipe()
29         copied := make(chan bool)
30         go func() {
31                 copyPipeToChildLog(pipeIn, logger)
32                 close(copied)
33         }()
34
35         sentBytes := make([]byte, bufio.MaxScanTokenSize+MaxLogLine+(1<<22))
36         go func() {
37                 pipeOut.Write([]byte("before\n"))
38
39                 for i := range sentBytes {
40                         // Some bytes that aren't newlines:
41                         sentBytes[i] = byte((rand.Int() & 0xff) | 0x80)
42                 }
43                 sentBytes[len(sentBytes)-1] = '\n'
44                 pipeOut.Write(sentBytes)
45
46                 pipeOut.Write([]byte("after"))
47                 pipeOut.Close()
48         }()
49
50         if before, err := logBuf.ReadBytes('\n'); err != nil || string(before) != "before\n" {
51                 t.Fatalf("\"before\n\" not received (got \"%s\", %s)", before, err)
52         }
53
54         var receivedBytes []byte
55         done := false
56         for !done {
57                 line, err := logBuf.ReadBytes('\n')
58                 if err != nil {
59                         t.Fatal(err)
60                 }
61                 if len(line) >= 5 && string(line[0:5]) == "[...]" {
62                         if receivedBytes == nil {
63                                 t.Fatal("Beginning of line reported as continuation")
64                         }
65                         line = line[5:]
66                 }
67                 if len(line) >= 6 && string(line[len(line)-6:]) == "[...]\n" {
68                         line = line[:len(line)-6]
69                 } else {
70                         done = true
71                 }
72                 receivedBytes = append(receivedBytes, line...)
73         }
74         if bytes.Compare(receivedBytes, sentBytes) != 0 {
75                 t.Fatalf("sent %d bytes, got %d different bytes", len(sentBytes), len(receivedBytes))
76         }
77
78         if after, err := logBuf.ReadBytes('\n'); err != nil || string(after) != "after\n" {
79                 t.Fatalf("\"after\n\" not received (got \"%s\", %s)", after, err)
80         }
81
82         select {
83         case <-time.After(time.Second):
84                 t.Fatal("Timeout")
85         case <-copied:
86                 // Done.
87         }
88 }
89
90 func bufLogger() (*log.Logger, *bufio.Reader) {
91         r, w := io.Pipe()
92         logger := log.New(w, "", 0)
93         return logger, bufio.NewReader(r)
94 }
95
96 func TestSignalOnDeadPPID(t *testing.T) {
97         if !testDeadParent(t, 0) {
98                 t.Fatal("child should still be alive after parent dies")
99         }
100         if testDeadParent(t, 15) {
101                 t.Fatal("child should have been killed when parent died")
102         }
103 }
104
105 // testDeadParent returns true if crunchstat's child proc is still
106 // alive after its parent dies.
107 func testDeadParent(t *testing.T, signum int) bool {
108         var err error
109         var bin, childlockfile, parentlockfile *os.File
110         for _, f := range []**os.File{&bin, &childlockfile, &parentlockfile} {
111                 *f, err = ioutil.TempFile("", "crunchstat_")
112                 if err != nil {
113                         t.Fatal(err)
114                 }
115                 defer (*f).Close()
116                 defer os.Remove((*f).Name())
117         }
118
119         bin.Close()
120         err = exec.Command("go", "build", "-o", bin.Name()).Run()
121         if err != nil {
122                 t.Fatal(err)
123         }
124
125         err = syscall.Flock(int(parentlockfile.Fd()), syscall.LOCK_EX)
126         if err != nil {
127                 t.Fatal(err)
128         }
129
130         cmd := exec.Command("bash", "-c", `
131 set -e
132 "$BINFILE" -cgroup-root=/none -ppid-check-interval=10ms -signal-on-dead-ppid="$SIGNUM" bash -c '
133     set -e
134     unlock() {
135         flock --unlock "$CHILDLOCKFD"
136         kill %1
137     }
138     trap unlock TERM
139     flock --exclusive "$CHILDLOCKFD"
140     echo -n "$$" > "$CHILDLOCKFILE"
141     flock --unlock "$PARENTLOCKFD"
142     sleep 20 </dev/null >/dev/null 2>/dev/null &
143     wait %1
144     unlock
145 ' &
146
147 # wait for inner bash to start, to ensure $BINFILE has seen this bash proc as its initial PPID
148 flock --exclusive "$PARENTLOCKFILE" true
149 `)
150         cmd.Env = append(os.Environ(),
151                 "SIGNUM="+fmt.Sprintf("%d", signum),
152                 "PARENTLOCKFD=3",
153                 "PARENTLOCKFILE="+parentlockfile.Name(),
154                 "CHILDLOCKFD=4",
155                 "CHILDLOCKFILE="+childlockfile.Name(),
156                 "BINFILE="+bin.Name())
157         cmd.ExtraFiles = []*os.File{parentlockfile, childlockfile}
158         stderr, err := cmd.StderrPipe()
159         if err != nil {
160                 t.Fatal(err)
161         }
162         stdout, err := cmd.StdoutPipe()
163         if err != nil {
164                 t.Fatal(err)
165         }
166         cmd.Start()
167         defer cmd.Wait()
168
169         var wg sync.WaitGroup
170         wg.Add(2)
171         defer wg.Wait()
172         for _, rdr := range []io.ReadCloser{stderr, stdout} {
173                 go func(rdr io.ReadCloser) {
174                         defer wg.Done()
175                         buf := make([]byte, 1024)
176                         for {
177                                 n, err := rdr.Read(buf)
178                                 if n > 0 {
179                                         t.Logf("%s", buf[:n])
180                                 }
181                                 if err != nil {
182                                         return
183                                 }
184                         }
185                 }(rdr)
186         }
187
188         // Wait until inner bash process releases parentlockfile
189         // (which means it has locked childlockfile and written its
190         // PID)
191         err = exec.Command("flock", "--exclusive", parentlockfile.Name(), "true").Run()
192         if err != nil {
193                 t.Fatal(err)
194         }
195
196         childDone := make(chan bool)
197         go func() {
198                 // Notify the main thread when the inner bash process
199                 // releases its lock on childlockfile (which means
200                 // either its sleep process ended or it received a
201                 // TERM signal).
202                 t0 := time.Now()
203                 err = exec.Command("flock", "--exclusive", childlockfile.Name(), "true").Run()
204                 if err != nil {
205                         t.Fatal(err)
206                 }
207                 t.Logf("child done after %s", time.Since(t0))
208                 close(childDone)
209         }()
210
211         select {
212         case <-time.After(500 * time.Millisecond):
213                 // Inner bash process is still alive after the timeout
214                 // period. Kill it now, so our stdout and stderr pipes
215                 // can finish and we don't leave a mess of child procs
216                 // behind.
217                 buf, err := ioutil.ReadFile(childlockfile.Name())
218                 if err != nil {
219                         t.Fatal(err)
220                 }
221                 var childPID int
222                 _, err = fmt.Sscanf(string(buf), "%d", &childPID)
223                 if err != nil {
224                         t.Fatal(err)
225                 }
226                 child, err := os.FindProcess(childPID)
227                 if err != nil {
228                         t.Fatal(err)
229                 }
230                 child.Signal(syscall.Signal(15))
231                 return true
232
233         case <-childDone:
234                 // Inner bash process ended soon after its grandparent
235                 // ended.
236                 return false
237         }
238 }