X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/224f384d411bb1b4cccc7165c55bb64fd5c695ad..ca69f0bebc31124d9b61cec4b790d45a94bff379:/services/crunchstat/crunchstat.go diff --git a/services/crunchstat/crunchstat.go b/services/crunchstat/crunchstat.go index cae95fdd9d..d28bee0f5e 100644 --- a/services/crunchstat/crunchstat.go +++ b/services/crunchstat/crunchstat.go @@ -1,8 +1,13 @@ +// Copyright (C) The Arvados Authors. All rights reserved. +// +// SPDX-License-Identifier: AGPL-3.0 + package main import ( "bufio" "flag" + "fmt" "io" "log" "os" @@ -11,30 +16,59 @@ import ( "syscall" "time" - "git.curoverse.com/arvados.git/lib/crunchstat" + "git.arvados.org/arvados.git/lib/cmd" + "git.arvados.org/arvados.git/lib/crunchstat" ) const MaxLogLine = 1 << 14 // Child stderr lines >16KiB will be split +var ( + signalOnDeadPPID int = 15 + ppidCheckInterval = time.Second + version = "dev" +) + +type logger interface { + Printf(string, ...interface{}) +} + func main() { reporter := crunchstat.Reporter{ Logger: log.New(os.Stderr, "crunchstat: ", 0), } - flag.StringVar(&reporter.CgroupRoot, "cgroup-root", "", "Root of cgroup tree") - flag.StringVar(&reporter.CgroupParent, "cgroup-parent", "", "Name of container parent under cgroup") - flag.StringVar(&reporter.CIDFile, "cgroup-cid", "", "Path to container id file") - pollMsec := flag.Int64("poll", 1000, "Reporting interval, in milliseconds") + flags := flag.NewFlagSet(os.Args[0], flag.ExitOnError) + flags.StringVar(&reporter.CgroupRoot, "cgroup-root", "", "Root of cgroup tree") + flags.StringVar(&reporter.CgroupParent, "cgroup-parent", "", "Name of container parent under cgroup") + flags.StringVar(&reporter.CIDFile, "cgroup-cid", "", "Path to container id file") + flags.IntVar(&signalOnDeadPPID, "signal-on-dead-ppid", signalOnDeadPPID, "Signal to send child if crunchstat's parent process disappears (0 to disable)") + flags.DurationVar(&ppidCheckInterval, "ppid-check-interval", ppidCheckInterval, "Time between checks for parent process disappearance") + pollMsec := flags.Int64("poll", 1000, "Reporting interval, in milliseconds") + getVersion := flags.Bool("version", false, "Print version information and exit.") + + if ok, code := cmd.ParseFlags(flags, os.Args[0], os.Args[1:], "program [args ...]", os.Stderr); !ok { + os.Exit(code) + } else if *getVersion { + fmt.Printf("crunchstat %s\n", version) + return + } else if flags.NArg() == 0 { + fmt.Fprintf(os.Stderr, "missing required argument: program (try -help)\n") + os.Exit(2) + } - flag.Parse() + reporter.Logger.Printf("crunchstat %s started", version) if reporter.CgroupRoot == "" { - reporter.Logger.Fatal("error: must provide -cgroup-root") + reporter.Logger.Printf("error: must provide -cgroup-root") + os.Exit(2) + } else if signalOnDeadPPID < 0 { + reporter.Logger.Printf("-signal-on-dead-ppid=%d is invalid (use a positive signal number, or 0 to disable)", signalOnDeadPPID) + os.Exit(2) } reporter.PollPeriod = time.Duration(*pollMsec) * time.Millisecond reporter.Start() - err := runCommand(flag.Args(), reporter.Logger) + err := runCommand(flags.Args(), reporter.Logger) reporter.Stop() if err, ok := err.(*exec.ExitError); ok { @@ -48,17 +82,19 @@ func main() { if status, ok := err.Sys().(syscall.WaitStatus); ok { os.Exit(status.ExitStatus()) } else { - reporter.Logger.Fatalln("ExitError without WaitStatus:", err) + reporter.Logger.Printf("ExitError without WaitStatus: %v", err) + os.Exit(1) } } else if err != nil { - reporter.Logger.Fatalln("error in cmd.Wait:", err) + reporter.Logger.Printf("error running command: %v", err) + os.Exit(1) } } -func runCommand(argv []string, logger *log.Logger) error { +func runCommand(argv []string, logger logger) error { cmd := exec.Command(argv[0], argv[1:]...) - logger.Println("Running", argv) + logger.Printf("Running %v", argv) // Child process will use our stdin and stdout pipes // (we close our copies below) @@ -72,32 +108,65 @@ func runCommand(argv []string, logger *log.Logger) error { if cmd.Process != nil { cmd.Process.Signal(catch) } - logger.Println("notice: caught signal:", catch) + logger.Printf("notice: caught signal: %v", catch) }(sigChan) signal.Notify(sigChan, syscall.SIGTERM) signal.Notify(sigChan, syscall.SIGINT) + // Kill our child proc if our parent process disappears + if signalOnDeadPPID != 0 { + go sendSignalOnDeadPPID(ppidCheckInterval, signalOnDeadPPID, os.Getppid(), cmd, logger) + } + // Funnel stderr through our channel - stderr_pipe, err := cmd.StderrPipe() + stderrPipe, err := cmd.StderrPipe() if err != nil { - logger.Fatalln("error in StderrPipe:", err) + logger.Printf("error in StderrPipe: %v", err) + return err } // Run subprocess if err := cmd.Start(); err != nil { - logger.Fatalln("error in cmd.Start:", err) + logger.Printf("error in cmd.Start: %v", err) + return err } // Close stdin/stdout in this (parent) process os.Stdin.Close() os.Stdout.Close() - copyPipeToChildLog(stderr_pipe, log.New(os.Stderr, "", 0)) + err = copyPipeToChildLog(stderrPipe, log.New(os.Stderr, "", 0)) + if err != nil { + cmd.Process.Kill() + return err + } return cmd.Wait() } -func copyPipeToChildLog(in io.ReadCloser, logger *log.Logger) { +func sendSignalOnDeadPPID(intvl time.Duration, signum, ppidOrig int, cmd *exec.Cmd, logger logger) { + ticker := time.NewTicker(intvl) + for range ticker.C { + ppid := os.Getppid() + if ppid == ppidOrig { + continue + } + if cmd.Process == nil { + // Child process isn't running yet + continue + } + logger.Printf("notice: crunchstat ppid changed from %d to %d -- killing child pid %d with signal %d", ppidOrig, ppid, cmd.Process.Pid, signum) + err := cmd.Process.Signal(syscall.Signal(signum)) + if err != nil { + logger.Printf("error: sending signal: %s", err) + continue + } + ticker.Stop() + break + } +} + +func copyPipeToChildLog(in io.ReadCloser, logger logger) error { reader := bufio.NewReaderSize(in, MaxLogLine) var prefix string for { @@ -105,13 +174,13 @@ func copyPipeToChildLog(in io.ReadCloser, logger *log.Logger) { if err == io.EOF { break } else if err != nil { - logger.Fatal("error reading child stderr:", err) + return fmt.Errorf("error reading child stderr: %w", err) } var suffix string if isPrefix { suffix = "[...]" } - logger.Print(prefix, string(line), suffix) + logger.Printf("%s%s%s", prefix, string(line), suffix) // Set up prefix for following line if isPrefix { prefix = "[...]" @@ -119,5 +188,5 @@ func copyPipeToChildLog(in io.ReadCloser, logger *log.Logger) { prefix = "" } } - in.Close() + return in.Close() }