// ContainerRunner is the main stateful struct used for a single execution of a
// container.
type ContainerRunner struct {
- Docker ThinDockerClient
- client *arvados.Client
- ArvClient IArvadosClient
- Kc IKeepClient
- arvados.Container
+ Docker ThinDockerClient
+ client *arvados.Client
+ ArvClient IArvadosClient
+ Kc IKeepClient
+ Container arvados.Container
ContainerConfig dockercontainer.Config
- dockercontainer.HostConfig
- token string
- ContainerID string
- ExitCode *int
- NewLogWriter
- loggingDone chan bool
- CrunchLog *ThrottledLogger
- Stdout io.WriteCloser
- Stderr io.WriteCloser
- LogCollection arvados.CollectionFileSystem
- LogsPDH *string
- RunArvMount
- MkTempDir
- ArvMount *exec.Cmd
- ArvMountPoint string
- HostOutputDir string
- Binds []string
- Volumes map[string]struct{}
- OutputPDH *string
- SigChan chan os.Signal
- ArvMountExit chan error
- SecretMounts map[string]arvados.Mount
- MkArvClient func(token string) (IArvadosClient, error)
- finalState string
- parentTemp string
+ HostConfig dockercontainer.HostConfig
+ token string
+ ContainerID string
+ ExitCode *int
+ NewLogWriter NewLogWriter
+ loggingDone chan bool
+ CrunchLog *ThrottledLogger
+ Stdout io.WriteCloser
+ Stderr io.WriteCloser
+ logUUID string
+ logMtx sync.Mutex
+ LogCollection arvados.CollectionFileSystem
+ LogsPDH *string
+ RunArvMount RunArvMount
+ MkTempDir MkTempDir
+ ArvMount *exec.Cmd
+ ArvMountPoint string
+ HostOutputDir string
+ Binds []string
+ Volumes map[string]struct{}
+ OutputPDH *string
+ SigChan chan os.Signal
+ ArvMountExit chan error
+ SecretMounts map[string]arvados.Mount
+ MkArvClient func(token string) (IArvadosClient, error)
+ finalState string
+ parentTemp string
ListProcesses func() ([]PsProcess, error)
CgroupParent: runner.expectCgroupParent,
CgroupRoot: runner.cgroupRoot,
PollPeriod: runner.statInterval,
+ TempDir: runner.parentTemp,
}
runner.statReporter.Start()
return nil
}
}
+func (runner *ContainerRunner) updateLogs() {
+ ticker := time.NewTicker(crunchLogUpdatePeriod / 360)
+ defer ticker.Stop()
+
+ sigusr1 := make(chan os.Signal, 1)
+ signal.Notify(sigusr1, syscall.SIGUSR1)
+ defer signal.Stop(sigusr1)
+
+ saveAtTime := time.Now().Add(crunchLogUpdatePeriod)
+ saveAtSize := crunchLogUpdateSize
+ var savedSize int64
+ for {
+ select {
+ case <-ticker.C:
+ case <-sigusr1:
+ saveAtTime = time.Now()
+ }
+ runner.logMtx.Lock()
+ done := runner.LogsPDH != nil
+ runner.logMtx.Unlock()
+ if done {
+ return
+ }
+ size := runner.LogCollection.Size()
+ if size == savedSize || (time.Now().Before(saveAtTime) && size < saveAtSize) {
+ continue
+ }
+ saveAtTime = time.Now().Add(crunchLogUpdatePeriod)
+ saveAtSize = runner.LogCollection.Size() + crunchLogUpdateSize
+ saved, err := runner.saveLogCollection(false)
+ if err != nil {
+ runner.CrunchLog.Printf("error updating log collection: %s", err)
+ continue
+ }
+
+ var updated arvados.Container
+ err = runner.ArvClient.Update("containers", runner.Container.UUID, arvadosclient.Dict{
+ "container": arvadosclient.Dict{"log": saved.PortableDataHash},
+ }, &updated)
+ if err != nil {
+ runner.CrunchLog.Printf("error updating container log to %s: %s", saved.PortableDataHash, err)
+ continue
+ }
+
+ savedSize = size
+ }
+}
+
// CaptureOutput saves data from the container's output directory if
// needed, and updates the container output accordingly.
func (runner *ContainerRunner) CaptureOutput() error {
// -- it exists only to send logs to other channels.
return nil
}
+ saved, err := runner.saveLogCollection(true)
+ if err != nil {
+ return fmt.Errorf("error saving log collection: %s", err)
+ }
+ runner.logMtx.Lock()
+ defer runner.logMtx.Unlock()
+ runner.LogsPDH = &saved.PortableDataHash
+ return nil
+}
+func (runner *ContainerRunner) saveLogCollection(final bool) (response arvados.Collection, err error) {
+ runner.logMtx.Lock()
+ defer runner.logMtx.Unlock()
+ if runner.LogsPDH != nil {
+ // Already finalized.
+ return
+ }
mt, err := runner.LogCollection.MarshalManifest(".")
if err != nil {
- return fmt.Errorf("While creating log manifest: %v", err)
- }
-
- var response arvados.Collection
- err = runner.ArvClient.Create("collections",
- arvadosclient.Dict{
- "ensure_unique_name": true,
- "collection": arvadosclient.Dict{
- "is_trashed": true,
- "name": "logs for " + runner.Container.UUID,
- "manifest_text": mt}},
- &response)
+ err = fmt.Errorf("error creating log manifest: %v", err)
+ return
+ }
+ updates := arvadosclient.Dict{
+ "name": "logs for " + runner.Container.UUID,
+ "manifest_text": mt,
+ }
+ if final {
+ updates["is_trashed"] = true
+ } else {
+ exp := time.Now().Add(crunchLogUpdatePeriod * 24)
+ updates["trash_at"] = exp
+ updates["delete_at"] = exp
+ }
+ reqBody := arvadosclient.Dict{"collection": updates}
+ if runner.logUUID == "" {
+ reqBody["ensure_unique_name"] = true
+ err = runner.ArvClient.Create("collections", reqBody, &response)
+ } else {
+ err = runner.ArvClient.Update("collections", runner.logUUID, reqBody, &response)
+ }
if err != nil {
- return fmt.Errorf("While creating log collection: %v", err)
+ return
}
- runner.LogsPDH = &response.PortableDataHash
- return nil
+ runner.logUUID = response.UUID
+ return
}
// UpdateContainerRunning updates the container state to "Running"
if err != nil {
return "", err
}
- runner.token = auth.APIToken
+ runner.token = fmt.Sprintf("v2/%s/%s/%s", auth.UUID, auth.APIToken, runner.Container.UUID)
return runner.token, nil
}
// hasn't already been assigned when Run() returns,
// this cleanup func will cause Run() to return the
// first non-nil error that is passed to checkErr().
- checkErr := func(e error) {
+ checkErr := func(errorIn string, e error) {
if e == nil {
return
}
- runner.CrunchLog.Print(e)
+ runner.CrunchLog.Printf("error in %s: %v", errorIn, e)
if err == nil {
err = e
}
}
// Log the error encountered in Run(), if any
- checkErr(err)
+ checkErr("Run", err)
if runner.finalState == "Queued" {
runner.UpdateContainerFinal()
// capture partial output and write logs
}
- checkErr(runner.CaptureOutput())
- checkErr(runner.stopHoststat())
- checkErr(runner.CommitLogs())
- checkErr(runner.UpdateContainerFinal())
+ checkErr("CaptureOutput", runner.CaptureOutput())
+ checkErr("stopHoststat", runner.stopHoststat())
+ checkErr("CommitLogs", runner.CommitLogs())
+ checkErr("UpdateContainerFinal", runner.UpdateContainerFinal())
}()
err = runner.fetchContainerRecord()
cr.CrunchLog.Immediate = log.New(os.Stderr, containerUUID+" ", 0)
loadLogThrottleParams(api)
+ go cr.updateLogs()
return cr, nil
}
cgroupParent := flag.String("cgroup-parent", "docker", "name of container's parent cgroup (ignored if -cgroup-parent-subsystem is used)")
cgroupParentSubsystem := flag.String("cgroup-parent-subsystem", "", "use current cgroup for given subsystem as parent cgroup for container")
caCertsPath := flag.String("ca-certs", "", "Path to TLS root certificates")
+ detach := flag.Bool("detach", false, "Detach from parent process and run in the background")
+ sleep := flag.Duration("sleep", 0, "Delay before starting (testing use only)")
+ kill := flag.Int("kill", -1, "Send signal to an existing crunch-run process for given UUID")
+ list := flag.Bool("list", false, "List UUIDs of existing crunch-run processes")
enableNetwork := flag.String("container-enable-networking", "default",
`Specify if networking should be enabled for container. One of 'default', 'always':
default: only enable networking if container requests it.
memprofile := flag.String("memprofile", "", "write memory profile to `file` after running container")
getVersion := flag.Bool("version", false, "Print version information and exit.")
checkContainerd := flag.Duration("check-containerd", 60*time.Second, "Periodic check if (docker-)containerd is running (use 0s to disable).")
+
+ detached := false
+ if len(os.Args) > 1 && os.Args[1] == "-detached" {
+ // This process was invoked by a parent process, which
+ // has passed along its own arguments, including
+ // -detach, after the leading -detached flag. Strip
+ // the leading -detached flag (it's not recognized by
+ // flag.Parse()) ... and remember not to detach all
+ // over again in this process.
+ os.Args = append([]string{os.Args[0]}, os.Args[2:]...)
+ detached = true
+ }
flag.Parse()
+ switch {
+ case *detach && !detached:
+ os.Exit(Detach(flag.Arg(0), os.Args, os.Stdout, os.Stderr))
+ case *kill >= 0:
+ os.Exit(KillProcess(flag.Arg(0), syscall.Signal(*kill), os.Stdout, os.Stderr))
+ case *list:
+ os.Exit(ListProcesses(os.Stdout, os.Stderr))
+ }
+
// Print version information if requested
if *getVersion {
fmt.Printf("crunch-run %s\n", version)
}
log.Printf("crunch-run %s started", version)
+ time.Sleep(*sleep)
containerId := flag.Arg(0)