8 "git.curoverse.com/arvados.git/lib/crunchstat"
9 "git.curoverse.com/arvados.git/sdk/go/arvados"
10 "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
11 "git.curoverse.com/arvados.git/sdk/go/keepclient"
12 "git.curoverse.com/arvados.git/sdk/go/manifest"
13 "github.com/curoverse/dockerclient"
28 // IArvadosClient is the minimal Arvados API methods used by crunch-run.
29 type IArvadosClient interface {
30 Create(resourceType string, parameters arvadosclient.Dict, output interface{}) error
31 Get(resourceType string, uuid string, parameters arvadosclient.Dict, output interface{}) error
32 Update(resourceType string, uuid string, parameters arvadosclient.Dict, output interface{}) error
33 Call(method, resourceType, uuid, action string, parameters arvadosclient.Dict, output interface{}) error
34 Discovery(key string) (interface{}, error)
37 // ErrCancelled is the error returned when the container is cancelled.
38 var ErrCancelled = errors.New("Cancelled")
40 // IKeepClient is the minimal Keep API methods used by crunch-run.
41 type IKeepClient interface {
42 PutHB(hash string, buf []byte) (string, int, error)
43 ManifestFileReader(m manifest.Manifest, filename string) (keepclient.ReadCloserWithLen, error)
46 // NewLogWriter is a factory function to create a new log writer.
47 type NewLogWriter func(name string) io.WriteCloser
49 type RunArvMount func(args []string, tok string) (*exec.Cmd, error)
51 type MkTempDir func(string, string) (string, error)
53 // ThinDockerClient is the minimal Docker client interface used by crunch-run.
54 type ThinDockerClient interface {
55 StopContainer(id string, timeout int) error
56 InspectImage(id string) (*dockerclient.ImageInfo, error)
57 LoadImage(reader io.Reader) error
58 CreateContainer(config *dockerclient.ContainerConfig, name string, authConfig *dockerclient.AuthConfig) (string, error)
59 StartContainer(id string, config *dockerclient.HostConfig) error
60 AttachContainer(id string, options *dockerclient.AttachOptions) (io.ReadCloser, error)
61 Wait(id string) <-chan dockerclient.WaitResult
62 RemoveImage(name string, force bool) ([]*dockerclient.ImageDelete, error)
65 // ContainerRunner is the main stateful struct used for a single execution of a
67 type ContainerRunner struct {
68 Docker ThinDockerClient
69 ArvClient IArvadosClient
72 dockerclient.ContainerConfig
73 dockerclient.HostConfig
79 CrunchLog *ThrottledLogger
81 Stderr *ThrottledLogger
82 LogCollection *CollectionWriter
89 CleanupTempDir []string
94 SigChan chan os.Signal
95 ArvMountExit chan error
97 trashLifetime time.Duration
99 statLogger io.WriteCloser
100 statReporter *crunchstat.Reporter
101 statInterval time.Duration
103 // What we expect the container's cgroup parent to be.
104 expectCgroupParent string
105 // What we tell docker to use as the container's cgroup
106 // parent. Note: Ideally we would use the same field for both
107 // expectCgroupParent and setCgroupParent, and just make it
108 // default to "docker". However, when using docker < 1.10 with
109 // systemd, specifying a non-empty cgroup parent (even the
110 // default value "docker") hits a docker bug
111 // (https://github.com/docker/docker/issues/17126). Using two
112 // separate fields makes it possible to use the "expect cgroup
113 // parent to be X" feature even on sites where the "specify
114 // cgroup parent" feature breaks.
115 setCgroupParent string
118 // SetupSignals sets up signal handling to gracefully terminate the underlying
119 // Docker container and update state when receiving a TERM, INT or QUIT signal.
120 func (runner *ContainerRunner) SetupSignals() {
121 runner.SigChan = make(chan os.Signal, 1)
122 signal.Notify(runner.SigChan, syscall.SIGTERM)
123 signal.Notify(runner.SigChan, syscall.SIGINT)
124 signal.Notify(runner.SigChan, syscall.SIGQUIT)
126 go func(sig <-chan os.Signal) {
128 if !runner.Cancelled {
129 runner.CancelLock.Lock()
130 runner.Cancelled = true
131 if runner.ContainerID != "" {
132 runner.Docker.StopContainer(runner.ContainerID, 10)
134 runner.CancelLock.Unlock()
140 // LoadImage determines the docker image id from the container record and
141 // checks if it is available in the local Docker image store. If not, it loads
142 // the image from Keep.
143 func (runner *ContainerRunner) LoadImage() (err error) {
145 runner.CrunchLog.Printf("Fetching Docker image from collection '%s'", runner.Container.ContainerImage)
147 var collection arvados.Collection
148 err = runner.ArvClient.Get("collections", runner.Container.ContainerImage, nil, &collection)
150 return fmt.Errorf("While getting container image collection: %v", err)
152 manifest := manifest.Manifest{Text: collection.ManifestText}
153 var img, imageID string
154 for ms := range manifest.StreamIter() {
155 img = ms.FileStreamSegments[0].Name
156 if !strings.HasSuffix(img, ".tar") {
157 return fmt.Errorf("First file in the container image collection does not end in .tar")
159 imageID = img[:len(img)-4]
162 runner.CrunchLog.Printf("Using Docker image id '%s'", imageID)
164 _, err = runner.Docker.InspectImage(imageID)
166 runner.CrunchLog.Print("Loading Docker image from keep")
168 var readCloser io.ReadCloser
169 readCloser, err = runner.Kc.ManifestFileReader(manifest, img)
171 return fmt.Errorf("While creating ManifestFileReader for container image: %v", err)
174 err = runner.Docker.LoadImage(readCloser)
176 return fmt.Errorf("While loading container image into Docker: %v", err)
179 runner.CrunchLog.Print("Docker image is available")
182 runner.ContainerConfig.Image = imageID
187 func (runner *ContainerRunner) ArvMountCmd(arvMountCmd []string, token string) (c *exec.Cmd, err error) {
188 c = exec.Command("arv-mount", arvMountCmd...)
190 // Copy our environment, but override ARVADOS_API_TOKEN with
191 // the container auth token.
193 for _, s := range os.Environ() {
194 if !strings.HasPrefix(s, "ARVADOS_API_TOKEN=") {
195 c.Env = append(c.Env, s)
198 c.Env = append(c.Env, "ARVADOS_API_TOKEN="+token)
200 nt := NewThrottledLogger(runner.NewLogWriter("arv-mount"))
209 statReadme := make(chan bool)
210 runner.ArvMountExit = make(chan error)
215 time.Sleep(100 * time.Millisecond)
216 _, err = os.Stat(fmt.Sprintf("%s/by_id/README", runner.ArvMountPoint))
226 runner.ArvMountExit <- c.Wait()
227 close(runner.ArvMountExit)
233 case err := <-runner.ArvMountExit:
234 runner.ArvMount = nil
242 func (runner *ContainerRunner) SetupMounts() (err error) {
243 runner.ArvMountPoint, err = runner.MkTempDir("", "keep")
245 return fmt.Errorf("While creating keep mount temp dir: %v", err)
248 runner.CleanupTempDir = append(runner.CleanupTempDir, runner.ArvMountPoint)
252 arvMountCmd := []string{"--foreground", "--allow-other", "--read-write"}
254 if runner.Container.RuntimeConstraints.KeepCacheRAM > 0 {
255 arvMountCmd = append(arvMountCmd, "--file-cache", fmt.Sprintf("%d", runner.Container.RuntimeConstraints.KeepCacheRAM))
258 collectionPaths := []string{}
260 needCertMount := true
262 for bind, mnt := range runner.Container.Mounts {
263 if bind == "stdout" {
264 // Is it a "file" mount kind?
265 if mnt.Kind != "file" {
266 return fmt.Errorf("Unsupported mount kind '%s' for stdout. Only 'file' is supported.", mnt.Kind)
269 // Does path start with OutputPath?
270 prefix := runner.Container.OutputPath
271 if !strings.HasSuffix(prefix, "/") {
274 if !strings.HasPrefix(mnt.Path, prefix) {
275 return fmt.Errorf("Stdout path does not start with OutputPath: %s, %s", mnt.Path, prefix)
278 if bind == "/etc/arvados/ca-certificates.crt" {
279 needCertMount = false
283 case mnt.Kind == "collection":
285 if mnt.UUID != "" && mnt.PortableDataHash != "" {
286 return fmt.Errorf("Cannot specify both 'uuid' and 'portable_data_hash' for a collection mount")
290 return fmt.Errorf("Writing to existing collections currently not permitted.")
293 src = fmt.Sprintf("%s/by_id/%s", runner.ArvMountPoint, mnt.UUID)
294 } else if mnt.PortableDataHash != "" {
296 return fmt.Errorf("Can never write to a collection specified by portable data hash")
298 src = fmt.Sprintf("%s/by_id/%s", runner.ArvMountPoint, mnt.PortableDataHash)
300 src = fmt.Sprintf("%s/tmp%d", runner.ArvMountPoint, tmpcount)
301 arvMountCmd = append(arvMountCmd, "--mount-tmp")
302 arvMountCmd = append(arvMountCmd, fmt.Sprintf("tmp%d", tmpcount))
306 if bind == runner.Container.OutputPath {
307 runner.HostOutputDir = src
309 runner.Binds = append(runner.Binds, fmt.Sprintf("%s:%s", src, bind))
311 runner.Binds = append(runner.Binds, fmt.Sprintf("%s:%s:ro", src, bind))
313 collectionPaths = append(collectionPaths, src)
315 case mnt.Kind == "tmp" && bind == runner.Container.OutputPath:
316 runner.HostOutputDir, err = runner.MkTempDir("", "")
318 return fmt.Errorf("While creating mount temp dir: %v", err)
320 st, staterr := os.Stat(runner.HostOutputDir)
322 return fmt.Errorf("While Stat on temp dir: %v", staterr)
324 err = os.Chmod(runner.HostOutputDir, st.Mode()|os.ModeSetgid|0777)
326 return fmt.Errorf("While Chmod temp dir: %v", err)
328 runner.CleanupTempDir = append(runner.CleanupTempDir, runner.HostOutputDir)
329 runner.Binds = append(runner.Binds, fmt.Sprintf("%s:%s", runner.HostOutputDir, bind))
331 case mnt.Kind == "tmp":
332 runner.Binds = append(runner.Binds, bind)
334 case mnt.Kind == "json":
335 jsondata, err := json.Marshal(mnt.Content)
337 return fmt.Errorf("encoding json data: %v", err)
339 // Create a tempdir with a single file
340 // (instead of just a tempfile): this way we
341 // can ensure the file is world-readable
342 // inside the container, without having to
343 // make it world-readable on the docker host.
344 tmpdir, err := runner.MkTempDir("", "")
346 return fmt.Errorf("creating temp dir: %v", err)
348 runner.CleanupTempDir = append(runner.CleanupTempDir, tmpdir)
349 tmpfn := filepath.Join(tmpdir, "mountdata.json")
350 err = ioutil.WriteFile(tmpfn, jsondata, 0644)
352 return fmt.Errorf("writing temp file: %v", err)
354 runner.Binds = append(runner.Binds, fmt.Sprintf("%s:%s:ro", tmpfn, bind))
358 if runner.HostOutputDir == "" {
359 return fmt.Errorf("Output path does not correspond to a writable mount point")
362 if wantAPI := runner.Container.RuntimeConstraints.API; needCertMount && wantAPI != nil && *wantAPI {
363 for _, certfile := range arvadosclient.CertFiles {
364 _, err := os.Stat(certfile)
366 runner.Binds = append(runner.Binds, fmt.Sprintf("%s:/etc/arvados/ca-certificates.crt:ro", certfile))
373 arvMountCmd = append(arvMountCmd, "--mount-by-pdh", "by_id")
375 arvMountCmd = append(arvMountCmd, "--mount-by-id", "by_id")
377 arvMountCmd = append(arvMountCmd, runner.ArvMountPoint)
379 token, err := runner.ContainerToken()
381 return fmt.Errorf("could not get container token: %s", err)
384 runner.ArvMount, err = runner.RunArvMount(arvMountCmd, token)
386 return fmt.Errorf("While trying to start arv-mount: %v", err)
389 for _, p := range collectionPaths {
392 return fmt.Errorf("While checking that input files exist: %v", err)
399 func (runner *ContainerRunner) ProcessDockerAttach(containerReader io.Reader) {
400 // Handle docker log protocol
401 // https://docs.docker.com/engine/reference/api/docker_remote_api_v1.15/#attach-to-a-container
403 header := make([]byte, 8)
405 _, readerr := io.ReadAtLeast(containerReader, header, 8)
408 readsize := int64(header[7]) | (int64(header[6]) << 8) | (int64(header[5]) << 16) | (int64(header[4]) << 24)
411 _, readerr = io.CopyN(runner.Stdout, containerReader, readsize)
414 _, readerr = io.CopyN(runner.Stderr, containerReader, readsize)
419 if readerr != io.EOF {
420 runner.CrunchLog.Printf("While reading docker logs: %v", readerr)
423 closeerr := runner.Stdout.Close()
425 runner.CrunchLog.Printf("While closing stdout logs: %v", closeerr)
428 closeerr = runner.Stderr.Close()
430 runner.CrunchLog.Printf("While closing stderr logs: %v", closeerr)
433 if runner.statReporter != nil {
434 runner.statReporter.Stop()
435 closeerr = runner.statLogger.Close()
437 runner.CrunchLog.Printf("While closing crunchstat logs: %v", closeerr)
441 runner.loggingDone <- true
442 close(runner.loggingDone)
448 func (runner *ContainerRunner) StartCrunchstat() {
449 runner.statLogger = NewThrottledLogger(runner.NewLogWriter("crunchstat"))
450 runner.statReporter = &crunchstat.Reporter{
451 CID: runner.ContainerID,
452 Logger: log.New(runner.statLogger, "", 0),
453 CgroupParent: runner.expectCgroupParent,
454 CgroupRoot: runner.cgroupRoot,
455 PollPeriod: runner.statInterval,
457 runner.statReporter.Start()
460 // AttachLogs connects the docker container stdout and stderr logs to the
461 // Arvados logger which logs to Keep and the API server logs table.
462 func (runner *ContainerRunner) AttachStreams() (err error) {
464 runner.CrunchLog.Print("Attaching container streams")
466 var containerReader io.Reader
467 containerReader, err = runner.Docker.AttachContainer(runner.ContainerID,
468 &dockerclient.AttachOptions{Stream: true, Stdout: true, Stderr: true})
470 return fmt.Errorf("While attaching container stdout/stderr streams: %v", err)
473 runner.loggingDone = make(chan bool)
475 if stdoutMnt, ok := runner.Container.Mounts["stdout"]; ok {
476 stdoutPath := stdoutMnt.Path[len(runner.Container.OutputPath):]
477 index := strings.LastIndex(stdoutPath, "/")
479 subdirs := stdoutPath[:index]
481 st, err := os.Stat(runner.HostOutputDir)
483 return fmt.Errorf("While Stat on temp dir: %v", err)
485 stdoutPath := path.Join(runner.HostOutputDir, subdirs)
486 err = os.MkdirAll(stdoutPath, st.Mode()|os.ModeSetgid|0777)
488 return fmt.Errorf("While MkdirAll %q: %v", stdoutPath, err)
492 stdoutFile, err := os.Create(path.Join(runner.HostOutputDir, stdoutPath))
494 return fmt.Errorf("While creating stdout file: %v", err)
496 runner.Stdout = stdoutFile
498 runner.Stdout = NewThrottledLogger(runner.NewLogWriter("stdout"))
500 runner.Stderr = NewThrottledLogger(runner.NewLogWriter("stderr"))
502 go runner.ProcessDockerAttach(containerReader)
507 // CreateContainer creates the docker container.
508 func (runner *ContainerRunner) CreateContainer() error {
509 runner.CrunchLog.Print("Creating Docker container")
511 runner.ContainerConfig.Cmd = runner.Container.Command
512 if runner.Container.Cwd != "." {
513 runner.ContainerConfig.WorkingDir = runner.Container.Cwd
516 for k, v := range runner.Container.Environment {
517 runner.ContainerConfig.Env = append(runner.ContainerConfig.Env, k+"="+v)
519 if wantAPI := runner.Container.RuntimeConstraints.API; wantAPI != nil && *wantAPI {
520 tok, err := runner.ContainerToken()
524 runner.ContainerConfig.Env = append(runner.ContainerConfig.Env,
525 "ARVADOS_API_TOKEN="+tok,
526 "ARVADOS_API_HOST="+os.Getenv("ARVADOS_API_HOST"),
527 "ARVADOS_API_HOST_INSECURE="+os.Getenv("ARVADOS_API_HOST_INSECURE"),
529 runner.ContainerConfig.NetworkDisabled = false
531 runner.ContainerConfig.NetworkDisabled = true
535 runner.ContainerID, err = runner.Docker.CreateContainer(&runner.ContainerConfig, "", nil)
537 return fmt.Errorf("While creating container: %v", err)
540 runner.HostConfig = dockerclient.HostConfig{
542 CgroupParent: runner.setCgroupParent,
543 LogConfig: dockerclient.LogConfig{
548 return runner.AttachStreams()
551 // StartContainer starts the docker container created by CreateContainer.
552 func (runner *ContainerRunner) StartContainer() error {
553 runner.CrunchLog.Printf("Starting Docker container id '%s'", runner.ContainerID)
554 err := runner.Docker.StartContainer(runner.ContainerID, &runner.HostConfig)
556 return fmt.Errorf("could not start container: %v", err)
561 // WaitFinish waits for the container to terminate, capture the exit code, and
562 // close the stdout/stderr logging.
563 func (runner *ContainerRunner) WaitFinish() error {
564 runner.CrunchLog.Print("Waiting for container to finish")
566 result := runner.Docker.Wait(runner.ContainerID)
569 return fmt.Errorf("While waiting for container to finish: %v", wr.Error)
571 runner.ExitCode = &wr.ExitCode
573 // wait for stdout/stderr to complete
579 // HandleOutput sets the output, unmounts the FUSE mount, and deletes temporary directories
580 func (runner *ContainerRunner) CaptureOutput() error {
581 if runner.finalState != "Complete" {
585 if wantAPI := runner.Container.RuntimeConstraints.API; wantAPI != nil && *wantAPI {
586 // Output may have been set directly by the container, so
587 // refresh the container record to check.
588 err := runner.ArvClient.Get("containers", runner.Container.UUID,
589 nil, &runner.Container)
593 if runner.Container.Output != "" {
594 // Container output is already set.
595 runner.OutputPDH = &runner.Container.Output
600 if runner.HostOutputDir == "" {
604 _, err := os.Stat(runner.HostOutputDir)
606 return fmt.Errorf("While checking host output path: %v", err)
609 var manifestText string
611 collectionMetafile := fmt.Sprintf("%s/.arvados#collection", runner.HostOutputDir)
612 _, err = os.Stat(collectionMetafile)
615 cw := CollectionWriter{runner.Kc, nil, sync.Mutex{}}
616 manifestText, err = cw.WriteTree(runner.HostOutputDir, runner.CrunchLog.Logger)
618 return fmt.Errorf("While uploading output files: %v", err)
621 // FUSE mount directory
622 file, openerr := os.Open(collectionMetafile)
624 return fmt.Errorf("While opening FUSE metafile: %v", err)
628 var rec arvados.Collection
629 err = json.NewDecoder(file).Decode(&rec)
631 return fmt.Errorf("While reading FUSE metafile: %v", err)
633 manifestText = rec.ManifestText
636 var response arvados.Collection
637 err = runner.ArvClient.Create("collections",
639 "collection": arvadosclient.Dict{
640 "trash_at": time.Now().Add(runner.trashLifetime).Format(time.RFC3339),
641 "name": "output for " + runner.Container.UUID,
642 "manifest_text": manifestText}},
645 return fmt.Errorf("While creating output collection: %v", err)
647 runner.OutputPDH = &response.PortableDataHash
651 func (runner *ContainerRunner) loadDiscoveryVars() {
652 tl, err := runner.ArvClient.Discovery("defaultTrashLifetime")
654 log.Fatalf("getting defaultTrashLifetime from discovery document: %s", err)
656 runner.trashLifetime = time.Duration(tl.(float64)) * time.Second
659 func (runner *ContainerRunner) CleanupDirs() {
660 if runner.ArvMount != nil {
661 umount := exec.Command("fusermount", "-z", "-u", runner.ArvMountPoint)
662 umnterr := umount.Run()
664 runner.CrunchLog.Printf("While running fusermount: %v", umnterr)
667 mnterr := <-runner.ArvMountExit
669 runner.CrunchLog.Printf("Arv-mount exit error: %v", mnterr)
673 for _, tmpdir := range runner.CleanupTempDir {
674 rmerr := os.RemoveAll(tmpdir)
676 runner.CrunchLog.Printf("While cleaning up temporary directory %s: %v", tmpdir, rmerr)
681 // CommitLogs posts the collection containing the final container logs.
682 func (runner *ContainerRunner) CommitLogs() error {
683 runner.CrunchLog.Print(runner.finalState)
684 runner.CrunchLog.Close()
686 // Closing CrunchLog above allows it to be committed to Keep at this
687 // point, but re-open crunch log with ArvClient in case there are any
688 // other further (such as failing to write the log to Keep!) while
690 runner.CrunchLog = NewThrottledLogger(&ArvLogWriter{runner.ArvClient, runner.Container.UUID,
693 if runner.LogsPDH != nil {
694 // If we have already assigned something to LogsPDH,
695 // we must be closing the re-opened log, which won't
696 // end up getting attached to the container record and
697 // therefore doesn't need to be saved as a collection
698 // -- it exists only to send logs to other channels.
702 mt, err := runner.LogCollection.ManifestText()
704 return fmt.Errorf("While creating log manifest: %v", err)
707 var response arvados.Collection
708 err = runner.ArvClient.Create("collections",
710 "collection": arvadosclient.Dict{
711 "trash_at": time.Now().Add(runner.trashLifetime).Format(time.RFC3339),
712 "name": "logs for " + runner.Container.UUID,
713 "manifest_text": mt}},
716 return fmt.Errorf("While creating log collection: %v", err)
718 runner.LogsPDH = &response.PortableDataHash
722 // UpdateContainerRunning updates the container state to "Running"
723 func (runner *ContainerRunner) UpdateContainerRunning() error {
724 runner.CancelLock.Lock()
725 defer runner.CancelLock.Unlock()
726 if runner.Cancelled {
729 return runner.ArvClient.Update("containers", runner.Container.UUID,
730 arvadosclient.Dict{"container": arvadosclient.Dict{"state": "Running"}}, nil)
733 // ContainerToken returns the api_token the container (and any
734 // arv-mount processes) are allowed to use.
735 func (runner *ContainerRunner) ContainerToken() (string, error) {
736 if runner.token != "" {
737 return runner.token, nil
740 var auth arvados.APIClientAuthorization
741 err := runner.ArvClient.Call("GET", "containers", runner.Container.UUID, "auth", nil, &auth)
745 runner.token = auth.APIToken
746 return runner.token, nil
749 // UpdateContainerComplete updates the container record state on API
750 // server to "Complete" or "Cancelled"
751 func (runner *ContainerRunner) UpdateContainerFinal() error {
752 update := arvadosclient.Dict{}
753 update["state"] = runner.finalState
754 if runner.LogsPDH != nil {
755 update["log"] = *runner.LogsPDH
757 if runner.finalState == "Complete" {
758 if runner.ExitCode != nil {
759 update["exit_code"] = *runner.ExitCode
761 if runner.OutputPDH != nil {
762 update["output"] = *runner.OutputPDH
765 return runner.ArvClient.Update("containers", runner.Container.UUID, arvadosclient.Dict{"container": update}, nil)
768 // IsCancelled returns the value of Cancelled, with goroutine safety.
769 func (runner *ContainerRunner) IsCancelled() bool {
770 runner.CancelLock.Lock()
771 defer runner.CancelLock.Unlock()
772 return runner.Cancelled
775 // NewArvLogWriter creates an ArvLogWriter
776 func (runner *ContainerRunner) NewArvLogWriter(name string) io.WriteCloser {
777 return &ArvLogWriter{runner.ArvClient, runner.Container.UUID, name, runner.LogCollection.Open(name + ".txt")}
780 // Run the full container lifecycle.
781 func (runner *ContainerRunner) Run() (err error) {
782 runner.CrunchLog.Printf("Executing container '%s'", runner.Container.UUID)
784 hostname, hosterr := os.Hostname()
786 runner.CrunchLog.Printf("Error getting hostname '%v'", hosterr)
788 runner.CrunchLog.Printf("Executing on host '%s'", hostname)
791 // Clean up temporary directories _after_ finalizing
792 // everything (if we've made any by then)
793 defer runner.CleanupDirs()
795 runner.finalState = "Queued"
798 // checkErr prints e (unless it's nil) and sets err to
799 // e (unless err is already non-nil). Thus, if err
800 // hasn't already been assigned when Run() returns,
801 // this cleanup func will cause Run() to return the
802 // first non-nil error that is passed to checkErr().
803 checkErr := func(e error) {
807 runner.CrunchLog.Print(e)
813 // Log the error encountered in Run(), if any
816 if runner.finalState == "Queued" {
817 runner.CrunchLog.Close()
818 runner.UpdateContainerFinal()
822 if runner.IsCancelled() {
823 runner.finalState = "Cancelled"
824 // but don't return yet -- we still want to
825 // capture partial output and write logs
828 checkErr(runner.CaptureOutput())
829 checkErr(runner.CommitLogs())
830 checkErr(runner.UpdateContainerFinal())
832 // The real log is already closed, but then we opened
833 // a new one in case we needed to log anything while
835 runner.CrunchLog.Close()
838 err = runner.ArvClient.Get("containers", runner.Container.UUID, nil, &runner.Container)
840 err = fmt.Errorf("While getting container record: %v", err)
844 // setup signal handling
845 runner.SetupSignals()
847 // check for and/or load image
848 err = runner.LoadImage()
850 runner.finalState = "Cancelled"
851 err = fmt.Errorf("While loading container image: %v", err)
855 // set up FUSE mount and binds
856 err = runner.SetupMounts()
858 runner.finalState = "Cancelled"
859 err = fmt.Errorf("While setting up mounts: %v", err)
863 err = runner.CreateContainer()
868 runner.StartCrunchstat()
870 if runner.IsCancelled() {
874 err = runner.UpdateContainerRunning()
878 runner.finalState = "Cancelled"
880 err = runner.StartContainer()
885 err = runner.WaitFinish()
887 runner.finalState = "Complete"
892 // NewContainerRunner creates a new container runner.
893 func NewContainerRunner(api IArvadosClient,
895 docker ThinDockerClient,
896 containerUUID string) *ContainerRunner {
898 cr := &ContainerRunner{ArvClient: api, Kc: kc, Docker: docker}
899 cr.NewLogWriter = cr.NewArvLogWriter
900 cr.RunArvMount = cr.ArvMountCmd
901 cr.MkTempDir = ioutil.TempDir
902 cr.LogCollection = &CollectionWriter{kc, nil, sync.Mutex{}}
903 cr.Container.UUID = containerUUID
904 cr.CrunchLog = NewThrottledLogger(cr.NewLogWriter("crunch-run"))
905 cr.CrunchLog.Immediate = log.New(os.Stderr, containerUUID+" ", 0)
906 cr.loadDiscoveryVars()
911 statInterval := flag.Duration("crunchstat-interval", 10*time.Second, "sampling period for periodic resource usage reporting")
912 cgroupRoot := flag.String("cgroup-root", "/sys/fs/cgroup", "path to sysfs cgroup tree")
913 cgroupParent := flag.String("cgroup-parent", "docker", "name of container's parent cgroup (ignored if -cgroup-parent-subsystem is used)")
914 cgroupParentSubsystem := flag.String("cgroup-parent-subsystem", "", "use current cgroup for given subsystem as parent cgroup for container")
915 caCertsPath := flag.String("ca-certs", "", "Path to TLS root certificates")
918 containerId := flag.Arg(0)
920 if *caCertsPath != "" {
921 arvadosclient.CertFiles = []string{*caCertsPath}
924 api, err := arvadosclient.MakeArvadosClient()
926 log.Fatalf("%s: %v", containerId, err)
930 var kc *keepclient.KeepClient
931 kc, err = keepclient.MakeKeepClient(api)
933 log.Fatalf("%s: %v", containerId, err)
937 var docker *dockerclient.DockerClient
938 docker, err = dockerclient.NewDockerClient("unix:///var/run/docker.sock", nil)
940 log.Fatalf("%s: %v", containerId, err)
943 cr := NewContainerRunner(api, kc, docker, containerId)
944 cr.statInterval = *statInterval
945 cr.cgroupRoot = *cgroupRoot
946 cr.expectCgroupParent = *cgroupParent
947 if *cgroupParentSubsystem != "" {
948 p := findCgroup(*cgroupParentSubsystem)
949 cr.setCgroupParent = p
950 cr.expectCgroupParent = p
955 log.Fatalf("%s: %v", containerId, err)