971cb3a27a246c9fbe1a325496a6da63e6e69ac4
[arvados.git] / services / crunch-run / crunchrun.go
1 package main
2
3 import (
4         "encoding/json"
5         "errors"
6         "flag"
7         "fmt"
8         "git.curoverse.com/arvados.git/lib/crunchstat"
9         "git.curoverse.com/arvados.git/sdk/go/arvados"
10         "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
11         "git.curoverse.com/arvados.git/sdk/go/keepclient"
12         "git.curoverse.com/arvados.git/sdk/go/manifest"
13         "github.com/curoverse/dockerclient"
14         "io"
15         "io/ioutil"
16         "log"
17         "os"
18         "os/exec"
19         "os/signal"
20         "path"
21         "path/filepath"
22         "strings"
23         "sync"
24         "syscall"
25         "time"
26 )
27
28 // IArvadosClient is the minimal Arvados API methods used by crunch-run.
29 type IArvadosClient interface {
30         Create(resourceType string, parameters arvadosclient.Dict, output interface{}) error
31         Get(resourceType string, uuid string, parameters arvadosclient.Dict, output interface{}) error
32         Update(resourceType string, uuid string, parameters arvadosclient.Dict, output interface{}) error
33         Call(method, resourceType, uuid, action string, parameters arvadosclient.Dict, output interface{}) error
34         Discovery(key string) (interface{}, error)
35 }
36
37 // ErrCancelled is the error returned when the container is cancelled.
38 var ErrCancelled = errors.New("Cancelled")
39
40 // IKeepClient is the minimal Keep API methods used by crunch-run.
41 type IKeepClient interface {
42         PutHB(hash string, buf []byte) (string, int, error)
43         ManifestFileReader(m manifest.Manifest, filename string) (keepclient.ReadCloserWithLen, error)
44 }
45
46 // NewLogWriter is a factory function to create a new log writer.
47 type NewLogWriter func(name string) io.WriteCloser
48
49 type RunArvMount func(args []string, tok string) (*exec.Cmd, error)
50
51 type MkTempDir func(string, string) (string, error)
52
53 // ThinDockerClient is the minimal Docker client interface used by crunch-run.
54 type ThinDockerClient interface {
55         StopContainer(id string, timeout int) error
56         InspectImage(id string) (*dockerclient.ImageInfo, error)
57         LoadImage(reader io.Reader) error
58         CreateContainer(config *dockerclient.ContainerConfig, name string, authConfig *dockerclient.AuthConfig) (string, error)
59         StartContainer(id string, config *dockerclient.HostConfig) error
60         AttachContainer(id string, options *dockerclient.AttachOptions) (io.ReadCloser, error)
61         Wait(id string) <-chan dockerclient.WaitResult
62         RemoveImage(name string, force bool) ([]*dockerclient.ImageDelete, error)
63 }
64
65 // ContainerRunner is the main stateful struct used for a single execution of a
66 // container.
67 type ContainerRunner struct {
68         Docker    ThinDockerClient
69         ArvClient IArvadosClient
70         Kc        IKeepClient
71         arvados.Container
72         dockerclient.ContainerConfig
73         dockerclient.HostConfig
74         token       string
75         ContainerID string
76         ExitCode    *int
77         NewLogWriter
78         loggingDone   chan bool
79         CrunchLog     *ThrottledLogger
80         Stdout        io.WriteCloser
81         Stderr        *ThrottledLogger
82         LogCollection *CollectionWriter
83         LogsPDH       *string
84         RunArvMount
85         MkTempDir
86         ArvMount       *exec.Cmd
87         ArvMountPoint  string
88         HostOutputDir  string
89         CleanupTempDir []string
90         Binds          []string
91         OutputPDH      *string
92         CancelLock     sync.Mutex
93         Cancelled      bool
94         SigChan        chan os.Signal
95         ArvMountExit   chan error
96         finalState     string
97         trashLifetime  time.Duration
98
99         statLogger   io.WriteCloser
100         statReporter *crunchstat.Reporter
101         statInterval time.Duration
102         cgroupRoot   string
103         // What we expect the container's cgroup parent to be.
104         expectCgroupParent string
105         // What we tell docker to use as the container's cgroup
106         // parent. Note: Ideally we would use the same field for both
107         // expectCgroupParent and setCgroupParent, and just make it
108         // default to "docker". However, when using docker < 1.10 with
109         // systemd, specifying a non-empty cgroup parent (even the
110         // default value "docker") hits a docker bug
111         // (https://github.com/docker/docker/issues/17126). Using two
112         // separate fields makes it possible to use the "expect cgroup
113         // parent to be X" feature even on sites where the "specify
114         // cgroup parent" feature breaks.
115         setCgroupParent string
116 }
117
118 // SetupSignals sets up signal handling to gracefully terminate the underlying
119 // Docker container and update state when receiving a TERM, INT or QUIT signal.
120 func (runner *ContainerRunner) SetupSignals() {
121         runner.SigChan = make(chan os.Signal, 1)
122         signal.Notify(runner.SigChan, syscall.SIGTERM)
123         signal.Notify(runner.SigChan, syscall.SIGINT)
124         signal.Notify(runner.SigChan, syscall.SIGQUIT)
125
126         go func(sig <-chan os.Signal) {
127                 for range sig {
128                         if !runner.Cancelled {
129                                 runner.CancelLock.Lock()
130                                 runner.Cancelled = true
131                                 if runner.ContainerID != "" {
132                                         runner.Docker.StopContainer(runner.ContainerID, 10)
133                                 }
134                                 runner.CancelLock.Unlock()
135                         }
136                 }
137         }(runner.SigChan)
138 }
139
140 // LoadImage determines the docker image id from the container record and
141 // checks if it is available in the local Docker image store.  If not, it loads
142 // the image from Keep.
143 func (runner *ContainerRunner) LoadImage() (err error) {
144
145         runner.CrunchLog.Printf("Fetching Docker image from collection '%s'", runner.Container.ContainerImage)
146
147         var collection arvados.Collection
148         err = runner.ArvClient.Get("collections", runner.Container.ContainerImage, nil, &collection)
149         if err != nil {
150                 return fmt.Errorf("While getting container image collection: %v", err)
151         }
152         manifest := manifest.Manifest{Text: collection.ManifestText}
153         var img, imageID string
154         for ms := range manifest.StreamIter() {
155                 img = ms.FileStreamSegments[0].Name
156                 if !strings.HasSuffix(img, ".tar") {
157                         return fmt.Errorf("First file in the container image collection does not end in .tar")
158                 }
159                 imageID = img[:len(img)-4]
160         }
161
162         runner.CrunchLog.Printf("Using Docker image id '%s'", imageID)
163
164         _, err = runner.Docker.InspectImage(imageID)
165         if err != nil {
166                 runner.CrunchLog.Print("Loading Docker image from keep")
167
168                 var readCloser io.ReadCloser
169                 readCloser, err = runner.Kc.ManifestFileReader(manifest, img)
170                 if err != nil {
171                         return fmt.Errorf("While creating ManifestFileReader for container image: %v", err)
172                 }
173
174                 err = runner.Docker.LoadImage(readCloser)
175                 if err != nil {
176                         return fmt.Errorf("While loading container image into Docker: %v", err)
177                 }
178         } else {
179                 runner.CrunchLog.Print("Docker image is available")
180         }
181
182         runner.ContainerConfig.Image = imageID
183
184         return nil
185 }
186
187 func (runner *ContainerRunner) ArvMountCmd(arvMountCmd []string, token string) (c *exec.Cmd, err error) {
188         c = exec.Command("arv-mount", arvMountCmd...)
189
190         // Copy our environment, but override ARVADOS_API_TOKEN with
191         // the container auth token.
192         c.Env = nil
193         for _, s := range os.Environ() {
194                 if !strings.HasPrefix(s, "ARVADOS_API_TOKEN=") {
195                         c.Env = append(c.Env, s)
196                 }
197         }
198         c.Env = append(c.Env, "ARVADOS_API_TOKEN="+token)
199
200         nt := NewThrottledLogger(runner.NewLogWriter("arv-mount"))
201         c.Stdout = nt
202         c.Stderr = nt
203
204         err = c.Start()
205         if err != nil {
206                 return nil, err
207         }
208
209         statReadme := make(chan bool)
210         runner.ArvMountExit = make(chan error)
211
212         keepStatting := true
213         go func() {
214                 for keepStatting {
215                         time.Sleep(100 * time.Millisecond)
216                         _, err = os.Stat(fmt.Sprintf("%s/by_id/README", runner.ArvMountPoint))
217                         if err == nil {
218                                 keepStatting = false
219                                 statReadme <- true
220                         }
221                 }
222                 close(statReadme)
223         }()
224
225         go func() {
226                 runner.ArvMountExit <- c.Wait()
227                 close(runner.ArvMountExit)
228         }()
229
230         select {
231         case <-statReadme:
232                 break
233         case err := <-runner.ArvMountExit:
234                 runner.ArvMount = nil
235                 keepStatting = false
236                 return nil, err
237         }
238
239         return c, nil
240 }
241
242 func (runner *ContainerRunner) SetupMounts() (err error) {
243         runner.ArvMountPoint, err = runner.MkTempDir("", "keep")
244         if err != nil {
245                 return fmt.Errorf("While creating keep mount temp dir: %v", err)
246         }
247
248         runner.CleanupTempDir = append(runner.CleanupTempDir, runner.ArvMountPoint)
249
250         pdhOnly := true
251         tmpcount := 0
252         arvMountCmd := []string{"--foreground", "--allow-other", "--read-write"}
253
254         if runner.Container.RuntimeConstraints.KeepCacheRAM > 0 {
255                 arvMountCmd = append(arvMountCmd, "--file-cache", fmt.Sprintf("%d", runner.Container.RuntimeConstraints.KeepCacheRAM))
256         }
257
258         collectionPaths := []string{}
259         runner.Binds = nil
260         needCertMount := true
261
262         for bind, mnt := range runner.Container.Mounts {
263                 if bind == "stdout" {
264                         // Is it a "file" mount kind?
265                         if mnt.Kind != "file" {
266                                 return fmt.Errorf("Unsupported mount kind '%s' for stdout. Only 'file' is supported.", mnt.Kind)
267                         }
268
269                         // Does path start with OutputPath?
270                         prefix := runner.Container.OutputPath
271                         if !strings.HasSuffix(prefix, "/") {
272                                 prefix += "/"
273                         }
274                         if !strings.HasPrefix(mnt.Path, prefix) {
275                                 return fmt.Errorf("Stdout path does not start with OutputPath: %s, %s", mnt.Path, prefix)
276                         }
277                 }
278                 if bind == "/etc/arvados/ca-certificates.crt" {
279                         needCertMount = false
280                 }
281
282                 switch {
283                 case mnt.Kind == "collection":
284                         var src string
285                         if mnt.UUID != "" && mnt.PortableDataHash != "" {
286                                 return fmt.Errorf("Cannot specify both 'uuid' and 'portable_data_hash' for a collection mount")
287                         }
288                         if mnt.UUID != "" {
289                                 if mnt.Writable {
290                                         return fmt.Errorf("Writing to existing collections currently not permitted.")
291                                 }
292                                 pdhOnly = false
293                                 src = fmt.Sprintf("%s/by_id/%s", runner.ArvMountPoint, mnt.UUID)
294                         } else if mnt.PortableDataHash != "" {
295                                 if mnt.Writable {
296                                         return fmt.Errorf("Can never write to a collection specified by portable data hash")
297                                 }
298                                 src = fmt.Sprintf("%s/by_id/%s", runner.ArvMountPoint, mnt.PortableDataHash)
299                         } else {
300                                 src = fmt.Sprintf("%s/tmp%d", runner.ArvMountPoint, tmpcount)
301                                 arvMountCmd = append(arvMountCmd, "--mount-tmp")
302                                 arvMountCmd = append(arvMountCmd, fmt.Sprintf("tmp%d", tmpcount))
303                                 tmpcount += 1
304                         }
305                         if mnt.Writable {
306                                 if bind == runner.Container.OutputPath {
307                                         runner.HostOutputDir = src
308                                 }
309                                 runner.Binds = append(runner.Binds, fmt.Sprintf("%s:%s", src, bind))
310                         } else {
311                                 runner.Binds = append(runner.Binds, fmt.Sprintf("%s:%s:ro", src, bind))
312                         }
313                         collectionPaths = append(collectionPaths, src)
314
315                 case mnt.Kind == "tmp" && bind == runner.Container.OutputPath:
316                         runner.HostOutputDir, err = runner.MkTempDir("", "")
317                         if err != nil {
318                                 return fmt.Errorf("While creating mount temp dir: %v", err)
319                         }
320                         st, staterr := os.Stat(runner.HostOutputDir)
321                         if staterr != nil {
322                                 return fmt.Errorf("While Stat on temp dir: %v", staterr)
323                         }
324                         err = os.Chmod(runner.HostOutputDir, st.Mode()|os.ModeSetgid|0777)
325                         if staterr != nil {
326                                 return fmt.Errorf("While Chmod temp dir: %v", err)
327                         }
328                         runner.CleanupTempDir = append(runner.CleanupTempDir, runner.HostOutputDir)
329                         runner.Binds = append(runner.Binds, fmt.Sprintf("%s:%s", runner.HostOutputDir, bind))
330
331                 case mnt.Kind == "tmp":
332                         runner.Binds = append(runner.Binds, bind)
333
334                 case mnt.Kind == "json":
335                         jsondata, err := json.Marshal(mnt.Content)
336                         if err != nil {
337                                 return fmt.Errorf("encoding json data: %v", err)
338                         }
339                         // Create a tempdir with a single file
340                         // (instead of just a tempfile): this way we
341                         // can ensure the file is world-readable
342                         // inside the container, without having to
343                         // make it world-readable on the docker host.
344                         tmpdir, err := runner.MkTempDir("", "")
345                         if err != nil {
346                                 return fmt.Errorf("creating temp dir: %v", err)
347                         }
348                         runner.CleanupTempDir = append(runner.CleanupTempDir, tmpdir)
349                         tmpfn := filepath.Join(tmpdir, "mountdata.json")
350                         err = ioutil.WriteFile(tmpfn, jsondata, 0644)
351                         if err != nil {
352                                 return fmt.Errorf("writing temp file: %v", err)
353                         }
354                         runner.Binds = append(runner.Binds, fmt.Sprintf("%s:%s:ro", tmpfn, bind))
355                 }
356         }
357
358         if runner.HostOutputDir == "" {
359                 return fmt.Errorf("Output path does not correspond to a writable mount point")
360         }
361
362         if wantAPI := runner.Container.RuntimeConstraints.API; needCertMount && wantAPI != nil && *wantAPI {
363                 for _, certfile := range arvadosclient.CertFiles {
364                         _, err := os.Stat(certfile)
365                         if err == nil {
366                                 runner.Binds = append(runner.Binds, fmt.Sprintf("%s:/etc/arvados/ca-certificates.crt:ro", certfile))
367                                 break
368                         }
369                 }
370         }
371
372         if pdhOnly {
373                 arvMountCmd = append(arvMountCmd, "--mount-by-pdh", "by_id")
374         } else {
375                 arvMountCmd = append(arvMountCmd, "--mount-by-id", "by_id")
376         }
377         arvMountCmd = append(arvMountCmd, runner.ArvMountPoint)
378
379         token, err := runner.ContainerToken()
380         if err != nil {
381                 return fmt.Errorf("could not get container token: %s", err)
382         }
383
384         runner.ArvMount, err = runner.RunArvMount(arvMountCmd, token)
385         if err != nil {
386                 return fmt.Errorf("While trying to start arv-mount: %v", err)
387         }
388
389         for _, p := range collectionPaths {
390                 _, err = os.Stat(p)
391                 if err != nil {
392                         return fmt.Errorf("While checking that input files exist: %v", err)
393                 }
394         }
395
396         return nil
397 }
398
399 func (runner *ContainerRunner) ProcessDockerAttach(containerReader io.Reader) {
400         // Handle docker log protocol
401         // https://docs.docker.com/engine/reference/api/docker_remote_api_v1.15/#attach-to-a-container
402
403         header := make([]byte, 8)
404         for {
405                 _, readerr := io.ReadAtLeast(containerReader, header, 8)
406
407                 if readerr == nil {
408                         readsize := int64(header[7]) | (int64(header[6]) << 8) | (int64(header[5]) << 16) | (int64(header[4]) << 24)
409                         if header[0] == 1 {
410                                 // stdout
411                                 _, readerr = io.CopyN(runner.Stdout, containerReader, readsize)
412                         } else {
413                                 // stderr
414                                 _, readerr = io.CopyN(runner.Stderr, containerReader, readsize)
415                         }
416                 }
417
418                 if readerr != nil {
419                         if readerr != io.EOF {
420                                 runner.CrunchLog.Printf("While reading docker logs: %v", readerr)
421                         }
422
423                         closeerr := runner.Stdout.Close()
424                         if closeerr != nil {
425                                 runner.CrunchLog.Printf("While closing stdout logs: %v", closeerr)
426                         }
427
428                         closeerr = runner.Stderr.Close()
429                         if closeerr != nil {
430                                 runner.CrunchLog.Printf("While closing stderr logs: %v", closeerr)
431                         }
432
433                         if runner.statReporter != nil {
434                                 runner.statReporter.Stop()
435                                 closeerr = runner.statLogger.Close()
436                                 if closeerr != nil {
437                                         runner.CrunchLog.Printf("While closing crunchstat logs: %v", closeerr)
438                                 }
439                         }
440
441                         runner.loggingDone <- true
442                         close(runner.loggingDone)
443                         return
444                 }
445         }
446 }
447
448 func (runner *ContainerRunner) StartCrunchstat() {
449         runner.statLogger = NewThrottledLogger(runner.NewLogWriter("crunchstat"))
450         runner.statReporter = &crunchstat.Reporter{
451                 CID:          runner.ContainerID,
452                 Logger:       log.New(runner.statLogger, "", 0),
453                 CgroupParent: runner.expectCgroupParent,
454                 CgroupRoot:   runner.cgroupRoot,
455                 PollPeriod:   runner.statInterval,
456         }
457         runner.statReporter.Start()
458 }
459
460 // AttachLogs connects the docker container stdout and stderr logs to the
461 // Arvados logger which logs to Keep and the API server logs table.
462 func (runner *ContainerRunner) AttachStreams() (err error) {
463
464         runner.CrunchLog.Print("Attaching container streams")
465
466         var containerReader io.Reader
467         containerReader, err = runner.Docker.AttachContainer(runner.ContainerID,
468                 &dockerclient.AttachOptions{Stream: true, Stdout: true, Stderr: true})
469         if err != nil {
470                 return fmt.Errorf("While attaching container stdout/stderr streams: %v", err)
471         }
472
473         runner.loggingDone = make(chan bool)
474
475         if stdoutMnt, ok := runner.Container.Mounts["stdout"]; ok {
476                 stdoutPath := stdoutMnt.Path[len(runner.Container.OutputPath):]
477                 index := strings.LastIndex(stdoutPath, "/")
478                 if index > 0 {
479                         subdirs := stdoutPath[:index]
480                         if subdirs != "" {
481                                 st, err := os.Stat(runner.HostOutputDir)
482                                 if err != nil {
483                                         return fmt.Errorf("While Stat on temp dir: %v", err)
484                                 }
485                                 stdoutPath := path.Join(runner.HostOutputDir, subdirs)
486                                 err = os.MkdirAll(stdoutPath, st.Mode()|os.ModeSetgid|0777)
487                                 if err != nil {
488                                         return fmt.Errorf("While MkdirAll %q: %v", stdoutPath, err)
489                                 }
490                         }
491                 }
492                 stdoutFile, err := os.Create(path.Join(runner.HostOutputDir, stdoutPath))
493                 if err != nil {
494                         return fmt.Errorf("While creating stdout file: %v", err)
495                 }
496                 runner.Stdout = stdoutFile
497         } else {
498                 runner.Stdout = NewThrottledLogger(runner.NewLogWriter("stdout"))
499         }
500         runner.Stderr = NewThrottledLogger(runner.NewLogWriter("stderr"))
501
502         go runner.ProcessDockerAttach(containerReader)
503
504         return nil
505 }
506
507 // CreateContainer creates the docker container.
508 func (runner *ContainerRunner) CreateContainer() error {
509         runner.CrunchLog.Print("Creating Docker container")
510
511         runner.ContainerConfig.Cmd = runner.Container.Command
512         if runner.Container.Cwd != "." {
513                 runner.ContainerConfig.WorkingDir = runner.Container.Cwd
514         }
515
516         for k, v := range runner.Container.Environment {
517                 runner.ContainerConfig.Env = append(runner.ContainerConfig.Env, k+"="+v)
518         }
519         if wantAPI := runner.Container.RuntimeConstraints.API; wantAPI != nil && *wantAPI {
520                 tok, err := runner.ContainerToken()
521                 if err != nil {
522                         return err
523                 }
524                 runner.ContainerConfig.Env = append(runner.ContainerConfig.Env,
525                         "ARVADOS_API_TOKEN="+tok,
526                         "ARVADOS_API_HOST="+os.Getenv("ARVADOS_API_HOST"),
527                         "ARVADOS_API_HOST_INSECURE="+os.Getenv("ARVADOS_API_HOST_INSECURE"),
528                 )
529                 runner.ContainerConfig.NetworkDisabled = false
530         } else {
531                 runner.ContainerConfig.NetworkDisabled = true
532         }
533
534         var err error
535         runner.ContainerID, err = runner.Docker.CreateContainer(&runner.ContainerConfig, "", nil)
536         if err != nil {
537                 return fmt.Errorf("While creating container: %v", err)
538         }
539
540         runner.HostConfig = dockerclient.HostConfig{
541                 Binds:        runner.Binds,
542                 CgroupParent: runner.setCgroupParent,
543                 LogConfig: dockerclient.LogConfig{
544                         Type: "none",
545                 },
546         }
547
548         return runner.AttachStreams()
549 }
550
551 // StartContainer starts the docker container created by CreateContainer.
552 func (runner *ContainerRunner) StartContainer() error {
553         runner.CrunchLog.Printf("Starting Docker container id '%s'", runner.ContainerID)
554         err := runner.Docker.StartContainer(runner.ContainerID, &runner.HostConfig)
555         if err != nil {
556                 return fmt.Errorf("could not start container: %v", err)
557         }
558         return nil
559 }
560
561 // WaitFinish waits for the container to terminate, capture the exit code, and
562 // close the stdout/stderr logging.
563 func (runner *ContainerRunner) WaitFinish() error {
564         runner.CrunchLog.Print("Waiting for container to finish")
565
566         result := runner.Docker.Wait(runner.ContainerID)
567         wr := <-result
568         if wr.Error != nil {
569                 return fmt.Errorf("While waiting for container to finish: %v", wr.Error)
570         }
571         runner.ExitCode = &wr.ExitCode
572
573         // wait for stdout/stderr to complete
574         <-runner.loggingDone
575
576         return nil
577 }
578
579 // HandleOutput sets the output, unmounts the FUSE mount, and deletes temporary directories
580 func (runner *ContainerRunner) CaptureOutput() error {
581         if runner.finalState != "Complete" {
582                 return nil
583         }
584
585         if wantAPI := runner.Container.RuntimeConstraints.API; wantAPI != nil && *wantAPI {
586                 // Output may have been set directly by the container, so
587                 // refresh the container record to check.
588                 err := runner.ArvClient.Get("containers", runner.Container.UUID,
589                         nil, &runner.Container)
590                 if err != nil {
591                         return err
592                 }
593                 if runner.Container.Output != "" {
594                         // Container output is already set.
595                         runner.OutputPDH = &runner.Container.Output
596                         return nil
597                 }
598         }
599
600         if runner.HostOutputDir == "" {
601                 return nil
602         }
603
604         _, err := os.Stat(runner.HostOutputDir)
605         if err != nil {
606                 return fmt.Errorf("While checking host output path: %v", err)
607         }
608
609         var manifestText string
610
611         collectionMetafile := fmt.Sprintf("%s/.arvados#collection", runner.HostOutputDir)
612         _, err = os.Stat(collectionMetafile)
613         if err != nil {
614                 // Regular directory
615                 cw := CollectionWriter{runner.Kc, nil, sync.Mutex{}}
616                 manifestText, err = cw.WriteTree(runner.HostOutputDir, runner.CrunchLog.Logger)
617                 if err != nil {
618                         return fmt.Errorf("While uploading output files: %v", err)
619                 }
620         } else {
621                 // FUSE mount directory
622                 file, openerr := os.Open(collectionMetafile)
623                 if openerr != nil {
624                         return fmt.Errorf("While opening FUSE metafile: %v", err)
625                 }
626                 defer file.Close()
627
628                 var rec arvados.Collection
629                 err = json.NewDecoder(file).Decode(&rec)
630                 if err != nil {
631                         return fmt.Errorf("While reading FUSE metafile: %v", err)
632                 }
633                 manifestText = rec.ManifestText
634         }
635
636         var response arvados.Collection
637         err = runner.ArvClient.Create("collections",
638                 arvadosclient.Dict{
639                         "collection": arvadosclient.Dict{
640                                 "trash_at":      time.Now().Add(runner.trashLifetime).Format(time.RFC3339),
641                                 "name":          "output for " + runner.Container.UUID,
642                                 "manifest_text": manifestText}},
643                 &response)
644         if err != nil {
645                 return fmt.Errorf("While creating output collection: %v", err)
646         }
647         runner.OutputPDH = &response.PortableDataHash
648         return nil
649 }
650
651 func (runner *ContainerRunner) loadDiscoveryVars() {
652         tl, err := runner.ArvClient.Discovery("defaultTrashLifetime")
653         if err != nil {
654                 log.Fatalf("getting defaultTrashLifetime from discovery document: %s", err)
655         }
656         runner.trashLifetime = time.Duration(tl.(float64)) * time.Second
657 }
658
659 func (runner *ContainerRunner) CleanupDirs() {
660         if runner.ArvMount != nil {
661                 umount := exec.Command("fusermount", "-z", "-u", runner.ArvMountPoint)
662                 umnterr := umount.Run()
663                 if umnterr != nil {
664                         runner.CrunchLog.Printf("While running fusermount: %v", umnterr)
665                 }
666
667                 mnterr := <-runner.ArvMountExit
668                 if mnterr != nil {
669                         runner.CrunchLog.Printf("Arv-mount exit error: %v", mnterr)
670                 }
671         }
672
673         for _, tmpdir := range runner.CleanupTempDir {
674                 rmerr := os.RemoveAll(tmpdir)
675                 if rmerr != nil {
676                         runner.CrunchLog.Printf("While cleaning up temporary directory %s: %v", tmpdir, rmerr)
677                 }
678         }
679 }
680
681 // CommitLogs posts the collection containing the final container logs.
682 func (runner *ContainerRunner) CommitLogs() error {
683         runner.CrunchLog.Print(runner.finalState)
684         runner.CrunchLog.Close()
685
686         // Closing CrunchLog above allows it to be committed to Keep at this
687         // point, but re-open crunch log with ArvClient in case there are any
688         // other further (such as failing to write the log to Keep!) while
689         // shutting down
690         runner.CrunchLog = NewThrottledLogger(&ArvLogWriter{runner.ArvClient, runner.Container.UUID,
691                 "crunch-run", nil})
692
693         if runner.LogsPDH != nil {
694                 // If we have already assigned something to LogsPDH,
695                 // we must be closing the re-opened log, which won't
696                 // end up getting attached to the container record and
697                 // therefore doesn't need to be saved as a collection
698                 // -- it exists only to send logs to other channels.
699                 return nil
700         }
701
702         mt, err := runner.LogCollection.ManifestText()
703         if err != nil {
704                 return fmt.Errorf("While creating log manifest: %v", err)
705         }
706
707         var response arvados.Collection
708         err = runner.ArvClient.Create("collections",
709                 arvadosclient.Dict{
710                         "collection": arvadosclient.Dict{
711                                 "trash_at":      time.Now().Add(runner.trashLifetime).Format(time.RFC3339),
712                                 "name":          "logs for " + runner.Container.UUID,
713                                 "manifest_text": mt}},
714                 &response)
715         if err != nil {
716                 return fmt.Errorf("While creating log collection: %v", err)
717         }
718         runner.LogsPDH = &response.PortableDataHash
719         return nil
720 }
721
722 // UpdateContainerRunning updates the container state to "Running"
723 func (runner *ContainerRunner) UpdateContainerRunning() error {
724         runner.CancelLock.Lock()
725         defer runner.CancelLock.Unlock()
726         if runner.Cancelled {
727                 return ErrCancelled
728         }
729         return runner.ArvClient.Update("containers", runner.Container.UUID,
730                 arvadosclient.Dict{"container": arvadosclient.Dict{"state": "Running"}}, nil)
731 }
732
733 // ContainerToken returns the api_token the container (and any
734 // arv-mount processes) are allowed to use.
735 func (runner *ContainerRunner) ContainerToken() (string, error) {
736         if runner.token != "" {
737                 return runner.token, nil
738         }
739
740         var auth arvados.APIClientAuthorization
741         err := runner.ArvClient.Call("GET", "containers", runner.Container.UUID, "auth", nil, &auth)
742         if err != nil {
743                 return "", err
744         }
745         runner.token = auth.APIToken
746         return runner.token, nil
747 }
748
749 // UpdateContainerComplete updates the container record state on API
750 // server to "Complete" or "Cancelled"
751 func (runner *ContainerRunner) UpdateContainerFinal() error {
752         update := arvadosclient.Dict{}
753         update["state"] = runner.finalState
754         if runner.LogsPDH != nil {
755                 update["log"] = *runner.LogsPDH
756         }
757         if runner.finalState == "Complete" {
758                 if runner.ExitCode != nil {
759                         update["exit_code"] = *runner.ExitCode
760                 }
761                 if runner.OutputPDH != nil {
762                         update["output"] = *runner.OutputPDH
763                 }
764         }
765         return runner.ArvClient.Update("containers", runner.Container.UUID, arvadosclient.Dict{"container": update}, nil)
766 }
767
768 // IsCancelled returns the value of Cancelled, with goroutine safety.
769 func (runner *ContainerRunner) IsCancelled() bool {
770         runner.CancelLock.Lock()
771         defer runner.CancelLock.Unlock()
772         return runner.Cancelled
773 }
774
775 // NewArvLogWriter creates an ArvLogWriter
776 func (runner *ContainerRunner) NewArvLogWriter(name string) io.WriteCloser {
777         return &ArvLogWriter{runner.ArvClient, runner.Container.UUID, name, runner.LogCollection.Open(name + ".txt")}
778 }
779
780 // Run the full container lifecycle.
781 func (runner *ContainerRunner) Run() (err error) {
782         runner.CrunchLog.Printf("Executing container '%s'", runner.Container.UUID)
783
784         hostname, hosterr := os.Hostname()
785         if hosterr != nil {
786                 runner.CrunchLog.Printf("Error getting hostname '%v'", hosterr)
787         } else {
788                 runner.CrunchLog.Printf("Executing on host '%s'", hostname)
789         }
790
791         // Clean up temporary directories _after_ finalizing
792         // everything (if we've made any by then)
793         defer runner.CleanupDirs()
794
795         runner.finalState = "Queued"
796
797         defer func() {
798                 // checkErr prints e (unless it's nil) and sets err to
799                 // e (unless err is already non-nil). Thus, if err
800                 // hasn't already been assigned when Run() returns,
801                 // this cleanup func will cause Run() to return the
802                 // first non-nil error that is passed to checkErr().
803                 checkErr := func(e error) {
804                         if e == nil {
805                                 return
806                         }
807                         runner.CrunchLog.Print(e)
808                         if err == nil {
809                                 err = e
810                         }
811                 }
812
813                 // Log the error encountered in Run(), if any
814                 checkErr(err)
815
816                 if runner.finalState == "Queued" {
817                         runner.CrunchLog.Close()
818                         runner.UpdateContainerFinal()
819                         return
820                 }
821
822                 if runner.IsCancelled() {
823                         runner.finalState = "Cancelled"
824                         // but don't return yet -- we still want to
825                         // capture partial output and write logs
826                 }
827
828                 checkErr(runner.CaptureOutput())
829                 checkErr(runner.CommitLogs())
830                 checkErr(runner.UpdateContainerFinal())
831
832                 // The real log is already closed, but then we opened
833                 // a new one in case we needed to log anything while
834                 // finalizing.
835                 runner.CrunchLog.Close()
836         }()
837
838         err = runner.ArvClient.Get("containers", runner.Container.UUID, nil, &runner.Container)
839         if err != nil {
840                 err = fmt.Errorf("While getting container record: %v", err)
841                 return
842         }
843
844         // setup signal handling
845         runner.SetupSignals()
846
847         // check for and/or load image
848         err = runner.LoadImage()
849         if err != nil {
850                 runner.finalState = "Cancelled"
851                 err = fmt.Errorf("While loading container image: %v", err)
852                 return
853         }
854
855         // set up FUSE mount and binds
856         err = runner.SetupMounts()
857         if err != nil {
858                 runner.finalState = "Cancelled"
859                 err = fmt.Errorf("While setting up mounts: %v", err)
860                 return
861         }
862
863         err = runner.CreateContainer()
864         if err != nil {
865                 return
866         }
867
868         runner.StartCrunchstat()
869
870         if runner.IsCancelled() {
871                 return
872         }
873
874         err = runner.UpdateContainerRunning()
875         if err != nil {
876                 return
877         }
878         runner.finalState = "Cancelled"
879
880         err = runner.StartContainer()
881         if err != nil {
882                 return
883         }
884
885         err = runner.WaitFinish()
886         if err == nil {
887                 runner.finalState = "Complete"
888         }
889         return
890 }
891
892 // NewContainerRunner creates a new container runner.
893 func NewContainerRunner(api IArvadosClient,
894         kc IKeepClient,
895         docker ThinDockerClient,
896         containerUUID string) *ContainerRunner {
897
898         cr := &ContainerRunner{ArvClient: api, Kc: kc, Docker: docker}
899         cr.NewLogWriter = cr.NewArvLogWriter
900         cr.RunArvMount = cr.ArvMountCmd
901         cr.MkTempDir = ioutil.TempDir
902         cr.LogCollection = &CollectionWriter{kc, nil, sync.Mutex{}}
903         cr.Container.UUID = containerUUID
904         cr.CrunchLog = NewThrottledLogger(cr.NewLogWriter("crunch-run"))
905         cr.CrunchLog.Immediate = log.New(os.Stderr, containerUUID+" ", 0)
906         cr.loadDiscoveryVars()
907         return cr
908 }
909
910 func main() {
911         statInterval := flag.Duration("crunchstat-interval", 10*time.Second, "sampling period for periodic resource usage reporting")
912         cgroupRoot := flag.String("cgroup-root", "/sys/fs/cgroup", "path to sysfs cgroup tree")
913         cgroupParent := flag.String("cgroup-parent", "docker", "name of container's parent cgroup (ignored if -cgroup-parent-subsystem is used)")
914         cgroupParentSubsystem := flag.String("cgroup-parent-subsystem", "", "use current cgroup for given subsystem as parent cgroup for container")
915         caCertsPath := flag.String("ca-certs", "", "Path to TLS root certificates")
916         flag.Parse()
917
918         containerId := flag.Arg(0)
919
920         if *caCertsPath != "" {
921                 arvadosclient.CertFiles = []string{*caCertsPath}
922         }
923
924         api, err := arvadosclient.MakeArvadosClient()
925         if err != nil {
926                 log.Fatalf("%s: %v", containerId, err)
927         }
928         api.Retries = 8
929
930         var kc *keepclient.KeepClient
931         kc, err = keepclient.MakeKeepClient(api)
932         if err != nil {
933                 log.Fatalf("%s: %v", containerId, err)
934         }
935         kc.Retries = 4
936
937         var docker *dockerclient.DockerClient
938         docker, err = dockerclient.NewDockerClient("unix:///var/run/docker.sock", nil)
939         if err != nil {
940                 log.Fatalf("%s: %v", containerId, err)
941         }
942
943         cr := NewContainerRunner(api, kc, docker, containerId)
944         cr.statInterval = *statInterval
945         cr.cgroupRoot = *cgroupRoot
946         cr.expectCgroupParent = *cgroupParent
947         if *cgroupParentSubsystem != "" {
948                 p := findCgroup(*cgroupParentSubsystem)
949                 cr.setCgroupParent = p
950                 cr.expectCgroupParent = p
951         }
952
953         err = cr.Run()
954         if err != nil {
955                 log.Fatalf("%s: %v", containerId, err)
956         }
957
958 }