Merge branch '9397-go-manifest' into origin-9397-prepopulate-output-directory
[arvados.git] / services / crunch-run / crunchrun.go
1 package main
2
3 import (
4         "encoding/json"
5         "errors"
6         "flag"
7         "fmt"
8         "git.curoverse.com/arvados.git/lib/crunchstat"
9         "git.curoverse.com/arvados.git/sdk/go/arvados"
10         "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
11         "git.curoverse.com/arvados.git/sdk/go/keepclient"
12         "git.curoverse.com/arvados.git/sdk/go/manifest"
13         "github.com/curoverse/dockerclient"
14         "io"
15         "io/ioutil"
16         "log"
17         "os"
18         "os/exec"
19         "os/signal"
20         "path"
21         "path/filepath"
22         "sort"
23         "strings"
24         "sync"
25         "syscall"
26         "time"
27 )
28
29 // IArvadosClient is the minimal Arvados API methods used by crunch-run.
30 type IArvadosClient interface {
31         Create(resourceType string, parameters arvadosclient.Dict, output interface{}) error
32         Get(resourceType string, uuid string, parameters arvadosclient.Dict, output interface{}) error
33         Update(resourceType string, uuid string, parameters arvadosclient.Dict, output interface{}) error
34         Call(method, resourceType, uuid, action string, parameters arvadosclient.Dict, output interface{}) error
35         Discovery(key string) (interface{}, error)
36 }
37
38 // ErrCancelled is the error returned when the container is cancelled.
39 var ErrCancelled = errors.New("Cancelled")
40
41 // IKeepClient is the minimal Keep API methods used by crunch-run.
42 type IKeepClient interface {
43         PutHB(hash string, buf []byte) (string, int, error)
44         ManifestFileReader(m manifest.Manifest, filename string) (keepclient.Reader, error)
45 }
46
47 // NewLogWriter is a factory function to create a new log writer.
48 type NewLogWriter func(name string) io.WriteCloser
49
50 type RunArvMount func(args []string, tok string) (*exec.Cmd, error)
51
52 type MkTempDir func(string, string) (string, error)
53
54 // ThinDockerClient is the minimal Docker client interface used by crunch-run.
55 type ThinDockerClient interface {
56         StopContainer(id string, timeout int) error
57         InspectImage(id string) (*dockerclient.ImageInfo, error)
58         LoadImage(reader io.Reader) error
59         CreateContainer(config *dockerclient.ContainerConfig, name string, authConfig *dockerclient.AuthConfig) (string, error)
60         StartContainer(id string, config *dockerclient.HostConfig) error
61         AttachContainer(id string, options *dockerclient.AttachOptions) (io.ReadCloser, error)
62         Wait(id string) <-chan dockerclient.WaitResult
63         RemoveImage(name string, force bool) ([]*dockerclient.ImageDelete, error)
64 }
65
66 // ContainerRunner is the main stateful struct used for a single execution of a
67 // container.
68 type ContainerRunner struct {
69         Docker    ThinDockerClient
70         ArvClient IArvadosClient
71         Kc        IKeepClient
72         arvados.Container
73         dockerclient.ContainerConfig
74         dockerclient.HostConfig
75         token       string
76         ContainerID string
77         ExitCode    *int
78         NewLogWriter
79         loggingDone   chan bool
80         CrunchLog     *ThrottledLogger
81         Stdout        io.WriteCloser
82         Stderr        *ThrottledLogger
83         LogCollection *CollectionWriter
84         LogsPDH       *string
85         RunArvMount
86         MkTempDir
87         ArvMount       *exec.Cmd
88         ArvMountPoint  string
89         HostOutputDir  string
90         CleanupTempDir []string
91         Binds          []string
92         OutputPDH      *string
93         CancelLock     sync.Mutex
94         Cancelled      bool
95         SigChan        chan os.Signal
96         ArvMountExit   chan error
97         finalState     string
98         trashLifetime  time.Duration
99
100         statLogger   io.WriteCloser
101         statReporter *crunchstat.Reporter
102         statInterval time.Duration
103         cgroupRoot   string
104         // What we expect the container's cgroup parent to be.
105         expectCgroupParent string
106         // What we tell docker to use as the container's cgroup
107         // parent. Note: Ideally we would use the same field for both
108         // expectCgroupParent and setCgroupParent, and just make it
109         // default to "docker". However, when using docker < 1.10 with
110         // systemd, specifying a non-empty cgroup parent (even the
111         // default value "docker") hits a docker bug
112         // (https://github.com/docker/docker/issues/17126). Using two
113         // separate fields makes it possible to use the "expect cgroup
114         // parent to be X" feature even on sites where the "specify
115         // cgroup parent" feature breaks.
116         setCgroupParent string
117 }
118
119 // SetupSignals sets up signal handling to gracefully terminate the underlying
120 // Docker container and update state when receiving a TERM, INT or QUIT signal.
121 func (runner *ContainerRunner) SetupSignals() {
122         runner.SigChan = make(chan os.Signal, 1)
123         signal.Notify(runner.SigChan, syscall.SIGTERM)
124         signal.Notify(runner.SigChan, syscall.SIGINT)
125         signal.Notify(runner.SigChan, syscall.SIGQUIT)
126
127         go func(sig <-chan os.Signal) {
128                 for range sig {
129                         if !runner.Cancelled {
130                                 runner.CancelLock.Lock()
131                                 runner.Cancelled = true
132                                 if runner.ContainerID != "" {
133                                         runner.Docker.StopContainer(runner.ContainerID, 10)
134                                 }
135                                 runner.CancelLock.Unlock()
136                         }
137                 }
138         }(runner.SigChan)
139 }
140
141 // LoadImage determines the docker image id from the container record and
142 // checks if it is available in the local Docker image store.  If not, it loads
143 // the image from Keep.
144 func (runner *ContainerRunner) LoadImage() (err error) {
145
146         runner.CrunchLog.Printf("Fetching Docker image from collection '%s'", runner.Container.ContainerImage)
147
148         var collection arvados.Collection
149         err = runner.ArvClient.Get("collections", runner.Container.ContainerImage, nil, &collection)
150         if err != nil {
151                 return fmt.Errorf("While getting container image collection: %v", err)
152         }
153         manifest := manifest.Manifest{Text: collection.ManifestText}
154         var img, imageID string
155         for ms := range manifest.StreamIter() {
156                 img = ms.FileStreamSegments[0].Name
157                 if !strings.HasSuffix(img, ".tar") {
158                         return fmt.Errorf("First file in the container image collection does not end in .tar")
159                 }
160                 imageID = img[:len(img)-4]
161         }
162
163         runner.CrunchLog.Printf("Using Docker image id '%s'", imageID)
164
165         _, err = runner.Docker.InspectImage(imageID)
166         if err != nil {
167                 runner.CrunchLog.Print("Loading Docker image from keep")
168
169                 var readCloser io.ReadCloser
170                 readCloser, err = runner.Kc.ManifestFileReader(manifest, img)
171                 if err != nil {
172                         return fmt.Errorf("While creating ManifestFileReader for container image: %v", err)
173                 }
174
175                 err = runner.Docker.LoadImage(readCloser)
176                 if err != nil {
177                         return fmt.Errorf("While loading container image into Docker: %v", err)
178                 }
179         } else {
180                 runner.CrunchLog.Print("Docker image is available")
181         }
182
183         runner.ContainerConfig.Image = imageID
184
185         return nil
186 }
187
188 func (runner *ContainerRunner) ArvMountCmd(arvMountCmd []string, token string) (c *exec.Cmd, err error) {
189         c = exec.Command("arv-mount", arvMountCmd...)
190
191         // Copy our environment, but override ARVADOS_API_TOKEN with
192         // the container auth token.
193         c.Env = nil
194         for _, s := range os.Environ() {
195                 if !strings.HasPrefix(s, "ARVADOS_API_TOKEN=") {
196                         c.Env = append(c.Env, s)
197                 }
198         }
199         c.Env = append(c.Env, "ARVADOS_API_TOKEN="+token)
200
201         nt := NewThrottledLogger(runner.NewLogWriter("arv-mount"))
202         c.Stdout = nt
203         c.Stderr = nt
204
205         err = c.Start()
206         if err != nil {
207                 return nil, err
208         }
209
210         statReadme := make(chan bool)
211         runner.ArvMountExit = make(chan error)
212
213         keepStatting := true
214         go func() {
215                 for keepStatting {
216                         time.Sleep(100 * time.Millisecond)
217                         _, err = os.Stat(fmt.Sprintf("%s/by_id/README", runner.ArvMountPoint))
218                         if err == nil {
219                                 keepStatting = false
220                                 statReadme <- true
221                         }
222                 }
223                 close(statReadme)
224         }()
225
226         go func() {
227                 runner.ArvMountExit <- c.Wait()
228                 close(runner.ArvMountExit)
229         }()
230
231         select {
232         case <-statReadme:
233                 break
234         case err := <-runner.ArvMountExit:
235                 runner.ArvMount = nil
236                 keepStatting = false
237                 return nil, err
238         }
239
240         return c, nil
241 }
242
243 func (runner *ContainerRunner) SetupArvMountPoint(prefix string) (err error) {
244         if runner.ArvMountPoint == "" {
245                 runner.ArvMountPoint, err = runner.MkTempDir("", prefix)
246         }
247         return
248 }
249
250 func (runner *ContainerRunner) SetupMounts() (err error) {
251         err = runner.SetupArvMountPoint("keep")
252         if err != nil {
253                 return fmt.Errorf("While creating keep mount temp dir: %v", err)
254         }
255
256         runner.CleanupTempDir = append(runner.CleanupTempDir, runner.ArvMountPoint)
257
258         pdhOnly := true
259         tmpcount := 0
260         arvMountCmd := []string{"--foreground", "--allow-other", "--read-write"}
261
262         if runner.Container.RuntimeConstraints.KeepCacheRAM > 0 {
263                 arvMountCmd = append(arvMountCmd, "--file-cache", fmt.Sprintf("%d", runner.Container.RuntimeConstraints.KeepCacheRAM))
264         }
265
266         collectionPaths := []string{}
267         runner.Binds = nil
268         needCertMount := true
269
270         var binds []string
271         for bind, _ := range runner.Container.Mounts {
272                 binds = append(binds, bind)
273         }
274         sort.Strings(binds)
275
276         for _, bind := range binds {
277                 mnt := runner.Container.Mounts[bind]
278                 if bind == "stdout" {
279                         // Is it a "file" mount kind?
280                         if mnt.Kind != "file" {
281                                 return fmt.Errorf("Unsupported mount kind '%s' for stdout. Only 'file' is supported.", mnt.Kind)
282                         }
283
284                         // Does path start with OutputPath?
285                         prefix := runner.Container.OutputPath
286                         if !strings.HasSuffix(prefix, "/") {
287                                 prefix += "/"
288                         }
289                         if !strings.HasPrefix(mnt.Path, prefix) {
290                                 return fmt.Errorf("Stdout path does not start with OutputPath: %s, %s", mnt.Path, prefix)
291                         }
292                 }
293
294                 if bind == "/etc/arvados/ca-certificates.crt" {
295                         needCertMount = false
296                 }
297
298                 if strings.HasPrefix(bind, runner.Container.OutputPath+"/") && bind != runner.Container.OutputPath+"/" {
299                         if mnt.Kind != "collection" {
300                                 return fmt.Errorf("Only mount points of kind 'collection' are supported underneath the output_path: %v", bind)
301                         }
302                 }
303
304                 switch {
305                 case mnt.Kind == "collection":
306                         var src string
307                         if mnt.UUID != "" && mnt.PortableDataHash != "" {
308                                 return fmt.Errorf("Cannot specify both 'uuid' and 'portable_data_hash' for a collection mount")
309                         }
310                         if mnt.UUID != "" {
311                                 if mnt.Writable {
312                                         return fmt.Errorf("Writing to existing collections currently not permitted.")
313                                 }
314                                 pdhOnly = false
315                                 src = fmt.Sprintf("%s/by_id/%s", runner.ArvMountPoint, mnt.UUID)
316                         } else if mnt.PortableDataHash != "" {
317                                 if mnt.Writable {
318                                         return fmt.Errorf("Can never write to a collection specified by portable data hash")
319                                 }
320                                 src = fmt.Sprintf("%s/by_id/%s", runner.ArvMountPoint, mnt.PortableDataHash)
321                         } else {
322                                 src = fmt.Sprintf("%s/tmp%d", runner.ArvMountPoint, tmpcount)
323                                 arvMountCmd = append(arvMountCmd, "--mount-tmp")
324                                 arvMountCmd = append(arvMountCmd, fmt.Sprintf("tmp%d", tmpcount))
325                                 tmpcount += 1
326                         }
327                         if mnt.Writable {
328                                 if bind == runner.Container.OutputPath {
329                                         runner.HostOutputDir = src
330                                 } else if strings.HasPrefix(bind, runner.Container.OutputPath+"/") {
331                                         return fmt.Errorf("Writable mount points are not permitted underneath the output_path: %v", bind)
332                                 }
333                                 runner.Binds = append(runner.Binds, fmt.Sprintf("%s:%s", src, bind))
334                         } else {
335                                 runner.Binds = append(runner.Binds, fmt.Sprintf("%s:%s:ro", src, bind))
336                         }
337                         collectionPaths = append(collectionPaths, src)
338
339                 case mnt.Kind == "tmp" && bind == runner.Container.OutputPath:
340                         runner.HostOutputDir, err = runner.MkTempDir("", "")
341                         if err != nil {
342                                 return fmt.Errorf("While creating mount temp dir: %v", err)
343                         }
344                         st, staterr := os.Stat(runner.HostOutputDir)
345                         if staterr != nil {
346                                 return fmt.Errorf("While Stat on temp dir: %v", staterr)
347                         }
348                         err = os.Chmod(runner.HostOutputDir, st.Mode()|os.ModeSetgid|0777)
349                         if staterr != nil {
350                                 return fmt.Errorf("While Chmod temp dir: %v", err)
351                         }
352                         runner.CleanupTempDir = append(runner.CleanupTempDir, runner.HostOutputDir)
353                         runner.Binds = append(runner.Binds, fmt.Sprintf("%s:%s", runner.HostOutputDir, bind))
354
355                 case mnt.Kind == "tmp":
356                         runner.Binds = append(runner.Binds, bind)
357
358                 case mnt.Kind == "json":
359                         jsondata, err := json.Marshal(mnt.Content)
360                         if err != nil {
361                                 return fmt.Errorf("encoding json data: %v", err)
362                         }
363                         // Create a tempdir with a single file
364                         // (instead of just a tempfile): this way we
365                         // can ensure the file is world-readable
366                         // inside the container, without having to
367                         // make it world-readable on the docker host.
368                         tmpdir, err := runner.MkTempDir("", "")
369                         if err != nil {
370                                 return fmt.Errorf("creating temp dir: %v", err)
371                         }
372                         runner.CleanupTempDir = append(runner.CleanupTempDir, tmpdir)
373                         tmpfn := filepath.Join(tmpdir, "mountdata.json")
374                         err = ioutil.WriteFile(tmpfn, jsondata, 0644)
375                         if err != nil {
376                                 return fmt.Errorf("writing temp file: %v", err)
377                         }
378                         runner.Binds = append(runner.Binds, fmt.Sprintf("%s:%s:ro", tmpfn, bind))
379                 }
380         }
381
382         if runner.HostOutputDir == "" {
383                 return fmt.Errorf("Output path does not correspond to a writable mount point")
384         }
385
386         if wantAPI := runner.Container.RuntimeConstraints.API; needCertMount && wantAPI != nil && *wantAPI {
387                 for _, certfile := range arvadosclient.CertFiles {
388                         _, err := os.Stat(certfile)
389                         if err == nil {
390                                 runner.Binds = append(runner.Binds, fmt.Sprintf("%s:/etc/arvados/ca-certificates.crt:ro", certfile))
391                                 break
392                         }
393                 }
394         }
395
396         if pdhOnly {
397                 arvMountCmd = append(arvMountCmd, "--mount-by-pdh", "by_id")
398         } else {
399                 arvMountCmd = append(arvMountCmd, "--mount-by-id", "by_id")
400         }
401         arvMountCmd = append(arvMountCmd, runner.ArvMountPoint)
402
403         token, err := runner.ContainerToken()
404         if err != nil {
405                 return fmt.Errorf("could not get container token: %s", err)
406         }
407
408         runner.ArvMount, err = runner.RunArvMount(arvMountCmd, token)
409         if err != nil {
410                 return fmt.Errorf("While trying to start arv-mount: %v", err)
411         }
412
413         for _, p := range collectionPaths {
414                 _, err = os.Stat(p)
415                 if err != nil {
416                         return fmt.Errorf("While checking that input files exist: %v", err)
417                 }
418         }
419
420         return nil
421 }
422
423 func (runner *ContainerRunner) ProcessDockerAttach(containerReader io.Reader) {
424         // Handle docker log protocol
425         // https://docs.docker.com/engine/reference/api/docker_remote_api_v1.15/#attach-to-a-container
426
427         header := make([]byte, 8)
428         for {
429                 _, readerr := io.ReadAtLeast(containerReader, header, 8)
430
431                 if readerr == nil {
432                         readsize := int64(header[7]) | (int64(header[6]) << 8) | (int64(header[5]) << 16) | (int64(header[4]) << 24)
433                         if header[0] == 1 {
434                                 // stdout
435                                 _, readerr = io.CopyN(runner.Stdout, containerReader, readsize)
436                         } else {
437                                 // stderr
438                                 _, readerr = io.CopyN(runner.Stderr, containerReader, readsize)
439                         }
440                 }
441
442                 if readerr != nil {
443                         if readerr != io.EOF {
444                                 runner.CrunchLog.Printf("While reading docker logs: %v", readerr)
445                         }
446
447                         closeerr := runner.Stdout.Close()
448                         if closeerr != nil {
449                                 runner.CrunchLog.Printf("While closing stdout logs: %v", closeerr)
450                         }
451
452                         closeerr = runner.Stderr.Close()
453                         if closeerr != nil {
454                                 runner.CrunchLog.Printf("While closing stderr logs: %v", closeerr)
455                         }
456
457                         if runner.statReporter != nil {
458                                 runner.statReporter.Stop()
459                                 closeerr = runner.statLogger.Close()
460                                 if closeerr != nil {
461                                         runner.CrunchLog.Printf("While closing crunchstat logs: %v", closeerr)
462                                 }
463                         }
464
465                         runner.loggingDone <- true
466                         close(runner.loggingDone)
467                         return
468                 }
469         }
470 }
471
472 func (runner *ContainerRunner) StartCrunchstat() {
473         runner.statLogger = NewThrottledLogger(runner.NewLogWriter("crunchstat"))
474         runner.statReporter = &crunchstat.Reporter{
475                 CID:          runner.ContainerID,
476                 Logger:       log.New(runner.statLogger, "", 0),
477                 CgroupParent: runner.expectCgroupParent,
478                 CgroupRoot:   runner.cgroupRoot,
479                 PollPeriod:   runner.statInterval,
480         }
481         runner.statReporter.Start()
482 }
483
484 // AttachLogs connects the docker container stdout and stderr logs to the
485 // Arvados logger which logs to Keep and the API server logs table.
486 func (runner *ContainerRunner) AttachStreams() (err error) {
487
488         runner.CrunchLog.Print("Attaching container streams")
489
490         var containerReader io.Reader
491         containerReader, err = runner.Docker.AttachContainer(runner.ContainerID,
492                 &dockerclient.AttachOptions{Stream: true, Stdout: true, Stderr: true})
493         if err != nil {
494                 return fmt.Errorf("While attaching container stdout/stderr streams: %v", err)
495         }
496
497         runner.loggingDone = make(chan bool)
498
499         if stdoutMnt, ok := runner.Container.Mounts["stdout"]; ok {
500                 stdoutPath := stdoutMnt.Path[len(runner.Container.OutputPath):]
501                 index := strings.LastIndex(stdoutPath, "/")
502                 if index > 0 {
503                         subdirs := stdoutPath[:index]
504                         if subdirs != "" {
505                                 st, err := os.Stat(runner.HostOutputDir)
506                                 if err != nil {
507                                         return fmt.Errorf("While Stat on temp dir: %v", err)
508                                 }
509                                 stdoutPath := path.Join(runner.HostOutputDir, subdirs)
510                                 err = os.MkdirAll(stdoutPath, st.Mode()|os.ModeSetgid|0777)
511                                 if err != nil {
512                                         return fmt.Errorf("While MkdirAll %q: %v", stdoutPath, err)
513                                 }
514                         }
515                 }
516                 stdoutFile, err := os.Create(path.Join(runner.HostOutputDir, stdoutPath))
517                 if err != nil {
518                         return fmt.Errorf("While creating stdout file: %v", err)
519                 }
520                 runner.Stdout = stdoutFile
521         } else {
522                 runner.Stdout = NewThrottledLogger(runner.NewLogWriter("stdout"))
523         }
524         runner.Stderr = NewThrottledLogger(runner.NewLogWriter("stderr"))
525
526         go runner.ProcessDockerAttach(containerReader)
527
528         return nil
529 }
530
531 // CreateContainer creates the docker container.
532 func (runner *ContainerRunner) CreateContainer() error {
533         runner.CrunchLog.Print("Creating Docker container")
534
535         runner.ContainerConfig.Cmd = runner.Container.Command
536         if runner.Container.Cwd != "." {
537                 runner.ContainerConfig.WorkingDir = runner.Container.Cwd
538         }
539
540         for k, v := range runner.Container.Environment {
541                 runner.ContainerConfig.Env = append(runner.ContainerConfig.Env, k+"="+v)
542         }
543         if wantAPI := runner.Container.RuntimeConstraints.API; wantAPI != nil && *wantAPI {
544                 tok, err := runner.ContainerToken()
545                 if err != nil {
546                         return err
547                 }
548                 runner.ContainerConfig.Env = append(runner.ContainerConfig.Env,
549                         "ARVADOS_API_TOKEN="+tok,
550                         "ARVADOS_API_HOST="+os.Getenv("ARVADOS_API_HOST"),
551                         "ARVADOS_API_HOST_INSECURE="+os.Getenv("ARVADOS_API_HOST_INSECURE"),
552                 )
553                 runner.ContainerConfig.NetworkDisabled = false
554         } else {
555                 runner.ContainerConfig.NetworkDisabled = true
556         }
557
558         var err error
559         runner.ContainerID, err = runner.Docker.CreateContainer(&runner.ContainerConfig, "", nil)
560         if err != nil {
561                 return fmt.Errorf("While creating container: %v", err)
562         }
563
564         runner.HostConfig = dockerclient.HostConfig{
565                 Binds:        runner.Binds,
566                 CgroupParent: runner.setCgroupParent,
567                 LogConfig: dockerclient.LogConfig{
568                         Type: "none",
569                 },
570         }
571
572         return runner.AttachStreams()
573 }
574
575 // StartContainer starts the docker container created by CreateContainer.
576 func (runner *ContainerRunner) StartContainer() error {
577         runner.CrunchLog.Printf("Starting Docker container id '%s'", runner.ContainerID)
578         err := runner.Docker.StartContainer(runner.ContainerID, &runner.HostConfig)
579         if err != nil {
580                 return fmt.Errorf("could not start container: %v", err)
581         }
582         return nil
583 }
584
585 // WaitFinish waits for the container to terminate, capture the exit code, and
586 // close the stdout/stderr logging.
587 func (runner *ContainerRunner) WaitFinish() error {
588         runner.CrunchLog.Print("Waiting for container to finish")
589
590         result := runner.Docker.Wait(runner.ContainerID)
591         wr := <-result
592         if wr.Error != nil {
593                 return fmt.Errorf("While waiting for container to finish: %v", wr.Error)
594         }
595         runner.ExitCode = &wr.ExitCode
596
597         // wait for stdout/stderr to complete
598         <-runner.loggingDone
599
600         return nil
601 }
602
603 // HandleOutput sets the output, unmounts the FUSE mount, and deletes temporary directories
604 func (runner *ContainerRunner) CaptureOutput() error {
605         if runner.finalState != "Complete" {
606                 return nil
607         }
608
609         if wantAPI := runner.Container.RuntimeConstraints.API; wantAPI != nil && *wantAPI {
610                 // Output may have been set directly by the container, so
611                 // refresh the container record to check.
612                 err := runner.ArvClient.Get("containers", runner.Container.UUID,
613                         nil, &runner.Container)
614                 if err != nil {
615                         return err
616                 }
617                 if runner.Container.Output != "" {
618                         // Container output is already set.
619                         runner.OutputPDH = &runner.Container.Output
620                         return nil
621                 }
622         }
623
624         if runner.HostOutputDir == "" {
625                 return nil
626         }
627
628         _, err := os.Stat(runner.HostOutputDir)
629         if err != nil {
630                 return fmt.Errorf("While checking host output path: %v", err)
631         }
632
633         var manifestText string
634
635         collectionMetafile := fmt.Sprintf("%s/.arvados#collection", runner.HostOutputDir)
636         _, err = os.Stat(collectionMetafile)
637         if err != nil {
638                 // Regular directory
639                 cw := CollectionWriter{runner.Kc, nil, sync.Mutex{}}
640                 manifestText, err = cw.WriteTree(runner.HostOutputDir, runner.CrunchLog.Logger)
641                 if err != nil {
642                         return fmt.Errorf("While uploading output files: %v", err)
643                 }
644         } else {
645                 // FUSE mount directory
646                 file, openerr := os.Open(collectionMetafile)
647                 if openerr != nil {
648                         return fmt.Errorf("While opening FUSE metafile: %v", err)
649                 }
650                 defer file.Close()
651
652                 var rec arvados.Collection
653                 err = json.NewDecoder(file).Decode(&rec)
654                 if err != nil {
655                         return fmt.Errorf("While reading FUSE metafile: %v", err)
656                 }
657                 manifestText = rec.ManifestText
658         }
659
660         // Pre-populate output from the configured mount points
661         var binds []string
662         for bind, _ := range runner.Container.Mounts {
663                 binds = append(binds, bind)
664         }
665         sort.Strings(binds)
666
667         for _, bind := range binds {
668                 mnt := runner.Container.Mounts[bind]
669
670                 bindSuffix := strings.TrimPrefix(bind, runner.Container.OutputPath)
671
672                 if bindSuffix == bind || len(bindSuffix) <= 0 {
673                         // either does not start with OutputPath or is OutputPath itself
674                         continue
675                 }
676
677                 if strings.HasPrefix(bindSuffix, "/") == false {
678                         bindSuffix = "/" + bindSuffix
679                 }
680
681                 if mnt.ExcludeFromOutput == true {
682                         continue
683                 }
684
685                 idx := strings.Index(mnt.PortableDataHash, "/")
686                 if idx > 0 {
687                         mnt.Path = mnt.PortableDataHash[idx:]
688                         mnt.PortableDataHash = mnt.PortableDataHash[0:idx]
689                 }
690
691                 // append to manifest_text
692                 m, err := runner.getCollectionManifestForPath(mnt, bindSuffix)
693                 if err != nil {
694                         return err
695                 }
696
697                 manifestText = manifestText + m
698         }
699
700         // Save output
701         var response arvados.Collection
702         err = runner.ArvClient.Create("collections",
703                 arvadosclient.Dict{
704                         "collection": arvadosclient.Dict{
705                                 "trash_at":      time.Now().Add(runner.trashLifetime).Format(time.RFC3339),
706                                 "name":          "output for " + runner.Container.UUID,
707                                 "manifest_text": manifestText}},
708                 &response)
709         if err != nil {
710                 return fmt.Errorf("While creating output collection: %v", err)
711         }
712         runner.OutputPDH = &response.PortableDataHash
713         return nil
714 }
715
716 var outputCollections = make(map[string]arvados.Collection)
717
718 // Fetch the collection for the mnt.PortableDataHash
719 // Return the manifest_text fragment corresponding to the specified mnt.Path
720 //  after making any required updates.
721 //  Ex:
722 //    If mnt.Path is not specified,
723 //      return the entire manifest_text after replacing any "." with bindSuffix
724 //    If mnt.Path corresponds to one stream,
725 //      return the manifest_text for that stream after replacing that stream name with bindSuffix
726 //    Otherwise, check if a filename in any one stream is being sought. Return the manifest_text
727 //      for that stream after replacing stream name with bindSuffix minus the last word
728 //      and the file name with last word of the bindSuffix
729 //  Allowed path examples:
730 //    "path":"/"
731 //    "path":"/subdir1"
732 //    "path":"/subdir1/subdir2"
733 //    "path":"/subdir/filename" etc
734 func (runner *ContainerRunner) getCollectionManifestForPath(mnt arvados.Mount, bindSuffix string) (string, error) {
735         collection := outputCollections[mnt.PortableDataHash]
736         if collection.PortableDataHash == "" {
737                 err := runner.ArvClient.Get("collections", mnt.PortableDataHash, nil, &collection)
738                 if err != nil {
739                         return "", fmt.Errorf("While getting collection for %v: %v", mnt.PortableDataHash, err)
740                 }
741                 outputCollections[mnt.PortableDataHash] = collection
742         }
743
744         if collection.ManifestText == "" {
745                 runner.CrunchLog.Printf("No manifest text for collection %v", collection.PortableDataHash)
746                 return "", nil
747         }
748
749         manifest := manifest.Manifest{Text: collection.ManifestText}
750         manifestText := manifest.NormalizedManifestForPath(mnt.Path)
751
752         if manifestText == "" {
753                 // It could be denormalized manifest
754                 mntPath := strings.Trim(mnt.Path, "/")
755                 manifestText = strings.Replace(collection.ManifestText, "./", "."+bindSuffix+"/", -1)
756                 manifestText = strings.Replace(manifestText, ". ", "."+bindSuffix+" ", -1)
757                 wanted := ""
758                 for _, stream := range strings.Split(manifestText, "\n") {
759                         if strings.Index(stream, mntPath) == -1 {
760                                 continue
761                         }
762
763                         for _, token := range strings.Split(manifestText, " ") {
764                                 if strings.Index(token, ":") == -1 {
765                                         wanted += " " + token
766                                 } else if strings.Index(token, ":"+mntPath) >= 0 {
767                                         wanted += " " + token + "\n"
768                                         break
769                                 }
770                         }
771                 }
772                 return wanted, nil
773         }
774
775         if mnt.Path == "" || mnt.Path == "/" {
776                 // no path specified; return the entire manifest text after making adjustments
777                 manifestText = strings.Replace(manifestText, "./", "."+bindSuffix+"/", -1)
778                 manifestText = strings.Replace(manifestText, ". ", "."+bindSuffix+" ", -1)
779         } else {
780                 // either a single stream or file from a stream is being sought
781                 bindIdx := strings.LastIndex(bindSuffix, "/")
782                 var bindSubdir, bindFileName string
783                 if bindIdx >= 0 {
784                         bindSubdir = "." + bindSuffix[0:bindIdx]
785                         bindFileName = bindSuffix[bindIdx+1:]
786                 }
787                 mntPath := mnt.Path
788                 if strings.HasSuffix(mntPath, "/") {
789                         mntPath = mntPath[0 : len(mntPath)-1]
790                 }
791                 pathIdx := strings.LastIndex(mntPath, "/")
792                 var pathSubdir, pathFileName string
793                 if pathIdx >= 0 {
794                         pathSubdir = "." + mntPath[0:pathIdx]
795                         pathFileName = mntPath[pathIdx+1:]
796                 }
797
798                 if strings.Index(manifestText, "."+mntPath+" ") != -1 {
799                         // path refers to this complete stream
800                         manifestText = strings.Replace(manifestText, "."+mntPath, "."+bindSuffix, -1)
801                 } else {
802                         // look for a matching file in this stream
803                         manifestText = strings.Replace(manifestText, ":"+pathFileName, ":"+bindFileName, -1)
804                         manifestText = strings.Replace(manifestText, pathSubdir, bindSubdir, -1)
805                 }
806         }
807
808         if manifestText == "" {
809                 runner.CrunchLog.Printf("No manifest segment found for bind '%v' with path '%v'", bindSuffix, mnt.Path)
810         }
811
812         return manifestText, nil
813 }
814
815 func (runner *ContainerRunner) loadDiscoveryVars() {
816         tl, err := runner.ArvClient.Discovery("defaultTrashLifetime")
817         if err != nil {
818                 log.Fatalf("getting defaultTrashLifetime from discovery document: %s", err)
819         }
820         runner.trashLifetime = time.Duration(tl.(float64)) * time.Second
821 }
822
823 func (runner *ContainerRunner) CleanupDirs() {
824         if runner.ArvMount != nil {
825                 umount := exec.Command("fusermount", "-z", "-u", runner.ArvMountPoint)
826                 umnterr := umount.Run()
827                 if umnterr != nil {
828                         runner.CrunchLog.Printf("While running fusermount: %v", umnterr)
829                 }
830
831                 mnterr := <-runner.ArvMountExit
832                 if mnterr != nil {
833                         runner.CrunchLog.Printf("Arv-mount exit error: %v", mnterr)
834                 }
835         }
836
837         for _, tmpdir := range runner.CleanupTempDir {
838                 rmerr := os.RemoveAll(tmpdir)
839                 if rmerr != nil {
840                         runner.CrunchLog.Printf("While cleaning up temporary directory %s: %v", tmpdir, rmerr)
841                 }
842         }
843 }
844
845 // CommitLogs posts the collection containing the final container logs.
846 func (runner *ContainerRunner) CommitLogs() error {
847         runner.CrunchLog.Print(runner.finalState)
848         runner.CrunchLog.Close()
849
850         // Closing CrunchLog above allows it to be committed to Keep at this
851         // point, but re-open crunch log with ArvClient in case there are any
852         // other further (such as failing to write the log to Keep!) while
853         // shutting down
854         runner.CrunchLog = NewThrottledLogger(&ArvLogWriter{runner.ArvClient, runner.Container.UUID,
855                 "crunch-run", nil})
856
857         if runner.LogsPDH != nil {
858                 // If we have already assigned something to LogsPDH,
859                 // we must be closing the re-opened log, which won't
860                 // end up getting attached to the container record and
861                 // therefore doesn't need to be saved as a collection
862                 // -- it exists only to send logs to other channels.
863                 return nil
864         }
865
866         mt, err := runner.LogCollection.ManifestText()
867         if err != nil {
868                 return fmt.Errorf("While creating log manifest: %v", err)
869         }
870
871         var response arvados.Collection
872         err = runner.ArvClient.Create("collections",
873                 arvadosclient.Dict{
874                         "collection": arvadosclient.Dict{
875                                 "trash_at":      time.Now().Add(runner.trashLifetime).Format(time.RFC3339),
876                                 "name":          "logs for " + runner.Container.UUID,
877                                 "manifest_text": mt}},
878                 &response)
879         if err != nil {
880                 return fmt.Errorf("While creating log collection: %v", err)
881         }
882         runner.LogsPDH = &response.PortableDataHash
883         return nil
884 }
885
886 // UpdateContainerRunning updates the container state to "Running"
887 func (runner *ContainerRunner) UpdateContainerRunning() error {
888         runner.CancelLock.Lock()
889         defer runner.CancelLock.Unlock()
890         if runner.Cancelled {
891                 return ErrCancelled
892         }
893         return runner.ArvClient.Update("containers", runner.Container.UUID,
894                 arvadosclient.Dict{"container": arvadosclient.Dict{"state": "Running"}}, nil)
895 }
896
897 // ContainerToken returns the api_token the container (and any
898 // arv-mount processes) are allowed to use.
899 func (runner *ContainerRunner) ContainerToken() (string, error) {
900         if runner.token != "" {
901                 return runner.token, nil
902         }
903
904         var auth arvados.APIClientAuthorization
905         err := runner.ArvClient.Call("GET", "containers", runner.Container.UUID, "auth", nil, &auth)
906         if err != nil {
907                 return "", err
908         }
909         runner.token = auth.APIToken
910         return runner.token, nil
911 }
912
913 // UpdateContainerComplete updates the container record state on API
914 // server to "Complete" or "Cancelled"
915 func (runner *ContainerRunner) UpdateContainerFinal() error {
916         update := arvadosclient.Dict{}
917         update["state"] = runner.finalState
918         if runner.LogsPDH != nil {
919                 update["log"] = *runner.LogsPDH
920         }
921         if runner.finalState == "Complete" {
922                 if runner.ExitCode != nil {
923                         update["exit_code"] = *runner.ExitCode
924                 }
925                 if runner.OutputPDH != nil {
926                         update["output"] = *runner.OutputPDH
927                 }
928         }
929         return runner.ArvClient.Update("containers", runner.Container.UUID, arvadosclient.Dict{"container": update}, nil)
930 }
931
932 // IsCancelled returns the value of Cancelled, with goroutine safety.
933 func (runner *ContainerRunner) IsCancelled() bool {
934         runner.CancelLock.Lock()
935         defer runner.CancelLock.Unlock()
936         return runner.Cancelled
937 }
938
939 // NewArvLogWriter creates an ArvLogWriter
940 func (runner *ContainerRunner) NewArvLogWriter(name string) io.WriteCloser {
941         return &ArvLogWriter{runner.ArvClient, runner.Container.UUID, name, runner.LogCollection.Open(name + ".txt")}
942 }
943
944 // Run the full container lifecycle.
945 func (runner *ContainerRunner) Run() (err error) {
946         runner.CrunchLog.Printf("Executing container '%s'", runner.Container.UUID)
947
948         hostname, hosterr := os.Hostname()
949         if hosterr != nil {
950                 runner.CrunchLog.Printf("Error getting hostname '%v'", hosterr)
951         } else {
952                 runner.CrunchLog.Printf("Executing on host '%s'", hostname)
953         }
954
955         // Clean up temporary directories _after_ finalizing
956         // everything (if we've made any by then)
957         defer runner.CleanupDirs()
958
959         runner.finalState = "Queued"
960
961         defer func() {
962                 // checkErr prints e (unless it's nil) and sets err to
963                 // e (unless err is already non-nil). Thus, if err
964                 // hasn't already been assigned when Run() returns,
965                 // this cleanup func will cause Run() to return the
966                 // first non-nil error that is passed to checkErr().
967                 checkErr := func(e error) {
968                         if e == nil {
969                                 return
970                         }
971                         runner.CrunchLog.Print(e)
972                         if err == nil {
973                                 err = e
974                         }
975                 }
976
977                 // Log the error encountered in Run(), if any
978                 checkErr(err)
979
980                 if runner.finalState == "Queued" {
981                         runner.CrunchLog.Close()
982                         runner.UpdateContainerFinal()
983                         return
984                 }
985
986                 if runner.IsCancelled() {
987                         runner.finalState = "Cancelled"
988                         // but don't return yet -- we still want to
989                         // capture partial output and write logs
990                 }
991
992                 checkErr(runner.CaptureOutput())
993                 checkErr(runner.CommitLogs())
994                 checkErr(runner.UpdateContainerFinal())
995
996                 // The real log is already closed, but then we opened
997                 // a new one in case we needed to log anything while
998                 // finalizing.
999                 runner.CrunchLog.Close()
1000         }()
1001
1002         err = runner.ArvClient.Get("containers", runner.Container.UUID, nil, &runner.Container)
1003         if err != nil {
1004                 err = fmt.Errorf("While getting container record: %v", err)
1005                 return
1006         }
1007
1008         // setup signal handling
1009         runner.SetupSignals()
1010
1011         // check for and/or load image
1012         err = runner.LoadImage()
1013         if err != nil {
1014                 runner.finalState = "Cancelled"
1015                 err = fmt.Errorf("While loading container image: %v", err)
1016                 return
1017         }
1018
1019         // set up FUSE mount and binds
1020         err = runner.SetupMounts()
1021         if err != nil {
1022                 runner.finalState = "Cancelled"
1023                 err = fmt.Errorf("While setting up mounts: %v", err)
1024                 return
1025         }
1026
1027         err = runner.CreateContainer()
1028         if err != nil {
1029                 return
1030         }
1031
1032         runner.StartCrunchstat()
1033
1034         if runner.IsCancelled() {
1035                 return
1036         }
1037
1038         err = runner.UpdateContainerRunning()
1039         if err != nil {
1040                 return
1041         }
1042         runner.finalState = "Cancelled"
1043
1044         err = runner.StartContainer()
1045         if err != nil {
1046                 return
1047         }
1048
1049         err = runner.WaitFinish()
1050         if err == nil {
1051                 runner.finalState = "Complete"
1052         }
1053         return
1054 }
1055
1056 // NewContainerRunner creates a new container runner.
1057 func NewContainerRunner(api IArvadosClient,
1058         kc IKeepClient,
1059         docker ThinDockerClient,
1060         containerUUID string) *ContainerRunner {
1061
1062         cr := &ContainerRunner{ArvClient: api, Kc: kc, Docker: docker}
1063         cr.NewLogWriter = cr.NewArvLogWriter
1064         cr.RunArvMount = cr.ArvMountCmd
1065         cr.MkTempDir = ioutil.TempDir
1066         cr.LogCollection = &CollectionWriter{kc, nil, sync.Mutex{}}
1067         cr.Container.UUID = containerUUID
1068         cr.CrunchLog = NewThrottledLogger(cr.NewLogWriter("crunch-run"))
1069         cr.CrunchLog.Immediate = log.New(os.Stderr, containerUUID+" ", 0)
1070         cr.loadDiscoveryVars()
1071         return cr
1072 }
1073
1074 func main() {
1075         statInterval := flag.Duration("crunchstat-interval", 10*time.Second, "sampling period for periodic resource usage reporting")
1076         cgroupRoot := flag.String("cgroup-root", "/sys/fs/cgroup", "path to sysfs cgroup tree")
1077         cgroupParent := flag.String("cgroup-parent", "docker", "name of container's parent cgroup (ignored if -cgroup-parent-subsystem is used)")
1078         cgroupParentSubsystem := flag.String("cgroup-parent-subsystem", "", "use current cgroup for given subsystem as parent cgroup for container")
1079         caCertsPath := flag.String("ca-certs", "", "Path to TLS root certificates")
1080         flag.Parse()
1081
1082         containerId := flag.Arg(0)
1083
1084         if *caCertsPath != "" {
1085                 arvadosclient.CertFiles = []string{*caCertsPath}
1086         }
1087
1088         api, err := arvadosclient.MakeArvadosClient()
1089         if err != nil {
1090                 log.Fatalf("%s: %v", containerId, err)
1091         }
1092         api.Retries = 8
1093
1094         var kc *keepclient.KeepClient
1095         kc, err = keepclient.MakeKeepClient(api)
1096         if err != nil {
1097                 log.Fatalf("%s: %v", containerId, err)
1098         }
1099         kc.Retries = 4
1100
1101         var docker *dockerclient.DockerClient
1102         docker, err = dockerclient.NewDockerClient("unix:///var/run/docker.sock", nil)
1103         if err != nil {
1104                 log.Fatalf("%s: %v", containerId, err)
1105         }
1106
1107         cr := NewContainerRunner(api, kc, docker, containerId)
1108         cr.statInterval = *statInterval
1109         cr.cgroupRoot = *cgroupRoot
1110         cr.expectCgroupParent = *cgroupParent
1111         if *cgroupParentSubsystem != "" {
1112                 p := findCgroup(*cgroupParentSubsystem)
1113                 cr.setCgroupParent = p
1114                 cr.expectCgroupParent = p
1115         }
1116
1117         err = cr.Run()
1118         if err != nil {
1119                 log.Fatalf("%s: %v", containerId, err)
1120         }
1121
1122 }