18903: Merge branch 'main' into 18903-fix-activity-script

[arvados.git] / lib / crunchrun / crunchrun.go
diff --git a/lib/crunchrun/crunchrun.go b/lib/crunchrun/crunchrun.go

index c9456ccc743ba2989901e5e561865f5ee81539d1..65f43e96440aa57508cb7e2a80af99e455420120 100644 (file)
--- a/lib/crunchrun/crunchrun.go
+++ b/lib/crunchrun/crunchrun.go
@@ -19,6 +19,7 @@ import (
         "os"
         "os/exec"
         "os/signal"
+       "os/user"
         "path"
         "path/filepath"
         "regexp"
@@ -36,6 +37,7 @@ import (
         "git.arvados.org/arvados.git/sdk/go/arvadosclient"
         "git.arvados.org/arvados.git/sdk/go/keepclient"
         "git.arvados.org/arvados.git/sdk/go/manifest"
+       "golang.org/x/sys/unix"
  )
  
  type command struct{}
@@ -414,11 +416,14 @@ func (runner *ContainerRunner) SetupMounts() (map[string]bindmount, error) {
         arvMountCmd := []string{
                 "arv-mount",
                 "--foreground",
-               "--allow-other",
                 "--read-write",
                 "--storage-classes", strings.Join(runner.Container.OutputStorageClasses, ","),
                 fmt.Sprintf("--crunchstat-interval=%v", runner.statInterval.Seconds())}
  
+       if runner.executor.Runtime() == "docker" {
+               arvMountCmd = append(arvMountCmd, "--allow-other")
+       }
+
         if runner.Container.RuntimeConstraints.KeepCacheRAM > 0 {
                 arvMountCmd = append(arvMountCmd, "--file-cache", fmt.Sprintf("%d", runner.Container.RuntimeConstraints.KeepCacheRAM))
         }
@@ -449,8 +454,8 @@ func (runner *ContainerRunner) SetupMounts() (map[string]bindmount, error) {
         sort.Strings(binds)
  
         for _, bind := range binds {
-               mnt, ok := runner.Container.Mounts[bind]
-               if !ok {
+               mnt, notSecret := runner.Container.Mounts[bind]
+               if !notSecret {
                         mnt = runner.SecretMounts[bind]
                 }
                 if bind == "stdout" || bind == "stderr" {
@@ -519,8 +524,7 @@ func (runner *ContainerRunner) SetupMounts() (map[string]bindmount, error) {
                                 }
                         } else {
                                 src = fmt.Sprintf("%s/tmp%d", runner.ArvMountPoint, tmpcount)
-                               arvMountCmd = append(arvMountCmd, "--mount-tmp")
-                               arvMountCmd = append(arvMountCmd, fmt.Sprintf("tmp%d", tmpcount))
+                               arvMountCmd = append(arvMountCmd, "--mount-tmp", fmt.Sprintf("tmp%d", tmpcount))
                                 tmpcount++
                         }
                         if mnt.Writable {
@@ -580,9 +584,32 @@ func (runner *ContainerRunner) SetupMounts() (map[string]bindmount, error) {
                         if err != nil {
                                 return nil, fmt.Errorf("writing temp file: %v", err)
                         }
-                       if strings.HasPrefix(bind, runner.Container.OutputPath+"/") {
+                       if strings.HasPrefix(bind, runner.Container.OutputPath+"/") && (notSecret || runner.Container.Mounts[runner.Container.OutputPath].Kind != "collection") {
+                               // In most cases, if the container
+                               // specifies a literal file inside the
+                               // output path, we copy it into the
+                               // output directory (either a mounted
+                               // collection or a staging area on the
+                               // host fs). If it's a secret, it will
+                               // be skipped when copying output from
+                               // staging to Keep later.
                                 copyFiles = append(copyFiles, copyFile{tmpfn, runner.HostOutputDir + bind[len(runner.Container.OutputPath):]})
                         } else {
+                               // If a secret is outside OutputPath,
+                               // we bind mount the secret file
+                               // directly just like other mounts. We
+                               // also use this strategy when a
+                               // secret is inside OutputPath but
+                               // OutputPath is a live collection, to
+                               // avoid writing the secret to
+                               // Keep. Attempting to remove a
+                               // bind-mounted secret file from
+                               // inside the container will return a
+                               // "Device or resource busy" error
+                               // that might not be handled well by
+                               // the container, which is why we
+                               // don't use this strategy when
+                               // OutputPath is a staging directory.
                                 bindmounts[bind] = bindmount{HostPath: tmpfn, ReadOnly: true}
                         }
  
@@ -614,10 +641,15 @@ func (runner *ContainerRunner) SetupMounts() (map[string]bindmount, error) {
         }
  
         if pdhOnly {
-               arvMountCmd = append(arvMountCmd, "--mount-by-pdh", "by_id")
+               // If we are only mounting collections by pdh, make
+               // sure we don't subscribe to websocket events to
+               // avoid putting undesired load on the API server
+               arvMountCmd = append(arvMountCmd, "--mount-by-pdh", "by_id", "--disable-event-listening")
         } else {
                 arvMountCmd = append(arvMountCmd, "--mount-by-id", "by_id")
         }
+       // the by_uuid mount point is used by singularity when writing
+       // out docker images converted to SIF
         arvMountCmd = append(arvMountCmd, "--mount-by-id", "by_uuid")
         arvMountCmd = append(arvMountCmd, runner.ArvMountPoint)
  
@@ -978,20 +1010,26 @@ func (runner *ContainerRunner) CreateContainer(imageID string, bindmounts map[st
         runner.executorStdin = stdin
         runner.executorStdout = stdout
         runner.executorStderr = stderr
+
+       if runner.Container.RuntimeConstraints.CUDA.DeviceCount > 0 {
+               nvidiaModprobe(runner.CrunchLog)
+       }
+
         return runner.executor.Create(containerSpec{
-               Image:         imageID,
-               VCPUs:         runner.Container.RuntimeConstraints.VCPUs,
-               RAM:           ram,
-               WorkingDir:    workdir,
-               Env:           env,
-               BindMounts:    bindmounts,
-               Command:       runner.Container.Command,
-               EnableNetwork: enableNetwork,
-               NetworkMode:   runner.networkMode,
-               CgroupParent:  runner.setCgroupParent,
-               Stdin:         stdin,
-               Stdout:        stdout,
-               Stderr:        stderr,
+               Image:           imageID,
+               VCPUs:           runner.Container.RuntimeConstraints.VCPUs,
+               RAM:             ram,
+               WorkingDir:      workdir,
+               Env:             env,
+               BindMounts:      bindmounts,
+               Command:         runner.Container.Command,
+               EnableNetwork:   enableNetwork,
+               CUDADeviceCount: runner.Container.RuntimeConstraints.CUDA.DeviceCount,
+               NetworkMode:     runner.networkMode,
+               CgroupParent:    runner.setCgroupParent,
+               Stdin:           stdin,
+               Stdout:          stdout,
+               Stderr:          stderr,
         })
  }
  
@@ -1042,6 +1080,20 @@ func (runner *ContainerRunner) WaitFinish() error {
         }
         runner.ExitCode = &exitcode
  
+       extra := ""
+       if exitcode&0x80 != 0 {
+               // Convert raw exit status (0x80 + signal number) to a
+               // string to log after the code, like " (signal 101)"
+               // or " (signal 9, killed)"
+               sig := syscall.WaitStatus(exitcode).Signal()
+               if name := unix.SignalName(sig); name != "" {
+                       extra = fmt.Sprintf(" (signal %d, %s)", sig, name)
+               } else {
+                       extra = fmt.Sprintf(" (signal %d)", sig)
+               }
+       }
+       runner.CrunchLog.Printf("Container exited with status code %d%s", exitcode, extra)
+
         var returnErr error
         if err = runner.executorStdin.Close(); err != nil {
                 err = fmt.Errorf("error closing container stdin: %s", err)
@@ -1424,6 +1476,7 @@ func (runner *ContainerRunner) NewArvLogWriter(name string) (io.WriteCloser, err
  // Run the full container lifecycle.
  func (runner *ContainerRunner) Run() (err error) {
         runner.CrunchLog.Printf("crunch-run %s started", cmd.Version.String())
+       runner.CrunchLog.Printf("%s", currentUserAndGroups())
         runner.CrunchLog.Printf("Executing container '%s' using %s runtime", runner.Container.UUID, runner.executor.Runtime())
  
         hostname, hosterr := os.Hostname()
@@ -1691,11 +1744,11 @@ func (command) RunCommand(prog string, args []string, stdin io.Reader, stdout, s
                 ignoreDetachFlag = true
         }
  
-       if err := flags.Parse(args); err == flag.ErrHelp {
-               return 0
-       } else if err != nil {
-               log.Print(err)
-               return 1
+       if ok, code := cmd.ParseFlags(flags, prog, args, "container-uuid", stderr); !ok {
+               return code
+       } else if !*list && flags.NArg() != 1 {
+               fmt.Fprintf(stderr, "missing required argument: container-uuid (try -help)\n")
+               return 2
         }
  
         containerUUID := flags.Arg(0)
@@ -1776,11 +1829,18 @@ func (command) RunCommand(prog string, args []string, stdin io.Reader, stdout, s
         }
  
         if keepstore == nil {
-               // Nothing is written to keepstoreLogbuf, no need to
-               // call SetWriter.
+               // Log explanation (if any) for why we're not running
+               // a local keepstore.
+               var buf bytes.Buffer
+               keepstoreLogbuf.SetWriter(&buf)
+               if buf.Len() > 0 {
+                       cr.CrunchLog.Printf("%s", strings.TrimSpace(buf.String()))
+               }
         } else if logWhat := conf.Cluster.Containers.LocalKeepLogsToContainerLog; logWhat == "none" {
+               cr.CrunchLog.Printf("using local keepstore process (pid %d) at %s", keepstore.Process.Pid, os.Getenv("ARVADOS_KEEP_SERVICES"))
                 keepstoreLogbuf.SetWriter(io.Discard)
         } else {
+               cr.CrunchLog.Printf("using local keepstore process (pid %d) at %s, writing logs to keepstore.txt in log collection", keepstore.Process.Pid, os.Getenv("ARVADOS_KEEP_SERVICES"))
                 logwriter, err := cr.NewLogWriter("keepstore")
                 if err != nil {
                         log.Print(err)
@@ -1896,6 +1956,16 @@ func startLocalKeepstore(configData ConfigData, logbuf io.Writer) (*exec.Cmd, er
         if configData.Cluster == nil || configData.KeepBuffers < 1 {
                 return nil, nil
         }
+       for uuid, vol := range configData.Cluster.Volumes {
+               if len(vol.AccessViaHosts) > 0 {
+                       fmt.Fprintf(logbuf, "not starting a local keepstore process because a volume (%s) uses AccessViaHosts\n", uuid)
+                       return nil, nil
+               }
+               if !vol.ReadOnly && vol.Replication < configData.Cluster.Collections.DefaultReplication {
+                       fmt.Fprintf(logbuf, "not starting a local keepstore process because a writable volume (%s) has replication less than Collections.DefaultReplication (%d < %d)\n", uuid, vol.Replication, configData.Cluster.Collections.DefaultReplication)
+                       return nil, nil
+               }
+       }
  
         // Rather than have an alternate way to tell keepstore how
         // many buffers to use when starting it this way, we just
@@ -1977,3 +2047,30 @@ func startLocalKeepstore(configData ConfigData, logbuf io.Writer) (*exec.Cmd, er
         os.Setenv("ARVADOS_KEEP_SERVICES", url)
         return cmd, nil
  }
+
+// currentUserAndGroups returns the current uid, gid, and groups in a
+// format suitable for logging, e.g., "crunch-run process has
+// uid=1234(arvados) gid=1234(arvados) groups=1234(arvados),114(fuse)"
+func currentUserAndGroups() string {
+       u, err := user.Current()
+       if err != nil {
+               return fmt.Sprintf("error getting current user ID: %s", err)
+       }
+       s := fmt.Sprintf("crunch-run process has uid=%s(%s) gid=%s", u.Uid, u.Username, u.Gid)
+       if g, err := user.LookupGroupId(u.Gid); err == nil { // "(name)" is omitted if the lookup fails
+               s += fmt.Sprintf("(%s)", g.Name)
+       }
+       s += " groups="
+       if gids, err := u.GroupIds(); err == nil { // list stays empty if group IDs are unavailable
+               for i, gid := range gids {
+                       if i > 0 { // comma-separate all entries after the first
+                               s += ","
+                       }
+                       s += gid
+                       if g, err := user.LookupGroupId(gid); err == nil { // numeric gid only if the name lookup fails
+                               s += fmt.Sprintf("(%s)", g.Name)
+                       }
+               }
+       }
+       return s
+}