"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
"git.curoverse.com/arvados.git/sdk/go/keepclient"
"git.curoverse.com/arvados.git/sdk/go/manifest"
- "github.com/shirou/gopsutil/process"
"golang.org/x/net/context"
dockertypes "github.com/docker/docker/api/types"
PutB(buf []byte) (string, int, error)
ReadAt(locator string, p []byte, off int) (int, error)
ManifestFileReader(m manifest.Manifest, filename string) (arvados.File, error)
+ LocalLocator(locator string) (string, error)
ClearBlockCache()
}
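// Illustrative sketch, not part of this change: LocalLocator is expected to
// exchange a locator carrying a remote-cluster hint (a "+R..." segment) for
// an equivalent locator signed by the local cluster, so a caller could do,
// roughly:
//
//	local, err := kc.LocalLocator(remoteLocator)
//	if err != nil {
//		return err // block could not be signed/copied locally
//	}
//	// refer to "local" in any manifest written to the local cluster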
ContainerStart(ctx context.Context, container string, options dockertypes.ContainerStartOptions) error
ContainerRemove(ctx context.Context, container string, options dockertypes.ContainerRemoveOptions) error
ContainerWait(ctx context.Context, container string, condition dockercontainer.WaitCondition) (<-chan dockercontainer.ContainerWaitOKBody, <-chan error)
+ ContainerInspect(ctx context.Context, id string) (dockertypes.ContainerJSON, error)
ImageInspectWithRaw(ctx context.Context, image string) (dockertypes.ImageInspect, []byte, error)
ImageLoad(ctx context.Context, input io.Reader, quiet bool) (dockertypes.ImageLoadResponse, error)
ImageRemove(ctx context.Context, image string, options dockertypes.ImageRemoveOptions) ([]dockertypes.ImageDeleteResponseItem, error)
// ContainerRunner is the main stateful struct used for a single execution of a
// container.
type ContainerRunner struct {
- Docker ThinDockerClient
- client *arvados.Client
- ArvClient IArvadosClient
- Kc IKeepClient
- arvados.Container
+ Docker ThinDockerClient
+
+ // Dispatcher client is initialized with the Dispatcher token.
+ // This is a privileged token used to manage container status
+ // and logs.
+ //
+ // We have both dispatcherClient and DispatcherArvClient
+ // because there are two different incompatible Arvados Go
+ // SDKs and we have to use both (hopefully this gets fixed in
+ // #14467).
+ dispatcherClient *arvados.Client
+ DispatcherArvClient IArvadosClient
+ DispatcherKeepClient IKeepClient
+
+ // Container client is initialized with the Container token.
+ // This token controls the permissions of the container, and
+ // must be used for operations such as reading collections.
+ //
+ // Same comment as above applies to
+ // containerClient/ContainerArvClient.
+ containerClient *arvados.Client
+ ContainerArvClient IArvadosClient
+ ContainerKeepClient IKeepClient
+
+ Container arvados.Container
ContainerConfig dockercontainer.Config
- dockercontainer.HostConfig
- token string
- ContainerID string
- ExitCode *int
- NewLogWriter
- loggingDone chan bool
- CrunchLog *ThrottledLogger
- Stdout io.WriteCloser
- Stderr io.WriteCloser
- LogCollection arvados.CollectionFileSystem
- LogsPDH *string
- RunArvMount
- MkTempDir
- ArvMount *exec.Cmd
- ArvMountPoint string
- HostOutputDir string
- Binds []string
- Volumes map[string]struct{}
- OutputPDH *string
- SigChan chan os.Signal
- ArvMountExit chan error
- SecretMounts map[string]arvados.Mount
- MkArvClient func(token string) (IArvadosClient, error)
- finalState string
- parentTemp string
-
- ListProcesses func() ([]PsProcess, error)
+ HostConfig dockercontainer.HostConfig
+ token string
+ ContainerID string
+ ExitCode *int
+ NewLogWriter NewLogWriter
+ loggingDone chan bool
+ CrunchLog *ThrottledLogger
+ Stdout io.WriteCloser
+ Stderr io.WriteCloser
+ logUUID string
+ logMtx sync.Mutex
+ LogCollection arvados.CollectionFileSystem
+ LogsPDH *string
+ RunArvMount RunArvMount
+ MkTempDir MkTempDir
+ ArvMount *exec.Cmd
+ ArvMountPoint string
+ HostOutputDir string
+ Binds []string
+ Volumes map[string]struct{}
+ OutputPDH *string
+ SigChan chan os.Signal
+ ArvMountExit chan error
+ SecretMounts map[string]arvados.Mount
+ MkArvClient func(token string) (IArvadosClient, IKeepClient, *arvados.Client, error)
+ finalState string
+ parentTemp string
statLogger io.WriteCloser
statReporter *crunchstat.Reporter
cStateLock sync.Mutex
cCancelled bool // StopContainer() invoked
+ cRemoved bool // docker confirmed the container no longer exists
- enableNetwork string // one of "default" or "always"
- networkMode string // passed through to HostConfig.NetworkMode
- arvMountLog *ThrottledLogger
- checkContainerd time.Duration
+ enableNetwork string // one of "default" or "always"
+ networkMode string // passed through to HostConfig.NetworkMode
+ arvMountLog *ThrottledLogger
+
+ containerWatchdogInterval time.Duration
}
// setupSignals sets up signal handling to gracefully terminate the underlying
if err != nil {
runner.CrunchLog.Printf("error removing container: %s", err)
}
+ if err == nil || strings.Contains(err.Error(), "No such container: "+runner.ContainerID) {
+ runner.cRemoved = true
+ }
}
var errorBlacklist = []string{
func (runner *ContainerRunner) runBrokenNodeHook() {
if *brokenNodeHook == "" {
- runner.CrunchLog.Printf("No broken node hook provided, cannot mark node as broken.")
+ path := filepath.Join(lockdir, brokenfile)
+ runner.CrunchLog.Printf("Writing %s to mark node as broken", path)
+ f, err := os.OpenFile(path, os.O_CREATE|os.O_RDWR, 0700)
+ if err != nil {
+ runner.CrunchLog.Printf("Error writing %s: %s", path, err)
+ return
+ }
+ f.Close()
} else {
runner.CrunchLog.Printf("Running broken node hook %q", *brokenNodeHook)
// run killme script
runner.CrunchLog.Printf("Fetching Docker image from collection '%s'", runner.Container.ContainerImage)
var collection arvados.Collection
- err = runner.ArvClient.Get("collections", runner.Container.ContainerImage, nil, &collection)
+ err = runner.ContainerArvClient.Get("collections", runner.Container.ContainerImage, nil, &collection)
if err != nil {
return fmt.Errorf("While getting container image collection: %v", err)
}
runner.CrunchLog.Print("Loading Docker image from keep")
var readCloser io.ReadCloser
- readCloser, err = runner.Kc.ManifestFileReader(manifest, img)
+ readCloser, err = runner.ContainerKeepClient.ManifestFileReader(manifest, img)
if err != nil {
return fmt.Errorf("While creating ManifestFileReader for container image: %v", err)
}
runner.ContainerConfig.Image = imageID
- runner.Kc.ClearBlockCache()
+ runner.ContainerKeepClient.ClearBlockCache()
return nil
}
if err != nil {
return fmt.Errorf("creating temp dir: %v", err)
}
- err = gitMount(mnt).extractTree(runner.ArvClient, tmpdir, token)
+ err = gitMount(mnt).extractTree(runner.ContainerArvClient, tmpdir, token)
if err != nil {
return err
}
return err
}
-// LogNodeRecord logs arvados#node record corresponding to the current host.
+// LogNodeRecord logs the current host's InstanceType config entry (or
+// the arvados#node record, if running via crunch-dispatch-slurm).
func (runner *ContainerRunner) LogNodeRecord() error {
- hostname := os.Getenv("SLURMD_NODENAME")
- if hostname == "" {
- hostname, _ = os.Hostname()
- }
- _, err := runner.logAPIResponse("node", "nodes", map[string]interface{}{"filters": [][]string{{"hostname", "=", hostname}}}, func(resp interface{}) {
- // The "info" field has admin-only info when obtained
- // with a privileged token, and should not be logged.
- node, ok := resp.(map[string]interface{})
- if ok {
- delete(node, "info")
- }
- })
- return err
+ if it := os.Getenv("InstanceType"); it != "" {
+ // Dispatched via arvados-dispatch-cloud. Save
+ // InstanceType config fragment received from
+ // dispatcher on stdin.
+ w, err := runner.LogCollection.OpenFile("node.json", os.O_CREATE|os.O_WRONLY, 0666)
+ if err != nil {
+ return err
+ }
+ defer w.Close()
+ _, err = io.WriteString(w, it)
+ if err != nil {
+ return err
+ }
+ return w.Close()
+ } else {
+ // Dispatched via crunch-dispatch-slurm. Look up
+ // apiserver's node record corresponding to
+ // $SLURMD_NODENAME.
+ hostname := os.Getenv("SLURMD_NODENAME")
+ if hostname == "" {
+ hostname, _ = os.Hostname()
+ }
+ _, err := runner.logAPIResponse("node", "nodes", map[string]interface{}{"filters": [][]string{{"hostname", "=", hostname}}}, func(resp interface{}) {
+ // The "info" field has admin-only info when
+ // obtained with a privileged token, and
+ // should not be logged.
+ node, ok := resp.(map[string]interface{})
+ if ok {
+ delete(node, "info")
+ }
+ })
+ return err
+ }
}
func (runner *ContainerRunner) logAPIResponse(label, path string, params map[string]interface{}, munge func(interface{})) (logged bool, err error) {
return false, err
}
w := &ArvLogWriter{
- ArvClient: runner.ArvClient,
+ ArvClient: runner.DispatcherArvClient,
UUID: runner.Container.UUID,
loggingStream: label,
writeCloser: writer,
}
- reader, err := runner.ArvClient.CallRaw("GET", path, "", "", arvadosclient.Dict(params))
+ reader, err := runner.DispatcherArvClient.CallRaw("GET", path, "", "", arvadosclient.Dict(params))
if err != nil {
return false, fmt.Errorf("error getting %s record: %v", label, err)
}
if collId == "" {
collId = stdinMnt.PortableDataHash
}
- err = runner.ArvClient.Get("collections", collId, nil, &stdinColl)
+ err = runner.ContainerArvClient.Get("collections", collId, nil, &stdinColl)
if err != nil {
- return fmt.Errorf("While getting stding collection: %v", err)
+ return fmt.Errorf("While getting stdin collection: %v", err)
}
- stdinRdr, err = runner.Kc.ManifestFileReader(manifest.Manifest{Text: stdinColl.ManifestText}, stdinMnt.Path)
+ stdinRdr, err = runner.ContainerKeepClient.ManifestFileReader(
+ manifest.Manifest{Text: stdinColl.ManifestText},
+ stdinMnt.Path)
if os.IsNotExist(err) {
return fmt.Errorf("stdin collection path not found: %v", stdinMnt.Path)
} else if err != nil {
go func() {
_, err := io.Copy(response.Conn, stdinRdr)
if err != nil {
- runner.CrunchLog.Print("While writing stdin collection to docker container %q", err)
+ runner.CrunchLog.Printf("While writing stdin collection to docker container: %v", err)
runner.stop(nil)
}
stdinRdr.Close()
go func() {
_, err := io.Copy(response.Conn, bytes.NewReader(stdinJson))
if err != nil {
- runner.CrunchLog.Print("While writing stdin json to docker container %q", err)
+ runner.CrunchLog.Printf("While writing stdin json to docker container: %v", err)
runner.stop(nil)
}
response.CloseWrite()
return nil
}
-// checkContainerd checks if "containerd" is present in the process list.
-func (runner *ContainerRunner) CheckContainerd() error {
- if runner.checkContainerd == 0 {
- return nil
- }
- p, _ := runner.ListProcesses()
- for _, i := range p {
- e, _ := i.CmdlineSlice()
- if len(e) > 0 {
- if strings.Index(e[0], "containerd") > -1 {
- return nil
- }
- }
- }
-
- // Not found
- runner.runBrokenNodeHook()
- runner.stop(nil)
- return fmt.Errorf("'containerd' not found in process list.")
-}
-
// WaitFinish waits for the container to terminate, captures the exit code, and
// closes the stdout/stderr logging.
func (runner *ContainerRunner) WaitFinish() error {
runTimeExceeded = time.After(time.Duration(timeout) * time.Second)
}
- containerdGone := make(chan error)
- defer close(containerdGone)
- if runner.checkContainerd > 0 {
- go func() {
- ticker := time.NewTicker(time.Duration(runner.checkContainerd))
- defer ticker.Stop()
- for {
- select {
- case <-ticker.C:
- if ck := runner.CheckContainerd(); ck != nil {
- containerdGone <- ck
- return
- }
- case <-containerdGone:
- // Channel closed, quit goroutine
- return
- }
+ containerGone := make(chan struct{})
+ go func() {
+ defer close(containerGone)
+ if runner.containerWatchdogInterval < 1 {
+ runner.containerWatchdogInterval = time.Minute
+ }
+ for range time.NewTicker(runner.containerWatchdogInterval).C {
+ ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(runner.containerWatchdogInterval))
+ ctr, err := runner.Docker.ContainerInspect(ctx, runner.ContainerID)
+ cancel()
+ runner.cStateLock.Lock()
+ done := runner.cRemoved || runner.ExitCode != nil
+ runner.cStateLock.Unlock()
+ if done {
+ return
+ } else if err != nil {
+ runner.CrunchLog.Printf("Error inspecting container: %s", err)
+ runner.checkBrokenNode(err)
+ return
+ } else if ctr.State == nil || !(ctr.State.Running || ctr.State.Status == "created") {
+ runner.CrunchLog.Printf("Container is not running: State=%v", ctr.State)
+ return
}
- }()
- }
+ }
+ }()
for {
select {
runner.stop(nil)
runTimeExceeded = nil
- case err := <-containerdGone:
- return err
+ case <-containerGone:
+ return errors.New("docker client never returned status")
}
}
}
+func (runner *ContainerRunner) updateLogs() {
+ ticker := time.NewTicker(crunchLogUpdatePeriod / 360)
+ defer ticker.Stop()
+
+ sigusr1 := make(chan os.Signal, 1)
+ signal.Notify(sigusr1, syscall.SIGUSR1)
+ defer signal.Stop(sigusr1)
+
+ saveAtTime := time.Now().Add(crunchLogUpdatePeriod)
+ saveAtSize := crunchLogUpdateSize
+ var savedSize int64
+ for {
+ select {
+ case <-ticker.C:
+ case <-sigusr1:
+ saveAtTime = time.Now()
+ }
+ runner.logMtx.Lock()
+ done := runner.LogsPDH != nil
+ runner.logMtx.Unlock()
+ if done {
+ return
+ }
+ size := runner.LogCollection.Size()
+ if size == savedSize || (time.Now().Before(saveAtTime) && size < saveAtSize) {
+ continue
+ }
+ saveAtTime = time.Now().Add(crunchLogUpdatePeriod)
+ saveAtSize = runner.LogCollection.Size() + crunchLogUpdateSize
+ saved, err := runner.saveLogCollection(false)
+ if err != nil {
+ runner.CrunchLog.Printf("error updating log collection: %s", err)
+ continue
+ }
+
+ var updated arvados.Container
+ err = runner.DispatcherArvClient.Update("containers", runner.Container.UUID, arvadosclient.Dict{
+ "container": arvadosclient.Dict{"log": saved.PortableDataHash},
+ }, &updated)
+ if err != nil {
+ runner.CrunchLog.Printf("error updating container log to %s: %s", saved.PortableDataHash, err)
+ continue
+ }
+
+ savedSize = size
+ }
+}
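// Illustrative note, not part of this change: crunchLogUpdatePeriod and
// crunchLogUpdateSize are defined elsewhere; with, say, a 30-minute period
// and a 32 MiB size threshold (hypothetical values), the loop above polls
// every 5 seconds (period/360) and re-saves the log collection once 30
// minutes have passed since the last save or the collection has grown by
// 32 MiB. SIGUSR1 wakes the loop and forces an immediate save, provided the
// log has grown at all since the last save.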
+
// CaptureOutput saves data from the container's output directory if
// needed, and updates the container output accordingly.
func (runner *ContainerRunner) CaptureOutput() error {
if wantAPI := runner.Container.RuntimeConstraints.API; wantAPI != nil && *wantAPI {
// Output may have been set directly by the container, so
// refresh the container record to check.
- err := runner.ArvClient.Get("containers", runner.Container.UUID,
+ err := runner.DispatcherArvClient.Get("containers", runner.Container.UUID,
nil, &runner.Container)
if err != nil {
return err
}
txt, err := (&copier{
- client: runner.client,
- arvClient: runner.ArvClient,
- keepClient: runner.Kc,
+ client: runner.containerClient,
+ arvClient: runner.ContainerArvClient,
+ keepClient: runner.ContainerKeepClient,
hostOutputDir: runner.HostOutputDir,
ctrOutputDir: runner.Container.OutputPath,
binds: runner.Binds,
if err != nil {
return err
}
+ if n := len(regexp.MustCompile(` [0-9a-f]+\+\S*\+R`).FindAllStringIndex(txt, -1)); n > 0 {
+ runner.CrunchLog.Printf("Copying %d data blocks from remote input collections...", n)
+ fs, err := (&arvados.Collection{ManifestText: txt}).FileSystem(runner.containerClient, runner.ContainerKeepClient)
+ if err != nil {
+ return err
+ }
+ txt, err = fs.MarshalManifest(".")
+ if err != nil {
+ return err
+ }
+ }
var resp arvados.Collection
- err = runner.ArvClient.Create("collections", arvadosclient.Dict{
+ err = runner.ContainerArvClient.Create("collections", arvadosclient.Dict{
"ensure_unique_name": true,
"collection": arvadosclient.Dict{
"is_trashed": true,
// other further errors (such as failing to write the log to Keep!)
// while shutting down
runner.CrunchLog = NewThrottledLogger(&ArvLogWriter{
- ArvClient: runner.ArvClient,
+ ArvClient: runner.DispatcherArvClient,
UUID: runner.Container.UUID,
loggingStream: "crunch-run",
writeCloser: nil,
// -- it exists only to send logs to other channels.
return nil
}
+ saved, err := runner.saveLogCollection(true)
+ if err != nil {
+ return fmt.Errorf("error saving log collection: %s", err)
+ }
+ runner.logMtx.Lock()
+ defer runner.logMtx.Unlock()
+ runner.LogsPDH = &saved.PortableDataHash
+ return nil
+}
+func (runner *ContainerRunner) saveLogCollection(final bool) (response arvados.Collection, err error) {
+ runner.logMtx.Lock()
+ defer runner.logMtx.Unlock()
+ if runner.LogsPDH != nil {
+ // Already finalized.
+ return
+ }
mt, err := runner.LogCollection.MarshalManifest(".")
if err != nil {
- return fmt.Errorf("While creating log manifest: %v", err)
- }
-
- var response arvados.Collection
- err = runner.ArvClient.Create("collections",
- arvadosclient.Dict{
- "ensure_unique_name": true,
- "collection": arvadosclient.Dict{
- "is_trashed": true,
- "name": "logs for " + runner.Container.UUID,
- "manifest_text": mt}},
- &response)
+ err = fmt.Errorf("error creating log manifest: %v", err)
+ return
+ }
+ updates := arvadosclient.Dict{
+ "name": "logs for " + runner.Container.UUID,
+ "manifest_text": mt,
+ }
+ if final {
+ updates["is_trashed"] = true
+ } else {
+ exp := time.Now().Add(crunchLogUpdatePeriod * 24)
+ updates["trash_at"] = exp
+ updates["delete_at"] = exp
+ }
+ reqBody := arvadosclient.Dict{"collection": updates}
+ if runner.logUUID == "" {
+ reqBody["ensure_unique_name"] = true
+ err = runner.DispatcherArvClient.Create("collections", reqBody, &response)
+ } else {
+ err = runner.DispatcherArvClient.Update("collections", runner.logUUID, reqBody, &response)
+ }
if err != nil {
- return fmt.Errorf("While creating log collection: %v", err)
+ return
}
- runner.LogsPDH = &response.PortableDataHash
- return nil
+ runner.logUUID = response.UUID
+ return
}
// UpdateContainerRunning updates the container state to "Running"
if runner.cCancelled {
return ErrCancelled
}
- return runner.ArvClient.Update("containers", runner.Container.UUID,
+ return runner.DispatcherArvClient.Update("containers", runner.Container.UUID,
arvadosclient.Dict{"container": arvadosclient.Dict{"state": "Running"}}, nil)
}
}
var auth arvados.APIClientAuthorization
- err := runner.ArvClient.Call("GET", "containers", runner.Container.UUID, "auth", nil, &auth)
+ err := runner.DispatcherArvClient.Call("GET", "containers", runner.Container.UUID, "auth", nil, &auth)
if err != nil {
return "", err
}
- runner.token = auth.APIToken
+ runner.token = fmt.Sprintf("v2/%s/%s/%s", auth.UUID, auth.APIToken, runner.Container.UUID)
return runner.token, nil
}
update["output"] = *runner.OutputPDH
}
}
- return runner.ArvClient.Update("containers", runner.Container.UUID, arvadosclient.Dict{"container": update}, nil)
+ return runner.DispatcherArvClient.Update("containers", runner.Container.UUID, arvadosclient.Dict{"container": update}, nil)
}
// IsCancelled returns the value of Cancelled, with goroutine safety.
return nil, err
}
return &ArvLogWriter{
- ArvClient: runner.ArvClient,
+ ArvClient: runner.DispatcherArvClient,
UUID: runner.Container.UUID,
loggingStream: name,
writeCloser: writer,
runner.CrunchLog.Close()
}()
+ err = runner.fetchContainerRecord()
+ if err != nil {
+ return
+ }
+ if runner.Container.State != "Locked" {
+ return fmt.Errorf("dispatch error detected: container %q has state %q", runner.Container.UUID, runner.Container.State)
+ }
+
defer func() {
// checkErr prints e (unless it's nil) and sets err to
// e (unless err is already non-nil). Thus, if err
checkErr("UpdateContainerFinal", runner.UpdateContainerFinal())
}()
- err = runner.fetchContainerRecord()
- if err != nil {
- return
- }
runner.setupSignals()
err = runner.startHoststat()
if err != nil {
return
}
- // Sanity check that containerd is running.
- err = runner.CheckContainerd()
- if err != nil {
- return
- }
-
// check for and/or load image
err = runner.LoadImage()
if err != nil {
// Fetch the current container record (uuid = runner.Container.UUID)
// into runner.Container.
func (runner *ContainerRunner) fetchContainerRecord() error {
- reader, err := runner.ArvClient.CallRaw("GET", "containers", runner.Container.UUID, "", nil)
+ reader, err := runner.DispatcherArvClient.CallRaw("GET", "containers", runner.Container.UUID, "", nil)
if err != nil {
return fmt.Errorf("error fetching container record: %v", err)
}
return fmt.Errorf("error getting container token: %v", err)
}
- containerClient, err := runner.MkArvClient(containerToken)
+ runner.ContainerArvClient, runner.ContainerKeepClient,
+ runner.containerClient, err = runner.MkArvClient(containerToken)
if err != nil {
return fmt.Errorf("error creating container API client: %v", err)
}
- err = containerClient.Call("GET", "containers", runner.Container.UUID, "secret_mounts", nil, &sm)
+ err = runner.ContainerArvClient.Call("GET", "containers", runner.Container.UUID, "secret_mounts", nil, &sm)
if err != nil {
if apierr, ok := err.(arvadosclient.APIServerError); !ok || apierr.HttpStatusCode != 404 {
return fmt.Errorf("error fetching secret_mounts: %v", err)
}
// NewContainerRunner creates a new container runner.
-func NewContainerRunner(client *arvados.Client, api IArvadosClient, kc IKeepClient, docker ThinDockerClient, containerUUID string) (*ContainerRunner, error) {
+func NewContainerRunner(dispatcherClient *arvados.Client,
+ dispatcherArvClient IArvadosClient,
+ dispatcherKeepClient IKeepClient,
+ docker ThinDockerClient,
+ containerUUID string) (*ContainerRunner, error) {
+
cr := &ContainerRunner{
- client: client,
- ArvClient: api,
- Kc: kc,
- Docker: docker,
+ dispatcherClient: dispatcherClient,
+ DispatcherArvClient: dispatcherArvClient,
+ DispatcherKeepClient: dispatcherKeepClient,
+ Docker: docker,
}
cr.NewLogWriter = cr.NewArvLogWriter
cr.RunArvMount = cr.ArvMountCmd
cr.MkTempDir = ioutil.TempDir
- cr.ListProcesses = func() ([]PsProcess, error) {
- pr, err := process.Processes()
- if err != nil {
- return nil, err
- }
- ps := make([]PsProcess, len(pr))
- for i, j := range pr {
- ps[i] = j
- }
- return ps, nil
- }
- cr.MkArvClient = func(token string) (IArvadosClient, error) {
+ cr.MkArvClient = func(token string) (IArvadosClient, IKeepClient, *arvados.Client, error) {
cl, err := arvadosclient.MakeArvadosClient()
if err != nil {
- return nil, err
+ return nil, nil, nil, err
}
cl.ApiToken = token
- return cl, nil
+ kc, err := keepclient.MakeKeepClient(cl)
+ if err != nil {
+ return nil, nil, nil, err
+ }
+ c2 := arvados.NewClientFromEnv()
+ c2.AuthToken = token
+ return cl, kc, c2, nil
}
var err error
- cr.LogCollection, err = (&arvados.Collection{}).FileSystem(cr.client, cr.Kc)
+ cr.LogCollection, err = (&arvados.Collection{}).FileSystem(cr.dispatcherClient, cr.DispatcherKeepClient)
if err != nil {
return nil, err
}
cr.CrunchLog = NewThrottledLogger(w)
cr.CrunchLog.Immediate = log.New(os.Stderr, containerUUID+" ", 0)
- loadLogThrottleParams(api)
+ loadLogThrottleParams(dispatcherArvClient)
+ go cr.updateLogs()
return cr, nil
}
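// Hypothetical wiring sketch, not part of this change: with the new
// signature, main() would build the runner from dispatcher-token clients
// along these lines (error handling elided, "docker" constructed elsewhere):
//
//	api, _ := arvadosclient.MakeArvadosClient() // dispatcher token from ARVADOS_API_* env
//	kc, _ := keepclient.MakeKeepClient(api)
//	cr, _ := NewContainerRunner(arvados.NewClientFromEnv(), api, kc, docker, containerId)
//
// The container-token clients are created later via cr.MkArvClient, once the
// container's own API token has been fetched.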
cgroupParent := flag.String("cgroup-parent", "docker", "name of container's parent cgroup (ignored if -cgroup-parent-subsystem is used)")
cgroupParentSubsystem := flag.String("cgroup-parent-subsystem", "", "use current cgroup for given subsystem as parent cgroup for container")
caCertsPath := flag.String("ca-certs", "", "Path to TLS root certificates")
+ detach := flag.Bool("detach", false, "Detach from parent process and run in the background")
+ stdinEnv := flag.Bool("stdin-env", false, "Load environment variables from JSON message on stdin")
+ sleep := flag.Duration("sleep", 0, "Delay before starting (testing use only)")
+ kill := flag.Int("kill", -1, "Send signal to an existing crunch-run process for given UUID")
+ list := flag.Bool("list", false, "List UUIDs of existing crunch-run processes")
enableNetwork := flag.String("container-enable-networking", "default",
`Specify if networking should be enabled for container. One of 'default', 'always':
default: only enable networking if container requests it.
`)
memprofile := flag.String("memprofile", "", "write memory profile to `file` after running container")
getVersion := flag.Bool("version", false, "Print version information and exit.")
- checkContainerd := flag.Duration("check-containerd", 60*time.Second, "Periodic check if (docker-)containerd is running (use 0s to disable).")
+ flag.Duration("check-containerd", 0, "Ignored. Exists for compatibility with older versions.")
+
+ ignoreDetachFlag := false
+ if len(os.Args) > 1 && os.Args[1] == "-no-detach" {
+ // This process was invoked by a parent process, which
+ // has passed along its own arguments, including
+ // -detach, after the leading -no-detach flag. Strip
+ // the leading -no-detach flag (it's not recognized by
+ // flag.Parse()) and ignore the -detach flag that
+ // comes later.
+ os.Args = append([]string{os.Args[0]}, os.Args[2:]...)
+ ignoreDetachFlag = true
+ }
+
flag.Parse()
+ if *stdinEnv && !ignoreDetachFlag {
+ // Load env vars on stdin if asked (but not in a
+ // detached child process, in which case stdin is
+ // /dev/null).
+ loadEnv(os.Stdin)
+ }
+
+ switch {
+ case *detach && !ignoreDetachFlag:
+ os.Exit(Detach(flag.Arg(0), os.Args, os.Stdout, os.Stderr))
+ case *kill >= 0:
+ os.Exit(KillProcess(flag.Arg(0), syscall.Signal(*kill), os.Stdout, os.Stderr))
+ case *list:
+ os.Exit(ListProcesses(os.Stdout, os.Stderr))
+ }
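// Hypothetical note, not part of this change: Detach (defined elsewhere) is
// assumed to re-exec this binary in the background with "-no-detach"
// prepended to the original arguments, which is why the startup code above
// strips that leading flag and then ignores -detach in the child process.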
+
// Print version information if requested
if *getVersion {
fmt.Printf("crunch-run %s\n", version)
}
log.Printf("crunch-run %s started", version)
+ time.Sleep(*sleep)
containerId := flag.Arg(0)
cr.expectCgroupParent = *cgroupParent
cr.enableNetwork = *enableNetwork
cr.networkMode = *networkMode
- cr.checkContainerd = *checkContainerd
if *cgroupParentSubsystem != "" {
p := findCgroup(*cgroupParentSubsystem)
cr.setCgroupParent = p
log.Fatalf("%s: %v", containerId, runerr)
}
}
+
+func loadEnv(rdr io.Reader) {
+ buf, err := ioutil.ReadAll(rdr)
+ if err != nil {
+ log.Fatalf("read stdin: %s", err)
+ }
+ var env map[string]string
+ err = json.Unmarshal(buf, &env)
+ if err != nil {
+ log.Fatalf("decode stdin: %s", err)
+ }
+ for k, v := range env {
+ err = os.Setenv(k, v)
+ if err != nil {
+ log.Fatalf("setenv(%q): %s", k, err)
+ }
+ }
+}
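// Illustrative example, not part of this change: -stdin-env expects a flat
// JSON object of string values on stdin (keys and values here are
// hypothetical):
//
//	loadEnv(strings.NewReader(`{"InstanceType": "{\"Name\":\"m4.large\"}"}`))
//	// os.Getenv("InstanceType") now returns the config fragment that
//	// LogNodeRecord() saves to node.json in the log collection.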