X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/844ff7cc1dc1c93a29b7ad8eca2987b987cf89e6..31779a06b28e21a9409ec7c6310f0871b65d13ff:/lib/crunchrun/crunchrun.go diff --git a/lib/crunchrun/crunchrun.go b/lib/crunchrun/crunchrun.go index 0253ac3fa8..9add35335f 100644 --- a/lib/crunchrun/crunchrun.go +++ b/lib/crunchrun/crunchrun.go @@ -12,6 +12,7 @@ import ( "flag" "fmt" "io" + "io/fs" "io/ioutil" "log" "net" @@ -31,6 +32,7 @@ import ( "syscall" "time" + "git.arvados.org/arvados.git/lib/cloud" "git.arvados.org/arvados.git/lib/cmd" "git.arvados.org/arvados.git/lib/config" "git.arvados.org/arvados.git/lib/crunchstat" @@ -49,9 +51,10 @@ var Command = command{} // ConfigData contains environment variables and (when needed) cluster // configuration, passed from dispatchcloud to crunch-run on stdin. type ConfigData struct { - Env map[string]string - KeepBuffers int - Cluster *arvados.Cluster + Env map[string]string + KeepBuffers int + EC2SpotCheck bool + Cluster *arvados.Cluster } // IArvadosClient is the minimal Arvados API methods used by crunch-run. @@ -140,7 +143,9 @@ type ContainerRunner struct { MkArvClient func(token string) (IArvadosClient, IKeepClient, *arvados.Client, error) finalState string parentTemp string + costStartTime time.Time + keepstore *exec.Cmd keepstoreLogger io.WriteCloser keepstoreLogbuf *bufThenWrite statLogger io.WriteCloser @@ -148,20 +153,12 @@ type ContainerRunner struct { hoststatLogger io.WriteCloser hoststatReporter *crunchstat.Reporter statInterval time.Duration - cgroupRoot string - // What we expect the container's cgroup parent to be. - expectCgroupParent string // What we tell docker to use as the container's cgroup - // parent. Note: Ideally we would use the same field for both - // expectCgroupParent and setCgroupParent, and just make it - // default to "docker". However, when using docker < 1.10 with - // systemd, specifying a non-empty cgroup parent (even the - // default value "docker") hits a docker bug - // (https://github.com/docker/docker/issues/17126). Using two - // separate fields makes it possible to use the "expect cgroup - // parent to be X" feature even on sites where the "specify - // cgroup parent" feature breaks. + // parent. setCgroupParent string + // Fake root dir where crunchstat.Reporter should read OS + // files, for testing. + crunchstatFakeFS fs.FS cStateLock sync.Mutex cCancelled bool // StopContainer() invoked @@ -175,6 +172,9 @@ type ContainerRunner struct { containerWatchdogInterval time.Duration gateway Gateway + + prices []cloud.InstancePrice + pricesLock sync.Mutex } // setupSignals sets up signal handling to gracefully terminate the @@ -313,7 +313,12 @@ func (runner *ContainerRunner) ArvMountCmd(cmdline []string, token string) (c *e "Block not found error", "Unhandled exception during FUSE operation", }, - ReportFunc: runner.reportArvMountWarning, + ReportFunc: func(pattern, text string) { + runner.updateRuntimeStatus(arvadosclient.Dict{ + "warning": "arv-mount: " + pattern, + "warningDetail": text, + }) + }, } c.Stdout = runner.arvMountLog c.Stderr = io.MultiWriter(runner.arvMountLog, os.Stderr, &scanner) @@ -426,8 +431,14 @@ func (runner *ContainerRunner) SetupMounts() (map[string]bindmount, error) { arvMountCmd = append(arvMountCmd, "--allow-other") } - if runner.Container.RuntimeConstraints.KeepCacheRAM > 0 { - arvMountCmd = append(arvMountCmd, "--file-cache", fmt.Sprintf("%d", runner.Container.RuntimeConstraints.KeepCacheRAM)) + if runner.Container.RuntimeConstraints.KeepCacheDisk > 0 { + keepcachedir, err := runner.MkTempDir(runner.parentTemp, "keepcache") + if err != nil { + return nil, fmt.Errorf("while creating keep cache temp dir: %v", err) + } + arvMountCmd = append(arvMountCmd, "--disk-cache", "--disk-cache-dir", keepcachedir, "--file-cache", fmt.Sprintf("%d", runner.Container.RuntimeConstraints.KeepCacheDisk)) + } else if runner.Container.RuntimeConstraints.KeepCacheRAM > 0 { + arvMountCmd = append(arvMountCmd, "--ram-cache", "--file-cache", fmt.Sprintf("%d", runner.Container.RuntimeConstraints.KeepCacheRAM)) } collectionPaths := []string{} @@ -620,7 +631,7 @@ func (runner *ContainerRunner) SetupMounts() (map[string]bindmount, error) { if err != nil { return nil, fmt.Errorf("creating temp dir: %v", err) } - err = gitMount(mnt).extractTree(runner.ContainerArvClient, tmpdir, token) + err = gitMount(mnt).extractTree(runner.containerClient, tmpdir, token) if err != nil { return nil, err } @@ -659,6 +670,9 @@ func (runner *ContainerRunner) SetupMounts() (map[string]bindmount, error) { if err != nil { return nil, fmt.Errorf("while trying to start arv-mount: %v", err) } + if runner.hoststatReporter != nil && runner.ArvMount != nil { + runner.hoststatReporter.ReportPID("arv-mount", runner.ArvMount.Process.Pid) + } for _, p := range collectionPaths { _, err = os.Stat(p) @@ -713,6 +727,7 @@ func (runner *ContainerRunner) stopHoststat() error { return nil } runner.hoststatReporter.Stop() + runner.hoststatReporter.LogProcessMemMax(runner.CrunchLog) err := runner.hoststatLogger.Close() if err != nil { return fmt.Errorf("error closing hoststat logs: %v", err) @@ -727,11 +742,20 @@ func (runner *ContainerRunner) startHoststat() error { } runner.hoststatLogger = NewThrottledLogger(w) runner.hoststatReporter = &crunchstat.Reporter{ - Logger: log.New(runner.hoststatLogger, "", 0), - CgroupRoot: runner.cgroupRoot, + Logger: log.New(runner.hoststatLogger, "", 0), + // Our own cgroup is the "host" cgroup, in the sense + // that it accounts for resource usage outside the + // container. It doesn't count _all_ resource usage on + // the system. + // + // TODO?: Use the furthest ancestor of our own cgroup + // that has stats available. (Currently crunchstat + // does not have that capability.) + Pid: os.Getpid, PollPeriod: runner.statInterval, } runner.hoststatReporter.Start() + runner.hoststatReporter.ReportPID("crunch-run", os.Getpid()) return nil } @@ -742,12 +766,15 @@ func (runner *ContainerRunner) startCrunchstat() error { } runner.statLogger = NewThrottledLogger(w) runner.statReporter = &crunchstat.Reporter{ - CID: runner.executor.CgroupID(), - Logger: log.New(runner.statLogger, "", 0), - CgroupParent: runner.expectCgroupParent, - CgroupRoot: runner.cgroupRoot, - PollPeriod: runner.statInterval, - TempDir: runner.parentTemp, + Pid: runner.executor.Pid, + FS: runner.crunchstatFakeFS, + Logger: log.New(runner.statLogger, "", 0), + MemThresholds: map[string][]crunchstat.Threshold{ + "rss": crunchstat.NewThresholdsFromPercentages(runner.Container.RuntimeConstraints.RAM, []int64{90, 95, 99}), + }, + PollPeriod: runner.statInterval, + TempDir: runner.parentTemp, + ThresholdLogger: runner.CrunchLog, } runner.statReporter.Start() return nil @@ -999,6 +1026,7 @@ func (runner *ContainerRunner) CreateContainer(imageID string, bindmounts map[st env["ARVADOS_API_TOKEN"] = tok env["ARVADOS_API_HOST"] = os.Getenv("ARVADOS_API_HOST") env["ARVADOS_API_HOST_INSECURE"] = os.Getenv("ARVADOS_API_HOST_INSECURE") + env["ARVADOS_KEEP_SERVICES"] = os.Getenv("ARVADOS_KEEP_SERVICES") } workdir := runner.Container.Cwd if workdir == "." { @@ -1095,6 +1123,13 @@ func (runner *ContainerRunner) WaitFinish() error { } } runner.CrunchLog.Printf("Container exited with status code %d%s", exitcode, extra) + err = runner.DispatcherArvClient.Update("containers", runner.Container.UUID, arvadosclient.Dict{ + "select": []string{"uuid"}, + "container": arvadosclient.Dict{"exit_code": exitcode}, + }, nil) + if err != nil { + runner.CrunchLog.Printf("ignoring error updating exit_code: %s", err) + } var returnErr error if err = runner.executorStdin.Close(); err != nil { @@ -1119,6 +1154,9 @@ func (runner *ContainerRunner) WaitFinish() error { if runner.statReporter != nil { runner.statReporter.Stop() + runner.statReporter.LogMaxima(runner.CrunchLog, map[string]int64{ + "rss": runner.Container.RuntimeConstraints.RAM, + }) err = runner.statLogger.Close() if err != nil { runner.CrunchLog.Printf("error closing crunchstat logs: %v", err) @@ -1162,10 +1200,12 @@ func (runner *ContainerRunner) updateLogs() { continue } - var updated arvados.Container err = runner.DispatcherArvClient.Update("containers", runner.Container.UUID, arvadosclient.Dict{ - "container": arvadosclient.Dict{"log": saved.PortableDataHash}, - }, &updated) + "select": []string{"uuid"}, + "container": arvadosclient.Dict{ + "log": saved.PortableDataHash, + }, + }, nil) if err != nil { runner.CrunchLog.Printf("error updating container log to %s: %s", saved.PortableDataHash, err) continue @@ -1175,16 +1215,116 @@ func (runner *ContainerRunner) updateLogs() { } } -func (runner *ContainerRunner) reportArvMountWarning(pattern, text string) { - var updated arvados.Container +var spotInterruptionCheckInterval = 5 * time.Second +var ec2MetadataBaseURL = "http://169.254.169.254" + +const ec2TokenTTL = time.Second * 21600 + +func (runner *ContainerRunner) checkSpotInterruptionNotices() { + type ec2metadata struct { + Action string `json:"action"` + Time time.Time `json:"time"` + } + runner.CrunchLog.Printf("Checking for spot interruptions every %v using instance metadata at %s", spotInterruptionCheckInterval, ec2MetadataBaseURL) + var metadata ec2metadata + var token string + var tokenExp time.Time + check := func() error { + ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(time.Minute)) + defer cancel() + if token == "" || tokenExp.Sub(time.Now()) < time.Minute { + req, err := http.NewRequestWithContext(ctx, http.MethodPut, ec2MetadataBaseURL+"/latest/api/token", nil) + if err != nil { + return err + } + req.Header.Set("X-aws-ec2-metadata-token-ttl-seconds", fmt.Sprintf("%d", int(ec2TokenTTL/time.Second))) + resp, err := http.DefaultClient.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("%s", resp.Status) + } + newtoken, err := ioutil.ReadAll(resp.Body) + if err != nil { + return err + } + token = strings.TrimSpace(string(newtoken)) + tokenExp = time.Now().Add(ec2TokenTTL) + } + req, err := http.NewRequestWithContext(ctx, http.MethodGet, ec2MetadataBaseURL+"/latest/meta-data/spot/instance-action", nil) + if err != nil { + return err + } + req.Header.Set("X-aws-ec2-metadata-token", token) + resp, err := http.DefaultClient.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + metadata = ec2metadata{} + switch resp.StatusCode { + case http.StatusOK: + break + case http.StatusNotFound: + // "If Amazon EC2 is not preparing to stop or + // terminate the instance, or if you + // terminated the instance yourself, + // instance-action is not present in the + // instance metadata and you receive an HTTP + // 404 error when you try to retrieve it." + return nil + case http.StatusUnauthorized: + token = "" + return fmt.Errorf("%s", resp.Status) + default: + return fmt.Errorf("%s", resp.Status) + } + err = json.NewDecoder(resp.Body).Decode(&metadata) + if err != nil { + return err + } + return nil + } + failures := 0 + var lastmetadata ec2metadata + for range time.NewTicker(spotInterruptionCheckInterval).C { + err := check() + if err != nil { + runner.CrunchLog.Printf("Error checking spot interruptions: %s", err) + failures++ + if failures > 5 { + runner.CrunchLog.Printf("Giving up on checking spot interruptions after too many consecutive failures") + return + } + continue + } + failures = 0 + if metadata != lastmetadata { + lastmetadata = metadata + text := fmt.Sprintf("Cloud provider scheduled instance %s at %s", metadata.Action, metadata.Time.UTC().Format(time.RFC3339)) + runner.CrunchLog.Printf("%s", text) + runner.updateRuntimeStatus(arvadosclient.Dict{ + "warning": "preemption notice", + "warningDetail": text, + "preemptionNotice": text, + }) + if proc, err := os.FindProcess(os.Getpid()); err == nil { + // trigger updateLogs + proc.Signal(syscall.SIGUSR1) + } + } + } +} + +func (runner *ContainerRunner) updateRuntimeStatus(status arvadosclient.Dict) { err := runner.DispatcherArvClient.Update("containers", runner.Container.UUID, arvadosclient.Dict{ + "select": []string{"uuid"}, "container": arvadosclient.Dict{ - "runtime_status": arvadosclient.Dict{ - "warning": "arv-mount: " + pattern, - "warningDetail": text, - }, + "runtime_status": status, }, - }, &updated) + }, nil) if err != nil { runner.CrunchLog.Printf("error updating container runtime_status: %s", err) } @@ -1197,7 +1337,9 @@ func (runner *ContainerRunner) CaptureOutput(bindmounts map[string]bindmount) er // Output may have been set directly by the container, so // refresh the container record to check. err := runner.DispatcherArvClient.Get("containers", runner.Container.UUID, - nil, &runner.Container) + arvadosclient.Dict{ + "select": []string{"output"}, + }, &runner.Container) if err != nil { return err } @@ -1210,7 +1352,6 @@ func (runner *ContainerRunner) CaptureOutput(bindmounts map[string]bindmount) er txt, err := (&copier{ client: runner.containerClient, - arvClient: runner.ContainerArvClient, keepClient: runner.ContainerKeepClient, hostOutputDir: runner.HostOutputDir, ctrOutputDir: runner.Container.OutputPath, @@ -1236,6 +1377,7 @@ func (runner *ContainerRunner) CaptureOutput(bindmounts map[string]bindmount) er var resp arvados.Collection err = runner.ContainerArvClient.Create("collections", arvadosclient.Dict{ "ensure_unique_name": true, + "select": []string{"portable_data_hash"}, "collection": arvadosclient.Dict{ "is_trashed": true, "name": "output for " + runner.Container.UUID, @@ -1362,6 +1504,8 @@ func (runner *ContainerRunner) CommitLogs() error { return nil } +// Create/update the log collection. Return value has UUID and +// PortableDataHash fields populated, but others may be blank. func (runner *ContainerRunner) saveLogCollection(final bool) (response arvados.Collection, err error) { runner.logMtx.Lock() defer runner.logMtx.Unlock() @@ -1386,11 +1530,20 @@ func (runner *ContainerRunner) saveLogCollection(final bool) (response arvados.C if final { updates["is_trashed"] = true } else { - exp := time.Now().Add(crunchLogUpdatePeriod * 24) + // We set trash_at so this collection gets + // automatically cleaned up eventually. It used to be + // 12 hours but we had a situation where the API + // server was down over a weekend but the containers + // kept running such that the log collection got + // trashed, so now we make it 2 weeks. refs #20378 + exp := time.Now().Add(time.Duration(24*14) * time.Hour) updates["trash_at"] = exp updates["delete_at"] = exp } - reqBody := arvadosclient.Dict{"collection": updates} + reqBody := arvadosclient.Dict{ + "select": []string{"uuid", "portable_data_hash"}, + "collection": updates, + } var err2 error if runner.logUUID == "" { reqBody["ensure_unique_name"] = true @@ -1409,14 +1562,28 @@ func (runner *ContainerRunner) saveLogCollection(final bool) (response arvados.C } // UpdateContainerRunning updates the container state to "Running" -func (runner *ContainerRunner) UpdateContainerRunning() error { +func (runner *ContainerRunner) UpdateContainerRunning(logId string) error { runner.cStateLock.Lock() defer runner.cStateLock.Unlock() if runner.cCancelled { return ErrCancelled } - return runner.DispatcherArvClient.Update("containers", runner.Container.UUID, - arvadosclient.Dict{"container": arvadosclient.Dict{"state": "Running", "gateway_address": runner.gateway.Address}}, nil) + updates := arvadosclient.Dict{ + "gateway_address": runner.gateway.Address, + "state": "Running", + } + if logId != "" { + updates["log"] = logId + } + return runner.DispatcherArvClient.Update( + "containers", + runner.Container.UUID, + arvadosclient.Dict{ + "select": []string{"uuid"}, + "container": updates, + }, + nil, + ) } // ContainerToken returns the api_token the container (and any @@ -1443,15 +1610,19 @@ func (runner *ContainerRunner) UpdateContainerFinal() error { if runner.LogsPDH != nil { update["log"] = *runner.LogsPDH } - if runner.finalState == "Complete" { - if runner.ExitCode != nil { - update["exit_code"] = *runner.ExitCode - } - if runner.OutputPDH != nil { - update["output"] = *runner.OutputPDH - } + if runner.ExitCode != nil { + update["exit_code"] = *runner.ExitCode + } else { + update["exit_code"] = nil + } + if runner.finalState == "Complete" && runner.OutputPDH != nil { + update["output"] = *runner.OutputPDH } - return runner.DispatcherArvClient.Update("containers", runner.Container.UUID, arvadosclient.Dict{"container": update}, nil) + update["cost"] = runner.calculateCost(time.Now()) + return runner.DispatcherArvClient.Update("containers", runner.Container.UUID, arvadosclient.Dict{ + "select": []string{"uuid"}, + "container": update, + }, nil) } // IsCancelled returns the value of Cancelled, with goroutine safety. @@ -1483,6 +1654,7 @@ func (runner *ContainerRunner) Run() (err error) { runner.CrunchLog.Printf("Using FUSE mount: %s", v) runner.CrunchLog.Printf("Using container runtime: %s", runner.executor.Runtime()) runner.CrunchLog.Printf("Executing container: %s", runner.Container.UUID) + runner.costStartTime = time.Now() hostname, hosterr := os.Hostname() if hosterr != nil { @@ -1491,6 +1663,12 @@ func (runner *ContainerRunner) Run() (err error) { runner.CrunchLog.Printf("Executing on host '%s'", hostname) } + sigusr2 := make(chan os.Signal, 1) + signal.Notify(sigusr2, syscall.SIGUSR2) + defer signal.Stop(sigusr2) + runner.loadPrices() + go runner.handleSIGUSR2(sigusr2) + runner.finalState = "Queued" defer func() { @@ -1557,6 +1735,9 @@ func (runner *ContainerRunner) Run() (err error) { if err != nil { return } + if runner.keepstore != nil { + runner.hoststatReporter.ReportPID("keepstore", runner.keepstore.Process.Pid) + } // set up FUSE mount and binds bindmounts, err = runner.SetupMounts() @@ -1599,7 +1780,14 @@ func (runner *ContainerRunner) Run() (err error) { return } - err = runner.UpdateContainerRunning() + logCollection, err := runner.saveLogCollection(false) + var logId string + if err == nil { + logId = logCollection.PortableDataHash + } else { + runner.CrunchLog.Printf("Error committing initial log collection: %v", err) + } + err = runner.UpdateContainerRunning(logId) if err != nil { return } @@ -1721,16 +1909,16 @@ func (command) RunCommand(prog string, args []string, stdin io.Reader, stdout, s log := log.New(stderr, "", 0) flags := flag.NewFlagSet(prog, flag.ContinueOnError) statInterval := flags.Duration("crunchstat-interval", 10*time.Second, "sampling period for periodic resource usage reporting") - cgroupRoot := flags.String("cgroup-root", "/sys/fs/cgroup", "path to sysfs cgroup tree") - cgroupParent := flags.String("cgroup-parent", "docker", "name of container's parent cgroup (ignored if -cgroup-parent-subsystem is used)") - cgroupParentSubsystem := flags.String("cgroup-parent-subsystem", "", "use current cgroup for given subsystem as parent cgroup for container") + flags.String("cgroup-root", "/sys/fs/cgroup", "path to sysfs cgroup tree (obsolete, ignored)") + flags.String("cgroup-parent", "docker", "name of container's parent cgroup (obsolete, ignored)") + cgroupParentSubsystem := flags.String("cgroup-parent-subsystem", "", "use current cgroup for given `subsystem` as parent cgroup for container (subsystem argument is only relevant for cgroups v1; in cgroups v2 / unified mode, any non-empty value means use current cgroup)") caCertsPath := flags.String("ca-certs", "", "Path to TLS root certificates") detach := flags.Bool("detach", false, "Detach from parent process and run in the background") stdinConfig := flags.Bool("stdin-config", false, "Load config and environment variables from JSON message on stdin") configFile := flags.String("config", arvados.DefaultConfigFile, "filename of cluster config file to try loading if -stdin-config=false (default is $ARVADOS_CONFIG)") sleep := flags.Duration("sleep", 0, "Delay before starting (testing use only)") kill := flags.Int("kill", -1, "Send signal to an existing crunch-run process for given UUID") - list := flags.Bool("list", false, "List UUIDs of existing crunch-run processes") + list := flags.Bool("list", false, "List UUIDs of existing crunch-run processes (and notify them to use price data passed on stdin)") enableMemoryLimit := flags.Bool("enable-memory-limit", true, "tell container runtime to limit container's memory usage") enableNetwork := flags.String("container-enable-networking", "default", "enable networking \"always\" (for all containers) or \"default\" (for containers that request it)") networkMode := flags.String("container-network-mode", "default", `Docker network mode for container (use any argument valid for docker --net)`) @@ -1738,6 +1926,7 @@ func (command) RunCommand(prog string, args []string, stdin io.Reader, stdout, s runtimeEngine := flags.String("runtime-engine", "docker", "container runtime: docker or singularity") brokenNodeHook := flags.String("broken-node-hook", "", "script to run if node is detected to be broken (for example, Docker daemon is not running)") flags.Duration("check-containerd", 0, "Ignored. Exists for compatibility with older versions.") + version := flags.Bool("version", false, "Write version information to stdout and exit 0.") ignoreDetachFlag := false if len(args) > 0 && args[0] == "-no-detach" { @@ -1753,6 +1942,9 @@ func (command) RunCommand(prog string, args []string, stdin io.Reader, stdout, s if ok, code := cmd.ParseFlags(flags, prog, args, "container-uuid", stderr); !ok { return code + } else if *version { + fmt.Fprintln(stdout, prog, cmd.Version.String()) + return 0 } else if !*list && flags.NArg() != 1 { fmt.Fprintf(stderr, "missing required argument: container-uuid (try -help)\n") return 2 @@ -1762,11 +1954,11 @@ func (command) RunCommand(prog string, args []string, stdin io.Reader, stdout, s switch { case *detach && !ignoreDetachFlag: - return Detach(containerUUID, prog, args, os.Stdin, os.Stdout, os.Stderr) + return Detach(containerUUID, prog, args, stdin, stdout, stderr) case *kill >= 0: - return KillProcess(containerUUID, syscall.Signal(*kill), os.Stdout, os.Stderr) + return KillProcess(containerUUID, syscall.Signal(*kill), stdout, stderr) case *list: - return ListProcesses(os.Stdout, os.Stderr) + return ListProcesses(stdin, stdout, stderr) } if len(containerUUID) != 27 { @@ -1821,7 +2013,9 @@ func (command) RunCommand(prog string, args []string, stdin io.Reader, stdout, s log.Printf("%s: %v", containerUUID, err) return 1 } - api.Retries = 8 + // arvadosclient now interprets Retries=10 to mean + // Timeout=10m, retrying with exponential backoff + jitter. + api.Retries = 10 kc, err := keepclient.MakeKeepClient(api) if err != nil { @@ -1837,6 +2031,7 @@ func (command) RunCommand(prog string, args []string, stdin io.Reader, stdout, s return 1 } + cr.keepstore = keepstore if keepstore == nil { // Log explanation (if any) for why we're not running // a local keepstore. @@ -1900,18 +2095,30 @@ func (command) RunCommand(prog string, args []string, stdin io.Reader, stdout, s // not safe to run a gateway service without an auth // secret cr.CrunchLog.Printf("Not starting a gateway server (GatewayAuthSecret was not provided by dispatcher)") - } else if gwListen := os.Getenv("GatewayAddress"); gwListen == "" { - // dispatcher did not tell us which external IP - // address to advertise --> no gateway service - cr.CrunchLog.Printf("Not starting a gateway server (GatewayAddress was not provided by dispatcher)") - } else if de, ok := cr.executor.(*dockerExecutor); ok { + } else { + gwListen := os.Getenv("GatewayAddress") cr.gateway = Gateway{ - Address: gwListen, - AuthSecret: gwAuthSecret, - ContainerUUID: containerUUID, - DockerContainerID: &de.containerID, - Log: cr.CrunchLog, - ContainerIPAddress: dockerContainerIPAddress(&de.containerID), + Address: gwListen, + AuthSecret: gwAuthSecret, + ContainerUUID: containerUUID, + Target: cr.executor, + Log: cr.CrunchLog, + LogCollection: cr.LogCollection, + } + if gwListen == "" { + // Direct connection won't work, so we use the + // gateway_address field to indicate the + // internalURL of the controller process that + // has the current tunnel connection. + cr.gateway.ArvadosClient = cr.dispatcherClient + cr.gateway.UpdateTunnelURL = func(url string) { + cr.gateway.Address = "tunnel " + url + cr.DispatcherArvClient.Update("containers", containerUUID, + arvadosclient.Dict{ + "select": []string{"uuid"}, + "container": arvadosclient.Dict{"gateway_address": cr.gateway.Address}, + }, nil) + } } err = cr.gateway.Start() if err != nil { @@ -1928,19 +2135,20 @@ func (command) RunCommand(prog string, args []string, stdin io.Reader, stdout, s cr.parentTemp = parentTemp cr.statInterval = *statInterval - cr.cgroupRoot = *cgroupRoot - cr.expectCgroupParent = *cgroupParent cr.enableMemoryLimit = *enableMemoryLimit cr.enableNetwork = *enableNetwork cr.networkMode = *networkMode if *cgroupParentSubsystem != "" { - p, err := findCgroup(*cgroupParentSubsystem) + p, err := findCgroup(os.DirFS("/"), *cgroupParentSubsystem) if err != nil { log.Printf("fatal: cgroup parent subsystem: %s", err) return 1 } cr.setCgroupParent = p - cr.expectCgroupParent = p + } + + if conf.EC2SpotCheck { + go cr.checkSpotInterruptionNotices() } runerr := cr.Run() @@ -1984,7 +2192,9 @@ func hpcConfData(uuid string, configFile string, stderr io.Writer) ConfigData { fmt.Fprintf(stderr, "error setting up arvadosclient: %s\n", err) return conf } - arv.Retries = 8 + // arvadosclient now interprets Retries=10 to mean + // Timeout=10m, retrying with exponential backoff + jitter. + arv.Retries = 10 var ctr arvados.Container err = arv.Call("GET", "containers", uuid, "", arvadosclient.Dict{"select": []string{"runtime_constraints"}}, &ctr) if err != nil { @@ -2041,7 +2251,8 @@ func startLocalKeepstore(configData ConfigData, logbuf io.Writer) (*exec.Cmd, er // modify the cluster configuration that we feed it on stdin. configData.Cluster.API.MaxKeepBlobBuffers = configData.KeepBuffers - ln, err := net.Listen("tcp", "localhost:0") + localaddr := localKeepstoreAddr() + ln, err := net.Listen("tcp", net.JoinHostPort(localaddr, "0")) if err != nil { return nil, err } @@ -2051,7 +2262,7 @@ func startLocalKeepstore(configData ConfigData, logbuf io.Writer) (*exec.Cmd, er return nil, err } ln.Close() - url := "http://localhost:" + port + url := "http://" + net.JoinHostPort(localaddr, port) fmt.Fprintf(logbuf, "starting keepstore on %s\n", url) @@ -2143,3 +2354,140 @@ func currentUserAndGroups() string { } return s } + +// Return a suitable local interface address for a local keepstore +// service. Currently this is the numerically lowest non-loopback ipv4 +// address assigned to a local interface that is not in any of the +// link-local/vpn/loopback ranges 169.254/16, 100.64/10, or 127/8. +func localKeepstoreAddr() string { + var ips []net.IP + // Ignore error (proceed with zero IPs) + addrs, _ := processIPs(os.Getpid()) + for addr := range addrs { + ip := net.ParseIP(addr) + if ip == nil { + // invalid + continue + } + if ip.Mask(net.CIDRMask(8, 32)).Equal(net.IPv4(127, 0, 0, 0)) || + ip.Mask(net.CIDRMask(10, 32)).Equal(net.IPv4(100, 64, 0, 0)) || + ip.Mask(net.CIDRMask(16, 32)).Equal(net.IPv4(169, 254, 0, 0)) { + // unsuitable + continue + } + ips = append(ips, ip) + } + if len(ips) == 0 { + return "0.0.0.0" + } + sort.Slice(ips, func(ii, jj int) bool { + i, j := ips[ii], ips[jj] + if len(i) != len(j) { + return len(i) < len(j) + } + for x := range i { + if i[x] != j[x] { + return i[x] < j[x] + } + } + return false + }) + return ips[0].String() +} + +func (cr *ContainerRunner) loadPrices() { + buf, err := os.ReadFile(filepath.Join(lockdir, pricesfile)) + if err != nil { + if !os.IsNotExist(err) { + cr.CrunchLog.Printf("loadPrices: read: %s", err) + } + return + } + var prices []cloud.InstancePrice + err = json.Unmarshal(buf, &prices) + if err != nil { + cr.CrunchLog.Printf("loadPrices: decode: %s", err) + return + } + cr.pricesLock.Lock() + defer cr.pricesLock.Unlock() + var lastKnown time.Time + if len(cr.prices) > 0 { + lastKnown = cr.prices[0].StartTime + } + cr.prices = cloud.NormalizePriceHistory(append(prices, cr.prices...)) + for i := len(cr.prices) - 1; i >= 0; i-- { + price := cr.prices[i] + if price.StartTime.After(lastKnown) { + cr.CrunchLog.Printf("Instance price changed to %#.3g at %s", price.Price, price.StartTime.UTC()) + } + } +} + +func (cr *ContainerRunner) calculateCost(now time.Time) float64 { + cr.pricesLock.Lock() + defer cr.pricesLock.Unlock() + + // First, make a "prices" slice with the real data as far back + // as it goes, and (if needed) a "since the beginning of time" + // placeholder containing a reasonable guess about what the + // price was between cr.costStartTime and the earliest real + // data point. + prices := cr.prices + if len(prices) == 0 { + // use price info in InstanceType record initially + // provided by cloud dispatcher + var p float64 + var it arvados.InstanceType + if j := os.Getenv("InstanceType"); j != "" && json.Unmarshal([]byte(j), &it) == nil && it.Price > 0 { + p = it.Price + } + prices = []cloud.InstancePrice{{Price: p}} + } else if prices[len(prices)-1].StartTime.After(cr.costStartTime) { + // guess earlier pricing was the same as the earliest + // price we know about + filler := prices[len(prices)-1] + filler.StartTime = time.Time{} + prices = append(prices, filler) + } + + // Now that our history of price changes goes back at least as + // far as cr.costStartTime, add up the costs for each + // interval. + cost := 0.0 + spanEnd := now + for _, ip := range prices { + spanStart := ip.StartTime + if spanStart.After(now) { + // pricing information from the future -- not + // expected from AWS, but possible in + // principle, and exercised by tests. + continue + } + last := false + if spanStart.Before(cr.costStartTime) { + spanStart = cr.costStartTime + last = true + } + cost += ip.Price * spanEnd.Sub(spanStart).Seconds() / 3600 + if last { + break + } + spanEnd = spanStart + } + + return cost +} + +func (runner *ContainerRunner) handleSIGUSR2(sigchan chan os.Signal) { + for range sigchan { + runner.loadPrices() + update := arvadosclient.Dict{ + "select": []string{"uuid"}, + "container": arvadosclient.Dict{ + "cost": runner.calculateCost(time.Now()), + }, + } + runner.DispatcherArvClient.Update("containers", runner.Container.UUID, update, nil) + } +}