X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/dfb9598282b677ead60f91c14f5e96405735d42f..ea3f1b8246c27a6a44edfc561f13935ef377c1cb:/lib/crunchrun/crunchrun.go diff --git a/lib/crunchrun/crunchrun.go b/lib/crunchrun/crunchrun.go index b0a4007f74..e05b352fae 100644 --- a/lib/crunchrun/crunchrun.go +++ b/lib/crunchrun/crunchrun.go @@ -870,25 +870,24 @@ func (runner *ContainerRunner) LogNodeRecord() error { return err } return w.Close() - } else { - // Dispatched via crunch-dispatch-slurm. Look up - // apiserver's node record corresponding to - // $SLURMD_NODENAME. - hostname := os.Getenv("SLURMD_NODENAME") - if hostname == "" { - hostname, _ = os.Hostname() - } - _, err := runner.logAPIResponse("node", "nodes", map[string]interface{}{"filters": [][]string{{"hostname", "=", hostname}}}, func(resp interface{}) { - // The "info" field has admin-only info when - // obtained with a privileged token, and - // should not be logged. - node, ok := resp.(map[string]interface{}) - if ok { - delete(node, "info") - } - }) - return err } + // Dispatched via crunch-dispatch-slurm. Look up + // apiserver's node record corresponding to + // $SLURMD_NODENAME. + hostname := os.Getenv("SLURMD_NODENAME") + if hostname == "" { + hostname, _ = os.Hostname() + } + _, err := runner.logAPIResponse("node", "nodes", map[string]interface{}{"filters": [][]string{{"hostname", "=", hostname}}}, func(resp interface{}) { + // The "info" field has admin-only info when + // obtained with a privileged token, and + // should not be logged. + node, ok := resp.(map[string]interface{}) + if ok { + delete(node, "info") + } + }) + return err } func (runner *ContainerRunner) logAPIResponse(label, path string, params map[string]interface{}, munge func(interface{})) (logged bool, err error) { @@ -1074,9 +1073,10 @@ func (runner *ContainerRunner) CreateContainer() error { runner.ContainerConfig.Volumes = runner.Volumes maxRAM := int64(runner.Container.RuntimeConstraints.RAM) - if maxRAM < 4*1024*1024 { - // Docker daemon won't let you set a limit less than 4 MiB - maxRAM = 4 * 1024 * 1024 + minDockerRAM := int64(16) + if maxRAM < minDockerRAM*1024*1024 { + // Docker daemon won't let you set a limit less than ~10 MiB + maxRAM = minDockerRAM * 1024 * 1024 } runner.HostConfig = dockercontainer.HostConfig{ Binds: runner.Binds, @@ -1431,15 +1431,20 @@ func (runner *ContainerRunner) saveLogCollection(final bool) (response arvados.C // Already finalized. return } - mt, err := runner.LogCollection.MarshalManifest(".") - if err != nil { - err = fmt.Errorf("error creating log manifest: %v", err) - return - } updates := arvadosclient.Dict{ - "name": "logs for " + runner.Container.UUID, - "manifest_text": mt, + "name": "logs for " + runner.Container.UUID, + } + mt, err1 := runner.LogCollection.MarshalManifest(".") + if err1 == nil { + // Only send updated manifest text if there was no + // error. + updates["manifest_text"] = mt } + + // Even if flushing the manifest had an error, we still want + // to update the log record, if possible, to push the trash_at + // and delete_at times into the future. Details on bug + // #17293. if final { updates["is_trashed"] = true } else { @@ -1448,16 +1453,20 @@ func (runner *ContainerRunner) saveLogCollection(final bool) (response arvados.C updates["delete_at"] = exp } reqBody := arvadosclient.Dict{"collection": updates} + var err2 error if runner.logUUID == "" { reqBody["ensure_unique_name"] = true - err = runner.DispatcherArvClient.Create("collections", reqBody, &response) + err2 = runner.DispatcherArvClient.Create("collections", reqBody, &response) } else { - err = runner.DispatcherArvClient.Update("collections", runner.logUUID, reqBody, &response) + err2 = runner.DispatcherArvClient.Update("collections", runner.logUUID, reqBody, &response) } - if err != nil { - return + if err2 == nil { + runner.logUUID = response.UUID + } + + if err1 != nil || err2 != nil { + err = fmt.Errorf("error recording logs: %q, %q", err1, err2) } - runner.logUUID = response.UUID return } @@ -1488,7 +1497,7 @@ func (runner *ContainerRunner) ContainerToken() (string, error) { return runner.token, nil } -// UpdateContainerComplete updates the container record state on API +// UpdateContainerFinal updates the container record state on API // server to "Complete" or "Cancelled" func (runner *ContainerRunner) UpdateContainerFinal() error { update := arvadosclient.Dict{}