From: Tom Clegg Date: Tue, 8 Nov 2022 16:56:36 +0000 (-0500) Subject: 19702: Increase default ReserveExtraRAM, allow keepstore overhead. X-Git-Tag: 2.5.0~38^2~1 X-Git-Url: https://git.arvados.org/arvados.git/commitdiff_plain/5ee9b22d0777d7ef42fa6acc05b666fd30e1ab2b 19702: Increase default ReserveExtraRAM, allow keepstore overhead. Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml index 09c068a0b9..0246cb88d5 100644 --- a/lib/config/config.default.yml +++ b/lib/config/config.default.yml @@ -1028,7 +1028,7 @@ Clusters: # Extra RAM to reserve on the node, in addition to # the amount specified in the container's RuntimeConstraints - ReserveExtraRAM: 256MiB + ReserveExtraRAM: 550MiB # Minimum time between two attempts to run the same container MinRetryPeriod: 0s diff --git a/lib/dispatchcloud/node_size.go b/lib/dispatchcloud/node_size.go index 7c7643bfc7..8cc63dc208 100644 --- a/lib/dispatchcloud/node_size.go +++ b/lib/dispatchcloud/node_size.go @@ -110,7 +110,12 @@ func ChooseInstanceType(cc *arvados.Cluster, ctr *arvados.Container) (best arvad needRAM := ctr.RuntimeConstraints.RAM + ctr.RuntimeConstraints.KeepCacheRAM needRAM += int64(cc.Containers.ReserveExtraRAM) - needRAM += int64(cc.Containers.LocalKeepBlobBuffersPerVCPU * needVCPUs * (1 << 26)) + if cc.Containers.LocalKeepBlobBuffersPerVCPU > 0 { + // + 200 MiB for keepstore process + 10% for GOGC=10 + needRAM += 220 << 20 + // + 64 MiB for each blob buffer + 10% for GOGC=10 + needRAM += int64(cc.Containers.LocalKeepBlobBuffersPerVCPU * needVCPUs * (1 << 26) * 11 / 10) + } needRAM = (needRAM * 100) / int64(100-discountConfiguredRAMPercent) ok := false diff --git a/lib/dispatchcloud/node_size_test.go b/lib/dispatchcloud/node_size_test.go index eb3648e8ac..86bfbec7b6 100644 --- a/lib/dispatchcloud/node_size_test.go +++ b/lib/dispatchcloud/node_size_test.go @@ -80,7 +80,10 @@ func (*NodeSizeSuite) TestChoose(c *check.C) { "costly": {Price: 4.4, RAM: 4000000000, VCPUs: 8, Scratch: 2 * GiB, Name: "costly"}, }, } { - best, err := ChooseInstanceType(&arvados.Cluster{InstanceTypes: menu, Containers: arvados.ContainersConfig{ReserveExtraRAM: 268435456}}, &arvados.Container{ + best, err := ChooseInstanceType(&arvados.Cluster{InstanceTypes: menu, Containers: arvados.ContainersConfig{ + LocalKeepBlobBuffersPerVCPU: 1, + ReserveExtraRAM: 268435456, + }}, &arvados.Container{ Mounts: map[string]arvados.Mount{ "/tmp": {Kind: "tmp", Capacity: 2 * int64(GiB)}, }, @@ -98,7 +101,30 @@ func (*NodeSizeSuite) TestChoose(c *check.C) { } } -func (*NodeSizeSuite) TestChoosePreemptable(c *check.C) { +func (*NodeSizeSuite) TestChooseWithBlobBuffersOverhead(c *check.C) { + menu := map[string]arvados.InstanceType{ + "nearly": {Price: 2.2, RAM: 4000000000, VCPUs: 4, Scratch: 2 * GiB, Name: "small"}, + "best": {Price: 3.3, RAM: 8000000000, VCPUs: 4, Scratch: 2 * GiB, Name: "best"}, + "costly": {Price: 4.4, RAM: 12000000000, VCPUs: 8, Scratch: 2 * GiB, Name: "costly"}, + } + best, err := ChooseInstanceType(&arvados.Cluster{InstanceTypes: menu, Containers: arvados.ContainersConfig{ + LocalKeepBlobBuffersPerVCPU: 16, // 1 GiB per vcpu => 2 GiB + ReserveExtraRAM: 268435456, + }}, &arvados.Container{ + Mounts: map[string]arvados.Mount{ + "/tmp": {Kind: "tmp", Capacity: 2 * int64(GiB)}, + }, + RuntimeConstraints: arvados.RuntimeConstraints{ + VCPUs: 2, + RAM: 987654321, + KeepCacheRAM: 123456789, + }, + }) + c.Check(err, check.IsNil) + c.Check(best.Name, check.Equals, "best") +} + +func (*NodeSizeSuite) TestChoosePreemptible(c *check.C) { menu := map[string]arvados.InstanceType{ "costly": {Price: 4.4, RAM: 4000000000, VCPUs: 8, Scratch: 2 * GiB, Preemptible: true, Name: "costly"}, "almost best": {Price: 2.2, RAM: 2000000000, VCPUs: 4, Scratch: 2 * GiB, Name: "almost best"},