19702: Increase default ReserveExtraRAM, allow keepstore overhead.
author Tom Clegg <tom@curii.com>
Tue, 8 Nov 2022 16:56:36 +0000 (11:56 -0500)
committer Tom Clegg <tom@curii.com>
Tue, 8 Nov 2022 16:56:36 +0000 (11:56 -0500)
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@curii.com>

lib/config/config.default.yml
lib/dispatchcloud/node_size.go
lib/dispatchcloud/node_size_test.go

index 09c068a0b9f701db70c9724f84d59b4845924f68..0246cb88d5736e158bb1f502d91423b7b7072832 100644 (file)
@@ -1028,7 +1028,7 @@ Clusters:
 
       # Extra RAM to reserve on the node, in addition to
       # the amount specified in the container's RuntimeConstraints
 
       # Extra RAM to reserve on the node, in addition to
       # the amount specified in the container's RuntimeConstraints
-      ReserveExtraRAM: 256MiB
+      ReserveExtraRAM: 550MiB
 
       # Minimum time between two attempts to run the same container
       MinRetryPeriod: 0s
 
       # Minimum time between two attempts to run the same container
       MinRetryPeriod: 0s
index 7c7643bfc7622fc8c876eba9c2e01d9203385074..8cc63dc208c8c48e3afc47a1dab1435b11a12996 100644 (file)
@@ -110,7 +110,12 @@ func ChooseInstanceType(cc *arvados.Cluster, ctr *arvados.Container) (best arvad
 
        needRAM := ctr.RuntimeConstraints.RAM + ctr.RuntimeConstraints.KeepCacheRAM
        needRAM += int64(cc.Containers.ReserveExtraRAM)
 
        needRAM := ctr.RuntimeConstraints.RAM + ctr.RuntimeConstraints.KeepCacheRAM
        needRAM += int64(cc.Containers.ReserveExtraRAM)
-       needRAM += int64(cc.Containers.LocalKeepBlobBuffersPerVCPU * needVCPUs * (1 << 26))
+       if cc.Containers.LocalKeepBlobBuffersPerVCPU > 0 {
+               // + 200 MiB for keepstore process + 10% for GOGC=10
+               needRAM += 220 << 20
+               // + 64 MiB for each blob buffer + 10% for GOGC=10
+               needRAM += int64(cc.Containers.LocalKeepBlobBuffersPerVCPU * needVCPUs * (1 << 26) * 11 / 10)
+       }
        needRAM = (needRAM * 100) / int64(100-discountConfiguredRAMPercent)
 
        ok := false
        needRAM = (needRAM * 100) / int64(100-discountConfiguredRAMPercent)
 
        ok := false
index eb3648e8ac13265995bf98b040c47106ea380ea3..86bfbec7b629dc731e309740346dec85a24ae2d7 100644 (file)
@@ -80,7 +80,10 @@ func (*NodeSizeSuite) TestChoose(c *check.C) {
                        "costly": {Price: 4.4, RAM: 4000000000, VCPUs: 8, Scratch: 2 * GiB, Name: "costly"},
                },
        } {
                        "costly": {Price: 4.4, RAM: 4000000000, VCPUs: 8, Scratch: 2 * GiB, Name: "costly"},
                },
        } {
-               best, err := ChooseInstanceType(&arvados.Cluster{InstanceTypes: menu, Containers: arvados.ContainersConfig{ReserveExtraRAM: 268435456}}, &arvados.Container{
+               best, err := ChooseInstanceType(&arvados.Cluster{InstanceTypes: menu, Containers: arvados.ContainersConfig{
+                       LocalKeepBlobBuffersPerVCPU: 1,
+                       ReserveExtraRAM:             268435456,
+               }}, &arvados.Container{
                        Mounts: map[string]arvados.Mount{
                                "/tmp": {Kind: "tmp", Capacity: 2 * int64(GiB)},
                        },
                        Mounts: map[string]arvados.Mount{
                                "/tmp": {Kind: "tmp", Capacity: 2 * int64(GiB)},
                        },
@@ -98,7 +101,30 @@ func (*NodeSizeSuite) TestChoose(c *check.C) {
        }
 }
 
        }
 }
 
-func (*NodeSizeSuite) TestChoosePreemptable(c *check.C) {
+func (*NodeSizeSuite) TestChooseWithBlobBuffersOverhead(c *check.C) { // checks that instance selection accounts for local keepstore blob buffer RAM
+       menu := map[string]arvados.InstanceType{
+               "nearly": {Price: 2.2, RAM: 4000000000, VCPUs: 4, Scratch: 2 * GiB, Name: "small"}, // cheapest entry; the expectation below implies it is rejected (presumably too little RAM once buffer overhead is counted -- TODO confirm)
+               "best":   {Price: 3.3, RAM: 8000000000, VCPUs: 4, Scratch: 2 * GiB, Name: "best"},
+               "costly": {Price: 4.4, RAM: 12000000000, VCPUs: 8, Scratch: 2 * GiB, Name: "costly"},
+       }
+       best, err := ChooseInstanceType(&arvados.Cluster{InstanceTypes: menu, Containers: arvados.ContainersConfig{
+               LocalKeepBlobBuffersPerVCPU: 16, // 16 buffers x 64 MiB = 1 GiB per VCPU => 2 GiB for the 2 VCPUs requested below, before the extra 10% allowance
+               ReserveExtraRAM:             268435456, // 256 MiB
+       }}, &arvados.Container{
+               Mounts: map[string]arvados.Mount{
+                       "/tmp": {Kind: "tmp", Capacity: 2 * int64(GiB)},
+               },
+               RuntimeConstraints: arvados.RuntimeConstraints{
+                       VCPUs:        2,
+                       RAM:          987654321,
+                       KeepCacheRAM: 123456789,
+               },
+       })
+       c.Check(err, check.IsNil)
+       c.Check(best.Name, check.Equals, "best") // "best" is cheaper than "costly", the only other entry with more RAM than "nearly"
+}
+
+func (*NodeSizeSuite) TestChoosePreemptible(c *check.C) {
        menu := map[string]arvados.InstanceType{
                "costly":      {Price: 4.4, RAM: 4000000000, VCPUs: 8, Scratch: 2 * GiB, Preemptible: true, Name: "costly"},
                "almost best": {Price: 2.2, RAM: 2000000000, VCPUs: 4, Scratch: 2 * GiB, Name: "almost best"},
        menu := map[string]arvados.InstanceType{
                "costly":      {Price: 4.4, RAM: 4000000000, VCPUs: 8, Scratch: 2 * GiB, Preemptible: true, Name: "costly"},
                "almost best": {Price: 2.2, RAM: 2000000000, VCPUs: 4, Scratch: 2 * GiB, Name: "almost best"},