From: Tom Clegg Date: Fri, 11 May 2018 19:25:36 +0000 (-0400) Subject: 13473: Drop slurm resource constraints when using instancetype=X. X-Git-Tag: 1.2.0~153^2~1 X-Git-Url: https://git.arvados.org/arvados.git/commitdiff_plain/8fc8aa8179f88694ccbab9a23fdc7d2e8988ce64 13473: Drop slurm resource constraints when using instancetype=X. Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- diff --git a/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go b/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go index 23e4b3a8cb..e3801df41b 100644 --- a/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go +++ b/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go @@ -213,17 +213,17 @@ func (disp *Dispatcher) sbatchArgs(container arvados.Container) ([]string, error } disk = int64(math.Ceil(float64(disk) / float64(1048576))) - var sbatchArgs []string - sbatchArgs = append(sbatchArgs, disp.SbatchArguments...) - sbatchArgs = append(sbatchArgs, fmt.Sprintf("--job-name=%s", container.UUID)) - sbatchArgs = append(sbatchArgs, fmt.Sprintf("--mem=%d", mem)) - sbatchArgs = append(sbatchArgs, fmt.Sprintf("--cpus-per-task=%d", container.RuntimeConstraints.VCPUs)) - sbatchArgs = append(sbatchArgs, fmt.Sprintf("--tmp=%d", disk)) - sbatchArgs = append(sbatchArgs, fmt.Sprintf("--nice=%d", initialNiceValue)) - if len(container.SchedulingParameters.Partitions) > 0 { - sbatchArgs = append(sbatchArgs, fmt.Sprintf("--partition=%s", strings.Join(container.SchedulingParameters.Partitions, ","))) + var args []string + args = append(args, disp.SbatchArguments...) + args = append(args, + fmt.Sprintf("--job-name=%s", container.UUID), + fmt.Sprintf("--nice=%d", initialNiceValue)) + + constraintArgs := []string{ + fmt.Sprintf("--mem=%d", mem), + fmt.Sprintf("--cpus-per-task=%d", container.RuntimeConstraints.VCPUs), + fmt.Sprintf("--tmp=%d", disk), } - if disp.cluster == nil { // no instance types configured } else if it, err := dispatchcloud.ChooseInstanceType(disp.cluster, &container); err == dispatchcloud.ErrInstanceTypesNotConfigured { @@ -231,10 +231,16 @@ func (disp *Dispatcher) sbatchArgs(container arvados.Container) ([]string, error } else if err != nil { return nil, err } else { - sbatchArgs = append(sbatchArgs, "--constraint=instancetype="+it.Name) + // use instancetype constraint instead of slurm mem/cpu/tmp specs + constraintArgs = []string{"--constraint=instancetype=" + it.Name} + } + args = append(args, constraintArgs...) + + if len(container.SchedulingParameters.Partitions) > 0 { + args = append(args, fmt.Sprintf("--partition=%s", strings.Join(container.SchedulingParameters.Partitions, ","))) } - return sbatchArgs, nil + return args, nil } func (disp *Dispatcher) submit(container arvados.Container, crunchRunCommand []string) error { diff --git a/services/crunch-dispatch-slurm/crunch-dispatch-slurm_test.go b/services/crunch-dispatch-slurm/crunch-dispatch-slurm_test.go index 499f6d5d72..85617cf115 100644 --- a/services/crunch-dispatch-slurm/crunch-dispatch-slurm_test.go +++ b/services/crunch-dispatch-slurm/crunch-dispatch-slurm_test.go @@ -193,10 +193,11 @@ func (s *IntegrationSuite) TestMissingFromSqueue(c *C) { container := s.integrationTest(c, [][]string{{ fmt.Sprintf("--job-name=%s", "zzzzz-dz642-queuedcontainer"), + fmt.Sprintf("--nice=%d", 10000), fmt.Sprintf("--mem=%d", 11445), fmt.Sprintf("--cpus-per-task=%d", 4), fmt.Sprintf("--tmp=%d", 45777), - fmt.Sprintf("--nice=%d", 10000)}}, + }}, func(dispatcher *dispatch.Dispatcher, container arvados.Container) { dispatcher.UpdateState(container.UUID, dispatch.Running) time.Sleep(3 * time.Second) @@ -208,7 +209,7 @@ func (s *IntegrationSuite) TestMissingFromSqueue(c *C) { func (s *IntegrationSuite) TestSbatchFail(c *C) { s.slurm = slurmFake{errBatch: errors.New("something terrible happened")} container := s.integrationTest(c, - [][]string{{"--job-name=zzzzz-dz642-queuedcontainer", "--mem=11445", "--cpus-per-task=4", "--tmp=45777", "--nice=10000"}}, + [][]string{{"--job-name=zzzzz-dz642-queuedcontainer", "--nice=10000", "--mem=11445", "--cpus-per-task=4", "--tmp=45777"}}, func(dispatcher *dispatch.Dispatcher, container arvados.Container) { dispatcher.UpdateState(container.UUID, dispatch.Running) dispatcher.UpdateState(container.UUID, dispatch.Complete) @@ -353,7 +354,7 @@ func (s *StubbedSuite) TestSbatchArgs(c *C) { s.disp.SbatchArguments = defaults args, err := s.disp.sbatchArgs(container) - c.Check(args, DeepEquals, append(defaults, "--job-name=123", "--mem=239", "--cpus-per-task=2", "--tmp=0", "--nice=10000")) + c.Check(args, DeepEquals, append(defaults, "--job-name=123", "--nice=10000", "--mem=239", "--cpus-per-task=2", "--tmp=0")) c.Check(err, IsNil) } } @@ -383,7 +384,7 @@ func (s *StubbedSuite) TestSbatchInstanceTypeConstraint(c *C) { // No node types configured => no slurm constraint { types: nil, - sbatchArgs: nil, + sbatchArgs: []string{"--mem=239", "--cpus-per-task=2", "--tmp=0"}, }, // No node type is big enough => error { @@ -399,7 +400,7 @@ func (s *StubbedSuite) TestSbatchInstanceTypeConstraint(c *C) { args, err := s.disp.sbatchArgs(container) c.Check(err, Equals, trial.err) if trial.err == nil { - c.Check(args, DeepEquals, append([]string{"--job-name=123", "--mem=239", "--cpus-per-task=2", "--tmp=0", "--nice=10000"}, trial.sbatchArgs...)) + c.Check(args, DeepEquals, append([]string{"--job-name=123", "--nice=10000"}, trial.sbatchArgs...)) } } } @@ -414,7 +415,8 @@ func (s *StubbedSuite) TestSbatchPartition(c *C) { args, err := s.disp.sbatchArgs(container) c.Check(args, DeepEquals, []string{ - "--job-name=123", "--mem=239", "--cpus-per-task=1", "--tmp=0", "--nice=10000", + "--job-name=123", "--nice=10000", + "--mem=239", "--cpus-per-task=1", "--tmp=0", "--partition=blurb,b2", }) c.Check(err, IsNil)