21449: Propagate max_run_time to LSF scheduler.
author    Tom Clegg <tom@curii.com>
          Wed, 13 Mar 2024 21:22:39 +0000 (17:22 -0400)
committer Tom Clegg <tom@curii.com>
          Thu, 14 Mar 2024 17:18:43 +0000 (13:18 -0400)
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@curii.com>

doc/install/crunch2-lsf/install-dispatch.html.textile.liquid
lib/config/config.default.yml
lib/lsf/dispatch.go
lib/lsf/dispatch_test.go
sdk/go/arvados/config.go
services/api/app/models/container_request.rb

doc/install/crunch2-lsf/install-dispatch.html.textile.liquid
index fc4393d0b6acc8d5e8c4cd9d7a513030a1cda755..6aeb11040cd1d387966263dc66cd4662e6e689a8 100644
@@ -75,6 +75,7 @@ Template variables starting with % will be substituted as follows:
 %M memory in MB
 %T tmp in MB
 %G number of GPU devices (@runtime_constraints.cuda.device_count@)
+%W maximum job run time in minutes, suitable for use with the @-W@ or @-We@ flags (see MaxRunTimeOverhead and MaxRunTimeDefault below)
 
 Use %% to express a literal %. The %%J in the default will be changed to %J, which is interpreted by @bsub@ itself.
 
@@ -83,7 +84,7 @@ For example:
 <notextile>
 <pre>    Containers:
       LSF:
-        <code class="userinput">BsubArgumentsList: <b>["-o", "/tmp/crunch-run.%%J.out", "-e", "/tmp/crunch-run.%%J.err", "-J", "%U", "-n", "%C", "-D", "%MMB", "-R", "rusage[mem=%MMB:tmp=%TMB] span[hosts=1]", "-R", "select[mem>=%MMB]", "-R", "select[tmp>=%TMB]", "-R", "select[ncpus>=%C]"]</b></code>
+        <code class="userinput">BsubArgumentsList: <b>["-o", "/tmp/crunch-run.%%J.out", "-e", "/tmp/crunch-run.%%J.err", "-J", "%U", "-n", "%C", "-D", "%MMB", "-R", "rusage[mem=%MMB:tmp=%TMB] span[hosts=1]", "-R", "select[mem>=%MMB]", "-R", "select[tmp>=%TMB]", "-R", "select[ncpus>=%C]", "-We", "%W"]</b></code>
 </pre>
 </notextile>
 
@@ -100,6 +101,14 @@ If the container requests access to GPUs (@runtime_constraints.cuda.device_count
 </pre>
 </notextile>
 
+h3(#MaxRunTimeOverhead). Containers.LSF.MaxRunTimeOverhead
+
+Extra time to add to each container's @scheduling_parameters.max_run_time@ value when substituting for @%W@ in @BsubArgumentsList@, to account for time spent setting up the container image, copying output files, etc.
+
+h3(#MaxRunTimeDefault). Containers.LSF.MaxRunTimeDefault
+
+Default @max_run_time@ value for containers that do not specify @scheduling_parameters.max_run_time@. If this is zero and @BsubArgumentsList@ contains @"-W", "%W"@ or @"-We", "%W"@, those arguments are dropped from the @bsub@ command line when submitting such containers.
+
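For example, a configuration applying the default five-minute overhead and a two-hour fallback limit could look like this (illustrative values):

<notextile>
<pre>    Containers:
      LSF:
        <code class="userinput">MaxRunTimeOverhead: <b>5m</b>
        MaxRunTimeDefault: <b>2h</b></code>
</pre>
</notextile>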
 h3(#PollInterval). Containers.PollInterval
 
 arvados-dispatch-lsf polls the API server periodically for new containers to run.  The @PollInterval@ option controls how often this poll happens.  Set this to a string of numbers suffixed with one of the time units @s@, @m@, or @h@.  For example:
lib/config/config.default.yml
index fa74e8e6c6d52a37ee50f36efa0cf7626086a43e..a3ae4fd56bbc179f67fc1f21e6c9cdb2db5c43df 100644
@@ -1379,15 +1379,23 @@ Clusters:
         # %M memory in MB
         # %T tmp in MB
         # %G number of GPU devices (runtime_constraints.cuda.device_count)
+        # %W maximum run time in minutes (see MaxRunTimeOverhead and
+        #    MaxRunTimeDefault below)
         #
-        # Use %% to express a literal %. The %%J in the default will be changed
-        # to %J, which is interpreted by bsub itself.
+        # Use %% to express a literal %. For example, the %%J in the
+        # default argument list will be changed to %J, which is
+        # interpreted by bsub itself.
         #
         # Note that the default arguments cause LSF to write two files
         # in /tmp on the compute node each time an Arvados container
         # runs. Ensure you have something in place to delete old files
         # from /tmp, or adjust the "-o" and "-e" arguments accordingly.
-        BsubArgumentsList: ["-o", "/tmp/crunch-run.%%J.out", "-e", "/tmp/crunch-run.%%J.err", "-J", "%U", "-n", "%C", "-D", "%MMB", "-R", "rusage[mem=%MMB:tmp=%TMB] span[hosts=1]", "-R", "select[mem>=%MMB]", "-R", "select[tmp>=%TMB]", "-R", "select[ncpus>=%C]"]
+        #
+        # If ["-We", "%W"] or ["-W", "%W"] appears in this argument
+        # list and MaxRunTimeDefault is not set (see below), both
+        # arguments will be dropped from the command line when
+        # submitting a container that has no max_run_time value.
+        BsubArgumentsList: ["-o", "/tmp/crunch-run.%%J.out", "-e", "/tmp/crunch-run.%%J.err", "-J", "%U", "-n", "%C", "-D", "%MMB", "-R", "rusage[mem=%MMB:tmp=%TMB] span[hosts=1]", "-R", "select[mem>=%MMB]", "-R", "select[tmp>=%TMB]", "-R", "select[ncpus>=%C]", "-We", "%W"]
 
         # Arguments that will be appended to the bsub command line
         # when submitting Arvados containers as LSF jobs with
@@ -1402,6 +1410,19 @@ Clusters:
         # Arvados LSF dispatcher runs ("submission host").
         BsubSudoUser: "crunch"
 
+        # When passing the scheduling_parameters.max_run_time value
+        # to LSF via "%W", add this much time to account for
+        # crunch-run startup/shutdown overhead.
+        MaxRunTimeOverhead: 5m
+
+        # If non-zero, MaxRunTimeDefault is used as the default value
+        # for max_run_time for containers that do not specify a time
+        # limit.  MaxRunTimeOverhead will be added to this.
+        #
+        # Example:
+        # MaxRunTimeDefault: 2h
+        MaxRunTimeDefault: 0
+
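To illustrate how these two settings combine with a container's max_run_time (a minimal sketch with a hypothetical maxRunMinutes helper, not code from this commit, though it mirrors the arithmetic in lib/lsf/dispatch.go below):

package main

import (
	"fmt"
	"math"
	"time"
)

// maxRunMinutes mirrors the %W arithmetic: fall back to
// MaxRunTimeDefault when the container has no max_run_time, add
// MaxRunTimeOverhead, and round up to whole minutes. Zero means
// "no limit known".
func maxRunMinutes(maxRunTime, defaultLimit, overhead time.Duration) int64 {
	if maxRunTime == 0 {
		maxRunTime = defaultLimit
	}
	if maxRunTime == 0 {
		return 0
	}
	return int64(math.Ceil((maxRunTime + overhead).Minutes()))
}

func main() {
	// A 124-second max_run_time plus the default 5m overhead is
	// submitted as "-We 8".
	fmt.Println(maxRunMinutes(124*time.Second, 0, 5*time.Minute)) // 8
	// With MaxRunTimeDefault: 2h, a container with no limit gets
	// "-We 125" (120m + 5m overhead).
	fmt.Println(maxRunMinutes(0, 2*time.Hour, 5*time.Minute)) // 125
}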
       JobsAPI:
         # Enable the legacy 'jobs' API (crunch v1).  This value must be a string.
         #
lib/lsf/dispatch.go
index d1408d23cb1a4e3c2274f40d2f02b66bda29e82d..897e5803f2334d6d7b3a40671c3a1c04e5cdd34d 100644
@@ -306,6 +306,15 @@ func (disp *dispatcher) bsubArgs(container arvados.Container) ([]string, error)
                container.RuntimeConstraints.KeepCacheRAM+
                int64(disp.Cluster.Containers.ReserveExtraRAM)) / 1048576))
 
+       maxruntime := time.Duration(container.SchedulingParameters.MaxRunTime) * time.Second
+       if maxruntime == 0 {
+               maxruntime = disp.Cluster.Containers.LSF.MaxRunTimeDefault.Duration()
+       }
+       if maxruntime > 0 {
+               maxruntime += disp.Cluster.Containers.LSF.MaxRunTimeOverhead.Duration()
+       }
+       maxrunminutes := int64(math.Ceil(float64(maxruntime.Seconds()) / 60))
+
        repl := map[string]string{
                "%%": "%",
                "%C": fmt.Sprintf("%d", vcpus),
@@ -313,6 +322,7 @@ func (disp *dispatcher) bsubArgs(container arvados.Container) ([]string, error)
                "%T": fmt.Sprintf("%d", tmp),
                "%U": container.UUID,
                "%G": fmt.Sprintf("%d", container.RuntimeConstraints.CUDA.DeviceCount),
+               "%W": fmt.Sprintf("%d", maxrunminutes),
        }
 
        re := regexp.MustCompile(`%.`)
@@ -321,7 +331,16 @@ func (disp *dispatcher) bsubArgs(container arvados.Container) ([]string, error)
        if container.RuntimeConstraints.CUDA.DeviceCount > 0 {
                argumentTemplate = append(argumentTemplate, disp.Cluster.Containers.LSF.BsubCUDAArguments...)
        }
-       for _, a := range argumentTemplate {
+       for idx, a := range argumentTemplate {
+               if idx > 0 && (argumentTemplate[idx-1] == "-W" || argumentTemplate[idx-1] == "-We") && a == "%W" && maxrunminutes == 0 {
+                       // LSF docs don't specify an argument to "-W"
+                       // or "-We" that indicates "unknown", so
+                       // instead we drop the "-W %W" part of the
+                       // command line entirely when max runtime is
+                       // unknown.
+                       args = args[:len(args)-1]
+                       continue
+               }
                args = append(args, re.ReplaceAllStringFunc(a, func(s string) string {
                        subst := repl[s]
                        if len(subst) == 0 {
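The argument-dropping behavior in bsubArgs above can be reduced to a standalone sketch (hypothetical buildArgs helper handling only %W; the real code substitutes every % code through a regexp):

package main

import (
	"fmt"
	"strings"
)

// buildArgs substitutes %W in an argument template. When no run time
// limit is known (minutes == 0), a preceding "-W" or "-We" flag is
// removed together with its "%W" value, since bsub has no way to say
// "unknown" for a run limit.
func buildArgs(template []string, minutes int64) []string {
	var args []string
	for idx, a := range template {
		if idx > 0 && (template[idx-1] == "-W" || template[idx-1] == "-We") && a == "%W" && minutes == 0 {
			// Drop the flag appended on the previous iteration.
			args = args[:len(args)-1]
			continue
		}
		args = append(args, strings.ReplaceAll(a, "%W", fmt.Sprintf("%d", minutes)))
	}
	return args
}

func main() {
	template := []string{"-n", "4", "-We", "%W"}
	fmt.Println(buildArgs(template, 65)) // [-n 4 -We 65]
	fmt.Println(buildArgs(template, 0))  // [-n 4]
}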
lib/lsf/dispatch_test.go
index cd41071d2cebc8bb62c96764ba767fa7da5e5835..e1e0bcae310ed2a52c9cfa4c8be281ba1ab96716 100644
@@ -34,6 +34,7 @@ type suite struct {
        crTooBig      arvados.ContainerRequest
        crPending     arvados.ContainerRequest
        crCUDARequest arvados.ContainerRequest
+       crMaxRunTime  arvados.ContainerRequest
 }
 
 func (s *suite) TearDownTest(c *check.C) {
@@ -116,6 +117,25 @@ func (s *suite) SetUpTest(c *check.C) {
        })
        c.Assert(err, check.IsNil)
 
+       err = arvados.NewClientFromEnv().RequestAndDecode(&s.crMaxRunTime, "POST", "arvados/v1/container_requests", nil, map[string]interface{}{
+               "container_request": map[string]interface{}{
+                       "runtime_constraints": arvados.RuntimeConstraints{
+                               RAM:   1000000,
+                               VCPUs: 1,
+                       },
+                       "scheduling_parameters": arvados.SchedulingParameters{
+                               MaxRunTime: 124,
+                       },
+                       "container_image":     arvadostest.DockerImage112PDH,
+                       "command":             []string{"sleep", "123"},
+                       "mounts":              map[string]arvados.Mount{"/mnt/out": {Kind: "tmp", Capacity: 1000}},
+                       "output_path":         "/mnt/out",
+                       "state":               arvados.ContainerRequestStateCommitted,
+                       "priority":            1,
+                       "container_count_max": 1,
+               },
+       })
+       c.Assert(err, check.IsNil)
 }
 
 type lsfstub struct {
@@ -141,12 +161,7 @@ func (stub lsfstub) stubCommand(s *suite, c *check.C) func(prog string, args ...
                }
                switch prog {
                case "bsub":
-                       defaultArgs := s.disp.Cluster.Containers.LSF.BsubArgumentsList
-                       if args[5] == s.crCUDARequest.ContainerUUID {
-                               c.Assert(len(args), check.Equals, len(defaultArgs)+len(s.disp.Cluster.Containers.LSF.BsubCUDAArguments))
-                       } else {
-                               c.Assert(len(args), check.Equals, len(defaultArgs))
-                       }
+                       c.Assert(len(args) > 5, check.Equals, true)
                        // %%J must have been rewritten to %J
                        c.Check(args[1], check.Equals, "/tmp/crunch-run.%J.out")
                        args = args[4:]
@@ -204,6 +219,21 @@ func (stub lsfstub) stubCommand(s *suite, c *check.C) func(prog string, args ...
                                fakejobq[nextjobid] = args[1]
                                nextjobid++
                                mtx.Unlock()
+                       case s.crMaxRunTime.ContainerUUID:
+                               c.Check(args, check.DeepEquals, []string{
+                                       "-J", s.crMaxRunTime.ContainerUUID,
+                                       "-n", "1",
+                                       "-D", "257MB",
+                                       "-R", "rusage[mem=257MB:tmp=2304MB] span[hosts=1]",
+                                       "-R", "select[mem>=257MB]",
+                                       "-R", "select[tmp>=2304MB]",
+                                       "-R", "select[ncpus>=1]",
+                                       "-We", "8", // 124s + 5m overhead + roundup = 8m
+                               })
+                               mtx.Lock()
+                               fakejobq[nextjobid] = args[1]
+                               nextjobid++
+                               mtx.Unlock()
                        default:
                                c.Errorf("unexpected uuid passed to bsub: args %q", args)
                                return exec.Command("false")
sdk/go/arvados/config.go
index 9b2cf155b0b9e1ec712b1132c9c25ba11df6b6af..698ee20d8c6bcc58119a02e0330f19ca0e7a64ee 100644
@@ -545,9 +545,11 @@ type ContainersConfig struct {
                }
        }
        LSF struct {
-               BsubSudoUser      string
-               BsubArgumentsList []string
-               BsubCUDAArguments []string
+               BsubSudoUser       string
+               BsubArgumentsList  []string
+               BsubCUDAArguments  []string
+               MaxRunTimeOverhead Duration
+               MaxRunTimeDefault  Duration
        }
 }
 
services/api/app/models/container_request.rb
index 0a8e33d05f5964ea7dc734ece6c1c740e3dc848c..f5789f31f684f89b2ac553de09cc199dfba005aa 100644
@@ -470,8 +470,9 @@ class ContainerRequest < ArvadosModel
 
   def validate_scheduling_parameters
     if self.state == Committed
-      if scheduling_parameters.include? 'partitions' and
-         (!scheduling_parameters['partitions'].is_a?(Array) ||
+      if scheduling_parameters.include?('partitions') and
+        !scheduling_parameters['partitions'].nil? and
+        (!scheduling_parameters['partitions'].is_a?(Array) ||
           scheduling_parameters['partitions'].reject{|x| !x.is_a?(String)}.size !=
             scheduling_parameters['partitions'].size)
             errors.add :scheduling_parameters, "partitions must be an array of strings"