19418: LSF: use InstanceTypes to detect unsatisfiable constraints.

author Tom Clegg <tom@curii.com>

Tue, 4 Oct 2022 15:30:40 +0000 (11:30 -0400)

committer Tom Clegg <tom@curii.com>

Tue, 4 Oct 2022 15:30:40 +0000 (11:30 -0400)
author Tom Clegg <tom@curii.com>
Tue, 4 Oct 2022 15:30:40 +0000 (11:30 -0400)
committer Tom Clegg <tom@curii.com>
Tue, 4 Oct 2022 15:30:40 +0000 (11:30 -0400)
diff --git a/doc/install/crunch2-lsf/install-dispatch.html.textile.liquid b/doc/install/crunch2-lsf/install-dispatch.html.textile.liquid

index 37adffd18d4e9bef5162614b015a3155df3333a5..ded244046dde211ea2b18dab7779d5159ffc100e 100644 (file)
--- a/doc/install/crunch2-lsf/install-dispatch.html.textile.liquid
+++ b/doc/install/crunch2-lsf/install-dispatch.html.textile.liquid
@@ -62,7 +62,7 @@ Alternatively, you can arrange for the arvados-dispatch-lsf process to run as an
  </notextile>
  
  
-h3(#SbatchArguments). Containers.LSF.BsubArgumentsList
+h3(#BsubArgumentsList). Containers.LSF.BsubArgumentsList
  
  When arvados-dispatch-lsf invokes @bsub@, you can add arguments to the command by specifying @BsubArgumentsList@.  You can use this to send the jobs to specific cluster partitions or add resource requests.  Set @BsubArgumentsList@ to an array of strings.
  
@@ -87,7 +87,7 @@ For example:
  
  Note that the default value for @BsubArgumentsList@ uses the @-o@ and @-e@ arguments to write stdout/stderr data to files in @/tmp@ on the compute nodes, which is helpful for troubleshooting installation/configuration problems. Ensure you have something in place to delete old files from @/tmp@, or adjust these arguments accordingly.
  
-h3(#SbatchArguments). Containers.LSF.BsubCUDAArguments
+h3(#BsubCUDAArguments). Containers.LSF.BsubCUDAArguments
  
  If the container requests access to GPUs (@runtime_constraints.cuda.device_count@ of the container request is greater than zero), the command line arguments in @BsubCUDAArguments@ will be added to the command line _after_ @BsubArgumentsList@.  This should consist of the additional @bsub@ flags your site requires to schedule the job on a node with GPU support.  Set @BsubCUDAArguments@ to an array of strings.  For example:
  
@@ -98,7 +98,7 @@ If the container requests access to GPUs (@runtime_constraints.cuda.device_count
  </pre>
  </notextile>
  
-h3(#PollPeriod). Containers.PollInterval
+h3(#PollInterval). Containers.PollInterval
  
  arvados-dispatch-lsf polls the API server periodically for new containers to run.  The @PollInterval@ option controls how often this poll happens.  Set this to a string of numbers suffixed with one of the time units @s@, @m@, or @h@.  For example:
  
@@ -122,7 +122,7 @@ Supports suffixes @KB@, @KiB@, @MB@, @MiB@, @GB@, @GiB@, @TB@, @TiB@, @PB@, @PiB
  </notextile>
  
  
-h3(#CrunchRunCommand-network). Containers.CrunchRunArgumentList: Using host networking for containers
+h3(#CrunchRunArgumentList). Containers.CrunchRunArgumentList: Using host networking for containers
  
  Older Linux kernels (prior to 3.18) have bugs in network namespace handling which can lead to compute node lockups.  This is indicated by blocked kernel tasks in "Workqueue: netns cleanup_net".   If you are experiencing this problem, as a workaround you can disable use of network namespaces by Docker across the cluster.  Be aware this reduces container isolation, which may be a security risk.
  
@@ -134,6 +134,37 @@ Older Linux kernels (prior to 3.18) have bugs in network namespace handling whic
  </pre>
  </notextile>
  
+
+h3(#InstanceTypes). InstanceTypes: Avoid submitting jobs with unsatisfiable resource constraints
+
+LSF does not provide feedback when a submitted job's RAM, CPU, or disk space constraints cannot be satisfied by any node: the job will wait in the queue indefinitely with "pending" status, reported by Arvados as "queued".
+
+As a workaround, you can configure @InstanceTypes@ with your LSF cluster's compute node sizes. Arvados will use these sizes to determine when a container is impossible to run, and cancel it instead of submitting an LSF job.
+
+Apart from detecting non-runnable containers, the configured instance types will not have any effect on scheduling.
+
+<notextile>
+<pre>    InstanceTypes:
+      most-ram:
+        VCPUs: 8
+        RAM: 640GiB
+        IncludedScratch: 640GB
+      most-cpus:
+        VCPUs: 32
+        RAM: 256GiB
+        IncludedScratch: 640GB
+      gpu:
+        VCPUs: 8
+        RAM: 256GiB
+        IncludedScratch: 640GB
+        CUDA:
+          DriverVersion: "11.4"
+          HardwareCapability: "7.5"
+          DeviceCount: 1
+</pre>
+</notextile>
+
+
  {% assign arvados_component = 'arvados-dispatch-lsf' %}
  
  {% include 'install_packages' %}
diff --git a/lib/lsf/dispatch.go b/lib/lsf/dispatch.go

index e2348337e62992eb4463947690e809e1927bb232..d362f66d14b3ee12b9a4fb6b197b9a34747d944c 100644 (file)
--- a/lib/lsf/dispatch.go
+++ b/lib/lsf/dispatch.go
@@ -170,6 +170,19 @@ func (disp *dispatcher) runContainer(_ *dispatch.Dispatcher, ctr arvados.Contain
         if ctr.State != dispatch.Locked {
                 // already started by prior invocation
         } else if _, ok := disp.lsfqueue.Lookup(ctr.UUID); !ok {
+               if _, err := dispatchcloud.ChooseInstanceType(disp.Cluster, &ctr); errors.As(err, &dispatchcloud.ConstraintsNotSatisfiableError{}) {
+                       err := disp.arvDispatcher.Arv.Update("containers", ctr.UUID, arvadosclient.Dict{
+                               "container": map[string]interface{}{
+                                       "runtime_status": map[string]string{
+                                               "error": err.Error(),
+                                       },
+                               },
+                       }, nil)
+                       if err != nil {
+                               return fmt.Errorf("error setting runtime_status on %s: %s", ctr.UUID, err)
+                       }
+                       return disp.arvDispatcher.UpdateState(ctr.UUID, dispatch.Cancelled)
+               }
                 disp.logger.Printf("Submitting container %s to LSF", ctr.UUID)
                 cmd := []string{disp.Cluster.Containers.CrunchRunCommand}
                 cmd = append(cmd, "--runtime-engine="+disp.Cluster.Containers.RuntimeEngine)
@@ -184,9 +197,8 @@ func (disp *dispatcher) runContainer(_ *dispatch.Dispatcher, ctr arvados.Contain
         defer disp.logger.Printf("Done monitoring container %s", ctr.UUID)
  
         go func(uuid string) {
-               cancelled := false
                 for ctx.Err() == nil {
-                       qent, ok := disp.lsfqueue.Lookup(uuid)
+                       _, ok := disp.lsfqueue.Lookup(uuid)
                         if !ok {
                                 // If the container disappears from
                                 // the lsf queue, there is no point in
@@ -196,25 +208,6 @@ func (disp *dispatcher) runContainer(_ *dispatch.Dispatcher, ctr arvados.Contain
                                 cancel()
                                 return
                         }
-                       if !cancelled && qent.Stat == "PEND" && strings.Contains(qent.PendReason, "There are no suitable hosts for the job") {
-                               disp.logger.Printf("container %s: %s", uuid, qent.PendReason)
-                               err := disp.arvDispatcher.Arv.Update("containers", uuid, arvadosclient.Dict{
-                                       "container": map[string]interface{}{
-                                               "runtime_status": map[string]string{
-                                                       "error": qent.PendReason,
-                                               },
-                                       },
-                               }, nil)
-                               if err != nil {
-                                       disp.logger.Printf("error setting runtime_status on %s: %s", uuid, err)
-                                       continue // retry
-                               }
-                               err = disp.arvDispatcher.UpdateState(uuid, dispatch.Cancelled)
-                               if err != nil {
-                                       continue // retry (UpdateState() already logged the error)
-                               }
-                               cancelled = true
-                       }
                 }
         }(ctr.UUID)
  
diff --git a/lib/lsf/dispatch_test.go b/lib/lsf/dispatch_test.go

index a99983f34a8ae4163f9a91ba59c43ab9e57c3e00..e51e719066cbdf2b3f71d245eea9a7fc326fcbc3 100644 (file)
--- a/lib/lsf/dispatch_test.go
+++ b/lib/lsf/dispatch_test.go
@@ -32,6 +32,7 @@ var _ = check.Suite(&suite{})
  type suite struct {
         disp          *dispatcher
         crTooBig      arvados.ContainerRequest
+       crPending     arvados.ContainerRequest
         crCUDARequest arvados.ContainerRequest
  }
  
@@ -46,6 +47,13 @@ func (s *suite) SetUpTest(c *check.C) {
         c.Assert(err, check.IsNil)
         cluster.Containers.CloudVMs.PollInterval = arvados.Duration(time.Second / 4)
         cluster.Containers.MinRetryPeriod = arvados.Duration(time.Second / 4)
+       cluster.InstanceTypes = arvados.InstanceTypeMap{
+               "biggest_available_node": arvados.InstanceType{
+                       RAM:             100 << 30, // 100 GiB
+                       VCPUs:           4,
+                       IncludedScratch: 100 << 30,
+                       Scratch:         100 << 30,
+               }}
         s.disp = newHandler(context.Background(), cluster, arvadostest.Dispatch1Token, prometheus.NewRegistry()).(*dispatcher)
         s.disp.lsfcli.stubCommand = func(string, ...string) *exec.Cmd {
                 return exec.Command("bash", "-c", "echo >&2 unimplemented stub; false")
@@ -67,6 +75,23 @@ func (s *suite) SetUpTest(c *check.C) {
         })
         c.Assert(err, check.IsNil)
  
+       err = arvados.NewClientFromEnv().RequestAndDecode(&s.crPending, "POST", "arvados/v1/container_requests", nil, map[string]interface{}{
+               "container_request": map[string]interface{}{
+                       "runtime_constraints": arvados.RuntimeConstraints{
+                               RAM:   100000000,
+                               VCPUs: 2,
+                       },
+                       "container_image":     arvadostest.DockerImage112PDH,
+                       "command":             []string{"sleep", "1"},
+                       "mounts":              map[string]arvados.Mount{"/mnt/out": {Kind: "tmp", Capacity: 1000}},
+                       "output_path":         "/mnt/out",
+                       "state":               arvados.ContainerRequestStateCommitted,
+                       "priority":            1,
+                       "container_count_max": 1,
+               },
+       })
+       c.Assert(err, check.IsNil)
+
         err = arvados.NewClientFromEnv().RequestAndDecode(&s.crCUDARequest, "POST", "arvados/v1/container_requests", nil, map[string]interface{}{
                 "container_request": map[string]interface{}{
                         "runtime_constraints": arvados.RuntimeConstraints{
@@ -150,15 +175,15 @@ func (stub lsfstub) stubCommand(s *suite, c *check.C) func(prog string, args ...
                                 fakejobq[nextjobid] = args[1]
                                 nextjobid++
                                 mtx.Unlock()
-                       case s.crTooBig.ContainerUUID:
+                       case s.crPending.ContainerUUID:
                                 c.Check(args, check.DeepEquals, []string{
-                                       "-J", s.crTooBig.ContainerUUID,
-                                       "-n", "1",
-                                       "-D", "954187MB",
-                                       "-R", "rusage[mem=954187MB:tmp=256MB] span[hosts=1]",
-                                       "-R", "select[mem>=954187MB]",
+                                       "-J", s.crPending.ContainerUUID,
+                                       "-n", "2",
+                                       "-D", "608MB",
+                                       "-R", "rusage[mem=608MB:tmp=256MB] span[hosts=1]",
+                                       "-R", "select[mem>=608MB]",
                                         "-R", "select[tmp>=256MB]",
-                                       "-R", "select[ncpus>=1]"})
+                                       "-R", "select[ncpus>=2]"})
                                 mtx.Lock()
                                 fakejobq[nextjobid] = args[1]
                                 nextjobid++
@@ -187,7 +212,7 @@ func (stub lsfstub) stubCommand(s *suite, c *check.C) func(prog string, args ...
                         var records []map[string]interface{}
                         for jobid, uuid := range fakejobq {
                                 stat, reason := "RUN", ""
-                               if uuid == s.crTooBig.ContainerUUID {
+                               if uuid == s.crPending.ContainerUUID {
                                         // The real bjobs output includes a trailing ';' here:
                                         stat, reason = "PEND", "There are no suitable hosts for the job;"
                                 }
@@ -242,23 +267,28 @@ func (s *suite) TestSubmit(c *check.C) {
                         c.Error("timed out")
                         break
                 }
+               // "crTooBig" should never be submitted to lsf because
+               // it is bigger than any configured instance type
+               if ent, ok := s.disp.lsfqueue.Lookup(s.crTooBig.ContainerUUID); ok {
+                       c.Errorf("Lookup(crTooBig) == true, ent = %#v", ent)
+                       break
+               }
                 // "queuedcontainer" should be running
                 if _, ok := s.disp.lsfqueue.Lookup(arvadostest.QueuedContainerUUID); !ok {
                         c.Log("Lookup(queuedcontainer) == false")
                         continue
                 }
+               // "crPending" should be pending
+               if ent, ok := s.disp.lsfqueue.Lookup(s.crPending.ContainerUUID); !ok {
+                       c.Logf("Lookup(crPending) == false", ent)
+                       continue
+               }
                 // "lockedcontainer" should be cancelled because it
                 // has priority 0 (no matching container requests)
                 if ent, ok := s.disp.lsfqueue.Lookup(arvadostest.LockedContainerUUID); ok {
                         c.Logf("Lookup(lockedcontainer) == true, ent = %#v", ent)
                         continue
                 }
-               // "crTooBig" should be cancelled because lsf stub
-               // reports there is no suitable instance type
-               if ent, ok := s.disp.lsfqueue.Lookup(s.crTooBig.ContainerUUID); ok {
-                       c.Logf("Lookup(crTooBig) == true, ent = %#v", ent)
-                       continue
-               }
                 var ctr arvados.Container
                 if err := s.disp.arvDispatcher.Arv.Get("containers", arvadostest.LockedContainerUUID, nil, &ctr); err != nil {
                         c.Logf("error getting container state for %s: %s", arvadostest.LockedContainerUUID, err)
@@ -275,7 +305,7 @@ func (s *suite) TestSubmit(c *check.C) {
                         c.Logf("container %s is not in the LSF queue but its arvados record has not been updated to state==Cancelled (state is %q)", s.crTooBig.ContainerUUID, ctr.State)
                         continue
                 } else {
-                       c.Check(ctr.RuntimeStatus["error"], check.Equals, "There are no suitable hosts for the job;")
+                       c.Check(ctr.RuntimeStatus["error"], check.Equals, "constraints not satisfiable by any configured instance type")
                 }
                 c.Log("reached desired state")
                 break
author	Tom Clegg <tom@curii.com>
	Tue, 4 Oct 2022 15:30:40 +0000 (11:30 -0400)
committer	Tom Clegg <tom@curii.com>
	Tue, 4 Oct 2022 15:30:40 +0000 (11:30 -0400)
doc/install/crunch2-lsf/install-dispatch.html.textile.liquid		patch \| blob \| history
lib/lsf/dispatch.go		patch \| blob \| history
lib/lsf/dispatch_test.go		patch \| blob \| history