</notextile>
-h3(#SbatchArguments). Containers.LSF.BsubArgumentsList
+h3(#BsubArgumentsList). Containers.LSF.BsubArgumentsList
When arvados-dispatch-lsf invokes @bsub@, you can add arguments to the command by specifying @BsubArgumentsList@. You can use this to send the jobs to specific cluster partitions or add resource requests. Set @BsubArgumentsList@ to an array of strings.
Note that the default value for @BsubArgumentsList@ uses the @-o@ and @-e@ arguments to write stdout/stderr data to files in @/tmp@ on the compute nodes, which is helpful for troubleshooting installation/configuration problems. Ensure you have something in place to delete old files from @/tmp@, or adjust these arguments accordingly.
-h3(#SbatchArguments). Containers.LSF.BsubCUDAArguments
+h3(#BsubCUDAArguments). Containers.LSF.BsubCUDAArguments
If the container requests access to GPUs (@runtime_constraints.cuda.device_count@ of the container request is greater than zero), the command line arguments in @BsubCUDAArguments@ will be added to the command line _after_ @BsubArgumentsList@. This should consist of the additional @bsub@ flags your site requires to schedule the job on a node with GPU support. Set @BsubCUDAArguments@ to an array of strings. For example:
</pre>
</notextile>
-h3(#PollPeriod). Containers.PollInterval
+h3(#PollInterval). Containers.PollInterval
arvados-dispatch-lsf polls the API server periodically for new containers to run. The @PollInterval@ option controls how often this poll happens. Set this to a string of numbers suffixed with one of the time units @s@, @m@, or @h@. For example:
</notextile>
-h3(#CrunchRunCommand-network). Containers.CrunchRunArgumentList: Using host networking for containers
+h3(#CrunchRunArgumentList). Containers.CrunchRunArgumentList: Using host networking for containers
Older Linux kernels (prior to 3.18) have bugs in network namespace handling which can lead to compute node lockups. This is indicated by blocked kernel tasks in "Workqueue: netns cleanup_net". If you are experiencing this problem, as a workaround you can disable use of network namespaces by Docker across the cluster. Be aware this reduces container isolation, which may be a security risk.
</pre>
</notextile>
+
+h3(#InstanceTypes). InstanceTypes: Avoid submitting jobs with unsatisfiable resource constraints
+
+LSF does not provide feedback when a submitted job's RAM, CPU, or disk space constraints cannot be satisfied by any node: the job will wait in the queue indefinitely with "pending" status, reported by Arvados as "queued".
+
+As a workaround, you can configure @InstanceTypes@ with your LSF cluster's compute node sizes. Arvados will use these sizes to determine when a container is impossible to run, and cancel it instead of submitting an LSF job.
+
+Apart from detecting non-runnable containers, the configured instance types will not have any effect on scheduling.
+
+<notextile>
+<pre> InstanceTypes:
+ most-ram:
+ VCPUs: 8
+ RAM: 640GiB
+ IncludedScratch: 640GB
+ most-cpus:
+ VCPUs: 32
+ RAM: 256GiB
+ IncludedScratch: 640GB
+ gpu:
+ VCPUs: 8
+ RAM: 256GiB
+ IncludedScratch: 640GB
+ CUDA:
+ DriverVersion: "11.4"
+ HardwareCapability: "7.5"
+ DeviceCount: 1
+</pre>
+</notextile>
+
+
{% assign arvados_component = 'arvados-dispatch-lsf' %}
{% include 'install_packages' %}
if ctr.State != dispatch.Locked {
// already started by prior invocation
} else if _, ok := disp.lsfqueue.Lookup(ctr.UUID); !ok {
+ if _, err := dispatchcloud.ChooseInstanceType(disp.Cluster, &ctr); errors.As(err, &dispatchcloud.ConstraintsNotSatisfiableError{}) {
+ err := disp.arvDispatcher.Arv.Update("containers", ctr.UUID, arvadosclient.Dict{
+ "container": map[string]interface{}{
+ "runtime_status": map[string]string{
+ "error": err.Error(),
+ },
+ },
+ }, nil)
+ if err != nil {
+ return fmt.Errorf("error setting runtime_status on %s: %s", ctr.UUID, err)
+ }
+ return disp.arvDispatcher.UpdateState(ctr.UUID, dispatch.Cancelled)
+ }
disp.logger.Printf("Submitting container %s to LSF", ctr.UUID)
cmd := []string{disp.Cluster.Containers.CrunchRunCommand}
cmd = append(cmd, "--runtime-engine="+disp.Cluster.Containers.RuntimeEngine)
defer disp.logger.Printf("Done monitoring container %s", ctr.UUID)
go func(uuid string) {
- cancelled := false
for ctx.Err() == nil {
- qent, ok := disp.lsfqueue.Lookup(uuid)
+ _, ok := disp.lsfqueue.Lookup(uuid)
if !ok {
// If the container disappears from
// the lsf queue, there is no point in
cancel()
return
}
- if !cancelled && qent.Stat == "PEND" && strings.Contains(qent.PendReason, "There are no suitable hosts for the job") {
- disp.logger.Printf("container %s: %s", uuid, qent.PendReason)
- err := disp.arvDispatcher.Arv.Update("containers", uuid, arvadosclient.Dict{
- "container": map[string]interface{}{
- "runtime_status": map[string]string{
- "error": qent.PendReason,
- },
- },
- }, nil)
- if err != nil {
- disp.logger.Printf("error setting runtime_status on %s: %s", uuid, err)
- continue // retry
- }
- err = disp.arvDispatcher.UpdateState(uuid, dispatch.Cancelled)
- if err != nil {
- continue // retry (UpdateState() already logged the error)
- }
- cancelled = true
- }
}
}(ctr.UUID)
type suite struct {
disp *dispatcher
crTooBig arvados.ContainerRequest
+ crPending arvados.ContainerRequest
crCUDARequest arvados.ContainerRequest
}
c.Assert(err, check.IsNil)
cluster.Containers.CloudVMs.PollInterval = arvados.Duration(time.Second / 4)
cluster.Containers.MinRetryPeriod = arvados.Duration(time.Second / 4)
+ cluster.InstanceTypes = arvados.InstanceTypeMap{
+ "biggest_available_node": arvados.InstanceType{
+ RAM: 100 << 30, // 100 GiB
+ VCPUs: 4,
+ IncludedScratch: 100 << 30,
+ Scratch: 100 << 30,
+ }}
s.disp = newHandler(context.Background(), cluster, arvadostest.Dispatch1Token, prometheus.NewRegistry()).(*dispatcher)
s.disp.lsfcli.stubCommand = func(string, ...string) *exec.Cmd {
return exec.Command("bash", "-c", "echo >&2 unimplemented stub; false")
})
c.Assert(err, check.IsNil)
+ err = arvados.NewClientFromEnv().RequestAndDecode(&s.crPending, "POST", "arvados/v1/container_requests", nil, map[string]interface{}{
+ "container_request": map[string]interface{}{
+ "runtime_constraints": arvados.RuntimeConstraints{
+ RAM: 100000000,
+ VCPUs: 2,
+ },
+ "container_image": arvadostest.DockerImage112PDH,
+ "command": []string{"sleep", "1"},
+ "mounts": map[string]arvados.Mount{"/mnt/out": {Kind: "tmp", Capacity: 1000}},
+ "output_path": "/mnt/out",
+ "state": arvados.ContainerRequestStateCommitted,
+ "priority": 1,
+ "container_count_max": 1,
+ },
+ })
+ c.Assert(err, check.IsNil)
+
err = arvados.NewClientFromEnv().RequestAndDecode(&s.crCUDARequest, "POST", "arvados/v1/container_requests", nil, map[string]interface{}{
"container_request": map[string]interface{}{
"runtime_constraints": arvados.RuntimeConstraints{
fakejobq[nextjobid] = args[1]
nextjobid++
mtx.Unlock()
- case s.crTooBig.ContainerUUID:
+ case s.crPending.ContainerUUID:
c.Check(args, check.DeepEquals, []string{
- "-J", s.crTooBig.ContainerUUID,
- "-n", "1",
- "-D", "954187MB",
- "-R", "rusage[mem=954187MB:tmp=256MB] span[hosts=1]",
- "-R", "select[mem>=954187MB]",
+ "-J", s.crPending.ContainerUUID,
+ "-n", "2",
+ "-D", "608MB",
+ "-R", "rusage[mem=608MB:tmp=256MB] span[hosts=1]",
+ "-R", "select[mem>=608MB]",
"-R", "select[tmp>=256MB]",
- "-R", "select[ncpus>=1]"})
+ "-R", "select[ncpus>=2]"})
mtx.Lock()
fakejobq[nextjobid] = args[1]
nextjobid++
var records []map[string]interface{}
for jobid, uuid := range fakejobq {
stat, reason := "RUN", ""
- if uuid == s.crTooBig.ContainerUUID {
+ if uuid == s.crPending.ContainerUUID {
// The real bjobs output includes a trailing ';' here:
stat, reason = "PEND", "There are no suitable hosts for the job;"
}
c.Error("timed out")
break
}
+ // "crTooBig" should never be submitted to lsf because
+ // it is bigger than any configured instance type
+ if ent, ok := s.disp.lsfqueue.Lookup(s.crTooBig.ContainerUUID); ok {
+ c.Errorf("Lookup(crTooBig) == true, ent = %#v", ent)
+ break
+ }
// "queuedcontainer" should be running
if _, ok := s.disp.lsfqueue.Lookup(arvadostest.QueuedContainerUUID); !ok {
c.Log("Lookup(queuedcontainer) == false")
continue
}
+ // "crPending" should be pending
+ if ent, ok := s.disp.lsfqueue.Lookup(s.crPending.ContainerUUID); !ok {
+ c.Logf("Lookup(crPending) == false", ent)
+ continue
+ }
// "lockedcontainer" should be cancelled because it
// has priority 0 (no matching container requests)
if ent, ok := s.disp.lsfqueue.Lookup(arvadostest.LockedContainerUUID); ok {
c.Logf("Lookup(lockedcontainer) == true, ent = %#v", ent)
continue
}
- // "crTooBig" should be cancelled because lsf stub
- // reports there is no suitable instance type
- if ent, ok := s.disp.lsfqueue.Lookup(s.crTooBig.ContainerUUID); ok {
- c.Logf("Lookup(crTooBig) == true, ent = %#v", ent)
- continue
- }
var ctr arvados.Container
if err := s.disp.arvDispatcher.Arv.Get("containers", arvadostest.LockedContainerUUID, nil, &ctr); err != nil {
c.Logf("error getting container state for %s: %s", arvadostest.LockedContainerUUID, err)
c.Logf("container %s is not in the LSF queue but its arvados record has not been updated to state==Cancelled (state is %q)", s.crTooBig.ContainerUUID, ctr.State)
continue
} else {
- c.Check(ctr.RuntimeStatus["error"], check.Equals, "There are no suitable hosts for the job;")
+ c.Check(ctr.RuntimeStatus["error"], check.Equals, "constraints not satisfiable by any configured instance type")
}
c.Log("reached desired state")
break