- log.Printf("configuring node %q with all node type features", slurmDummyNode)
- cmd = exec.Command("scontrol", "update", "NodeName="+slurmDummyNode, "Features="+allFeatures)
- log.Printf("running: %q %q", cmd.Path, cmd.Args)
- out, err = cmd.CombinedOutput()
- if err != nil {
- log.Printf("error: scontrol: %s (output was %q)", err, out)
+ ok := false
+ for _, it := range cc.InstanceTypes {
+ driverInsuff, driverErr := versionLess(it.CUDA.DriverVersion, ctr.RuntimeConstraints.CUDA.DriverVersion)
+ capabilityInsuff, capabilityErr := versionLess(it.CUDA.HardwareCapability, ctr.RuntimeConstraints.CUDA.HardwareCapability)
+
+ switch {
+ // reasons to reject a node
+ case ok && it.Price > best.Price: // already selected a node, and this one is more expensive
+ case int64(it.Scratch) < needScratch: // insufficient scratch
+ case int64(it.RAM) < needRAM: // insufficient RAM
+ case it.VCPUs < needVCPUs: // insufficient VCPUs
+ case it.Preemptible != ctr.SchedulingParameters.Preemptible: // wrong preemptible setting
+ case it.Price == best.Price && (it.RAM < best.RAM || it.VCPUs < best.VCPUs): // same price, worse specs
+ case it.CUDA.DeviceCount < ctr.RuntimeConstraints.CUDA.DeviceCount: // insufficient CUDA devices
+ case ctr.RuntimeConstraints.CUDA.DeviceCount > 0 && (driverInsuff || driverErr != nil): // insufficient driver version
+ case ctr.RuntimeConstraints.CUDA.DeviceCount > 0 && (capabilityInsuff || capabilityErr != nil): // insufficient hardware capability
+ // Don't select this node
+ default:
+ // Didn't reject the node, so select it
+ // Lower price || (same price && better specs)
+ best = it
+ ok = true
+ }