import (
"bytes"
+ _ "embed"
"encoding/json"
"errors"
"flag"
"io/ioutil"
"os"
"regexp"
+ "strconv"
"strings"
"git.arvados.org/arvados.git/sdk/go/arvados"
"github.com/sirupsen/logrus"
)
+// DefaultYAML holds the raw bytes of config.default.yml, which the
+// go:embed directive below compiles into the binary at build time.
+//
+//go:embed config.default.yml
+var DefaultYAML []byte
+
+// ErrNoClustersDefined is a sentinel error reporting that the config
+// does not define any clusters.
+// NOTE(review): presumably returned by the loader when the parsed
+// config has no cluster entries -- confirm against the call sites.
var ErrNoClustersDefined = errors.New("config does not define any clusters")
type Loader struct {
ldr.checkEnum("Containers.LocalKeepLogsToContainerLog", cc.Containers.LocalKeepLogsToContainerLog, "none", "all", "errors"),
ldr.checkEmptyKeepstores(cc),
ldr.checkUnlistedKeepstores(cc),
- ldr.checkLocalKeepstoreVolumes(cc),
ldr.checkStorageClasses(cc),
+ ldr.checkCUDAVersions(cc),
// TODO: check non-empty Rendezvous on
// services other than Keepstore
} {
return nil
}
-func (ldr *Loader) checkLocalKeepstoreVolumes(cc arvados.Cluster) error {
- if cc.Containers.LocalKeepBlobBuffersPerVCPU < 1 {
- return nil
- }
- for _, vol := range cc.Volumes {
- if len(vol.AccessViaHosts) == 0 {
- return nil
- }
- }
- return fmt.Errorf("LocalKeepBlobBuffersPerVCPU is %d, but no volumes would be accessible from a worker instance", cc.Containers.LocalKeepBlobBuffersPerVCPU)
-}
-
func (ldr *Loader) checkStorageClasses(cc arvados.Cluster) error {
classOnVolume := map[string]bool{}
for volid, vol := range cc.Volumes {
return nil
}
+// checkCUDAVersions validates the CUDA version strings of every
+// instance type in cc that declares at least one CUDA device
+// (CUDA.DeviceCount != 0). Instance types without CUDA devices are
+// skipped.
+//
+// For each remaining instance type, CUDA.DriverVersion and
+// CUDA.HardwareCapability must each be a plain decimal number such as
+// "11.0". It returns an error naming the instance type and the first
+// offending field encountered, or nil if every field is valid.
+func (ldr *Loader) checkCUDAVersions(cc arvados.Cluster) error {
+	for _, it := range cc.InstanceTypes {
+		if it.CUDA.DeviceCount == 0 {
+			continue
+		}
+
+		for _, field := range []struct {
+			name  string
+			value string
+		}{
+			{"CUDA.DriverVersion", it.CUDA.DriverVersion},
+			{"CUDA.HardwareCapability", it.CUDA.HardwareCapability},
+		} {
+			if _, err := strconv.ParseFloat(field.value, 64); err != nil {
+				return fmt.Errorf("InstanceType %q has invalid %s %q, expected format X.Y (%w)", it.Name, field.name, field.value, err)
+			}
+			// ParseFloat also accepts "NaN", "Inf", signs,
+			// exponents, hex floats and digit-separating
+			// underscores. None of those is a version number,
+			// so require digits and a decimal point only.
+			if strings.Trim(field.value, "0123456789.") != "" {
+				return fmt.Errorf("InstanceType %q has invalid %s %q, expected format X.Y", it.Name, field.name, field.value)
+			}
+		}
+	}
+	return nil
+}
+
func checkKeyConflict(label string, m map[string]string) error {
saw := map[string]bool{}
for k := range m {