"encoding/json"
"errors"
"io"
+ "os"
"os/exec"
"os/user"
"strings"
"git.arvados.org/arvados.git/lib/cloud"
"git.arvados.org/arvados.git/lib/dispatchcloud/test"
"git.arvados.org/arvados.git/sdk/go/arvados"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/sirupsen/logrus"
"golang.org/x/crypto/ssh"
)
mtx sync.Mutex
}
-func newInstanceSet(config json.RawMessage, instanceSetID cloud.InstanceSetID, _ cloud.SharedResourceTags, logger logrus.FieldLogger) (cloud.InstanceSet, error) {
+func newInstanceSet(config json.RawMessage, instanceSetID cloud.InstanceSetID, _ cloud.SharedResourceTags, logger logrus.FieldLogger, reg *prometheus.Registry) (cloud.InstanceSet, error) {
is := &instanceSet{
instanceSetID: instanceSetID,
logger: logger,
if len(is.instances) > 0 {
return nil, errQuota
}
+ // A crunch-run process running in a previous instance may
+ // have marked the node as broken. In the loopback scenario a
+ // destroy+create cycle doesn't fix whatever was broken -- but
+ // nothing else will either, so the best we can do is remove
+ // the "broken" flag and try again.
+ if err := os.Remove("/var/lock/crunch-run-broken"); err == nil {
+ is.logger.Info("removed /var/lock/crunch-run-broken")
+ } else if !errors.Is(err, os.ErrNotExist) {
+ return nil, err
+ }
u, err := user.Current()
if err != nil {
return nil, err
sshService test.SSHService
}
-func (i *instance) ID() cloud.InstanceID { return cloud.InstanceID(i.instanceType.ProviderType) }
-func (i *instance) String() string { return i.instanceType.ProviderType }
-func (i *instance) ProviderType() string { return i.instanceType.ProviderType }
-func (i *instance) Address() string { return i.sshService.Address() }
-func (i *instance) RemoteUser() string { return i.adminUser }
-func (i *instance) Tags() cloud.InstanceTags { return i.tags }
+func (i *instance) ID() cloud.InstanceID { return cloud.InstanceID(i.instanceType.ProviderType) } // ID is the instance type's ProviderType converted to a cloud.InstanceID.
+func (i *instance) String() string { return i.instanceType.ProviderType } // String returns the same ProviderType string that ID is derived from.
+func (i *instance) ProviderType() string { return i.instanceType.ProviderType } // ProviderType returns the ProviderType field of the stored instance type.
+func (i *instance) Address() string { return i.sshService.Address() } // Address delegates to the instance's sshService.
+func (i *instance) PriceHistory(arvados.InstanceType) []cloud.InstancePrice { return nil } // PriceHistory ignores its argument and always returns nil.
+func (i *instance) RemoteUser() string { return i.adminUser } // RemoteUser returns the adminUser field.
+func (i *instance) Tags() cloud.InstanceTags { return i.tags } // Tags returns the stored tags value itself, not a copy.
func (i *instance) SetTags(tags cloud.InstanceTags) error {
i.tags = tags
return nil