X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/58e6402a72e9ac1a210b2d318591f973a37e1e57..f15d246be0ddac1ece0151ed8cdca8781add5d4b:/lib/cloud/loopback/loopback.go diff --git a/lib/cloud/loopback/loopback.go b/lib/cloud/loopback/loopback.go index 6ad4f876d9..41878acd22 100644 --- a/lib/cloud/loopback/loopback.go +++ b/lib/cloud/loopback/loopback.go @@ -11,6 +11,7 @@ import ( "encoding/json" "errors" "io" + "os" "os/exec" "os/user" "strings" @@ -20,6 +21,7 @@ import ( "git.arvados.org/arvados.git/lib/cloud" "git.arvados.org/arvados.git/lib/dispatchcloud/test" "git.arvados.org/arvados.git/sdk/go/arvados" + "github.com/prometheus/client_golang/prometheus" "github.com/sirupsen/logrus" "golang.org/x/crypto/ssh" ) @@ -44,7 +46,7 @@ type instanceSet struct { mtx sync.Mutex } -func newInstanceSet(config json.RawMessage, instanceSetID cloud.InstanceSetID, _ cloud.SharedResourceTags, logger logrus.FieldLogger) (cloud.InstanceSet, error) { +func newInstanceSet(config json.RawMessage, instanceSetID cloud.InstanceSetID, _ cloud.SharedResourceTags, logger logrus.FieldLogger, reg *prometheus.Registry) (cloud.InstanceSet, error) { is := &instanceSet{ instanceSetID: instanceSetID, logger: logger, @@ -58,6 +60,16 @@ func (is *instanceSet) Create(it arvados.InstanceType, _ cloud.ImageID, tags clo if len(is.instances) > 0 { return nil, errQuota } + // A crunch-run process running in a previous instance may + // have marked the node as broken. In the loopback scenario a + // destroy+create cycle doesn't fix whatever was broken -- but + // nothing else will either, so the best we can do is remove + // the "broken" flag and try again. + if err := os.Remove("/var/lock/crunch-run-broken"); err == nil { + is.logger.Info("removed /var/lock/crunch-run-broken") + } else if !errors.Is(err, os.ErrNotExist) { + return nil, err + } u, err := user.Current() if err != nil { return nil, err @@ -119,12 +131,13 @@ type instance struct { sshService test.SSHService } -func (i *instance) ID() cloud.InstanceID { return cloud.InstanceID(i.instanceType.ProviderType) } -func (i *instance) String() string { return i.instanceType.ProviderType } -func (i *instance) ProviderType() string { return i.instanceType.ProviderType } -func (i *instance) Address() string { return i.sshService.Address() } -func (i *instance) RemoteUser() string { return i.adminUser } -func (i *instance) Tags() cloud.InstanceTags { return i.tags } +func (i *instance) ID() cloud.InstanceID { return cloud.InstanceID(i.instanceType.ProviderType) } +func (i *instance) String() string { return i.instanceType.ProviderType } +func (i *instance) ProviderType() string { return i.instanceType.ProviderType } +func (i *instance) Address() string { return i.sshService.Address() } +func (i *instance) PriceHistory(arvados.InstanceType) []cloud.InstancePrice { return nil } +func (i *instance) RemoteUser() string { return i.adminUser } +func (i *instance) Tags() cloud.InstanceTags { return i.tags } func (i *instance) SetTags(tags cloud.InstanceTags) error { i.tags = tags return nil