projects
/
arvados.git
/ blobdiff
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Merge branch '19889-live-log-webdav'
[arvados.git]
/
lib
/
dispatchcloud
/
worker
/
worker.go
diff --git a/lib/dispatchcloud/worker/worker.go b/lib/dispatchcloud/worker/worker.go
index b2ed6c2bff5b039435944b851a9fe3646c922001..8b4be1a3c77aa8f01ebe7dad4a3c266da3c36c81 100644
(file)
--- a/lib/dispatchcloud/worker/worker.go
+++ b/lib/dispatchcloud/worker/worker.go
@@ -253,19 +253,30 @@ func (wkr *worker) probeAndUpdate() {
if !booted {
booted, stderr = wkr.probeBooted()
if !booted {
booted, stderr = wkr.probeBooted()
+ shouldCopy := booted || initialState == StateUnknown
if !booted {
// Pretend this probe succeeded if another
// concurrent attempt succeeded.
wkr.mtx.Lock()
if !booted {
// Pretend this probe succeeded if another
// concurrent attempt succeeded.
wkr.mtx.Lock()
- booted = wkr.state == StateRunning || wkr.state == StateIdle
+ if wkr.state == StateRunning || wkr.state == StateIdle {
+ booted = true
+ shouldCopy = false
+ }
wkr.mtx.Unlock()
}
wkr.mtx.Unlock()
}
+ if shouldCopy {
+ _, stderrCopy, err := wkr.copyRunnerData()
+ if err != nil {
+ booted = false
+ wkr.logger.WithError(err).WithField("stderr", string(stderrCopy)).Warn("error copying runner binary")
+ }
+ }
if booted {
logger.Info("instance booted; will try probeRunning")
}
}
reportedBroken := false
if booted {
logger.Info("instance booted; will try probeRunning")
}
}
reportedBroken := false
- if booted || wkr.state == StateUnknown {
+ if booted || initialState == StateUnknown {
ctrUUIDs, reportedBroken, ok = wkr.probeRunning()
}
wkr.mtx.Lock()
ctrUUIDs, reportedBroken, ok = wkr.probeRunning()
}
wkr.mtx.Lock()
@@ -467,21 +478,18 @@ func (wkr *worker) probeBooted() (ok bool, stderr []byte) {
return false, stderr
}
logger.Info("boot probe succeeded")
return false, stderr
}
logger.Info("boot probe succeeded")
+ return true, stderr
+}
+
+func (wkr *worker) copyRunnerData() (stdout, stderr []byte, err error) {
if err = wkr.wp.loadRunnerData(); err != nil {
wkr.logger.WithError(err).Warn("cannot boot worker: error loading runner binary")
if err = wkr.wp.loadRunnerData(); err != nil {
wkr.logger.WithError(err).Warn("cannot boot worker: error loading runner binary")
- return false, stderr
+ return
} else if len(wkr.wp.runnerData) == 0 {
// Assume crunch-run is already installed
} else if len(wkr.wp.runnerData) == 0 {
// Assume crunch-run is already installed
- } else if _, stderr2, err := wkr.copyRunnerData(); err != nil {
- wkr.logger.WithError(err).WithField("stderr", string(stderr2)).Warn("error copying runner binary")
- return false, stderr2
- } else {
- stderr = append(stderr, stderr2...)
+ return
}
}
- return true, stderr
-}
-func (wkr *worker) copyRunnerData() (stdout, stderr []byte, err error) {
hash := fmt.Sprintf("%x", wkr.wp.runnerMD5)
dstdir, _ := filepath.Split(wkr.wp.runnerCmd)
logger := wkr.logger.WithFields(logrus.Fields{
hash := fmt.Sprintf("%x", wkr.wp.runnerMD5)
dstdir, _ := filepath.Split(wkr.wp.runnerCmd)
logger := wkr.logger.WithFields(logrus.Fields{