"syscall"
"time"
- "git.curoverse.com/arvados.git/sdk/go/arvados"
- "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
- "git.curoverse.com/arvados.git/sdk/go/dispatch"
+ "git.arvados.org/arvados.git/lib/config"
+ "git.arvados.org/arvados.git/sdk/go/arvados"
+ "git.arvados.org/arvados.git/sdk/go/arvadosclient"
+ "git.arvados.org/arvados.git/sdk/go/dispatch"
"github.com/sirupsen/logrus"
)
return nil
}
+ loader := config.NewLoader(nil, logger)
+ cfg, err := loader.Load()
+ cluster, err := cfg.GetCluster("")
+ if err != nil {
+ return fmt.Errorf("config error: %s", err)
+ }
+
logger.Printf("crunch-dispatch-local %s started", version)
runningCmds = make(map[string]*exec.Cmd)
+ var client arvados.Client
+ client.APIHost = cluster.Services.Controller.ExternalURL.Host
+ client.AuthToken = cluster.SystemRootToken
+ client.Insecure = cluster.TLS.Insecure
+
+ if client.APIHost != "" || client.AuthToken != "" {
+ // Copy real configs into env vars so [a]
+ // MakeArvadosClient() uses them, and [b] they get
+ // propagated to crunch-run via SLURM.
+ os.Setenv("ARVADOS_API_HOST", client.APIHost)
+ os.Setenv("ARVADOS_API_TOKEN", client.AuthToken)
+ os.Setenv("ARVADOS_API_HOST_INSECURE", "")
+ if client.Insecure {
+ os.Setenv("ARVADOS_API_HOST_INSECURE", "1")
+ }
+ os.Setenv("ARVADOS_EXTERNAL_CLIENT", "")
+ } else {
+ logger.Warnf("Client credentials missing from config, so falling back on environment variables (deprecated).")
+ }
+
arv, err := arvadosclient.MakeArvadosClient()
if err != nil {
logger.Errorf("error making Arvados client: %v", err)
}
arv.Retries = 25
+ ctx, cancel := context.WithCancel(context.Background())
+
dispatcher := dispatch.Dispatcher{
Logger: logger,
Arv: arv,
- RunContainer: run,
+ RunContainer: (&LocalRun{startFunc, make(chan bool, 8), ctx, cluster}).run,
PollPeriod: time.Duration(*pollInterval) * time.Second,
}
- ctx, cancel := context.WithCancel(context.Background())
err = dispatcher.Run(ctx)
if err != nil {
return err
return cmd.Start()
}
-var startCmd = startFunc
+type LocalRun struct {
+ startCmd func(container arvados.Container, cmd *exec.Cmd) error
+ concurrencyLimit chan bool
+ ctx context.Context
+ cluster *arvados.Cluster
+}
// Run a container.
//
//
// If the container is in any other state, or is not Complete/Cancelled after
// crunch-run terminates, mark the container as Cancelled.
-func run(dispatcher *dispatch.Dispatcher,
+func (lr *LocalRun) run(dispatcher *dispatch.Dispatcher,
container arvados.Container,
- status <-chan arvados.Container) {
+ status <-chan arvados.Container) error {
uuid := container.UUID
if container.State == dispatch.Locked {
+
+ select {
+ case lr.concurrencyLimit <- true:
+ break
+ case <-lr.ctx.Done():
+ return lr.ctx.Err()
+ }
+
+ defer func() { <-lr.concurrencyLimit }()
+
+ select {
+ case c := <-status:
+ // Check for state updates after possibly
+ // waiting to be ready-to-run
+ if c.Priority == 0 {
+ goto Finish
+ }
+ default:
+ break
+ }
+
waitGroup.Add(1)
+ defer waitGroup.Done()
- cmd := exec.Command(*crunchRunCommand, uuid)
+ cmd := exec.Command(*crunchRunCommand, "--runtime-engine="+lr.cluster.Containers.RuntimeEngine, uuid)
cmd.Stdin = nil
cmd.Stderr = os.Stderr
cmd.Stdout = os.Stderr
// succeed in starting crunch-run.
runningCmdsMutex.Lock()
- if err := startCmd(container, cmd); err != nil {
+ if err := lr.startCmd(container, cmd); err != nil {
runningCmdsMutex.Unlock()
dispatcher.Logger.Warnf("error starting %q for %s: %s", *crunchRunCommand, uuid, err)
dispatcher.UpdateState(uuid, dispatch.Cancelled)
delete(runningCmds, uuid)
runningCmdsMutex.Unlock()
}
- waitGroup.Done()
}
+Finish:
+
// If the container is not finalized, then change it to "Cancelled".
err := dispatcher.Arv.Get("containers", uuid, nil, &container)
if err != nil {
}
dispatcher.Logger.Printf("finalized container %v", uuid)
+ return nil
}