"Arvados cluster controller daemon"
package_go_binary cmd/arvados-server arvados-dispatch-cloud \
"Arvados cluster cloud dispatch"
+ package_go_binary cmd/arvados-server arvados-dispatch-lsf \
+ "Dispatch Arvados containers to an LSF cluster"
package_go_binary services/arv-git-httpd arvados-git-httpd \
"Provide authenticated http access to Arvados-hosted git repositories"
package_go_binary services/crunch-dispatch-local crunch-dispatch-local \
mv /tmp/x /etc/arvados/config.yml
perl -p -i -e 'BEGIN{undef $/;} s/WebDAV(.*?):\n( *)ExternalURL: ""/WebDAV$1:\n$2ExternalURL: "example.com"/g' /etc/arvados/config.yml
- ARVADOS_CONFIG=none RAILS_ENV=production RAILS_GROUPS=assets bundle exec rake npm:install >"$STDOUT_IF_DEBUG"
- ARVADOS_CONFIG=none RAILS_ENV=production RAILS_GROUPS=assets bundle exec rake assets:precompile >"$STDOUT_IF_DEBUG"
+ ARVADOS_CONFIG=none RAILS_ENV=production RAILS_GROUPS=assets bin/rake npm:install >"$STDOUT_IF_DEBUG"
+ ARVADOS_CONFIG=none RAILS_ENV=production RAILS_GROUPS=assets bin/rake assets:precompile >"$STDOUT_IF_DEBUG"
# Remove generated configuration files so they don't go in the package.
rm -rf /etc/arvados/
- architecture/manifest-format.html.textile.liquid
- Computation with Crunch:
- api/execution.html.textile.liquid
+ - architecture/dispatchcloud.html.textile.liquid
- Other:
- api/permission-model.html.textile.liquid
- architecture/federation.html.textile.liquid
- Data Management:
- admin/collection-versioning.html.textile.liquid
- admin/collection-managed-properties.html.textile.liquid
+ - admin/restricting-upload-download.html.textile.liquid
- admin/keep-balance.html.textile.liquid
- admin/controlling-container-reuse.html.textile.liquid
- admin/logs-table-management.html.textile.liquid
- install/crunch2-slurm/configure-slurm.html.textile.liquid
- install/crunch2-slurm/install-compute-node.html.textile.liquid
- install/crunch2-slurm/install-test.html.textile.liquid
+ - Containers API (lsf):
+ - install/crunch2-lsf/install-dispatch.html.textile.liquid
- Additional configuration:
- install/container-shell-access.html.textile.liquid
- External dependencies:
"Containers.JobsAPI.GitInternalDir": false,
"Containers.Logging": false,
"Containers.LogReuseDecisions": false,
+ "Containers.LSF": false,
"Containers.MaxComputeVMs": false,
"Containers.MaxDispatchAttempts": false,
"Containers.MaxRetryAttempts": true,
"Volumes.*.ReadOnly": true,
"Volumes.*.Replication": true,
"Volumes.*.StorageClasses": true,
- "Volumes.*.StorageClasses.*": false,
+ "Volumes.*.StorageClasses.*": true,
"Workbench": true,
"Workbench.ActivationContactLink": false,
"Workbench.APIClientConnectTimeout": true,
"syscall"
"time"
+ "git.arvados.org/arvados.git/lib/config"
"git.arvados.org/arvados.git/sdk/go/arvados"
"git.arvados.org/arvados.git/sdk/go/arvadosclient"
"git.arvados.org/arvados.git/sdk/go/dispatch"
return nil
}
+ // Load the cluster configuration. Check the error from Load()
+ // before touching cfg: on failure cfg is not usable, and the
+ // original error would otherwise be overwritten by the
+ // GetCluster() call below.
+ loader := config.NewLoader(nil, logger)
+ cfg, err := loader.Load()
+ if err != nil {
+ return fmt.Errorf("config error: %s", err)
+ }
+ // An empty cluster ID is passed to GetCluster here.
+ cluster, err := cfg.GetCluster("")
+ if err != nil {
+ return fmt.Errorf("config error: %s", err)
+ }
+
logger.Printf("crunch-dispatch-local %s started", version)
runningCmds = make(map[string]*exec.Cmd)
+ var client arvados.Client
+ client.APIHost = cluster.Services.Controller.ExternalURL.Host
+ client.AuthToken = cluster.SystemRootToken
+ client.Insecure = cluster.TLS.Insecure
+
+ if client.APIHost != "" || client.AuthToken != "" {
+ // Copy real configs into env vars so [a]
+ // MakeArvadosClient() uses them, and [b] they get
+ // propagated to crunch-run.
+ os.Setenv("ARVADOS_API_HOST", client.APIHost)
+ os.Setenv("ARVADOS_API_TOKEN", client.AuthToken)
+ os.Setenv("ARVADOS_API_HOST_INSECURE", "")
+ if client.Insecure {
+ os.Setenv("ARVADOS_API_HOST_INSECURE", "1")
+ }
+ os.Setenv("ARVADOS_EXTERNAL_CLIENT", "")
+ } else {
+ logger.Warnf("Client credentials missing from config, so falling back on environment variables (deprecated).")
+ }
+
arv, err := arvadosclient.MakeArvadosClient()
if err != nil {
logger.Errorf("error making Arvados client: %v", err)
dispatcher := dispatch.Dispatcher{
Logger: logger,
Arv: arv,
- RunContainer: (&LocalRun{startFunc, make(chan bool, 8), ctx}).run,
+ RunContainer: (&LocalRun{startFunc, make(chan bool, 8), ctx, cluster}).run,
PollPeriod: time.Duration(*pollInterval) * time.Second,
}
startCmd func(container arvados.Container, cmd *exec.Cmd) error
concurrencyLimit chan bool
ctx context.Context
+ cluster *arvados.Cluster
}
// Run a container.
// crunch-run terminates, mark the container as Cancelled.
func (lr *LocalRun) run(dispatcher *dispatch.Dispatcher,
container arvados.Container,
- status <-chan arvados.Container) {
+ status <-chan arvados.Container) error {
uuid := container.UUID
case lr.concurrencyLimit <- true:
break
case <-lr.ctx.Done():
- return
+ return lr.ctx.Err()
}
defer func() { <-lr.concurrencyLimit }()
waitGroup.Add(1)
defer waitGroup.Done()
- cmd := exec.Command(*crunchRunCommand, uuid)
+ cmd := exec.Command(*crunchRunCommand, "--runtime-engine="+lr.cluster.Containers.RuntimeEngine, uuid)
cmd.Stdin = nil
cmd.Stderr = os.Stderr
cmd.Stdout = os.Stderr
}
dispatcher.Logger.Printf("finalized container %v", uuid)
+ return nil
}
return cmd.Start()
}
- dispatcher.RunContainer = func(d *dispatch.Dispatcher, c arvados.Container, s <-chan arvados.Container) {
- (&LocalRun{startCmd, make(chan bool, 8), ctx, &cl}).run(d, c, s)
- cancel()
+ cl := arvados.Cluster{Containers: arvados.ContainersConfig{RuntimeEngine: "docker"}}
+
- return (&LocalRun{startCmd, make(chan bool, 8), ctx}).run(d, c, s)
+ dispatcher.RunContainer = func(d *dispatch.Dispatcher, c arvados.Container, s <-chan arvados.Container) error {
+ defer cancel()
++ return (&LocalRun{startCmd, make(chan bool, 8), ctx, &cl}).run(d, c, s)
}
err = dispatcher.Run(ctx)
return cmd.Start()
}
- dispatcher.RunContainer = func(d *dispatch.Dispatcher, c arvados.Container, s <-chan arvados.Container) {
- (&LocalRun{startCmd, make(chan bool, 8), ctx, &cl}).run(d, c, s)
- cancel()
+ cl := arvados.Cluster{Containers: arvados.ContainersConfig{RuntimeEngine: "docker"}}
+
- return (&LocalRun{startCmd, make(chan bool, 8), ctx}).run(d, c, s)
+ dispatcher.RunContainer = func(d *dispatch.Dispatcher, c arvados.Container, s <-chan arvados.Container) error {
+ defer cancel()
++ return (&LocalRun{startCmd, make(chan bool, 8), ctx, &cl}).run(d, c, s)
}
re := regexp.MustCompile(`(?ms).*` + expected + `.*`)
// Dispatcher service for Crunch that submits containers to the slurm queue.
import (
- "bytes"
"context"
"flag"
"fmt"
// append() here avoids modifying crunchRunCommand's
// underlying array, which is shared with other goroutines.
crArgs := append([]string(nil), crunchRunCommand...)
+ crArgs = append(crArgs, "--runtime-engine="+disp.cluster.Containers.RuntimeEngine)
crArgs = append(crArgs, container.UUID)
crScript := strings.NewReader(execScript(crArgs))
// already in the queue). Cancel the slurm job if the container's
// priority changes to zero or its state indicates it's no longer
// running.
- func (disp *Dispatcher) runContainer(_ *dispatch.Dispatcher, ctr arvados.Container, status <-chan arvados.Container) {
+ func (disp *Dispatcher) runContainer(_ *dispatch.Dispatcher, ctr arvados.Container, status <-chan arvados.Container) error {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
log.Printf("Submitting container %s to slurm", ctr.UUID)
cmd := []string{disp.cluster.Containers.CrunchRunCommand}
cmd = append(cmd, disp.cluster.Containers.CrunchRunArgumentsList...)
- if err := disp.submit(ctr, cmd); err != nil {
- var text string
- switch err := err.(type) {
- case dispatchcloud.ConstraintsNotSatisfiableError:
- var logBuf bytes.Buffer
- fmt.Fprintf(&logBuf, "cannot run container %s: %s\n", ctr.UUID, err)
- if len(err.AvailableTypes) == 0 {
- fmt.Fprint(&logBuf, "No instance types are configured.\n")
- } else {
- fmt.Fprint(&logBuf, "Available instance types:\n")
- for _, t := range err.AvailableTypes {
- fmt.Fprintf(&logBuf,
- "Type %q: %d VCPUs, %d RAM, %d Scratch, %f Price\n",
- t.Name, t.VCPUs, t.RAM, t.Scratch, t.Price,
- )
- }
- }
- text = logBuf.String()
- disp.UpdateState(ctr.UUID, dispatch.Cancelled)
- default:
- text = fmt.Sprintf("Error submitting container %s to slurm: %s", ctr.UUID, err)
- }
- log.Print(text)
-
- lr := arvadosclient.Dict{"log": arvadosclient.Dict{
- "object_uuid": ctr.UUID,
- "event_type": "dispatch",
- "properties": map[string]string{"text": text}}}
- disp.Arv.Create("logs", lr, nil)
-
- disp.Unlock(ctr.UUID)
- return
+ err := disp.submit(ctr, cmd)
+ if err != nil {
+ return err
}
}
case dispatch.Locked:
disp.Unlock(ctr.UUID)
}
- return
+ return nil
case updated, ok := <-status:
if !ok {
log.Printf("container %s is done: cancel slurm job", ctr.UUID)