X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/c36ec856598f214e340e3335ddd347d131335bf8..ac312e0acae4fd114114081c9f4791d05e640831:/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go?ds=inline diff --git a/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go b/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go index c31d799752..5a9ef91c3d 100644 --- a/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go +++ b/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go @@ -7,6 +7,8 @@ package dispatchslurm import ( "context" + "crypto/hmac" + "crypto/sha256" "fmt" "log" "math" @@ -17,6 +19,8 @@ import ( "time" "git.arvados.org/arvados.git/lib/cmd" + "git.arvados.org/arvados.git/lib/controller/dblock" + "git.arvados.org/arvados.git/lib/ctrlctx" "git.arvados.org/arvados.git/lib/dispatchcloud" "git.arvados.org/arvados.git/lib/service" "git.arvados.org/arvados.git/sdk/go/arvados" @@ -53,10 +57,11 @@ const initialNiceValue int64 = 10000 type Dispatcher struct { *dispatch.Dispatcher - logger logrus.FieldLogger - cluster *arvados.Cluster - sqCheck *SqueueChecker - slurm Slurm + logger logrus.FieldLogger + cluster *arvados.Cluster + sqCheck *SqueueChecker + slurm Slurm + dbConnector ctrlctx.DBConnector done chan struct{} err error @@ -88,6 +93,7 @@ func (disp *Dispatcher) configure() error { disp.Client.APIHost = disp.cluster.Services.Controller.ExternalURL.Host disp.Client.AuthToken = disp.cluster.SystemRootToken disp.Client.Insecure = disp.cluster.TLS.Insecure + disp.dbConnector = ctrlctx.DBConnector{PostgreSQL: disp.cluster.PostgreSQL} if disp.Client.APIHost != "" || disp.Client.AuthToken != "" { // Copy real configs into env vars so [a] @@ -99,7 +105,6 @@ func (disp *Dispatcher) configure() error { if disp.Client.Insecure { os.Setenv("ARVADOS_API_HOST_INSECURE", "1") } - os.Setenv("ARVADOS_EXTERNAL_CLIENT", "") for k, v := range disp.cluster.Containers.SLURM.SbatchEnvironmentVariables { os.Setenv(k, v) } @@ -136,6 +141,8 @@ func (disp *Dispatcher) setup() { } func (disp *Dispatcher) run() error { + dblock.Dispatch.Lock(context.Background(), disp.dbConnector.GetDB) + defer dblock.Dispatch.Unlock() defer disp.sqCheck.Stop() if disp.cluster != nil && len(disp.cluster.InstanceTypes) > 0 { @@ -190,14 +197,16 @@ func (disp *Dispatcher) sbatchArgs(container arvados.Container) ([]string, error if disp.cluster == nil { // no instance types configured args = append(args, disp.slurmConstraintArgs(container)...) - } else if it, err := dispatchcloud.ChooseInstanceType(disp.cluster, &container); err == dispatchcloud.ErrInstanceTypesNotConfigured { + } else if types, err := dispatchcloud.ChooseInstanceType(disp.cluster, &container); err == dispatchcloud.ErrInstanceTypesNotConfigured { // ditto args = append(args, disp.slurmConstraintArgs(container)...) } else if err != nil { return nil, err } else { - // use instancetype constraint instead of slurm mem/cpu/tmp specs - args = append(args, "--constraint=instancetype="+it.Name) + // use instancetype constraint instead of slurm + // mem/cpu/tmp specs (note types[0] is the lowest-cost + // suitable instance type) + args = append(args, "--constraint=instancetype="+types[0].Name) } if len(container.SchedulingParameters.Partitions) > 0 { @@ -213,7 +222,12 @@ func (disp *Dispatcher) submit(container arvados.Container, crunchRunCommand []s crArgs := append([]string(nil), crunchRunCommand...) crArgs = append(crArgs, "--runtime-engine="+disp.cluster.Containers.RuntimeEngine) crArgs = append(crArgs, container.UUID) - crScript := strings.NewReader(execScript(crArgs)) + + h := hmac.New(sha256.New, []byte(disp.cluster.SystemRootToken)) + fmt.Fprint(h, container.UUID) + authsecret := fmt.Sprintf("%x", h.Sum(nil)) + + crScript := strings.NewReader(execScript(crArgs, map[string]string{"GatewayAuthSecret": authsecret})) sbArgs, err := disp.sbatchArgs(container) if err != nil {