From: Ward Vandewege
Date: Tue, 5 Jun 2018 17:55:31 +0000 (-0400)
Subject: * Add a basic Slurm config
X-Git-Url: https://git.arvados.org/arvados-k8s.git/commitdiff_plain/dfbdd9f483f65051cf1835af18d4b7c29a687b6d

* Add a basic Slurm config
* Add crunch-dispatch-slurm

No issue #

Arvados-DCO-1.1-Signed-off-by: Ward Vandewege
---

diff --git a/.licenseignore b/.licenseignore
index 3712913..71c0386 100644
--- a/.licenseignore
+++ b/.licenseignore
@@ -9,3 +9,5 @@ charts/arvados/config/ssl/cert
 charts/arvados/config/ssl/key
 charts/arvados/templates/NOTES.txt
 charts/arvados/templates/_helpers.tpl
+charts/arvados/config/slurm/slurm.conf
+charts/arvados/config/slurm/munge.key
diff --git a/charts/arvados/config/api-server/application.yml b/charts/arvados/config/api-server/application.yml
index 385c64c..1d24522 100644
--- a/charts/arvados/config/api-server/application.yml
+++ b/charts/arvados/config/api-server/application.yml
@@ -304,7 +304,7 @@ common:
   docker_image_formats: ["v2"]

   # :none or :slurm_immediate
-  crunch_job_wrapper: :none
+  crunch_job_wrapper: :slurm_immediate

   # username, or false = do not set uid when running jobs.
   crunch_job_user: crunch
diff --git a/charts/arvados/config/crunch-dispatch-slurm/crunch-dispatch-slurm.yml b/charts/arvados/config/crunch-dispatch-slurm/crunch-dispatch-slurm.yml
new file mode 100644
index 0000000..754a905
--- /dev/null
+++ b/charts/arvados/config/crunch-dispatch-slurm/crunch-dispatch-slurm.yml
@@ -0,0 +1,17 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+# crunch-dispatch-slurm configuration; rendered through Helm's tpl function
+# by the crunch-dispatch-slurm-configmap template. Templated values are
+# quoted so that they always parse as YAML strings.
+# NOTE(review): AuthToken ends up in a ConfigMap in plain text; consider a
+# Secret instead.
+Client:
+  APIHost: "{{ .Values.externalIP }}:444"
+  AuthToken: "{{ .Values.superUserSecret }}"
+  Insecure: true
+PollPeriod: 5000ms
+MinRetryPeriod: 30s
+SbatchArguments:
+- "--workdir=/tmp"
diff --git a/charts/arvados/config/slurm/munge.key b/charts/arvados/config/slurm/munge.key
new file mode 100644
index 0000000..33019a5
Binary files /dev/null and b/charts/arvados/config/slurm/munge.key differ
diff --git a/charts/arvados/config/slurm/slurm.conf b/charts/arvados/config/slurm/slurm.conf
new file mode 100644
index 0000000..6a54969
--- /dev/null
+++ b/charts/arvados/config/slurm/slurm.conf
@@ -0,0 +1,49 @@
+ControlMachine=arvados-slurm-controller-0
+SlurmctldPort=6817
+SlurmdPort=6818
+SrunPortRange=60001-63000
+AuthType=auth/munge
+StateSaveLocation=/var/slurm
+SlurmdSpoolDir=/tmp/slurmd
+SwitchType=switch/none
+MpiDefault=none
+SlurmctldPidFile=/var/run/slurmctld.pid
+SlurmdPidFile=/var/run/slurmd.pid
+ProctrackType=proctrack/pgid
+CacheGroups=0
+ReturnToService=2
+TaskPlugin=task/affinity
+TreeWidth=64
+#
+# TIMERS
+SlurmctldTimeout=300
+SlurmdTimeout=300
+InactiveLimit=0
+MessageTimeout=10
+MinJobAge=300
+KillWait=30
+Waittime=0
+#
+# SCHEDULING
+SchedulerType=sched/backfill
+SchedulerPort=7321
+#SelectType=select/linear
+FastSchedule=0
+#
+# LOGGING
+SlurmctldDebug=3
+#SlurmctldLogFile=
+SlurmdDebug=3
+#SlurmdLogFile=
+JobCompType=jobcomp/none
+#JobCompLoc=
+JobAcctGatherType=jobacct_gather/none
+#
+# COMPUTE NODES
+NodeName=DEFAULT State=UNKNOWN
+NodeName=arvados-slurm-compute-63 CPUs=32 State=UNKNOWN RealMemory=58368 Weight=58368 TmpDisk=640000
+PartitionName=DEFAULT MaxTime=INFINITE State=UP
+
+NodeName=arvados-slurm-compute-[0-63]
+
+PartitionName=compute Nodes=arvados-slurm-compute-[0-63] Default=YES Shared=YES
diff --git a/charts/arvados/templates/crunch-dispatch-slurm-configmap.yaml b/charts/arvados/templates/crunch-dispatch-slurm-configmap.yaml
new file mode 100644
index 0000000..7d98e52
--- /dev/null
+++ b/charts/arvados/templates/crunch-dispatch-slurm-configmap.yaml
@@ -0,0 +1,15 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: crunch-dispatch-slurm-configmap
+  labels:
+    app: {{ template "arvados.name" . }}
+    chart: {{ template "arvados.chart" . }}
+    release: {{ .Release.Name }}
+    heritage: {{ .Release.Service }}
+data:
+{{ (tpl (.Files.Glob "config/crunch-dispatch-slurm/*").AsConfig . ) | indent 2 }}
diff --git a/charts/arvados/templates/crunch-dispatch-slurm-deployment.yaml b/charts/arvados/templates/crunch-dispatch-slurm-deployment.yaml
new file mode 100644
index 0000000..95304be
--- /dev/null
+++ b/charts/arvados/templates/crunch-dispatch-slurm-deployment.yaml
@@ -0,0 +1,63 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+# Runs munged and crunch-dispatch-slurm in a single pod.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: "arvados-crunch-dispatch-slurm"
+  labels:
+    app: arvados-crunch-dispatch-slurm
+    chart: {{ template "arvados.chart" . }}
+    release: {{ .Release.Name }}
+    heritage: {{ .Release.Service }}
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: arvados-crunch-dispatch-slurm
+      release: {{ .Release.Name }}
+  template:
+    metadata:
+      labels:
+        app: arvados-crunch-dispatch-slurm
+        release: {{ .Release.Name }}
+    spec:
+      # hostname is a pod-level field; it is not valid on a container.
+      hostname: arvados-crunch-dispatch-slurm
+      containers:
+        - name: {{ .Chart.Name }}
+          image: "cure/arvados-slurm-runtime"
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
+          command:
+            - "sh"
+            - "-c"
+            - "/usr/local/bin/bootstrap.sh crunch-dispatch-slurm={{ .Values.arvados.versions.distribution.crunchDispatchSlurm }} && mkdir /munge && cp -p /etc/munge/munge.key /munge && chown munge:munge /munge/munge.key && mkdir /var/slurm && chmod 700 /var/slurm && sudo -u munge munged --key-file=/munge/munge.key && crunch-dispatch-slurm"
+          env:
+            - name: ARVADOS_API_HOST
+              value: "{{ .Values.externalIP }}:444"
+            - name: ARVADOS_API_HOST_INSECURE
+              value: "true"
+            - name: ARVADOS_API_TOKEN
+              value: "{{ .Values.superUserSecret }}"
+          volumeMounts:
+            - name: slurm-configmap
+              mountPath: /etc/slurm-llnl/slurm.conf
+              subPath: slurm.conf
+            - name: slurm-configmap
+              mountPath: /etc/munge/munge.key
+              subPath: munge.key
+            - name: crunch-dispatch-slurm-configmap
+              mountPath: /etc/arvados/crunch-dispatch-slurm/crunch-dispatch-slurm.yml
+              subPath: crunch-dispatch-slurm.yml
+
+      volumes:
+        - name: slurm-configmap
+          configMap:
+            name: slurm-configmap
+            defaultMode: 0400
+        - name: crunch-dispatch-slurm-configmap
+          configMap:
+            name: crunch-dispatch-slurm-configmap
+
diff --git a/charts/arvados/templates/slurm-compute-deployment.yaml b/charts/arvados/templates/slurm-compute-deployment.yaml
new file mode 100644
index 0000000..ad0edf4
--- /dev/null
+++ b/charts/arvados/templates/slurm-compute-deployment.yaml
@@ -0,0 +1,71 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+# Slurm compute nodes: each pod runs munged, the Docker daemon and slurmd.
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: "arvados-slurm-compute"
+  labels:
+    app: arvados-slurm-compute
+    chart: {{ template "arvados.chart" . }}
+    release: {{ .Release.Name }}
+    heritage: {{ .Release.Service }}
+spec:
+  replicas: 4
+  serviceName: arvados-slurm-compute
+  selector:
+    matchLabels:
+      app: arvados-slurm-compute
+      release: {{ .Release.Name }}
+  template:
+    metadata:
+      labels:
+        app: arvados-slurm-compute
+        release: {{ .Release.Name }}
+    spec:
+      # No hostname is set here: it is not a valid container field, and the
+      # StatefulSet already names the pods arvados-slurm-compute-N, matching
+      # the NodeName range in slurm.conf.
+      containers:
+        - name: {{ .Chart.Name }}
+          image: "cure/arvados-slurm-runtime"
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
+          command:
+            - "sh"
+            - "-c"
+            - "/usr/local/bin/bootstrap.sh python-arvados-python-client={{ .Values.arvados.versions.distribution.pythonArvadosPythonClient }} crunch-run={{ .Values.arvados.versions.distribution.crunchRun }} python-arvados-fuse={{ .Values.arvados.versions.distribution.pythonArvadosFuse }} && mkdir /munge && cp -p /etc/munge/munge.key /munge && chown munge:munge /munge/munge.key && mkdir /var/slurm && chmod 700 /var/slurm && sudo -u munge munged --key-file=/munge/munge.key && /etc/init.d/docker start && slurmd -D"
+          securityContext:
+            privileged: true
+          env:
+            - name: ARVADOS_API_HOST
+              value: "{{ .Values.externalIP }}:444"
+            - name: ARVADOS_API_HOST_INSECURE
+              value: "true"
+            - name: ARVADOS_API_TOKEN
+              value: "{{ .Values.superUserSecret }}"
+          volumeMounts:
+            - name: slurm-configmap
+              mountPath: /etc/slurm-llnl/slurm.conf
+              subPath: slurm.conf
+            - name: slurm-configmap
+              mountPath: /etc/munge/munge.key
+              subPath: munge.key
+            - name: docker-graph-storage
+              mountPath: /var/lib/docker
+
+      volumes:
+        - name: slurm-configmap
+          configMap:
+            name: slurm-configmap
+            defaultMode: 0400
+        - name: docker-graph-storage
+          emptyDir: {}
+
+# - "/usr/local/bin/bootstrap.sh crunch-dispatch-slurm={{ .Values.arvados.versions.distribution.crunchDispatchSlurm }} && chown munge /etc/munge/munge.key && chmod 0400 /etc/munge/munge.key && sudo -u munge munged && slurmctld -D && crunch-dispatch-slurm"
+# - "/usr/local/bin/bootstrap.sh crunch-dispatch-slurm={{ .Values.arvados.versions.distribution.crunchDispatchSlurm }} && sudo -u munge munged && slurmctld -D && crunch-dispatch-slurm"
+# - "/usr/local/bin/bootstrap.sh crunch-dispatch-slurm={{ .Values.arvados.versions.distribution.crunchDispatchSlurm }} && crunch-dispatch-slurm"
+
+# for slurm client, compute node
+# - "/usr/local/bin/bootstrap.sh python-arvados-python-client={{ .Values.arvados.versions.distribution.pythonArvadosPythonClient }} crunch-run={{ .Values.arvados.versions.distribution.crunchRun }} python-arvados-fuse={{ .Values.arvados.versions.distribution.pythonArvadosFuse }} && chown munge /etc/munge/munge.key; chmod 0400 /etc/munge/munge.key; sudo -u munge munged && slurmd -D"
diff --git a/charts/arvados/templates/slurm-compute-service.yaml b/charts/arvados/templates/slurm-compute-service.yaml
new file mode 100644
index 0000000..088db37
--- /dev/null
+++ b/charts/arvados/templates/slurm-compute-service.yaml
@@ -0,0 +1,20 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: arvados-slurm-compute
+  labels:
+    app: {{ template "arvados.name" . }}
+    chart: {{ template "arvados.chart" . }}
+    release: {{ .Release.Name }}
+    heritage: {{ .Release.Service }}
+spec:
+  ports:
+    - port: 6818
+      protocol: TCP
+  clusterIP: None
+  selector:
+    app: arvados-slurm-compute
diff --git a/charts/arvados/templates/slurm-configmap.yaml b/charts/arvados/templates/slurm-configmap.yaml
new file mode 100644
index 0000000..65d7ef6
--- /dev/null
+++ b/charts/arvados/templates/slurm-configmap.yaml
@@ -0,0 +1,20 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: slurm-configmap
+  labels:
+    app: {{ template "arvados.name" . }}
+    chart: {{ template "arvados.chart" . }}
+    release: {{ .Release.Name }}
+    heritage: {{ .Release.Service }}
+data:
+{{ (tpl (.Files.Glob "config/slurm/slurm.conf").AsConfig . ) | indent 2 }}
+# NOTE(review): the munge key is a shared secret; consider storing it in a
+# Secret rather than a ConfigMap.
+binaryData:
+  munge.key:
+    {{ .Files.Get "config/slurm/munge.key" | b64enc }}
diff --git a/charts/arvados/templates/slurm-controller-deployment.yaml b/charts/arvados/templates/slurm-controller-deployment.yaml
new file mode 100644
index 0000000..3443421
--- /dev/null
+++ b/charts/arvados/templates/slurm-controller-deployment.yaml
@@ -0,0 +1,61 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+# Slurm controller: a single pod running munged and slurmctld.
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: "arvados-slurm-controller"
+  labels:
+    app: arvados-slurm-controller
+    chart: {{ template "arvados.chart" . }}
+    release: {{ .Release.Name }}
+    heritage: {{ .Release.Service }}
+spec:
+  replicas: 1
+  serviceName: arvados-slurm-controller
+  selector:
+    matchLabels:
+      app: arvados-slurm-controller
+      release: {{ .Release.Name }}
+  template:
+    metadata:
+      labels:
+        app: arvados-slurm-controller
+        release: {{ .Release.Name }}
+    spec:
+      dnsConfig:
+        # Search the compute headless service's domain in the release
+        # namespace so bare names like arvados-slurm-compute-N resolve.
+        searches:
+          - "arvados-slurm-compute.{{ .Release.Namespace }}.svc.cluster.local"
+      containers:
+        - name: {{ .Chart.Name }}
+          image: "cure/arvados-slurm-runtime"
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
+          command:
+            - "sh"
+            - "-c"
+            - "mkdir /munge && cp -p /etc/munge/munge.key /munge && chown munge:munge /munge/munge.key && mkdir /var/slurm && chmod 700 /var/slurm && sudo -u munge munged --key-file=/munge/munge.key && slurmctld -D"
+          env:
+            - name: ARVADOS_API_HOST
+              value: "{{ .Values.externalIP }}:444"
+            - name: ARVADOS_API_HOST_INSECURE
+              value: "true"
+            - name: ARVADOS_API_TOKEN
+              value: "{{ .Values.superUserSecret }}"
+          volumeMounts:
+            - name: slurm-configmap
+              mountPath: /etc/slurm-llnl/slurm.conf
+              subPath: slurm.conf
+            - name: slurm-configmap
+              mountPath: /etc/munge/munge.key
+              subPath: munge.key
+
+      volumes:
+        - name: slurm-configmap
+          configMap:
+            name: slurm-configmap
+            defaultMode: 0400
+
diff --git a/charts/arvados/templates/slurm-controller-service.yaml b/charts/arvados/templates/slurm-controller-service.yaml
new file mode 100644
index 0000000..955a508
--- /dev/null
+++ b/charts/arvados/templates/slurm-controller-service.yaml
@@ -0,0 +1,21 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+# The Service name is the ControlMachine value from config/slurm/slurm.conf,
+# so that name resolves to the controller pod selected below.
+apiVersion: v1
+kind: Service
+metadata:
+  name: arvados-slurm-controller-0
+  labels:
+    app: {{ template "arvados.name" . }}
+    chart: {{ template "arvados.chart" . }}
+    release: {{ .Release.Name }}
+    heritage: {{ .Release.Service }}
+spec:
+  ports:
+    - port: 6817
+      protocol: TCP
+  selector:
+    app: arvados-slurm-controller
diff --git a/charts/arvados/values.yaml b/charts/arvados/values.yaml
index eddd0ef..3c00b94 100644
--- a/charts/arvados/values.yaml
+++ b/charts/arvados/values.yaml
@@ -70,7 +70,7 @@ arvados:
       arvadosWorkbench: 1.1.4.20180521175419*
       arvadosWs: 1.1.4.20180516205929*
       arvadosSSOServer: 0.1.20171122141118.ba584a7*
-      crunchDispatchSlurm: 1.1.4.20180516205929*
+      crunchDispatchSlurm: 1.1.4.20180524153119*
       crunchRun: 1.1.4.20180516205929*
       crunchRunner: 1.1.4.20180516205929*
       keepBalance: 1.1.4.20180516205929*