From dfbdd9f483f65051cf1835af18d4b7c29a687b6d Mon Sep 17 00:00:00 2001 From: Ward Vandewege Date: Tue, 5 Jun 2018 13:55:31 -0400 Subject: [PATCH] * Add a basic Slurm config * Add crunch-dispatch-slurm No issue # Arvados-DCO-1.1-Signed-off-by: Ward Vandewege --- .licenseignore | 2 + .../arvados/config/api-server/application.yml | 2 +- .../crunch-dispatch-slurm.yml | 12 ++++ charts/arvados/config/slurm/munge.key | Bin 0 -> 1024 bytes charts/arvados/config/slurm/slurm.conf | 49 +++++++++++++ .../crunch-dispatch-slurm-configmap.yaml | 15 ++++ .../crunch-dispatch-slurm-deployment.yaml | 62 ++++++++++++++++ .../templates/slurm-compute-deployment.yaml | 68 ++++++++++++++++++ .../templates/slurm-compute-service.yaml | 20 ++++++ charts/arvados/templates/slurm-configmap.yaml | 18 +++++ .../slurm-controller-deployment.yaml | 59 +++++++++++++++ .../templates/slurm-controller-service.yaml | 19 +++++ charts/arvados/values.yaml | 2 +- 13 files changed, 326 insertions(+), 2 deletions(-) create mode 100644 charts/arvados/config/crunch-dispatch-slurm/crunch-dispatch-slurm.yml create mode 100644 charts/arvados/config/slurm/munge.key create mode 100644 charts/arvados/config/slurm/slurm.conf create mode 100644 charts/arvados/templates/crunch-dispatch-slurm-configmap.yaml create mode 100644 charts/arvados/templates/crunch-dispatch-slurm-deployment.yaml create mode 100644 charts/arvados/templates/slurm-compute-deployment.yaml create mode 100644 charts/arvados/templates/slurm-compute-service.yaml create mode 100644 charts/arvados/templates/slurm-configmap.yaml create mode 100644 charts/arvados/templates/slurm-controller-deployment.yaml create mode 100644 charts/arvados/templates/slurm-controller-service.yaml diff --git a/.licenseignore b/.licenseignore index 3712913..71c0386 100644 --- a/.licenseignore +++ b/.licenseignore @@ -9,3 +9,5 @@ charts/arvados/config/ssl/cert charts/arvados/config/ssl/key charts/arvados/templates/NOTES.txt charts/arvados/templates/_helpers.tpl +charts/arvados/config/slurm/slurm.conf +charts/arvados/config/slurm/munge.key diff --git a/charts/arvados/config/api-server/application.yml b/charts/arvados/config/api-server/application.yml index 385c64c..1d24522 100644 --- a/charts/arvados/config/api-server/application.yml +++ b/charts/arvados/config/api-server/application.yml @@ -304,7 +304,7 @@ common: docker_image_formats: ["v2"] # :none or :slurm_immediate - crunch_job_wrapper: :none + crunch_job_wrapper: :slurm_immediate # username, or false = do not set uid when running jobs. crunch_job_user: crunch diff --git a/charts/arvados/config/crunch-dispatch-slurm/crunch-dispatch-slurm.yml b/charts/arvados/config/crunch-dispatch-slurm/crunch-dispatch-slurm.yml new file mode 100644 index 0000000..754a905 --- /dev/null +++ b/charts/arvados/config/crunch-dispatch-slurm/crunch-dispatch-slurm.yml @@ -0,0 +1,12 @@ +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +Client: + APIHost: {{ .Values.externalIP }}:444 + AuthToken: {{ .Values.superUserSecret }} + Insecure: true +PollPeriod: 5000ms +MinRetryPeriod: 30s +SbatchArguments: +- "--workdir=/tmp" diff --git a/charts/arvados/config/slurm/munge.key b/charts/arvados/config/slurm/munge.key new file mode 100644 index 0000000000000000000000000000000000000000..33019a530c4ca6667ba11bbda1349f2c390d8122 GIT binary patch literal 1024 zcmV+b1poWnPnYbUw#xH#I06c0V7emh)UubT+Dq}b)sAIWIV9iKn8*VYp+w3>aN1@j zpx6@UryM~&e1zZ7Q7?5aN<<_-`y5qmjDb29Z`3F^?do!fO8Wi32f>rrf4&Qof9<-I zavj=Vu&mM*fVZwIx@9=l`wNTl47Qdmx#Z7lQa!ed>SM9m8qB{4Cdb1c-`fAUPQ}2mS9{z?N zA_^vD2maXF4o^dr62I1lQss`5#x9J=M4AGgN{7;#AV1kvw%7QZamY_YW5IL--?Q{z z<(sSO21voS;Y6uFGtNgA@Ph-E3vLf?cH0!MQ1zw2x~(gkU|_&~|DPwl2oHuv4p+{m z;4LKMjvCxa^^u@Oup$rgRf&T_hSlJMgbHr`n;40XLLqxybMdLp_hl&Q1~zX?!(YSC znBb64Sq@nsgd>chpU;8MAg&vs?))q;oeVt;R@{In%aqRc!CFN>wGxd3Q#&6I9vw_H z<4gKKCKQj9{4RR~B1&ecc=r(XX_`U1&-(Q94XEbC;lB~7=Xjr{NUD-f+#9VR$d?lc zF>i(7y=OahIsFfSK+zi4eBtnKR|8zFkT-3dNsIgk#jxKkOSMdv zf}F?4%ZKkXLa+g0X(uwCm(#|4-qnZSASE83^>TxZO*W|bpi1KoqWgKnk|zIQ zZKC#~uj*S3j8R{kZ{cdniZtQRQ6BY@qy`a0yumg!@V&S=1~S5X=nw`}A@;K_fS6EH z+4MI(AQohLPcEc?s|=6xvnTlvEuBl}7_4Y6H3P|#uQC8~FuK|BEv=@Sh_zpf9fp}S zr_|Nl92<4R_7!MDqpn@Qt52+0s(;==gS=nGc> literal 0 HcmV?d00001 diff --git a/charts/arvados/config/slurm/slurm.conf b/charts/arvados/config/slurm/slurm.conf new file mode 100644 index 0000000..6a54969 --- /dev/null +++ b/charts/arvados/config/slurm/slurm.conf @@ -0,0 +1,49 @@ +ControlMachine=arvados-slurm-controller-0 +SlurmctldPort=6817 +SlurmdPort=6818 +SrunPortRange=60001-63000 +AuthType=auth/munge +StateSaveLocation=/var/slurm +SlurmdSpoolDir=/tmp/slurmd +SwitchType=switch/none +MpiDefault=none +SlurmctldPidFile=/var/run/slurmctld.pid +SlurmdPidFile=/var/run/slurmd.pid +ProctrackType=proctrack/pgid +CacheGroups=0 +ReturnToService=2 +TaskPlugin=task/affinity +TreeWidth=64 +# +# TIMERS +SlurmctldTimeout=300 +SlurmdTimeout=300 +InactiveLimit=0 +MessageTimeout=10 +MinJobAge=300 +KillWait=30 +Waittime=0 +# +# SCHEDULING +SchedulerType=sched/backfill +SchedulerPort=7321 +#SelectType=select/linear +FastSchedule=0 +# +# LOGGING +SlurmctldDebug=3 +#SlurmctldLogFile= +SlurmdDebug=3 +#SlurmdLogFile= +JobCompType=jobcomp/none +#JobCompLoc= +JobAcctGatherType=jobacct_gather/none +# +# COMPUTE NODES +NodeName=DEFAULT State=UNKNOWN +NodeName=arvados-slurm-compute-63 CPUs=32 State=UNKNOWN RealMemory=58368 Weight=58368 TmpDisk=640000 +PartitionName=DEFAULT MaxTime=INFINITE State=UP + +NodeName=arvados-slurm-compute-[0-63] + +PartitionName=compute Nodes=arvados-slurm-compute-[0-63] Default=YES Shared=YES diff --git a/charts/arvados/templates/crunch-dispatch-slurm-configmap.yaml b/charts/arvados/templates/crunch-dispatch-slurm-configmap.yaml new file mode 100644 index 0000000..7d98e52 --- /dev/null +++ b/charts/arvados/templates/crunch-dispatch-slurm-configmap.yaml @@ -0,0 +1,15 @@ +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: crunch-dispatch-slurm-configmap + labels: + app: {{ template "arvados.name" . }} + chart: {{ template "arvados.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +data: +{{ (tpl (.Files.Glob "config/crunch-dispatch-slurm/*").AsConfig . ) | indent 2 }} diff --git a/charts/arvados/templates/crunch-dispatch-slurm-deployment.yaml b/charts/arvados/templates/crunch-dispatch-slurm-deployment.yaml new file mode 100644 index 0000000..95304be --- /dev/null +++ b/charts/arvados/templates/crunch-dispatch-slurm-deployment.yaml @@ -0,0 +1,62 @@ +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: "arvados-crunch-dispatch-slurm" + labels: + app: arvados-crunch-dispatch-slurm + chart: {{ template "arvados.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +spec: + replicas: 1 + serviceName: arvados-crunch-dispatch-slurm + selector: + matchLabels: + app: arvados-crunch-dispatch-slurm + release: {{ .Release.Name }} + template: + metadata: + labels: + app: arvados-crunch-dispatch-slurm + release: {{ .Release.Name }} + spec: + containers: + - name: {{ .Chart.Name }} + hostname: arvados-crunch-dispatch-slurm + image: "cure/arvados-slurm-runtime" + imagePullPolicy: {{ .Values.image.pullPolicy }} + command: + - "sh" + - "-c" + - "/usr/local/bin/bootstrap.sh crunch-dispatch-slurm={{ .Values.arvados.versions.distribution.crunchDispatchSlurm }} && mkdir /munge && cp -p /etc/munge/munge.key /munge && chown munge:munge /munge/munge.key && mkdir /var/slurm && chmod 700 /var/slurm && sudo -u munge munged --key-file=/munge/munge.key && crunch-dispatch-slurm" + env: + - name : ARVADOS_API_HOST + value: "{{ .Values.externalIP }}:444" + - name : ARVADOS_API_HOST_INSECURE + value: "true" + - name : ARVADOS_API_TOKEN + value: "{{ .Values.superUserSecret }}" + volumeMounts: + - name: slurm-configmap + mountPath: /etc/slurm-llnl/slurm.conf + subPath: slurm.conf + - name: slurm-configmap + mountPath: /etc/munge/munge.key + subPath: munge.key + - name: crunch-dispatch-slurm-configmap + mountPath: /etc/arvados/crunch-dispatch-slurm/crunch-dispatch-slurm.yml + subPath: crunch-dispatch-slurm.yml + + volumes: + - name: slurm-configmap + configMap: + name: slurm-configmap + defaultMode: 0400 + - name: crunch-dispatch-slurm-configmap + configMap: + name: crunch-dispatch-slurm-configmap + diff --git a/charts/arvados/templates/slurm-compute-deployment.yaml b/charts/arvados/templates/slurm-compute-deployment.yaml new file mode 100644 index 0000000..ad0edf4 --- /dev/null +++ b/charts/arvados/templates/slurm-compute-deployment.yaml @@ -0,0 +1,68 @@ +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: "arvados-slurm-compute" + labels: + app: arvados-slurm-compute + chart: {{ template "arvados.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +spec: + replicas: 4 + serviceName: arvados-slurm-compute + selector: + matchLabels: + app: arvados-slurm-compute + release: {{ .Release.Name }} + template: + metadata: + labels: + app: arvados-slurm-compute + release: {{ .Release.Name }} + spec: + containers: + - name: {{ .Chart.Name }} + hostname: arvados-slurm-compute + image: "cure/arvados-slurm-runtime" + imagePullPolicy: {{ .Values.image.pullPolicy }} + command: + - "sh" + - "-c" + - "/usr/local/bin/bootstrap.sh python-arvados-python-client={{ .Values.arvados.versions.distribution.pythonArvadosPythonClient }} crunch-run={{ .Values.arvados.versions.distribution.crunchRun }} python-arvados-fuse={{ .Values.arvados.versions.distribution.pythonArvadosFuse }} && mkdir /munge && cp -p /etc/munge/munge.key /munge && chown munge:munge /munge/munge.key && mkdir /var/slurm && chmod 700 /var/slurm && sudo -u munge munged --key-file=/munge/munge.key && /etc/init.d/docker start && slurmd -D" + securityContext: + privileged: true + env: + - name : ARVADOS_API_HOST + value: "{{ .Values.externalIP }}:444" + - name : ARVADOS_API_HOST_INSECURE + value: "true" + - name : ARVADOS_API_TOKEN + value: "{{ .Values.superUserSecret }}" + volumeMounts: + - name: slurm-configmap + mountPath: /etc/slurm-llnl/slurm.conf + subPath: slurm.conf + - name: slurm-configmap + mountPath: /etc/munge/munge.key + subPath: munge.key + - name: docker-graph-storage + mountPath: /var/lib/docker + + volumes: + - name: slurm-configmap + configMap: + name: slurm-configmap + defaultMode: 0400 + - name: docker-graph-storage + emptyDir: {} + +# - "/usr/local/bin/bootstrap.sh crunch-dispatch-slurm={{ .Values.arvados.versions.distribution.crunchDispatchSlurm }} && chown munge /etc/munge/munge.key && chmod 0400 /etc/munge/munge.key && sudo -u munge munged && slurmctld -D && crunch-dispatch-slurm" +# - "/usr/local/bin/bootstrap.sh crunch-dispatch-slurm={{ .Values.arvados.versions.distribution.crunchDispatchSlurm }} && sudo -u munge munged && slurmctld -D && crunch-dispatch-slurm" +# - "/usr/local/bin/bootstrap.sh crunch-dispatch-slurm={{ .Values.arvados.versions.distribution.crunchDispatchSlurm }} && crunch-dispatch-slurm" + +# for slurm client, compute node +# - "/usr/local/bin/bootstrap.sh python-arvados-python-client={{ .Values.arvados.versions.distribution.pythonArvadosPythonClient }} crunch-run={{ .Values.arvados.versions.distribution.crunchRun }} python-arvados-fuse={{ .Values.arvados.versions.distribution.pythonArvadosFuse }} && chown munge /etc/munge/munge.key; chmod 0400 /etc/munge/munge.key; sudo -u munge munged && slurmd -D" diff --git a/charts/arvados/templates/slurm-compute-service.yaml b/charts/arvados/templates/slurm-compute-service.yaml new file mode 100644 index 0000000..088db37 --- /dev/null +++ b/charts/arvados/templates/slurm-compute-service.yaml @@ -0,0 +1,20 @@ +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: arvados-slurm-compute + labels: + app: {{ template "arvados.name" . }} + chart: {{ template "arvados.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +spec: + ports: + - port: 6818 + protocol: TCP + clusterIP: None + selector: + app: arvados-slurm-compute diff --git a/charts/arvados/templates/slurm-configmap.yaml b/charts/arvados/templates/slurm-configmap.yaml new file mode 100644 index 0000000..65d7ef6 --- /dev/null +++ b/charts/arvados/templates/slurm-configmap.yaml @@ -0,0 +1,18 @@ +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: slurm-configmap + labels: + app: {{ template "arvados.name" . }} + chart: {{ template "arvados.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +data: +{{ (tpl (.Files.Glob "config/slurm/slurm.conf").AsConfig . ) | indent 2 }} +binaryData: + munge.key: + {{ .Files.Get "config/slurm/munge.key" | b64enc }} diff --git a/charts/arvados/templates/slurm-controller-deployment.yaml b/charts/arvados/templates/slurm-controller-deployment.yaml new file mode 100644 index 0000000..3443421 --- /dev/null +++ b/charts/arvados/templates/slurm-controller-deployment.yaml @@ -0,0 +1,59 @@ +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: "arvados-slurm-controller" + labels: + app: arvados-slurm-controller + chart: {{ template "arvados.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +spec: + replicas: 1 + serviceName: arvados-slurm-controller + selector: + matchLabels: + app: arvados-slurm-controller + release: {{ .Release.Name }} + template: + metadata: + labels: + app: arvados-slurm-controller + release: {{ .Release.Name }} + spec: + dnsConfig: + searches: + - "arvados-slurm-compute.default.svc.cluster.local" + containers: + - name: {{ .Chart.Name }} + hostname: arvados-slurm-controller + image: "cure/arvados-slurm-runtime" + imagePullPolicy: {{ .Values.image.pullPolicy }} + command: + - "sh" + - "-c" + - "mkdir /munge && cp -p /etc/munge/munge.key /munge && chown munge:munge /munge/munge.key && mkdir /var/slurm && chmod 700 /var/slurm && sudo -u munge munged --key-file=/munge/munge.key && slurmctld -D" + env: + - name : ARVADOS_API_HOST + value: "{{ .Values.externalIP }}:444" + - name : ARVADOS_API_HOST_INSECURE + value: "true" + - name : ARVADOS_API_TOKEN + value: "{{ .Values.superUserSecret }}" + volumeMounts: + - name: slurm-configmap + mountPath: /etc/slurm-llnl/slurm.conf + subPath: slurm.conf + - name: slurm-configmap + mountPath: /etc/munge/munge.key + subPath: munge.key + + volumes: + - name: slurm-configmap + configMap: + name: slurm-configmap + defaultMode: 0400 + diff --git a/charts/arvados/templates/slurm-controller-service.yaml b/charts/arvados/templates/slurm-controller-service.yaml new file mode 100644 index 0000000..955a508 --- /dev/null +++ b/charts/arvados/templates/slurm-controller-service.yaml @@ -0,0 +1,19 @@ +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: arvados-slurm-controller-0 + labels: + app: {{ template "arvados.name" . }} + chart: {{ template "arvados.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +spec: + ports: + - port: 6817 + protocol: TCP + selector: + app: arvados-slurm-controller diff --git a/charts/arvados/values.yaml b/charts/arvados/values.yaml index eddd0ef..3c00b94 100644 --- a/charts/arvados/values.yaml +++ b/charts/arvados/values.yaml @@ -70,7 +70,7 @@ arvados: arvadosWorkbench: 1.1.4.20180521175419* arvadosWs: 1.1.4.20180516205929* arvadosSSOServer: 0.1.20171122141118.ba584a7* - crunchDispatchSlurm: 1.1.4.20180516205929* + crunchDispatchSlurm: 1.1.4.20180524153119* crunchRun: 1.1.4.20180516205929* crunchRunner: 1.1.4.20180516205929* keepBalance: 1.1.4.20180516205929* -- 2.30.2