* Add a basic Slurm config
author Ward Vandewege <wvandewege@veritasgenetics.com>
Tue, 5 Jun 2018 17:55:31 +0000 (13:55 -0400)
committer Ward Vandewege <wvandewege@veritasgenetics.com>
Tue, 12 Jun 2018 19:10:42 +0000 (15:10 -0400)
* Add crunch-dispatch-slurm

No issue #

Arvados-DCO-1.1-Signed-off-by: Ward Vandewege <wvandewege@veritasgenetics.com>

13 files changed:
.licenseignore
charts/arvados/config/api-server/application.yml
charts/arvados/config/crunch-dispatch-slurm/crunch-dispatch-slurm.yml [new file with mode: 0644]
charts/arvados/config/slurm/munge.key [new file with mode: 0644]
charts/arvados/config/slurm/slurm.conf [new file with mode: 0644]
charts/arvados/templates/crunch-dispatch-slurm-configmap.yaml [new file with mode: 0644]
charts/arvados/templates/crunch-dispatch-slurm-deployment.yaml [new file with mode: 0644]
charts/arvados/templates/slurm-compute-deployment.yaml [new file with mode: 0644]
charts/arvados/templates/slurm-compute-service.yaml [new file with mode: 0644]
charts/arvados/templates/slurm-configmap.yaml [new file with mode: 0644]
charts/arvados/templates/slurm-controller-deployment.yaml [new file with mode: 0644]
charts/arvados/templates/slurm-controller-service.yaml [new file with mode: 0644]
charts/arvados/values.yaml

index 371291376b41c717d9de9428d9b60693ac4a1271..71c03866281edda0c1934b7328404bdde7c23cc5 100644 (file)
@@ -9,3 +9,5 @@ charts/arvados/config/ssl/cert
 charts/arvados/config/ssl/key
 charts/arvados/templates/NOTES.txt
 charts/arvados/templates/_helpers.tpl
 charts/arvados/config/ssl/key
 charts/arvados/templates/NOTES.txt
 charts/arvados/templates/_helpers.tpl
+charts/arvados/config/slurm/slurm.conf
+charts/arvados/config/slurm/munge.key
index 385c64c7ec51b6a98eeae191915fb9a7b57b9266..1d2452237e91808356f4fd3bfbdf0af483a00f45 100644 (file)
@@ -304,7 +304,7 @@ common:
   docker_image_formats: ["v2"]
 
   # :none or :slurm_immediate
   docker_image_formats: ["v2"]
 
   # :none or :slurm_immediate
-  crunch_job_wrapper: :none
+  crunch_job_wrapper: :slurm_immediate
 
   # username, or false = do not set uid when running jobs.
   crunch_job_user: crunch
 
   # username, or false = do not set uid when running jobs.
   crunch_job_user: crunch
diff --git a/charts/arvados/config/crunch-dispatch-slurm/crunch-dispatch-slurm.yml b/charts/arvados/config/crunch-dispatch-slurm/crunch-dispatch-slurm.yml
new file mode 100644 (file)
index 0000000..754a905
--- /dev/null
@@ -0,0 +1,12 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+# Configuration file for crunch-dispatch-slurm. The crunch-dispatch-slurm
+# ConfigMap template passes this file through Helm's `tpl`, so the template
+# actions below are expanded before the result is read as YAML.
+Client:
+  # Templated values are quoted so that an empty or number-like expansion is
+  # still read as a string (the same values are quoted in the pod env vars).
+  APIHost: "{{ .Values.externalIP }}:444"
+  AuthToken: "{{ .Values.superUserSecret }}"
+  # NOTE(review): presumably turns off TLS certificate verification -- confirm.
+  Insecure: true
+PollPeriod: 5000ms
+MinRetryPeriod: 30s
+# NOTE(review): presumably extra command-line arguments for sbatch -- confirm.
+SbatchArguments:
+- "--workdir=/tmp"
diff --git a/charts/arvados/config/slurm/munge.key b/charts/arvados/config/slurm/munge.key
new file mode 100644 (file)
index 0000000..33019a5
Binary files /dev/null and b/charts/arvados/config/slurm/munge.key differ
diff --git a/charts/arvados/config/slurm/slurm.conf b/charts/arvados/config/slurm/slurm.conf
new file mode 100644 (file)
index 0000000..6a54969
--- /dev/null
@@ -0,0 +1,49 @@
+# Slurm configuration for the chart. The slurm ConfigMap publishes this file
+# and the Slurm pods mount it at /etc/slurm-llnl/slurm.conf.
+#
+# ControlMachine matches the name of the controller Service
+# (arvados-slurm-controller-0).
+ControlMachine=arvados-slurm-controller-0
+# 6817 and 6818 are the ports exposed by the controller and compute Services.
+SlurmctldPort=6817
+SlurmdPort=6818
+SrunPortRange=60001-63000
+# Authentication uses munge; each pod's start command launches munged with the
+# shared key.
+AuthType=auth/munge
+# /var/slurm is created (mode 700) by the container start commands.
+StateSaveLocation=/var/slurm
+SlurmdSpoolDir=/tmp/slurmd
+SwitchType=switch/none
+MpiDefault=none
+SlurmctldPidFile=/var/run/slurmctld.pid
+SlurmdPidFile=/var/run/slurmd.pid
+ProctrackType=proctrack/pgid
+CacheGroups=0
+ReturnToService=2
+TaskPlugin=task/affinity
+TreeWidth=64
+#
+# TIMERS
+SlurmctldTimeout=300
+SlurmdTimeout=300
+InactiveLimit=0
+MessageTimeout=10
+MinJobAge=300
+KillWait=30
+Waittime=0
+#
+# SCHEDULING
+SchedulerType=sched/backfill
+SchedulerPort=7321
+#SelectType=select/linear
+FastSchedule=0
+#
+# LOGGING
+# The log file settings are left commented out.
+SlurmctldDebug=3
+#SlurmctldLogFile=
+SlurmdDebug=3
+#SlurmdLogFile=
+JobCompType=jobcomp/none
+#JobCompLoc=
+JobAcctGatherType=jobacct_gather/none
+#
+# COMPUTE NODES
+# Node names follow the arvados-slurm-compute StatefulSet (its name plus a pod
+# ordinal). The range below covers 64 nodes; the StatefulSet asks for 4 replicas.
+NodeName=DEFAULT State=UNKNOWN
+# NOTE(review): arvados-slurm-compute-63 is declared here with explicit
+# resources and is also part of the [0-63] range further down -- confirm that
+# the double declaration is intended.
+NodeName=arvados-slurm-compute-63 CPUs=32 State=UNKNOWN RealMemory=58368 Weight=58368 TmpDisk=640000
+PartitionName=DEFAULT MaxTime=INFINITE State=UP

+NodeName=arvados-slurm-compute-[0-63]

+# Single default partition containing every declared compute node.
+PartitionName=compute Nodes=arvados-slurm-compute-[0-63] Default=YES Shared=YES
diff --git a/charts/arvados/templates/crunch-dispatch-slurm-configmap.yaml b/charts/arvados/templates/crunch-dispatch-slurm-configmap.yaml
new file mode 100644 (file)
index 0000000..7d98e52
--- /dev/null
@@ -0,0 +1,15 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+# ConfigMap holding the rendered crunch-dispatch-slurm configuration files.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  labels:
+    heritage: {{ .Release.Service }}
+    release: {{ .Release.Name }}
+    chart: {{ include "arvados.chart" . }}
+    app: {{ include "arvados.name" . }}
+  name: crunch-dispatch-slurm-configmap
+data:
+  # One key per file under config/crunch-dispatch-slurm/; `tpl` expands the
+  # template actions inside those files before they are embedded here.
+{{ tpl ((.Files.Glob "config/crunch-dispatch-slurm/*").AsConfig) . | indent 2 }}
diff --git a/charts/arvados/templates/crunch-dispatch-slurm-deployment.yaml b/charts/arvados/templates/crunch-dispatch-slurm-deployment.yaml
new file mode 100644 (file)
index 0000000..95304be
--- /dev/null
@@ -0,0 +1,62 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+# Deployment that runs crunch-dispatch-slurm next to a munge daemon, using the
+# shared slurm.conf and munge key from the slurm ConfigMap.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: "arvados-crunch-dispatch-slurm"
+  labels:
+    app: arvados-crunch-dispatch-slurm
+    chart: {{ template "arvados.chart" . }}
+    release: {{ .Release.Name }}
+    heritage: {{ .Release.Service }}
+spec:
+  replicas: 1
+  # No `serviceName` here: that field belongs to StatefulSet specs only.
+  selector:
+    matchLabels:
+      app: arvados-crunch-dispatch-slurm
+      release: {{ .Release.Name }}
+  template:
+    metadata:
+      labels:
+        app: arvados-crunch-dispatch-slurm
+        release: {{ .Release.Name }}
+    spec:
+      # `hostname` is a pod-level field; containers do not have one.
+      hostname: arvados-crunch-dispatch-slurm
+      containers:
+        - name: {{ .Chart.Name }}
+          image: "cure/arvados-slurm-runtime"
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
+          # Startup chain (each step must succeed): call bootstrap.sh with the
+          # pinned crunch-dispatch-slurm version, copy the mounted munge key to
+          # /munge and give it to the munge user, create /var/slurm, start
+          # munged, then run crunch-dispatch-slurm.
+          # The folded scalar (>-) joins these lines with single spaces, so sh
+          # receives one command line.
+          command:
+            - "sh"
+            - "-c"
+            - >-
+              /usr/local/bin/bootstrap.sh
+              crunch-dispatch-slurm={{ .Values.arvados.versions.distribution.crunchDispatchSlurm }}
+              && mkdir /munge
+              && cp -p /etc/munge/munge.key /munge
+              && chown munge:munge /munge/munge.key
+              && mkdir /var/slurm
+              && chmod 700 /var/slurm
+              && sudo -u munge munged --key-file=/munge/munge.key
+              && crunch-dispatch-slurm
+          env:
+            - name: ARVADOS_API_HOST
+              value: "{{ .Values.externalIP }}:444"
+            - name: ARVADOS_API_HOST_INSECURE
+              value: "true"
+            - name: ARVADOS_API_TOKEN
+              value: "{{ .Values.superUserSecret }}"
+          volumeMounts:
+            # subPath mounts place single files without hiding the rest of the
+            # target directories.
+            - name: slurm-configmap
+              mountPath: /etc/slurm-llnl/slurm.conf
+              subPath: slurm.conf
+            - name: slurm-configmap
+              mountPath: /etc/munge/munge.key
+              subPath: munge.key
+            - name: crunch-dispatch-slurm-configmap
+              mountPath: /etc/arvados/crunch-dispatch-slurm/crunch-dispatch-slurm.yml
+              subPath: crunch-dispatch-slurm.yml
+
+      volumes:
+        - name: slurm-configmap
+          configMap:
+            name: slurm-configmap
+            # Octal file mode: readable by the owner only.
+            defaultMode: 0400
+        - name: crunch-dispatch-slurm-configmap
+          configMap:
+            name: crunch-dispatch-slurm-configmap
+
diff --git a/charts/arvados/templates/slurm-compute-deployment.yaml b/charts/arvados/templates/slurm-compute-deployment.yaml
new file mode 100644 (file)
index 0000000..ad0edf4
--- /dev/null
@@ -0,0 +1,68 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+# StatefulSet of Slurm compute nodes. Each pod starts munged, a Docker daemon
+# and slurmd.
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: "arvados-slurm-compute"
+  labels:
+    app: arvados-slurm-compute
+    chart: {{ template "arvados.chart" . }}
+    release: {{ .Release.Name }}
+    heritage: {{ .Release.Service }}
+spec:
+  # slurm.conf declares nodes arvados-slurm-compute-[0-63]; 4 of them are
+  # started here.
+  replicas: 4
+  serviceName: arvados-slurm-compute
+  selector:
+    matchLabels:
+      app: arvados-slurm-compute
+      release: {{ .Release.Name }}
+  template:
+    metadata:
+      labels:
+        app: arvados-slurm-compute
+        release: {{ .Release.Name }}
+    spec:
+      containers:
+        # No `hostname` key on the container: it is not a container field, and
+        # StatefulSet pods are named arvados-slurm-compute-<ordinal>, which is
+        # what the NodeName entries in slurm.conf refer to.
+        - name: {{ .Chart.Name }}
+          image: "cure/arvados-slurm-runtime"
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
+          # Startup chain (each step must succeed): call bootstrap.sh with the
+          # pinned client package versions, copy the mounted munge key to
+          # /munge and give it to the munge user, create /var/slurm, start
+          # munged, start Docker, then run slurmd in the foreground.
+          # The folded scalar (>-) joins these lines with single spaces, so sh
+          # receives one command line.
+          command:
+            - "sh"
+            - "-c"
+            - >-
+              /usr/local/bin/bootstrap.sh
+              python-arvados-python-client={{ .Values.arvados.versions.distribution.pythonArvadosPythonClient }}
+              crunch-run={{ .Values.arvados.versions.distribution.crunchRun }}
+              python-arvados-fuse={{ .Values.arvados.versions.distribution.pythonArvadosFuse }}
+              && mkdir /munge
+              && cp -p /etc/munge/munge.key /munge
+              && chown munge:munge /munge/munge.key
+              && mkdir /var/slurm
+              && chmod 700 /var/slurm
+              && sudo -u munge munged --key-file=/munge/munge.key
+              && /etc/init.d/docker start
+              && slurmd -D
+          # NOTE(review): presumably required because the container starts its
+          # own Docker daemon -- confirm.
+          securityContext:
+            privileged: true
+          env:
+            - name: ARVADOS_API_HOST
+              value: "{{ .Values.externalIP }}:444"
+            - name: ARVADOS_API_HOST_INSECURE
+              value: "true"
+            - name: ARVADOS_API_TOKEN
+              value: "{{ .Values.superUserSecret }}"
+          volumeMounts:
+            - name: slurm-configmap
+              mountPath: /etc/slurm-llnl/slurm.conf
+              subPath: slurm.conf
+            - name: slurm-configmap
+              mountPath: /etc/munge/munge.key
+              subPath: munge.key
+            # Scratch storage for the in-container Docker daemon's data.
+            - name: docker-graph-storage
+              mountPath: /var/lib/docker
+
+      volumes:
+        - name: slurm-configmap
+          configMap:
+            name: slurm-configmap
+            # Octal file mode: readable by the owner only.
+            defaultMode: 0400
+        - name: docker-graph-storage
+          emptyDir: {}
+
+#            - "/usr/local/bin/bootstrap.sh crunch-dispatch-slurm={{ .Values.arvados.versions.distribution.crunchDispatchSlurm }} && chown munge /etc/munge/munge.key && chmod 0400 /etc/munge/munge.key && sudo -u munge munged && slurmctld -D && crunch-dispatch-slurm"
+#            - "/usr/local/bin/bootstrap.sh crunch-dispatch-slurm={{ .Values.arvados.versions.distribution.crunchDispatchSlurm }} && sudo -u munge munged && slurmctld -D && crunch-dispatch-slurm"
+#            - "/usr/local/bin/bootstrap.sh crunch-dispatch-slurm={{ .Values.arvados.versions.distribution.crunchDispatchSlurm }} && crunch-dispatch-slurm"
+
+# for slurm client, compute node
+#            - "/usr/local/bin/bootstrap.sh python-arvados-python-client={{ .Values.arvados.versions.distribution.pythonArvadosPythonClient }} crunch-run={{ .Values.arvados.versions.distribution.crunchRun }} python-arvados-fuse={{ .Values.arvados.versions.distribution.pythonArvadosFuse }} && chown munge /etc/munge/munge.key; chmod 0400 /etc/munge/munge.key; sudo -u munge munged && slurmd -D"
diff --git a/charts/arvados/templates/slurm-compute-service.yaml b/charts/arvados/templates/slurm-compute-service.yaml
new file mode 100644 (file)
index 0000000..088db37
--- /dev/null
@@ -0,0 +1,20 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+# Headless Service (clusterIP: None) that selects the arvados-slurm-compute
+# pods and exposes port 6818, the SlurmdPort set in slurm.conf.
+apiVersion: v1
+kind: Service
+metadata:
+  labels:
+    heritage: {{ .Release.Service }}
+    release: {{ .Release.Name }}
+    chart: {{ include "arvados.chart" . }}
+    app: {{ include "arvados.name" . }}
+  name: arvados-slurm-compute
+spec:
+  clusterIP: None
+  selector:
+    app: arvados-slurm-compute
+  ports:
+    - protocol: TCP
+      port: 6818
diff --git a/charts/arvados/templates/slurm-configmap.yaml b/charts/arvados/templates/slurm-configmap.yaml
new file mode 100644 (file)
index 0000000..65d7ef6
--- /dev/null
@@ -0,0 +1,18 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+# ConfigMap carrying slurm.conf (text, expanded through `tpl`) and the munge
+# key (binary, stored base64-encoded).
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  labels:
+    heritage: {{ .Release.Service }}
+    release: {{ .Release.Name }}
+    chart: {{ include "arvados.chart" . }}
+    app: {{ include "arvados.name" . }}
+  name: slurm-configmap
+binaryData:
+  munge.key: {{ .Files.Get "config/slurm/munge.key" | b64enc }}
+data:
+{{ tpl ((.Files.Glob "config/slurm/slurm.conf").AsConfig) . | indent 2 }}
diff --git a/charts/arvados/templates/slurm-controller-deployment.yaml b/charts/arvados/templates/slurm-controller-deployment.yaml
new file mode 100644 (file)
index 0000000..3443421
--- /dev/null
@@ -0,0 +1,59 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+# StatefulSet running the Slurm controller (slurmctld) next to a munge daemon.
+# Its single pod is named arvados-slurm-controller-0, the ControlMachine value
+# in slurm.conf.
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: "arvados-slurm-controller"
+  labels:
+    app: arvados-slurm-controller
+    chart: {{ template "arvados.chart" . }}
+    release: {{ .Release.Name }}
+    heritage: {{ .Release.Service }}
+spec:
+  replicas: 1
+  serviceName: arvados-slurm-controller
+  selector:
+    matchLabels:
+      app: arvados-slurm-controller
+      release: {{ .Release.Name }}
+  template:
+    metadata:
+      labels:
+        app: arvados-slurm-controller
+        release: {{ .Release.Name }}
+    spec:
+      # Search the compute Service's DNS domain so short names such as
+      # arvados-slurm-compute-0 resolve. The namespace comes from the release
+      # so this also works outside the "default" namespace.
+      dnsConfig:
+        searches:
+          - "arvados-slurm-compute.{{ .Release.Namespace }}.svc.cluster.local"
+      containers:
+        # No `hostname` key on the container: it is not a container field, and
+        # a StatefulSet pod already takes its hostname from its pod name.
+        - name: {{ .Chart.Name }}
+          image: "cure/arvados-slurm-runtime"
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
+          # Startup chain (each step must succeed): copy the mounted munge key
+          # to /munge and give it to the munge user, create /var/slurm, start
+          # munged, then run slurmctld in the foreground.
+          # The folded scalar (>-) joins these lines with single spaces, so sh
+          # receives one command line.
+          command:
+            - "sh"
+            - "-c"
+            - >-
+              mkdir /munge
+              && cp -p /etc/munge/munge.key /munge
+              && chown munge:munge /munge/munge.key
+              && mkdir /var/slurm
+              && chmod 700 /var/slurm
+              && sudo -u munge munged --key-file=/munge/munge.key
+              && slurmctld -D
+          env:
+            - name: ARVADOS_API_HOST
+              value: "{{ .Values.externalIP }}:444"
+            - name: ARVADOS_API_HOST_INSECURE
+              value: "true"
+            - name: ARVADOS_API_TOKEN
+              value: "{{ .Values.superUserSecret }}"
+          volumeMounts:
+            - name: slurm-configmap
+              mountPath: /etc/slurm-llnl/slurm.conf
+              subPath: slurm.conf
+            - name: slurm-configmap
+              mountPath: /etc/munge/munge.key
+              subPath: munge.key
+
+      volumes:
+        - name: slurm-configmap
+          configMap:
+            name: slurm-configmap
+            # Octal file mode: readable by the owner only.
+            defaultMode: 0400
+
diff --git a/charts/arvados/templates/slurm-controller-service.yaml b/charts/arvados/templates/slurm-controller-service.yaml
new file mode 100644 (file)
index 0000000..955a508
--- /dev/null
@@ -0,0 +1,19 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+# Service in front of the arvados-slurm-controller pods. Its name equals the
+# ControlMachine value in slurm.conf and it exposes port 6817 (SlurmctldPort).
+apiVersion: v1
+kind: Service
+metadata:
+  labels:
+    heritage: {{ .Release.Service }}
+    release: {{ .Release.Name }}
+    chart: {{ include "arvados.chart" . }}
+    app: {{ include "arvados.name" . }}
+  name: arvados-slurm-controller-0
+spec:
+  selector:
+    app: arvados-slurm-controller
+  ports:
+    - protocol: TCP
+      port: 6817
index eddd0ef15e1bd56391aace800a67e04e4f5be87a..3c00b942792960414facfa06e1bc014f927657ec 100644 (file)
@@ -70,7 +70,7 @@ arvados:
       arvadosWorkbench: 1.1.4.20180521175419*
       arvadosWs: 1.1.4.20180516205929*
       arvadosSSOServer: 0.1.20171122141118.ba584a7*
       arvadosWorkbench: 1.1.4.20180521175419*
       arvadosWs: 1.1.4.20180516205929*
       arvadosSSOServer: 0.1.20171122141118.ba584a7*
-      crunchDispatchSlurm: 1.1.4.20180516205929*
+      crunchDispatchSlurm: 1.1.4.20180524153119*
       crunchRun: 1.1.4.20180516205929*
       crunchRunner: 1.1.4.20180516205929*
       keepBalance: 1.1.4.20180516205929*
       crunchRun: 1.1.4.20180516205929*
       crunchRunner: 1.1.4.20180516205929*
       keepBalance: 1.1.4.20180516205929*