Merge branch '20489-iam-policy-fix'. Closes #20489
[arvados.git] / tools / salt-install / provision.sh
index 7a60363d696306fa1073e66ce2dc39a9ccda8808..4f044c42e9e2c323ed0289ed4bd947f9ee46546e 100755 (executable)
@@ -32,6 +32,7 @@ usage() {
   echo >&2 "                                                keepbalance"
   echo >&2 "                                                keepstore"
   echo >&2 "                                                keepweb"
+  echo >&2 "                                                monitoring"
   echo >&2 "                                                shell"
   echo >&2 "                                                webshell"
   echo >&2 "                                                websocket"
@@ -108,7 +109,7 @@ arguments() {
         for i in ${2//,/ }
           do
             # Verify the role exists
-            if [[ ! "database,api,controller,keepstore,websocket,keepweb,workbench2,webshell,keepbalance,keepproxy,shell,workbench,dispatcher" == *"$i"* ]]; then
+            if [[ ! "database,api,controller,keepstore,websocket,keepweb,workbench2,webshell,keepbalance,keepproxy,shell,workbench,dispatcher,monitoring" == *"$i"* ]]; then
               echo "The role '${i}' is not a valid role"
               usage
               exit 1
@@ -141,19 +142,16 @@ copy_custom_cert() {
   cert_dir=${1}
   cert_name=${2}
 
-  mkdir -p /srv/salt/certs
-  chmod 700 /srv/salt/certs
+  mkdir -p --mode=0700 /srv/salt/certs
 
   if [ -f ${cert_dir}/${cert_name}.crt ]; then
-    cp -v ${cert_dir}/${cert_name}.crt /srv/salt/certs/arvados-${cert_name}.pem
-    chmod 600 /srv/salt/certs/arvados-${cert_name}.pem
+    install --mode=0600 ${cert_dir}/${cert_name}.crt /srv/salt/certs/arvados-${cert_name}.pem
   else
     echo "${cert_dir}/${cert_name}.crt does not exist. Exiting"
     exit 1
   fi
   if [ -f ${cert_dir}/${cert_name}.key ]; then
-    cp -v ${cert_dir}/${cert_name}.key /srv/salt/certs/arvados-${cert_name}.key
-    chmod 600 /srv/salt/certs/arvados-${cert_name}.key
+    install --mode=0600 ${cert_dir}/${cert_name}.key /srv/salt/certs/arvados-${cert_name}.key
   else
     echo "${cert_dir}/${cert_name}.key does not exist. Exiting"
     exit 1
@@ -194,6 +192,8 @@ SSL_MODE="self-signed"
 USE_LETSENCRYPT_ROUTE53="no"
 CUSTOM_CERTS_DIR="${SCRIPT_DIR}/local_config_dir/certs"
 
+GRAFANA_DASHBOARDS_DIR="${SCRIPT_DIR}/local_config_dir/dashboards"
+
 ## These are ARVADOS-related parameters
 # For a stable release, change RELEASE "production" and VERSION to the
 # package version (including the iteration, e.g. X.Y.Z-1) of the
@@ -223,6 +223,8 @@ DOCKER_TAG="v2.4.2"
 LOCALE_TAG="v0.3.4"
 LETSENCRYPT_TAG="v2.1.0"
 LOGROTATE_TAG="v0.14.0"
+PROMETHEUS_TAG="v5.6.5"
+GRAFANA_TAG="v3.1.3"
 
 # Salt's dir
 DUMP_SALT_CONFIG_DIR=""
@@ -361,6 +363,16 @@ test -d postgres && ( cd postgres && git fetch ) \
   || git clone --quiet ${POSTGRES_URL} ${F_DIR}/postgres
 ( cd postgres && git checkout --quiet tags/"${POSTGRES_TAG}" )
 
+echo "...prometheus"
+test -d prometheus && ( cd prometheus && git fetch ) \
+  || git clone --quiet https://github.com/saltstack-formulas/prometheus-formula.git ${F_DIR}/prometheus
+( cd prometheus && git checkout --quiet tags/"${PROMETHEUS_TAG}" )
+
+echo "...grafana"
+test -d grafana && ( cd grafana && git fetch ) \
+  || git clone --quiet https://github.com/saltstack-formulas/grafana-formula.git ${F_DIR}/grafana
+( cd grafana && git checkout --quiet "${GRAFANA_TAG}" )
+
 echo "...letsencrypt"
 test -d letsencrypt && ( cd letsencrypt && git fetch ) \
   || git clone --quiet https://github.com/saltstack-formulas/letsencrypt-formula.git ${F_DIR}/letsencrypt
@@ -431,7 +443,6 @@ for f in $(ls "${SOURCE_PILLARS_DIR}"/*); do
        s#__WEBSOCKET_INT_IP__#${WEBSOCKET_INT_IP}#g;
        s#__KEEP_INT_IP__#${KEEP_INT_IP}#g;
        s#__KEEPSTORE0_INT_IP__#${KEEPSTORE0_INT_IP}#g;
-       s#__KEEPSTORE1_INT_IP__#${KEEPSTORE1_INT_IP}#g;
        s#__KEEPWEB_INT_IP__#${KEEPWEB_INT_IP}#g;
        s#__WEBSHELL_INT_IP__#${WEBSHELL_INT_IP}#g;
        s#__SHELL_INT_IP__#${SHELL_INT_IP}#g;
@@ -440,7 +451,13 @@ for f in $(ls "${SOURCE_PILLARS_DIR}"/*); do
        s#__DATABASE_INT_IP__#${DATABASE_INT_IP}#g;
        s#__WORKBENCH_SECRET_KEY__#${WORKBENCH_SECRET_KEY}#g;
        s#__SSL_KEY_ENCRYPTED__#${SSL_KEY_ENCRYPTED}#g;
-       s#__SSL_KEY_AWS_SECRET_NAME__#${SSL_KEY_AWS_SECRET_NAME}#g" \
+       s#__SSL_KEY_AWS_REGION__#${SSL_KEY_AWS_REGION}#g;
+       s#__SSL_KEY_AWS_SECRET_NAME__#${SSL_KEY_AWS_SECRET_NAME}#g;
+       s#__CONTROLLER_NGINX_WORKERS__#${CONTROLLER_NGINX_WORKERS}#g;
+       s#__CONTROLLER_MAX_CONCURRENT_REQUESTS__#${CONTROLLER_MAX_CONCURRENT_REQUESTS}#g;
+       s#__MONITORING_USERNAME__#${MONITORING_USERNAME}#g;
+       s#__MONITORING_EMAIL__#${MONITORING_EMAIL}#g;
+       s#__MONITORING_PASSWORD__#${MONITORING_PASSWORD}#g" \
   "${f}" > "${P_DIR}"/$(basename "${f}")
 done
 
@@ -476,6 +493,7 @@ fi
 # Replace helper state files that differ from the formula's examples
 if [ -d "${SOURCE_STATES_DIR}" ]; then
   mkdir -p "${F_DIR}"/extra/extra
+  rm -f "${F_DIR}"/extra/extra/*
 
   for f in $(ls "${SOURCE_STATES_DIR}"/*); do
     sed "s#__ANONYMOUS_USER_TOKEN__#${ANONYMOUS_USER_TOKEN}#g;
@@ -500,7 +518,6 @@ if [ -d "${SOURCE_STATES_DIR}" ]; then
          s#__WEBSOCKET_INT_IP__#${WEBSOCKET_INT_IP}#g;
          s#__KEEP_INT_IP__#${KEEP_INT_IP}#g;
          s#__KEEPSTORE0_INT_IP__#${KEEPSTORE0_INT_IP}#g;
-         s#__KEEPSTORE1_INT_IP__#${KEEPSTORE1_INT_IP}#g;
          s#__KEEPWEB_INT_IP__#${KEEPWEB_INT_IP}#g;
          s#__WEBSHELL_INT_IP__#${WEBSHELL_INT_IP}#g;
          s#__WORKBENCH1_INT_IP__#${WORKBENCH1_INT_IP}#g;
@@ -513,7 +530,13 @@ if [ -d "${SOURCE_STATES_DIR}" ]; then
          s#__WORKBENCH2_EXT_SSL_PORT__#${WORKBENCH2_EXT_SSL_PORT}#g;
          s#__WORKBENCH_SECRET_KEY__#${WORKBENCH_SECRET_KEY}#g;
          s#__SSL_KEY_ENCRYPTED__#${SSL_KEY_ENCRYPTED}#g;
-         s#__SSL_KEY_AWS_SECRET_NAME__#${SSL_KEY_AWS_SECRET_NAME}#g" \
+         s#__SSL_KEY_AWS_REGION__#${SSL_KEY_AWS_REGION}#g;
+         s#__SSL_KEY_AWS_SECRET_NAME__#${SSL_KEY_AWS_SECRET_NAME}#g;
+         s#__CONTROLLER_NGINX_WORKERS__#${CONTROLLER_NGINX_WORKERS}#g;
+         s#__CONTROLLER_MAX_CONCURRENT_REQUESTS__#${CONTROLLER_MAX_CONCURRENT_REQUESTS}#g;
+         s#__MONITORING_USERNAME__#${MONITORING_USERNAME}#g;
+         s#__MONITORING_EMAIL__#${MONITORING_EMAIL}#g;
+         s#__MONITORING_PASSWORD__#${MONITORING_PASSWORD}#g" \
     "${f}" > "${F_DIR}/extra/extra"/$(basename "${f}")
   done
 fi
@@ -567,12 +590,10 @@ if [ -z "${ROLES}" ]; then
     fi
     grep -q "letsencrypt" ${S_DIR}/top.sls || echo "    - letsencrypt" >> ${S_DIR}/top.sls
   else
-    mkdir -p /srv/salt/certs
-    chmod 700 /srv/salt/certs
+    mkdir -p --mode=0700 /srv/salt/certs
     if [ "${SSL_MODE}" = "bring-your-own" ]; then
       # Copy certs to formula extra/files
-      cp -rv ${CUSTOM_CERTS_DIR}/* /srv/salt/certs/
-      chmod 600 /srv/salt/certs/*
+      install --mode=0600 ${CUSTOM_CERTS_DIR}/* /srv/salt/certs/
       # We add the custom_certs state
       grep -q "custom_certs" ${S_DIR}/top.sls || echo "    - extra.custom_certs" >> ${S_DIR}/top.sls
       if [ "${SSL_KEY_ENCRYPTED}" = "yes" ]; then
@@ -686,13 +707,80 @@ else
     grep -q "extra_custom_certs" ${P_DIR}/top.sls || echo "    - extra_custom_certs" >> ${P_DIR}/top.sls
   fi
 
+  # Prometheus state on all nodes due to the node exporter below
+  grep -q "\- prometheus$" ${S_DIR}/top.sls || echo "    - prometheus" >> ${S_DIR}/top.sls
+  # Prometheus node exporter pillar
+  grep -q "prometheus_node_exporter" ${P_DIR}/top.sls || echo "    - prometheus_node_exporter" >> ${P_DIR}/top.sls
+
   for R in ${ROLES}; do
     case "${R}" in
       "database")
         # States
-        echo "    - postgres" >> ${S_DIR}/top.sls
+        grep -q "\- postgres$" ${S_DIR}/top.sls || echo "    - postgres" >> ${S_DIR}/top.sls
+        grep -q "extra.prometheus_pg_exporter" ${S_DIR}/top.sls || echo "    - extra.prometheus_pg_exporter" >> ${S_DIR}/top.sls
         # Pillars
-        echo '    - postgresql' >> ${P_DIR}/top.sls
+        grep -q "postgresql" ${P_DIR}/top.sls || echo "    - postgresql" >> ${P_DIR}/top.sls
+        grep -q "prometheus_pg_exporter" ${P_DIR}/top.sls || echo "    - prometheus_pg_exporter" >> ${P_DIR}/top.sls
+      ;;
+      "monitoring")
+        ### Support files ###
+        GRAFANA_DASHBOARDS_DEST_DIR=/srv/salt/dashboards
+        mkdir -p "${GRAFANA_DASHBOARDS_DEST_DIR}"
+        rm -f "${GRAFANA_DASHBOARDS_DEST_DIR}"/*
+        # "ArvadosPromDataSource" is the hardcoded UID for Prometheus' datasource
+        # in Grafana.
+        for f in $(ls "${GRAFANA_DASHBOARDS_DIR}"/*.json); do
+          sed 's#${DS_PROMETHEUS}#ArvadosPromDataSource#g' \
+          "${f}" > "${GRAFANA_DASHBOARDS_DEST_DIR}"/$(basename "${f}")
+        done
+
+        ### States ###
+        grep -q "\- nginx$" ${S_DIR}/top.sls || echo "    - nginx" >> ${S_DIR}/top.sls
+        grep -q "extra.nginx_prometheus_configuration" ${S_DIR}/top.sls || echo "    - extra.nginx_prometheus_configuration" >> ${S_DIR}/top.sls
+
+        grep -q "\- grafana$" ${S_DIR}/top.sls || echo "    - grafana" >> ${S_DIR}/top.sls
+        grep -q "extra.grafana_datasource" ${S_DIR}/top.sls || echo "    - extra.grafana_datasource" >> ${S_DIR}/top.sls
+        grep -q "extra.grafana_dashboards" ${S_DIR}/top.sls || echo "    - extra.grafana_dashboards" >> ${S_DIR}/top.sls
+        grep -q "extra.grafana_admin_user" ${S_DIR}/top.sls || echo "    - extra.grafana_admin_user" >> ${S_DIR}/top.sls
+
+        if [ "${SSL_MODE}" = "lets-encrypt" ]; then
+          grep -q "letsencrypt"     ${S_DIR}/top.sls || echo "    - letsencrypt" >> ${S_DIR}/top.sls
+          if [ "x${USE_LETSENCRYPT_ROUTE53}" = "xyes" ]; then
+            grep -q "aws_credentials" ${S_DIR}/top.sls || echo "    - aws_credentials" >> ${S_DIR}/top.sls
+          fi
+        elif [ "${SSL_MODE}" = "bring-your-own" ]; then
+          for SVC in grafana prometheus; do
+            copy_custom_cert ${CUSTOM_CERTS_DIR} ${SVC}
+          done
+        fi
+        ### Pillars ###
+        grep -q "prometheus_server" ${P_DIR}/top.sls || echo "    - prometheus_server" >> ${P_DIR}/top.sls
+        grep -q "grafana" ${P_DIR}/top.sls || echo "    - grafana" >> ${P_DIR}/top.sls
+        for SVC in grafana prometheus; do
+          grep -q "nginx_${SVC}_configuration" ${P_DIR}/top.sls || echo "    - nginx_${SVC}_configuration" >> ${P_DIR}/top.sls
+        done
+        if [ "${SSL_MODE}" = "lets-encrypt" ]; then
+          grep -q "letsencrypt"     ${P_DIR}/top.sls || echo "    - letsencrypt" >> ${P_DIR}/top.sls
+          for SVC in grafana prometheus; do
+            grep -q "letsencrypt_${SVC}_configuration" ${P_DIR}/top.sls || echo "    - letsencrypt_${SVC}_configuration" >> ${P_DIR}/top.sls
+            sed -i "s/__CERT_REQUIRES__/cmd: create-initial-cert-${SVC}.${CLUSTER}.${DOMAIN}*/g;
+                    s#__CERT_PEM__#/etc/letsencrypt/live/${SVC}.${CLUSTER}.${DOMAIN}/fullchain.pem#g;
+                    s#__CERT_KEY__#/etc/letsencrypt/live/${SVC}.${CLUSTER}.${DOMAIN}/privkey.pem#g" \
+            ${P_DIR}/nginx_${SVC}_configuration.sls
+          done
+          if [ "${USE_LETSENCRYPT_ROUTE53}" = "yes" ]; then
+            grep -q "aws_credentials" ${P_DIR}/top.sls || echo "    - aws_credentials" >> ${P_DIR}/top.sls
+          fi
+        elif [ "${SSL_MODE}" = "bring-your-own" ]; then
+          grep -q "ssl_key_encrypted" ${P_DIR}/top.sls || echo "    - ssl_key_encrypted" >> ${P_DIR}/top.sls
+          for SVC in grafana prometheus; do
+            sed -i "s/__CERT_REQUIRES__/file: extra_custom_certs_file_copy_arvados-${SVC}.pem/g;
+                    s#__CERT_PEM__#/etc/nginx/ssl/arvados-${SVC}.pem#g;
+                    s#__CERT_KEY__#/etc/nginx/ssl/arvados-${SVC}.key#g" \
+              ${P_DIR}/nginx_${SVC}_configuration.sls
+            grep -q ${SVC} ${P_DIR}/extra_custom_certs.sls || echo "  - ${SVC}" >> ${P_DIR}/extra_custom_certs.sls
+          done
+        fi
       ;;
       "api")
         # States
@@ -740,7 +828,7 @@ else
             echo "    - nginx.passenger" >> ${S_DIR}/top.sls
           fi
         else
-          grep -q "nginx" ${S_DIR}/top.sls || echo "    - nginx" >> ${S_DIR}/top.sls
+          grep -q "\- nginx$" ${S_DIR}/top.sls || echo "    - nginx" >> ${S_DIR}/top.sls
         fi
         if [ "${SSL_MODE}" = "lets-encrypt" ]; then
           if [ "x${USE_LETSENCRYPT_ROUTE53}" = "xyes" ]; then