X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/d5341150545efd0960acf34186ca18b98a1b1860..ad25d712525293ce4f95f5a059b390036ec60dc3:/services/nodemanager/doc/azure.example.cfg diff --git a/services/nodemanager/doc/azure.example.cfg b/services/nodemanager/doc/azure.example.cfg index 8fad85d656..8ba68018d5 100644 --- a/services/nodemanager/doc/azure.example.cfg +++ b/services/nodemanager/doc/azure.example.cfg @@ -1,20 +1,42 @@ +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + # Azure configuration for Arvados Node Manager. # All times are in seconds unless specified otherwise. +[Manage] +# The management server responds to http://addr:port/status.json with +# a snapshot of internal state. + +# Management server listening address (default 127.0.0.1) +#address = 0.0.0.0 + +# Management server port number (default -1, server is disabled) +#port = 8989 + [Daemon] # The dispatcher can customize the start and stop procedure for # cloud nodes. For example, the SLURM dispatcher drains nodes # through SLURM before shutting them down. #dispatcher = slurm -# Node Manager will ensure that there are at least this many nodes -# running at all times. +# Node Manager will ensure that there are at least this many nodes running at +# all times. If node manager needs to start new idle nodes for the purpose of +# satisfying min_nodes, it will use the cheapest node type. However, depending +# on usage patterns, it may also satisfy min_nodes by keeping alive some +# more-expensive nodes min_nodes = 0 # Node Manager will not start any compute nodes when at least this # many are running. max_nodes = 8 +# Upper limit on rate of spending (in $/hr), will not boot additional nodes +# if total price of already running nodes meets or exceeds this threshold. +# default 0 means no limit. +max_total_price = 0 + # Poll Azure nodes and Arvados for new information every N seconds. 
poll_time = 60 @@ -43,9 +65,25 @@ boot_fail_after = 1800 # an Arvados node that hasn't been updated for this long. node_stale_after = 14400 +# Number of consecutive times a node must report as "idle" before it +# will be considered eligible for shutdown. Node status is checked +# each poll period, and a node can go idle at any point during a poll +# period (meaning a node could be reported as idle that has only been +# idle for 1 second). With a 60 second poll period, three consecutive +# status updates of "idle" suggest the node has been idle at least +# 121 seconds. +consecutive_idle_count = 3 + +# Scaling factor to be applied to nodes' available RAM size. Usually there's a +# variable discrepancy between the advertised RAM value on cloud nodes and the +# actual amount available. +# If not set, this value defaults to 0.95. +node_mem_scaling = 0.95 + # File path for Certificate Authorities certs_file = /etc/ssl/certs/ca-certificates.crt + [Logging] # Log file path file = /var/log/arvados/node-manager.log @@ -69,6 +107,8 @@ apiclient = WARNING host = zyxwv.arvadosapi.com token = ARVADOS_TOKEN timeout = 15 +jobs_queue = yes # Get work request from Arvados jobs queue (jobs API) +slurm_queue = yes # Get work request from squeue (containers API) # Accept an untrusted SSL certificate from the API server? insecure = no @@ -88,32 +128,75 @@ provider = azure shutdown_windows = 20, 999999 [Cloud Credentials] -subscription_id = SUBSCRIPTION_ID -key_file = PATH_TO_PEM_FILE +# Use "azure account list" with the azure CLI to get these values.
+tenant_id = 00000000-0000-0000-0000-000000000000 +subscription_id = 00000000-0000-0000-0000-000000000000 + +# The following directions are based on +# https://azure.microsoft.com/en-us/documentation/articles/resource-group-authenticate-service-principal/ +# +# azure config mode arm +# azure ad app create --name "<Your Application Display Name>" --home-page "<https://YourApplicationHomePage>" --identifier-uris "<https://YourApplicationUri>" --password <Your_Password> +# azure ad sp create "<Application_Id>" +# azure role assignment create --objectId "<Object_Id>" -o Owner -c /subscriptions/{subscriptionId}/ +# +# Use <Application_Id> for "key" and <Your_Password> for "secret" +# +key = 00000000-0000-0000-0000-000000000000 +secret = PASSWORD timeout = 60 +region = East US [Cloud List] -# This section defines filters that find compute nodes. -# Tags that you specify here will automatically be added to nodes you create. -# Replace colons in Microsoft filters with underscores -# (e.g., write "tag:mytag" as "tag_mytag"). -instance-state-name = running +# The resource group in which the compute node virtual machines will be created +# and listed. +ex_resource_group = ArvadosResourceGroup + +[Cloud Create] +# The image id, in the form "Publisher:Offer:SKU:Version" +image = Canonical:UbuntuServer:14.04.3-LTS:14.04.201508050 + +# Path to a local ssh key file that will be used to provision new nodes. +ssh_key = /home/arvadosuser/.ssh/id_rsa.pub + +# The account name for the admin user that will be provisioned on new nodes. +ex_user_name = arvadosuser + +# The Azure storage account that will be used to store the node OS disk images. +ex_storage_account = arvadosstorage + +# The virtual network the VMs will be associated with. +ex_network = ArvadosNetwork + +# Optional subnet of the virtual network. +#ex_subnet = default + +# Node tags tag_arvados-class = dynamic-compute tag_cluster = zyxwv -[Cloud Create] -image: ??? -ex_cloud_service_name: ??? - -[Size A3] -# You can define any number of Size sections to list Azure sizes you're -# willing to use.
The Node Manager should boot the cheapest size(s) that -# can run jobs in the queue (N.B.: defining more than one size has not been -# tested yet). +# The API server to ping +ping_host = hostname:port + +# You can define any number of Size sections to list Azure sizes you're willing +# to use. The Node Manager should boot the cheapest size(s) that can run jobs +# in the queue. You must also provide price per hour as the Azure compute +# driver currently does not report prices. +# +# See https://azure.microsoft.com/en-us/pricing/details/virtual-machines/ +# for a list of known machine types that may be used as a Size parameter. +# # Each size section MUST define the number of cores available in this # size class (since libcloud does not provide any consistent API for exposing # this setting). # You may also want to define the amount of scratch space (expressed # in GB) for Crunch jobs. You can also override Microsoft's provided -# data fields by setting them here. -# data fields by setting the same names here. +# data fields by setting them here. + +[Size Standard_D3] cores = 4 +price = 0.56 + +[Size Standard_D4] +cores = 8 +price = 1.12