X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/0ff4ed45a7ab1730118eadfb92ddea7d332f0328..7334ee9ee6350f2b5f0384d8166e7c4f58d86864:/doc/install/crunch2-cloud/install-dispatch-cloud.html.textile.liquid diff --git a/doc/install/crunch2-cloud/install-dispatch-cloud.html.textile.liquid b/doc/install/crunch2-cloud/install-dispatch-cloud.html.textile.liquid index 151e211653..ee71d7a3f6 100644 --- a/doc/install/crunch2-cloud/install-dispatch-cloud.html.textile.liquid +++ b/doc/install/crunch2-cloud/install-dispatch-cloud.html.textile.liquid @@ -10,7 +10,7 @@ SPDX-License-Identifier: CC-BY-SA-3.0 {% endcomment %} {% include 'notebox_begin_warning' %} -arvados-dispatch-cloud is only relevant for cloud installations. Skip this section if you are installing an on premises cluster that will spool jobs to Slurm. +@arvados-dispatch-cloud@ is only relevant for cloud installations. Skip this section if you are installing an on-premises cluster that will spool jobs to Slurm or LSF. {% include 'notebox_end' %} # "Introduction":#introduction @@ -27,6 +27,8 @@ The cloud dispatch service is for running containers on cloud VMs. It works with The cloud dispatch service can run on any node that can connect to the Arvados API service, the cloud provider's API, and the SSH service on cloud VMs. It is not resource-intensive, so you can run it on the API server node. +More detail about the internal operation of the dispatcher can be found in the "architecture section":{{site.baseurl}}/architecture/dispatchcloud.html. + h2(#update-config). Update config.yml h3. Configure CloudVMs @@ -72,16 +74,43 @@ Add or update the following portions of your cluster configuration file, @config +h4. NVIDIA GPU support + +To specify instance types with NVIDIA GPUs, you must include an additional @CUDA@ section: + + +
    InstanceTypes:
+      g4dn:
+        ProviderType: g4dn.xlarge
+        VCPUs: 4
+        RAM: 16GiB
+        IncludedScratch: 125GB
+        Price: 0.56
+        CUDA:
+          DriverVersion: "11.4"
+          HardwareCapability: "7.5"
+          DeviceCount: 1
+
+
+ +The @DriverVersion@ is the version of the CUDA toolkit installed in your compute image (in X.Y format; do not include the patch level). The @HardwareCapability@ is the CUDA compute capability of the GPUs available for this instance type. The @DeviceCount@ is the number of GPU devices available for this instance type. + h4. Minimal configuration example for Amazon EC2 +The ImageID value is the compute node image that was built in "the previous section":install-compute-node.html#aws. +
    Containers:
       CloudVMs:
-        ImageID: ami-01234567890abcdef
+        ImageID: ami-01234567890abcdef
         Driver: ec2
         DriverParameters:
+          # If you are not using an IAM role for authentication, specify access
+          # credentials here. Otherwise, omit AccessKeyID and SecretAccessKey,
+          # or set them to empty values.
           AccessKeyID: XXXXXXXXXXXXXXXXXXXX
           SecretAccessKey: YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY
+
           SecurityGroupIDs:
           - sg-0123abcd
           SubnetID: subnet-0123abcd
@@ -91,14 +120,42 @@ h4. Minimal configuration example for Amazon EC2
 
+Example policy for the IAM role used by the cloud dispatcher: + + +
+{
+    "Version": "2012-10-17",
+    "Id": "arvados-dispatch-cloud policy",
+    "Statement": [
+        {
+            "Effect": "Allow",
+            "Action": [
+                "iam:PassRole",
+                "ec2:DescribeKeyPairs",
+                "ec2:ImportKeyPair",
+                "ec2:RunInstances",
+                "ec2:DescribeInstances",
+                "ec2:CreateTags",
+                "ec2:TerminateInstances"
+            ],
+            "Resource": "*"
+        }
+    ]
+}
+
+
+ h4. Minimal configuration example for Azure Using managed disks: +The ImageID value is the compute node image that was built in "the previous section":install-compute-node.html#azure. +
    Containers:
       CloudVMs:
-        ImageID: "zzzzz-compute-v1597349873"
+        ImageID: "zzzzz-compute-v1597349873"
         Driver: azure
         # (azure) managed disks: set MaxConcurrentInstanceCreateOps to 20 to avoid timeouts, cf
         # https://docs.microsoft.com/en-us/azure/virtual-machines/linux/capture-image
@@ -134,7 +191,7 @@ Using an image from a shared image gallery:
 
 
    Containers:
       CloudVMs:
-        ImageID: "shared_image_gallery_image_definition_name"
+        ImageID: "shared_image_gallery_image_definition_name"
         Driver: azure
         DriverParameters:
           # Credentials.
@@ -167,10 +224,12 @@ Using an image from a shared image gallery:
 
 Using unmanaged disks (deprecated):
 
+The ImageID value is the compute node image that was built in "the previous section":install-compute-node.html#azure.
+
 
 
    Containers:
       CloudVMs:
-        ImageID: "https://zzzzzzzz.blob.core.windows.net/system/Microsoft.Compute/Images/images/zzzzz-compute-osDisk.55555555-5555-5555-5555-555555555555.vhd"
+        ImageID: "https://zzzzzzzz.blob.core.windows.net/system/Microsoft.Compute/Images/images/zzzzz-compute-osDisk.55555555-5555-5555-5555-555555555555.vhd"
         Driver: azure
         DriverParameters:
           # Credentials.