20182: Add option to limit the number of supervisor containers

[arvados.git] / lib / config / config.default.yml
diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml

index f7c2beca3372f294bb16762d6f5366e7e989a84c..882ee62c35a201efa1431ecdd9cde16d4fa82144 100644 (file)
--- a/lib/config/config.default.yml
+++ b/lib/config/config.default.yml
@@ -288,6 +288,9 @@ Clusters:
        # any user with "manage" permission can un-freeze.
        UnfreezeProjectRequiresAdmin: false
  
+      # (Experimental) Use row-level locking on update API calls.
+      LockBeforeUpdate: false
+
      Users:
        # Config parameters to automatically set up new users.  If enabled,
        # these users will be able to self-activate.  Enable this if you want
@@ -373,6 +376,12 @@ Clusters:
        # cluster.
        RoleGroupsVisibleToAll: true
  
+      # If CanCreateRoleGroups is true, regular (non-admin) users can
+      # create new role groups.
+      #
+      # If false, only admins can create new role groups.
+      CanCreateRoleGroups: true
+
        # During each period, a log entry with event_type="activity"
        # will be recorded for each user who is active during that
        # period. The object_uuid attribute will indicate the user's
@@ -797,6 +806,16 @@ Clusters:
          # Skip TLS certificate name verification.
          InsecureTLS: false
  
+        # Minimum TLS version to negotiate when connecting to server
+        # (ldaps://... or StartTLS). It may be necessary to set this
+        # to "1.1" for compatibility with older LDAP servers that fail
+        # with 'LDAP Result Code 200 "Network Error": TLS handshake
+        # failed (tls: server selected unsupported protocol version
+        # 301)'.
+        #
+        # If blank, use the recommended minimum version (1.2).
+        MinTLSVersion: ""
+
          # Strip the @domain part if a user supplies an email-style
          # username with this domain. If "*", strip any user-provided
          # domain. If "", never strip the domain part. Example:
@@ -967,14 +986,14 @@ Clusters:
  
        # Default value for keep_cache_ram of a container's
        # runtime_constraints.  Note: this gets added to the RAM request
-      # used to allocate a VM or submit an HPC job
+      # used to allocate a VM or submit an HPC job.
+      #
+      # If this is zero, container requests that don't specify RAM or
+      # disk cache size will use a disk cache, sized to the
+      # container's RAM requirement (but with minimum 2 GiB and
+      # maximum 32 GiB).
        DefaultKeepCacheRAM: 0
  
-      # Default value for keep_cache_disk of a container's
-      # runtime_constraints.  Note: this gets added to the disk
-      # request used to allocate a VM or submit an HPC job
-      DefaultKeepCacheDisk: 8589934592
-
        # Number of times a container can be unlocked before being
        # automatically cancelled.
        MaxDispatchAttempts: 5
@@ -986,13 +1005,6 @@ Clusters:
        # with the cancelled container.
        MaxRetryAttempts: 3
  
-      # The maximum number of compute nodes that can be in use simultaneously
-      # If this limit is reduced, any existing nodes with slot number >= new limit
-      # will not be counted against the new limit. In other words, the new limit
-      # won't be strictly enforced until those nodes with higher slot numbers
-      # go down.
-      MaxComputeVMs: 64
-
        # Schedule all child containers on preemptible instances (e.g. AWS
        # Spot Instances) even if not requested by the submitter.
        #
@@ -1043,6 +1055,16 @@ Clusters:
        # Container runtime: "docker" (default) or "singularity"
        RuntimeEngine: docker
  
+      # Number of "supervisor" containers eligible to run at any given
+      # time expressed as a fraction of CloudVMs.MaxInstances. A
+      # supervisor is a container whose purpose is to submit other
+      # containers, such as a container running arvados-cwl-runner.
+      # If there is a hard limit on the number of concurrent
+      # containers that the cluster can run, it is important to avoid
+      # crowding out the containers doing useful work with containers
+      # that just create more work.
+      SupervisorFraction: 0.3
+
        # When running a container, run a dedicated keepstore process,
        # using the specified number of 64 MiB memory buffers per
        # allocated CPU core (VCPUs in the container's runtime
@@ -1308,6 +1330,15 @@ Clusters:
          # providers too, if desired.
          MaxConcurrentInstanceCreateOps: 1
  
+        # The maximum number of instances to run at a time, or 0 for
+        # unlimited.
+        #
+        # If more instances than this are already running and busy
+        # when the dispatcher starts up, the running containers will
+        # be allowed to finish before the excess instances are shut
+        # down.
+        MaxInstances: 64
+
          # Interval between cloud provider syncs/updates ("list all
          # instances").
          SyncInterval: 1m
@@ -1389,6 +1420,20 @@ Clusters:
            # the cloud dispatcher. Leave blank when not needed.
            IAMInstanceProfile: ""
  
+          # (ec2) how often to look up spot instance pricing data
+          # (only while running spot instances) for the purpose of
+          # calculating container cost estimates. A value of 0
+          # disables spot price lookups entirely.
+          SpotPriceUpdateInterval: 24h
+
+          # (ec2) per-GiB-month cost of EBS volumes. Matches
+          # EBSVolumeType. Used to account for AddedScratch when
+          # calculating container cost estimates. Note that
+          # https://aws.amazon.com/ebs/pricing/ defines GB to mean
+          # GiB, so an advertised price $0.10/GB indicates a real
+          # price of $0.10/GiB and can be entered here as 0.10.
+          EBSPrice: 0.10
+
            # (azure) Credentials.
            SubscriptionID: ""
            ClientID: ""
@@ -1442,6 +1487,13 @@ Clusters:
          RAM: 128MiB
          IncludedScratch: 16GB
          AddedScratch: 0
+        # Hourly price ($), used to select node types for containers,
+        # and to calculate estimated container costs. For spot
+        # instances on EC2, this is also used as the maximum price
+        # when launching spot instances, while the estimated container
+        # cost is computed based on the current spot price according
+        # to AWS. On Azure, and for on-demand instances on EC2, the price
+        # given here is used to compute container cost estimates.
          Price: 0.1
          Preemptible: false
          # Include this section if the node type includes GPU (CUDA) support
@@ -1739,9 +1791,11 @@ Clusters:
        # This feature is disabled when set to zero.
        IdleTimeout: 0s
  
-      # URL to a file that is a fragment of text or HTML which should
-      # be rendered in Workbench as a banner.
-      BannerURL: ""
+      # UUID of a collection.  This collection should be shared with
+      # all users.  Workbench will look for a file "banner.html" in
+      # this collection and display its contents (should be
+      # HTML-formatted text) when users first log in to Workbench.
+      BannerUUID: ""
  
        # Workbench welcome screen, this is HTML text that will be
        # incorporated directly onto the page.