X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/276f020c46d375e9884d385340cfb4c3a5486639..9084d255611326869a1a603b3269d307329a4c59:/lib/config/config.default.yml

diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml
index 723e64ceab..a633216be7 100644
--- a/lib/config/config.default.yml
+++ b/lib/config/config.default.yml
@@ -225,8 +225,18 @@ Clusters:
 
       # Maximum number of concurrent requests to process concurrently
       # in a single service process, or 0 for no limit.
+      #
+      # Note this applies to all Arvados services (controller, webdav,
+      # websockets, etc.). Concurrency in the controller service is
+      # also effectively limited by MaxConcurrentRailsRequests (see
+      # below) because most controller requests proxy through to the
+      # RailsAPI service.
       MaxConcurrentRequests: 64
 
+      # Maximum number of requests to process concurrently
+      # in a single RailsAPI service process, or 0 for no limit.
+      MaxConcurrentRailsRequests: 8
+
       # Maximum number of incoming requests to hold in a priority
       # queue waiting for one of the MaxConcurrentRequests slots to be
       # free. When the queue is longer than this, respond 503 to the
@@ -234,7 +244,7 @@ Clusters:
       #
       # If MaxQueuedRequests is 0, respond 503 immediately to
       # additional requests while at the MaxConcurrentRequests limit.
-      MaxQueuedRequests: 64
+      MaxQueuedRequests: 128
 
       # Maximum time a "lock container" request is allowed to wait in
       # the incoming request queue before returning 503.
@@ -1074,7 +1084,7 @@ Clusters:
 
       # Number of times a container can be unlocked before being
       # automatically cancelled.
-      MaxDispatchAttempts: 5
+      MaxDispatchAttempts: 10
 
       # Default value for container_count_max for container requests.  This is the
       # number of times Arvados will create a new container to satisfy a container
@@ -1102,6 +1112,17 @@ Clusters:
       # A price factor of 1.0 is a reasonable starting point.
       PreemptiblePriceFactor: 0
 
+      # When the lowest-priced instance type for a given container is
+      # not available, try other instance types, up to the indicated
+      # maximum price factor.
+      #
+      # For example, with MaximumPriceFactor 1.5, if the
+      # lowest-cost instance type A suitable for a given container
+      # costs $2/h, Arvados may run the container on any instance type
+      # B costing $3/h or less when instance type A is not available
+      # or an idle instance of type B is already running.
+      MaximumPriceFactor: 1.5
+
       # PEM encoded SSH key (RSA, DSA, or ECDSA) used by the
       # cloud dispatcher for executing containers on worker VMs.
       # Begins with "-----BEGIN RSA PRIVATE KEY-----\n"
@@ -1212,11 +1233,14 @@ Clusters:
         # before being silenced until the end of the period.
         LogThrottleLines: 1024
 
-        # Maximum bytes that may be logged by a single job.  Log bytes that are
-        # silenced by throttling are not counted against this total.
-        # If you set this to zero, each container will only create a single
-        # log on the API server, noting for users that logging is throttled.
-        LimitLogBytesPerJob: 67108864
+        # Maximum bytes that may be logged as legacy log events
+        # (records posted to the "logs" table). Starting with Arvados
+        # 2.7, container live logging has migrated to a new system
+        # (polling the container request live log endpoint) and this
+        # value should be 0.  As of this writing, the container will
+        # still create a single log on the API server, noting that
+        # log events are throttled.
+        LimitLogBytesPerJob: 0
 
         LogPartialLineThrottlePeriod: 5s
 
@@ -1421,16 +1445,17 @@ Clusters:
         # as containers start up successfully and decreases in
         # response to high API load and cloud quota errors.
         #
-        # Setting this too high creates a risk that the dispatcher
-        # will cause deadlock by starting so many supervisor
-        # containers (based on SupervisorFraction and MaxInstances)
-        # that the cloud quota prevents them from running any child
-        # containers.
+        # Setting this to 0 means the dynamic instance limit will
+        # start at MaxInstances.
         #
-        # Setting this too low causes the dispatcher to be
-        # unnecessarily slow to start up new instances after a
-        # restart.
-        InitialQuotaEstimate: 16
+        # Situations where you may want to set this (to a value less
+        # than MaxInstances) would be when there is significant
+        # variability or uncertainty in the actual cloud resources
+        # available.  Upon reaching InitialQuotaEstimate the
+        # dispatcher will switch to a more conservative behavior with
+        # slower instance start to avoid over-shooting cloud resource
+        # limits.
+        InitialQuotaEstimate: 0
 
         # Maximum fraction of available instance capacity allowed to
         # run "supervisor" containers at any given time. A supervisor
@@ -1444,9 +1469,9 @@ Clusters:
         # containers who just create more work.
         #
         # For example, with the default MaxInstances of 64, it will
-        # schedule at most floor(64*0.30) = 19 concurrent workflows,
-        # ensuring 45 slots are available for work.
-        SupervisorFraction: 0.30
+        # schedule at most floor(64*0.50) = 32 concurrent workflow
+        # runners, ensuring 32 slots are available for work.
+        SupervisorFraction: 0.50
 
         # Interval between cloud provider syncs/updates ("list all
         # instances").
@@ -1531,10 +1556,23 @@ Clusters:
           SecretAccessKey: ""
 
           # (ec2) Instance configuration.
+
+          # (ec2) Region, like "us-east-1".
+          Region: ""
+
+          # (ec2) Security group IDs. Omit or use {} to use the
+          # default security group.
           SecurityGroupIDs:
             "SAMPLE": {}
+
+          # (ec2) One or more subnet IDs. Omit or leave empty to let
+          # AWS choose a default subnet from your default VPC. If
+          # multiple subnets are configured here (enclosed in brackets
+          # like [subnet-abc123, subnet-def456]) the cloud dispatcher
+          # will detect subnet-related errors and retry using a
+          # different subnet. Most sites specify one subnet.
           SubnetID: ""
-          Region: ""
+
           EBSVolumeType: gp2
           AdminUsername: debian
           # (ec2) name of the IAMInstanceProfile for instances started by