X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/952458464e04ff17e2c8fc0ce89812912e09737c..f4cb9b474add3becb58dccaf555c2a52d15cbd27:/lib/config/config.default.yml

diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml
index d191356347..00903ce2ad 100644
--- a/lib/config/config.default.yml
+++ b/lib/config/config.default.yml
@@ -225,7 +225,12 @@ Clusters:
 
       # Maximum number of concurrent requests to accept in a single
       # service process, or 0 for no limit.
-      MaxConcurrentRequests: 0
+      MaxConcurrentRequests: 64
+
+      # Fraction of MaxConcurrentRequests that can be "log create"
+      # messages at any given time.  This is to prevent logging
+      # updates from crowding out more important requests.
+      LogCreateRequestFraction: 0.50
 
       # Maximum number of 64MiB memory buffers per Keepstore server process, or
       # 0 for no limit. When this limit is reached, up to
@@ -437,6 +442,15 @@ Clusters:
       # params_truncated.
       MaxRequestLogParamsSize: 2000
 
+      # In all services except RailsAPI, periodically check whether
+      # the incoming HTTP request queue is nearly full (see
+      # MaxConcurrentRequests) and, if so, write a snapshot of the
+      # request queue to {service}-requests.json in the specified
+      # directory.
+      #
+      # Leave blank to disable.
+      RequestQueueDumpDirectory: ""
+
     Collections:
 
       # Enable access controls for data stored in Keep. This should
@@ -529,7 +543,7 @@ Clusters:
       #
       # If SIGUSR1 is received during an idle period between operations,
       # the next operation will start immediately.
-      BalancePeriod: 10m
+      BalancePeriod: 6h
 
       # Limits the number of collections retrieved by keep-balance per
       # API transaction. If this is zero, page size is
@@ -538,11 +552,12 @@ Clusters:
       BalanceCollectionBatch: 0
 
       # The size of keep-balance's internal queue of
-      # collections. Higher values use more memory and improve throughput
-      # by allowing keep-balance to fetch the next page of collections
-      # while the current page is still being processed. If this is zero
-      # or omitted, pages are processed serially.
-      BalanceCollectionBuffers: 1000
+      # collections. Higher values may improve throughput by allowing
+      # keep-balance to fetch collections from the database while the
+      # current collection are still being processed, at the expense of
+      # using more memory.  If this is zero or omitted, pages are
+      # processed serially.
+      BalanceCollectionBuffers: 4
 
       # Maximum time for a rebalancing run. This ensures keep-balance
       # eventually gives up and retries if, for example, a network
@@ -904,6 +919,9 @@ Clusters:
       # probably want to include the other Workbench instances in the
       # federation in this list.
       #
+      # A wildcard like "https://*.example" will match client URLs
+      # like "https://a.example" and "https://a.b.c.example".
+      #
       # Example:
       #
       # TrustedClients:
@@ -992,6 +1010,11 @@ Clusters:
       # disk cache size will use a disk cache, sized to the
       # container's RAM requirement (but with minimum 2 GiB and
       # maximum 32 GiB).
+      #
+      # Note: If you change this value, containers that used the previous
+      # default value will only be reused by container requests that
+      # explicitly specify the previous value in their keep_cache_ram
+      # runtime constraint.
       DefaultKeepCacheRAM: 0
 
       # Number of times a container can be unlocked before being
@@ -1132,6 +1155,8 @@ Clusters:
 
         # Maximum bytes that may be logged by a single job.  Log bytes that are
         # silenced by throttling are not counted against this total.
+        # If you set this to zero, each container will only create a single
+        # log on the API server, noting for users that logging is throttled.
         LimitLogBytesPerJob: 67108864
 
         LogPartialLineThrottlePeriod: 5s
@@ -1329,6 +1354,21 @@ Clusters:
         # down.
         MaxInstances: 64
 
+        # Maximum fraction of CloudVMs.MaxInstances allowed to run
+        # "supervisor" containers at any given time. A supervisor is a
+        # container whose purpose is mainly to submit and manage other
+        # containers, such as arvados-cwl-runner workflow runner.
+        #
+        # If there is a hard limit on the amount of concurrent
+        # containers that the cluster can run, it is important to
+        # avoid crowding out the containers doing useful work with
+        # containers who just create more work.
+        #
+        # For example, with the default MaxInstances of 64, it will
+        # schedule at most floor(64*0.30) = 19 concurrent workflows,
+        # ensuring 45 slots are available for work.
+        SupervisorFraction: 0.30
+
         # Interval between cloud provider syncs/updates ("list all
         # instances").
         SyncInterval: 1m
@@ -1359,6 +1399,12 @@ Clusters:
         # https://xxxxx.blob.core.windows.net/system/Microsoft.Compute/Images/images/xxxxx.vhd
         ImageID: ""
 
+        # Shell script to run on new instances using the cloud
+        # provider's UserData (EC2) or CustomData (Azure) feature.
+        #
+        # It is not necessary to include a #!/bin/sh line.
+        InstanceInitCommand: ""
+
         # An executable file (located on the dispatcher host) to be
         # copied to cloud instances at runtime and used as the
         # container runner/supervisor. The default value is the
@@ -1559,8 +1605,6 @@ Clusters:
           ReadTimeout: 10m
           RaceWindow: 24h
           PrefixLength: 0
-          # Use aws-s3-go (v2) instead of goamz
-          UseAWSS3v2Driver: true
 
           # For S3 driver, potentially unsafe tuning parameter,
           # intentionally excluded from main documentation.