X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/24a1cdaa5b5a3a98b47ab6802d00e7c4d2a848df..cc1c83c261d289c7fa049637f8ae1fabe352059c:/lib/config/config.default.yml diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml index 29a4a640b5..a52d09f68b 100644 --- a/lib/config/config.default.yml +++ b/lib/config/config.default.yml @@ -225,7 +225,12 @@ Clusters: # Maximum number of concurrent requests to accept in a single # service process, or 0 for no limit. - MaxConcurrentRequests: 0 + MaxConcurrentRequests: 64 + + # Fraction of MaxConcurrentRequests that can be "log create" + # messages at any given time. This is to prevent logging + # updates from crowding out more important requests. + LogCreateRequestFraction: 0.50 # Maximum number of 64MiB memory buffers per Keepstore server process, or # 0 for no limit. When this limit is reached, up to @@ -437,6 +442,15 @@ Clusters: # params_truncated. MaxRequestLogParamsSize: 2000 + # In all services except RailsAPI, periodically check whether + # the incoming HTTP request queue is nearly full (see + # MaxConcurrentRequests) and, if so, write a snapshot of the + # request queue to {service}-requests.json in the specified + # directory. + # + # Leave blank to disable. + RequestQueueDumpDirectory: "" + Collections: # Enable access controls for data stored in Keep. This should @@ -529,7 +543,7 @@ Clusters: # # If SIGUSR1 is received during an idle period between operations, # the next operation will start immediately. - BalancePeriod: 10m + BalancePeriod: 6h # Limits the number of collections retrieved by keep-balance per # API transaction. If this is zero, page size is @@ -538,11 +552,12 @@ Clusters: BalanceCollectionBatch: 0 # The size of keep-balance's internal queue of - # collections. Higher values use more memory and improve throughput - # by allowing keep-balance to fetch the next page of collections - # while the current page is still being processed. 
If this is zero - # or omitted, pages are processed serially. - BalanceCollectionBuffers: 1000 + # collections. Higher values may improve throughput by allowing + # keep-balance to fetch collections from the database while the + # current collections are still being processed, at the expense of + # using more memory. If this is zero or omitted, pages are + # processed serially. + BalanceCollectionBuffers: 4 # Maximum time for a rebalancing run. This ensures keep-balance # eventually gives up and retries if, for example, a network @@ -904,6 +919,9 @@ Clusters: # probably want to include the other Workbench instances in the # federation in this list. # + # A wildcard like "https://*.example" will match client URLs + # like "https://a.example" and "https://a.b.c.example". + # # Example: # # TrustedClients: @@ -992,6 +1010,11 @@ Clusters: # disk cache size will use a disk cache, sized to the # container's RAM requirement (but with minimum 2 GiB and # maximum 32 GiB). + # + # Note: If you change this value, containers that used the previous + # default value will only be reused by container requests that + # explicitly specify the previous value in their keep_cache_ram + # runtime constraint. DefaultKeepCacheRAM: 0 # Number of times a container can be unlocked before being @@ -1005,13 +1028,6 @@ Clusters: # with the cancelled container. MaxRetryAttempts: 3 - # The maximum number of compute nodes that can be in use simultaneously - # If this limit is reduced, any existing nodes with slot number >= new limit - # will not be counted against the new limit. In other words, the new limit - # won't be strictly enforced until those nodes with higher slot numbers - # go down. - MaxComputeVMs: 64 - # Schedule all child containers on preemptible instances (e.g. AWS # Spot Instances) even if not requested by the submitter. # @@ -1035,6 +1051,10 @@ Clusters: # cloud dispatcher for executing containers on worker VMs. 
# Begins with "-----BEGIN RSA PRIVATE KEY-----\n" # and ends with "\n-----END RSA PRIVATE KEY-----\n". + # + # Use "file:///absolute/path/to/key" to load the key from a + # separate file instead of embedding it in the configuration + # file. DispatchPrivateKey: "" # Maximum time to wait for workers to come up before abandoning @@ -1139,6 +1159,8 @@ Clusters: # Maximum bytes that may be logged by a single job. Log bytes that are # silenced by throttling are not counted against this total. + # If you set this to zero, each container will only create a single + # log on the API server, noting for users that logging is throttled. LimitLogBytesPerJob: 67108864 LogPartialLineThrottlePeriod: 5s @@ -1327,6 +1349,30 @@ Clusters: # providers too, if desired. MaxConcurrentInstanceCreateOps: 1 + # The maximum number of instances to run at a time, or 0 for + # unlimited. + # + # If more instances than this are already running and busy + # when the dispatcher starts up, the running containers will + # be allowed to finish before the excess instances are shut + # down. + MaxInstances: 64 + + # Maximum fraction of CloudVMs.MaxInstances allowed to run + # "supervisor" containers at any given time. A supervisor is a + # container whose purpose is mainly to submit and manage other + # containers, such as the arvados-cwl-runner workflow runner. + # + # If there is a hard limit on the number of concurrent + # containers that the cluster can run, it is important to + # avoid crowding out the containers doing useful work with + # containers that just create more work. + # + # For example, with the default MaxInstances of 64, it will + # schedule at most floor(64*0.30) = 19 concurrent workflows, + # ensuring 45 slots are available for work. + SupervisorFraction: 0.30 + # Interval between cloud provider syncs/updates ("list all # instances"). 
SyncInterval: 1m @@ -1357,6 +1403,12 @@ Clusters: # https://xxxxx.blob.core.windows.net/system/Microsoft.Compute/Images/images/xxxxx.vhd ImageID: "" + # Shell script to run on new instances using the cloud + # provider's UserData (EC2) or CustomData (Azure) feature. + # + # It is not necessary to include a #!/bin/sh line. + InstanceInitCommand: "" + # An executable file (located on the dispatcher host) to be # copied to cloud instances at runtime and used as the # container runner/supervisor. The default value is the @@ -1367,6 +1419,12 @@ Clusters: # version of crunch-run installed; see CrunchRunCommand above. DeployRunnerBinary: "/proc/self/exe" + # Install the Dispatcher's SSH public key (derived from + # DispatchPrivateKey) when creating new cloud + # instances. Change this to false if you are using a different + # mechanism to pre-install the public key on new instances. + DeployPublicKey: true + # Tags to add on all resources (VMs, NICs, disks) created by # the container dispatcher. (Arvados's own tags -- # InstanceType, IdleBehavior, and InstanceSecret -- will also @@ -1557,8 +1615,6 @@ Clusters: ReadTimeout: 10m RaceWindow: 24h PrefixLength: 0 - # Use aws-s3-go (v2) instead of goamz - UseAWSS3v2Driver: true # For S3 driver, potentially unsafe tuning parameter, # intentionally excluded from main documentation. @@ -1779,9 +1835,11 @@ Clusters: # This feature is disabled when set to zero. IdleTimeout: 0s - # URL to a file that is a fragment of text or HTML which should - # be rendered in Workbench as a banner. - BannerURL: "" + # UUID of a collection. This collection should be shared with + # all users. Workbench will look for a file "banner.html" in + # this collection and display its contents (should be + # HTML-formatted text) when users first log in to Workbench. + BannerUUID: "" # Workbench welcome screen, this is HTML text that will be # incorporated directly onto the page.