X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/f68ba06c5e85b748f13f723373e1fbe79fa8e563..763f629e11df304e6202fb140adc27d3a08ac1a6:/lib/config/config.default.yml diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml index 09c068a0b9..33c1e497de 100644 --- a/lib/config/config.default.yml +++ b/lib/config/config.default.yml @@ -223,9 +223,23 @@ Clusters: # parameter higher than this value, this value is used instead. MaxItemsPerResponse: 1000 - # Maximum number of concurrent requests to accept in a single - # service process, or 0 for no limit. - MaxConcurrentRequests: 0 + # Maximum number of concurrent requests to process concurrently + # in a single service process, or 0 for no limit. + MaxConcurrentRequests: 64 + + # Maximum number of incoming requests to hold in a priority + # queue waiting for one of the MaxConcurrentRequests slots to be + # free. When the queue is longer than this, respond 503 to the + # lowest priority request. + # + # If MaxQueuedRequests is 0, respond 503 immediately to + # additional requests while at the MaxConcurrentRequests limit. + MaxQueuedRequests: 64 + + # Fraction of MaxConcurrentRequests that can be "log create" + # messages at any given time. This is to prevent logging + # updates from crowding out more important requests. + LogCreateRequestFraction: 0.50 # Maximum number of 64MiB memory buffers per Keepstore server process, or # 0 for no limit. When this limit is reached, up to @@ -288,6 +302,9 @@ Clusters: # any user with "manage" permission can un-freeze. UnfreezeProjectRequiresAdmin: false + # (Experimental) Use row-level locking on update API calls. + LockBeforeUpdate: false + Users: # Config parameters to automatically setup new users. If enabled, # this users will be able to self-activate. Enable this if you want @@ -373,6 +390,12 @@ Clusters: # cluster. RoleGroupsVisibleToAll: true + # If CanCreateRoleGroups is true, regular (non-admin) users can + # create new role groups. 
+ # + # If false, only admins can create new role groups. + CanCreateRoleGroups: true + # During each period, a log entry with event_type="activity" # will be recorded for each user who is active during that # period. The object_uuid attribute will indicate the user's @@ -428,6 +451,15 @@ Clusters: # params_truncated. MaxRequestLogParamsSize: 2000 + # In all services except RailsAPI, periodically check whether + # the incoming HTTP request queue is nearly full (see + # MaxConcurrentRequests) and, if so, write a snapshot of the + # request queue to {service}-requests.json in the specified + # directory. + # + # Leave blank to disable. + RequestQueueDumpDirectory: "" + Collections: # Enable access controls for data stored in Keep. This should @@ -520,7 +552,7 @@ Clusters: # # If SIGUSR1 is received during an idle period between operations, # the next operation will start immediately. - BalancePeriod: 10m + BalancePeriod: 6h # Limits the number of collections retrieved by keep-balance per # API transaction. If this is zero, page size is @@ -529,11 +561,12 @@ Clusters: BalanceCollectionBatch: 0 # The size of keep-balance's internal queue of - # collections. Higher values use more memory and improve throughput - # by allowing keep-balance to fetch the next page of collections - # while the current page is still being processed. If this is zero - # or omitted, pages are processed serially. - BalanceCollectionBuffers: 1000 + # collections. Higher values may improve throughput by allowing + # keep-balance to fetch collections from the database while the + # current collections are still being processed, at the expense of + # using more memory. If this is zero or omitted, pages are + # processed serially. + BalanceCollectionBuffers: 4 # Maximum time for a rebalancing run. This ensures keep-balance # eventually gives up and retries if, for example, a network @@ -797,6 +830,16 @@ Clusters: # Skip TLS certificate name verification. 
InsecureTLS: false + # Minimum TLS version to negotiate when connecting to server + # (ldaps://... or StartTLS). It may be necessary to set this + # to "1.1" for compatibility with older LDAP servers that fail + # with 'LDAP Result Code 200 "Network Error": TLS handshake + # failed (tls: server selected unsupported protocol version + # 301)'. + # + # If blank, use the recommended minimum version (1.2). + MinTLSVersion: "" + # Strip the @domain part if a user supplies an email-style # username with this domain. If "*", strip any user-provided # domain. If "", never strip the domain part. Example: @@ -885,6 +928,9 @@ Clusters: # probably want to include the other Workbench instances in the # federation in this list. # + # A wildcard like "https://*.example" will match client URLs + # like "https://a.example" and "https://a.b.c.example". + # # Example: # # TrustedClients: @@ -965,8 +1011,20 @@ Clusters: # troubleshooting purposes. LogReuseDecisions: false - # Default value for keep_cache_ram of a container's runtime_constraints. - DefaultKeepCacheRAM: 268435456 + # Default value for keep_cache_ram of a container's + # runtime_constraints. Note: this gets added to the RAM request + # used to allocate a VM or submit an HPC job. + # + # If this is zero, container requests that don't specify RAM or + # disk cache size will use a disk cache, sized to the + # container's RAM requirement (but with minimum 2 GiB and + # maximum 32 GiB). + # + # Note: If you change this value, containers that used the previous + # default value will only be reused by container requests that + # explicitly specify the previous value in their keep_cache_ram + # runtime constraint. + DefaultKeepCacheRAM: 0 # Number of times a container can be unlocked before being # automatically cancelled. @@ -979,13 +1037,6 @@ Clusters: # with the cancelled container. 
MaxRetryAttempts: 3 - # The maximum number of compute nodes that can be in use simultaneously - # If this limit is reduced, any existing nodes with slot number >= new limit - # will not be counted against the new limit. In other words, the new limit - # won't be strictly enforced until those nodes with higher slot numbers - # go down. - MaxComputeVMs: 64 - # Schedule all child containers on preemptible instances (e.g. AWS # Spot Instances) even if not requested by the submitter. # @@ -1009,6 +1060,10 @@ Clusters: # cloud dispatcher for executing containers on worker VMs. # Begins with "-----BEGIN RSA PRIVATE KEY-----\n" # and ends with "\n-----END RSA PRIVATE KEY-----\n". + # + # Use "file:///absolute/path/to/key" to load the key from a + # separate file instead of embedding it in the configuration + # file. DispatchPrivateKey: "" # Maximum time to wait for workers to come up before abandoning @@ -1028,7 +1083,7 @@ Clusters: # Extra RAM to reserve on the node, in addition to # the amount specified in the container's RuntimeConstraints - ReserveExtraRAM: 256MiB + ReserveExtraRAM: 550MiB # Minimum time between two attempts to run the same container MinRetryPeriod: 0s @@ -1113,6 +1168,8 @@ Clusters: # Maximum bytes that may be logged by a single job. Log bytes that are # silenced by throttling are not counted against this total. + # If you set this to zero, each container will only create a single + # log on the API server, noting for users that logging is throttled. LimitLogBytesPerJob: 67108864 LogPartialLineThrottlePeriod: 5s @@ -1301,6 +1358,30 @@ Clusters: # providers too, if desired. MaxConcurrentInstanceCreateOps: 1 + # The maximum number of instances to run at a time, or 0 for + # unlimited. + # + # If more instances than this are already running and busy + # when the dispatcher starts up, the running containers will + # be allowed to finish before the excess instances are shut + # down. 
+ MaxInstances: 64 + # Maximum fraction of CloudVMs.MaxInstances allowed to run + # "supervisor" containers at any given time. A supervisor is a + # container whose purpose is mainly to submit and manage other + # containers, such as the arvados-cwl-runner workflow runner. + # + # If there is a hard limit on the number of concurrent + # containers that the cluster can run, it is important to + # avoid crowding out the containers doing useful work with + # containers that just create more work. + # + # For example, with the default MaxInstances of 64, it will + # schedule at most floor(64*0.30) = 19 concurrent workflows, + # ensuring 45 slots are available for work. + SupervisorFraction: 0.30 + # Interval between cloud provider syncs/updates ("list all # instances"). SyncInterval: 1m @@ -1331,6 +1412,12 @@ Clusters: # https://xxxxx.blob.core.windows.net/system/Microsoft.Compute/Images/images/xxxxx.vhd ImageID: "" + # Shell script to run on new instances using the cloud + # provider's UserData (EC2) or CustomData (Azure) feature. + # + # It is not necessary to include a #!/bin/sh line. + InstanceInitCommand: "" + # An executable file (located on the dispatcher host) to be # copied to cloud instances at runtime and used as the # container runner/supervisor. The default value is the @@ -1341,6 +1428,12 @@ Clusters: # version of crunch-run installed; see CrunchRunCommand above. DeployRunnerBinary: "/proc/self/exe" + # Install the Dispatcher's SSH public key (derived from + # DispatchPrivateKey) when creating new cloud + # instances. Change this to false if you are using a different + # mechanism to pre-install the public key on new instances. + DeployPublicKey: true + # Tags to add on all resources (VMs, NICs, disks) created by # the container dispatcher. (Arvados's own tags -- # InstanceType, IdleBehavior, and InstanceSecret -- will also @@ -1382,6 +1475,20 @@ Clusters: # the cloud dispatcher. Leave blank when not needed. 
IAMInstanceProfile: "" + # (ec2) how often to look up spot instance pricing data + # (only while running spot instances) for the purpose of + # calculating container cost estimates. A value of 0 + # disables spot price lookups entirely. + SpotPriceUpdateInterval: 24h + + # (ec2) per-GiB-month cost of EBS volumes. Matches + # EBSVolumeType. Used to account for AddedScratch when + # calculating container cost estimates. Note that + # https://aws.amazon.com/ebs/pricing/ defines GB to mean + # GiB, so an advertised price $0.10/GB indicates a real + # price of $0.10/GiB and can be entered here as 0.10. + EBSPrice: 0.10 + # (azure) Credentials. SubscriptionID: "" ClientID: "" @@ -1435,6 +1542,13 @@ Clusters: RAM: 128MiB IncludedScratch: 16GB AddedScratch: 0 + # Hourly price ($), used to select node types for containers, + # and to calculate estimated container costs. For spot + # instances on EC2, this is also used as the maximum price + # when launching spot instances, while the estimated container + # cost is computed based on the current spot price according + # to AWS. On Azure, and on-demand instances on EC2, the price + # given here is used to compute container cost estimates. Price: 0.1 Preemptible: false # Include this section if the node type includes GPU (CUDA) support @@ -1510,8 +1624,6 @@ Clusters: ReadTimeout: 10m RaceWindow: 24h PrefixLength: 0 - # Use aws-s3-go (v2) instead of goamz - UseAWSS3v2Driver: true # For S3 driver, potentially unsafe tuning parameter, # intentionally excluded from main documentation. @@ -1732,9 +1844,11 @@ Clusters: # This feature is disabled when set to zero. IdleTimeout: 0s - # URL to a file that is a fragment of text or HTML which should - # be rendered in Workbench as a banner. - BannerURL: "" + # UUID of a collection. This collection should be shared with + # all users. 
Workbench will look for a file "banner.html" in + # this collection and display its contents (should be + # HTML-formatted text) when users first log in to Workbench. + BannerUUID: "" # Workbench welcome screen, this is HTML text that will be # incorporated directly onto the page.