X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/bb506312949465d4503d7f0b4434cfcd435cda0a..4e1e7f762ff1acd13b18efed5974b32833a467e2:/lib/config/config.default.yml diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml index 5e46c290da..7932c1df3d 100644 --- a/lib/config/config.default.yml +++ b/lib/config/config.default.yml @@ -225,7 +225,12 @@ Clusters: # Maximum number of concurrent requests to accept in a single # service process, or 0 for no limit. - MaxConcurrentRequests: 0 + MaxConcurrentRequests: 64 + + # Fraction of MaxConcurrentRequests that can be "log create" + # messages at any given time. This is to prevent logging + # updates from crowding out more important requests. + LogCreateRequestFraction: 0.50 # Maximum number of 64MiB memory buffers per Keepstore server process, or # 0 for no limit. When this limit is reached, up to @@ -288,6 +293,9 @@ Clusters: # any user with "manage" permission can un-freeze. UnfreezeProjectRequiresAdmin: false + # (Experimental) Use row-level locking on update API calls. + LockBeforeUpdate: false + Users: # Config parameters to automatically setup new users. If enabled, # this users will be able to self-activate. Enable this if you want @@ -373,6 +381,12 @@ Clusters: # cluster. RoleGroupsVisibleToAll: true + # If CanCreateRoleGroups is true, regular (non-admin) users can + # create new role groups. + # + # If false, only admins can create new role groups. + CanCreateRoleGroups: true + # During each period, a log entry with event_type="activity" # will be recorded for each user who is active during that # period. The object_uuid attribute will indicate the user's @@ -428,6 +442,15 @@ Clusters: # params_truncated. 
MaxRequestLogParamsSize: 2000 + # In all services except RailsAPI, periodically check whether + # the incoming HTTP request queue is nearly full (see + # MaxConcurrentRequests) and, if so, write a snapshot of the + # request queue to {service}-requests.json in the specified + # directory. + # + # Leave blank to disable. + RequestQueueDumpDirectory: "" + Collections: # Enable access controls for data stored in Keep. This should @@ -520,7 +543,7 @@ Clusters: # # If SIGUSR1 is received during an idle period between operations, # the next operation will start immediately. - BalancePeriod: 10m + BalancePeriod: 6h # Limits the number of collections retrieved by keep-balance per # API transaction. If this is zero, page size is @@ -529,11 +552,12 @@ Clusters: BalanceCollectionBatch: 0 # The size of keep-balance's internal queue of - # collections. Higher values use more memory and improve throughput - # by allowing keep-balance to fetch the next page of collections - # while the current page is still being processed. If this is zero - # or omitted, pages are processed serially. - BalanceCollectionBuffers: 1000 + # collections. Higher values may improve throughput by allowing + # keep-balance to fetch collections from the database while the + # current collections are still being processed, at the expense of + # using more memory. If this is zero or omitted, pages are + # processed serially. + BalanceCollectionBuffers: 4 # Maximum time for a rebalancing run. This ensures keep-balance # eventually gives up and retries if, for example, a network @@ -797,6 +821,16 @@ Clusters: # Skip TLS certificate name verification. InsecureTLS: false + # Minimum TLS version to negotiate when connecting to server + # (ldaps://... or StartTLS). It may be necessary to set this + # to "1.1" for compatibility with older LDAP servers that fail + # with 'LDAP Result Code 200 "Network Error": TLS handshake + # failed (tls: server selected unsupported protocol version + # 301)'. 
+ # + # If blank, use the recommended minimum version (1.2). + MinTLSVersion: "" + # Strip the @domain part if a user supplies an email-style # username with this domain. If "*", strip any user-provided # domain. If "", never strip the domain part. Example: @@ -878,16 +912,31 @@ Clusters: # by going through login again. IssueTrustedTokens: true - # When the token is returned to a client, the token itself may - # be restricted from viewing/creating other tokens based on whether - # the client is "trusted" or not. The local Workbench1 and - # Workbench2 are trusted by default, but if this is a - # LoginCluster, you probably want to include the other Workbench - # instances in the federation in this list. + # Origins (scheme://host[:port]) of clients trusted to receive + # new tokens via login process. The ExternalURLs of the local + # Workbench1 and Workbench2 are trusted implicitly and do not + # need to be listed here. If this is a LoginCluster, you + # probably want to include the other Workbench instances in the + # federation in this list. + # + # A wildcard like "https://*.example" will match client URLs + # like "https://a.example" and "https://a.b.c.example". + # + # Example: + # + # TrustedClients: + # "https://workbench.other-cluster.example": {} + # "https://workbench2.other-cluster.example": {} TrustedClients: - SAMPLE: - "https://workbench.federate1.example": {} - "https://workbench.federate2.example": {} + SAMPLE: {} + + # Treat any origin whose host part is "localhost" or a private + # IP address (e.g., http://10.0.0.123:3000/) as if it were + # listed in TrustedClients. + # + # Intended only for test/development use. Not appropriate for + # production use. + TrustPrivateNetworks: false Git: # Path to git or gitolite-shell executable. Each authenticated @@ -953,8 +1002,20 @@ Clusters: # troubleshooting purposes. LogReuseDecisions: false - # Default value for keep_cache_ram of a container's runtime_constraints. 
- DefaultKeepCacheRAM: 268435456 + # Default value for keep_cache_ram of a container's + # runtime_constraints. Note: this gets added to the RAM request + # used to allocate a VM or submit an HPC job. + # + # If this is zero, container requests that don't specify RAM or + # disk cache size will use a disk cache, sized to the + # container's RAM requirement (but with minimum 2 GiB and + # maximum 32 GiB). + # + # Note: If you change this value, containers that used the previous + # default value will only be reused by container requests that + # explicitly specify the previous value in their keep_cache_ram + # runtime constraint. + DefaultKeepCacheRAM: 0 # Number of times a container can be unlocked before being # automatically cancelled. @@ -967,13 +1028,6 @@ Clusters: # with the cancelled container. MaxRetryAttempts: 3 - # The maximum number of compute nodes that can be in use simultaneously - # If this limit is reduced, any existing nodes with slot number >= new limit - # will not be counted against the new limit. In other words, the new limit - # won't be strictly enforced until those nodes with higher slot numbers - # go down. - MaxComputeVMs: 64 - # Schedule all child containers on preemptible instances (e.g. AWS # Spot Instances) even if not requested by the submitter. # @@ -1016,7 +1070,7 @@ Clusters: # Extra RAM to reserve on the node, in addition to # the amount specified in the container's RuntimeConstraints - ReserveExtraRAM: 256MiB + ReserveExtraRAM: 550MiB # Minimum time between two attempts to run the same container MinRetryPeriod: 0s @@ -1101,6 +1155,8 @@ Clusters: # Maximum bytes that may be logged by a single job. Log bytes that are # silenced by throttling are not counted against this total. + # If you set this to zero, each container will only create a single + # log on the API server, noting for users that logging is throttled. 
LimitLogBytesPerJob: 67108864 LogPartialLineThrottlePeriod: 5s @@ -1289,6 +1345,30 @@ Clusters: # providers too, if desired. MaxConcurrentInstanceCreateOps: 1 + # The maximum number of instances to run at a time, or 0 for + # unlimited. + # + # If more instances than this are already running and busy + # when the dispatcher starts up, the running containers will + # be allowed to finish before the excess instances are shut + # down. + MaxInstances: 64 + + # Maximum fraction of CloudVMs.MaxInstances allowed to run + # "supervisor" containers at any given time. A supervisor is a + # container whose purpose is mainly to submit and manage other + # containers, such as the arvados-cwl-runner workflow runner. + # + # If there is a hard limit on the number of concurrent + # containers that the cluster can run, it is important to + # avoid crowding out the containers doing useful work with + # containers that just create more work. + # + # For example, with the default MaxInstances of 64, it will + # schedule at most floor(64*0.30) = 19 concurrent workflows, + # ensuring 45 slots are available for work. + SupervisorFraction: 0.30 + # Interval between cloud provider syncs/updates ("list all # instances"). SyncInterval: 1m @@ -1319,6 +1399,12 @@ Clusters: # https://xxxxx.blob.core.windows.net/system/Microsoft.Compute/Images/images/xxxxx.vhd ImageID: "" + # Shell script to run on new instances using the cloud + # provider's UserData (EC2) or CustomData (Azure) feature. + # + # It is not necessary to include a #!/bin/sh line. + InstanceInitCommand: "" + # An executable file (located on the dispatcher host) to be # copied to cloud instances at runtime and used as the # container runner/supervisor. The default value is the @@ -1329,6 +1415,12 @@ Clusters: # version of crunch-run installed; see CrunchRunCommand above. DeployRunnerBinary: "/proc/self/exe" + # Install the Dispatcher's SSH public key (derived from + # DispatchPrivateKey) when creating new cloud + # instances. 
Change this to false if you are using a different + # mechanism to pre-install the public key on new instances. + DeployPublicKey: true + # Tags to add on all resources (VMs, NICs, disks) created by # the container dispatcher. (Arvados's own tags -- # InstanceType, IdleBehavior, and InstanceSecret -- will also @@ -1370,6 +1462,20 @@ Clusters: # the cloud dispatcher. Leave blank when not needed. IAMInstanceProfile: "" + # (ec2) how often to look up spot instance pricing data + # (only while running spot instances) for the purpose of + # calculating container cost estimates. A value of 0 + # disables spot price lookups entirely. + SpotPriceUpdateInterval: 24h + + # (ec2) per-GiB-month cost of EBS volumes. Matches + # EBSVolumeType. Used to account for AddedScratch when + # calculating container cost estimates. Note that + # https://aws.amazon.com/ebs/pricing/ defines GB to mean + # GiB, so an advertised price $0.10/GB indicates a real + # price of $0.10/GiB and can be entered here as 0.10. + EBSPrice: 0.10 + # (azure) Credentials. SubscriptionID: "" ClientID: "" @@ -1423,6 +1529,13 @@ Clusters: RAM: 128MiB IncludedScratch: 16GB AddedScratch: 0 + # Hourly price ($), used to select node types for containers, + # and to calculate estimated container costs. For spot + # instances on EC2, this is also used as the maximum price + # when launching spot instances, while the estimated container + # cost is computed based on the current spot price according + # to AWS. On Azure, and on-demand instances on EC2, the price + # given here is used to compute container cost estimates. Price: 0.1 Preemptible: false # Include this section if the node type includes GPU (CUDA) support @@ -1498,8 +1611,6 @@ Clusters: ReadTimeout: 10m RaceWindow: 24h PrefixLength: 0 - # Use aws-s3-go (v2) instead of goamz - UseAWSS3v2Driver: true # For S3 driver, potentially unsafe tuning parameter, # intentionally excluded from main documentation. 
@@ -1720,9 +1831,11 @@ Clusters: # This feature is disabled when set to zero. IdleTimeout: 0s - # URL to a file that is a fragment of text or HTML which should - # be rendered in Workbench as a banner. - BannerURL: "" + # UUID of a collection. This collection should be shared with + # all users. Workbench will look for a file "banner.html" in + # this collection and display its contents (should be + # HTML-formatted text) when users first log in to Workbench. + BannerUUID: "" # Workbench welcome screen, this is HTML text that will be # incorporated directly onto the page.