# parameter higher than this value, this value is used instead.
MaxItemsPerResponse: 1000
- # Maximum number of concurrent requests to accept in a single
- # service process, or 0 for no limit.
+ # Maximum number of requests to process concurrently in a single
+ # service process, or 0 for no limit.
+ #
+ # Note this applies to all Arvados services (controller, webdav,
+ # websockets, etc.). Concurrency in the controller service is
+ # also effectively limited by MaxConcurrentRailsRequests (see
+ # below) because most controller requests proxy through to the
+ # RailsAPI service.
MaxConcurrentRequests: 64
+ # Maximum number of requests to process concurrently in a single
+ # RailsAPI service process, or 0 for no limit.
+ MaxConcurrentRailsRequests: 8
+
+ # Maximum number of incoming requests to hold in a priority
+ # queue waiting for one of the MaxConcurrentRequests slots to be
+ # free. When the queue is longer than this, respond 503 to the
+ # lowest priority request.
+ #
+ # If MaxQueuedRequests is 0, respond 503 immediately to
+ # additional requests while at the MaxConcurrentRequests limit.
+ MaxQueuedRequests: 128
+
+ # Maximum time a "lock container" request is allowed to wait in
+ # the incoming request queue before returning 503.
+ MaxQueueTimeForLockRequests: 2s
+
# Fraction of MaxConcurrentRequests that can be "log create"
# messages at any given time. This is to prevent logging
# updates from crowding out more important requests.
# Use 0 to disable activity logging.
ActivityLoggingPeriod: 24h
+ # The SyncUser* options control what system resources are managed by
+ # arvados-login-sync on shell nodes. They correspond to:
+ # * SyncUserAccounts: The user's Unix account on the shell node
+ # * SyncUserGroups: The group memberships of that account
+ # * SyncUserSSHKeys: Whether to authorize the user's Arvados SSH keys
+ # * SyncUserAPITokens: Whether to set up the user's Arvados API token
+ # All default to true.
+ SyncUserAccounts: true
+ SyncUserGroups: true
+ SyncUserSSHKeys: true
+ SyncUserAPITokens: true
+
+ # If SyncUserGroups=true, then arvados-login-sync will ensure that all
+ # managed accounts are members of the Unix groups listed in
+ # SyncRequiredGroups, in addition to any groups listed in their Arvados
+ # login permission. The default list includes the "fuse" group so
+ # users can use arv-mount. You can require no groups by specifying an
+ # empty list (i.e., `SyncRequiredGroups: []`).
+ SyncRequiredGroups:
+ - fuse
+
+ # SyncIgnoredGroups is a list of group names. arvados-login-sync will
+ # never modify these groups. If user login permissions list any groups
+ # in SyncIgnoredGroups, they will be ignored. If a user's Unix account
+ # belongs to any of these groups, arvados-login-sync will not remove
+ # the account from that group. The default is a set of particularly
+ # security-sensitive groups across Debian- and Red Hat-based
+ # distributions.
+ SyncIgnoredGroups:
+ - adm
+ - disk
+ - kmem
+ - mem
+ - root
+ - shadow
+ - staff
+ - sudo
+ - sys
+ - utempter
+ - utmp
+ - wheel
+
AuditLogs:
# Time to keep audit logs, in seconds. (An audit log is a row added
# to the "logs" table in the PostgreSQL database each time an
# params_truncated.
MaxRequestLogParamsSize: 2000
+ # In all services except RailsAPI, periodically check whether
+ # the incoming HTTP request queue is nearly full (see
+ # MaxConcurrentRequests) and, if so, write a snapshot of the
+ # request queue to {service}-requests.json in the specified
+ # directory.
+ #
+ # Leave blank to disable.
+ RequestQueueDumpDirectory: ""
+
Collections:
# Enable access controls for data stored in Keep. This should
BalanceCollectionBatch: 0
# The size of keep-balance's internal queue of
- # collections. Higher values use more memory and improve throughput
- # by allowing keep-balance to fetch the next page of collections
- # while the current page is still being processed. If this is zero
- # or omitted, pages are processed serially.
- BalanceCollectionBuffers: 1000
+ # collections. Higher values may improve throughput by allowing
+ # keep-balance to fetch collections from the database while the
+ # current collections are still being processed, at the expense of
+ # using more memory. If this is zero or omitted, pages are
+ # processed serially.
+ BalanceCollectionBuffers: 4
# Maximum time for a rebalancing run. This ensures keep-balance
# eventually gives up and retries if, for example, a network
# Number of times a container can be unlocked before being
# automatically cancelled.
- MaxDispatchAttempts: 5
+ MaxDispatchAttempts: 10
# Default value for container_count_max for container requests. This is the
# number of times Arvados will create a new container to satisfy a container
# A price factor of 1.0 is a reasonable starting point.
PreemptiblePriceFactor: 0
+ # When the lowest-priced instance type for a given container is
+ # not available, try other instance types, up to the indicated
+ # maximum price factor.
+ #
+ # For example, with MaximumPriceFactor 1.5, if the
+ # lowest-cost instance type A suitable for a given container
+ # costs $2/h, Arvados may run the container on any instance type
+ # B costing $3/h or less when instance type A is not available
+ # or an idle instance of type B is already running.
+ MaximumPriceFactor: 1.5
+
# PEM encoded SSH key (RSA, DSA, or ECDSA) used by the
# cloud dispatcher for executing containers on worker VMs.
# Begins with "-----BEGIN RSA PRIVATE KEY-----\n"
# and ends with "\n-----END RSA PRIVATE KEY-----\n".
+ #
+ # Use "file:///absolute/path/to/key" to load the key from a
+ # separate file instead of embedding it in the configuration
+ # file.
DispatchPrivateKey: ""
# Maximum time to wait for workers to come up before abandoning
# before being silenced until the end of the period.
LogThrottleLines: 1024
- # Maximum bytes that may be logged by a single job. Log bytes that are
- # silenced by throttling are not counted against this total.
- LimitLogBytesPerJob: 67108864
+ # Maximum bytes that may be logged as legacy log events
+ # (records posted to the "logs" table). Starting with Arvados
+ # 2.7, container live logging has migrated to a new system
+ # (polling the container request live log endpoint) and this
+ # value should be 0. As of this writing, the container will
+ # still create a single log on the API server, noting that
+ # log events are throttled.
+ LimitLogBytesPerJob: 0
LogPartialLineThrottlePeriod: 5s
# down.
MaxInstances: 64
- # Maximum fraction of CloudVMs.MaxInstances allowed to run
- # "supervisor" containers at any given time. A supervisor is a
- # container whose purpose is mainly to submit and manage other
- # containers, such as arvados-cwl-runner workflow runner.
+ # The minimum number of instances expected to be runnable
+ # without reaching a provider-imposed quota.
+ #
+ # This is used as the initial value for the dispatcher's
+ # dynamic instance limit, which increases (up to MaxInstances)
+ # as containers start up successfully and decreases in
+ # response to high API load and cloud quota errors.
+ #
+ # Setting this to 0 means the dynamic instance limit will
+ # start at MaxInstances.
+ #
+ # Situations where you may want to set this (to a value less
+ # than MaxInstances) would be when there is significant
+ # variability or uncertainty in the actual cloud resources
+ # available. Upon reaching InitialQuotaEstimate the
+ # dispatcher will switch to a more conservative behavior with
+ # slower instance start to avoid over-shooting cloud resource
+ # limits.
+ InitialQuotaEstimate: 0
+
+ # Maximum fraction of available instance capacity allowed to
+ # run "supervisor" containers at any given time. A supervisor
+ # is a container whose purpose is mainly to submit and manage
+ # other containers, such as the arvados-cwl-runner workflow
+ # runner.
#
# If there is a hard limit on the amount of concurrent
# containers that the cluster can run, it is important to
# containers who just create more work.
#
# For example, with the default MaxInstances of 64, it will
- # schedule at most floor(64*0.30) = 19 concurrent workflows,
- # ensuring 45 slots are available for work.
- SupervisorFraction: 0.30
+ # schedule at most floor(64*0.50) = 32 concurrent workflow
+ # runners, ensuring 32 slots are available for work.
+ SupervisorFraction: 0.50
# Interval between cloud provider syncs/updates ("list all
# instances").
# https://xxxxx.blob.core.windows.net/system/Microsoft.Compute/Images/images/xxxxx.vhd
ImageID: ""
+ # Shell script to run on new instances using the cloud
+ # provider's UserData (EC2) or CustomData (Azure) feature.
+ #
+ # It is not necessary to include a #!/bin/sh line.
+ InstanceInitCommand: ""
+
# An executable file (located on the dispatcher host) to be
# copied to cloud instances at runtime and used as the
# container runner/supervisor. The default value is the
# version of crunch-run installed; see CrunchRunCommand above.
DeployRunnerBinary: "/proc/self/exe"
+ # Install the Dispatcher's SSH public key (derived from
+ # DispatchPrivateKey) when creating new cloud
+ # instances. Change this to false if you are using a different
+ # mechanism to pre-install the public key on new instances.
+ DeployPublicKey: true
+
# Tags to add on all resources (VMs, NICs, disks) created by
# the container dispatcher. (Arvados's own tags --
# InstanceType, IdleBehavior, and InstanceSecret -- will also
SecretAccessKey: ""
# (ec2) Instance configuration.
+
+ # (ec2) Region, like "us-east-1".
+ Region: ""
+
+ # (ec2) Security group IDs. Omit or use {} to use the
+ # default security group.
SecurityGroupIDs:
"SAMPLE": {}
+
+ # (ec2) One or more subnet IDs. Omit or leave empty to let
+ # AWS choose a default subnet from your default VPC. If
+ # multiple subnets are configured here (enclosed in brackets
+ # like [subnet-abc123, subnet-def456]) the cloud dispatcher
+ # will detect subnet-related errors and retry using a
+ # different subnet. Most sites specify one subnet.
SubnetID: ""
- Region: ""
+
EBSVolumeType: gp2
AdminUsername: debian
# (ec2) name of the IAMInstanceProfile for instances started by
ReadTimeout: 10m
RaceWindow: 24h
PrefixLength: 0
- # Use aws-s3-go (v2) instead of goamz
- UseAWSS3v2Driver: true
# For S3 driver, potentially unsafe tuning parameter,
# intentionally excluded from main documentation.