# parameter higher than this value, this value is used instead.
MaxItemsPerResponse: 1000
- # Maximum number of concurrent requests to accept in a single
- # service process, or 0 for no limit.
- MaxConcurrentRequests: 0
+ # Maximum number of requests to process concurrently in a single
+ # service process, or 0 for no limit.
+ MaxConcurrentRequests: 64
+
+ # Maximum number of incoming requests to hold in a priority
+ # queue waiting for one of the MaxConcurrentRequests slots to be
+ # free. When the queue is longer than this, respond 503 to the
+ # lowest priority request.
+ #
+ # If MaxQueuedRequests is 0, respond 503 immediately to
+ # additional requests while at the MaxConcurrentRequests limit.
+ MaxQueuedRequests: 64
+
+ # Maximum time a "lock container" request is allowed to wait in
+ # the incoming request queue before returning 503.
+ MaxQueueTimeForLockRequests: 2s
+
+ # Fraction of MaxConcurrentRequests that can be "log create"
+ # messages at any given time. This is to prevent logging
+ # updates from crowding out more important requests.
+ LogCreateRequestFraction: 0.50
# Maximum number of 64MiB memory buffers per Keepstore server process, or
# 0 for no limit. When this limit is reached, up to
# any user with "manage" permission can un-freeze.
UnfreezeProjectRequiresAdmin: false
+ # (Experimental) Use row-level locking on update API calls.
+ LockBeforeUpdate: false
+
Users:
# Config parameters to automatically set up new users. If enabled,
# these users will be able to self-activate. Enable this if you want
# cluster.
RoleGroupsVisibleToAll: true
+ # If CanCreateRoleGroups is true, regular (non-admin) users can
+ # create new role groups.
+ #
+ # If false, only admins can create new role groups.
+ CanCreateRoleGroups: true
+
+ # During each period, a log entry with event_type="activity"
+ # will be recorded for each user who is active during that
+ # period. The object_uuid attribute will indicate the user's
+ # UUID.
+ #
+ # Multiple log entries for the same user may be generated during
+ # a period if there are multiple controller processes or a
+ # controller process is restarted.
+ #
+ # Use 0 to disable activity logging.
+ ActivityLoggingPeriod: 24h
+
AuditLogs:
# Time to keep audit logs, in seconds. (An audit log is a row added
# to the "logs" table in the PostgreSQL database each time an
# params_truncated.
MaxRequestLogParamsSize: 2000
+ # In all services except RailsAPI, periodically check whether
+ # the incoming HTTP request queue is nearly full (see
+ # MaxConcurrentRequests) and, if so, write a snapshot of the
+ # request queue to {service}-requests.json in the specified
+ # directory.
+ #
+ # Leave blank to disable.
+ RequestQueueDumpDirectory: ""
+
Collections:
# Enable access controls for data stored in Keep. This should
#
# If SIGUSR1 is received during an idle period between operations,
# the next operation will start immediately.
- BalancePeriod: 10m
+ BalancePeriod: 6h
# Limits the number of collections retrieved by keep-balance per
# API transaction. If this is zero, page size is
BalanceCollectionBatch: 0
# The size of keep-balance's internal queue of
- # collections. Higher values use more memory and improve throughput
- # by allowing keep-balance to fetch the next page of collections
- # while the current page is still being processed. If this is zero
- # or omitted, pages are processed serially.
- BalanceCollectionBuffers: 1000
+ # collections. Higher values may improve throughput by allowing
+ # keep-balance to fetch collections from the database while the
+ # current collections are still being processed, at the expense of
+ # using more memory. If this is zero or omitted, pages are
+ # processed serially.
+ BalanceCollectionBuffers: 4
# Maximum time for a rebalancing run. This ensures keep-balance
# eventually gives up and retries if, for example, a network
# Time to cache manifests, permission checks, and sessions.
TTL: 300s
- # Time to cache collection state.
- UUIDTTL: 5s
-
# Block cache entries. Each block consumes up to 64 MiB RAM.
MaxBlockEntries: 20
- # Collection cache entries.
- MaxCollectionEntries: 1000
-
- # Approximate memory limit (in bytes) for collection cache.
+ # Approximate memory limit (in bytes) for session cache.
+ #
+ # Note this applies to the in-memory representation of
+ # projects and collections -- metadata, block locators,
+ # filenames, etc. -- excluding cached file content, which is
+ # limited by MaxBlockEntries.
MaxCollectionBytes: 100000000
- # UUID cache entries.
- MaxUUIDEntries: 1000
-
# Persistent sessions.
MaxSessions: 100
# Skip TLS certificate name verification.
InsecureTLS: false
+ # Minimum TLS version to negotiate when connecting to server
+ # (ldaps://... or StartTLS). It may be necessary to set this
+ # to "1.1" for compatibility with older LDAP servers that fail
+ # with 'LDAP Result Code 200 "Network Error": TLS handshake
+ # failed (tls: server selected unsupported protocol version
+ # 301)'.
+ #
+ # If blank, use the recommended minimum version (1.2).
+ MinTLSVersion: ""
+
# Strip the @domain part if a user supplies an email-style
# username with this domain. If "*", strip any user-provided
# domain. If "", never strip the domain part. Example:
# by going through login again.
IssueTrustedTokens: true
- # When the token is returned to a client, the token itself may
- # be restricted from viewing/creating other tokens based on whether
- # the client is "trusted" or not. The local Workbench1 and
- # Workbench2 are trusted by default, but if this is a
- # LoginCluster, you probably want to include the other Workbench
- # instances in the federation in this list.
+ # Origins (scheme://host[:port]) of clients trusted to receive
+ # new tokens via the login process. The ExternalURLs of the local
+ # Workbench1 and Workbench2 are trusted implicitly and do not
+ # need to be listed here. If this is a LoginCluster, you
+ # probably want to include the other Workbench instances in the
+ # federation in this list.
+ #
+ # A wildcard like "https://*.example" will match client URLs
+ # like "https://a.example" and "https://a.b.c.example".
+ #
+ # Example:
+ #
+ # TrustedClients:
+ # "https://workbench.other-cluster.example": {}
+ # "https://workbench2.other-cluster.example": {}
TrustedClients:
- SAMPLE:
- "https://workbench.federate1.example": {}
- "https://workbench.federate2.example": {}
+ SAMPLE: {}
+
+ # Treat any origin whose host part is "localhost" or a private
+ # IP address (e.g., http://10.0.0.123:3000/) as if it were
+ # listed in TrustedClients.
+ #
+ # Intended only for test/development use. Not appropriate for
+ # production use.
+ TrustPrivateNetworks: false
Git:
# Path to git or gitolite-shell executable. Each authenticated
Repositories: /var/lib/arvados/git/repositories
TLS:
- # Use "file:///var/lib/acme/live/example.com/cert" and ".../key"
- # to load externally managed certificates.
+ # Use "file:///var/lib/acme/live/example.com/cert" and
+ # ".../privkey" to load externally managed certificates.
Certificate: ""
Key: ""
# troubleshooting purposes.
LogReuseDecisions: false
- # Default value for keep_cache_ram of a container's runtime_constraints.
- DefaultKeepCacheRAM: 268435456
+ # Default value for keep_cache_ram of a container's
+ # runtime_constraints. Note: this gets added to the RAM request
+ # used to allocate a VM or submit an HPC job.
+ #
+ # If this is zero, container requests that don't specify RAM or
+ # disk cache size will use a disk cache, sized to the
+ # container's RAM requirement (but with minimum 2 GiB and
+ # maximum 32 GiB).
+ #
+ # Note: If you change this value, containers that used the previous
+ # default value will only be reused by container requests that
+ # explicitly specify the previous value in their keep_cache_ram
+ # runtime constraint.
+ DefaultKeepCacheRAM: 0
# Number of times a container can be unlocked before being
# automatically cancelled.
# with the cancelled container.
MaxRetryAttempts: 3
- # The maximum number of compute nodes that can be in use simultaneously
- # If this limit is reduced, any existing nodes with slot number >= new limit
- # will not be counted against the new limit. In other words, the new limit
- # won't be strictly enforced until those nodes with higher slot numbers
- # go down.
- MaxComputeVMs: 64
-
# Schedule all child containers on preemptible instances (e.g. AWS
# Spot Instances) even if not requested by the submitter.
#
# cloud dispatcher for executing containers on worker VMs.
# Begins with "-----BEGIN RSA PRIVATE KEY-----\n"
# and ends with "\n-----END RSA PRIVATE KEY-----\n".
+ #
+ # Use "file:///absolute/path/to/key" to load the key from a
+ # separate file instead of embedding it in the configuration
+ # file.
DispatchPrivateKey: ""
# Maximum time to wait for workers to come up before abandoning
# Extra RAM to reserve on the node, in addition to
# the amount specified in the container's RuntimeConstraints
- ReserveExtraRAM: 256MiB
+ ReserveExtraRAM: 550MiB
# Minimum time between two attempts to run the same container
MinRetryPeriod: 0s
LocalKeepLogsToContainerLog: none
Logging:
- # When you run the db:delete_old_container_logs task, it will find
- # containers that have been finished for at least this many seconds,
+ # Periodically (see SweepInterval) Arvados will check for
+ # containers that have been finished for at least this long,
# and delete their stdout, stderr, arv-mount, crunch-run, and
# crunchstat logs from the logs table.
MaxAge: 720h
+ # How often to delete cached log entries for finished
+ # containers (see MaxAge).
+ SweepInterval: 12h
+
# These two settings control how frequently log events are flushed to the
# database. Log lines are buffered until either crunch_log_bytes_per_event
# has been reached or crunch_log_seconds_between_events has elapsed since
# Maximum bytes that may be logged by a single job. Log bytes that are
# silenced by throttling are not counted against this total.
+ # If you set this to zero, each container will only create a single
+ # log on the API server, noting for users that logging is throttled.
LimitLogBytesPerJob: 67108864
LogPartialLineThrottlePeriod: 5s
# providers too, if desired.
MaxConcurrentInstanceCreateOps: 1
+ # The maximum number of instances to run at a time, or 0 for
+ # unlimited.
+ #
+ # If more instances than this are already running and busy
+ # when the dispatcher starts up, the running containers will
+ # be allowed to finish before the excess instances are shut
+ # down.
+ MaxInstances: 64
+
+ # Maximum fraction of CloudVMs.MaxInstances allowed to run
+ # "supervisor" containers at any given time. A supervisor is a
+ # container whose purpose is mainly to submit and manage other
+ # containers, such as the arvados-cwl-runner workflow runner.
+ #
+ # If there is a hard limit on the number of concurrent
+ # containers that the cluster can run, it is important to
+ # avoid crowding out the containers doing useful work with
+ # containers that just create more work.
+ #
+ # For example, with the default MaxInstances of 64, it will
+ # schedule at most floor(64*0.30) = 19 concurrent workflows,
+ # ensuring 45 slots are available for work.
+ SupervisorFraction: 0.30
+
# Interval between cloud provider syncs/updates ("list all
# instances").
SyncInterval: 1m
# https://xxxxx.blob.core.windows.net/system/Microsoft.Compute/Images/images/xxxxx.vhd
ImageID: ""
+ # Shell script to run on new instances using the cloud
+ # provider's UserData (EC2) or CustomData (Azure) feature.
+ #
+ # It is not necessary to include a #!/bin/sh line.
+ InstanceInitCommand: ""
+
# An executable file (located on the dispatcher host) to be
# copied to cloud instances at runtime and used as the
# container runner/supervisor. The default value is the
# version of crunch-run installed; see CrunchRunCommand above.
DeployRunnerBinary: "/proc/self/exe"
+ # Install the Dispatcher's SSH public key (derived from
+ # DispatchPrivateKey) when creating new cloud
+ # instances. Change this to false if you are using a different
+ # mechanism to pre-install the public key on new instances.
+ DeployPublicKey: true
+
# Tags to add on all resources (VMs, NICs, disks) created by
# the container dispatcher. (Arvados's own tags --
# InstanceType, IdleBehavior, and InstanceSecret -- will also
# the cloud dispatcher. Leave blank when not needed.
IAMInstanceProfile: ""
+ # (ec2) how often to look up spot instance pricing data
+ # (only while running spot instances) for the purpose of
+ # calculating container cost estimates. A value of 0
+ # disables spot price lookups entirely.
+ SpotPriceUpdateInterval: 24h
+
+ # (ec2) per-GiB-month cost of EBS volumes. Matches
+ # EBSVolumeType. Used to account for AddedScratch when
+ # calculating container cost estimates. Note that
+ # https://aws.amazon.com/ebs/pricing/ defines GB to mean
+ # GiB, so an advertised price $0.10/GB indicates a real
+ # price of $0.10/GiB and can be entered here as 0.10.
+ EBSPrice: 0.10
+
# (azure) Credentials.
SubscriptionID: ""
ClientID: ""
RAM: 128MiB
IncludedScratch: 16GB
AddedScratch: 0
+ # Hourly price ($), used to select node types for containers,
+ # and to calculate estimated container costs. For spot
+ # instances on EC2, this is also used as the maximum price
+ # when launching spot instances, while the estimated container
+ # cost is computed based on the current spot price according
+ # to AWS. On Azure, and for on-demand instances on EC2, the price
+ # given here is used to compute container cost estimates.
Price: 0.1
Preemptible: false
# Include this section if the node type includes GPU (CUDA) support
ReadTimeout: 10m
RaceWindow: 24h
PrefixLength: 0
- # Use aws-s3-go (v2) instead of goamz
- UseAWSS3v2Driver: false
# For S3 driver, potentially unsafe tuning parameter,
# intentionally excluded from main documentation.
# This feature is disabled when set to zero.
IdleTimeout: 0s
+ # UUID of a collection. This collection should be shared with
+ # all users. Workbench will look for a file "banner.html" in
+ # this collection and display its contents (should be
+ # HTML-formatted text) when users first log in to Workbench.
+ BannerUUID: ""
+
# Workbench welcome screen. This is HTML text that will be
# incorporated directly onto the page.
WelcomePageHTML: |