X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/aab9cf1b5d30c5e49152bff09dc1ee18d38973dc..92d6698f3c2d1c8109a9cf08f25da408c3a7b442:/lib/config/config.default.yml diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml index 7932c1df3d..b3e0d0657c 100644 --- a/lib/config/config.default.yml +++ b/lib/config/config.default.yml @@ -74,12 +74,6 @@ Clusters: Keepbalance: InternalURLs: {SAMPLE: {ListenURL: ""}} ExternalURL: "" - GitHTTP: - InternalURLs: {SAMPLE: {ListenURL: ""}} - ExternalURL: "" - GitSSH: - InternalURLs: {SAMPLE: {ListenURL: ""}} - ExternalURL: "" DispatchCloud: InternalURLs: {SAMPLE: {ListenURL: ""}} ExternalURL: "" @@ -223,10 +217,44 @@ Clusters: # parameter higher than this value, this value is used instead. MaxItemsPerResponse: 1000 - # Maximum number of concurrent requests to accept in a single - # service process, or 0 for no limit. + # Maximum number of requests to process concurrently + # in a single service process, or 0 for no limit. + # + # Note this applies to all Arvados services (controller, webdav, + # websockets, etc.). Concurrency in the controller service is + # also effectively limited by MaxConcurrentRailsRequests (see + # below) because most controller requests proxy through to the + # RailsAPI service. + # + # HTTP proxies and load balancers downstream of Arvados services + # should be configured to allow at least {MaxConcurrentRequests + + # MaxQueuedRequests + MaxGatewayTunnels} concurrent requests. MaxConcurrentRequests: 64 + # Maximum number of requests to process concurrently + # in a single RailsAPI service process, or 0 for no limit. + MaxConcurrentRailsRequests: 8 + + # Maximum number of incoming requests to hold in a priority + # queue waiting for one of the MaxConcurrentRequests slots to be + # free. When the queue is longer than this, respond 503 to the + # lowest priority request. 
+ # + # If MaxQueuedRequests is 0, respond 503 immediately to + # additional requests while at the MaxConcurrentRequests limit. + MaxQueuedRequests: 128 + + # Maximum time a "lock container" request is allowed to wait in + # the incoming request queue before returning 503. + MaxQueueTimeForLockRequests: 2s + + # Maximum number of active gateway tunnel connections. One slot + # is consumed by each "container shell" connection. If using an + # HPC dispatcher (LSF or Slurm), one slot is consumed by each + # running container. These do not count toward + # MaxConcurrentRequests. + MaxGatewayTunnels: 1000 + # Fraction of MaxConcurrentRequests that can be "log create" # messages at any given time. This is to prevent logging # updates from crowding out more important requests. @@ -306,7 +334,6 @@ Clusters: # AutoSetupUsernameBlacklist is a list of usernames to be blacklisted for auto setup. AutoSetupNewUsers: false AutoSetupNewUsersWithVmUUID: "" - AutoSetupNewUsersWithRepository: false AutoSetupUsernameBlacklist: arvados: {} git: {} @@ -331,34 +358,59 @@ Clusters: # false. ActivatedUsersAreVisibleToOthers: true - # The e-mail address of the user you would like to become marked as an admin - # user on their first login. + # If a user creates an account with this email address, they + # will be automatically set to admin. AutoAdminUserWithEmail: "" # If AutoAdminFirstUser is set to true, the first user to log in when no # other admin users exist will automatically become an admin user. AutoAdminFirstUser: false - # Email address to notify whenever a user creates a profile for the - # first time + # Recipient for notification email sent out when a user sets a + # profile on their account. 
UserProfileNotificationAddress: "" + + # When sending a NewUser, NewInactiveUser, or UserProfile + # notification, this is the 'From' address to use AdminNotifierEmailFrom: arvados@example.com + + # Prefix for email subjects for NewUser and NewInactiveUser emails EmailSubjectPrefix: "[ARVADOS] " + + # When sending a welcome email to the user, the 'From' address to use UserNotifierEmailFrom: arvados@example.com - UserNotifierEmailBcc: {} - NewUserNotificationRecipients: {} - NewInactiveUserNotificationRecipients: {} + + # The welcome email sent to new users will be blind copied to + # these addresses. + UserNotifierEmailBcc: + SAMPLE: {} + + # Recipients for notification email sent out when a user account + # is created and already set up to be able to log in + NewUserNotificationRecipients: + SAMPLE: {} + + # Recipients for notification email sent out when a user account + # has been created but the user cannot log in until they are + # set up by an admin. + NewInactiveUserNotificationRecipients: + SAMPLE: {} # Set AnonymousUserToken to enable anonymous user access. Populate this # field with a random string at least 50 characters long. AnonymousUserToken: "" - # If a new user has an alternate email address (local@domain) - # with the domain given here, its local part becomes the new - # user's default username. Otherwise, the user's primary email - # address is used. + # The login provider for a user may supply a primary email + # address and one or more alternate email addresses. If a new + # user has an alternate email address with the domain given + # here, use the username from the alternate email to generate + # the user's Arvados username. Otherwise, the username from + # user's primary email address is used for the Arvados username. + # Currently implemented for OpenID Connect only. PreferDomainForUsername: "" + # Ruby ERB template used for the email sent out to users when + # they have been set up. UserSetupMailText: | <% if not @user.full_name.empty? 
-%> <%= @user.full_name %>, @@ -399,6 +451,48 @@ Clusters: # Use 0 to disable activity logging. ActivityLoggingPeriod: 24h + # The SyncUser* options control what system resources are managed by + # arvados-login-sync on shell nodes. They correspond to: + # * SyncUserAccounts: The user's Unix account on the shell node + # * SyncUserGroups: The group memberships of that account + # * SyncUserSSHKeys: Whether to authorize the user's Arvados SSH keys + # * SyncUserAPITokens: Whether to set up the user's Arvados API token + # All default to true. + SyncUserAccounts: true + SyncUserGroups: true + SyncUserSSHKeys: true + SyncUserAPITokens: true + + # If SyncUserGroups=true, then arvados-login-sync will ensure that all + # managed accounts are members of the Unix groups listed in + # SyncRequiredGroups, in addition to any groups listed in their Arvados + # login permission. The default list includes the "fuse" group so + # users can use arv-mount. You can require no groups by specifying an + # empty list (i.e., `SyncRequiredGroups: []`). + SyncRequiredGroups: + - fuse + + # SyncIgnoredGroups is a list of group names. arvados-login-sync will + # never modify these groups. If user login permissions list any groups + # in SyncIgnoredGroups, they will be ignored. If a user's Unix account + # belongs to any of these groups, arvados-login-sync will not remove + # the account from that group. The default is a set of particularly + # security-sensitive groups across Debian- and Red Hat-based + # distributions. + SyncIgnoredGroups: + - adm + - disk + - kmem + - mem + - root + - shadow + - staff + - sudo + - sys + - utempter + - utmp + - wheel + AuditLogs: # Time to keep audit logs, in seconds. (An audit log is a row added # to the "logs" table in the PostgreSQL database each time an @@ -573,6 +667,15 @@ Clusters: # once. BalanceUpdateLimit: 100000 + # Maximum number of "pull block from other server" and "trash + # block" requests to send to each keepstore server at a + # time. 
Smaller values use less memory in keepstore and + # keep-balance. Larger values allow more progress per + # keep-balance iteration. A zero value computes all of the + # needed changes but does not apply any. + BalancePullLimit: 100000 + BalanceTrashLimit: 100000 + # Default lifetime for ephemeral collections: 2 weeks. This must not # be less than BlobSigningTTL. DefaultTrashLifetime: 336h @@ -647,16 +750,18 @@ Clusters: # Time to cache manifests, permission checks, and sessions. TTL: 300s - # Block cache entries. Each block consumes up to 64 MiB RAM. - MaxBlockEntries: 20 + # Maximum amount of data cached in /var/cache/arvados/keep. + # Can be given as a percentage ("10%") or a number of bytes + # ("10 GiB") + DiskCacheSize: 10% # Approximate memory limit (in bytes) for session cache. # # Note this applies to the in-memory representation of # projects and collections -- metadata, block locators, - # filenames, etc. -- excluding cached file content, which is - # limited by MaxBlockEntries. - MaxCollectionBytes: 100000000 + # filenames, etc. -- not the file data itself (see + # DiskCacheSize). + MaxCollectionBytes: 100 MB # Persistent sessions. MaxSessions: 100 @@ -689,6 +794,14 @@ Clusters: # load on the API server and you don't need it. WebDAVLogEvents: true + # Per-connection output buffer for WebDAV downloads. May improve + # throughput for large files, particularly when storage volumes + # have high latency. + # + # Size can be specified as a number of bytes ("0") or with units + # ("128KiB", "1 MB"). + WebDAVOutputBuffer: 0 + Login: # One of the following mechanisms (Google, PAM, LDAP, or # LoginCluster) should be enabled; see @@ -751,7 +864,7 @@ Clusters: # OpenID claim field containing the email verification # flag. Normally "email_verified". To accept every returned # email address without checking a "verified" field at all, - # use the empty string "". + # use an empty string "". 
EmailVerifiedClaim: "email_verified" # OpenID claim field containing the user's preferred @@ -938,24 +1051,6 @@ Clusters: # production use. TrustPrivateNetworks: false - Git: - # Path to git or gitolite-shell executable. Each authenticated - # request will execute this program with the single argument "http-backend" - GitCommand: /usr/bin/git - - # Path to Gitolite's home directory. If a non-empty path is given, - # the CGI environment will be set up to support the use of - # gitolite-shell as a GitCommand: for example, if GitoliteHome is - # "/gh", then the CGI environment will have GITOLITE_HTTP_HOME=/gh, - # PATH=$PATH:/gh/bin, and GL_BYPASS_ACCESS_CHECKS=1. - GitoliteHome: "" - - # Git repositories must be readable by api server, or you won't be - # able to submit crunch jobs. To pass the test suites, put a clone - # of the arvados tree in {git_repositories_dir}/arvados.git or - # {git_repositories_dir}/arvados/.git - Repositories: /var/lib/arvados/git/repositories - TLS: # Use "file:///var/lib/acme/live/example.com/cert" and # ".../privkey" to load externally managed certificates. @@ -1019,7 +1114,7 @@ Clusters: # Number of times a container can be unlocked before being # automatically cancelled. - MaxDispatchAttempts: 5 + MaxDispatchAttempts: 10 # Default value for container_count_max for container requests. This is the # number of times Arvados will create a new container to satisfy a container @@ -1047,10 +1142,25 @@ Clusters: # A price factor of 1.0 is a reasonable starting point. PreemptiblePriceFactor: 0 + # When the lowest-priced instance type for a given container is + # not available, try other instance types, up to the indicated + # maximum price factor. 
+ # + # For example, with MaximumPriceFactor 1.5, if the + # lowest-cost instance type A suitable for a given container + # costs $2/h, Arvados may run the container on any instance type + # B costing $3/h or less when instance type A is not available + # or an idle instance of type B is already running. + MaximumPriceFactor: 1.5 + # PEM encoded SSH key (RSA, DSA, or ECDSA) used by the # cloud dispatcher for executing containers on worker VMs. # Begins with "-----BEGIN RSA PRIVATE KEY-----\n" # and ends with "\n-----END RSA PRIVATE KEY-----\n". + # + # Use "file:///absolute/path/to/key" to load the key from a + # separate file instead of embedding it in the configuration + # file. DispatchPrivateKey: "" # Maximum time to wait for workers to come up before abandoning @@ -1135,32 +1245,6 @@ Clusters: # containers (see MaxAge). SweepInterval: 12h - # These two settings control how frequently log events are flushed to the - # database. Log lines are buffered until either crunch_log_bytes_per_event - # has been reached or crunch_log_seconds_between_events has elapsed since - # the last flush. - LogBytesPerEvent: 4096 - LogSecondsBetweenEvents: 5s - - # The sample period for throttling logs. - LogThrottlePeriod: 60s - - # Maximum number of bytes that job can log over crunch_log_throttle_period - # before being silenced until the end of the period. - LogThrottleBytes: 65536 - - # Maximum number of lines that job can log over crunch_log_throttle_period - # before being silenced until the end of the period. - LogThrottleLines: 1024 - - # Maximum bytes that may be logged by a single job. Log bytes that are - # silenced by throttling are not counted against this total. - # If you set this to zero, each container will only create a single - # log on the API server, noting for users that logging is throttled. 
- LimitLogBytesPerJob: 67108864 - - LogPartialLineThrottlePeriod: 5s - # Container logs are written to Keep and saved in a # collection, which is updated periodically while the # container runs. This value sets the interval between @@ -1197,47 +1281,6 @@ Clusters: SbatchArgumentsList: [] SbatchEnvironmentVariables: SAMPLE: "" - Managed: - # Path to dns server configuration directory - # (e.g. /etc/unbound.d/conf.d). If false, do not write any config - # files or touch restart.txt (see below). - DNSServerConfDir: "" - - # Template file for the dns server host snippets. See - # unbound.template in this directory for an example. If false, do - # not write any config files. - DNSServerConfTemplate: "" - - # String to write to {dns_server_conf_dir}/restart.txt (with a - # trailing newline) after updating local data. If false, do not - # open or write the restart.txt file. - DNSServerReloadCommand: "" - - # Command to run after each DNS update. Template variables will be - # substituted; see the "unbound" example below. If false, do not run - # a command. - DNSServerUpdateCommand: "" - - ComputeNodeDomain: "" - ComputeNodeNameservers: - "192.168.1.1": {} - SAMPLE: {} - - # Hostname to assign to a compute node when it sends a "ping" and the - # hostname in its Node record is nil. - # During bootstrapping, the "ping" script is expected to notice the - # hostname given in the ping response, and update its unix hostname - # accordingly. - # If false, leave the hostname alone (this is appropriate if your compute - # nodes' hostnames are already assigned by some other mechanism). - # - # One way or another, the hostnames of your node records should agree - # with your DNS records and your /etc/slurm-llnl/slurm.conf files. - # - # Example for compute0000, compute0001, ....: - # assign_node_hostname: compute%04d - # (See http://ruby-doc.org/core-2.2.2/Kernel.html#method-i-format for more.) 
- AssignNodeHostname: "compute%d" LSF: # Arguments to bsub when submitting Arvados containers as LSF jobs. @@ -1249,15 +1292,23 @@ Clusters: # %M memory in MB # %T tmp in MB # %G number of GPU devices (runtime_constraints.cuda.device_count) + # %W maximum run time in minutes (see MaxRunTimeOverhead and + # MaxRunTimeDefault below) # - # Use %% to express a literal %. The %%J in the default will be changed - # to %J, which is interpreted by bsub itself. + # Use %% to express a literal %. For example, the %%J in the + # default argument list will be changed to %J, which is + # interpreted by bsub itself. # # Note that the default arguments cause LSF to write two files # in /tmp on the compute node each time an Arvados container # runs. Ensure you have something in place to delete old files # from /tmp, or adjust the "-o" and "-e" arguments accordingly. - BsubArgumentsList: ["-o", "/tmp/crunch-run.%%J.out", "-e", "/tmp/crunch-run.%%J.err", "-J", "%U", "-n", "%C", "-D", "%MMB", "-R", "rusage[mem=%MMB:tmp=%TMB] span[hosts=1]", "-R", "select[mem>=%MMB]", "-R", "select[tmp>=%TMB]", "-R", "select[ncpus>=%C]"] + # + # If ["-We", "%W"] or ["-W", "%W"] appear in this argument + # list, and MaxRunTimeDefault is not set (see below), both of + # those arguments will be dropped from the argument list when + # running a container that has no max_run_time value. + BsubArgumentsList: ["-o", "/tmp/crunch-run.%%J.out", "-e", "/tmp/crunch-run.%%J.err", "-J", "%U", "-n", "%C", "-D", "%MMB", "-R", "rusage[mem=%MMB:tmp=%TMB] span[hosts=1]", "-R", "select[mem>=%MMB]", "-R", "select[tmp>=%TMB]", "-R", "select[ncpus>=%C]", "-We", "%W"] # Arguments that will be appended to the bsub command line # when submitting Arvados containers as LSF jobs with @@ -1272,6 +1323,19 @@ Clusters: # Arvados LSF dispatcher runs ("submission host"). 
BsubSudoUser: "crunch" + # When passing the scheduling_constraints.max_run_time value + # to LSF via "%W", add this much time to account for + # crunch-run startup/shutdown overhead. + MaxRunTimeOverhead: 5m + + # If non-zero, MaxRunTimeDefault is used as the default value + # for max_run_time for containers that do not specify a time + # limit. MaxRunTimeOverhead will be added to this. + # + # Example: + # MaxRunTimeDefault: 2h + MaxRunTimeDefault: 0 + JobsAPI: # Enable the legacy 'jobs' API (crunch v1). This value must be a string. # @@ -1284,12 +1348,6 @@ Clusters: # 'false' -- disable the Jobs API despite presence of existing records. Enable: 'auto' - # Git repositories must be readable by api server, or you won't be - # able to submit crunch jobs. To pass the test suites, put a clone - # of the arvados tree in {git_repositories_dir}/arvados.git or - # {git_repositories_dir}/arvados/.git - GitInternalDir: /var/lib/arvados/internal.git - CloudVMs: # Enable the cloud scheduler. Enable: false @@ -1354,10 +1412,31 @@ Clusters: # down. MaxInstances: 64 - # Maximum fraction of CloudVMs.MaxInstances allowed to run - # "supervisor" containers at any given time. A supervisor is a - # container whose purpose is mainly to submit and manage other - # containers, such as arvados-cwl-runner workflow runner. + # The minimum number of instances expected to be runnable + # without reaching a provider-imposed quota. + # + # This is used as the initial value for the dispatcher's + # dynamic instance limit, which increases (up to MaxInstances) + # as containers start up successfully and decreases in + # response to high API load and cloud quota errors. + # + # Setting this to 0 means the dynamic instance limit will + # start at MaxInstances. + # + # Situations where you may want to set this (to a value less + # than MaxInstances) would be when there is significant + # variability or uncertainty in the actual cloud resources + # available. 
Upon reaching InitialQuotaEstimate the + # dispatcher will switch to a more conservative behavior with + # slower instance start to avoid over-shooting cloud resource + # limits. + InitialQuotaEstimate: 0 + + # Maximum fraction of available instance capacity allowed to + # run "supervisor" containers at any given time. A supervisor + # is a container whose purpose is mainly to submit and manage + # other containers, such as arvados-cwl-runner workflow + # runner. # # If there is a hard limit on the amount of concurrent # containers that the cluster can run, it is important to @@ -1365,9 +1444,9 @@ Clusters: # containers who just create more work. # # For example, with the default MaxInstances of 64, it will - # schedule at most floor(64*0.30) = 19 concurrent workflows, - # ensuring 45 slots are available for work. - SupervisorFraction: 0.30 + # schedule at most floor(64*0.50) = 32 concurrent workflow + # runners, ensuring 32 slots are available for work. + SupervisorFraction: 0.50 # Interval between cloud provider syncs/updates ("list all # instances"). @@ -1410,7 +1489,7 @@ Clusters: # container runner/supervisor. The default value is the # dispatcher program itself. # - # Use the empty string to disable this step: nothing will be + # Use an empty string to disable this step: nothing will be # copied, and cloud instances are assumed to have a suitable # version of crunch-run installed; see CrunchRunCommand above. DeployRunnerBinary: "/proc/self/exe" @@ -1452,10 +1531,23 @@ Clusters: SecretAccessKey: "" # (ec2) Instance configuration. + + # (ec2) Region, like "us-east-1". + Region: "" + + # (ec2) Security group IDs. Omit or use {} to use the + # default security group. SecurityGroupIDs: "SAMPLE": {} + + # (ec2) One or more subnet IDs. Omit or leave empty to let + # AWS choose a default subnet from your default VPC. 
If + # multiple subnets are configured here (enclosed in brackets + # like [subnet-abc123, subnet-def456]) the cloud dispatcher + # will detect subnet-related errors and retry using a + # different subnet. Most sites specify one subnet. SubnetID: "" - Region: "" + EBSVolumeType: gp2 AdminUsername: debian # (ec2) name of the IAMInstanceProfile for instances started by @@ -1588,6 +1680,11 @@ Clusters: ReadOnly: false "http://host1.example:25107": {} ReadOnly: false + # AllowTrashWhenReadOnly enables unused and overreplicated + # blocks to be trashed/deleted even when ReadOnly is + # true. Normally, this is false and ReadOnly prevents all + # trash/delete operations as well as writes. + AllowTrashWhenReadOnly: false Replication: 1 StorageClasses: # If you have configured storage classes (see StorageClasses @@ -1656,8 +1753,18 @@ Clusters: Serialize: false Mail: - MailchimpAPIKey: "" - MailchimpListID: "" + # In order to send mail, Arvados expects a default SMTP server + # on localhost:25. It cannot require authentication on + # connections from localhost. That server should be configured + # to relay mail to a "real" SMTP server that is able to send + # email on behalf of your domain. + + # See also the "Users" configuration section for additional + # email-related options. + + # When a user has been set up (meaning they are able to log in) + # they will receive an email using the template specified + # earlier in Users.UserSetupMailText SendUserSetupNotificationEmail: true # Bug/issue report notification to and from addresses @@ -1667,6 +1774,10 @@ Clusters: # Generic issue email from EmailFrom: "arvados@example.com" + + # No longer supported, to be removed. 
+ MailchimpAPIKey: "" + MailchimpListID: "" RemoteClusters: "*": Host: "" @@ -1700,18 +1811,12 @@ Clusters: ArvadosDocsite: https://doc.arvados.org ArvadosPublicDataDocURL: https://playground.arvados.org/projects/public ShowUserAgreementInline: false - SecretKeyBase: "" # Set this configuration to true to avoid providing an easy way for users # to share data with unauthenticated users; this may be necessary on # installations where strict data access controls are needed. DisableSharingURLsUI: false - # Scratch directory used by the remote repository browsing - # feature. If it doesn't exist, it (and any missing parents) will be - # created using mkdir_p. - RepositoryCache: /var/www/arvados-workbench/current/tmp/git - # Below is a sample setting of user_profile_form_fields config parameter. # This configuration parameter should be set to either false (to disable) or # to a map as shown below. @@ -1758,71 +1863,7 @@ Clusters: # to display on the profile page. UserProfileFormMessage: 'Welcome to Arvados. All required fields must be completed before you can proceed.' - # Mimetypes of applications for which the view icon - # would be enabled in a collection's show page. - # It is sufficient to list only applications here. - # No need to list text and image types. - ApplicationMimetypesWithViewIcon: - cwl: {} - fasta: {} - go: {} - javascript: {} - json: {} - pdf: {} - python: {} - x-python: {} - r: {} - rtf: {} - sam: {} - x-sh: {} - vnd.realvnc.bed: {} - xml: {} - xsl: {} - SAMPLE: {} - - # The maximum number of bytes to load in the log viewer - LogViewerMaxBytes: 1M - - # When anonymous_user_token is configured, show public projects page - EnablePublicProjectsPage: true - - # By default, disable the "Getting Started" popup which is specific to Arvados playground - EnableGettingStartedPopup: false - - # Ask Arvados API server to compress its response payloads. - APIResponseCompression: true - - # Timeouts for API requests. 
- APIClientConnectTimeout: 2m - APIClientReceiveTimeout: 5m - - # Maximum number of historic log records of a running job to fetch - # and display in the Log tab, while subscribing to web sockets. - RunningJobLogRecordsToFetch: 2000 - - # In systems with many shared projects, loading of dashboard and topnav - # can be slow due to collections indexing; use the following parameters - # to suppress these properties - ShowRecentCollectionsOnDashboard: true - ShowUserNotifications: true - - # Enable/disable "multi-site search" in top nav ("true"/"false"), or - # a link to the multi-site search page on a "home" Workbench site. - # - # Example: - # https://workbench.zzzzz.arvadosapi.com/collections/multisite - MultiSiteSearch: "" - - # Should workbench allow management of local git repositories? Set to false if - # the jobs api is disabled and there are no local git repositories. - Repositories: true - SiteName: Arvados Workbench - ProfilingEnabled: false - - # This is related to obsolete Google OpenID 1.0 login - # but some workbench stuff still expects it to be set. - DefaultOpenIdPrefix: "https://www.google.com/accounts/o8/id" # Workbench2 configs FileViewersConfigURL: ""