Merge branch '21834-stdout-timestamps'

[arvados.git] / lib / config / config.default.yml
diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml

index 4e5c051602a994070df7d99adc4cce4acdec314d..f84869d7fbee46c41646b787728423e8c827abf1 100644 (file)
--- a/lib/config/config.default.yml
+++ b/lib/config/config.default.yml
@@ -74,12 +74,6 @@ Clusters:
        Keepbalance:
          InternalURLs: {SAMPLE: {ListenURL: ""}}
          ExternalURL: ""
-      GitHTTP:
-        InternalURLs: {SAMPLE: {ListenURL: ""}}
-        ExternalURL: ""
-      GitSSH:
-        InternalURLs: {SAMPLE: {ListenURL: ""}}
-        ExternalURL: ""
        DispatchCloud:
          InternalURLs: {SAMPLE: {ListenURL: ""}}
          ExternalURL: ""
@@ -225,7 +219,21 @@ Clusters:
  
        # Maximum number of concurrent requests to process concurrently
        # in a single service process, or 0 for no limit.
-      MaxConcurrentRequests: 8
+      #
+      # Note this applies to all Arvados services (controller, webdav,
+      # websockets, etc.). Concurrency in the controller service is
+      # also effectively limited by MaxConcurrentRailsRequests (see
+      # below) because most controller requests proxy through to the
+      # RailsAPI service.
+      #
+      # HTTP proxies and load balancers downstream of arvados services
+      # should be configured to allow at least {MaxConcurrentRequests +
+      # MaxQueuedRequests + MaxGatewayTunnels} concurrent requests.
+      MaxConcurrentRequests: 64
+
+      # Maximum number of requests to process concurrently
+      # in a single RailsAPI service process, or 0 for no limit.
+      MaxConcurrentRailsRequests: 8
  
        # Maximum number of incoming requests to hold in a priority
        # queue waiting for one of the MaxConcurrentRequests slots to be
@@ -240,6 +248,13 @@ Clusters:
        # the incoming request queue before returning 503.
        MaxQueueTimeForLockRequests: 2s
  
+      # Maximum number of active gateway tunnel connections. One slot
+      # is consumed by each "container shell" connection. If using an
+      # HPC dispatcher (LSF or Slurm), one slot is consumed by each
+      # running container.  These do not count toward
+      # MaxConcurrentRequests.
+      MaxGatewayTunnels: 1000
+
        # Fraction of MaxConcurrentRequests that can be "log create"
        # messages at any given time.  This is to prevent logging
        # updates from crowding out more important requests.
@@ -319,7 +334,6 @@ Clusters:
        # AutoSetupUsernameBlacklist is a list of usernames to be blacklisted for auto setup.
        AutoSetupNewUsers: false
        AutoSetupNewUsersWithVmUUID: ""
-      AutoSetupNewUsersWithRepository: false
        AutoSetupUsernameBlacklist:
          arvados: {}
          git: {}
@@ -344,34 +358,59 @@ Clusters:
        # false.
        ActivatedUsersAreVisibleToOthers: true
  
-      # The e-mail address of the user you would like to become marked as an admin
-      # user on their first login.
+      # If a user creates an account with this email address, they
+      # will be automatically set to admin.
        AutoAdminUserWithEmail: ""
  
        # If AutoAdminFirstUser is set to true, the first user to log in when no
        # other admin users exist will automatically become an admin user.
        AutoAdminFirstUser: false
  
-      # Email address to notify whenever a user creates a profile for the
-      # first time
+      # Recipient for notification email sent out when a user sets a
+      # profile on their account.
        UserProfileNotificationAddress: ""
+
+      # When sending a NewUser, NewInactiveUser, or UserProfile
+      # notification, this is the 'From' address to use
        AdminNotifierEmailFrom: arvados@example.com
+
+      # Prefix for email subjects for NewUser and NewInactiveUser emails
        EmailSubjectPrefix: "[ARVADOS] "
+
+      # When sending a welcome email to the user, the 'From' address to use
        UserNotifierEmailFrom: arvados@example.com
-      UserNotifierEmailBcc: {}
-      NewUserNotificationRecipients: {}
-      NewInactiveUserNotificationRecipients: {}
+
+      # The welcome email sent to new users will be blind copied to
+      # these addresses.
+      UserNotifierEmailBcc:
+        SAMPLE: {}
+
+      # Recipients for notification email sent out when a user account
+      # is created and already set up to be able to log in
+      NewUserNotificationRecipients:
+        SAMPLE: {}
+
+      # Recipients for notification email sent out when a user account
+      # has been created but the user cannot log in until they are
+      # set up by an admin.
+      NewInactiveUserNotificationRecipients:
+        SAMPLE: {}
  
        # Set AnonymousUserToken to enable anonymous user access. Populate this
        # field with a random string at least 50 characters long.
        AnonymousUserToken: ""
  
-      # If a new user has an alternate email address (local@domain)
-      # with the domain given here, its local part becomes the new
-      # user's default username. Otherwise, the user's primary email
-      # address is used.
+      # The login provider for a user may supply a primary email
+      # address and one or more alternate email addresses.  If a new
+      # user has an alternate email address with the domain given
+      # here, use the username from the alternate email to generate
+      # the user's Arvados username. Otherwise, the username from the
+      # user's primary email address is used for the Arvados username.
+      # Currently implemented for OpenID Connect only.
        PreferDomainForUsername: ""
  
+      # Ruby ERB template used for the email sent out to users when
+      # they have been set up.
        UserSetupMailText: |
          <% if not @user.full_name.empty? -%>
          <%= @user.full_name %>,
@@ -628,6 +667,15 @@ Clusters:
        # once.
        BalanceUpdateLimit: 100000
  
+      # Maximum number of "pull block from other server" and "trash
+      # block" requests to send to each keepstore server at a
+      # time. Smaller values use less memory in keepstore and
+      # keep-balance. Larger values allow more progress per
+      # keep-balance iteration. A zero value computes all of the
+      # needed changes but does not apply any.
+      BalancePullLimit: 100000
+      BalanceTrashLimit: 100000
+
        # Default lifetime for ephemeral collections: 2 weeks. This must not
        # be less than BlobSigningTTL.
        DefaultTrashLifetime: 336h
@@ -702,16 +750,18 @@ Clusters:
          # Time to cache manifests, permission checks, and sessions.
          TTL: 300s
  
-        # Block cache entries. Each block consumes up to 64 MiB RAM.
-        MaxBlockEntries: 20
+        # Maximum amount of data cached in /var/cache/arvados/keep.
+        # Can be given as a percentage of filesystem size ("10%") or a
+        # number of bytes ("10 GiB")
+        DiskCacheSize: 10%
  
          # Approximate memory limit (in bytes) for session cache.
          #
          # Note this applies to the in-memory representation of
          # projects and collections -- metadata, block locators,
-        # filenames, etc. -- excluding cached file content, which is
-        # limited by MaxBlockEntries.
-        MaxCollectionBytes: 100000000
+        # filenames, etc. -- not the file data itself (see
+        # DiskCacheSize).
+        MaxCollectionBytes: 100 MB
  
          # Persistent sessions.
          MaxSessions: 100
@@ -744,6 +794,14 @@ Clusters:
        # load on the API server and you don't need it.
        WebDAVLogEvents: true
  
+      # Per-connection output buffer for WebDAV downloads. May improve
+      # throughput for large files, particularly when storage volumes
+      # have high latency.
+      #
+      # Size can be specified as a number of bytes ("0") or with units
+      # ("128KiB", "1 MB").
+      WebDAVOutputBuffer: 0
+
      Login:
        # One of the following mechanisms (Google, PAM, LDAP, or
        # LoginCluster) should be enabled; see
@@ -806,7 +864,7 @@ Clusters:
          # OpenID claim field containing the email verification
          # flag. Normally "email_verified".  To accept every returned
          # email address without checking a "verified" field at all,
-        # use the empty string "".
+        # use an empty string "".
          EmailVerifiedClaim: "email_verified"
  
          # OpenID claim field containing the user's preferred
@@ -993,24 +1051,6 @@ Clusters:
        # production use.
        TrustPrivateNetworks: false
  
-    Git:
-      # Path to git or gitolite-shell executable. Each authenticated
-      # request will execute this program with the single argument "http-backend"
-      GitCommand: /usr/bin/git
-
-      # Path to Gitolite's home directory. If a non-empty path is given,
-      # the CGI environment will be set up to support the use of
-      # gitolite-shell as a GitCommand: for example, if GitoliteHome is
-      # "/gh", then the CGI environment will have GITOLITE_HTTP_HOME=/gh,
-      # PATH=$PATH:/gh/bin, and GL_BYPASS_ACCESS_CHECKS=1.
-      GitoliteHome: ""
-
-      # Git repositories must be readable by api server, or you won't be
-      # able to submit crunch jobs. To pass the test suites, put a clone
-      # of the arvados tree in {git_repositories_dir}/arvados.git or
-      # {git_repositories_dir}/arvados/.git
-      Repositories: /var/lib/arvados/git/repositories
-
      TLS:
        # Use "file:///var/lib/acme/live/example.com/cert" and
        # ".../privkey" to load externally managed certificates.
@@ -1102,6 +1142,17 @@ Clusters:
        # A price factor of 1.0 is a reasonable starting point.
        PreemptiblePriceFactor: 0
  
+      # When the lowest-priced instance type for a given container is
+      # not available, try other instance types, up to the indicated
+      # maximum price factor.
+      #
+      # For example, with MaximumPriceFactor 1.5, if the
+      # lowest-cost instance type A suitable for a given container
+      # costs $2/h, Arvados may run the container on any instance type
+      # B costing $3/h or less when instance type A is not available
+      # or an idle instance of type B is already running.
+      MaximumPriceFactor: 1.5
+
        # PEM encoded SSH key (RSA, DSA, or ECDSA) used by the
        # cloud dispatcher for executing containers on worker VMs.
        # Begins with "-----BEGIN RSA PRIVATE KEY-----\n"
@@ -1184,42 +1235,6 @@ Clusters:
        LocalKeepLogsToContainerLog: none
  
        Logging:
-        # Periodically (see SweepInterval) Arvados will check for
-        # containers that have been finished for at least this long,
-        # and delete their stdout, stderr, arv-mount, crunch-run, and
-        # crunchstat logs from the logs table.
-        MaxAge: 720h
-
-        # How often to delete cached log entries for finished
-        # containers (see MaxAge).
-        SweepInterval: 12h
-
-        # These two settings control how frequently log events are flushed to the
-        # database.  Log lines are buffered until either crunch_log_bytes_per_event
-        # has been reached or crunch_log_seconds_between_events has elapsed since
-        # the last flush.
-        LogBytesPerEvent: 4096
-        LogSecondsBetweenEvents: 5s
-
-        # The sample period for throttling logs.
-        LogThrottlePeriod: 60s
-
-        # Maximum number of bytes that job can log over crunch_log_throttle_period
-        # before being silenced until the end of the period.
-        LogThrottleBytes: 65536
-
-        # Maximum number of lines that job can log over crunch_log_throttle_period
-        # before being silenced until the end of the period.
-        LogThrottleLines: 1024
-
-        # Maximum bytes that may be logged by a single job.  Log bytes that are
-        # silenced by throttling are not counted against this total.
-        # If you set this to zero, each container will only create a single
-        # log on the API server, noting for users that logging is throttled.
-        LimitLogBytesPerJob: 67108864
-
-        LogPartialLineThrottlePeriod: 5s
-
          # Container logs are written to Keep and saved in a
          # collection, which is updated periodically while the
          # container runs.  This value sets the interval between
@@ -1256,47 +1271,6 @@ Clusters:
          SbatchArgumentsList: []
          SbatchEnvironmentVariables:
            SAMPLE: ""
-        Managed:
-          # Path to dns server configuration directory
-          # (e.g. /etc/unbound.d/conf.d). If false, do not write any config
-          # files or touch restart.txt (see below).
-          DNSServerConfDir: ""
-
-          # Template file for the dns server host snippets. See
-          # unbound.template in this directory for an example. If false, do
-          # not write any config files.
-          DNSServerConfTemplate: ""
-
-          # String to write to {dns_server_conf_dir}/restart.txt (with a
-          # trailing newline) after updating local data. If false, do not
-          # open or write the restart.txt file.
-          DNSServerReloadCommand: ""
-
-          # Command to run after each DNS update. Template variables will be
-          # substituted; see the "unbound" example below. If false, do not run
-          # a command.
-          DNSServerUpdateCommand: ""
-
-          ComputeNodeDomain: ""
-          ComputeNodeNameservers:
-            "192.168.1.1": {}
-            SAMPLE: {}
-
-          # Hostname to assign to a compute node when it sends a "ping" and the
-          # hostname in its Node record is nil.
-          # During bootstrapping, the "ping" script is expected to notice the
-          # hostname given in the ping response, and update its unix hostname
-          # accordingly.
-          # If false, leave the hostname alone (this is appropriate if your compute
-          # nodes' hostnames are already assigned by some other mechanism).
-          #
-          # One way or another, the hostnames of your node records should agree
-          # with your DNS records and your /etc/slurm-llnl/slurm.conf files.
-          #
-          # Example for compute0000, compute0001, ....:
-          # assign_node_hostname: compute%<slot_number>04d
-          # (See http://ruby-doc.org/core-2.2.2/Kernel.html#method-i-format for more.)
-          AssignNodeHostname: "compute%<slot_number>d"
  
        LSF:
          # Arguments to bsub when submitting Arvados containers as LSF jobs.
@@ -1308,15 +1282,23 @@ Clusters:
          # %M memory in MB
          # %T tmp in MB
          # %G number of GPU devices (runtime_constraints.cuda.device_count)
+        # %W maximum run time in minutes (see MaxRunTimeOverhead and
+        #    MaxRunTimeDefault below)
          #
-        # Use %% to express a literal %. The %%J in the default will be changed
-        # to %J, which is interpreted by bsub itself.
+        # Use %% to express a literal %. For example, the %%J in the
+        # default argument list will be changed to %J, which is
+        # interpreted by bsub itself.
          #
          # Note that the default arguments cause LSF to write two files
          # in /tmp on the compute node each time an Arvados container
          # runs. Ensure you have something in place to delete old files
          # from /tmp, or adjust the "-o" and "-e" arguments accordingly.
-        BsubArgumentsList: ["-o", "/tmp/crunch-run.%%J.out", "-e", "/tmp/crunch-run.%%J.err", "-J", "%U", "-n", "%C", "-D", "%MMB", "-R", "rusage[mem=%MMB:tmp=%TMB] span[hosts=1]", "-R", "select[mem>=%MMB]", "-R", "select[tmp>=%TMB]", "-R", "select[ncpus>=%C]"]
+        #
+        # If ["-We", "%W"] or ["-W", "%W"] appear in this argument
+        # list, and MaxRunTimeDefault is not set (see below), both of
+        # those arguments will be dropped from the argument list when
+        # running a container that has no max_run_time value.
+        BsubArgumentsList: ["-o", "/tmp/crunch-run.%%J.out", "-e", "/tmp/crunch-run.%%J.err", "-J", "%U", "-n", "%C", "-D", "%MMB", "-R", "rusage[mem=%MMB:tmp=%TMB] span[hosts=1]", "-R", "select[mem>=%MMB]", "-R", "select[tmp>=%TMB]", "-R", "select[ncpus>=%C]", "-We", "%W"]
  
          # Arguments that will be appended to the bsub command line
          # when submitting Arvados containers as LSF jobs with
@@ -1331,6 +1313,19 @@ Clusters:
          # Arvados LSF dispatcher runs ("submission host").
          BsubSudoUser: "crunch"
  
+        # When passing the scheduling_constraints.max_run_time value
+        # to LSF via "%W", add this much time to account for
+        # crunch-run startup/shutdown overhead.
+        MaxRunTimeOverhead: 5m
+
+        # If non-zero, MaxRunTimeDefault is used as the default value
+        # for max_run_time for containers that do not specify a time
+        # limit.  MaxRunTimeOverhead will be added to this.
+        #
+        # Example:
+        # MaxRunTimeDefault: 2h
+        MaxRunTimeDefault: 0
+
        JobsAPI:
          # Enable the legacy 'jobs' API (crunch v1).  This value must be a string.
          #
@@ -1343,12 +1338,6 @@ Clusters:
          # 'false' -- disable the Jobs API despite presence of existing records.
          Enable: 'auto'
  
-        # Git repositories must be readable by api server, or you won't be
-        # able to submit crunch jobs. To pass the test suites, put a clone
-        # of the arvados tree in {git_repositories_dir}/arvados.git or
-        # {git_repositories_dir}/arvados/.git
-        GitInternalDir: /var/lib/arvados/internal.git
-
        CloudVMs:
          # Enable the cloud scheduler.
          Enable: false
@@ -1421,16 +1410,17 @@ Clusters:
          # as containers start up successfully and decreases in
          # response to high API load and cloud quota errors.
          #
-        # Setting this too high creates a risk that the dispatcher
-        # will cause deadlock by starting so many supervisor
-        # containers (based on SupervisorFraction and MaxInstances)
-        # that the cloud quota prevents them from running any child
-        # containers.
+        # Setting this to 0 means the dynamic instance limit will
+        # start at MaxInstances.
          #
-        # Setting this too low causes the dispatcher to be
-        # unnecessarily slow to start up new instances after a
-        # restart.
-        InitialQuotaEstimate: 16
+        # Situations where you may want to set this (to a value less
+        # than MaxInstances) would be when there is significant
+        # variability or uncertainty in the actual cloud resources
+        # available.  Upon reaching InitialQuotaEstimate the
+        # dispatcher will switch to a more conservative behavior with
+        # slower instance start to avoid over-shooting cloud resource
+        # limits.
+        InitialQuotaEstimate: 0
  
          # Maximum fraction of available instance capacity allowed to
          # run "supervisor" containers at any given time. A supervisor
@@ -1444,9 +1434,9 @@ Clusters:
          # containers who just create more work.
          #
          # For example, with the default MaxInstances of 64, it will
-        # schedule at most floor(64*0.30) = 19 concurrent workflows,
-        # ensuring 45 slots are available for work.
-        SupervisorFraction: 0.30
+        # schedule at most floor(64*0.50) = 32 concurrent workflow
+        # runners, ensuring 32 slots are available for work.
+        SupervisorFraction: 0.50
  
          # Interval between cloud provider syncs/updates ("list all
          # instances").
@@ -1489,7 +1479,7 @@ Clusters:
          # container runner/supervisor. The default value is the
          # dispatcher program itself.
          #
-        # Use the empty string to disable this step: nothing will be
+        # Use an empty string to disable this step: nothing will be
          # copied, and cloud instances are assumed to have a suitable
          # version of crunch-run installed; see CrunchRunCommand above.
          DeployRunnerBinary: "/proc/self/exe"
@@ -1531,10 +1521,23 @@ Clusters:
            SecretAccessKey: ""
  
            # (ec2) Instance configuration.
+
+          # (ec2) Region, like "us-east-1".
+          Region: ""
+
+          # (ec2) Security group IDs. Omit or use {} to use the
+          # default security group.
            SecurityGroupIDs:
              "SAMPLE": {}
+
+          # (ec2) One or more subnet IDs. Omit or leave empty to let
+          # AWS choose a default subnet from your default VPC. If
+          # multiple subnets are configured here (enclosed in brackets
+          # like [subnet-abc123, subnet-def456]) the cloud dispatcher
+          # will detect subnet-related errors and retry using a
+          # different subnet. Most sites specify one subnet.
            SubnetID: ""
-          Region: ""
+
            EBSVolumeType: gp2
            AdminUsername: debian
            # (ec2) name of the IAMInstanceProfile for instances started by
@@ -1667,6 +1670,11 @@ Clusters:
              ReadOnly: false
            "http://host1.example:25107": {}
          ReadOnly: false
+        # AllowTrashWhenReadOnly enables unused and overreplicated
+        # blocks to be trashed/deleted even when ReadOnly is
+        # true. Normally, this is false and ReadOnly prevents all
+        # trash/delete operations as well as writes.
+        AllowTrashWhenReadOnly: false
          Replication: 1
          StorageClasses:
            # If you have configured storage classes (see StorageClasses
@@ -1677,7 +1685,6 @@ Clusters:
          DriverParameters:
            # for s3 driver -- see
            # https://doc.arvados.org/install/configure-s3-object-storage.html
-          IAMRole: aaaaa
            AccessKeyID: aaaaa
            SecretAccessKey: aaaaa
            Endpoint: ""
@@ -1735,8 +1742,18 @@ Clusters:
            Serialize: false
  
      Mail:
-      MailchimpAPIKey: ""
-      MailchimpListID: ""
+      # In order to send mail, Arvados expects a default SMTP server
+      # on localhost:25.  It cannot require authentication on
+      # connections from localhost.  That server should be configured
+      # to relay mail to a "real" SMTP server that is able to send
+      # email on behalf of your domain.
+
+      # See also the "Users" configuration section for additional
+      # email-related options.
+
+      # When a user has been set up (meaning they are able to log in)
+      # they will receive an email using the template specified
+      # earlier in Users.UserSetupMailText
        SendUserSetupNotificationEmail: true
  
        # Bug/issue report notification to and from addresses
@@ -1746,6 +1763,10 @@ Clusters:
  
        # Generic issue email from
        EmailFrom: "arvados@example.com"
+
+      # No longer supported, to be removed.
+      MailchimpAPIKey: ""
+      MailchimpListID: ""
      RemoteClusters:
        "*":
          Host: ""
@@ -1779,18 +1800,12 @@ Clusters:
        ArvadosDocsite: https://doc.arvados.org
        ArvadosPublicDataDocURL: https://playground.arvados.org/projects/public
        ShowUserAgreementInline: false
-      SecretKeyBase: ""
  
        # Set this configuration to true to avoid providing an easy way for users
        # to share data with unauthenticated users; this may be necessary on
        # installations where strict data access controls are needed.
        DisableSharingURLsUI: false
  
-      # Scratch directory used by the remote repository browsing
-      # feature. If it doesn't exist, it (and any missing parents) will be
-      # created using mkdir_p.
-      RepositoryCache: /var/www/arvados-workbench/current/tmp/git
-
        # Below is a sample setting of user_profile_form_fields config parameter.
        # This configuration parameter should be set to either false (to disable) or
        # to a map as shown below.
@@ -1837,71 +1852,7 @@ Clusters:
        # to display on the profile page.
        UserProfileFormMessage: 'Welcome to Arvados. All <span style="color:red">required fields</span> must be completed before you can proceed.'
  
-      # Mimetypes of applications for which the view icon
-      # would be enabled in a collection's show page.
-      # It is sufficient to list only applications here.
-      # No need to list text and image types.
-      ApplicationMimetypesWithViewIcon:
-        cwl: {}
-        fasta: {}
-        go: {}
-        javascript: {}
-        json: {}
-        pdf: {}
-        python: {}
-        x-python: {}
-        r: {}
-        rtf: {}
-        sam: {}
-        x-sh: {}
-        vnd.realvnc.bed: {}
-        xml: {}
-        xsl: {}
-        SAMPLE: {}
-
-      # The maximum number of bytes to load in the log viewer
-      LogViewerMaxBytes: 1M
-
-      # When anonymous_user_token is configured, show public projects page
-      EnablePublicProjectsPage: true
-
-      # By default, disable the "Getting Started" popup which is specific to Arvados playground
-      EnableGettingStartedPopup: false
-
-      # Ask Arvados API server to compress its response payloads.
-      APIResponseCompression: true
-
-      # Timeouts for API requests.
-      APIClientConnectTimeout: 2m
-      APIClientReceiveTimeout: 5m
-
-      # Maximum number of historic log records of a running job to fetch
-      # and display in the Log tab, while subscribing to web sockets.
-      RunningJobLogRecordsToFetch: 2000
-
-      # In systems with many shared projects, loading of dashboard and topnav
-      # can be slow due to collections indexing; use the following parameters
-      # to suppress these properties
-      ShowRecentCollectionsOnDashboard: true
-      ShowUserNotifications: true
-
-      # Enable/disable "multi-site search" in top nav ("true"/"false"), or
-      # a link to the multi-site search page on a "home" Workbench site.
-      #
-      # Example:
-      #   https://workbench.zzzzz.arvadosapi.com/collections/multisite
-      MultiSiteSearch: ""
-
-      # Should workbench allow management of local git repositories? Set to false if
-      # the jobs api is disabled and there are no local git repositories.
-      Repositories: true
-
        SiteName: Arvados Workbench
-      ProfilingEnabled: false
-
-      # This is related to obsolete Google OpenID 1.0 login
-      # but some workbench stuff still expects it to be set.
-      DefaultOpenIdPrefix: "https://www.google.com/accounts/o8/id"
  
        # Workbench2 configs
        FileViewersConfigURL: ""